From 3321ea1eca372e72514eb41474d8192114bdc90d Mon Sep 17 00:00:00 2001
From: agopi
Date: Fri, 6 Apr 2018 13:02:05 -0400
Subject: [PATCH 1/7] Updated to work with elastic5

---
 bml/bml.py                     | 4 ++--
 bml/config.yml                 | 2 +-
 bml/lib/elastic_backend.py     | 5 +----
 bml/lib/timeseries_uploaddb.py | 4 +---
 requirements.txt               | 2 +-
 5 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/bml/bml.py b/bml/bml.py
index 65eb3fb..16a0ea1 100644
--- a/bml/bml.py
+++ b/bml/bml.py
@@ -94,9 +94,9 @@ def main():
                                            str(args.days) + "d",
                                            args.version, update=False)
     elif args.timeseries_uuid is not None:
-        lib.timeseries_uploaddb.insert_timeseriessummaries_db(config, args.timeseries_uuid)  # noqa
+        lib.timeseries_uploaddb.insert_timeseriessummaries_db(es_backend, config, args.timeseries_uuid)  # noqa
     elif args.loggin_uuid is not None:
-        lib.logsummary_uploaddb.insert_logsummary_db(config, args.loggin_uuid)  # noqa
+        lib.logsummary_uploaddb.insert_logsummary_db(es_backend, config, args.loggin_uuid)  # noqa
     elif args.summary_uuid is not None:
         lib.data_summary.summary_uuid(es_backend, config,
                                       args.summary_uuid, args.update)
diff --git a/bml/config.yml b/bml/config.yml
index 08c18b5..b54cbc0 100644
--- a/bml/config.yml
+++ b/bml/config.yml
@@ -1,4 +1,4 @@
-elastic-host: elk.browbeatproject.org
+elastic-host: elk-b09-h30-r720xd.rdu.openstack.engineering.redhat.com
 elastic-port: 9200
 
 tests:
diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index 3aa6663..fc707c3 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -14,10 +14,6 @@ def __init__(self, host, port):
                                  'port': port}],
                                send_get_body_as='POST',
                                retries=True,
-                               sniff_on_start=True,
-                               sniff_on_connection_fail=True,
-                               sniff_timeout=10,
-                               sniffer_timeout=120,
                                timeout=120)
 
     def grab_uuids_by_date(self, version, time_period):
@@ -53,6 +49,7 @@ def grab_uuid(self, uuid):
         query = {"query": {"match": {'browbeat_uuid': uuid}}}
         results = helpers.scan(self.es,
                                query,
+                               raise_on_error=False,
                                size=100,
                                request_timeout=1000)
 
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index 1de722b..a9ec665 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -28,9 +28,7 @@ def get_features(gdata, pos):
     return [mean, percentile95]
 
 
-def insert_timeseriessummaries_db(config, uuid):
-    # WIP should pass the backend object here
-    elastic = Backend("elk.browbeatproject.org", "9200")
+def insert_timeseriessummaries_db(elastic, config, uuid):
     brun = browbeat_run(elastic, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     graphite_url = graphite_details[0]
diff --git a/requirements.txt b/requirements.txt
index 588de70..c6d1a27 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-elasticsearch>=2.0.0,<3.0.0
+elasticsearch>=5.0.0,<6.0.0
 tensorflow
 requests
 pyyaml
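
A note on patch 1: the sniffing options are dropped entirely rather than
ported, even though the sniff_* keywords still exist in the elasticsearch-py
5.x client; the likely motivation (an assumption, not stated in the series)
is that the new cluster does not expose per-node addresses to clients. A
minimal sketch of the resulting 5.x connection setup; the host below is a
placeholder, not the project's real endpoint:

    from elasticsearch import Elasticsearch

    # 5.x-compatible connection: no sniffing, GET bodies tunnelled as POST,
    # and a generous timeout for slow queries.
    es = Elasticsearch([{'host': 'elk.example.com', 'port': 9200}],
                       send_get_body_as='POST',
                       timeout=120)
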
From 809ec727308c615d0b69ae711a5b168a2683e099 Mon Sep 17 00:00:00 2001
From: agopi
Date: Fri, 6 Apr 2018 16:17:19 -0400
Subject: [PATCH 2/7] Fixed logseries_summarization function and reorganized

---
 bml/lib/browbeat_run.py        |  4 +-
 bml/lib/elastic_backend.py     | 30 +++++++++++++++
 bml/lib/logsummary_uploaddb.py | 69 ++++++----------------------------
 bml/lib/timeseries_uploaddb.py |  4 +-
 4 files changed, 46 insertions(+), 61 deletions(-)

diff --git a/bml/lib/browbeat_run.py b/bml/lib/browbeat_run.py
index 03c229e..2713918 100644
--- a/bml/lib/browbeat_run.py
+++ b/bml/lib/browbeat_run.py
@@ -28,8 +28,8 @@ def _init_timeseries(self, elastic_connection, uuid):
             = elastic_connection.compute_start_end(uuid)
         # print graphite_details
         self._metrics_root = graphite_details[2]
-        self._graphite_start = int(graphite_details[0]) / 1000
-        self._graphite_end = int(graphite_details[1]) / 1000
+        self._graphite_start = int(graphite_details[0])
+        self._graphite_end = int(graphite_details[1])
         self._graphite_url = graphite_details[3]
         # Not needed but keeping in case the new way breaks
         # this timestamp should never be smaller than any time in
diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index fc707c3..c966a82 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -87,3 +87,33 @@ def compute_start_end(self, uuid):
             graphite_port = "80"
         graphite_url = "http://{}:{}".format(graphite_url, graphite_port)
         return [start, end, cloud_name, graphite_url]
+
+
+    def compute_hits(self, start, end, cloud_name, level_type):
+        time_dict = {
+            "format": "epoch_millis"
+        }
+        time_dict["gte"] = start
+        time_dict["lte"] = end
+        query_input = {
+            "query": {
+                "bool": {
+                    "must": {
+                        "query_string": {
+                            "query": "browbeat.cloud_name: \
+                            " + cloud_name + " AND level: " + level_type
+                        }
+                    },
+                    "filter": {
+                        "bool": {
+                            "must": [
+                                {
+                                    "range": {
+                                        "@timestamp": time_dict
+                                    }
+                                }
+                            ],
+                            "must_not": []
+                        }}}}}
+        res = self.es.search(index="logstash-*", body=query_input)
+        return res['hits']['total']
diff --git a/bml/lib/logsummary_uploaddb.py b/bml/lib/logsummary_uploaddb.py
index c64b9a5..3a156d2 100644
--- a/bml/lib/logsummary_uploaddb.py
+++ b/bml/lib/logsummary_uploaddb.py
@@ -1,64 +1,19 @@
-from elasticsearch import Elasticsearch
 from util import connect_crdb
+from browbeat_run import browbeat_run
 
 
-def compute_hits(es, start, end, cloud_name, level_type):
-    time_dict = {
-        "format": "epoch_millis"
-    }
-    time_dict["gte"] = start
-    time_dict["lte"] = end
-    query_input = {
-        "query": {
-            "filtered": {
-                "query": {
-                    "query_string": {
-                        "query": "browbeat.cloud_name: \
-                        " + cloud_name + " AND level: " + level_type
-                    }
-                },
-                "filter": {
-                    "bool": {
-                        "must": [
-                            {
-                                "range": {
-                                    "@timestamp": time_dict
-                                }
-                            }
-                        ],
-                        "must_not": []
-                    }}}}}
-    res = es.search(index="logstash-*", body=query_input)
-    return res['hits']['total']
-
-
-def insert_logsummary_db(config, uuid):
-    es = Elasticsearch([{'host': 'elk.browbeatproject.org', 'port': 9200}])
-    query_input = {
-        "query": {
-            "match": {
-                'browbeat_uuid': uuid
-            }
-        },
-        "aggs": {
-            "max_time": {
-                "max": {
-                    "field": "timestamp"
-                }
-            },
-            "min_time": {
-                "min": {
-                    "field": "timestamp"
-                }}}}
-    res = es.search(index="browbeat-rally-*", body=query_input)
-    start = int(res['aggregations']['min_time']['value'])
-    end = int(res['aggregations']['max_time']['value'])
-    cloud_name = res['hits']['hits'][0]['_source']['cloud_name']
-    num_errors = compute_hits(es, start, end, cloud_name, 'error')
-    num_warn = compute_hits(es, start, end, cloud_name, 'warning')
-    num_debug = compute_hits(es, start, end, cloud_name, 'debug')
-    num_notice = compute_hits(es, start, end, cloud_name, 'notice')
-    num_info = compute_hits(es, start, end, cloud_name, 'info')
+def insert_logsummary_db(es, config, uuid):
+    brun = browbeat_run(es, uuid, timeseries=True)
+    graphite_details = brun.get_graphite_details()
+    start = graphite_details[1]
+    end = graphite_details[2]
+    cloud_name = str(graphite_details[3])
+    num_errors = es.compute_hits(start, end, cloud_name, 'error')
+    num_warn = es.compute_hits(start, end, cloud_name, 'warning')
+    num_debug = es.compute_hits(start, end, cloud_name, 'debug')
+    num_notice = es.compute_hits(start, end, cloud_name, 'notice')
+    num_info = es.compute_hits(start, end, cloud_name, 'info')
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index a9ec665..cbf9b5a 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -32,8 +32,8 @@ def insert_timeseriessummaries_db(elastic, config, uuid):
     brun = browbeat_run(elastic, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     graphite_url = graphite_details[0]
-    start = graphite_details[1]
-    end = graphite_details[2]
+    start = graphite_details[1] / 1000
+    end = graphite_details[2] / 1000
     metric_base = str(graphite_details[3]) + "."
     base_url = "{}/render?target={}"
     time_url = "&format=json&from={}&until={}"
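
Patch 2 moves log-hit counting onto the backend: the old module-level
compute_hits built a "filtered" query, which Elasticsearch 5 removed, and
the new Backend.compute_hits issues the equivalent bool/filter query,
counting entries of one severity level between two epoch-millisecond
timestamps. A hedged usage sketch; the endpoint, timestamps, and cloud name
are placeholders:

    from lib.elastic_backend import Backend

    es = Backend("elk.example.com", "9200")    # placeholder endpoint
    start, end = 1523371600000, 1523375200000  # epoch-millis bounds
    # Returns res['hits']['total'] for the given severity level.
    print(es.compute_hits(start, end, "openstack-queens", "error"))
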
From 88acf20dd36bb7aa5c66594decd074ae1949408b Mon Sep 17 00:00:00 2001
From: agopi
Date: Tue, 10 Apr 2018 10:39:47 -0400
Subject: [PATCH 3/7] Fixed linting errors

---
 bml/lib/elastic_backend.py     | 1 -
 bml/lib/logsummary_uploaddb.py | 1 -
 bml/lib/timeseries_uploaddb.py | 1 -
 3 files changed, 3 deletions(-)

diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index c966a82..959d3bf 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -88,7 +88,6 @@ def compute_start_end(self, uuid):
         graphite_url = "http://{}:{}".format(graphite_url, graphite_port)
         return [start, end, cloud_name, graphite_url]
 
-
     def compute_hits(self, start, end, cloud_name, level_type):
         time_dict = {
             "format": "epoch_millis"
diff --git a/bml/lib/logsummary_uploaddb.py b/bml/lib/logsummary_uploaddb.py
index 3a156d2..fc682da 100644
--- a/bml/lib/logsummary_uploaddb.py
+++ b/bml/lib/logsummary_uploaddb.py
@@ -1,4 +1,3 @@
-from elasticsearch import Elasticsearch
 from util import connect_crdb
 from browbeat_run import browbeat_run
 
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index cbf9b5a..e53db23 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -1,6 +1,5 @@
 import numpy
 import requests
-from elastic_backend import Backend
 from browbeat_run import browbeat_run
 from util import connect_crdb

From aa0a1c0d245c5c0cd7d5b251b5d6fb7fca25c619 Mon Sep 17 00:00:00 2001
From: agopi
Date: Tue, 10 Apr 2018 13:32:54 -0400
Subject: [PATCH 4/7] Refactored code for pushing data to crdb

1) The 'master' entry in the config file needs to be updated to match the
   OSP version that master currently points to.
---
 bml/bml.py                     |  4 +--
 bml/config.yml                 |  5 ++++
 bml/lib/logsummary_uploaddb.py | 26 +++++++-------------
 bml/lib/timeseries_uploaddb.py | 25 ++++--------------
 bml/lib/update_crdb.py         | 46 ++++++++++++++++++++++++++++++++++
 5 files changed, 67 insertions(+), 39 deletions(-)

diff --git a/bml/bml.py b/bml/bml.py
index 16a0ea1..b42ab57 100644
--- a/bml/bml.py
+++ b/bml/bml.py
@@ -94,9 +94,9 @@ def main():
                                            str(args.days) + "d",
                                            args.version, update=False)
     elif args.timeseries_uuid is not None:
-        lib.timeseries_uploaddb.insert_timeseriessummaries_db(es_backend, config, args.timeseries_uuid)  # noqa
+        lib.timeseries_uploaddb.timeseriessummaries_db(es_backend, config, args.timeseries_uuid)  # noqa
     elif args.loggin_uuid is not None:
-        lib.logsummary_uploaddb.insert_logsummary_db(es_backend, config, args.loggin_uuid)  # noqa
+        lib.logsummary_uploaddb.logsummary_db(es_backend, config, args.loggin_uuid)  # noqa
     elif args.summary_uuid is not None:
         lib.data_summary.summary_uuid(es_backend, config,
                                       args.summary_uuid, args.update)
diff --git a/bml/config.yml b/bml/config.yml
index b54cbc0..9a0c5f3 100644
--- a/bml/config.yml
+++ b/bml/config.yml
@@ -122,6 +122,11 @@ table_name:
 table_errors:
   - 'num_errors'
 
+# This needs to be updated before the end of each release cycle, per
+# https://releases.openstack.org/
+master:
+  - '14-tripleo'
+
 table_timeseries:
   - 'timeseries_summary'
 
diff --git a/bml/lib/logsummary_uploaddb.py b/bml/lib/logsummary_uploaddb.py
index fc682da..d271d11 100644
--- a/bml/lib/logsummary_uploaddb.py
+++ b/bml/lib/logsummary_uploaddb.py
@@ -1,26 +1,18 @@
 from util import connect_crdb
 from browbeat_run import browbeat_run
+from update_crdb import insert_logsummary_db
 
 
-def insert_logsummary_db(es, config, uuid):
+def logsummary_db(es, config, uuid):
     brun = browbeat_run(es, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     start = graphite_details[1]
     end = graphite_details[2]
     cloud_name = str(graphite_details[3])
-    num_errors = es.compute_hits(start, end, cloud_name, 'error')
-    num_warn = es.compute_hits(start, end, cloud_name, 'warning')
-    num_debug = es.compute_hits(start, end, cloud_name, 'debug')
-    num_notice = es.compute_hits(start, end, cloud_name, 'notice')
-    num_info = es.compute_hits(start, end, cloud_name, 'info')
-    conn = connect_crdb(config)
-    conn.set_session(autocommit=True)
-    cur = conn.cursor()
-    cur.execute("INSERT INTO {} VALUES ('{}', \
-                {}, {}, {}, {}, {});".format(config['table_logsummary'][0],
-                                             str(uuid),
-                                             int(num_errors),
-                                             int(num_warn),
-                                             int(num_debug),
-                                             int(num_notice),
-                                             int(num_info)))
+    num_errors = int(es.compute_hits(start, end, cloud_name, 'error'))
+    num_warn = int(es.compute_hits(start, end, cloud_name, 'warning'))
+    num_debug = int(es.compute_hits(start, end, cloud_name, 'debug'))
+    num_notice = int(es.compute_hits(start, end, cloud_name, 'notice'))
+    num_info = int(es.compute_hits(start, end, cloud_name, 'info'))
+    insert_logsummary_db(config, uuid, num_errors, num_warn, num_debug,
+                         num_notice, num_info)
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index e53db23..811ea8c 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -1,7 +1,7 @@
 import numpy
 import requests
 from browbeat_run import browbeat_run
-from util import connect_crdb
+from update_crdb import insert_timeseriessummaries_db
 
 metrics_list = ["overcloud-controller-0.cpu-*.cpu-system",
                 "overcloud-controller-0.cpu-*.cpu-user",
@@ -27,7 +27,7 @@ def get_features(gdata, pos):
     return [mean, percentile95]
 
 
-def insert_timeseriessummaries_db(elastic, config, uuid):
+def timeseriessummaries_db(elastic, config, uuid):
     brun = browbeat_run(elastic, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     graphite_url = graphite_details[0]
@@ -41,30 +41,15 @@ def insert_timeseriessummaries_db(elastic, config, uuid):
     time_url = time_url.format(start, end)
     final_url = base_url + "{}" + time_url
 
-    conn = connect_crdb(config)
-    conn.set_session(autocommit=True)
-    cur = conn.cursor()
     cpu_system = summarize_metric(final_url, metrics_list[0])
     cpu_user = summarize_metric(final_url, metrics_list[1])
     cpu_softirq = summarize_metric(final_url, metrics_list[2])
     cpu_wait = summarize_metric(final_url, metrics_list[3])
     mem_slabunrecl = summarize_metric(final_url, metrics_list[4])
     mem_used = summarize_metric(final_url, metrics_list[5])
-    cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
-                {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
-                                                 str(uuid),
-                                                 float(cpu_system[0]),
-                                                 float(cpu_system[1]),
-                                                 float(cpu_user[0]),
-                                                 float(cpu_user[1]),
-                                                 float(cpu_softirq[0]),
-                                                 float(cpu_softirq[1]),
-                                                 float(cpu_wait[0]),
-                                                 float(cpu_wait[1]),
-                                                 float(mem_used[0]),
-                                                 float(mem_used[1]),
-                                                 float(mem_slabunrecl[0]),
-                                                 float(mem_slabunrecl[1])))
+    insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
+                                  cpu_softirq, cpu_wait, mem_slabunrecl,
+                                  mem_used)
diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py
index 380ab35..25694a2 100644
--- a/bml/lib/update_crdb.py
+++ b/bml/lib/update_crdb.py
@@ -1,5 +1,13 @@
 from util import connect_crdb
 
+# This is to update the version name to the specific osp_version.
+# If it is master, then depending on the cycle it can be any one of
+# the versions.
+def update_osp_version(config, osp_name):
+    if "master" in str(osp_name):
+        return config['master'][0]
+    else:
+        return osp_name
+
 
 def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
                      time_stamp, puddle, dlrn, concurrency, times,
@@ -8,6 +16,7 @@ def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     classify = True
+    osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} VALUES ('{}', '{}', '{}', {}, '{}', '{}', \
                 '{}', '{}', {}, {}, {}, {});" .format(config['table_name'][0],
                                                       str(uuid), str(test),
@@ -29,6 +38,7 @@ def insert_values_db(config, uuid, test, osp_name, avg_runtime,
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     classify = False
+    osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} (uuid, test, osp_version, avg_runtime, \
                 timestamp, rhos_puddle, dlrn_hash, classify, concurrency, \
                 times, percentile95) VALUES ('{}', '{}', '{}', {}, '{}', '{}',\
@@ -50,3 +60,39 @@ def insert_errors_db(config, uuid, errors):
 
     cur.execute("INSERT INTO {} VALUES ('{}', {});"
                 .format(name_table,
                         str(uuid), errors))
+
+
+def insert_logsummary_db(config, uuid, num_errors, num_warn,
+                         num_debug, num_notice, num_info):
+    conn = connect_crdb(config)
+    conn.set_session(autocommit=True)
+    cur = conn.cursor()
+    cur.execute("INSERT INTO {} VALUES ('{}', \
+                {}, {}, {}, {}, {});".format(config['table_logsummary'][0],
+                                             str(uuid),
+                                             int(num_errors),
+                                             int(num_warn),
+                                             int(num_debug),
+                                             int(num_notice),
+                                             int(num_info)))
+
+def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_softirq,
+                                  cpu_wait, mem_used, mem_slabunrecl):
+    conn = connect_crdb(config)
+    conn.set_session(autocommit=True)
+    cur = conn.cursor()
+    cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
+                {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
+                                                 str(uuid),
+                                                 float(cpu_system[0]),
+                                                 float(cpu_system[1]),
+                                                 float(cpu_user[0]),
+                                                 float(cpu_user[1]),
+                                                 float(cpu_softirq[0]),
+                                                 float(cpu_softirq[1]),
+                                                 float(cpu_wait[0]),
+                                                 float(cpu_wait[1]),
+                                                 float(mem_used[0]),
+                                                 float(mem_used[1]),
+                                                 float(mem_slabunrecl[0]),
+                                                 float(mem_slabunrecl[1])))
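
One caveat on the new update_crdb.py helpers: every value is interpolated
into the SQL with str.format, so a value containing a quote breaks the
statement. Assuming connect_crdb returns a psycopg2-style connection (the
cursor/set_session usage suggests it does), the same write can use bound
parameters instead. A sketch of that alternative, not what the patch does:

    def insert_logsummary_db(config, uuid, num_errors, num_warn,
                             num_debug, num_notice, num_info):
        conn = connect_crdb(config)
        conn.set_session(autocommit=True)
        cur = conn.cursor()
        # Table names cannot be bound, so the table still comes from
        # config; all values go through the driver as parameters.
        cur.execute("INSERT INTO {} VALUES (%s, %s, %s, %s, %s, %s);"
                    .format(config['table_logsummary'][0]),
                    (str(uuid), int(num_errors), int(num_warn),
                     int(num_debug), int(num_notice), int(num_info)))
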
From 4c6f5d2c4492f34caaf3b9c9f4ace92fb321e0ae Mon Sep 17 00:00:00 2001
From: agopi
Date: Tue, 10 Apr 2018 17:38:55 -0400
Subject: [PATCH 5/7] Storing memory values in MB instead of bytes

---
 bml/lib/data_summary.py        |  8 ++++----
 bml/lib/timeseries_uploaddb.py | 14 +++++++++-----
 bml/lib/update_crdb.py         | 29 +++++++++++++++--------------
 3 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/bml/lib/data_summary.py b/bml/lib/data_summary.py
index 549135b..2780eba 100644
--- a/bml/lib/data_summary.py
+++ b/bml/lib/data_summary.py
@@ -40,10 +40,10 @@ def summary_uuid(es_backend, config, uuid, update):
 
 
 def data_summary(data):
-    std_dev = "{:.4f}".format(numpy.std(data)).ljust(10)
-    avg = "{:.4f}".format(numpy.mean(data)).ljust(10)
-    median = "{:.4f}".format(numpy.median(data)).ljust(10)
-    percentile95 = "{:.4f}".format(numpy.percentile(data, 95)).ljust(10)
+    std_dev = "{:.2f}".format(numpy.std(data)).ljust(10)
+    avg = "{:.2f}".format(numpy.mean(data)).ljust(10)
+    median = "{:.2f}".format(numpy.median(data)).ljust(10)
+    percentile95 = "{:.2f}".format(numpy.percentile(data, 95)).ljust(10)
     summary = [avg, std_dev, median, percentile95]
     return(summary)
 
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index 811ea8c..e93167a 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -11,7 +11,7 @@
                 "overcloud-controller-0.memory.memory-used"]
 
 
-def get_features(gdata, pos):
+def get_features(gdata, pos, metric_id):
     values = []
     empty_check = True
     for entry in gdata:
@@ -24,6 +24,10 @@ def get_features(gdata, pos):
     else:
         mean = round(numpy.mean(values), 2)
         percentile95 = round(numpy.percentile(values, 95), 2)
+        # Converting memory from bytes to MB.
+        if "memory" in metric_id:
+            mean = mean / 1000000
+            percentile95 = percentile95 / 1000000
     return [mean, percentile95]
 
 
@@ -48,8 +52,8 @@ def timeseriessummaries_db(elastic, config, uuid):
     mem_slabunrecl = summarize_metric(final_url, metrics_list[4])
     mem_used = summarize_metric(final_url, metrics_list[5])
     insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
-                                  cpu_softirq, cpu_wait, mem_slabunrecl,
-                                  mem_used)
+                                  cpu_softirq, cpu_wait, mem_used,
+                                  mem_slabunrecl)
 
 
 def summarize_metric(final_url, metric_id):
@@ -74,6 +78,6 @@ def summarize_metric(final_url, metric_id):
         else:
             dict_vals[k] = [v]
         list_vals = map(list, dict_vals.items())
-        return get_features(list_vals, 1)
+        return get_features(list_vals, 1, metric_id)
     else:
-        return get_features(response[0]['datapoints'], 0)
+        return get_features(response[0]['datapoints'], 0, metric_id)
diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py
index 25694a2..cdebfa9 100644
--- a/bml/lib/update_crdb.py
+++ b/bml/lib/update_crdb.py
@@ -76,23 +76,24 @@ def insert_logsummary_db(config, uuid, num_errors, num_warn,
                                              int(num_notice),
                                              int(num_info)))
 
-def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_softirq,
-                                  cpu_wait, mem_used, mem_slabunrecl):
+def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
+                                  cpu_softirq, cpu_wait, mem_used,
+                                  mem_slabunrecl):
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
                 {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
                                                  str(uuid),
-                                                 float(cpu_system[0]),
-                                                 float(cpu_system[1]),
-                                                 float(cpu_user[0]),
-                                                 float(cpu_user[1]),
-                                                 float(cpu_softirq[0]),
-                                                 float(cpu_softirq[1]),
-                                                 float(cpu_wait[0]),
-                                                 float(cpu_wait[1]),
-                                                 float(mem_used[0]),
-                                                 float(mem_used[1]),
-                                                 float(mem_slabunrecl[0]),
-                                                 float(mem_slabunrecl[1])))
+                                                 round(cpu_system[0],2),
+                                                 round(cpu_system[1],2),
+                                                 round(cpu_user[0],2),
+                                                 round(cpu_user[1],2),
+                                                 round(cpu_softirq[0],2),
+                                                 round(cpu_softirq[1],2),
+                                                 round(cpu_wait[0],2),
+                                                 round(cpu_wait[1],2),
+                                                 round(mem_used[0],2),
+                                                 round(mem_used[1],2),
+                                                 round(mem_slabunrecl[0],2),
+                                                 round(mem_slabunrecl[1],2)))
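
Note that patch 5 divides by 10**6, so the stored figures are decimal
megabytes, not the 2**20-byte MiB that tools like free report. For a sample
datapoint:

    mem_bytes = 2000000000.0               # sample memory-used value
    print(round(mem_bytes / 1000000, 2))   # 2000.0 MB, what gets stored
    print(round(mem_bytes / 1048576, 2))   # 1907.35 MiB, for comparison
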
+ if "memory" in metric_id: + mean = mean / 1000000 + percentile95 = percentile95 / 1000000 return [mean, percentile95] @@ -48,8 +52,8 @@ def timeseriessummaries_db(elastic, config, uuid): mem_slabunrecl = summarize_metric(final_url, metrics_list[4]) mem_used = summarize_metric(final_url, metrics_list[5]) insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user, - cpu_softirq, cpu_wait, mem_slabunrecl, - mem_used) + cpu_softirq, cpu_wait, mem_used, + mem_slabunrecl) def summarize_metric(final_url, metric_id): @@ -74,6 +78,6 @@ def summarize_metric(final_url, metric_id): else: dict_vals[k] = [v] list_vals = map(list, dict_vals.items()) - return get_features(list_vals, 1) + return get_features(list_vals, 1, metric_id) else: - return get_features(response[0]['datapoints'], 0) + return get_features(response[0]['datapoints'], 0, metric_id) diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py index 25694a2..cdebfa9 100644 --- a/bml/lib/update_crdb.py +++ b/bml/lib/update_crdb.py @@ -76,23 +76,24 @@ def insert_logsummary_db(config, uuid, num_errors, num_warn, int(num_notice), int(num_info))) -def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_softirq, - cpu_wait, mem_used, mem_slabunrecl): +def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user, + cpu_softirq, cpu_wait, mem_used, + mem_slabunrecl): conn = connect_crdb(config) conn.set_session(autocommit=True) cur = conn.cursor() cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\ {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0], str(uuid), - float(cpu_system[0]), - float(cpu_system[1]), - float(cpu_user[0]), - float(cpu_user[1]), - float(cpu_softirq[0]), - float(cpu_softirq[1]), - float(cpu_wait[0]), - float(cpu_wait[1]), - float(mem_used[0]), - float(mem_used[1]), - float(mem_slabunrecl[0]), - float(mem_slabunrecl[1]))) + round(cpu_system[0],2), + round(cpu_system[1],2), + round(cpu_user[0],2), + round(cpu_user[1],2), + round(cpu_softirq[0],2), + round(cpu_softirq[1],2), + round(cpu_wait[0],2), + round(cpu_wait[1],2), + round(mem_used[0],2), + round(mem_used[1],2), + round(mem_slabunrecl[0],2), + round(mem_slabunrecl[1],2))) From 5574c02d391c741bd9acb41f23e7570535a21804 Mon Sep 17 00:00:00 2001 From: agopi Date: Sun, 15 Apr 2018 21:38:34 -0400 Subject: [PATCH 6/7] Pushing data to elastic --- bml/bml.py | 3 ++- bml/lib/browbeat_test.py | 3 ++- bml/lib/data_summary.py | 12 ++++++++++-- bml/lib/elastic_backend.py | 15 +++++++++++++++ bml/lib/update_crdb.py | 16 +++++++++------- 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/bml/bml.py b/bml/bml.py index b42ab57..2b368a6 100644 --- a/bml/bml.py +++ b/bml/bml.py @@ -53,7 +53,8 @@ def parse_args(): parser.add_argument('-u', '--update-db', dest='update', type=bool, default=False, - help='-u True pushes data to cockroach db') + help='-u True pushes summary data to cockroach db \ + and elastic') parser.add_argument('--update-clf', dest="clf_days", type=int, default=-1, diff --git a/bml/lib/browbeat_test.py b/bml/lib/browbeat_test.py index 0563a03..f115801 100644 --- a/bml/lib/browbeat_test.py +++ b/bml/lib/browbeat_test.py @@ -79,7 +79,7 @@ def _typecheck_string(self, val): else: return str(val) - # Extracts details of the really run + # Extracts details of the rally run def _set_metadata(self, raw_elastic): self._set_timeseries_metadata(raw_elastic) self._set_hardware_metadata( @@ -98,6 +98,7 @@ def _set_metadata(self, raw_elastic): self.run = self._typecheck_num(self.run) self.dlrn_hash = 
diff --git a/bml/lib/data_summary.py b/bml/lib/data_summary.py
index 2780eba..7229e92 100644
--- a/bml/lib/data_summary.py
+++ b/bml/lib/data_summary.py
@@ -53,6 +53,7 @@ def print_run_details(config, es_backend, uuid, update):
     brun = browbeat_run(es_backend, uuid, caching=True)
     output_string = ""
     osp_version = ""
+    ovn = ""
     padding = longest_test_name(config)
     test_clean_count = 0  # count of the tests that are being classified
     '''
@@ -67,6 +68,7 @@ def print_run_details(config, es_backend, uuid, update):
         for test_run in brun.get_tests(test_search=test_name):
             data.extend(test_run.raw)
             osp_version = test_run.version
+            ovn = test_run.ovn
         if test_run is None:
             continue
         statistics_uuid = data_summary(data)
@@ -86,6 +88,7 @@ def print_run_details(config, es_backend, uuid, update):
             dlrn_hash = test_run.dlrn_hash
             puddle = test_run.rhos_puddle
             hash_check = check_hash(dlrn_hash, puddle)
+            output_prediction = "None"
             if time_check and cloud_check and hash_check:
                 check_outcome = 1
                 if test_checks:
@@ -98,7 +101,7 @@ def print_run_details(config, es_backend, uuid, update):
                                     average_runtime, output_prediction,
                                     test_run.timestamp, puddle, dlrn_hash,
                                     concurrency, times,
-                                    perc95_score)
+                                    perc95_score, ovn)
                     if int(output_prediction) == 1:
                         print("ALERT!!!!")
                         print(uuid, test_name, osp_version, average_runtime)
@@ -111,10 +114,15 @@ def print_run_details(config, es_backend, uuid, update):
                     insert_values_db(config, uuid, test_name, osp_version,
                                      average_runtime, test_run.timestamp,
                                      puddle, dlrn_hash, concurrency, times,
-                                     perc95_score)
+                                     perc95_score, ovn)
                 output_string += "\n"
         else:
             output_string += "\n"
+        if update:
+            es_backend.push_summary_es(uuid, osp_version, test_name,
+                                       average_runtime,
+                                       statistics_uuid[1], perc95_score,
+                                       output_prediction, ovn)
     '''
     conn.commit()
     conn.close()
diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index 959d3bf..d48300e 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -116,3 +116,18 @@ def compute_hits(self, start, end, cloud_name, level_type):
                         }}}}}
         res = self.es.search(index="logstash-*", body=query_input)
         return res['hits']['total']
+
+
+    def push_summary_es(self, uuid, osp_version, test_name, mean, std_dev,
+                        perc95_score, output_prediction, ovn):
+        data = {
+            "browbeat_uuid": str(uuid),
+            "osp_version": str(osp_version),
+            "action": str(test_name),
+            "mean": mean,
+            "std_dev": std_dev,
+            "percentile_95": perc95_score,
+            "class": output_prediction[0],
+            "with_ovn": ovn
+        }
+        self.es.index(index='bml_summary', doc_type='result', body=data)
diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py
index cdebfa9..42576a9 100644
--- a/bml/lib/update_crdb.py
+++ b/bml/lib/update_crdb.py
@@ -11,14 +11,14 @@ def update_osp_version(config, osp_name):
 
 def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
                      time_stamp, puddle, dlrn, concurrency, times,
-                     perc95_score):
+                     perc95_score, ovn):
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     classify = True
     osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} VALUES ('{}', '{}', '{}', {}, '{}', '{}', \
-                '{}', '{}', {}, {}, {}, {});" .format(config['table_name'][0],
+                '{}', '{}', {}, {}, {}, {},'{}');" .format(config['table_name'][0],
                                                       str(uuid), str(test),
                                                       str(osp_name),
                                                       float(avg_runtime),
@@ -28,12 +28,13 @@ def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
                                                       int(grade),
                                                       int(concurrency),
                                                       int(times),
-                                                      float(perc95_score)))
+                                                      float(perc95_score),
+                                                      str(ovn)))
 
 
 def insert_values_db(config, uuid, test, osp_name, avg_runtime,
                      time_stamp, puddle, dlrn, concurrency, times,
-                     perc95_score):
+                     perc95_score, ovn):
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
@@ -41,15 +42,16 @@ def insert_values_db(config, uuid, test, osp_name, avg_runtime,
     osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} (uuid, test, osp_version, avg_runtime, \
                 timestamp, rhos_puddle, dlrn_hash, classify, concurrency, \
-                times, percentile95) VALUES ('{}', '{}', '{}', {}, '{}', '{}',\
-                '{}', '{}', {}, {}, {})" .format(config['table_name'][0],
+                times, percentile95, ovn) VALUES ('{}', '{}', '{}', {}, '{}', \
+                '{}', '{}', '{}', {}, {}, {},'{}')".format(config['table_name'][0],
                                                  str(uuid), str(test),
                                                  str(osp_name),
                                                  float(avg_runtime),
                                                  str(time_stamp), str(puddle),
                                                  str(dlrn), bool(classify),
                                                  int(concurrency), int(times),
-                                                 float(perc95_score))
+                                                 float(perc95_score),
+                                                 str(ovn))
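
push_summary_es indexes one summary document per test into the bml_summary
index. Note that it stores output_prediction[0] under "class", so the "None"
default set in print_run_details is indexed as the single character "N". A
hedged sketch of a call; every value below is illustrative and backend is
assumed to be a Backend instance:

    backend.push_summary_es("8a3cc3a4-85e4-4f1d-acd4-0c4b0a9a1a55",  # fake
                            "13-tripleo", "create-and-list-routers",
                            mean=12.41, std_dev=1.02, perc95_score=14.37,
                            output_prediction="0",  # stored as "class": "0"
                            ovn=True)               # stored as "with_ovn"
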
{}, {}, {}, {},'{}');" .format(config['table_name'][0], str(uuid), str(test), str(osp_name), float(avg_runtime), @@ -28,12 +28,13 @@ def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade, int(grade), int(concurrency), int(times), - float(perc95_score))) + float(perc95_score), + str(ovn))) def insert_values_db(config, uuid, test, osp_name, avg_runtime, time_stamp, puddle, dlrn, concurrency, times, - perc95_score): + perc95_score, ovn): conn = connect_crdb(config) conn.set_session(autocommit=True) cur = conn.cursor() @@ -41,15 +42,16 @@ def insert_values_db(config, uuid, test, osp_name, avg_runtime, osp_name = update_osp_version(config, osp_name) cur.execute("INSERT INTO {} (uuid, test, osp_version, avg_runtime, \ timestamp, rhos_puddle, dlrn_hash, classify, concurrency, \ - times, percentile95) VALUES ('{}', '{}', '{}', {}, '{}', '{}',\ - '{}', '{}', {}, {}, {})" .format(config['table_name'][0], + times, percentile95, ovn) VALUES ('{}', '{}', '{}', {}, '{}', \ + '{}', '{}', '{}', {}, {}, {},'{}')".format(config['table_name'][0], str(uuid), str(test), str(osp_name), float(avg_runtime), str(time_stamp), str(puddle), str(dlrn), bool(classify), int(concurrency), int(times), - float(perc95_score))) + float(perc95_score), + str(ovn))) def insert_errors_db(config, uuid, errors): From 8af4d0f055fa7806dea84ffd359ad1be94e6f3b8 Mon Sep 17 00:00:00 2001 From: agopi Date: Wed, 2 May 2018 21:11:30 -0400 Subject: [PATCH 7/7] Using search vs scroll for es --- bml/lib/elastic_backend.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py index d48300e..833b322 100644 --- a/bml/lib/elastic_backend.py +++ b/bml/lib/elastic_backend.py @@ -47,16 +47,18 @@ def grab_uuids_by_date(self, version, time_period): # Searches and grabs the raw source data for a Browbeat UUID def grab_uuid(self, uuid): query = {"query": {"match": {'browbeat_uuid': uuid}}} - results = helpers.scan(self.es, - query, - raise_on_error=False, - size=100, - request_timeout=1000) - - if results == []: + # Should use scroll later on but meanwhile using search + # But because ideally we dont see that many hits + # search isn't entirely bad. but in future if hits are in thousands + # use scroll + res = self.es.search(index="browbeat-rally-*", body=query, size=1000) + # size ^ above is set to 1000, as we've never exceeded more than + # 300 entries for the uuids we've seen so far + if res == []: raise ValueError(uuid + " Has no results!") - - return results + # As we switch from scroll api, we're using search to make sure + # elasticsearch doesnt keep hitting errors + return res['hits']['hits'] def compute_start_end(self, uuid): query_input = {