Elastic5 #3

Open · wants to merge 7 commits into base: master
7 changes: 4 additions & 3 deletions bml/bml.py
@@ -53,7 +53,8 @@ def parse_args():

parser.add_argument('-u', '--update-db', dest='update', type=bool,
default=False,
help='-u True pushes data to cockroach db')
help='-u True pushes summary data to cockroach db \
and elastic')
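
Side note on the flag above: argparse's type=bool converts any non-empty string to True, so -u False would still enable the upload. A minimal sketch of a stricter converter (helper name hypothetical, not part of this diff; assumes argparse is already imported in bml.py):

def str2bool(value):
    # Map explicit truthy/falsy strings instead of relying on bool().
    if value.lower() in ('yes', 'true', 't', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got ' + value)

parser.add_argument('-u', '--update-db', dest='update', type=str2bool,
                    default=False,
                    help='-u True pushes summary data to cockroach db and elastic')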

parser.add_argument('--update-clf', dest="clf_days", type=int,
default=-1,
@@ -94,9 +95,9 @@ def main():
str(args.days) + "d",
args.version, update=False)
elif args.timeseries_uuid is not None:
lib.timeseries_uploaddb.insert_timeseriessummaries_db(config, args.timeseries_uuid) # noqa
lib.timeseries_uploaddb.timeseriessummaries_db(es_backend, config, args.timeseries_uuid) # noqa
elif args.loggin_uuid is not None:
lib.logsummary_uploaddb.insert_logsummary_db(config, args.loggin_uuid) # noqa
lib.logsummary_uploaddb.logsummary_db(es_backend, config, args.loggin_uuid) # noqa
elif args.summary_uuid is not None:
lib.data_summary.summary_uuid(es_backend, config, args.summary_uuid,
args.update)
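
For context, this refactor threads one shared Backend instance through the helpers instead of each module building its own Elasticsearch client. A rough sketch of the resulting call flow (not a drop-in script; assumes PyYAML for the config, config keys taken from bml/config.yml in this diff, uuid is a placeholder):

import yaml

import lib.elastic_backend
import lib.logsummary_uploaddb
import lib.timeseries_uploaddb

with open("config.yml") as stream:
    config = yaml.safe_load(stream)

# One shared Elasticsearch connection for every uploader.
es_backend = lib.elastic_backend.Backend(config['elastic-host'],
                                         config['elastic-port'])

uuid = "0000-example-browbeat-uuid"  # placeholder
lib.timeseries_uploaddb.timeseriessummaries_db(es_backend, config, uuid)
lib.logsummary_uploaddb.logsummary_db(es_backend, config, uuid)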
7 changes: 6 additions & 1 deletion bml/config.yml
@@ -1,4 +1,4 @@
elastic-host: elk.browbeatproject.org
elastic-host: elk-b09-h30-r720xd.rdu.openstack.engineering.redhat.com
elastic-port: 9200

tests:
@@ -122,6 +122,11 @@ table_name:
table_errors:
- 'num_errors'

# This needs to be updated before the end of each release cycle
# https://releases.openstack.org/.
master:
- '14-tripleo'

table_timeseries:
- 'timeseries_summary'

4 changes: 2 additions & 2 deletions bml/lib/browbeat_run.py
@@ -28,8 +28,8 @@ def _init_timeseries(self, elastic_connection, uuid):
= elastic_connection.compute_start_end(uuid)
# print graphite_details
self._metrics_root = graphite_details[2]
self._graphite_start = int(graphite_details[0]) / 1000
self._graphite_end = int(graphite_details[1]) / 1000
self._graphite_start = int(graphite_details[0])
self._graphite_end = int(graphite_details[1])
self._graphite_url = graphite_details[3]
# Not needed but keeping in case the new way breaks
# this timestamp should never be smaller than any time in
3 changes: 2 additions & 1 deletion bml/lib/browbeat_test.py
@@ -79,7 +79,7 @@ def _typecheck_string(self, val):
else:
return str(val)

# Extracts details of the really run
# Extracts details of the rally run
def _set_metadata(self, raw_elastic):
self._set_timeseries_metadata(raw_elastic)
self._set_hardware_metadata(
@@ -98,6 +98,7 @@ def _set_metadata(self, raw_elastic):
self.run = self._typecheck_num(self.run)
self.dlrn_hash = raw_elastic['_source']['version']['dlrn_hash']
self.rhos_puddle = raw_elastic['_source']['version']['rhos_puddle']
self.ovn = True if "ovn" in raw_elastic['_source']['version']['logs_link'] else False # noqa
self.scenario_name = raw_elastic['_source']['rally_setup']['name']
self.timestamp = raw_elastic['_source']['timestamp']
self.num_computes = \
20 changes: 14 additions & 6 deletions bml/lib/data_summary.py
@@ -40,10 +40,10 @@ def summary_uuid(es_backend, config, uuid, update):


def data_summary(data):
std_dev = "{:.4f}".format(numpy.std(data)).ljust(10)
avg = "{:.4f}".format(numpy.mean(data)).ljust(10)
median = "{:.4f}".format(numpy.median(data)).ljust(10)
percentile95 = "{:.4f}".format(numpy.percentile(data, 95)).ljust(10)
std_dev = "{:.2f}".format(numpy.std(data)).ljust(10)
avg = "{:.2f}".format(numpy.mean(data)).ljust(10)
median = "{:.2f}".format(numpy.median(data)).ljust(10)
percentile95 = "{:.2f}".format(numpy.percentile(data, 95)).ljust(10)
summary = [avg, std_dev, median, percentile95]
return(summary)
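
A quick usage example of the two-decimal formatting above (values illustrative; numpy is already imported at the top of this module):

durations = [1.2, 1.4, 1.9, 2.3, 5.0]
avg, std_dev, median, perc95 = data_summary(durations)
# Each element is a two-decimal string left-justified to 10 characters,
# e.g. avg == "2.36" followed by six spaces of padding.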

@@ -53,6 +53,7 @@ def print_run_details(config, es_backend, uuid, update):
brun = browbeat_run(es_backend, uuid, caching=True)
output_string = ""
osp_version = ""
ovn = ""
padding = longest_test_name(config)
test_clean_count = 0 # count of the tests that are being classified
'''
@@ -67,6 +68,7 @@
for test_run in brun.get_tests(test_search=test_name):
data.extend(test_run.raw)
osp_version = test_run.version
ovn = test_run.ovn
if test_run is None:
continue
statistics_uuid = data_summary(data)
@@ -86,6 +88,7 @@
dlrn_hash = test_run.dlrn_hash
puddle = test_run.rhos_puddle
hash_check = check_hash(dlrn_hash, puddle)
output_prediction = "None"
if time_check and cloud_check and hash_check:
check_outcome = 1
if test_checks:
@@ -98,7 +101,7 @@
average_runtime, output_prediction,
test_run.timestamp, puddle,
dlrn_hash, concurrency, times,
perc95_score)
perc95_score, ovn)
if int(output_prediction) == 1:
print("ALERT!!!!")
print(uuid, test_name, osp_version, average_runtime)
@@ -111,10 +114,15 @@
insert_values_db(config, uuid, test_name, osp_version,
average_runtime, test_run.timestamp,
puddle, dlrn_hash, concurrency, times,
perc95_score)
perc95_score, ovn)
output_string += "\n"
else:
output_string += "\n"
if update:
es_backend.push_summary_es(uuid, osp_version, test_name,
average_runtime,
statistics_uuid[1], perc95_score,
output_prediction, ovn)
'''
conn.commit()
conn.close()
67 changes: 55 additions & 12 deletions bml/lib/elastic_backend.py
@@ -14,10 +14,6 @@ def __init__(self, host, port):
'port': port}],
send_get_body_as='POST',
retries=True,
sniff_on_start=True,
sniff_on_connection_fail=True,
sniff_timeout=10,
sniffer_timeout=120,
timeout=120)

def grab_uuids_by_date(self, version, time_period):
@@ -51,15 +47,18 @@ def grab_uuids_by_date(self, version, time_period):
# Searches and grabs the raw source data for a Browbeat UUID
def grab_uuid(self, uuid):
query = {"query": {"match": {'browbeat_uuid': uuid}}}
results = helpers.scan(self.es,
query,
size=100,
request_timeout=1000)

if results == []:
# Should use the scroll API later on, but for now search is fine
# because we typically don't see that many hits. If hit counts
# ever reach the thousands, switch to scroll.
res = self.es.search(index="browbeat-rally-*", body=query, size=1000)
# size above is set to 1000; we have never seen more than
# 300 entries for any of the uuids so far
if res == []:
raise ValueError(uuid + " Has no results!")

return results
# As we switch away from the scroll API, we use search to make sure
# Elasticsearch doesn't keep hitting errors
return res['hits']['hits']
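
If the hit count ever does grow past the fixed size above, a scroll-based variant along these lines (sketch only; method name hypothetical, using the same elasticsearch.helpers.scan call the removed code relied on) would avoid truncating results:

from elasticsearch import helpers

def grab_uuid_scrolled(self, uuid):
    # helpers.scan wraps the scroll API and yields every matching hit,
    # however many there are.
    query = {"query": {"match": {'browbeat_uuid': uuid}}}
    hits = list(helpers.scan(self.es,
                             query=query,
                             index="browbeat-rally-*",
                             size=100,
                             request_timeout=1000))
    if not hits:
        raise ValueError(uuid + " Has no results!")
    return hits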

def compute_start_end(self, uuid):
query_input = {
@@ -90,3 +89,47 @@ def compute_start_end(self, uuid):
graphite_port = "80"
graphite_url = "http://{}:{}".format(graphite_url, graphite_port)
return [start, end, cloud_name, graphite_url]

def compute_hits(self, start, end, cloud_name, level_type):
time_dict = {
"format": "epoch_millis"
}
time_dict["gte"] = start
time_dict["lte"] = end
query_input = {
"query": {
"bool": {
"must": {
"query_string": {
"query": "browbeat.cloud_name: \
" + cloud_name + " AND level: " + level_type
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"@timestamp": time_dict
}
}
],
"must_not": []
}}}}}
res = self.es.search(index="logstash-*", body=query_input)
return res['hits']['total']
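
A usage sketch for the new compute_hits helper (es_backend and uuid assumed to exist). The start/end values are epoch milliseconds, matching both compute_start_end above and the "epoch_millis" format in the range filter; note that hits.total is a plain integer on Elasticsearch 5.x, which is what the int() casts in logsummary_uploaddb.py below rely on:

start, end, cloud_name, graphite_url = es_backend.compute_start_end(uuid)
num_errors = es_backend.compute_hits(start, end, cloud_name, 'error')
num_warn = es_backend.compute_hits(start, end, cloud_name, 'warning')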


def push_summary_es(self, uuid, osp_version, test_name, mean, std_dev,
perc95_score, output_prediction, ovn):
data={
"browbeat_uuid":str(uuid),
"osp_version":str(osp_version),
"action":str(test_name),
"mean":mean,
"std_dev":std_dev,
"percentile_95":perc95_score,
"class": output_prediction[0],
"with_ovn": ovn
}
self.es.index(index='bml_summary', doc_type='result', body=data)
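
To sanity-check what lands in the new bml_summary index, something along these lines could be run against the same cluster (query purely illustrative; index and field names taken from the data dict above):

res = es_backend.es.search(index="bml_summary",
                           body={"query": {"match": {"browbeat_uuid": uuid}}})
for hit in res['hits']['hits']:
    print(hit['_source']['action'], hit['_source']['mean'])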
84 changes: 15 additions & 69 deletions bml/lib/logsummary_uploaddb.py
@@ -1,72 +1,18 @@
from elasticsearch import Elasticsearch
from util import connect_crdb
from browbeat_run import browbeat_run
from update_crdb import insert_logsummary_db


def compute_hits(es, start, end, cloud_name, level_type):
time_dict = {
"format": "epoch_millis"
}
time_dict["gte"] = start
time_dict["lte"] = end
query_input = {
"query": {
"filtered": {
"query": {
"query_string": {
"query": "browbeat.cloud_name: \
" + cloud_name + " AND level: " + level_type
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"@timestamp": time_dict
}
}
],
"must_not": []
}}}}}
res = es.search(index="logstash-*", body=query_input)
return res['hits']['total']


def insert_logsummary_db(config, uuid):
es = Elasticsearch([{'host': 'elk.browbeatproject.org', 'port': 9200}])
query_input = {
"query": {
"match": {
'browbeat_uuid': uuid
}
},
"aggs": {
"max_time": {
"max": {
"field": "timestamp"
}
},
"min_time": {
"min": {
"field": "timestamp"
}}}}
res = es.search(index="browbeat-rally-*", body=query_input)
start = int(res['aggregations']['min_time']['value'])
end = int(res['aggregations']['max_time']['value'])
cloud_name = res['hits']['hits'][0]['_source']['cloud_name']
num_errors = compute_hits(es, start, end, cloud_name, 'error')
num_warn = compute_hits(es, start, end, cloud_name, 'warning')
num_debug = compute_hits(es, start, end, cloud_name, 'debug')
num_notice = compute_hits(es, start, end, cloud_name, 'notice')
num_info = compute_hits(es, start, end, cloud_name, 'info')
conn = connect_crdb(config)
conn.set_session(autocommit=True)
cur = conn.cursor()
cur.execute("INSERT INTO {} VALUES ('{}', \
{}, {}, {}, {}, {});".format(config['table_logsummary'][0],
str(uuid),
int(num_errors),
int(num_warn),
int(num_debug),
int(num_notice),
int(num_info)))
def logsummary_db(es, config, uuid):
brun = browbeat_run(es, uuid, timeseries=True)
graphite_details = brun.get_graphite_details()
start = graphite_details[1]
end = graphite_details[2]
cloud_name = str(graphite_details[3])
num_errors = int(es.compute_hits(start, end, cloud_name, 'error'))
num_warn = int(es.compute_hits(start, end, cloud_name, 'warning'))
num_debug = int(es.compute_hits(start, end, cloud_name, 'debug'))
num_notice = int(es.compute_hits(start, end, cloud_name, 'notice'))
num_info = int(es.compute_hits(start, end, cloud_name, 'info'))
insert_logsummary_db(config, uuid, num_errors, num_warn, num_debug,
num_notice, num_info)
42 changes: 14 additions & 28 deletions bml/lib/timeseries_uploaddb.py
@@ -1,8 +1,7 @@
import numpy
import requests
from elastic_backend import Backend
from browbeat_run import browbeat_run
from util import connect_crdb
from update_crdb import insert_timeseriessummaries_db

metrics_list = ["overcloud-controller-0.cpu-*.cpu-system",
"overcloud-controller-0.cpu-*.cpu-user",
@@ -12,7 +11,7 @@
"overcloud-controller-0.memory.memory-used"]


def get_features(gdata, pos):
def get_features(gdata, pos, metric_id):
values = []
empty_check = True
for entry in gdata:
@@ -25,17 +24,19 @@ def get_features(gdata, pos):
else:
mean = round(numpy.mean(values), 2)
percentile95 = round(numpy.percentile(values, 95), 2)
# Converting memory from bytes to MB.
if "memory" in metric_id:
mean = mean / 1000000
percentile95 = percentile95 / 1000000
return [mean, percentile95]
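
Worth noting on the conversion above: dividing by 1000000 yields decimal megabytes; if binary mebibytes (MiB) were intended, the divisor would be 1024 * 1024. A tiny illustration with a made-up value:

mem_bytes = 8589934592.0          # 8 GiB reported by collectd, in bytes
print(mem_bytes / 1000000)        # ~8589.9 decimal MB, as in this patch
print(mem_bytes / (1024 * 1024))  # 8192.0 MiB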


def insert_timeseriessummaries_db(config, uuid):
# WIP should pass the backend object here
elastic = Backend("elk.browbeatproject.org", "9200")
def timeseriessummaries_db(elastic, config, uuid):
brun = browbeat_run(elastic, uuid, timeseries=True)
graphite_details = brun.get_graphite_details()
graphite_url = graphite_details[0]
start = graphite_details[1]
end = graphite_details[2]
start = graphite_details[1] / 1000
end = graphite_details[2] / 1000
metric_base = str(graphite_details[3]) + "."
base_url = "{}/render?target={}"
time_url = "&format=json&from={}&until={}"
@@ -44,30 +45,15 @@ def insert_timeseriessummaries_db(config, uuid):
time_url = time_url.format(start,
end)
final_url = base_url + "{}" + time_url
conn = connect_crdb(config)
conn.set_session(autocommit=True)
cur = conn.cursor()
cpu_system = summarize_metric(final_url, metrics_list[0])
cpu_user = summarize_metric(final_url, metrics_list[1])
cpu_softirq = summarize_metric(final_url, metrics_list[2])
cpu_wait = summarize_metric(final_url, metrics_list[3])
mem_slabunrecl = summarize_metric(final_url, metrics_list[4])
mem_used = summarize_metric(final_url, metrics_list[5])
cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
{}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
str(uuid),
float(cpu_system[0]),
float(cpu_system[1]),
float(cpu_user[0]),
float(cpu_user[1]),
float(cpu_softirq[0]),
float(cpu_softirq[1]),
float(cpu_wait[0]),
float(cpu_wait[1]),
float(mem_used[0]),
float(mem_used[1]),
float(mem_slabunrecl[0]),
float(mem_slabunrecl[1])))
insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
cpu_softirq, cpu_wait, mem_used,
mem_slabunrecl)
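
The division by 1000 above is needed because start/end now arrive as epoch milliseconds from Elasticsearch (browbeat_run.py in this diff no longer divides them), while Graphite's render API expects from/until in epoch seconds. A small sketch of the resulting request, with host and metric purely illustrative:

import requests

# Epoch milliseconds from Elasticsearch, converted to the epoch seconds
# that Graphite's render API expects.
start_ms, end_ms = 1528389000123, 1528392600456
start, end = start_ms // 1000, end_ms // 1000

url = ("http://graphite.example.com/render"
       "?target=cloud-a.overcloud-controller-0.cpu-0.cpu-system"
       "&format=json&from={}&until={}".format(start, end))
datapoints = requests.get(url).json()[0]['datapoints']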


def summarize_metric(final_url, metric_id):
@@ -92,6 +78,6 @@ def summarize_metric(final_url, metric_id):
else:
dict_vals[k] = [v]
list_vals = map(list, dict_vals.items())
return get_features(list_vals, 1)
return get_features(list_vals, 1, metric_id)
else:
return get_features(response[0]['datapoints'], 0)
return get_features(response[0]['datapoints'], 0, metric_id)