From 3321ea1eca372e72514eb41474d8192114bdc90d Mon Sep 17 00:00:00 2001
From: agopi
Date: Fri, 6 Apr 2018 13:02:05 -0400
Subject: [PATCH 1/7] Updated to work with elastic5

---
 bml/bml.py                     | 4 ++--
 bml/config.yml                 | 2 +-
 bml/lib/elastic_backend.py     | 5 +----
 bml/lib/timeseries_uploaddb.py | 4 +---
 requirements.txt               | 2 +-
 5 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/bml/bml.py b/bml/bml.py
index 65eb3fb..16a0ea1 100644
--- a/bml/bml.py
+++ b/bml/bml.py
@@ -94,9 +94,9 @@ def main():
                                            str(args.days) + "d",
                                            args.version, update=False)
     elif args.timeseries_uuid is not None:
-        lib.timeseries_uploaddb.insert_timeseriessummaries_db(config, args.timeseries_uuid)  # noqa
+        lib.timeseries_uploaddb.insert_timeseriessummaries_db(es_backend, config, args.timeseries_uuid)  # noqa
     elif args.loggin_uuid is not None:
-        lib.logsummary_uploaddb.insert_logsummary_db(config, args.loggin_uuid)  # noqa
+        lib.logsummary_uploaddb.insert_logsummary_db(es_backend, config, args.loggin_uuid)  # noqa
     elif args.summary_uuid is not None:
         lib.data_summary.summary_uuid(es_backend, config,
                                       args.summary_uuid, args.update)
diff --git a/bml/config.yml b/bml/config.yml
index 08c18b5..b54cbc0 100644
--- a/bml/config.yml
+++ b/bml/config.yml
@@ -1,4 +1,4 @@
-elastic-host: elk.browbeatproject.org
+elastic-host: elk-b09-h30-r720xd.rdu.openstack.engineering.redhat.com
 elastic-port: 9200
 
 tests:
diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index 3aa6663..fc707c3 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -14,10 +14,6 @@ def __init__(self, host, port):
                                  'port': port}],
                                send_get_body_as='POST',
                                retries=True,
-                               sniff_on_start=True,
-                               sniff_on_connection_fail=True,
-                               sniff_timeout=10,
-                               sniffer_timeout=120,
                                timeout=120)
 
     def grab_uuids_by_date(self, version, time_period):
@@ -53,6 +49,7 @@ def grab_uuid(self, uuid):
         query = {"query": {"match": {'browbeat_uuid': uuid}}}
         results = helpers.scan(self.es,
                                query,
+                               raise_on_error=False,
                                size=100,
                                request_timeout=1000)
 
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index 1de722b..a9ec665 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -28,9 +28,7 @@ def get_features(gdata, pos):
     return [mean, percentile95]
 
 
-def insert_timeseriessummaries_db(config, uuid):
-    # WIP should pass the backend object here
-    elastic = Backend("elk.browbeatproject.org", "9200")
+def insert_timeseriessummaries_db(elastic, config, uuid):
     brun = browbeat_run(elastic, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     graphite_url = graphite_details[0]
diff --git a/requirements.txt b/requirements.txt
index 588de70..c6d1a27 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-elasticsearch>=2.0.0,<3.0.0
+elasticsearch>=5.0.0,<6.0.0
 tensorflow
 requests
 pyyaml
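
A note on patch 1: the sniffing options are dropped entirely rather than
ported, even though the sniff_* keywords still exist in the elasticsearch-py
5.x client; the likely motivation (an assumption, not stated in the series)
is that the new cluster does not expose per-node addresses to clients. A
minimal sketch of the resulting 5.x connection setup; the host below is a
placeholder, not the project's real endpoint:

    from elasticsearch import Elasticsearch

    # 5.x-compatible connection: no sniffing, GET bodies tunnelled as POST,
    # and a generous timeout for slow queries.
    es = Elasticsearch([{'host': 'elk.example.com', 'port': 9200}],
                       send_get_body_as='POST',
                       timeout=120)
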
From 809ec727308c615d0b69ae711a5b168a2683e099 Mon Sep 17 00:00:00 2001
From: agopi
Date: Fri, 6 Apr 2018 16:17:19 -0400
Subject: [PATCH 2/7] Fixed logseries_summarization function and reorganized

---
 bml/lib/browbeat_run.py        |  4 +-
 bml/lib/elastic_backend.py     | 30 +++++++++++++++
 bml/lib/logsummary_uploaddb.py | 69 ++++++----------------------------
 bml/lib/timeseries_uploaddb.py |  4 +-
 4 files changed, 46 insertions(+), 61 deletions(-)

diff --git a/bml/lib/browbeat_run.py b/bml/lib/browbeat_run.py
index 03c229e..2713918 100644
--- a/bml/lib/browbeat_run.py
+++ b/bml/lib/browbeat_run.py
@@ -28,8 +28,8 @@ def _init_timeseries(self, elastic_connection, uuid):
             = elastic_connection.compute_start_end(uuid)
         # print graphite_details
         self._metrics_root = graphite_details[2]
-        self._graphite_start = int(graphite_details[0]) / 1000
-        self._graphite_end = int(graphite_details[1]) / 1000
+        self._graphite_start = int(graphite_details[0])
+        self._graphite_end = int(graphite_details[1])
         self._graphite_url = graphite_details[3]
         # Not needed but keeping in case the new way breaks
         # this timestamp should never be smaller than any time in
diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index fc707c3..c966a82 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -87,3 +87,33 @@ def compute_start_end(self, uuid):
             graphite_port = "80"
         graphite_url = "http://{}:{}".format(graphite_url, graphite_port)
         return [start, end, cloud_name, graphite_url]
+
+
+    def compute_hits(self, start, end, cloud_name, level_type):
+        time_dict = {
+            "format": "epoch_millis"
+        }
+        time_dict["gte"] = start
+        time_dict["lte"] = end
+        query_input = {
+            "query": {
+                "bool": {
+                    "must": {
+                        "query_string": {
+                            "query": "browbeat.cloud_name: \
+                            " + cloud_name + " AND level: " + level_type
+                        }
+                    },
+                    "filter": {
+                        "bool": {
+                            "must": [
+                                {
+                                    "range": {
+                                        "@timestamp": time_dict
+                                    }
+                                }
+                            ],
+                            "must_not": []
+                        }}}}}
+        res = self.es.search(index="logstash-*", body=query_input)
+        return res['hits']['total']
diff --git a/bml/lib/logsummary_uploaddb.py b/bml/lib/logsummary_uploaddb.py
index c64b9a5..3a156d2 100644
--- a/bml/lib/logsummary_uploaddb.py
+++ b/bml/lib/logsummary_uploaddb.py
@@ -1,64 +1,19 @@
-from elasticsearch import Elasticsearch
 from util import connect_crdb
+from browbeat_run import browbeat_run
 
 
-def compute_hits(es, start, end, cloud_name, level_type):
-    time_dict = {
-        "format": "epoch_millis"
-    }
-    time_dict["gte"] = start
-    time_dict["lte"] = end
-    query_input = {
-        "query": {
-            "filtered": {
-                "query": {
-                    "query_string": {
-                        "query": "browbeat.cloud_name: \
-                        " + cloud_name + " AND level: " + level_type
-                    }
-                },
-                "filter": {
-                    "bool": {
-                        "must": [
-                            {
-                                "range": {
-                                    "@timestamp": time_dict
-                                }
-                            }
-                        ],
-                        "must_not": []
-                    }}}}}
-    res = es.search(index="logstash-*", body=query_input)
-    return res['hits']['total']
-
-
-def insert_logsummary_db(config, uuid):
-    es = Elasticsearch([{'host': 'elk.browbeatproject.org', 'port': 9200}])
-    query_input = {
-        "query": {
-            "match": {
-                'browbeat_uuid': uuid
-            }
-        },
-        "aggs": {
-            "max_time": {
-                "max": {
-                    "field": "timestamp"
-                }
-            },
-            "min_time": {
-                "min": {
-                    "field": "timestamp"
-                }}}}
-    res = es.search(index="browbeat-rally-*", body=query_input)
-    start = int(res['aggregations']['min_time']['value'])
-    end = int(res['aggregations']['max_time']['value'])
-    cloud_name = res['hits']['hits'][0]['_source']['cloud_name']
-    num_errors = compute_hits(es, start, end, cloud_name, 'error')
-    num_warn = compute_hits(es, start, end, cloud_name, 'warning')
-    num_debug = compute_hits(es, start, end, cloud_name, 'debug')
-    num_notice = compute_hits(es, start, end, cloud_name, 'notice')
-    num_info = compute_hits(es, start, end, cloud_name, 'info')
+def insert_logsummary_db(es, config, uuid):
+    brun = browbeat_run(es, uuid, timeseries=True)
+    graphite_details = brun.get_graphite_details()
+    start = graphite_details[1]
+    end = graphite_details[2]
+    cloud_name = str(graphite_details[3])
+    num_errors = es.compute_hits(start, end, cloud_name, 'error')
+    num_warn = es.compute_hits(start, end, cloud_name, 'warning')
+    num_debug = es.compute_hits(start, end, cloud_name, 'debug')
+    num_notice = es.compute_hits(start, end, cloud_name, 'notice')
+    num_info = es.compute_hits(start, end, cloud_name, 'info')
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index a9ec665..cbf9b5a 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -32,8 +32,8 @@ def insert_timeseriessummaries_db(elastic, config, uuid):
     brun = browbeat_run(elastic, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     graphite_url = graphite_details[0]
-    start = graphite_details[1]
-    end = graphite_details[2]
+    start = graphite_details[1] / 1000
+    end = graphite_details[2] / 1000
     metric_base = str(graphite_details[3]) + "."
     base_url = "{}/render?target={}"
     time_url = "&format=json&from={}&until={}"
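
Patch 2 moves log-hit counting onto the backend: the old module-level
compute_hits built a "filtered" query, which Elasticsearch 5 removed, and
the new Backend.compute_hits issues the equivalent bool/filter query,
counting entries of one severity level between two epoch-millisecond
timestamps. A hedged usage sketch; the endpoint, timestamps, and cloud name
are placeholders:

    from lib.elastic_backend import Backend

    es = Backend("elk.example.com", "9200")    # placeholder endpoint
    start, end = 1523371600000, 1523375200000  # epoch-millis bounds
    # Returns res['hits']['total'] for the given severity level.
    print(es.compute_hits(start, end, "openstack-queens", "error"))
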
From 88acf20dd36bb7aa5c66594decd074ae1949408b Mon Sep 17 00:00:00 2001
From: agopi
Date: Tue, 10 Apr 2018 10:39:47 -0400
Subject: [PATCH 3/7] Fixed linting errors

---
 bml/lib/elastic_backend.py     | 1 -
 bml/lib/logsummary_uploaddb.py | 1 -
 bml/lib/timeseries_uploaddb.py | 1 -
 3 files changed, 3 deletions(-)

diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index c966a82..959d3bf 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -88,7 +88,6 @@ def compute_start_end(self, uuid):
         graphite_url = "http://{}:{}".format(graphite_url, graphite_port)
         return [start, end, cloud_name, graphite_url]
 
-
     def compute_hits(self, start, end, cloud_name, level_type):
         time_dict = {
             "format": "epoch_millis"
diff --git a/bml/lib/logsummary_uploaddb.py b/bml/lib/logsummary_uploaddb.py
index 3a156d2..fc682da 100644
--- a/bml/lib/logsummary_uploaddb.py
+++ b/bml/lib/logsummary_uploaddb.py
@@ -1,4 +1,3 @@
-from elasticsearch import Elasticsearch
 from util import connect_crdb
 from browbeat_run import browbeat_run
 
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index cbf9b5a..e53db23 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -1,6 +1,5 @@
 import numpy
 import requests
-from elastic_backend import Backend
 from browbeat_run import browbeat_run
 from util import connect_crdb

From aa0a1c0d245c5c0cd7d5b251b5d6fb7fca25c619 Mon Sep 17 00:00:00 2001
From: agopi
Date: Tue, 10 Apr 2018 13:32:54 -0400
Subject: [PATCH 4/7] Refactored code for pushing data to crdb

1) The 'master' entry in the config file needs to be updated to match the
   OSP version that master currently points to.
---
 bml/bml.py                     |  4 +--
 bml/config.yml                 |  5 ++++
 bml/lib/logsummary_uploaddb.py | 26 +++++++-------------
 bml/lib/timeseries_uploaddb.py | 25 ++++--------------
 bml/lib/update_crdb.py         | 46 ++++++++++++++++++++++++++++++++++
 5 files changed, 67 insertions(+), 39 deletions(-)

diff --git a/bml/bml.py b/bml/bml.py
index 16a0ea1..b42ab57 100644
--- a/bml/bml.py
+++ b/bml/bml.py
@@ -94,9 +94,9 @@ def main():
                                            str(args.days) + "d",
                                            args.version, update=False)
     elif args.timeseries_uuid is not None:
-        lib.timeseries_uploaddb.insert_timeseriessummaries_db(es_backend, config, args.timeseries_uuid)  # noqa
+        lib.timeseries_uploaddb.timeseriessummaries_db(es_backend, config, args.timeseries_uuid)  # noqa
     elif args.loggin_uuid is not None:
-        lib.logsummary_uploaddb.insert_logsummary_db(es_backend, config, args.loggin_uuid)  # noqa
+        lib.logsummary_uploaddb.logsummary_db(es_backend, config, args.loggin_uuid)  # noqa
     elif args.summary_uuid is not None:
         lib.data_summary.summary_uuid(es_backend, config,
                                       args.summary_uuid, args.update)
diff --git a/bml/config.yml b/bml/config.yml
index b54cbc0..9a0c5f3 100644
--- a/bml/config.yml
+++ b/bml/config.yml
@@ -122,6 +122,11 @@ table_name:
 table_errors:
   - 'num_errors'
 
+# This needs to be updated before the end of each release cycle, per
+# https://releases.openstack.org/
+master:
+  - '14-tripleo'
+
 table_timeseries:
   - 'timeseries_summary'
 
diff --git a/bml/lib/logsummary_uploaddb.py b/bml/lib/logsummary_uploaddb.py
index fc682da..d271d11 100644
--- a/bml/lib/logsummary_uploaddb.py
+++ b/bml/lib/logsummary_uploaddb.py
@@ -1,26 +1,18 @@
 from util import connect_crdb
 from browbeat_run import browbeat_run
+from update_crdb import insert_logsummary_db
 
 
-def insert_logsummary_db(es, config, uuid):
+def logsummary_db(es, config, uuid):
     brun = browbeat_run(es, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     start = graphite_details[1]
     end = graphite_details[2]
     cloud_name = str(graphite_details[3])
-    num_errors = es.compute_hits(start, end, cloud_name, 'error')
-    num_warn = es.compute_hits(start, end, cloud_name, 'warning')
-    num_debug = es.compute_hits(start, end, cloud_name, 'debug')
-    num_notice = es.compute_hits(start, end, cloud_name, 'notice')
-    num_info = es.compute_hits(start, end, cloud_name, 'info')
-    conn = connect_crdb(config)
-    conn.set_session(autocommit=True)
-    cur = conn.cursor()
-    cur.execute("INSERT INTO {} VALUES ('{}', \
-                {}, {}, {}, {}, {});".format(config['table_logsummary'][0],
-                                             str(uuid),
-                                             int(num_errors),
-                                             int(num_warn),
-                                             int(num_debug),
-                                             int(num_notice),
-                                             int(num_info)))
+    num_errors = int(es.compute_hits(start, end, cloud_name, 'error'))
+    num_warn = int(es.compute_hits(start, end, cloud_name, 'warning'))
+    num_debug = int(es.compute_hits(start, end, cloud_name, 'debug'))
+    num_notice = int(es.compute_hits(start, end, cloud_name, 'notice'))
+    num_info = int(es.compute_hits(start, end, cloud_name, 'info'))
+    insert_logsummary_db(config, uuid, num_errors, num_warn, num_debug,
+                         num_notice, num_info)
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index e53db23..811ea8c 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -1,7 +1,7 @@
 import numpy
 import requests
 from browbeat_run import browbeat_run
-from util import connect_crdb
+from update_crdb import insert_timeseriessummaries_db
 
 metrics_list = ["overcloud-controller-0.cpu-*.cpu-system",
                 "overcloud-controller-0.cpu-*.cpu-user",
@@ -27,7 +27,7 @@ def get_features(gdata, pos):
     return [mean, percentile95]
 
 
-def insert_timeseriessummaries_db(elastic, config, uuid):
+def timeseriessummaries_db(elastic, config, uuid):
     brun = browbeat_run(elastic, uuid, timeseries=True)
     graphite_details = brun.get_graphite_details()
     graphite_url = graphite_details[0]
@@ -41,30 +41,15 @@ def insert_timeseriessummaries_db(elastic, config, uuid):
     time_url = time_url.format(start, end)
     final_url = base_url + "{}" + time_url
 
-    conn = connect_crdb(config)
-    conn.set_session(autocommit=True)
-    cur = conn.cursor()
     cpu_system = summarize_metric(final_url, metrics_list[0])
     cpu_user = summarize_metric(final_url, metrics_list[1])
     cpu_softirq = summarize_metric(final_url, metrics_list[2])
     cpu_wait = summarize_metric(final_url, metrics_list[3])
     mem_slabunrecl = summarize_metric(final_url, metrics_list[4])
     mem_used = summarize_metric(final_url, metrics_list[5])
-    cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
-                {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
-                                                 str(uuid),
-                                                 float(cpu_system[0]),
-                                                 float(cpu_system[1]),
-                                                 float(cpu_user[0]),
-                                                 float(cpu_user[1]),
-                                                 float(cpu_softirq[0]),
-                                                 float(cpu_softirq[1]),
-                                                 float(cpu_wait[0]),
-                                                 float(cpu_wait[1]),
-                                                 float(mem_used[0]),
-                                                 float(mem_used[1]),
-                                                 float(mem_slabunrecl[0]),
-                                                 float(mem_slabunrecl[1])))
+    insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
+                                  cpu_softirq, cpu_wait, mem_slabunrecl,
+                                  mem_used)
diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py
index 380ab35..25694a2 100644
--- a/bml/lib/update_crdb.py
+++ b/bml/lib/update_crdb.py
@@ -1,5 +1,13 @@
 from util import connect_crdb
 
+# This is to update the version name to the specific osp_version.
+# If it is master, then depending on the cycle it can be any one of
+# the versions.
+def update_osp_version(config, osp_name):
+    if "master" in str(osp_name):
+        return config['master'][0]
+    else:
+        return osp_name
+
 
 def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
                      time_stamp, puddle, dlrn, concurrency, times,
@@ -8,6 +16,7 @@ def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     classify = True
+    osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} VALUES ('{}', '{}', '{}', {}, '{}', '{}', \
                 '{}', '{}', {}, {}, {}, {});" .format(config['table_name'][0],
                                                       str(uuid), str(test),
@@ -29,6 +38,7 @@ def insert_values_db(config, uuid, test, osp_name, avg_runtime,
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     classify = False
+    osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} (uuid, test, osp_version, avg_runtime, \
                 timestamp, rhos_puddle, dlrn_hash, classify, concurrency, \
                 times, percentile95) VALUES ('{}', '{}', '{}', {}, '{}', '{}',\
@@ -50,3 +60,39 @@ def insert_errors_db(config, uuid, errors):
 
     cur.execute("INSERT INTO {} VALUES ('{}', {});"
                 .format(name_table,
                         str(uuid), errors))
+
+
+def insert_logsummary_db(config, uuid, num_errors, num_warn,
+                         num_debug, num_notice, num_info):
+    conn = connect_crdb(config)
+    conn.set_session(autocommit=True)
+    cur = conn.cursor()
+    cur.execute("INSERT INTO {} VALUES ('{}', \
+                {}, {}, {}, {}, {});".format(config['table_logsummary'][0],
+                                             str(uuid),
+                                             int(num_errors),
+                                             int(num_warn),
+                                             int(num_debug),
+                                             int(num_notice),
+                                             int(num_info)))
+
+def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_softirq,
+                                  cpu_wait, mem_used, mem_slabunrecl):
+    conn = connect_crdb(config)
+    conn.set_session(autocommit=True)
+    cur = conn.cursor()
+    cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
+                {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
+                                                 str(uuid),
+                                                 float(cpu_system[0]),
+                                                 float(cpu_system[1]),
+                                                 float(cpu_user[0]),
+                                                 float(cpu_user[1]),
+                                                 float(cpu_softirq[0]),
+                                                 float(cpu_softirq[1]),
+                                                 float(cpu_wait[0]),
+                                                 float(cpu_wait[1]),
+                                                 float(mem_used[0]),
+                                                 float(mem_used[1]),
+                                                 float(mem_slabunrecl[0]),
+                                                 float(mem_slabunrecl[1])))
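
One caveat on the new update_crdb.py helpers: every value is interpolated
into the SQL with str.format, so a value containing a quote breaks the
statement. Assuming connect_crdb returns a psycopg2-style connection (the
cursor/set_session usage suggests it does), the same write can use bound
parameters instead. A sketch of that alternative, not what the patch does:

    def insert_logsummary_db(config, uuid, num_errors, num_warn,
                             num_debug, num_notice, num_info):
        conn = connect_crdb(config)
        conn.set_session(autocommit=True)
        cur = conn.cursor()
        # Table names cannot be bound, so the table still comes from
        # config; all values go through the driver as parameters.
        cur.execute("INSERT INTO {} VALUES (%s, %s, %s, %s, %s, %s);"
                    .format(config['table_logsummary'][0]),
                    (str(uuid), int(num_errors), int(num_warn),
                     int(num_debug), int(num_notice), int(num_info)))
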
From 4c6f5d2c4492f34caaf3b9c9f4ace92fb321e0ae Mon Sep 17 00:00:00 2001
From: agopi
Date: Tue, 10 Apr 2018 17:38:55 -0400
Subject: [PATCH 5/7] Storing memory values in MB instead of bytes

---
 bml/lib/data_summary.py        |  8 ++++----
 bml/lib/timeseries_uploaddb.py | 14 +++++++++-----
 bml/lib/update_crdb.py         | 29 +++++++++++++++--------------
 3 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/bml/lib/data_summary.py b/bml/lib/data_summary.py
index 549135b..2780eba 100644
--- a/bml/lib/data_summary.py
+++ b/bml/lib/data_summary.py
@@ -40,10 +40,10 @@ def summary_uuid(es_backend, config, uuid, update):
 
 
 def data_summary(data):
-    std_dev = "{:.4f}".format(numpy.std(data)).ljust(10)
-    avg = "{:.4f}".format(numpy.mean(data)).ljust(10)
-    median = "{:.4f}".format(numpy.median(data)).ljust(10)
-    percentile95 = "{:.4f}".format(numpy.percentile(data, 95)).ljust(10)
+    std_dev = "{:.2f}".format(numpy.std(data)).ljust(10)
+    avg = "{:.2f}".format(numpy.mean(data)).ljust(10)
+    median = "{:.2f}".format(numpy.median(data)).ljust(10)
+    percentile95 = "{:.2f}".format(numpy.percentile(data, 95)).ljust(10)
     summary = [avg, std_dev, median, percentile95]
     return(summary)
 
diff --git a/bml/lib/timeseries_uploaddb.py b/bml/lib/timeseries_uploaddb.py
index 811ea8c..e93167a 100644
--- a/bml/lib/timeseries_uploaddb.py
+++ b/bml/lib/timeseries_uploaddb.py
@@ -11,7 +11,7 @@
                 "overcloud-controller-0.memory.memory-used"]
 
 
-def get_features(gdata, pos):
+def get_features(gdata, pos, metric_id):
     values = []
     empty_check = True
     for entry in gdata:
@@ -24,6 +24,10 @@ def get_features(gdata, pos):
     else:
         mean = round(numpy.mean(values), 2)
         percentile95 = round(numpy.percentile(values, 95), 2)
+        # Converting memory from bytes to MB.
+        if "memory" in metric_id:
+            mean = mean / 1000000
+            percentile95 = percentile95 / 1000000
     return [mean, percentile95]
 
 
@@ -48,8 +52,8 @@ def timeseriessummaries_db(elastic, config, uuid):
     mem_slabunrecl = summarize_metric(final_url, metrics_list[4])
     mem_used = summarize_metric(final_url, metrics_list[5])
     insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
-                                  cpu_softirq, cpu_wait, mem_slabunrecl,
-                                  mem_used)
+                                  cpu_softirq, cpu_wait, mem_used,
+                                  mem_slabunrecl)
 
 
 def summarize_metric(final_url, metric_id):
@@ -74,6 +78,6 @@ def summarize_metric(final_url, metric_id):
         else:
             dict_vals[k] = [v]
         list_vals = map(list, dict_vals.items())
-        return get_features(list_vals, 1)
+        return get_features(list_vals, 1, metric_id)
     else:
-        return get_features(response[0]['datapoints'], 0)
+        return get_features(response[0]['datapoints'], 0, metric_id)
diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py
index 25694a2..cdebfa9 100644
--- a/bml/lib/update_crdb.py
+++ b/bml/lib/update_crdb.py
@@ -76,23 +76,24 @@ def insert_logsummary_db(config, uuid, num_errors, num_warn,
                                              int(num_notice),
                                              int(num_info)))
 
-def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_softirq,
-                                  cpu_wait, mem_used, mem_slabunrecl):
+def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
+                                  cpu_softirq, cpu_wait, mem_used,
+                                  mem_slabunrecl):
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
                 {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
                                                  str(uuid),
-                                                 float(cpu_system[0]),
-                                                 float(cpu_system[1]),
-                                                 float(cpu_user[0]),
-                                                 float(cpu_user[1]),
-                                                 float(cpu_softirq[0]),
-                                                 float(cpu_softirq[1]),
-                                                 float(cpu_wait[0]),
-                                                 float(cpu_wait[1]),
-                                                 float(mem_used[0]),
-                                                 float(mem_used[1]),
-                                                 float(mem_slabunrecl[0]),
-                                                 float(mem_slabunrecl[1])))
+                                                 round(cpu_system[0],2),
+                                                 round(cpu_system[1],2),
+                                                 round(cpu_user[0],2),
+                                                 round(cpu_user[1],2),
+                                                 round(cpu_softirq[0],2),
+                                                 round(cpu_softirq[1],2),
+                                                 round(cpu_wait[0],2),
+                                                 round(cpu_wait[1],2),
+                                                 round(mem_used[0],2),
+                                                 round(mem_used[1],2),
+                                                 round(mem_slabunrecl[0],2),
+                                                 round(mem_slabunrecl[1],2)))
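
Note that patch 5 divides by 10**6, so the stored figures are decimal
megabytes, not the 2**20-byte MiB that tools like free report. For a sample
datapoint:

    mem_bytes = 2000000000.0               # sample memory-used value
    print(round(mem_bytes / 1000000, 2))   # 2000.0 MB, what gets stored
    print(round(mem_bytes / 1048576, 2))   # 1907.35 MiB, for comparison
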
+ if "memory" in metric_id: + mean = mean / 1000000 + percentile95 = percentile95 / 1000000 return [mean, percentile95] @@ -48,8 +52,8 @@ def timeseriessummaries_db(elastic, config, uuid): mem_slabunrecl = summarize_metric(final_url, metrics_list[4]) mem_used = summarize_metric(final_url, metrics_list[5]) insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user, - cpu_softirq, cpu_wait, mem_slabunrecl, - mem_used) + cpu_softirq, cpu_wait, mem_used, + mem_slabunrecl) def summarize_metric(final_url, metric_id): @@ -74,6 +78,6 @@ def summarize_metric(final_url, metric_id): else: dict_vals[k] = [v] list_vals = map(list, dict_vals.items()) - return get_features(list_vals, 1) + return get_features(list_vals, 1, metric_id) else: - return get_features(response[0]['datapoints'], 0) + return get_features(response[0]['datapoints'], 0, metric_id) diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py index 25694a2..cdebfa9 100644 --- a/bml/lib/update_crdb.py +++ b/bml/lib/update_crdb.py @@ -76,23 +76,24 @@ def insert_logsummary_db(config, uuid, num_errors, num_warn, int(num_notice), int(num_info))) -def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_softirq, - cpu_wait, mem_used, mem_slabunrecl): +def insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user, + cpu_softirq, cpu_wait, mem_used, + mem_slabunrecl): conn = connect_crdb(config) conn.set_session(autocommit=True) cur = conn.cursor() cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\ {}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0], str(uuid), - float(cpu_system[0]), - float(cpu_system[1]), - float(cpu_user[0]), - float(cpu_user[1]), - float(cpu_softirq[0]), - float(cpu_softirq[1]), - float(cpu_wait[0]), - float(cpu_wait[1]), - float(mem_used[0]), - float(mem_used[1]), - float(mem_slabunrecl[0]), - float(mem_slabunrecl[1]))) + round(cpu_system[0],2), + round(cpu_system[1],2), + round(cpu_user[0],2), + round(cpu_user[1],2), + round(cpu_softirq[0],2), + round(cpu_softirq[1],2), + round(cpu_wait[0],2), + round(cpu_wait[1],2), + round(mem_used[0],2), + round(mem_used[1],2), + round(mem_slabunrecl[0],2), + round(mem_slabunrecl[1],2))) From 5574c02d391c741bd9acb41f23e7570535a21804 Mon Sep 17 00:00:00 2001 From: agopi Date: Sun, 15 Apr 2018 21:38:34 -0400 Subject: [PATCH 6/7] Pushing data to elastic --- bml/bml.py | 3 ++- bml/lib/browbeat_test.py | 3 ++- bml/lib/data_summary.py | 12 ++++++++++-- bml/lib/elastic_backend.py | 15 +++++++++++++++ bml/lib/update_crdb.py | 16 +++++++++------- 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/bml/bml.py b/bml/bml.py index b42ab57..2b368a6 100644 --- a/bml/bml.py +++ b/bml/bml.py @@ -53,7 +53,8 @@ def parse_args(): parser.add_argument('-u', '--update-db', dest='update', type=bool, default=False, - help='-u True pushes data to cockroach db') + help='-u True pushes summary data to cockroach db \ + and elastic') parser.add_argument('--update-clf', dest="clf_days", type=int, default=-1, diff --git a/bml/lib/browbeat_test.py b/bml/lib/browbeat_test.py index 0563a03..f115801 100644 --- a/bml/lib/browbeat_test.py +++ b/bml/lib/browbeat_test.py @@ -79,7 +79,7 @@ def _typecheck_string(self, val): else: return str(val) - # Extracts details of the really run + # Extracts details of the rally run def _set_metadata(self, raw_elastic): self._set_timeseries_metadata(raw_elastic) self._set_hardware_metadata( @@ -98,6 +98,7 @@ def _set_metadata(self, raw_elastic): self.run = self._typecheck_num(self.run) self.dlrn_hash = 
diff --git a/bml/lib/data_summary.py b/bml/lib/data_summary.py
index 2780eba..7229e92 100644
--- a/bml/lib/data_summary.py
+++ b/bml/lib/data_summary.py
@@ -53,6 +53,7 @@ def print_run_details(config, es_backend, uuid, update):
     brun = browbeat_run(es_backend, uuid, caching=True)
     output_string = ""
     osp_version = ""
+    ovn = ""
     padding = longest_test_name(config)
     test_clean_count = 0  # count of the tests that are being classified
     '''
@@ -67,6 +68,7 @@ def print_run_details(config, es_backend, uuid, update):
         for test_run in brun.get_tests(test_search=test_name):
             data.extend(test_run.raw)
             osp_version = test_run.version
+            ovn = test_run.ovn
         if test_run is None:
             continue
         statistics_uuid = data_summary(data)
@@ -86,6 +88,7 @@ def print_run_details(config, es_backend, uuid, update):
             dlrn_hash = test_run.dlrn_hash
             puddle = test_run.rhos_puddle
             hash_check = check_hash(dlrn_hash, puddle)
+            output_prediction = "None"
             if time_check and cloud_check and hash_check:
                 check_outcome = 1
                 if test_checks:
@@ -98,7 +101,7 @@ def print_run_details(config, es_backend, uuid, update):
                                     average_runtime, output_prediction,
                                     test_run.timestamp, puddle, dlrn_hash,
                                     concurrency, times,
-                                    perc95_score)
+                                    perc95_score, ovn)
                     if int(output_prediction) == 1:
                         print("ALERT!!!!")
                         print(uuid, test_name, osp_version, average_runtime)
@@ -111,10 +114,15 @@ def print_run_details(config, es_backend, uuid, update):
                     insert_values_db(config, uuid, test_name, osp_version,
                                      average_runtime, test_run.timestamp,
                                      puddle, dlrn_hash, concurrency, times,
-                                     perc95_score)
+                                     perc95_score, ovn)
                 output_string += "\n"
         else:
             output_string += "\n"
+        if update:
+            es_backend.push_summary_es(uuid, osp_version, test_name,
+                                       average_runtime,
+                                       statistics_uuid[1], perc95_score,
+                                       output_prediction, ovn)
     '''
     conn.commit()
     conn.close()
diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py
index 959d3bf..d48300e 100644
--- a/bml/lib/elastic_backend.py
+++ b/bml/lib/elastic_backend.py
@@ -116,3 +116,18 @@ def compute_hits(self, start, end, cloud_name, level_type):
                         }}}}}
         res = self.es.search(index="logstash-*", body=query_input)
         return res['hits']['total']
+
+
+    def push_summary_es(self, uuid, osp_version, test_name, mean, std_dev,
+                        perc95_score, output_prediction, ovn):
+        data = {
+            "browbeat_uuid": str(uuid),
+            "osp_version": str(osp_version),
+            "action": str(test_name),
+            "mean": mean,
+            "std_dev": std_dev,
+            "percentile_95": perc95_score,
+            "class": output_prediction[0],
+            "with_ovn": ovn
+        }
+        self.es.index(index='bml_summary', doc_type='result', body=data)
diff --git a/bml/lib/update_crdb.py b/bml/lib/update_crdb.py
index cdebfa9..42576a9 100644
--- a/bml/lib/update_crdb.py
+++ b/bml/lib/update_crdb.py
@@ -11,14 +11,14 @@ def update_osp_version(config, osp_name):
 
 def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
                      time_stamp, puddle, dlrn, concurrency, times,
-                     perc95_score):
+                     perc95_score, ovn):
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
     classify = True
     osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} VALUES ('{}', '{}', '{}', {}, '{}', '{}', \
-                '{}', '{}', {}, {}, {}, {});" .format(config['table_name'][0],
+                '{}', '{}', {}, {}, {}, {},'{}');" .format(config['table_name'][0],
                                                       str(uuid), str(test),
                                                       str(osp_name),
                                                       float(avg_runtime),
@@ -28,12 +28,13 @@ def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade,
                                                       int(grade),
                                                       int(concurrency),
                                                       int(times),
-                                                      float(perc95_score)))
+                                                      float(perc95_score),
+                                                      str(ovn)))
 
 
 def insert_values_db(config, uuid, test, osp_name, avg_runtime,
                      time_stamp, puddle, dlrn, concurrency, times,
-                     perc95_score):
+                     perc95_score, ovn):
     conn = connect_crdb(config)
     conn.set_session(autocommit=True)
     cur = conn.cursor()
@@ -41,15 +42,16 @@ def insert_values_db(config, uuid, test, osp_name, avg_runtime,
     osp_name = update_osp_version(config, osp_name)
     cur.execute("INSERT INTO {} (uuid, test, osp_version, avg_runtime, \
                 timestamp, rhos_puddle, dlrn_hash, classify, concurrency, \
-                times, percentile95) VALUES ('{}', '{}', '{}', {}, '{}', '{}',\
-                '{}', '{}', {}, {}, {})" .format(config['table_name'][0],
+                times, percentile95, ovn) VALUES ('{}', '{}', '{}', {}, '{}', \
+                '{}', '{}', '{}', {}, {}, {},'{}')".format(config['table_name'][0],
                                                  str(uuid), str(test),
                                                  str(osp_name),
                                                  float(avg_runtime),
                                                  str(time_stamp), str(puddle),
                                                  str(dlrn), bool(classify),
                                                  int(concurrency), int(times),
-                                                 float(perc95_score))
+                                                 float(perc95_score),
+                                                 str(ovn))
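
push_summary_es indexes one summary document per test into the bml_summary
index. Note that it stores output_prediction[0] under "class", so the "None"
default set in print_run_details is indexed as the single character "N". A
hedged sketch of a call; every value below is illustrative and backend is
assumed to be a Backend instance:

    backend.push_summary_es("8a3cc3a4-85e4-4f1d-acd4-0c4b0a9a1a55",  # fake
                            "13-tripleo", "create-and-list-routers",
                            mean=12.41, std_dev=1.02, perc95_score=14.37,
                            output_prediction="0",  # stored as "class": "0"
                            ovn=True)               # stored as "with_ovn"
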
{}, {}, {}, {},'{}');" .format(config['table_name'][0], str(uuid), str(test), str(osp_name), float(avg_runtime), @@ -28,12 +28,13 @@ def insert_grades_db(config, uuid, test, osp_name, avg_runtime, grade, int(grade), int(concurrency), int(times), - float(perc95_score))) + float(perc95_score), + str(ovn))) def insert_values_db(config, uuid, test, osp_name, avg_runtime, time_stamp, puddle, dlrn, concurrency, times, - perc95_score): + perc95_score, ovn): conn = connect_crdb(config) conn.set_session(autocommit=True) cur = conn.cursor() @@ -41,15 +42,16 @@ def insert_values_db(config, uuid, test, osp_name, avg_runtime, osp_name = update_osp_version(config, osp_name) cur.execute("INSERT INTO {} (uuid, test, osp_version, avg_runtime, \ timestamp, rhos_puddle, dlrn_hash, classify, concurrency, \ - times, percentile95) VALUES ('{}', '{}', '{}', {}, '{}', '{}',\ - '{}', '{}', {}, {}, {})" .format(config['table_name'][0], + times, percentile95, ovn) VALUES ('{}', '{}', '{}', {}, '{}', \ + '{}', '{}', '{}', {}, {}, {},'{}')".format(config['table_name'][0], str(uuid), str(test), str(osp_name), float(avg_runtime), str(time_stamp), str(puddle), str(dlrn), bool(classify), int(concurrency), int(times), - float(perc95_score))) + float(perc95_score), + str(ovn))) def insert_errors_db(config, uuid, errors): From 8af4d0f055fa7806dea84ffd359ad1be94e6f3b8 Mon Sep 17 00:00:00 2001 From: agopi Date: Wed, 2 May 2018 21:11:30 -0400 Subject: [PATCH 7/7] Using search vs scroll for es --- bml/lib/elastic_backend.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/bml/lib/elastic_backend.py b/bml/lib/elastic_backend.py index d48300e..833b322 100644 --- a/bml/lib/elastic_backend.py +++ b/bml/lib/elastic_backend.py @@ -47,16 +47,18 @@ def grab_uuids_by_date(self, version, time_period): # Searches and grabs the raw source data for a Browbeat UUID def grab_uuid(self, uuid): query = {"query": {"match": {'browbeat_uuid': uuid}}} - results = helpers.scan(self.es, - query, - raise_on_error=False, - size=100, - request_timeout=1000) - - if results == []: + # Should use scroll later on but meanwhile using search + # But because ideally we dont see that many hits + # search isn't entirely bad. but in future if hits are in thousands + # use scroll + res = self.es.search(index="browbeat-rally-*", body=query, size=1000) + # size ^ above is set to 1000, as we've never exceeded more than + # 300 entries for the uuids we've seen so far + if res == []: raise ValueError(uuid + " Has no results!") - - return results + # As we switch from scroll api, we're using search to make sure + # elasticsearch doesnt keep hitting errors + return res['hits']['hits'] def compute_start_end(self, uuid): query_input = {