Elastic5 #3

Open · wants to merge 7 commits into base: master
7 changes: 4 additions & 3 deletions bml/bml.py
@@ -53,7 +53,8 @@ def parse_args():

parser.add_argument('-u', '--update-db', dest='update', type=bool,
default=False,
help='-u True pushes data to cockroach db')
help='-u True pushes summary data to cockroach db \
and elastic')
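
Side note on the flag above: argparse's type=bool converts any non-empty string to True, so -u False would still enable the upload. A minimal sketch of a stricter converter (helper name hypothetical, not part of this diff; assumes argparse is already imported in bml.py):

def str2bool(value):
    # Map explicit truthy/falsy strings instead of relying on bool().
    if value.lower() in ('yes', 'true', 't', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got ' + value)

parser.add_argument('-u', '--update-db', dest='update', type=str2bool,
                    default=False,
                    help='-u True pushes summary data to cockroach db and elastic')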

parser.add_argument('--update-clf', dest="clf_days", type=int,
default=-1,
@@ -94,9 +95,9 @@ def main():
str(args.days) + "d",
args.version, update=False)
elif args.timeseries_uuid is not None:
lib.timeseries_uploaddb.insert_timeseriessummaries_db(config, args.timeseries_uuid) # noqa
lib.timeseries_uploaddb.timeseriessummaries_db(es_backend, config, args.timeseries_uuid) # noqa
elif args.loggin_uuid is not None:
lib.logsummary_uploaddb.insert_logsummary_db(config, args.loggin_uuid) # noqa
lib.logsummary_uploaddb.logsummary_db(es_backend, config, args.loggin_uuid) # noqa
elif args.summary_uuid is not None:
lib.data_summary.summary_uuid(es_backend, config, args.summary_uuid,
args.update)
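
For context, this refactor threads one shared Backend instance through the helpers instead of each module building its own Elasticsearch client. A rough sketch of the resulting call flow (not a drop-in script; assumes PyYAML for the config, config keys taken from bml/config.yml in this diff, uuid is a placeholder):

import yaml

import lib.elastic_backend
import lib.logsummary_uploaddb
import lib.timeseries_uploaddb

with open("config.yml") as stream:
    config = yaml.safe_load(stream)

# One shared Elasticsearch connection for every uploader.
es_backend = lib.elastic_backend.Backend(config['elastic-host'],
                                         config['elastic-port'])

uuid = "0000-example-browbeat-uuid"  # placeholder
lib.timeseries_uploaddb.timeseriessummaries_db(es_backend, config, uuid)
lib.logsummary_uploaddb.logsummary_db(es_backend, config, uuid)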
7 changes: 6 additions & 1 deletion bml/config.yml
@@ -1,4 +1,4 @@
elastic-host: elk.browbeatproject.org
elastic-host: elk-b09-h30-r720xd.rdu.openstack.engineering.redhat.com
elastic-port: 9200

tests:
@@ -122,6 +122,11 @@ table_name:
table_errors:
- 'num_errors'

# This needs to be updated before the end of each release cycle
# https://releases.openstack.org/.
master:
- '14-tripleo'

table_timeseries:
- 'timeseries_summary'

4 changes: 2 additions & 2 deletions bml/lib/browbeat_run.py
@@ -28,8 +28,8 @@ def _init_timeseries(self, elastic_connection, uuid):
= elastic_connection.compute_start_end(uuid)
# print graphite_details
self._metrics_root = graphite_details[2]
self._graphite_start = int(graphite_details[0]) / 1000
self._graphite_end = int(graphite_details[1]) / 1000
self._graphite_start = int(graphite_details[0])
self._graphite_end = int(graphite_details[1])
self._graphite_url = graphite_details[3]
# Not needed but keeping in case the new way breaks
# this timestamp should never be smaller than any time in
3 changes: 2 additions & 1 deletion bml/lib/browbeat_test.py
@@ -79,7 +79,7 @@ def _typecheck_string(self, val):
else:
return str(val)

# Extracts details of the really run
# Extracts details of the rally run
def _set_metadata(self, raw_elastic):
self._set_timeseries_metadata(raw_elastic)
self._set_hardware_metadata(
@@ -98,6 +98,7 @@ def _set_metadata(self, raw_elastic):
self.run = self._typecheck_num(self.run)
self.dlrn_hash = raw_elastic['_source']['version']['dlrn_hash']
self.rhos_puddle = raw_elastic['_source']['version']['rhos_puddle']
self.ovn = True if "ovn" in raw_elastic['_source']['version']['logs_link'] else False # noqa
self.scenario_name = raw_elastic['_source']['rally_setup']['name']
self.timestamp = raw_elastic['_source']['timestamp']
self.num_computes = \
20 changes: 14 additions & 6 deletions bml/lib/data_summary.py
@@ -40,10 +40,10 @@ def summary_uuid(es_backend, config, uuid, update):


def data_summary(data):
std_dev = "{:.4f}".format(numpy.std(data)).ljust(10)
avg = "{:.4f}".format(numpy.mean(data)).ljust(10)
median = "{:.4f}".format(numpy.median(data)).ljust(10)
percentile95 = "{:.4f}".format(numpy.percentile(data, 95)).ljust(10)
std_dev = "{:.2f}".format(numpy.std(data)).ljust(10)
avg = "{:.2f}".format(numpy.mean(data)).ljust(10)
median = "{:.2f}".format(numpy.median(data)).ljust(10)
percentile95 = "{:.2f}".format(numpy.percentile(data, 95)).ljust(10)
summary = [avg, std_dev, median, percentile95]
return(summary)
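
A quick usage example of the two-decimal formatting above (values illustrative; numpy is already imported at the top of this module):

durations = [1.2, 1.4, 1.9, 2.3, 5.0]
avg, std_dev, median, perc95 = data_summary(durations)
# Each element is a two-decimal string left-justified to 10 characters,
# e.g. avg == "2.36" followed by six spaces of padding.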

@@ -53,6 +53,7 @@ def print_run_details(config, es_backend, uuid, update):
brun = browbeat_run(es_backend, uuid, caching=True)
output_string = ""
osp_version = ""
ovn = ""
padding = longest_test_name(config)
test_clean_count = 0 # count of the tests that are being classified
'''
@@ -67,6 +68,7 @@
for test_run in brun.get_tests(test_search=test_name):
data.extend(test_run.raw)
osp_version = test_run.version
ovn = test_run.ovn
if test_run is None:
continue
statistics_uuid = data_summary(data)
@@ -86,6 +88,7 @@
dlrn_hash = test_run.dlrn_hash
puddle = test_run.rhos_puddle
hash_check = check_hash(dlrn_hash, puddle)
output_prediction = "None"
if time_check and cloud_check and hash_check:
check_outcome = 1
if test_checks:
@@ -98,7 +101,7 @@
average_runtime, output_prediction,
test_run.timestamp, puddle,
dlrn_hash, concurrency, times,
perc95_score)
perc95_score, ovn)
if int(output_prediction) == 1:
print("ALERT!!!!")
print(uuid, test_name, osp_version, average_runtime)
@@ -111,10 +114,15 @@
insert_values_db(config, uuid, test_name, osp_version,
average_runtime, test_run.timestamp,
puddle, dlrn_hash, concurrency, times,
perc95_score)
perc95_score, ovn)
output_string += "\n"
else:
output_string += "\n"
if update:
es_backend.push_summary_es(uuid, osp_version, test_name,
average_runtime,
statistics_uuid[1], perc95_score,
output_prediction, ovn)
'''
conn.commit()
conn.close()
67 changes: 55 additions & 12 deletions bml/lib/elastic_backend.py
@@ -14,10 +14,6 @@ def __init__(self, host, port):
'port': port}],
send_get_body_as='POST',
retries=True,
sniff_on_start=True,
sniff_on_connection_fail=True,
sniff_timeout=10,
sniffer_timeout=120,
timeout=120)

def grab_uuids_by_date(self, version, time_period):
@@ -51,15 +47,18 @@ def grab_uuids_by_date(self, version, time_period):
# Searches and grabs the raw source data for a Browbeat UUID
def grab_uuid(self, uuid):
query = {"query": {"match": {'browbeat_uuid': uuid}}}
results = helpers.scan(self.es,
query,
size=100,
request_timeout=1000)

if results == []:
# Should use the scroll API later on, but for now search is fine
# because we typically don't see that many hits. If hit counts
# ever reach the thousands, switch to scroll.
res = self.es.search(index="browbeat-rally-*", body=query, size=1000)
# size above is set to 1000; we have never seen more than
# 300 entries for any of the uuids so far
if res == []:
raise ValueError(uuid + " Has no results!")

return results
# As we switch away from the scroll API, we use search to make sure
# Elasticsearch doesn't keep hitting errors
return res['hits']['hits']
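
If the hit count ever does grow past the fixed size above, a scroll-based variant along these lines (sketch only; method name hypothetical, using the same elasticsearch.helpers.scan call the removed code relied on) would avoid truncating results:

from elasticsearch import helpers

def grab_uuid_scrolled(self, uuid):
    # helpers.scan wraps the scroll API and yields every matching hit,
    # however many there are.
    query = {"query": {"match": {'browbeat_uuid': uuid}}}
    hits = list(helpers.scan(self.es,
                             query=query,
                             index="browbeat-rally-*",
                             size=100,
                             request_timeout=1000))
    if not hits:
        raise ValueError(uuid + " Has no results!")
    return hits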

def compute_start_end(self, uuid):
query_input = {
@@ -90,3 +89,47 @@ def compute_start_end(self, uuid):
graphite_port = "80"
graphite_url = "http://{}:{}".format(graphite_url, graphite_port)
return [start, end, cloud_name, graphite_url]

def compute_hits(self, start, end, cloud_name, level_type):
time_dict = {
"format": "epoch_millis"
}
time_dict["gte"] = start
time_dict["lte"] = end
query_input = {
"query": {
"bool": {
"must": {
"query_string": {
"query": "browbeat.cloud_name: \
" + cloud_name + " AND level: " + level_type
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"@timestamp": time_dict
}
}
],
"must_not": []
}}}}}
res = self.es.search(index="logstash-*", body=query_input)
return res['hits']['total']
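
A usage sketch for the new compute_hits helper (es_backend and uuid assumed to exist). The start/end values are epoch milliseconds, matching both compute_start_end above and the "epoch_millis" format in the range filter; note that hits.total is a plain integer on Elasticsearch 5.x, which is what the int() casts in logsummary_uploaddb.py below rely on:

start, end, cloud_name, graphite_url = es_backend.compute_start_end(uuid)
num_errors = es_backend.compute_hits(start, end, cloud_name, 'error')
num_warn = es_backend.compute_hits(start, end, cloud_name, 'warning')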


def push_summary_es(self, uuid, osp_version, test_name, mean, std_dev,
perc95_score, output_prediction, ovn):
data={
"browbeat_uuid":str(uuid),
"osp_version":str(osp_version),
"action":str(test_name),
"mean":mean,
"std_dev":std_dev,
"percentile_95":perc95_score,
"class": output_prediction[0],
"with_ovn": ovn
}
self.es.index(index='bml_summary', doc_type='result', body=data)
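
To sanity-check what lands in the new bml_summary index, something along these lines could be run against the same cluster (query purely illustrative; index and field names taken from the data dict above):

res = es_backend.es.search(index="bml_summary",
                           body={"query": {"match": {"browbeat_uuid": uuid}}})
for hit in res['hits']['hits']:
    print(hit['_source']['action'], hit['_source']['mean'])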
84 changes: 15 additions & 69 deletions bml/lib/logsummary_uploaddb.py
@@ -1,72 +1,18 @@
from elasticsearch import Elasticsearch
from util import connect_crdb
from browbeat_run import browbeat_run
from update_crdb import insert_logsummary_db


def compute_hits(es, start, end, cloud_name, level_type):
time_dict = {
"format": "epoch_millis"
}
time_dict["gte"] = start
time_dict["lte"] = end
query_input = {
"query": {
"filtered": {
"query": {
"query_string": {
"query": "browbeat.cloud_name: \
" + cloud_name + " AND level: " + level_type
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"@timestamp": time_dict
}
}
],
"must_not": []
}}}}}
res = es.search(index="logstash-*", body=query_input)
return res['hits']['total']


def insert_logsummary_db(config, uuid):
es = Elasticsearch([{'host': 'elk.browbeatproject.org', 'port': 9200}])
query_input = {
"query": {
"match": {
'browbeat_uuid': uuid
}
},
"aggs": {
"max_time": {
"max": {
"field": "timestamp"
}
},
"min_time": {
"min": {
"field": "timestamp"
}}}}
res = es.search(index="browbeat-rally-*", body=query_input)
start = int(res['aggregations']['min_time']['value'])
end = int(res['aggregations']['max_time']['value'])
cloud_name = res['hits']['hits'][0]['_source']['cloud_name']
num_errors = compute_hits(es, start, end, cloud_name, 'error')
num_warn = compute_hits(es, start, end, cloud_name, 'warning')
num_debug = compute_hits(es, start, end, cloud_name, 'debug')
num_notice = compute_hits(es, start, end, cloud_name, 'notice')
num_info = compute_hits(es, start, end, cloud_name, 'info')
conn = connect_crdb(config)
conn.set_session(autocommit=True)
cur = conn.cursor()
cur.execute("INSERT INTO {} VALUES ('{}', \
{}, {}, {}, {}, {});".format(config['table_logsummary'][0],
str(uuid),
int(num_errors),
int(num_warn),
int(num_debug),
int(num_notice),
int(num_info)))
def logsummary_db(es, config, uuid):
brun = browbeat_run(es, uuid, timeseries=True)
graphite_details = brun.get_graphite_details()
start = graphite_details[1]
end = graphite_details[2]
cloud_name = str(graphite_details[3])
num_errors = int(es.compute_hits(start, end, cloud_name, 'error'))
num_warn = int(es.compute_hits(start, end, cloud_name, 'warning'))
num_debug = int(es.compute_hits(start, end, cloud_name, 'debug'))
num_notice = int(es.compute_hits(start, end, cloud_name, 'notice'))
num_info = int(es.compute_hits(start, end, cloud_name, 'info'))
insert_logsummary_db(config, uuid, num_errors, num_warn, num_debug,
num_notice, num_info)
42 changes: 14 additions & 28 deletions bml/lib/timeseries_uploaddb.py
@@ -1,8 +1,7 @@
import numpy
import requests
from elastic_backend import Backend
from browbeat_run import browbeat_run
from util import connect_crdb
from update_crdb import insert_timeseriessummaries_db

metrics_list = ["overcloud-controller-0.cpu-*.cpu-system",
"overcloud-controller-0.cpu-*.cpu-user",
@@ -12,7 +11,7 @@
"overcloud-controller-0.memory.memory-used"]


def get_features(gdata, pos):
def get_features(gdata, pos, metric_id):
values = []
empty_check = True
for entry in gdata:
@@ -25,17 +24,19 @@ def get_features(gdata, pos):
else:
mean = round(numpy.mean(values), 2)
percentile95 = round(numpy.percentile(values, 95), 2)
# Converting memory from bytes to MB.
if "memory" in metric_id:
mean = mean / 1000000
percentile95 = percentile95 / 1000000
return [mean, percentile95]
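
Worth noting on the conversion above: dividing by 1000000 yields decimal megabytes; if binary mebibytes (MiB) were intended, the divisor would be 1024 * 1024. A tiny illustration with a made-up value:

mem_bytes = 8589934592.0          # 8 GiB reported by collectd, in bytes
print(mem_bytes / 1000000)        # ~8589.9 decimal MB, as in this patch
print(mem_bytes / (1024 * 1024))  # 8192.0 MiB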


def insert_timeseriessummaries_db(config, uuid):
# WIP should pass the backend object here
elastic = Backend("elk.browbeatproject.org", "9200")
def timeseriessummaries_db(elastic, config, uuid):
brun = browbeat_run(elastic, uuid, timeseries=True)
graphite_details = brun.get_graphite_details()
graphite_url = graphite_details[0]
start = graphite_details[1]
end = graphite_details[2]
start = graphite_details[1] / 1000
end = graphite_details[2] / 1000
metric_base = str(graphite_details[3]) + "."
base_url = "{}/render?target={}"
time_url = "&format=json&from={}&until={}"
@@ -44,30 +45,15 @@ def insert_timeseriessummaries_db(config, uuid):
time_url = time_url.format(start,
end)
final_url = base_url + "{}" + time_url
conn = connect_crdb(config)
conn.set_session(autocommit=True)
cur = conn.cursor()
cpu_system = summarize_metric(final_url, metrics_list[0])
cpu_user = summarize_metric(final_url, metrics_list[1])
cpu_softirq = summarize_metric(final_url, metrics_list[2])
cpu_wait = summarize_metric(final_url, metrics_list[3])
mem_slabunrecl = summarize_metric(final_url, metrics_list[4])
mem_used = summarize_metric(final_url, metrics_list[5])
cur.execute("INSERT INTO {} VALUES ('{}', {}, {}, {}, {}, {}, {},\
{}, {}, {}, {}, {}, {});".format(config['table_timeseries'][0],
str(uuid),
float(cpu_system[0]),
float(cpu_system[1]),
float(cpu_user[0]),
float(cpu_user[1]),
float(cpu_softirq[0]),
float(cpu_softirq[1]),
float(cpu_wait[0]),
float(cpu_wait[1]),
float(mem_used[0]),
float(mem_used[1]),
float(mem_slabunrecl[0]),
float(mem_slabunrecl[1])))
insert_timeseriessummaries_db(config, uuid, cpu_system, cpu_user,
cpu_softirq, cpu_wait, mem_used,
mem_slabunrecl)
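
The division by 1000 above is needed because start/end now arrive as epoch milliseconds from Elasticsearch (browbeat_run.py in this diff no longer divides them), while Graphite's render API expects from/until in epoch seconds. A small sketch of the resulting request, with host and metric purely illustrative:

import requests

# Epoch milliseconds from Elasticsearch, converted to the epoch seconds
# that Graphite's render API expects.
start_ms, end_ms = 1528389000123, 1528392600456
start, end = start_ms // 1000, end_ms // 1000

url = ("http://graphite.example.com/render"
       "?target=cloud-a.overcloud-controller-0.cpu-0.cpu-system"
       "&format=json&from={}&until={}".format(start, end))
datapoints = requests.get(url).json()[0]['datapoints']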


def summarize_metric(final_url, metric_id):
@@ -92,6 +78,6 @@ def summarize_metric(final_url, metric_id):
else:
dict_vals[k] = [v]
list_vals = map(list, dict_vals.items())
return get_features(list_vals, 1)
return get_features(list_vals, 1, metric_id)
else:
return get_features(response[0]['datapoints'], 0)
return get_features(response[0]['datapoints'], 0, metric_id)