Skip to content

Commit

Permalink
Merge pull request #61 from sousinha1997/new_push_q3
Browse files Browse the repository at this point in the history
push
  • Loading branch information
sousinha97 authored Oct 1, 2024
2 parents fa28b9c + c35b138 commit 486df37
Show file tree
Hide file tree
Showing 49 changed files with 1,981 additions and 673 deletions.
3 changes: 3 additions & 0 deletions health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,6 @@ def health_check():
check_virtual_environment()
check_python_version()
check_and_install_requirements()

if __name__ == "__main__":
health_check()
130 changes: 96 additions & 34 deletions quisby.py

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions quisby/benchmarks/auto_hpl/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@
from quisby.pricing import cloud_pricing
from quisby.benchmarks.linpack.extract import linpack_format_data

from quisby.util import read_config


def extract_auto_hpl_data(path, system_name):

summary_data = []
server = read_config("server", "name")
result_dir = read_config("server", "result_dir")

if path.endswith(".csv"):
with open(path) as file:
results = []
file_data = file.readlines()
summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])

if len(file_data) > 1:
header_row = file_data[-2].strip().split(":")
Expand All @@ -24,8 +31,8 @@ def extract_auto_hpl_data(path, system_name):
)

if results:
return results
return results, summary_data

else:
return None
return None, None

53 changes: 43 additions & 10 deletions quisby/benchmarks/coremark/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,34 @@
get_sheet,
create_sheet, clear_sheet_data, clear_sheet_charts,
)
from quisby.util import combine_two_array_alternating, merge_lists_alternately
from quisby.util import combine_two_array_alternating, merge_lists_alternately, read_config
from quisby.benchmarks.coremark.graph import graph_coremark_data
import re


def compare_coremark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name"]):
def extract_prefix_and_number(input_string):
    """Split *input_string* around its first run of digits.

    Returns a ``(prefix, suffix)`` pair — the text before and after the
    first digit group — or ``(None, None)`` when the string contains no
    digits.  The digit group itself is deliberately discarded so that
    instance names differing only by size number compare equal.
    """
    parts = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
    if parts is None:
        return None, None
    # group(2) (the number) is intentionally ignored.
    return parts.group(1), parts.group(3)


def compare_inst(item1, item2):
    """Return True when two instance names belong to the same family.

    "Same family" depends on the configured cloud_type:
      * local -- everything is comparable, always True
      * aws   -- same prefix before the first "." (e.g. "m5" in "m5.xlarge")
      * gcp   -- same prefix before the first "-" (e.g. "n2" in "n2-standard-4")
      * azure -- same non-numeric prefix/suffix around the size number
                 (as computed by extract_prefix_and_number)
    Any other cloud_type yields a falsy result.
    """
    cloud_type = read_config("cloud", "cloud_type")
    if cloud_type == "local":
        return True
    elif cloud_type == "aws":
        return item1.split(".")[0] == item2.split(".")[0]
    elif cloud_type == "gcp":
        return item1.split("-")[0] == item2.split("-")[0]
    elif cloud_type == "azure":
        return extract_prefix_and_number(item1) == extract_prefix_and_number(item2)
    # BUG FIX: the original fell off the end and implicitly returned None
    # for unrecognised cloud types; make the falsy result explicit.
    return False


def compare_coremark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]):
values = []
results = []
spreadsheet_name = []
Expand All @@ -29,13 +52,23 @@ def compare_coremark_results(spreadsheets, spreadsheetId, test_name, table_name=
for value in list_1:
for ele in list_2:
# Check max throughput
if value[0][0] in table_name and ele[0][0] in table_name:
results.append([""])
for item1 in value:
for item2 in ele:
if item1[0] == item2[0]:
results = merge_lists_alternately(results, item1, item2)
break
if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]:
if compare_inst(value[1][0], ele[1][0]):
results.append([""])
for item1 in value:
for item2 in ele:
if item1[0] == item2[0]:
results = merge_lists_alternately(results, item1, item2)
break

elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr":
if compare_inst(value[1][0], ele[1][0]):
results.append([""])
for item1 in value:
for item2 in ele:
if item1[0] == item2[0]:
results.append(item1)
break

elif value[1][0] == ele[1][0]:
if value[0][0] == ele[0][0]:
Expand Down Expand Up @@ -69,4 +102,4 @@ def compare_coremark_results(spreadsheets, spreadsheetId, test_name, table_name=
test_name = "coremark"

compare_coremark_results(spreadsheets, "", test_name,
table_name=["System Name"])
table_name=["System Name"])
150 changes: 114 additions & 36 deletions quisby/benchmarks/coremark/coremark.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@

""" Custom key to sort the data base don instance name """
from itertools import groupby

from quisby import custom_logger
import re

from quisby.util import read_config
from quisby.pricing.cloud_pricing import get_cloud_pricing

from quisby.util import process_instance

from quisby.util import mk_int


def extract_prefix_and_number(input_string):
match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
Expand All @@ -15,66 +22,138 @@ def extract_prefix_and_number(input_string):
return None, None, None



def custom_key(item):
cloud_type = read_config("cloud","cloud_type")
if item[1][0] == "localhost":
return (item[1][0])
cloud_type = read_config("cloud", "cloud_type")
if item[1][0] == "local":
return item[1][0]
elif cloud_type == "aws":
instance_type =item[1][0].split(".")[0]
instance_number = item[1][0].split(".")[1]
return (instance_type, instance_number)
instance_name = item[1][0]
instance_type = instance_name.split(".")[0]
instance_number = instance_name.split(".")[1]
return instance_type, instance_number
elif cloud_type == "gcp":
instance_type = item[1][0].split("-")[0]
instance_number = int(item[1][0].split('-')[-1])
return instance_type, instance_number
elif cloud_type == "azure":
instance_type, instance_number, version = extract_prefix_and_number(item[1][0])
return instance_type, version, instance_number


def calc_price_performance(inst, avg):
    """Look up hourly pricing for *inst* and derive price-performance.

    Returns ``(cost_per_hour, price_perf)`` where ``price_perf`` is
    ``avg / cost_per_hour``.  On any lookup/calculation failure the error
    is logged and the defaults ``(None, 0.0)`` (or whatever partial values
    were computed) are returned — callers get a best-effort result.
    """
    region = read_config("cloud", "region")
    cloud = read_config("cloud", "cloud_type")
    os_type = read_config("test", "os_type")
    cost_per_hour, price_perf = None, 0.0
    try:
        cost_per_hour = get_cloud_pricing(
            inst, region, cloud.lower(), os_type)
        price_perf = float(avg) / float(cost_per_hour)
    except Exception as exc:
        # Pricing lookup may fail (unknown instance, bad region, zero cost);
        # record it and fall through with the defaults.
        custom_logger.debug(str(exc))
        custom_logger.error("Error calculating value !")
    return cost_per_hour, price_perf


def group_data(results):
cloud_type = read_config("cloud", "cloud_type")
if cloud_type == "aws":
return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type"))
elif cloud_type == "azure":
results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature"))
return groupby(results, key=lambda x: process_instance(x[1][0], "family", "feature"))
elif cloud_type == "gcp":
instance_type = item[1][0].split("-")[0]
instance_number = int(item[1][0].split('-')[-1])
return (instance_type, instance_number)
return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature"))
elif cloud_type == "local":
return groupby(results, key=lambda x: process_instance(x[1][0], "family"))


def sort_data(results):
cloud_type = read_config("cloud", "cloud_type")
if cloud_type == "aws":
results.sort(key=lambda x: str(process_instance(x[1][0], "family")))
elif cloud_type == "azure":
instance_type, instance_number, version=extract_prefix_and_number(item[1][0])
return (instance_type, instance_number)
results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "feature")))
elif cloud_type == "gcp":
results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "sub_family")))


def create_summary_coremark_data(results,OS_RELEASE):
def create_summary_coremark_data(results, OS_RELEASE, sorted_results=None):
final_results = []
cal_data = [["System name", "test passes_"+OS_RELEASE]]

# Sort data based on instance name
sorted_data = sorted(results, key=custom_key)

# Add summary data
for item in sorted_data:
sum = 0
avg = 0
iterations = 0
for index in range(3,len(item)):
sum = sum + float(item[index][1])
iterations = iterations + 1
avg = float(sum/iterations)
cal_data.append([item[1][0],avg])

final_results += item
final_results += [[""]]
final_results += cal_data
results = list(filter(None, results))
sort_data(results)

for _, items in group_data(results):
cal_data = [["System name", "Test_passes-" + OS_RELEASE]]
items = list(items)
sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size")))
# sorted_results.extend(sorted_data)
cost_per_hour, price_per_perf = [], []

# Add summary data
for item in sorted_data:
sum = 0
avg = 0
iterations = 0
for index in range(3, len(item)):
sum = sum + float(item[index][1])
iterations = iterations + 1
avg = float(sum / iterations)
try:
cph, pp = calc_price_performance(item[1][0], avg)
except Exception as exc:
custom_logger.error(str(exc))
break
cal_data.append([item[1][0], avg])
price_per_perf.append([item[1][0], pp])
cost_per_hour.append([item[1][0], cph])
sorted_results = [[""]]
sorted_results += cal_data
sorted_results.append([""])
sorted_results.append(["Cost/Hr"])
sorted_results += cost_per_hour
sorted_results.append([""])
sorted_results.append(["Price-perf", f"Passes/$-{OS_RELEASE}"])
sorted_results += price_per_perf
final_results.extend(sorted_results)
return final_results


def extract_coremark_data(path, system_name, OS_RELEASE):
""""""
results = []
processed_data =[]
processed_data = []
summary_data = []
server = read_config("server", "name")
result_dir = read_config("server", "result_dir")

# Extract data from file
try:
if path.endswith(".csv"):
with open(path) as file:
coremark_results = file.readlines()
summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
else:
return None
except Exception as exc:
custom_logger.debug(str(exc))
custom_logger.error("Unable to extract data from csv file for coremark")
return None

data_index = 0
header = []
for index, data in enumerate(coremark_results):
coremark_results[index] = data.strip("\n").split(":")
if "iteration" in data:
data_index = index
header = data.strip("\n").split(":")
else:
coremark_results[index] = data.strip("\n").split(":")
coremark_results = [header] + coremark_results[data_index + 1:]

# for index, data in enumerate(coremark_results):
# coremark_results[index] = data.strip("\n").split(":")

# Format the data
iteration = 1
Expand All @@ -88,5 +167,4 @@ def extract_coremark_data(path, system_name, OS_RELEASE):
iteration = iteration + 1
results.append(processed_data)

return results

return results, summary_data
47 changes: 36 additions & 11 deletions quisby/benchmarks/coremark/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,21 @@ def graph_coremark_data(spreadsheetId, range, action):

header_row = []
sheetId = -1

for index, row in enumerate(data):
if "System name" in row:
start_index = index
header_row.extend(row)
title = "%s : %s" % (range, "Test Passes")
subtitle = "Average Test Passes"
elif "Price-perf" in row:
start_index = index
header_row.extend(row)
title = "%s : %s" % (range, "Price-Performance")
subtitle = "Passes/$"
if start_index:
if not row:
end_index = index - 1
end_index = index
if index + 1 == len(data):
end_index = index + 1

Expand All @@ -124,20 +132,35 @@ def graph_coremark_data(spreadsheetId, range, action):
"addChart": {
"chart": {
"spec": {
"title": "%s : %s" % (range, "Test passes"),
"title": title,
"subtitle": subtitle + " : ",
"basicChart": {
"chartType": "COMBO",
"legendPosition": "BOTTOM_LEGEND",
"legendPosition": "RIGHT_LEGEND",
"axis": [
{
"position": "LEFT_AXIS",
"title": "Test passes"
},
{
"format": {
"bold": True,
"italic": True,
"fontSize": 14
},
"position": "BOTTOM_AXIS",
"title": "Machine types",
"title": "System"},
{
"format": {
"bold": True,
"italic": True,
"fontSize": 14
},
"position": "LEFT_AXIS",
"title": graph_data[0][1].split("-")[0],
},
{
"format": {
"bold": True,
"italic": True,
"fontSize": 14
},
"position": "RIGHT_AXIS",
"title": "%Diff",
},
Expand Down Expand Up @@ -168,8 +191,11 @@ def graph_coremark_data(spreadsheetId, range, action):
"anchorCell": {
"sheetId": sheetId,
"rowIndex": GRAPH_ROW_INDEX,
"columnIndex": column_count + 1,
}
"columnIndex": column_count + GRAPH_COL_INDEX,
},
"offsetXPixels": 100,
"widthPixels": 600,
"heightPixels": 400
}
},
}
Expand Down Expand Up @@ -199,4 +225,3 @@ def graph_coremark_data(spreadsheetId, range, action):
update_conditional_formatting(spreadsheetId, sheetId, col, threshold)



Loading

0 comments on commit 486df37

Please sign in to comment.