Evaluate: YCSB as timeseries per experiment run - smoothing and replace NaN
Beuth-Erdelt · Jan 27, 2025 · b8c5273 · b8c5273
1 parent ac30b13
commit b8c5273
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 2 deletions.
diff --git a/bexhoma/evaluators.py b/bexhoma/evaluators.py
@@ -1128,6 +1128,8 @@ def find_matching_files(directory, pattern):
                 #print(data)
                 df = pd.DataFrame(data)
                 df = df.set_index('sec')
+                #print("BLA", df)
+                #df.fillna(0)
                 df = df.groupby(df.index).last() # in case of duplicate indexes (i.e., times)
                 if remove_first > 0:
                     df = df.iloc[remove_first:]
@@ -1143,7 +1145,8 @@ def find_matching_files(directory, pattern):
                     if df_total.empty:
                         df_total = df.copy()
                     else:
-                        df_total[column] = df_total[column] + df[column]
+                        df_total = df_total.add(df, fill_value=0)
+                        #df_total[column] = df_total[column] + df[column]
                 #df.plot(ylim=(0,df['current_ops_per_sec'].max()*1.1))
         if aggregate:
             #print(df_total)
@@ -1162,7 +1165,7 @@ def get_benchmark_logs_timeseries_df_aggregated(self, metric="current_ops_per_se
         #print(list_logs)
         #list_logs = df[df['client'] == client]['pod'].tolist()
         #list_logs = df[df['client'] == client]['pod_count'].tolist()
-        df_total = self.benchmark_logs_to_timeseries_df(list_logs, metric=metric)
+        df_total = self.benchmark_logs_to_timeseries_df(list_logs, metric=metric, aggregate=True)
         return df_total
     def get_benchmark_logs_timeseries_df_single(self, metric="current_ops_per_sec", configuration="", client='1', experiment_run='1'):
         #code = "1737365651"
@@ -1173,6 +1176,7 @@ def get_benchmark_logs_timeseries_df_single(self, metric="current_ops_per_sec",
         #configuration = 'configuration'
         df = self.get_df_benchmarking()
         list_logs = df[(df['client'] == str(client)) & (df['configuration'] == configuration) & (df['experiment_run'] == str(experiment_run))]['pod'].tolist()
+        #print(list_logs)
         #list_logs = df[df['client'] == client]['pod'].tolist()
         #list_logs = df[df['client'] == client]['pod_count'].tolist()
         df_total = self.benchmark_logs_to_timeseries_df(list_logs, metric=metric, aggregate=False)

diff --git a/test-pool.sh b/test-pool.sh
@@ -694,6 +694,40 @@ wait_process "ycsb"
 
 
 
+BEXHOMA_YCSB_SF_DATA=16
+BEXHOMA_YCSB_SF_OPS=16
+
+
+### Small functional test - low target - same, but 60s interval ycsb metrics
+### Fixed nodes
+### Workload A
+### repeat for 1, 8 and 16 drivers (see -nbp 1,8,16)
+### TODO: Do the same for PGBouncer sidecar? Check resources first
+nohup python ycsb.py -ms 1 -tr \
+  -sf $BEXHOMA_YCSB_SF_DATA \
+  -sfo $BEXHOMA_YCSB_SF_OPS \
+  --workload a \
+  -dbms PostgreSQL \
+  -rnn $BEXHOMA_NODE_SUT -rnl $BEXHOMA_NODE_LOAD -rnb $BEXHOMA_NODE_BENCHMARK \
+  -tb 16384 \
+  -nlp 8 \
+  -nlt 64 \
+  -nlf 4 \
+  -nbp 1,8,16 \
+  -nbt 64 \
+  -nbf 4 \
+  -ne 1 \
+  -nc 2 \
+  -m -mc \
+  -rst shared -rss 50Gi \
+  run </dev/null &>$LOG_DIR/test_ycsb_testcase_workload_a4.log &
+
+
+wait_process "ycsb"
+
+
+
+
 ###########################################
 ############## Clean Folder ###############
 ###########################################