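"""Analyse experiment logs for goal/plan transparency and privacy metrics.

Parses our raw result logs and those of the GRD baseline (the 'keren'
logs), builds structured result dicts, and prints LaTeX-ready table rows
comparing the two; plotting helpers for search histories are included.
"""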
import ast
import os
import re
import statistics

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import rc

# Render all plot text with LaTeX (requires a working LaTeX installation).
rc('text', usetex=True)
def process_search(results, search):
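    """Scan the raw search log lines and fill `results` in place.

    Extracts the history of incumbent solutions (count, g-value, metric,
    expanded/generated nodes, time) plus overall search statistics. The
    expected log format is inferred from the parsing patterns below.
    """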
solutions = []
total_time = 0
expanded_nodes = 0
generated_nodes = 0
exec_time = 0
state_space_exhausted = False
item_id = 0
while item_id < len(search):
try:
if 'New best' in search[item_id] or 'Another' in search[item_id]:
item_id += 1
raw_sol = search[item_id]
                matches = re.findall(r'\(G=\d+\)', raw_sol)
number_of_solutions = len(matches)
g_of_solutions = int(matches[0].split('G=')[-1].replace(')',''))
if 'New best' in search[item_id-1]:
item_id += 1
metric = float(search[item_id].split(': ')[-1].strip())
                    # Values above 100 appear to encode a maximisation metric
                    # as (1000 - value); undo that encoding.
                    if metric > 100:
                        metric = 1000 - metric
item_id += 1
nodes = search[item_id].split(': ')[-1]
expanded_nodes = int(nodes.split('/')[0])
generated_nodes = int(nodes.split('/')[1])
item_id += 1
exec_time = float(search[item_id].split(': ')[-1].strip())
solutions.append({
'Number of Solutions': number_of_solutions,
'g of Solutions': g_of_solutions,
'Metric': metric,
'Expanded': expanded_nodes,
'Generated': generated_nodes,
'Time': exec_time
})
elif 'Nodes' in search[item_id]:
nodes = search[item_id].split(': ')[-1]
expanded_nodes = int(nodes.split('/')[0])
generated_nodes = int(nodes.split('/')[1])
item_id += 1
aux = search[item_id]
exec_time = float(aux.split(': ')[-1].strip())
elif 'State space' in search[item_id]:
state_space_exhausted = True
elif 'Total running time' in search[item_id]:
total_time = float(search[item_id].split(' ')[-1].strip())
else:
pass
        except (IndexError, ValueError):
            # Malformed or truncated log entry; skip it and keep scanning.
            pass
item_id += 1
results['State Space Exhausted'] = state_space_exhausted
results['Total Execution Time'] = total_time
results['Expanded Nodes'] = expanded_nodes
results['Generated Nodes'] = generated_nodes
results['Exec Time'] = exec_time
results['Solutions History'] = solutions
def process_results_file(results_file):
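    """Parse one of our result log files into a structured dict.

    The file name is expected to look like results_<domain>_<problem>_<metric>;
    those values are kept in the returned dict, while the corresponding header
    lines inside the file are parsed but not written back into it.
    """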
infile = open(results_file, 'r')
aux = results_file.split('/')[1].split('_')
domain = aux[1]
problem = aux[2]
metric = '_'.join(aux[3:])
results = {}
results['Domain'] = domain
results['Problem'] = problem
results['Metric'] = metric
for line in infile:
if 'Domain File: ' in line:
domain = line.split(': ')[-1].strip().split('/')[-3]
elif 'Problem File: ' in line:
problem = line.split(': ')[-1].strip().split('/')[-2]
elif 'Metric: ' in line:
metric = line.split(': ')[-1].strip()
elif 'Plans by Goal: ' in line:
plans_by_goal = ast.literal_eval(line.split('Plans by Goal: ')[-1].strip())
results['Number of Goals'] = len(plans_by_goal.keys())
plans = []
for key, value in plans_by_goal.items():
plans.append(len(value))
results['Number of Plans per Goal'] = plans
elif 'Top Quality Time: ' in line:
top_q_time = float(line.split(': ')[-1].strip())
results['Top Quality Time'] = top_q_time
elif 'Actions considered: ' in line:
actions_considered = ast.literal_eval(line.split('Actions considered: ')[-1].strip())
results['Max Branching Factor'] = len(actions_considered)
elif 'Initial metric' in line:
initial_metric = float(line.split(': ')[-1].strip())
            # Same (1000 - value) encoding as in process_search; undo it.
            if initial_metric > 100:
                initial_metric = 1000 - initial_metric
results['Initial Metric'] = initial_metric
line = next(infile)
search = []
while line != '':
search.append(line.strip())
                try:
                    line = next(infile)
                except StopIteration:
                    break
results['Raw Search'] = search
process_search(results, search)
break
else:
pass
    infile.close()
    return results
def process_results_file_keren(results_file):
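    """Parse a baseline ('keren'/GRD) log file into a structured dict.

    File names are expected to look like <domain>_<problem>.log. Solution
    fields default to the string 'None' when the log reports no solution.
    """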
infile = open(results_file, 'r')
aux = results_file.split('/')[1].split('_')
domain = aux[0]
problem = aux[1]
results = {}
results['Domain'] = domain
results['Problem'] = problem.replace('.log','')
best_sol = 'None'
best_time = 'None'
best_expanded = 'None'
best_generated = 'None'
for line in infile:
if 'init wcd: ' in line:
initial_metric = float(line.split(' ')[2])
results['Initial Metric'] = initial_metric
        elif 'reduced from' in line:
            best_sol = float(line.split(' ')[6])
            # The log appears to report time remaining out of a 900 s budget;
            # convert it to elapsed time.
            best_time = 900 - float(line.split(' ')[9])
elif 'Expanded' in line:
best_expanded = int(line.split(' ')[-1].strip())
elif 'Generated' in line:
best_generated = int(line.split(' ')[-1].strip())
results['Sol Time'] = best_time
results['Sol Metric'] = best_sol
results['Expanded Nodes'] = best_expanded
results['Generated Nodes'] = best_generated
    infile.close()
    return results
def generate_structured_results_keren(results_path):
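    """Parse every baseline log file in `results_path` into a list of dicts."""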
results = []
print(f'There are {len(os.listdir(results_path))} experiments in {results_path}')
for file in os.listdir(results_path):
print(f'Processing file {file}')
        structured_result = process_results_file_keren(os.path.join(results_path, file))
results.append(structured_result)
return results
def generate_structured_results(results_path):
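    """Parse every results_* file in each experiment directory under
    `results_path` into a list of structured result dicts."""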
results = []
print(f'There are {len(os.listdir(results_path))} experiments in {results_path}')
for experiment in os.listdir(results_path):
new_results_path = f'{results_path}/{experiment}'
        # Experiment entries are directories (their names contain no '.');
        # skip anything that looks like a plain file.
        if '.' not in new_results_path:
for file in os.listdir(new_results_path):
if file.startswith('results_'):
print(f'Processing file {file}')
structured_result = process_results_file(f'{new_results_path}/{file}')
results.append(structured_result)
return results
def generate_search_history_plot(df):
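    """Plot the metric-over-time search history of one depots task.

    Scans the DataFrame for the first depots task with an "interesting"
    history (many incumbent solutions with varied metrics and g-values),
    plots it, and stops.
    """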
for index, row in df.iterrows():
if row['Domain'] == 'depots':
task = f'{row["Domain"]} {row["Problem"]} {row["Metric"]}'
initial_metric = row['Initial Metric']
solution_history = row['Solutions History']
if solution_history is not None:
solutions_list = [(0, initial_metric,0)]
this_metric = None
this_changes = None
for sol in solution_history:
this_metric = sol['Metric']
this_time = sol['Time']
this_changes = sol['g of Solutions']
solutions_list.append((this_time, this_metric, this_changes))
last_time = row['Total Execution Time']
different_metrics = set([x[1] for x in solutions_list])
different_g = set([x[2] for x in solutions_list])
                if len(solutions_list) > 10 and len(different_metrics) > 5 and len(different_g) > 3:
                    # Only plot "interesting" tasks: several incumbent
                    # solutions with distinct metric values and g-values.
                    if this_metric is not None:
                        solutions_list.append((last_time, this_metric, this_changes))
times = [x[0] for x in solutions_list]
metrics = [x[1] for x in solutions_list]
changes = [x[2] for x in solutions_list]
plt.rcParams['figure.figsize'] = [20, 8]
fig, ax = plt.subplots()
plt.xlabel(r'Time (seconds)', fontsize=50)
plt.ylabel(r'Max. Avg. Distance', fontsize=45)
"""plt.legend((lo, ll, l, a, h, hh, ho),
('Low Outlier', 'LoLo', 'Lo', 'Average', 'Hi', 'HiHi', 'High Outlier'),
scatterpoints=1,
loc='lower left',
ncol=3,
fontsize=8)"""
aux = plt.scatter(times, metrics, c=changes, s=180, cmap='tab10')
plt.legend(*aux.legend_elements(),prop={'size': 30},markerscale=2,loc='lower right')
plt.plot(times, metrics, color='grey')
#ax.legend((aux), ('a'), scatterpoints=1)
ax.grid(True)
ax.tick_params(axis='both', which='major', labelsize=28)
plt.show()
print(len(solutions_list))
break
print()
def get_first_best_sol(solution_history):
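    """Return (metric, time, expanded, num_sols) from a solution history.

    Reports the last history entry that has exactly one solution, plus the
    solution count of the final entry; everything is None when no such
    entry exists or the history is empty.
    """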
best_metric = None
best_time = None
best_expanded = None
num_sols = None
for sol in solution_history:
if sol['Number of Solutions'] == 1:
best_metric = sol['Metric']
best_time = sol['Time']
best_expanded = sol['Expanded']
if len(solution_history) > 0:
last_sol = solution_history[-1]
num_sols = last_sol['Number of Solutions']
return best_metric, best_time, best_expanded, num_sols
def generate_big_table(df, keren_df):
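    """Print LaTeX-ready table fragments comparing our results to the baseline.

    For each domain/metric pair with all 60 problems present, prints the
    mean/stdev time, initial metric, and best metric over the problems where
    an improvement was found; for goal_transparency, only problems that the
    baseline also improved are counted, and its numbers are printed first.
    """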
domains = ['blocks-words', 'depots', 'grid-navigation', 'ipc-grid', 'logistics']
metrics_order = [
'goal_transparency',
'plan_transparency',
'goal_privacy',
'plan_privacy',
'min_avg_distance_goal_compliance',
'max_avg_distance_goal_compliance',
'min_max_distance_goal_compliance',
'max_min_distance_goal_compliance'
]
    # Metrics for which lower values are better (currently unused).
    lower_better = ['goal_transparency', 'plan_transparency',
                    'min_avg_distance_goal_compliance',
                    'min_max_distance_goal_compliance']
total_num_sols = []
for domain in domains:
print('\n#> ' + domain + '\n')
total_num_sols_per_domain = []
for m in metrics_order:
print('\n# ' + m)
this_results = df[(df['Domain'] == domain) & (df['Metric'] == m)]
if len(this_results) != 60:
print(f'{domain} does not have the 60 problems for metric {m}, only {len(this_results)}, skipping')
continue
expanded_nodes = []
times = []
improvements = []
initial_metrics = []
top_quality_times = []
abs_improvements = []
keren_expanded_nodes = []
keren_times = []
keren_initial_metrics = []
keren_improvements = []
total_problems = 0
improved_problems = 0
problems_with_unreachable_goals = 0
problems_where_we_do_not_compute_all_plans = 0
for index, row in this_results.iterrows():
total_problems += 1
metric = row['Metric']
initial_metric = row['Initial Metric']
top_quality_time = row['Top Quality Time']
solution_history = row['Solutions History']
                if not isinstance(solution_history, list):
                    # This happens when some goals are unreachable, or when
                    # the entire time limit was spent computing the plan library.
                    problems_with_unreachable_goals += 1
                    continue
plans_per_goal = row['Number of Plans per Goal']
                if 1000 in plans_per_goal:
                    # We hit the per-goal plan limit (imposed to avoid running
                    # out of disk), so some plans may be unaccounted for and
                    # this problem should not be reported.
                    problems_where_we_do_not_compute_all_plans += 1
                    continue
best_metric, best_time, best_expanded, num_sols = get_first_best_sol(solution_history)
                if best_metric is not None:  # only report problems where we found an improvement
total_num_sols.append(num_sols)
total_num_sols_per_domain.append(num_sols)
if metric == 'goal_transparency':
for keren_index, keren_row in keren_df.iterrows():
if keren_row['Domain'] == row['Domain'] and \
keren_row['Problem'] == row['Problem'] and \
keren_row['Sol Metric'] != 'None' and \
                               keren_row['Initial Metric'] == initial_metric:  # keren also finds an improvement for this problem
improved_problems += 1
# Our data
improvement = best_metric
expanded_nodes.append(best_expanded)
actual_time = best_time + top_quality_time
times.append(actual_time)
improvements.append(improvement)
initial_metrics.append(initial_metric)
top_quality_times.append(top_quality_time)
abs_improvements.append(abs(initial_metric - improvement))
                                # Keren's (baseline) data
keren_expanded_nodes.append(keren_row['Expanded Nodes'])
keren_times.append(keren_row['Sol Time'])
keren_improvements.append(keren_row['Sol Metric'])
keren_initial_metrics.append(keren_row['Initial Metric'])
else:
improved_problems += 1
improvement = best_metric
expanded_nodes.append(best_expanded)
actual_time = best_time + top_quality_time
times.append(actual_time)
improvements.append(improvement)
initial_metrics.append(initial_metric)
abs_improvements.append(abs(initial_metric - improvement))
if len(times) > 0:
# reporting keren
#avg_top_quality = statistics.mean(top_quality_times)
if m == 'goal_transparency':
avg_time = round(statistics.mean(keren_times), 1)
std_time = 0.0
if len(keren_times) > 1:
std_time = round(statistics.stdev(keren_times), 1)
avg_expanded = round(statistics.mean(keren_expanded_nodes), 1)
std_expanded = 0.0
if len(keren_expanded_nodes) > 1:
std_expanded = round(statistics.stdev(keren_expanded_nodes), 1)
avg_improvement = round(statistics.mean(keren_improvements), 1)
std_improvement = 0.0
if len(keren_improvements) > 1:
std_improvement = round(statistics.stdev(keren_improvements), 1)
avg_initial_metric = round(statistics.mean(keren_initial_metrics), 1)
std_initial_metric = 0.0
if len(keren_initial_metrics) > 1:
std_initial_metric = round(statistics.stdev(keren_initial_metrics), 1)
# print('GRD-LS')
# print(f'Improved problems = {improved_problems}')
# print(f'{avg_time}\pm{std_time} & {avg_expanded}\pm{std_expanded} & {avg_improvement}\pm{std_improvement} & {improved_problems}/{total_problems}&')
print(f'& {avg_time}/{std_time} & {avg_initial_metric}/{std_initial_metric} & {avg_improvement}/{std_improvement}&')
# reporting ours
avg_time = round(statistics.mean(times),1)
std_time = 0.0
if len(times) > 1:
std_time = round(statistics.stdev(times),1)
avg_expanded = round(statistics.mean(expanded_nodes),1)
std_expanded = 0.0
if len(expanded_nodes) > 1:
std_expanded = round(statistics.stdev(expanded_nodes),1)
avg_improvement = round(statistics.mean(improvements),1)
std_improvement = 0.0
if len(improvements) > 1:
std_improvement = round(statistics.stdev(improvements),1)
avg_initial_metric = round(statistics.mean(initial_metrics),1)
std_initial_metric = 0.0
if len(initial_metrics) > 1:
std_initial_metric = round(statistics.stdev(initial_metrics),1)
# print(f'{avg_time}\pm{std_time} & {avg_expanded}\pm{std_expanded} & {avg_improvement}\pm{std_improvement} & {improved_problems}/{total_problems}\\\\')
print(f'& {avg_time}/{std_time} & {avg_initial_metric}/{std_initial_metric} & {avg_improvement}/{std_improvement}&',end='')
# print(f'In {m}, {improved_problems} out of {total_problems}. Unreachable={problems_with_unreachable_goals}')
# avg_sols_per_domain = statistics.mean(total_num_sols_per_domain)
# print(avg_sols_per_domain)
# print('\n- Absolute values for Improvement')
# print(abs_improvements)
else:
print()
# print(f'- & - & - &',end='')
print('\\\\')
    # Aggregate solution counts (kept for interactive inspection).
    avg_sols = statistics.mean(total_num_sols)
    # print(avg_sols)
    max_sols = max(total_num_sols)
    # print(max_sols)
print()
def generate_reduction_per_g_violinplot(df):
    # Placeholder: the violin plot of metric reduction per g-value is not
    # implemented yet.
    print()
if __name__ == '__main__':
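    # Structured result dicts are read from JSON dumps assumed to live in the
    # working directory (e.g. produced via the generate_structured_results
    # helpers and saved with pandas).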
ger_results_df = pd.read_json('ger_results.json')
# ger_results_df.to_csv('ger_results.csv', sep=',')
keren_results_df = pd.read_json('grd_results.json')
generate_big_table(ger_results_df, keren_results_df)
# generate_search_history_plot(ger_results_df)
print()