# main_competitors.py
# NOTE: this file was recovered from a GitHub web-page capture; the page
# chrome and the line-number gutter (1-238) that preceded the code were removed.
import pyximport
from config import COCO_SGS_dir
pyximport.install(language_level=3)
from competitors.charts import plot_comparison_both, plot_comparison_coverage, plot_comparison_diversity
from competitors.kmeodids import load_resnet_images, compute_BOW_descriptors, read_BOW_images, kmedoids_summary, get_kmedoids_graphs, \
competitors_dir
from sims.prs import edge_pruning, node_pruning, load_PRS
from sims.scene_graphs.image_processing import getImageName
from shutil import copyfile
import os
import pandas as pd
import json
from sims.graph_algorithms import compute_coverage_matrix
from sims.sgs_evaluation import evaluate_summary_graphs
from sims.sims_config import SImS_config
from sims.visualization import print_graphs
if __name__ == "__main__":
    # Pipeline driver for the k-medoids competitor of SImS. Depending on the
    # switches in RUN_CONFIG it (1) extracts BOW descriptors, (2) runs
    # k-medoids for every k in [mink, maxk], (3) prints the scene graphs of the
    # selected medoids, (4) computes coverage/overlap matrices, (5) evaluates
    # coverage and diversity, and (6) plots comparison charts against SImS.
    # Each stage reads its inputs back from disk, so stages can be toggled
    # independently between runs.

    class RUN_CONFIG:
        """Switches and parameters selecting which pipeline stages run and how."""
        compute_BOW_descriptors = False  # Map each COCO image to its BOW descriptors
        run_kmedoids = True              # Run KMedoids summary for different k values
        print_kmedoids_graphs = True     # Print scene graphs of selected kmedoids images (for each k)

        features = "resnet"              # BOW or resnet -- depending on the feature extractor to be used
        include_in_eval = ["BOW", "resnet"]  # Feature variants whose evaluation CSVs are read for the charts

        use_full_graphs = False          # True if you want to compute coverage on full graphs
                                         # False to apply node and edge pruning before computing coverage
        pairing_method = 'img_min'       # Method used to associate images to SGS graphs
                                         # (see associate_img_to_sgs() in sgs.pyx)
                                         # img_min, img_max, img_avg, std

        compute_kmedoids_coverage_matrix = True  # Compute graph coverage matrix for kmedoids
        evaluate_kmedoids = True         # Use coverage matrix to compute graph coverage and diversity of kmedoids
        write_comparison_charts = True   # Write comparison charts between kmedoids and SImS (for white paper)

        # Dataset to process: 'COCO_subset', 'COCO_subset2' or 'COCO_subset3'
        dataset = 'COCO_subset2'

        # Range of k values tested with kmedoids (both ends inclusive)
        if dataset == 'COCO_subset':
            mink = 4
            maxk = 20
        elif dataset in ('COCO_subset2', 'COCO_subset3'):
            mink = 2
            maxk = 20
        else:
            # Fail here rather than with an AttributeError on mink/maxk later.
            raise ValueError(f"Unknown dataset: {dataset!r}")

    # Paths: files produced from resnet features carry a "resnet_" prefix,
    # files produced from BOW features carry none.
    prefix = "resnet_" if RUN_CONFIG.features == "resnet" else ""
    output_path = os.path.join(competitors_dir, RUN_CONFIG.dataset)
    os.makedirs(output_path, exist_ok=True)
    kmedoids_out_clusters_path = os.path.join(output_path, f"{prefix}centroids.json")
    config = SImS_config(RUN_CONFIG.dataset)

    # File-name suffixes: `suffix` tags the kmedoids matrix/evaluation files,
    # `suffix2` tags the SImS evaluation file and is passed to the chart functions.
    if RUN_CONFIG.use_full_graphs:
        suffix, suffix2 = "_full", ""
    else:
        suffix, suffix2 = "_pruned", f"_{RUN_CONFIG.pairing_method}"

    def load_kmedoids_graphs():
        """Read the saved medoids and the scene graphs from disk.

        Returns a pair (coco_graphs, kmedoids_graphs). Both JSON files are
        re-read on every call, so each stage works on freshly loaded graphs.
        """
        with open(kmedoids_out_clusters_path) as f:
            kmedoids_result = json.load(f)
        with open(config.scene_graphs_json_path, 'r') as f:
            coco_graphs = json.load(f)
        return coco_graphs, get_kmedoids_graphs(kmedoids_result, coco_graphs)

    def prune_graphs(prs, graphs):
        """Apply edge pruning (using the PRS) followed by node pruning."""
        return node_pruning(edge_pruning(prs, graphs))

    # --------------------------
    # Feature extraction for each image in COCO training set
    if RUN_CONFIG.compute_BOW_descriptors:
        compute_BOW_descriptors()

    # KMedoids summary for different k values
    if RUN_CONFIG.run_kmedoids:
        if RUN_CONFIG.features == "BOW":
            X = read_BOW_images(RUN_CONFIG.dataset)
        else:
            X = load_resnet_images(RUN_CONFIG.dataset)

        res = {}
        total_time = 0.0
        print(f"Number of images: {len(X)}")
        use_cosine = RUN_CONFIG.features == "resnet"  # with resnet, use cosine distance
        for k in range(RUN_CONFIG.mink, RUN_CONFIG.maxk + 1):
            medoids, duration = kmedoids_summary(X, k, use_cosine)
            # Assumes `duration` is a datetime.timedelta: `.seconds` is only the
            # seconds field (days and sub-second parts are dropped), so the full
            # elapsed time must come from total_seconds().
            elapsed = duration.total_seconds()
            res[k] = (medoids, elapsed)
            total_time += elapsed
            print(f"{k}: {medoids}")
            # Appended per iteration so progress is on disk during long runs.
            with open(os.path.join(output_path, "log.txt"), 'a+') as f:
                f.write(f"{k}: {medoids}\n")

        # Guard against an empty k range (mink > maxk).
        avg_time = total_time / len(res) if res else 0.0
        print(str(avg_time))
        with open(os.path.join(output_path, "avgTime.txt"), 'w') as f:
            f.write('Average time for kmedoids run on COCO subset (seconds):\n')
            f.write(str(avg_time))
        # json.dump turns the integer k keys into strings; later stages
        # convert them back with int(k).
        with open(kmedoids_out_clusters_path, 'w') as f:
            json.dump(res, f)

    # Print graphs associated to kmedoids
    if RUN_CONFIG.print_kmedoids_graphs:
        _, kmedoids_graphs = load_kmedoids_graphs()
        for k, graphs in kmedoids_graphs.items():
            out_graphs_dir = os.path.join(output_path, 'kmedoids_graphs', f'k{k}')
            os.makedirs(out_graphs_dir, exist_ok=True)
            print_graphs(graphs, out_graphs_dir)
            # Copy each medoid image next to its printed graph.
            for i, g in enumerate(graphs):
                imgName = getImageName(g['graph']['name'], extension='jpg')
                copyfile(os.path.join(config.img_dir, imgName),
                         os.path.join(out_graphs_dir, f"g{i}.jpg"))

    # Compute graph coverage for kmedoids (coverage matrix)
    if RUN_CONFIG.compute_kmedoids_coverage_matrix:
        coco_graphs, kmedoids_graphs = load_kmedoids_graphs()

        # Load pairwise relationship summary (PRS) if needed
        if not RUN_CONFIG.use_full_graphs:
            prs = load_PRS(config, True)

        cmatrices_list = []
        omatrices_list = []
        for k, summary_graphs_i in kmedoids_graphs.items():
            # Apply node and edge pruning before computing coverage matrix
            if not RUN_CONFIG.use_full_graphs:
                summary_graphs_i = prune_graphs(prs, summary_graphs_i)
            cmatrix, omatrix = compute_coverage_matrix(
                coco_graphs, [{'g': s} for s in summary_graphs_i])
            # Relabel columns as 0..k-1, then tag the rows with their k so the
            # matrices of all k values can be stacked into one table.
            cmatrix.columns = list(range(int(k)))
            omatrix.columns = list(range(int(k)))
            cmatrix['k'] = k
            omatrix['k'] = k
            cmatrices_list.append(cmatrix)
            omatrices_list.append(omatrix)

        # set_index('k') also names the index 'k'.
        cmatrices = pd.concat(cmatrices_list, sort=True)
        omatrices = pd.concat(omatrices_list, sort=True)
        cmatrices.set_index('k', inplace=True)
        omatrices.set_index('k', inplace=True)

        cmatrices.to_csv(os.path.join(output_path, f"{prefix}coverage_mat{suffix}.csv"), sep=",")
        omatrices.to_csv(os.path.join(output_path, f"{prefix}overlap_mat{suffix}.csv"), sep=",")

    # Compute coverage and diversity for kmedoids
    if RUN_CONFIG.evaluate_kmedoids:
        _, kmedoids_graphs = load_kmedoids_graphs()
        cmatrices = pd.read_csv(os.path.join(output_path, f"{prefix}coverage_mat{suffix}.csv"), index_col='k')
        omatrices = pd.read_csv(os.path.join(output_path, f"{prefix}overlap_mat{suffix}.csv"), index_col='k')

        # Load pairwise relationship summary (PRS) if needed
        if not RUN_CONFIG.use_full_graphs:
            prs = load_PRS(config, True)

        results = []
        for k, summary_graphs_i in kmedoids_graphs.items():
            # Prune kmedoids graphs before computing coverage and diversity
            if not RUN_CONFIG.use_full_graphs:
                summary_graphs_i = prune_graphs(prs, summary_graphs_i)
            k_int = int(k)
            # Select the rows stored for this k and only its first k columns.
            evaluation = evaluate_summary_graphs(
                [{'g': s} for s in summary_graphs_i],
                cmatrices.loc[k_int].iloc[:, :k_int],
                omatrices.loc[k_int].iloc[:, :k_int])
            results.append(evaluation)

        kmed_df = pd.DataFrame(results, columns=["N. graphs",
                                                 "Avg. nodes", "Std. nodes",
                                                 "Coverage",
                                                 "Coverage-overlap",
                                                 "Diversity",
                                                 "Diversity-ne"])
        kmed_df.to_csv(os.path.join(output_path, f"{prefix}evaluation{suffix}.csv"))

    # Write comparison charts for white paper
    if RUN_CONFIG.write_comparison_charts:
        dfs_to_eval = {}

        # Read SImS results. A missing input ends the run with a non-zero exit
        # status; the message is written to stderr by SystemExit.
        sims_eval_path = os.path.join(config.SGS_dir, f'evaluation{suffix2}.csv')
        if not os.path.exists(sims_eval_path):
            raise SystemExit(f"{sims_eval_path} not found. You have to evaluate SImS first. Run main_SGS.py with evaluate_SGS_experiments=True")
        dfs_to_eval["SImS"] = pd.read_csv(sims_eval_path, index_col=0)

        # Read kmedoids results:
        # (feature key, chart label, evaluation file, message when the file is missing)
        kmedoids_variants = (
            ("BOW", "SIFT", f"evaluation{suffix}.csv",
             "You have to evaluate Kmedoids with BOW first. Run main_competitors.py with evaluate_kmedoids=True"),
            ("resnet", "ResNet50", f"resnet_evaluation{suffix}.csv",
             "You have to evaluate Kmedoids with ResNet first. Run main_competitors.py with evaluate_kmedoids=True and features=resnet"),
        )
        for feature, label, file_name, missing_msg in kmedoids_variants:
            if feature not in RUN_CONFIG.include_in_eval:
                continue
            kmed_eval_path = os.path.join(output_path, file_name)
            if not os.path.exists(kmed_eval_path):
                raise SystemExit(missing_msg)
            dfs_to_eval[label] = pd.read_csv(kmed_eval_path, index_col=0)

        plot_comparison_both(RUN_CONFIG.mink, RUN_CONFIG.maxk, dfs_to_eval, suffix2, output_path)
        plot_comparison_coverage(RUN_CONFIG.mink, RUN_CONFIG.maxk, dfs_to_eval, suffix2, output_path)
        plot_comparison_diversity(RUN_CONFIG.mink, RUN_CONFIG.maxk, dfs_to_eval, suffix2, output_path)