-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcascade_dis.py
58 lines (50 loc) · 2.01 KB
/
cascade_dis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
__author__ = 'yonatan'
import heapq
import time

from new_finger_print import spaciograms_distance_rating
def distance_function_nate(entry, target_dict, rank):
    """Score ``entry`` against ``target_dict`` using the feature chosen by ``rank``.

    The rank selects which key is read from both mappings before they are
    handed to ``spaciograms_distance_rating``:

        rank 1 -> "sp_one"
        rank 2 -> "sp_two"
        rank 3 -> "specio"

    Args:
        entry: mapping that holds the feature under the key listed above.
        target_dict: mapping with the same key, used as the comparison target.
        rank: 1, 2 or 3; also forwarded unchanged as the third argument of
            ``spaciograms_distance_rating``.

    Returns:
        Whatever ``spaciograms_distance_rating`` returns.
        NOTE(review): presumably a number where smaller means "closer" --
        confirm against new_finger_print.

    Raises:
        ValueError: if ``rank`` is not 1, 2 or 3.
        KeyError: if the selected key is missing from either mapping.
    """
    if rank == 1:
        feature_key = "sp_one"
    elif rank == 2:
        feature_key = "sp_two"
    elif rank == 3:
        feature_key = "specio"
    else:
        # Fail loudly here instead of falling through to an unbound local.
        raise ValueError("unsupported rank: %r (expected 1, 2 or 3)" % (rank,))
    return spaciograms_distance_rating(
        entry[feature_key], target_dict[feature_key], rank)
def stage_one(target_dict, entries, rank, stopme):
    """Return the ``stopme`` entries with the smallest distance to ``target_dict``.

    Each entry is scored with ``distance_function_nate(entry, target_dict,
    rank)``. The running entry index is printed every 1000 entries and the
    total elapsed time is printed before returning.

    Args:
        target_dict: comparison target, forwarded to the distance function.
        entries: iterable of candidate entries; it is iterated exactly once.
        rank: forwarded unchanged to ``distance_function_nate``.
        stopme: maximum number of results to keep.

    Returns:
        A list of ``(entry, distance)`` tuples sorted by ascending distance,
        with ties kept in input order. It holds at most ``stopme`` items and
        is empty when ``stopme <= 0`` or ``entries`` is empty. The list is
        sorted even when there are fewer than ``stopme`` entries.
    """
    start_time = time.time()

    def scored_entries():
        # Score lazily so the selection below can consume entries one by one.
        for index, entry in enumerate(entries):
            if index % 1000 == 0:
                print(index)
            yield entry, distance_function_nate(entry, target_dict, rank)

    if stopme > 0:
        # With a key function nsmallest is equivalent to
        # sorted(iterable, key=key)[:n]; the entries themselves are never
        # compared, only their distances.
        nearest_n = heapq.nsmallest(
            stopme, scored_entries(), key=lambda pair: pair[1])
    else:
        # Nothing was requested: skip scoring entirely.
        nearest_n = []

    total_time = time.time() - start_time
    print("total time = %s" % (str(total_time)))
    return nearest_n