-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKnn.py
49 lines (35 loc) · 1.91 KB
/
Knn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from collections import Counter

import numpy as np

from distances.EuclideanDistance import EuclideanDistance
class Knn:
    """k-nearest-neighbours classifier built on a pluggable distance metric.

    The constructor only stores its arguments. ``distance_metric`` must
    expose ``compute_distances_from_points(point, points)``; it is assumed
    to return one distance per training sample (TODO confirm against the
    metric implementations). ``experiment_dict`` and ``dataset`` are kept
    on the instance but are not read by any method defined in this class.
    """

    def __init__(self, experiment_dict, dataset, distance_metric):
        self.datasets = experiment_dict
        self.dataset = dataset
        self.distance_metric = distance_metric

    def sort_similarities_bubble_sort(self, similarities, targets):
        """Sort distances in ascending order and reorder the labels to match.

        The method name is kept for backward compatibility; the ordering is
        computed with a stable argsort instead of a hand-written bubble sort.

        :param similarities: 1-D ``numpy.ndarray`` (or list) of distances.
            It is sorted IN PLACE, and the same object is returned.
        :param targets: 1-D ``numpy.ndarray`` (or list) of labels, one per
            distance. It is not modified.
        :returns: ``(sorted_targets, similarities)`` -- note the order:
            labels first (as a plain ``list``), then the sorted distances.
        :raises ValueError: if the two inputs differ in length.
        """
        distances = np.asarray(similarities)
        labels = np.asarray(targets).tolist()
        if len(labels) != len(distances):
            raise ValueError(
                "similarities and targets must have the same length, "
                "got {} and {}".format(len(distances), len(labels))
            )

        # A stable sort keeps equal distances in their original relative
        # order, which is exactly what an adjacent-swap bubble sort using a
        # strict "<" comparison produces.
        order = np.argsort(distances, kind="stable")
        ranked = distances[order]

        # Write the result back so callers that rely on the input being
        # sorted in place keep working.
        if isinstance(similarities, np.ndarray):
            similarities[:] = ranked
        else:
            similarities[:] = ranked.tolist()

        # Sort the classes along with the distances to maintain relevancy.
        return [labels[i] for i in order], similarities

    def fit_knn_model(self, k, x_train, x_val, y_train):
        """Predict a label for every point in ``x_val`` by majority vote.

        For each validation point the distances to all of ``x_train`` are
        obtained from ``self.distance_metric``, the labels are ranked by
        distance, and the most frequent label among the ``k`` nearest wins.
        When several labels tie, the smallest label is chosen.

        :param k: number of neighbours that vote; must satisfy
            ``1 <= k <= len(y_train)``.
        :param x_train: training samples, passed through unchanged to the
            distance metric.
        :param x_val: iterable of points to classify.
        :param y_train: labels of ``x_train`` (``numpy.ndarray`` or list).
            Any hashable, mutually comparable labels work, including
            negative integers.
        :returns: ``numpy.ndarray`` with one predicted label per point.
        :raises ValueError: if ``k`` is outside ``1..len(y_train)``.
        """
        label_count = len(y_train)
        if k < 1 or k > label_count:
            raise ValueError(
                "k must be between 1 and the number of training labels "
                "({}), got {}".format(label_count, k)
            )

        y_pred_knn = []
        # Iterate through each value in the validation data.
        for val in x_val:
            distances = self.distance_metric.compute_distances_from_points(val, x_train)
            ranked_labels, _ = self.sort_similarities_bubble_sort(distances, y_train)

            # Count votes per label with a mapping rather than a list indexed
            # by label, so negative or non-integer labels are counted
            # correctly.
            votes = Counter(ranked_labels[:k])
            top_count = max(votes.values())
            # Break ties towards the smallest label.
            winner = min(label for label, count in votes.items() if count == top_count)
            y_pred_knn.append(winner)
        return np.array(y_pred_knn)