-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimpleMi.py
75 lines (55 loc) · 1.99 KB
/
simpleMi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC
import EDM54.miHelperMethods as utilities
import EDM54.similarities as similarities
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics.pairwise import chi2_kernel
class SMI:
    """Simple multiple-instance learner.

    Every bag is collapsed into a single fixed-length feature vector and a
    standard SVM is trained on those vectors.

    Data layout, as used by the methods below:

    * a bag record ``b`` keeps its list of instances at ``b[1]`` and its
      label at ``b[-1]``;
    * the first element of every instance is skipped (presumably an
      identifier -- confirm against the data loader); the remaining
      elements are the numeric features.
    """

    def learn(self, bags, c=1, kernel='rbf'):
        """Train an SVM on the min/max embedding of ``bags``.

        Args:
            bags: iterable of bag records (see the class docstring).
            c: accepted for interface compatibility; currently not forwarded.
            kernel: accepted for interface compatibility; currently not
                forwarded.

        Returns:
            The fitted ``sklearn.svm.SVC`` classifier.
        """
        vectors, labels = self.map_all(bags)
        # NOTE(review): `c` and `kernel` are ignored; the classifier has always
        # been built with kernel='rbf' and C=10. Forwarding `c` (default 1)
        # would silently change the model trained by every default call, so
        # the hard-coded values are kept until the intended default is
        # confirmed.
        classifier = SVC(kernel='rbf', gamma='auto', C=10)
        classifier.fit(vectors, labels)
        return classifier

    def map_all(self, bags):
        """Embed every bag with ``map_to_vector_min`` and collect its label.

        Returns:
            A ``(mapped_vectors, labels)`` pair of lists, both in the order
            of ``bags``.
        """
        mapped_vectors = [self.map_to_vector_min(b[1]) for b in bags]
        labels = [b[-1] for b in bags]
        return mapped_vectors, labels

    @staticmethod
    def _feature_columns(bag):
        """Return one list of values per feature, skipping each instance's first element.

        The feature count is taken from the first instance, so an empty bag
        raises ``IndexError``.
        """
        width = len(bag[0][1:])
        return [[instance[k + 1] for instance in bag] for k in range(width)]

    def map_to_vector(self, bag):
        """Return the per-feature average over all instances of ``bag``.

        Raises:
            IndexError: if ``bag`` is empty.
        """
        size = len(bag)
        return [sum(column) / size for column in self._feature_columns(bag)]

    def map_to_vector_min(self, bag):
        """Return the per-feature minima followed by the per-feature maxima.

        The extremes are taken over the observed values only. They are not
        clamped against 0, so the result is also correct for features that
        are all positive or all negative.

        Raises:
            IndexError: if ``bag`` is empty.
        """
        columns = self._feature_columns(bag)
        return [min(column) for column in columns] + [max(column) for column in columns]

    def map_to_vector_mma(self, bag):
        """Return per-feature minima, then maxima, then averages.

        The result has three consecutive segments, each as long as the number
        of features: ``[min..., max..., avg...]``.

        Raises:
            IndexError: if ``bag`` is empty.
        """
        columns = self._feature_columns(bag)
        size = len(bag)
        minima = [min(column) for column in columns]
        maxima = [max(column) for column in columns]
        # Only the summed (third) segment is divided by the bag size; the
        # minima and maxima are reported as-is.
        averages = [sum(column) / size for column in columns]
        return minima + maxima + averages