-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathifor.py
122 lines (80 loc) · 3.22 KB
/
ifor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from sklearn.ensemble import IsolationForest as ifor
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.utils import check_array
def _average_path_length(n_samples_leaf):
    """Return the iTree average path length c(n) for each entry of the input.

    This is the average path length of an unsuccessful binary-search-tree
    lookup, which has the same structure as an isolation tree.

    Parameters
    ----------
    n_samples_leaf : array-like
        Sample counts ``n``; validated with ``check_array(ensure_2d=False)``.

    Returns
    -------
    average_path_length : ndarray
        Same shape as the validated input, filled element-wise with
        ``0`` where ``n <= 1``, ``1`` where ``n == 2`` and
        ``2 * (ln(n - 1) + euler_gamma) - 2 * (n - 1) / n`` otherwise.
    """
    n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False)
    original_shape = n_samples_leaf.shape

    # Work on a single-row view so boolean masks index uniformly.
    counts = n_samples_leaf.reshape((1, -1))
    lengths = np.zeros(counts.shape)

    is_trivial = counts <= 1
    is_pair = counts == 2
    is_general = ~(is_trivial | is_pair)

    lengths[is_trivial] = 0.0
    lengths[is_pair] = 1.0

    # The closed form is only evaluated where n > 2, so log(n - 1) is defined.
    general_counts = counts[is_general]
    lengths[is_general] = (
        2.0 * (np.log(general_counts - 1.0) + np.euler_gamma)
        - 2.0 * (general_counts - 1.0) / general_counts
    )

    return lengths.reshape(original_shape)
def find_average_depth(x, tx, **kwargs):
    """Fit an isolation forest on ``x`` and derive a depth-like value for ``tx``.

    Parameters
    ----------
    x : array-like
        Training data passed to ``fit``.
    tx : ndarray
        Test data; cast to ``float32`` before scoring.
    **kwargs
        Forwarded unchanged to the ``IsolationForest`` constructor.

    Returns
    -------
    ndarray
        ``log2(score) * -c(max_samples)`` for every row of ``tx``, where
        ``score`` comes from the private ``_compute_score_samples`` and ``c``
        is ``_average_path_length``.
    """
    forest = ifor(**kwargs)
    forest.fit(x)

    # Private scikit-learn API, called with subsample_features=False.
    raw_scores = forest._compute_score_samples(tx.astype(np.float32), False)
    log2_scores = np.log(raw_scores) / np.log(2)

    # NOTE(review): presumably the raw score is 2 ** (-mean_depth / c(n)), so
    # this rescaling recovers the mean depth - confirm against the installed
    # scikit-learn version.
    normaliser = _average_path_length([forest._max_samples])
    return log2_scores * -normaliser
class FB:
    """Random feature-bagging selector.

    On construction, ``bag`` distinct column indices are drawn out of ``N``
    without replacement. Calling the instance (or ``predict``) returns those
    columns of an input array, in the order they were drawn.

    Attributes are plain instance fields:
    ``bag`` (number of selected columns), ``N`` (expected column count) and
    ``which`` (the drawn column indices).
    """

    def __init__(self, bag, N):
        self.bag = bag
        self.N = N
        # Uses NumPy's global random state; sampling is without replacement.
        self.which = np.random.choice(np.arange(N), bag, replace=False)

    def predict(self, X):
        """Return the selected columns of ``X``.

        Parameters
        ----------
        X : ndarray
            Array whose second dimension must have length ``N``.

        Returns
        -------
        ndarray
            ``X`` restricted along axis 1 to the indices in ``which``.

        Raises
        ------
        ValueError
            If ``X.shape[1]`` differs from ``N``.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if X.shape[1] != self.N:
            raise ValueError(
                f"expected {self.N} columns, got {X.shape[1]}"
            )
        # One vectorised gather; also handles an empty selection, which
        # stacking a list of columns cannot.
        return np.take(X, self.which, axis=1)

    def __call__(self, X):
        """Alias for ``predict``."""
        return self.predict(X)
def baggin(x, tx, count=32):
    """Apply one shared random column subset to ``x`` and ``tx``.

    Parameters
    ----------
    x : ndarray
        First array; its ``shape[1]`` is the column count the selector expects.
    tx : ndarray
        Second array, reduced with the same selector.
    count : int, default 32
        Number of columns to keep.

    Returns
    -------
    tuple
        ``(x_subset, tx_subset, which)`` where ``which`` holds the chosen
        column indices.
    """
    selector = FB(count, x.shape[1])
    x_subset = selector(x)
    tx_subset = selector(tx)
    return x_subset, tx_subset, selector.which
def main():
    """Run the experiment: print two ROC AUC values for the test set.

    Reads array ``q`` from ``training.npz`` and arrays ``q`` and ``y`` from
    ``test.npz``, reduces both feature matrices with ``baggin``, then prints:

    1. the AUC of the negated ``score_samples`` output of a default
       isolation forest, and
    2. the AUC of the negated ``find_average_depth`` output (which fits its
       own forest on the same reduced training data).
    """
    # Context managers close the .npz archives once the arrays are read.
    with np.load("training.npz") as train_file:
        x = train_file["q"]
    with np.load("test.npz") as test_file:
        tx, ty = test_file["q"], test_file["y"]

    x, tx, _ = baggin(x, tx)

    clf = ifor()
    clf.fit(x)

    tx = tx.astype(np.float32)
    scores = -clf.score_samples(tx)
    print(roc_auc_score(ty, scores))

    scores = -find_average_depth(x, tx)
    print(roc_auc_score(ty, scores))

    # NOTE: an experiment that kept only the estimators using feature 0 used
    # to follow here, but it sat behind an unconditional exit() and never ran.
    # It also assigned to ``estimators_samples_``, which appears to be a
    # read-only property in scikit-learn - verify before reviving it.


if __name__ == "__main__":
    main()