-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathClassifier.py
123 lines (112 loc) · 5.59 KB
/
Classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Abdullah Arif, Clyde Rempillo, Andrew Myshok
# COMP-4800 - Selected Topic in Software engineering project
# Keystroke dynamics user classifier - classifies users based on the differences in their authentication results
from KeystrokeAuthenticator import KeystrokeAuthenticator
from statistics import stdev, mean
from numpy import percentile
# from pprint import pprint
class Classifier:
    """Group users into sheep, lambs, goats and wolves from authentication results.

    Every user in ``users`` is used in turn as the genuine user: the model is
    trained on that user's training vector, tested against the user's own test
    data and then against strokes from every other user. The tallies stored on
    the user objects are afterwards compared against per-group thresholds.
    """

    # A score from ``userModel.evaluate`` below this value counts as a failed
    # authentication attempt; at or above it the attempt is accepted.
    ACCEPTANCE_SCORE = 0.85

    def __init__(self, users: dict, userModel: "KeystrokeAuthenticator", comparative = False):
        """Store the population, the model and the thresholding mode.

        Args:
            users: mapping of user ID to user object.
            userModel: model exposing ``trainModel`` and ``evaluate``.
            comparative: when true, thresholds are percentiles of the given
                population; otherwise fixed thresholds are used.
        """
        self.users = users
        self.userModel = userModel
        self.comparative = comparative

    @staticmethod
    def findOutlier(scores: list, userIDs: list) -> list:
        """Return the IDs whose score lies more than 3 standard deviations above the mean.

        Only the high side is flagged. ``scores`` and ``userIDs`` are paired by
        position. Fewer than two scores cannot have a sample standard
        deviation, so no outlier is reported in that case.
        """
        if len(scores) < 2:
            return []
        avg = mean(scores)
        limit = stdev(scores) * 3
        return [userID for score, userID in zip(scores, userIDs) if (score - avg) > limit]

    def classifyUser(self):
        """Rebuild the per-user tallies, print each group and return the group sizes.

        Returns:
            Tuple ``(sheep, lambs, goats, wolves)`` with the number of users in
            each group.
        """
        # build the users values needed to classify the users
        self.buildClassifier()
        userIDs = list(self.users)

        # Sheep use an inclusive comparison and the 70th percentile; the other
        # groups use a strict comparison and the 90th percentile.
        sheepIDs = self._selectGroup(userIDs, "getAccuracy", 70, 0.8, "sheep", "sheep", inclusive=True)
        lambsIDs = self._selectGroup(userIDs, "getFalsePositive", 90, 0.99, "lamb", "lambs")
        goatsIDs = self._selectGroup(userIDs, "getFalseNegative", 90, 0.99, "goat", "goats")
        wolvesIDs = self._selectGroup(userIDs, "getImitations", 90, 0.99, "wolf", "wolves")
        return len(sheepIDs), len(lambsIDs), len(goatsIDs), len(wolvesIDs)

    def _selectGroup(self, userIDs: list, metric: str, pct: int, fixedThreshold: float,
                     singular: str, plural: str, inclusive: bool = False) -> list:
        """Pick the users whose ``metric`` attribute passes the group threshold and report them."""
        scores = [getattr(self.users[userID], metric) for userID in userIDs]
        # A percentile of an empty population is undefined, so fall back to the
        # fixed threshold (the resulting group is empty either way).
        if self.comparative and scores:
            threshold = percentile(scores, pct)
            print("percentile =", threshold)
        else:
            threshold = fixedThreshold

        if inclusive:
            members = [userID for userID, score in zip(userIDs, scores) if score >= threshold]
        else:
            members = [userID for userID, score in zip(userIDs, scores) if score > threshold]

        if members:
            print(f"The following user are considered to be {singular}:", members)
        else:
            print(f"There are no {plural} in the data... ")
        return members

    # get the information needed to classify the users
    def buildClassifier(self):
        """Train and test the model once per user, filling in each user's tallies."""
        # Reset everybody before any testing starts: checkImpostor calls
        # imitate() on users other than the one being trained, so a reset
        # performed later (when that other user is trained) could discard what
        # was already recorded for them - assuming resetClassifierValue also
        # clears the imitation tally; verify against the user class.
        for user in self.users.values():
            user.resetClassifierValue()
        for userID in self.users:
            self.trainUser(userID, reset=False)
            self.checkSelf(userID)
            self.checkImpostor(userID)

    # Step 1 (training): Use the first 200 passwords typed by the genuine user and train model to detect user
    def trainUser(self, userID: str, reset: bool = True):
        """Train the model on ``userID``'s training vector.

        Args:
            userID: key of the user in ``self.users``.
            reset: when true (default), also reset the values used to classify
                the user, in case the model is run multiple times.
        """
        user = self.users[userID]
        self.userModel.trainModel(user.getTrainingVector())
        if reset:
            user.resetClassifierValue()

    # Step 2 (genuine-user testing): Next see how well detector does against user's own results
    def checkSelf(self, userID: str):
        """Evaluate the user's own test vectors against the currently trained model."""
        user = self.users[userID]
        for testVector in user.getUserTestData():
            if self.userModel.evaluate(testVector) < self.ACCEPTANCE_SCORE:  # user failed own test
                user.reject(False)  # store false rejection - rejected themselves
            else:
                user.accept(True)  # store true match - authenticated themselves

    # Step 3 (impostor testing): Check how model does against impostor score
    def checkImpostor(self, userID: str):
        """Evaluate strokes from every other user against the model trained for ``userID``."""
        user = self.users[userID]
        for impostorID, impostor in self.users.items():
            if impostorID == userID:  # the user themselves are not an impostor
                continue
            for testVector in impostor.getStrokes(10):
                if self.userModel.evaluate(testVector) < self.ACCEPTANCE_SCORE:  # impostor failing test
                    user.reject(True)  # store true rejection - rejected an impostor
                else:
                    user.accept(False)  # store false acceptance - an impostor got through
                    # NOTE(review): imitate() is assumed to belong to the
                    # accepted branch only (the impostor passed as the user).
                    impostor.imitate()