OLButils.py
#!/usr/bin/env python
import random
import numpy as np
from sklearn.cluster import KMeans
from OLB import OLB
import scipy
import skimage
import scipy.misc
from skimage import io
from matplotlib import pyplot as plt
from skimage.exposure import rescale_intensity
import skimage.transform
from traindataclass import traindata
import utils
from PIL import Image
import matplotlib.image as mpimg
from scipy.misc import toimage
import json
# import Pycluster
# this function projects one representation point x_j into the
# space defined by h
# and returns the projected data p_j
def _projectone(OLB, x_j, h, mu):
U = np.dot(OLB.pcs, h.T)
p_j = np.dot(x_j - mu, U.T)
return p_j
# this function takes in a batch of m face images and projects them onto the
# top-k principal components (eigenfaces); it returns the per-image weights,
# the mean image, and the eigenfaces themselves, and also writes all three to
# text files under ./finalresult/
def _PCA(topk, m, images): # taking in m batch of face images
# right now assume face image is in dimension (224, 224, 1)
# flatten to one row of (1,224*224*1)
assert images.shape == (m, 224*224)
    L = images.T.copy() # each column is an image vector; shape is (50176 x m); copy so the caller's batch is not modified in place
    mean_img_col = np.mean(L, axis=1) # sample mean of each feature (pixel)
    for j in range(m): # subtract the mean image from every training image column
        L[:, j] -= mean_img_col
    C = np.matrix(L.transpose()) * np.matrix(L) # m x m surrogate covariance (eigenface trick: eig of L^T L instead of the huge L L^T)
    C /= m
    evalues, evectors = np.linalg.eig(C) # eigenvectors are the COLUMNS of evectors (m x m)
    sort_indices = evalues.argsort()[::-1] # sort eigenvalues in decreasing order
    evalues = evalues[sort_indices]
    evectors = evectors[:, sort_indices] # reorder the eigenvector columns to match
    evalues_count = topk
    evalues = evalues[:evalues_count]
    evectors = evectors[:, :evalues_count] # keep the top-k eigenvector columns
    evectors = L * evectors # map back to image space: each column of L * v is an eigenface (50176, topk)
norms = np.linalg.norm(evectors, axis=0) # find the norm of each eigenvector
evectors = evectors / norms # normalize all eigenvectors
weights = evectors.transpose() * L # computing the weights (numPCs, m)
with open('./finalresult/weights.txt','w') as f:
np.savetxt(f, weights)
with open('./finalresult/meanimg.txt', 'w') as f:
np.savetxt(f, mean_img_col)
with open('./finalresult/eigenvectors.txt', 'w') as f:
np.savetxt(f, evectors.transpose())
    print 'Finished writing weights, eigenvectors, and mu to files ...'
    return weights, mean_img_col, evectors.transpose() # eigenfaces as rows, shape (topk, 224*224)
# example shapes from a run with topk = 100 and m = 146 training images:
# evectors (eigenfaces in image space) has shape (50176, 100), L has shape (50176, 146)
# weights = evectors.T * L has shape (100, 146)
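
# A minimal usage sketch of _PCA on a synthetic batch (illustrative only): the
# batch size and top-k value below are assumptions for demonstration, not values
# used elsewhere in this repo, and ./finalresult/ must already exist because
# _PCA writes its outputs there.
def _example_pca():
    m = 8
    fake_images = np.random.rand(m, 224*224) # m flattened 224x224 "faces"
    weights, mu, eigenfaces = _PCA(5, m, fake_images)
    assert weights.shape == (5, m)           # one weight per eigenface per image
    assert mu.shape == (224*224,)            # mean image (per-pixel mean)
    assert eigenfaces.shape == (5, 224*224)  # eigenfaces stored as rows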
# this function runs k-means on the projected data p
# and returns an array of length len(p) giving the cluster label of each sample
def _kmeans(p, k):
    kmeans = KMeans(n_clusters=k, random_state=0).fit(p)
# labels, error, nfound = Pycluster.kcluster(p, k)
return kmeans.labels_
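
# A minimal sketch of _kmeans on fake projected data (illustrative only):
# twenty 2-D points drawn from two well-separated blobs, clustered with k = 2.
def _example_kmeans():
    p = np.vstack([np.random.randn(10, 2), np.random.randn(10, 2) + 5.0])
    labels = _kmeans(p, 2)
    assert len(labels) == 20 # one cluster label per projected sample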
# can only be called after importing txt and image data and after calling converttraindata
# this function randomly samples batchsize training examples and returns their gender labels,
# age labels, and images, each of length batchsize
def loadtraindatabatch(batchsize, traindataconverted):
sze = batchsize
loadedgenderlabel = []
loadedagelabel = []
loadedimage = []
sampled = random.sample(traindataconverted, sze) # without replacement
for x in sampled:
loadedgenderlabel.append(x['genderlabel'])
loadedagelabel.append(x['agelabel'])
        img = utils.load_image(x['imagepath'])
        img = np.array(img).flatten().reshape(1, -1) # flatten without overwriting the loop variable x
        assert img.shape == (1, 224*224)
assert np.sum(img) > 0
if len(loadedimage) == 0:
loadedimage = img
else:
loadedimage = np.concatenate((loadedimage, img), 0) # stack rows
print 'Loaded and concatenated image shape after loading {} number of training data is {}'.format(batchsize, loadedimage.shape)
return loadedgenderlabel, loadedagelabel, loadedimage
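
# A minimal sketch of how loadtraindatabatch feeds _PCA (illustrative only):
# traindataconverted is assumed to be the converted list of dicts described
# above, each carrying 'genderlabel', 'agelabel', and 'imagepath' keys.
def _example_load_and_pca(traindataconverted, batchsize=50, topk=10):
    genders, ages, images = loadtraindatabatch(batchsize, traindataconverted)
    weights, mu, eigenfaces = _PCA(topk, batchsize, images)
    return genders, ages, weights, mu, eigenfaces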
def test(learner, train, test, displayk, saveimg = './finalresult/after.png'):
    # each entry: {userid, imageid, gender, genderlabel, age, agelabel, imagepath}
    testimg = random.choice(train) # reconstruct a randomly chosen training image
weights, mean_img_col, evectors = learner.weights, learner.mean_img_col, learner.evectors
    # visualize the eigenface basis by summing all eigenfaces with weight 1
avgface = np.matrix.sum(evectors, axis = 0)
avgfaceimg = np.reshape(avgface, (224,224))
avgfaceimg = avgfaceimg.astype(complex).real
scipy.misc.toimage(avgfaceimg).save('./finalresult/avgface.png')
# plt.imshow(avgfaceimg, interpolation='nearest')
# plt.title('Average Eigenface')
# plt.show()
# scipy.misc.toimage(avgface).save('averageface.png')
# avgface = skimage.io.imread('averageface.png')
# io.imshow(avgface)
# io.show()
print 'finished importing weights, mu, and eigenvectors of the sample data ...'
testimg = utils.load_image(testimg['imagepath'])
scipy.misc.toimage(testimg).save('./finalresult/testbefore.png')
# io.imshow(img)
# io.show()
# plt.imshow(testimg, interpolation='nearest')
# plt.title('TEST-Before Reconstruction')
# plt.show()
    img = np.reshape(testimg, (-1, 1)) # column vector
    img = img - np.reshape(mean_img_col, (-1, 1)) # subtract the mean image
    S = np.asarray(evectors * img).flatten() # weight of the test image on each eigenface
    sortedidx = np.argsort(np.abs(S))[::-1] # largest-magnitude weights first
    topkweights = S[sortedidx][:displayk]
    topkevecs = np.asarray(evectors)[sortedidx][:displayk] # matching eigenfaces (rows)
    reconstructed = 0
    for i in range(displayk):
        reconstructed += topkweights[i] * topkevecs[i]
reconstructed = np.reshape(reconstructed, (224, 224))
reconstructed = reconstructed.astype(complex).real
scipy.misc.toimage(reconstructed).save(saveimg)
# plt.imshow(reconstructed, interpolation='nearest')
# plt.title('TEST-After Reconstructed')
# plt.show()
# scipy.misc.toimage(reconstructed).save(saveimg)
# afterimg = skimage.io.imread(saveimg)
# io.imshow(afterimg)
# io.show()
# testimg = mpimg.imread(testimg['imagepath'])
    print 'successfully saved the reconstructed output image (note that this uses just the top %s eigenfaces)' % displayk
if displayk < 10:
for idx, e in enumerate(topkevecs):
e = np.reshape(e, (224, 224))
fig = plt.figure()
plt.imshow(e, interpolation='nearest')
plt.title('%s eigenvector' % idx)
name = '%s eigenvector.png' % idx
fig.savefig(name)
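
# A minimal sketch of calling test() (illustrative only): learner is assumed to
# be an OLB-style object exposing .weights, .mean_img_col, and .evectors (the
# three arrays returned by _PCA), and train/testset are lists of the dicts
# described above.
def _example_reconstruction(learner, train, testset):
    test(learner, train, testset, displayk=5) # writes ./finalresult/after.png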
# calculate class averages instead of averages of all training data
# (currently this only displays the summed eigenface, as in test() above)
def classavg(learner, train):
weights, mean_img_col, evectors = learner.weights, learner.mean_img_col, learner.evectors
    # visualize the eigenface basis by summing all eigenfaces with weight 1
avgface = np.matrix.sum(evectors, axis = 0)
avgfaceimg = np.reshape(avgface, (224,224))
avgfaceimg = avgfaceimg.astype(complex).real
plt.imshow(avgfaceimg, interpolation='nearest')
plt.title('Average Eigenface')
plt.show()
# this function tests classification accuracy using the eigenface weights only:
# subtract the mean image, multiply by the eigenvectors to obtain the weights
# on each eigenvector, find the training example whose weights are closest to
# the test image's weights, and assign that example's label to the test image
def testeigenface(learner, train, test, param, outputfile = './finalresult/eigenfacepredict.json'):
    # each test entry contains at least:
    # {userid, imageid, gender, genderlabel, age, agelabel, imagepath}
correct = 0
for x in test:
img = utils.load_image(x['imagepath'])
img = np.reshape(img, (-1,1)) # column vector
img -= np.reshape(learner.mean_img_col,(-1,1))
S = learner.evectors * img
diff = learner.weights - S # finding the min ||W_j - S||
norms = np.linalg.norm(diff, axis=0)
closest_face_idx = np.argmin(norms) # idx corresponding to train data
predict = train[closest_face_idx]
x['predict'] = predict
if predict[param] == x[param]:
correct += 1
accu = correct*1./len(test)
test = { 'test': test }
with open(outputfile, 'a') as f:
json.dump(test, f)
with open('./finalresult/testeigenface.txt','a') as f:
        f.write('Accuracy on {} test examples using only the eigenfaces for {} is {}\n'.format(len(test['test']), param, accu))
return accu
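
# A minimal sketch of calling testeigenface (illustrative only): evaluates
# nearest-neighbour classification in eigenface-weight space on the gender
# label; learner, train, and testset are assumed as in _example_reconstruction.
def _example_testeigenface(learner, train, testset):
    accu = testeigenface(learner, train, testset, 'genderlabel')
    print 'eigenface nearest-neighbour accuracy: {}'.format(accu)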