-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathself_tsne.py
232 lines (197 loc) · 6.48 KB
/
self_tsne.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
@PengTingyu_201692392
from time import time
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
def neg_squared_euc_dists(X):
'''
# 参数:
X: [N,D]的矩阵
# 返回值:
-D:[N,N]的矩阵, -D[i,j]表示X_i & X_j 之间的欧式距离的平方的负数
'''
sum_X = np.sum(np.square(X), 1)
D = np.add(np.add(-2 * np.dot(X, X.T), sum_X).T, sum_X)
return -D
def softmax(X, diag_zero=True):
'''
softmax 计算 exp()/∑exp()
'''
# 归一化
e_x = np.exp(X - np.max(X, axis=1).reshape([-1, 1]))
# 设置对角线值为0
if diag_zero:
np.fill_diagonal(e_x, 0.)
e_x = e_x + 1e-8 # 数值稳定
return e_x / e_x.sum(axis=1).reshape([-1, 1])
def calc_prob_matrix(distances, sigmas=None):
'''
将距离矩阵转化为概率矩阵
# 参数:
distances:距离矩阵
sigmas:高斯分布的方差
'''
if sigmas is not None:
two_sig_sq = 2. * np.square(sigmas.reshape((-1, 1)))
return softmax(distances / two_sig_sq)
else:
return softmax(distances)
def p_conditional_to_joint(P):
'''
将高维中的距离变为对称的
# 参数:
P: 条件概率矩阵
'''
return (P + P.T) / (2. * P.shape[0])
def p_joint(X, target_perplexity):
'''
根据给的原数据X和困惑度,计算最后高维中的联合概率分布矩阵
# 参数
X: 原数据矩阵
# 返回值:
P: 高维中的联合概率分布矩阵.
'''
distances = neg_squared_euc_dists(X)
# 找到最优的高斯分布方差sigma
sigmas = find_optimal_sigmas(distances, target_perplexity)
p_conditional = calc_prob_matrix(distances, sigmas)
P = p_conditional_to_joint(p_conditional)
return P
def binary_search(eval_fn, target, tol=1e-10, max_iter=10000, lower=1e-20, upper=1000.):
'''
二分查找
# 参数:
eval_fn: 优化的函数
target: 目标输出值
tol: Float, guess < tol时停止搜索
max_iter: int, 最大迭代次数
lower: Float, 搜索下界
upper: Float, 搜索上界
# Returns:
Float, 优化函数的最优输入值
'''
for i in range(max_iter):
guess = (lower + upper) / 2.
val = eval_fn(guess)
if val > target:
upper = guess
else:
lower = guess
if np.abs(val - target) <= tol:
break
return guess
def calc_perplexity(prob_matrix):
'''
计算困惑度
# 参数:
prob_matrix: 困惑度分布
'''
entropy = -np.sum(prob_matrix * np.log2(prob_matrix), 1)
perplexity = 2 ** entropy
return perplexity
def perplexity(distances, sigmas):
'''
根据距离矩阵和高斯方差计算困惑度
'''
return calc_perplexity(calc_prob_matrix(distances, sigmas))
def find_optimal_sigmas(distances, target_perplexity):
'''
给距离矩阵的每一行找一个能产生最优困惑度分布的sigma[].
'''
sigmas = []
# 距离矩阵的每一行代表一个数据点
for i in range(distances.shape[0]):
# Make fn that returns perplexity of this row given sigma
eval_fn = lambda sigma:perplexity(distances[i:i+1, :], np.array(sigma))
# 执行二分查找
correct_sigma = binary_search(eval_fn, target_perplexity)
sigmas.append(correct_sigma)
return np.array(sigmas)
def estimate_tsne(X, y, P, rng, num_iters, q_fn, grad_fn, learning_rate, momentum):
'''
模拟函数
# 参数:
X: 原数据矩阵
y: labels
P: 高维中的联合分布矩阵
rng: np.random.RandomState().
num_iters: 迭代次数
q_fn: 获得Y和低维空间中的联合概率分布矩阵Q
# 返回值:
Y: X在低维空间中的分布.
'''
# Y随机初始化
Y = rng.normal(0., 0.0001, [X.shape[0], 2])
if momentum:
Y_m2 = Y.copy()
Y_m1 = Y.copy()
# 梯度下降
for i in range(num_iters):
Q, distances = q_fn(Y)
grads = grad_fn(P, Q, Y, distances)
# 更新Y
Y = Y - learning_rate * grads
if momentum:
Y += momentum * (Y_m1 - Y_m2)
# 更新
Y_m2 = Y_m1.copy()
Y_m1 = Y.copy()
return Y
def q_tsne(Y):
'''
根据t-SNE低维空间的分布Y,计算低维空间的联合概率分布矩阵
'''
distances = neg_squared_euc_dists(Y)
inv_distances = np.power(1. - distances, -1)
np.fill_diagonal(inv_distances, 0.)
return inv_distances / np.sum(inv_distances), inv_distances
def tsne_grad(P, Q, Y, inv_distances):
'''
t-SNE的梯度
'''
pq_diff = P - Q
pq_expanded = np.expand_dims(pq_diff, 2)
y_diffs = np.expand_dims(Y, 1) - np.expand_dims(Y, 0)
distances_expanded = np.expand_dims(inv_distances, 2)
y_diffs_wt = y_diffs * distances_expanded
grad = 4. * (pq_expanded * y_diffs_wt).sum(1)
return grad
def plot_embedding(data, label, title):
'''
可视化嵌入空间
'''
x_min, x_max = np.min(data, 0), np.max(data, 0)
data = (data - x_min) / (x_max - x_min)
fig = plt.figure()
ax = plt.subplot(111)
for i in range(data.shape[0]):
plt.text(data[i, 0], data[i, 1], str(label[i]),
color=plt.cm.Set1(label[i] / 10.),
fontdict={'weight': 'bold', 'size': 9})
plt.xticks([])
plt.yticks([])
plt.title(title)
return fig
# 参数
PERPLEXITY = 20
SEED = 1 # 随机种子
MOMENTUM = 0.9
LEARNING_RATE = 10.
NUM_ITERS = 500
def main():
rng = np.random.RandomState(SEED)
# 加载数据
digits = datasets.load_digits(n_class=6) # 只选取数字0~5进行可视化
X, y = digits.data, digits.target
P = p_joint(X, PERPLEXITY)
# t-SNE
t0 = time()
Y = estimate_tsne(X, y, P, rng, num_iters=NUM_ITERS, q_fn=q_tsne, grad_fn=tsne_grad,\
learning_rate=LEARNING_RATE, momentum=MOMENTUM)
# 可视化
fig = plot_embedding(Y, y,
't-SNE embedding of the digits (time %.2fs)'
% (time() - t0))
plt.show(fig)
if __name__ == '__main__':
main()