-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathevaluate.py
71 lines (59 loc) · 2.47 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
import bottleneck as bn
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    """Compute NDCG@k for every row of a batch of predicted scores.

    For each row, the ``k`` highest-scoring columns of ``X_pred`` are ranked
    by descending score. The matching entries of ``heldout_batch`` are
    weighted by ``1 / log2(rank + 1)`` and summed to give DCG. IDCG is the
    sum of the first ``min(n, k)`` discount weights, where ``n`` is the
    number of stored entries in that row of ``heldout_batch``.

    Args:
        X_pred: 2-D numpy array of scores, one row per user.
        heldout_batch: Sparse matrix with the same shape as ``X_pred``; it
            must support fancy indexing, ``.toarray()`` and
            ``.getnnz(axis=1)`` (presumably a ``scipy.sparse`` matrix with
            binary relevance entries -- confirm against the caller).
        k: Ranking cutoff. Values greater than or equal to the number of
            columns are treated as "rank every column".

    Returns:
        1-D numpy array with one NDCG value per row. A row with no stored
        held-out entries has IDCG == 0 and therefore yields NaN.
    """
    batch_users, n_items = X_pred.shape
    # argpartition needs kth < n_items, so never look at more columns than
    # actually exist.
    top = min(k, n_items)
    rows = np.arange(batch_users)[:, np.newaxis]
    if top < n_items:
        # Unordered indices of the `top` largest scores in each row.
        idx_topk_part = np.argpartition(-X_pred, top, axis=1)[:, :top]
    else:
        # The cutoff covers every column: all of them are candidates.
        idx_topk_part = np.tile(np.arange(n_items), (batch_users, 1))
    topk_part = X_pred[rows, idx_topk_part]
    # Order the candidates by descending score.
    idx_part = np.argsort(-topk_part, axis=1)
    # X_pred[rows, idx_topk] is the sorted top-k predicted score.
    idx_topk = idx_topk_part[rows, idx_part]
    # Build the discount template: 1 / log2(rank + 1) for rank = 1..top.
    tp = 1. / np.log2(np.arange(2, top + 2))
    DCG = (heldout_batch[rows, idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([tp[:min(n, top)].sum()
                     for n in heldout_batch.getnnz(axis=1)])
    return DCG / IDCG
def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    """Compute Recall@k for every row of a batch of predicted scores.

    For each row, the ``k`` highest-scoring columns of ``X_pred`` are marked
    as selected. Recall is the number of selected columns whose
    ``heldout_batch`` entry is positive, divided by
    ``min(k, number of positive held-out entries in the row)``.

    Args:
        X_pred: 2-D numpy array of scores, one row per user.
        heldout_batch: Sparse matrix with the same shape as ``X_pred``; it
            must support ``> 0`` comparison and ``.toarray()`` (presumably
            a ``scipy.sparse`` matrix -- confirm against the caller).
        k: Ranking cutoff. Values greater than or equal to the number of
            columns select every column.

    Returns:
        1-D numpy array with one recall value per row. A row with no
        positive held-out entry has a zero denominator and yields NaN.
    """
    batch_users, n_items = X_pred.shape
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    if k < n_items:
        # Unordered indices of the k largest scores in each row.
        idx = np.argpartition(-X_pred, k, axis=1)
        X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True
    else:
        # argpartition needs kth < n_items; with a cutoff this large every
        # column belongs to the top-k set.
        X_pred_binary[:] = True
    X_true_binary = (heldout_batch > 0).toarray()
    hits = np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)
    hits = hits.astype(np.float32)
    recall = hits / np.minimum(k, X_true_binary.sum(axis=1))
    return recall
def precisionAtK(Y_pred_orig, Y_true_orig, k, verbose=False):
    """Compute precision at ranks 1..k and return P@1, P@3 and P@5.

    Rows whose label sum in ``Y_true_orig`` is zero are dropped first. Then,
    for each rank, the highest remaining score in every row is picked, the
    true-label value at that position is added to a running match count,
    and the picked score is masked out with ``-inf``. Precision at rank
    ``i`` is ``matches / (i * number_of_kept_rows)``.

    Args:
        Y_pred_orig: 2-D numpy array of scores (rows x labels). It needs a
            float dtype because ``-inf`` is written into a working copy.
            The caller's array is not modified.
        Y_true_orig: Label matrix with the same shape, dense or sparse
            (anything supporting ``.sum(axis=1)`` and row/element fancy
            indexing).
        k: Number of ranks to evaluate.
        verbose: Unused; kept for interface compatibility.

    Returns:
        Tuple with the precision at ranks 1, 3 and 5. Ranks larger than
        ``k`` are omitted, so the tuple is shorter when ``k < 5``. If no
        row has any label, the values are NaN.
    """
    row_sum = np.asarray(Y_true_orig.sum(axis=1)).reshape(-1)
    # Keep only rows that carry at least one label. Fancy indexing returns
    # new objects, so the caller's inputs stay untouched.
    keep = np.flatnonzero(row_sum != 0)
    Y_pred = Y_pred_orig[keep, :]
    Y_true = Y_true_orig[keep, :]

    p = np.zeros(k)
    assert Y_pred.shape == Y_true.shape
    n_items = Y_pred.shape[0]
    row_idx = np.arange(n_items)
    matched = 0
    for rank in range(1, k + 1):
        best = np.argmax(Y_pred, 1)
        matched += np.sum(Y_true[row_idx, best])
        # Mask the chosen label so the next pass finds the runner-up.
        Y_pred[row_idx, best] = -np.inf
        p[rank - 1] = matched / (rank * n_items)

    # Report P@1, P@3, P@5, restricted to the ranks that were computed.
    wanted = np.array([pos for pos in (0, 2, 4) if pos < k], dtype=int)
    return tuple(p[wanted])
def evaluate_all(X_pred, X_true):
    """Compute NDCG@k and Recall@k at the cutoffs 1, 5, 10, 20 and 50.

    Args:
        X_pred: 2-D array of predicted scores, one row per user; passed
            unchanged to the per-cutoff metric functions.
        X_true: Held-out interactions, passed unchanged to the per-cutoff
            metric functions.

    Returns:
        Tuple ``(ndcgs, recalls)``. Each is a numpy array of shape
        ``(5, X_pred.shape[0])`` whose row ``i`` holds the metric at the
        ``i``-th cutoff for every user.
    """
    ranks = [1, 5, 10, 20, 50]
    n_users = X_pred.shape[0]
    ndcgs = np.zeros((len(ranks), n_users))
    recalls = np.zeros((len(ranks), n_users))
    for i, cutoff in enumerate(ranks):
        ndcgs[i, :] = NDCG_binary_at_k_batch(X_pred, X_true, cutoff)
        recalls[i, :] = Recall_at_k_batch(X_pred, X_true, cutoff)
    return ndcgs, recalls