-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathdata.py
47 lines (40 loc) · 1.32 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import numpy as np
def load_data(file_dir):
    """Load a ratings file and re-index users and items to dense 0-based ids.

    Every non-blank line must hold exactly four fields: user id, item id,
    rating, and a fourth field that is read but ignored. Fields are
    separated by ``::`` when the line contains that token, otherwise by
    arbitrary whitespace. Blank / whitespace-only lines are skipped.

    Args:
        file_dir: path of the ratings file to read.

    Returns:
        A tuple ``(N, M, data, rated_item_ids_dict)`` where
        N: the number of distinct users;
        M: the number of distinct items;
        data: the list of rating records, each ``[user_index, item_index,
            rating]`` in file order, with indices assigned in order of first
            appearance;
        rated_item_ids_dict: mapping from raw item id (int) to item index.

    Raises:
        ValueError: if a non-blank line does not have exactly four fields,
            or an id / rating cannot be parsed as a number.
    """
    user_ids_dict, rated_item_ids_dict = {}, {}
    data = []
    # The context manager closes the file even when a malformed line raises.
    with open(file_dir) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A trailing or stray empty line carries no rating.
                continue
            if '::' in line:
                u, i, r, _ = line.split('::')
            else:
                u, i, r, _ = line.split()
            # setdefault hands out the next free index on first sight of an id
            # and returns the already assigned index afterwards.
            u_idx = user_ids_dict.setdefault(int(u), len(user_ids_dict))
            i_idx = rated_item_ids_dict.setdefault(
                int(i), len(rated_item_ids_dict))
            data.append([u_idx, i_idx, float(r)])
    N = len(user_ids_dict)
    M = len(rated_item_ids_dict)
    return N, M, data, rated_item_ids_dict
def sequence2mat(sequence, N, M):
    """Scatter a list of rating records into a dense user-item matrix.

    Args:
        sequence: the list of rating information; each record is indexed as
            ``[row_index, column_index, value]``. May be empty.
        N: row number, i.e. the number of users.
        M: column number, i.e. the number of items.

    Returns:
        mat: user-item rating matrix of shape ``(N, M)``; cells without a
        record stay 0.

    Raises:
        IndexError: if a record's row / column index falls outside the
            ``N`` x ``M`` matrix.
    """
    mat = np.zeros([N, M])
    records_array = np.array(sequence)
    if records_array.size == 0:
        # An empty sequence yields a 1-D array that cannot be sliced by
        # column; there is nothing to fill in, so return the zero matrix.
        return mat
    row = records_array[:, 0].astype(int)
    col = records_array[:, 1].astype(int)
    # Values are narrowed to float32 before being stored, so the matrix holds
    # the ratings at float32 precision.
    values = records_array[:, 2].astype(np.float32)
    mat[row, col] = values
    return mat