-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathload_cf_data.py
44 lines (31 loc) · 1.36 KB
/
load_cf_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import numpy as np
import pandas as pd
import scipy.sparse as sparse
def load_train_data(csv_file, n_items):
tp = pd.read_csv(csv_file)
n_users = tp['uid'].max() + 1
rows, cols = tp['uid'], tp['sid']
data = sparse.csr_matrix((np.ones_like(rows),
(rows, cols)), dtype='float64',
shape=(n_users, n_items))
return data
def load_tr_te_data(csv_file_tr, csv_file_te, n_items):
tp_tr = pd.read_csv(csv_file_tr)
tp_te = pd.read_csv(csv_file_te)
start_idx = min(tp_tr['uid'].min(), tp_te['uid'].min())
end_idx = max(tp_tr['uid'].max(), tp_te['uid'].max())
rows_tr, cols_tr = tp_tr['uid'] - start_idx, tp_tr['sid']
rows_te, cols_te = tp_te['uid'] - start_idx, tp_te['sid']
data_tr = sparse.csr_matrix((np.ones_like(rows_tr),
(rows_tr, cols_tr)), dtype='float64', shape=(end_idx - start_idx + 1, n_items))
data_te = sparse.csr_matrix((np.ones_like(rows_te),
(rows_te, cols_te)), dtype='float64', shape=(end_idx - start_idx + 1, n_items))
return data_tr, data_te
def get_n_items(pro_dir):
unique_sid = list()
with open(os.path.join(pro_dir, 'unique_sid.txt'), 'r') as f:
for line in f:
unique_sid.append(line.strip())
n_items = len(unique_sid)
return n_items