forked from yorgoon/DiffE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcustom_utils.py
34 lines (25 loc) · 922 Bytes
/
custom_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
def load_data(root_dir, num_files):
# load the data into a dataframe
X = []
Y = []
for i in range(num_files):
text_file_path = root_dir + "/" + str(i) + "_word.txt"
text_file = open(text_file_path, "r")
emg_file_path = root_dir + "/" + str(i) + "_emg.npy"
emg_file = np.load(emg_file_path)
X.append(emg_file)
Y.append(text_file.read())
# do one hot encoding on Y
Y = np.array(Y)
Y = Y.reshape(-1, 1)
enc = OneHotEncoder(handle_unknown="ignore")
enc.fit(Y)
Y = enc.transform(Y).toarray()
max_dimension = max(arr.shape[0] for arr in X)
# pad the arrays to be the same size
X = np.array([np.pad(arr, ((0, max_dimension - arr.shape[0]), (0, 0)), 'constant') for arr in X])
return X,Y
load_data("single_word_mouthed", 150)