-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
150 lines (111 loc) · 4.54 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
from datetime import datetime, timedelta
from random import shuffle
def read_period_file(file):
    """Read a Period Tracker (Simple Design Ltd.) text export.

    Every relevant line holds tab-separated fields: a date in the form
    ``%d %b, %Y`` followed by an event name. Only the events
    ``Period Starts`` and ``Period Ends`` are used; lines with fewer than
    two fields (e.g. blank lines) and lines with other events are skipped.

    NOTE(review): the cycle length is computed against the previous start
    entry in file order, so the export is presumably chronological --
    confirm against a real export.

    Args:
        file: Path of the exported text file (read as UTF-8).

    Returns:
        A list with one ``[start, cycle_length, menstruation_length]`` entry
        per period, beginning with the second period in the file (the first
        one has no predecessor to measure a cycle against). ``start`` is a
        ``datetime``, ``cycle_length`` the number of days since the previous
        period's start, and ``menstruation_length`` the inclusive number of
        days from start to end. Periods without a recorded end are left out,
        but their start still serves as reference for the following cycle.

    Raises:
        ValueError: If a date does not match ``%d %b, %Y``.
        IndexError: If a ``Period Ends`` line appears before any
            ``Period Starts`` line.
    """
    date_format = "%d %b, %Y"
    period_cal = []

    # Collect [start, end] pairs in file order.
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                # Blank or malformed line: nothing to parse.
                continue
            event = fields[1]
            if event == "Period Starts":
                period_cal.append([datetime.strptime(fields[0], date_format)])
            elif event == "Period Ends":
                period_cal[-1].append(
                    datetime.strptime(fields[0], date_format)
                )

    # Pair every period with its predecessor instead of looking the position
    # up with list.index(), which is quadratic and returns the first match
    # when two entries compare equal.
    periods = []
    for previous, current in zip(period_cal, period_cal[1:]):
        if len(current) < 2:
            # No end recorded (e.g. an ongoing period): the menstruation
            # length is unknown, so no entry can be produced for it.
            continue
        start, end = current[0], current[1]
        cycle_length = (start - previous[0]).days
        menstruation_length = (end - start).days + 1
        periods.append([start, cycle_length, menstruation_length])
    return periods
def make_train_test_sets(periods):
    """Build sliding-window samples and split them into train and test sets.

    Each sample consists of the ``[cycle_length, menstruation_length]`` pairs
    (the last two fields of an entry) of three consecutive periods; its
    target is the pair of the period that follows them. The sample list is
    repeated five times ("augmentation") and the first 80 % of the repeated
    list becomes the training set, the rest the test set.

    Args:
        periods: Sequence of period entries whose first field is the start
            of the period and whose last two fields are cycle length and
            menstruation length.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y, last_known_period)``.
        The first four are numpy arrays. ``last_known_period`` is the start
        (first field) of the period immediately preceding the first test
        target, so that predicted cycle lengths can be added to it. When
        there are fewer than four periods no sample can be built; the
        arrays are then empty and ``last_known_period`` is the start of the
        last period, or ``None`` if ``periods`` is empty.
    """
    window = 3
    n_samples = max(len(periods) - window, 0)

    x = []
    y = []
    # Walk by position: list.index() would return the first equal entry and
    # build the wrong window whenever two entries compare equal.
    for first in range(n_samples):
        x.append([[p[-2], p[-1]] for p in periods[first:first + window]])
        target = periods[first + window]
        y.append([target[-2], target[-1]])
    assert len(x) == len(y)

    x = x * 5
    y = y * 5
    train_size = int(len(y) * 0.8)
    train_x = np.array(x[0:train_size])
    train_y = np.array(y[0:train_size])
    test_x = np.array(x[train_size:len(x)])
    test_y = np.array(y[train_size:len(y)])

    if n_samples:
        # The repeated sample list cycles every n_samples entries (not every
        # len(periods) entries), so map the first test sample back to its
        # window and take the last period inside that window.
        first_test_window = train_size % n_samples
        last_known_period = periods[first_test_window + window - 1][0]
    else:
        last_known_period = periods[-1][0] if periods else None
    return train_x, train_y, test_x, test_y, last_known_period
def load_synthetic_data(file):
    """Load synthetic period data and split it into train and test sets.

    The file holds one period per line as tab-separated integers; the last
    two values of a line are used as the ``[cycle_length,
    menstruation_length]`` pair. Blank lines are skipped. Samples are built
    from three consecutive periods with the following period as target, the
    sample list is repeated five times, and the first 80 % of the repeated
    list becomes the training set.

    Args:
        file: Path of the tab-separated text file.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` of numpy arrays.

    Raises:
        ValueError: If a field cannot be converted to ``int``.
    """
    periods = []
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # int('') would raise on an empty line.
                continue
            periods.append([int(value) for value in stripped.split("\t")])

    window = 3
    x = []
    y = []
    # Rows are plain integers and therefore repeat often; list.index() would
    # always return the first equal row and build the window from the wrong
    # place, so iterate by position instead.
    for first in range(max(len(periods) - window, 0)):
        x.append([[p[-2], p[-1]] for p in periods[first:first + window]])
        target = periods[first + window]
        y.append([target[-2], target[-1]])
    assert len(x) == len(y)

    x = x * 5
    y = y * 5
    train_size = int(len(y) * 0.8)
    train_x = np.array(x[0:train_size])
    train_y = np.array(y[0:train_size])
    test_x = np.array(x[train_size:len(x)])
    test_y = np.array(y[train_size:len(y)])
    return train_x, train_y, test_x, test_y
def evaluate_predictions(test_y, predictions):
    """Return the share of exactly matching cycle and menstruation lengths.

    Both arguments are equally long sequences of
    ``[cycle_length, menstruation_length]`` pairs. The result is the tuple
    ``(cycle_accuracy, menstruation_accuracy)``, each being the number of
    exact matches divided by the number of test entries.
    """
    total = len(test_y)
    assert total == len(predictions)

    cycle_hits = sum(
        1 for actual, guess in zip(test_y, predictions)
        if actual[0] == guess[0]
    )
    menstr_hits = sum(
        1 for actual, guess in zip(test_y, predictions)
        if actual[1] == guess[1]
    )
    return cycle_hits / total, menstr_hits / total
def _plain_number(value):
    """Return numpy-style scalars as built-in numbers; pass others through."""
    return value.item() if hasattr(value, "item") else value


def print_predictions(last_known_period, predictions):
    """Turn predicted lengths into dates, print them and return them.

    The first predicted period starts ``predictions[0][0]`` days after
    ``last_known_period``; every further period starts its own cycle length
    after the start of the previously predicted one.

    Args:
        last_known_period: ``datetime`` of the start of the last known
            period.
        predictions: Sequence of ``[cycle_length, menstruation_length]``
            pairs in days. Built-in numbers and numpy scalars (e.g. rows of
            a numpy array) are accepted.

    Returns:
        A list of ``[start, end, menstruation_length]`` entries, one per
        prediction (empty when there are no predictions). ``start`` and
        ``end`` are ``datetime`` objects; the length is the value taken from
        ``predictions`` unchanged.
    """
    next_periods = []
    previous_start = last_known_period
    for prediction in predictions:
        # timedelta() rejects numpy integer scalars, so convert them to
        # built-in numbers first.
        cycle_days = _plain_number(prediction[0])
        menstruation_days = _plain_number(prediction[1])
        start = previous_start + timedelta(days=cycle_days)
        # NOTE(review): the end is start + length; if the length is meant as
        # an inclusive day count the last day would be one day earlier --
        # confirm the intended convention before changing it.
        end = previous_start + timedelta(days=cycle_days + menstruation_days)
        next_periods.append([start, end, prediction[1]])
        previous_start = start

    for num, period in enumerate(next_periods):
        print("{}. From {} to {}, length: {}".format(
            num,
            period[0].strftime('%d.%m.%Y'),
            period[1].strftime('%d.%m.%Y'),
            period[2],
        ))
    return next_periods