This repository has been archived by the owner on Apr 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
109 lines (84 loc) · 2.93 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import csv
import os
import estimate
import plotly.graph_objects as go
def infinity():
    """Yield the integers 0, 1, 2, ... without ever stopping.

    The caller is responsible for breaking out of the iteration.
    """
    current = -1
    while True:
        current += 1
        yield current
def normalisation(data):
    """Rescale ``data`` linearly so its values lie in the interval [0, 1].

    Args:
        data: A sized, re-iterable collection of numbers (e.g. a list).

    Returns:
        A new list where the smallest input maps to 0.0 and the largest to
        1.0. An empty input yields an empty list. When every value is
        identical there is no range to scale by, so each entry maps to 0.0
        instead of raising ``ZeroDivisionError``.
    """
    if len(data) == 0:
        return []
    # Compute the bounds once; evaluating min()/max() per element made the
    # original quadratic in the number of samples.
    lowest = min(data)
    span = max(data) - lowest
    if span == 0:
        return [0.0 for _ in data]
    return [(value - lowest) / span for value in data]
def train(data, theta, learning_rate):
    """Perform one batch update of ``theta`` and return the new parameters.

    For every sample the prediction error ``estimate.price(x, theta) - y`` is
    accumulated twice: once as-is and once weighted by ``x``. Each parameter
    is then moved against its accumulated error, scaled by ``learning_rate``
    and averaged over the number of samples.
    NOTE(review): this matches the gradient of the mean squared error only if
    ``estimate.price`` evaluates ``theta[0] + theta[1] * x`` -- confirm in
    the ``estimate`` module.

    Args:
        data: A pair ``[xs, ys]`` of equally long sequences of numbers.
        theta: Current parameters ``[theta_0, theta_1]``; not modified.
        learning_rate: Step size applied to the averaged error sums.

    Returns:
        A new list holding the updated parameters.

    Raises:
        ValueError: If the two columns differ in length or contain no
            samples (previously an ``IndexError``/``ZeroDivisionError`` or a
            silently ignored tail).
    """
    xs, ys = data
    sample_count = len(xs)
    if sample_count != len(ys):
        raise ValueError('data columns must have the same length')
    if sample_count == 0:
        raise ValueError('cannot train on an empty data set')

    # Plain running totals (not the builtin sum()) so the floating-point
    # accumulation order is exactly sample order.
    error_total = 0
    weighted_error_total = 0
    for x, y in zip(xs, ys):
        error = estimate.price(x, theta) - y
        error_total += error
        weighted_error_total += error * x

    totals = (error_total, weighted_error_total)
    return [
        parameter - learning_rate * total / sample_count
        for parameter, total in zip(theta, totals)
    ]
# Stop early when the input file is missing. SystemExit is raised directly
# (the interactive ``exit()`` helper is only present when the ``site`` module
# is loaded) and with a non-zero status so callers can detect the failure.
if not os.path.isfile('data.csv'):
    print('Data file not found!')
    raise SystemExit(1)

print('Loading data...')
mileages = []
prices = []
# newline='' lets the csv module handle line endings itself, as its
# documentation recommends. The ``with`` block closes the file on exit.
with open('data.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    # Skip the header row; the default avoids StopIteration on an empty file.
    header = next(reader, None)
    for row in reader:
        if not row:
            # Blank lines (e.g. a trailing newline) carry no sample.
            continue
        # Column 0 is read as the mileage, column 1 as the price.
        mileages.append(int(row[0]))
        prices.append(int(row[1]))
print('Training...')
theta = [0, 0]
learning_rate = 0.1
# Both columns are scaled to [0, 1] before training.
normalised_mileages = normalisation(mileages)
normalised_prices = normalisation(prices)

# Figure showing the normalised samples plus intermediate fitted lines.
trained_figure = go.Figure()
trained_figure.update_layout(
    xaxis_title='Mileage', yaxis_title='Price', title='Training')
trained_figure.add_trace(go.Scatter(
    x=normalised_mileages, y=normalised_prices, mode='markers', name='Data'))

print('Learning rate:', learning_rate)
# The endpoints of every intermediate line are the same, so compute them once
# instead of on each plotted iteration.
line_x = [min(normalised_mileages), max(normalised_mileages)]
# Plot one intermediate line every int(1 / learning_rate) iterations (every
# 10th for 0.1). Clamp to 1 so a learning rate above 1 cannot produce a
# modulo-by-zero.
trace_every = max(1, int(1 / learning_rate))
# NOTE(review): the loop ends only when an update leaves theta exactly
# unchanged; there is no iteration cap or tolerance.
for i in infinity():
    new = train([normalised_mileages, normalised_prices],
                theta, learning_rate)
    if new == theta:
        break
    theta = new
    # '\r' keeps the progress output on a single terminal line.
    print('Theta Normalised:', theta, end='\r')
    if i % trace_every != 0:
        continue
    line_y = [estimate.price(value, theta) for value in line_x]
    trained_figure.add_trace(go.Scatter(
        x=line_x, y=line_y, mode='lines', name='Estimate {}'.format(i)))
# Move past the progress line written with end='\r'.
print()
# Root-mean-square error of the fitted parameters on the normalised data.
# The accumulator has its own name so the builtin ``sum`` is not overwritten
# at module level, and it is added up in sample order.
squared_error_total = 0
for mileage, price in zip(normalised_mileages, normalised_prices):
    squared_error_total += (estimate.price(mileage, theta) - price) ** 2
MSE = squared_error_total / len(normalised_mileages)
RMSE = MSE ** 0.5
# Convert the parameters learnt on the [0, 1]-scaled data back to the
# original mileage/price units.
# NOTE(review): the rescaling below is the exact inverse of the min-max
# scaling provided estimate.price computes theta[0] + theta[1] * x -- confirm.
mileage_span = max(mileages) - min(mileages)
price_span = max(prices) - min(prices)
slope = theta[1] * price_span / mileage_span
# The intercept uses the already rescaled slope.
intercept = theta[0] * price_span + min(prices) - slope * min(mileages)
# Update the existing list in place rather than rebinding the name.
theta[0], theta[1] = intercept, slope
print('Theta Denormalised:', theta)

# Fitted line drawn between the smallest and largest mileage.
x = [min(mileages), max(mileages)]
y = [estimate.price(mileage, theta) for mileage in x]

# Figure showing the raw samples with the final fitted line.
denormalised_figure = go.Figure()
denormalised_figure.update_layout(xaxis_title='Mileage', yaxis_title='Price')
data_trace = go.Scatter(x=mileages, y=prices, mode='markers', name='Data')
denormalised_figure.add_trace(data_trace)
estimate_trace = go.Scatter(x=x, y=y, mode='lines', name='Estimate')
denormalised_figure.add_trace(estimate_trace)
print('Writing result...')
# newline='' stops the text layer from translating the '\r\n' that csv.writer
# emits, which would otherwise produce blank rows on Windows. The ``with``
# block closes the file, so no explicit close() is needed.
with open('result.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['theta_0', 'theta_1'])
    writer.writerow(theta)

# RMSE was computed on the normalised data; it is printed scaled by 100.
print('Done! RMSE:', RMSE * 100, '%')
trained_figure.show()
denormalised_figure.show()
# %%