-- circuit-learning.lua
-- Global variables
MOVE_STEPS = 5 -- make a decision every 5 simulation steps
MAX_VELOCITY = 5 -- maximum wheel velocity
FILENAME = "Qtable-circuit.csv" -- Q-table file name
WHEEL_DIST = -1 -- distance between the wheels (set from robot.wheels.axis_length in init())
n_steps = 0 -- simulation step counter
sum_rewards = 0 -- running sum of rewards
count = 0 -- number of reward samples (for the average reward)
--- This function is executed every time you press the 'execute' button.
function init()
   Qlearning = require "Qlearning"
   WHEEL_DIST = robot.wheels.axis_length
   total_state_acquisition = 0 -- total number of state samples, whether the robot is on the black or the white part of the arena
   on_white_circuit_acquisition = 0 -- number of times the robot is on the correct part of the arena, the white circuit
   --- The first action is to go forward:
   --- if the left and right velocities are equal, the robot goes straight;
   --- if left > right, the robot turns right;
   --- if right > left, the robot turns left.
   local left_v = MAX_VELOCITY
   local right_v = MAX_VELOCITY
   --- Hyperparameters for Q-learning (a sketch of the update rule they feed follows right after this function).
   --- Learning rate: the higher alpha, the more new experience overwrites what was learned before;
   --- the lower alpha, the more the robot keeps its old Q-values.
   alpha = 0.1
   --- Discount factor: ranges from 0 to 1 and sets how much future rewards are weighted relative to immediate ones.
   gamma = 0.9
   --- Epsilon: the probability of picking a random action, from 0 (never random) to 1 (always random).
   epsilon = 0.9 -- not used: the k-weighted selection below is used instead
   --- k: how strongly the selection favours actions with a high Q-table value; must be greater than or equal to 1.
   k = 2
   old_state = get_state() -- the current robot state
   state = old_state
   action = 1
   -- States: each ground-sensor direction can read 0 or 1, and a state is a combination of those readings,
   -- so in total there are 2^8 = 256 states.
   sensor_direction_names = {"N", "NW", "W", "SW", "S", "SE", "E", "NE"}
   number_of_states = math.pow(2, #sensor_direction_names)
   -- Actions: 3 in total.
   velocity_direction_names = {"WNW", "N", "ENE"}
   velocity_directions = {
      ["WNW"] = math.pi / 4, -- 45 degrees
      ["N"] = 0,
      ["ENE"] = - math.pi / 4, -- -45 degrees
   }
   number_of_actions = #velocity_direction_names
   Q_table = {}
   -- Q-table dimension: 256 x 3 = 768 values.
   Q_table = Qlearning.load_Q_table(FILENAME)
   robot.wheels.set_velocity(left_v, right_v)
end
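--[[ For reference, a minimal sketch of the standard Q-learning update that the
     external Qlearning.update_Q_table (called in step() below) is assumed to
     implement; the real module may differ. The Q-table is assumed here to be
     indexed as Q[state][action]. ]]
local function q_update_sketch(alpha, gamma, s, a, r, s_next, Q)
   -- Best Q-value reachable from the next state.
   local best_next = Q[s_next][1]
   for a_next = 2, #Q[s_next] do
      if Q[s_next][a_next] > best_next then
         best_next = Q[s_next][a_next]
      end
   end
   -- Move Q(s,a) toward the bootstrapped target r + gamma * max_a' Q(s',a').
   Q[s][a] = Q[s][a] + alpha * (r + gamma * best_next - Q[s][a])
   return Q
end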
function get_state()
   -- The state goes from 1 to 256.
   -- State 1 means that all sensors read 0 (black).
   local new_state = 1
   for i = 1, #robot.base_ground do
      -- If a base_ground value equals 1, that sensor is over a white area.
      if robot.base_ground[i].value == 1 then
         new_state = new_state + math.pow(2, i - 1)
      end
   end
   return new_state
end
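--[[ Worked example of the encoding above: with the 8 ground sensors, if only
     sensors 1 and 3 read white (value 1), the state is 1 + 2^0 + 2^2 = 6;
     if all 8 sensors read white, the state is 1 + 255 = 256. ]]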
function get_reward()
   -- The reward goes from 0 to 1.
   local white_sensors = 0
   for i = 1, #robot.base_ground do
      if robot.base_ground[i].value == 1 then
         white_sensors = white_sensors + 1
      end
   end
   return math.pow(white_sensors / #robot.base_ground, 2) -- reward = (sensors on white area / total sensors)^2
end
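--[[ Worked example of the reward above: with 6 of the 8 ground sensors on
     white, the reward is (6/8)^2 = 0.5625; squaring penalizes partial coverage,
     so staying fully on the white circuit (reward 1) is favoured. ]]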
-- Perform the chosen action.
function perform_action(action)
   -- Make sure the wheel velocities never exceed MAX_VELOCITY in either direction.
   local function limit_v(left_v, right_v)
      local function limit(value)
         if value > MAX_VELOCITY then
            value = MAX_VELOCITY
         end
         if value < -MAX_VELOCITY then
            value = -MAX_VELOCITY
         end
         return value
      end
      return limit(left_v), limit(right_v)
   end
   local angle = velocity_directions[velocity_direction_names[action]]
   local left_v = MAX_VELOCITY - (angle * WHEEL_DIST / 2)
   local right_v = MAX_VELOCITY + (angle * WHEEL_DIST / 2)
   left_v, right_v = limit_v(left_v, right_v)
   robot.wheels.set_velocity(left_v, right_v)
end
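--[[ Worked example of the steering above, assuming an axis length of 0.14 m
     (the actual value is read from robot.wheels.axis_length in init()):
     for action "WNW" the angle is pi/4, so
       left_v  = 5 - (pi/4 * 0.14 / 2), about 4.95
       right_v = 5 + (pi/4 * 0.14 / 2), about 5.05
     and the robot curves gently to the left because the right wheel is faster. ]]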
--- This function is executed at each time step.
-- It must contain the logic of your controller.
function step()
   n_steps = n_steps + 1
   -- Make a decision every MOVE_STEPS steps (twice per second at the default 10 ticks per second).
   if n_steps % MOVE_STEPS == 0 then
      total_state_acquisition = total_state_acquisition + 1
      -- Read the current state.
      state = get_state()
      -- Update the counter if the robot was at least partly on the white area (old_state ~= 1).
      if old_state ~= 1 then
         on_white_circuit_acquisition = on_white_circuit_acquisition + 1
      end
      local reward = get_reward()
      Q_table = Qlearning.update_Q_table(alpha, gamma, old_state, action, reward, state, Q_table)
      sum_rewards = sum_rewards + reward
      count = count + 1
      -- Select the next action and update the state.
      -- action = Qlearning.get_random_action(epsilon, state, Q_table)
      action = Qlearning.get_weighted_action(k, state, Q_table) -- select from the state just measured
      perform_action(action)
      old_state = state
   end
end
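--[[ A minimal sketch of one plausible k-weighted action selection, assuming
     Qlearning.get_weighted_action picks an action with probability proportional
     to its Q-value raised to the power k (k = 1 is proportional selection,
     larger k is greedier). The real Qlearning module may do this differently. ]]
local function weighted_action_sketch(k, s, Q)
   local weights = {}
   local total = 0
   for a = 1, #Q[s] do
      -- Clamp at zero so negative Q-values never get picked by the roulette wheel.
      weights[a] = math.max(Q[s][a], 0) ^ k
      total = total + weights[a]
   end
   if total == 0 then
      -- No useful information yet: fall back to a uniformly random action.
      return math.random(#Q[s])
   end
   -- Roulette-wheel selection over the weights.
   local r = math.random() * total
   for a = 1, #Q[s] do
      r = r - weights[a]
      if r <= 0 then
         return a
      end
   end
   return #Q[s]
end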
--[[ This function is executed every time you press the 'reset'
button in the GUI. It is supposed to restore the state
of the controller to whatever it was right after init() was
called. The state of sensors and actuators is reset
automatically by ARGoS. ]]
function reset()
   local left_v = MAX_VELOCITY
   local right_v = MAX_VELOCITY
   action = 1
   robot.wheels.set_velocity(left_v, right_v)
   n_steps = 0
end
--[[ This function is executed only once, when the robot is removed
from the simulation ]]
function destroy()
   Qlearning.save_Q_table(FILENAME, Q_table)
   -- Fraction of sampled decisions in which the robot was at least partly on the white circuit,
   -- truncated to four decimal digits.
   metric = math.floor((on_white_circuit_acquisition / total_state_acquisition) * 10000) / 10000
   -- print(metric .. ", !!marker!!")
   file = io.open("results/average_reward_noise.txt", "a")
   file:write(sum_rewards / count .. "\n")
   file:close()
end
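--[[ Worked example of the metric above: if the robot was at least partly on the
     white circuit in 8765 of 10000 sampled decisions, the ratio is 0.8765 and
     math.floor(0.8765 * 10000) / 10000 leaves it unchanged; a ratio of 0.87659
     would be truncated (not rounded) to 0.8765. ]]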