agent.py
import random

import numpy as np
import torch

from game import *
from model import *

DEFAULT_EPSILON = 0.5
DEFAULT_GAMMA = 0.3
DEFAULT_BATCH_SIZE = 50
DEFAULT_LEARNING_RATE = 0.001
DEFAULT_HIDDEN_UNITS = 256


class Agent:
    """Deep Q-learning agent for the Snake game."""

    def __init__(
        self,
        epsilon: float = DEFAULT_EPSILON,
        gamma: float = DEFAULT_GAMMA,
        batch_size: int = DEFAULT_BATCH_SIZE,
        learning_rate: float = DEFAULT_LEARNING_RATE,
        model_hidden_units: int = DEFAULT_HIDDEN_UNITS,
        model_file_path: str | None = None,
    ):
        self.epsilon = epsilon  # exploration rate for epsilon-greedy moves
        self.gamma = gamma  # discount factor for future rewards
        self.game_iteration = 1
        self.memory = []  # replay buffer of (state, action, reward, next_state, game_over)
        # 16 state features in, 3 possible moves out
        self.model = Net(16, model_hidden_units, 3, model_file_path).to(device)
        self.training = QLearning(self.model, lr=learning_rate, gamma=self.gamma)
        self.batch_size = batch_size

    def get_state(self, game: Snake):
        """Encode the current game state as a 16-element binary feature vector."""
        head = game.head

        point_l = Point(head.x - game.block_size, head.y)
        point_r = Point(head.x + game.block_size, head.y)
        point_u = Point(head.x, head.y - game.block_size)
        point_d = Point(head.x, head.y + game.block_size)
        point_ll = Point(head.x - 2 * game.block_size, head.y)
        point_rr = Point(head.x + 2 * game.block_size, head.y)
        point_uu = Point(head.x, head.y - 2 * game.block_size)
        point_dd = Point(head.x, head.y + 2 * game.block_size)
        point_lu = Point(head.x - game.block_size, head.y - game.block_size)
        point_ld = Point(head.x - game.block_size, head.y + game.block_size)
        point_ru = Point(head.x + game.block_size, head.y - game.block_size)
        point_rd = Point(head.x + game.block_size, head.y + game.block_size)

        dir_l = game.direction == Direction.LEFT
        dir_r = game.direction == Direction.RIGHT
        dir_u = game.direction == Direction.UP
        dir_d = game.direction == Direction.DOWN

        state = [
            # Danger straight
            (dir_u and game.check_collision(point_u))
            or (dir_r and game.check_collision(point_r))
            or (dir_d and game.check_collision(point_d))
            or (dir_l and game.check_collision(point_l)),
            # Danger two blocks straight ahead
            (dir_u and game.check_collision(point_uu))
            or (dir_r and game.check_collision(point_rr))
            or (dir_d and game.check_collision(point_dd))
            or (dir_l and game.check_collision(point_ll)),
            # Danger right
            (dir_u and game.check_collision(point_r))
            or (dir_r and game.check_collision(point_d))
            or (dir_d and game.check_collision(point_l))
            or (dir_l and game.check_collision(point_u)),
            # Danger left
            (dir_u and game.check_collision(point_l))
            or (dir_r and game.check_collision(point_u))
            or (dir_d and game.check_collision(point_r))
            or (dir_l and game.check_collision(point_d)),
            # Danger behind left
            (dir_u and game.check_collision(point_ld))
            or (dir_r and game.check_collision(point_lu))
            or (dir_d and game.check_collision(point_ru))
            or (dir_l and game.check_collision(point_rd)),
            # Danger behind right
            (dir_u and game.check_collision(point_rd))
            or (dir_r and game.check_collision(point_ld))
            or (dir_d and game.check_collision(point_lu))
            or (dir_l and game.check_collision(point_ru)),
            # Danger ahead left
            (dir_u and game.check_collision(point_lu))
            or (dir_r and game.check_collision(point_ru))
            or (dir_d and game.check_collision(point_rd))
            or (dir_l and game.check_collision(point_ld)),
            # Danger ahead right
            (dir_u and game.check_collision(point_ru))
            or (dir_r and game.check_collision(point_rd))
            or (dir_d and game.check_collision(point_ld))
            or (dir_l and game.check_collision(point_lu)),
            # Move direction
            dir_l,  # Going left
            dir_r,  # Going right
            dir_u,  # Going up
            dir_d,  # Going down
            # Food location relative to the head
            game.food.x < game.head.x,  # Food left
            game.food.x > game.head.x,  # Food right
            game.food.y < game.head.y,  # Food up
            game.food.y > game.head.y,  # Food down
        ]
        return np.array(state, dtype=int)

    def predict_movement(self, state: list, disable_randomness: bool = False):
        """Return a one-hot move, exploring randomly with probability epsilon."""
        move_predicted = [0, 0, 0]
        if not disable_randomness and random.random() < self.epsilon:
            # Exploration: pick one of the three moves uniformly at random.
            move = random.randint(0, 2)
            move_predicted[move] = 1
            return move_predicted
        # Exploitation: take the move with the highest predicted Q-value.
        state0 = torch.tensor(state, dtype=torch.float).to(device)
        prediction = self.model(state0)
        move = torch.argmax(prediction).item()
        move_predicted[move] = 1
        return move_predicted

    def train_short_memory(
        self, state: list, action: list, reward: int, next_state: list, game_over: bool
    ):
        # Single-step update on the transition that just happened.
        self.training.train_step(state, action, reward, next_state, game_over)

    def train_long_memory(self):
        # Experience replay: shuffle the stored transitions and train in mini-batches.
        random.shuffle(self.memory)
        for start in range(0, len(self.memory), self.batch_size):
            mini_sample = self.memory[start : start + self.batch_size]
            # Each stored transition is a 5-tuple, so unpack exactly five sequences.
            states, actions, rewards, next_states, game_overs = zip(*mini_sample)
            self.training.train_step(states, actions, rewards, next_states, game_overs)

    def store(
        self, state: list, action: list, reward: int, next_state: list, game_over: bool
    ):
        # Append one transition to the replay buffer.
        self.memory.append((state, action, reward, next_state, game_over))

    def increment_game_iteration(self):
        self.game_iteration += 1

    def update_epsilon(self, decrement: float = 2e-4, inf_epsilon: float = 0.05):
        # Decay the exploration rate linearly, never dropping below inf_epsilon.
        self.epsilon = max(inf_epsilon, self.epsilon - decrement)

    def clear_memory(self):
        self.memory = []
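

# A minimal training-loop sketch showing how the pieces above could fit together.
# It is not part of the original module: it assumes the Snake class from game.py
# exposes play_step(action) -> (reward, game_over, score) and reset(); those
# names are illustrative assumptions, not interfaces confirmed by this file.
if __name__ == "__main__":
    agent = Agent()
    game = Snake()
    while True:
        state = agent.get_state(game)
        action = agent.predict_movement(state)
        reward, game_over, _score = game.play_step(action)  # assumed Snake API
        next_state = agent.get_state(game)
        agent.train_short_memory(state, action, reward, next_state, game_over)
        agent.store(state, action, reward, next_state, game_over)
        if game_over:
            game.reset()  # assumed Snake API
            agent.train_long_memory()
            agent.increment_game_iteration()
            agent.update_epsilon()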