QLearning.py
import time
from collections import deque
import numpy as np
import matplotlib.pyplot as plt
from Agents import Agent
from Profile import Profile
from Hyperparameter import *
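# Note: the wildcard import above is expected (judging from how the names are
# used in this file) to supply at least the following hyperparameters from
# Hyperparameter.py: EPS_START, EPS_END, EPS_DECAY, B_START, NR_EPISODES,
# MAX_NR_STEPS, VAL_ENV_SOLVED, WITH_PROFILING, SAVE, PLOT, PLOTNAME,
# FILENAME_FOR_LOADING, FILENAME_FOR_SAVING.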
def profiling(profile):
    """Print the accumulated counters and timers collected in a Profile object."""
    print("total_number_learn_calls:\t", profile.total_number_learn_calls)
    print("total_number_sampling_calls:\t", profile.total_number_sampling_calls)
    print("total_number_evaluate_calls:\t", profile.total_number_evaluate_calls)
    print("total_time_sampling:\t", profile.total_time_sampling)
    print("total_time_learning:\t", profile.total_time_learning)
    print("\ttotal_time_evaluation:\t", profile.total_time_evaluation)
    print("\ttotal_time_training:\t", profile.total_time_training)
    print("\ttotal_time_soft_update:\t", profile.total_time_soft_update)
    print("\ttotal_time_samples_to_environment_values:\t", profile.total_time_samples_to_environment_values)
    print("\ttotal_time_updating_priorities:\t", profile.total_time_updating_priorities)
    print("\ttotal_time_introducing_isw:\t", profile.total_time_introducing_isw)
    print()
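# For orientation: the update that the agent's learn step approximates is the
# standard Q-learning target (textbook background, not code taken from
# Agents.py):
#
#     Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
#
# In the DQN setting, the max over the target network's Q-values for next_state
# provides the bootstrap target; the profiling fields above suggest that
# agent.step() additionally uses prioritized replay with importance-sampling
# weights (isw) and soft target-network updates.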
def dqn(agent, env):
    """Train the agent with a DQN-style loop and return the list of episode scores."""
    stats = agent.profile
    scores = []                                # scores from every episode
    scores_window = deque(maxlen=100)          # rolling window of the last 100 scores
    eps = EPS_START                            # initial epsilon for epsilon-greedy action selection
    B = B_START                                # initial importance-sampling exponent
    for i_episode in range(1, NR_EPISODES + 1):
        state = env.reset()
        score = 0
        for t in range(MAX_NR_STEPS):
            # choose an action epsilon-greedily
            action = agent.act(state, eps)
            # get the resulting transition from the environment
            next_state, reward, done, info = env.step(action)
            # store the experience and (possibly) train the networks
            agent.step(state, action, reward, next_state, done, B)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)            # save the most recent score
        scores.append(score)
        eps = max(EPS_END, EPS_DECAY * eps)    # decay epsilon
        fraction = min(i_episode / NR_EPISODES, 1.0)
        B = B + fraction * (1.0 - B)           # anneal B toward 1.0
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
            if WITH_PROFILING:
                profiling(stats)
        if np.mean(scores_window) >= VAL_ENV_SOLVED:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode - 100,
                                                                                         np.mean(scores_window)))
            break
    return scores
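# Illustrative sketch (not part of the original training code): the exploration
# rate used in dqn() decays geometrically per episode and is floored at EPS_END.
# This hypothetical helper just materializes that schedule so it can be
# inspected or plotted.
def _epsilon_schedule_preview(nr_episodes=NR_EPISODES):
    """Return the per-episode epsilon values produced by the decay rule in dqn()."""
    eps = EPS_START
    schedule = []
    for _ in range(nr_episodes):
        schedule.append(eps)
        eps = max(EPS_END, EPS_DECAY * eps)  # same update as in the training loop
    return schedule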
def start_agent(env, seed):
    """Build an Agent for the given environment, train it with dqn(), then
    optionally save the networks and plot the learning curve."""
    stats = Profile()
    # get information about states and actions from the environment wrapper
    state_dimension = env.get_state_dim()
    number_actions = env.get_nr_actions()
    filename_local = "Neural_networks/" + FILENAME_FOR_LOADING + "_model_local.pth"
    filename_target = "Neural_networks/" + FILENAME_FOR_LOADING + "_model_target.pth"
    agent = Agent(state_shape=state_dimension, number_actions=number_actions, filename_local=filename_local,
                  filename_target=filename_target, seed=seed, profile=stats)
    start_time = time.time()
    scores = dqn(agent=agent, env=env)
    print("Time for learning:", time.time() - start_time)
    if SAVE:
        filename_local = FILENAME_FOR_SAVING + "_model_local.pth"
        filename_target = FILENAME_FOR_SAVING + "_model_target.pth"
        agent.qnetwork_local.save(filename_local)
        agent.qnetwork_target.save(filename_target)
    if PLOT:
        # plot the per-episode scores
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(np.arange(len(scores)), scores)
        ax.set_ylabel('Score')
        ax.set_xlabel('Episode #')
        plt.savefig(PLOTNAME)
        plt.show()
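# Usage sketch (illustrative): start_agent() assumes an environment wrapper that
# exposes reset(), step(action) -> (next_state, reward, done, info),
# get_state_dim() and get_nr_actions(). The concrete environment class is not
# defined in this file, so "make_env" below is a hypothetical placeholder.
#
# if __name__ == "__main__":
#     env = make_env()          # hypothetical factory for the environment wrapper
#     start_agent(env, seed=0)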