-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsimple_test.py
64 lines (55 loc) · 1.7 KB
/
simple_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from env_wrapper import EnvWrapper
from ppo_agent.meta.config import Config
from ppo_agent.agent import CadreAgent
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def visualize_rgb(obs):
obs = np.array(obs, dtype=np.uint8)
obs1 = np.hstack(obs[0:4])
obs2 = np.hstack(obs[4:8])
obs = np.vstack([obs1, obs2])
img = Image.fromarray(obs)
plt.axis('off')
plt.imshow(img)
plt.draw()
plt.pause(0.1)
plt.clf()
all_config = Config.fromfile("config_files/agent_config.py")
env_cfg = all_config.env_cfg
rank = 0
env_cfg.rank = rank
env_cfg.port = env_cfg.port[rank]
env_cfg.routes = env_cfg.routes[rank]
env_cfg.scenarios = env_cfg.scenarios[rank]
env_cfg.town = env_cfg.town[rank]
env_cfg.seq_length = all_config.rollout_cfg.seq_length
env = EnvWrapper(env_cfg)
agent_cfg = all_config.agent_cfg
agent_cfg.rank = rank
agent = CadreAgent(**agent_cfg)
# render = False
render = True
for episode in range(5):
done = False
obs = env.reset()
sum_reward = 0
timestep = 0
while not done:
_, action, *_ = agent.act(obs)
control = agent.convert_action(action)
timestep += 1
# action = env.action_space.sample()
control[0] = 0
if timestep % 20 > 10 and timestep % 20 < 20:
control[1] = 1.0
else:
control[1] = 0.0
control[2] = 0
obs, reward, done, info = env.step(control)
sum_reward += reward
if render:
visualize_rgb(obs['rgb'])
if done:
print('===========> in episode {}, sum reward for steer is {:.2f}, sum reward for throttle is {:.2f}\n'.format(episode, sum_reward[0].item(), sum_reward[1].item()))
break