forked from Farama-Foundation/Minigrid
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_tests.py
executable file
·120 lines (93 loc) · 3.15 KB
/
run_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
import random
import numpy as np
import gym
from gym_minigrid.register import env_list
from gym_minigrid.minigrid import Grid, OBJECT_TO_IDX
# Test specifically importing a specific environment
from gym_minigrid.envs import DoorKeyEnv
# Test importing wrappers
from gym_minigrid.wrappers import *
##############################################################################
print('%d environments registered' % len(env_list))
for env_name in env_list:
print('testing "%s"' % env_name)
# Load the gym environment
env = gym.make(env_name)
env.max_steps = min(env.max_steps, 200)
env.reset()
env.render('rgb_array')
# Verify that the same seed always produces the same environment
for i in range(0, 5):
seed = 1337 + i
env.seed(seed)
grid1 = env.grid
env.seed(seed)
grid2 = env.grid
assert grid1 == grid2
env.reset()
# Run for a few episodes
num_episodes = 0
while num_episodes < 5:
# Pick a random action
action = random.randint(0, env.action_space.n - 1)
obs, reward, done, info = env.step(action)
# Validate the agent position
assert env.agent_pos[0] < env.width
assert env.agent_pos[1] < env.height
# Test observation encode/decode roundtrip
img = obs['image']
vis_mask = img[:, :, 0] != OBJECT_TO_IDX['unseen'] # hackish
img2 = Grid.decode(img).encode(vis_mask=vis_mask)
assert np.array_equal(img, img2)
# Test the env to string function
str(env)
# Check that the reward is within the specified range
assert reward >= env.reward_range[0], reward
assert reward <= env.reward_range[1], reward
if done:
num_episodes += 1
env.reset()
env.render('rgb_array')
# Test the close method
env.close()
env = gym.make(env_name)
env = ImgObsWrapper(env)
env.reset()
env.step(0)
env.close()
# Test the fully observable wrapper
env = gym.make(env_name)
env = FullyObsWrapper(env)
env.reset()
obs, _, _, _ = env.step(0)
assert obs.shape == env.observation_space.shape
env.close()
env = gym.make(env_name)
env = FlatObsWrapper(env)
env.reset()
env.step(0)
env.close()
env = gym.make(env_name)
env = AgentViewWrapper(env, 5)
env.reset()
env.step(0)
env.close()
##############################################################################
print('testing agent_sees method')
env = gym.make('MiniGrid-DoorKey-6x6-v0')
goal_pos = (env.grid.width - 2, env.grid.height - 2)
# Test the "in" operator on grid objects
assert ('green', 'goal') in env.grid
assert ('blue', 'key') not in env.grid
# Test the env.agent_sees() function
env.reset()
for i in range(0, 500):
action = random.randint(0, env.action_space.n - 1)
obs, reward, done, info = env.step(action)
goal_visible = ('green', 'goal') in Grid.decode(obs['image'])
agent_sees_goal = env.agent_sees(*goal_pos)
assert agent_sees_goal == goal_visible
if done:
env.reset()
#############################################################################