This repository has been archived by the owner on Oct 26, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunction_approximation.py
182 lines (129 loc) · 4.92 KB
/
function_approximation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import numpy as np
import copy
from gym.spaces import Discrete, Box, Tuple
"""
A function approximation takes in a space and applies a function to
a value contained within that space, converting it into another form.
"""
class FunctionApproximationBase(object):
    """Common interface for objects that convert values tied to a space.

    The base class only stores the space and reports its kind and size;
    ``convert``, ``configure`` and ``export`` must be supplied by subclasses.
    """

    def __init__(self, space=None):
        self.space = space

    @property
    def space_type(self):
        """One-letter code for the stored space.

        'D' for Discrete, 'B' for Box, 'T' for Tuple; None for anything else.
        """
        known_kinds = ((Discrete, 'D'), (Box, 'B'), (Tuple, 'T'))
        for space_cls, code in known_kinds:
            if isinstance(self.space, space_cls):
                return code
        return None

    @property
    def num_discrete(self):
        """Size of the stored space.

        Discrete: ``space.n``; Box: number of elements in the flattened
        ``space.low``; Tuple: number of sub-spaces. None for any other space.
        """
        kind = self.space_type
        if kind == 'D':
            return self.space.n
        if kind == 'B':
            return len(self.space.low.flatten())
        if kind == 'T':
            return len(self.space.spaces)
        return None

    @property
    def n_total(self):
        """Size reported to users of this approximation.

        Subclasses may override this to report a different number while
        ``num_discrete`` keeps exposing the true size of the space.
        """
        return self.num_discrete

    def convert(self, array):
        """Convert an action-value array; must be implemented by subclasses."""
        raise NotImplementedError

    def configure(self, space):
        """Attach a space; must be implemented by subclasses."""
        raise NotImplementedError

    def export(self):
        """Describe this approximation; must be implemented by subclasses."""
        raise NotImplementedError
class DefaultFA(FunctionApproximationBase):
    """Pass-through approximation: ``convert`` returns its input unchanged."""

    def __init__(self, space=None):
        super(DefaultFA, self).__init__(space)

    def configure(self, space):
        """Store the space this approximation refers to."""
        self.space = space

    def convert(self, array):
        """Return ``array`` exactly as it was given."""
        return array

    def export(self):
        """Return a dict naming this approximation type."""
        description = {"Type": "Default"}
        return description
class DiscreteMaxFA(FunctionApproximationBase):
    """Select the index of the largest entry of an action-value array."""

    def __init__(self, space=None):
        super(DiscreteMaxFA, self).__init__(space)

    def configure(self, space):
        """Store the space that selected actions are checked against."""
        self.space = space

    def convert(self, array):
        """Return ``np.argmax(array)`` if the stored space contains it.

        Raises:
            ValueError: when ``self.space.contains`` rejects the index.
        """
        best = np.argmax(array)
        if self.space.contains(best):
            return best
        raise ValueError("Action not contained within space")

    def export(self):
        """Return a dict naming this approximation type."""
        description = {"Type": "Discrete Max"}
        return description
class ClipFA(FunctionApproximationBase):
    """Clip values to the ``low``/``high`` bounds of a Box space."""

    def __init__(self, space=None):
        super(ClipFA, self).__init__(space)

    def configure(self, space):
        """Store the space whose bounds are used for clipping."""
        self.space = space

    def convert(self, array):
        """Return ``array`` clipped to ``space.low`` and ``space.high``.

        Raises:
            TypeError: when the stored space is not a Box.
        """
        kind = self.space_type
        if kind != 'B':
            raise TypeError("Can't clip on space type {0}".format(kind))
        return np.clip(array, self.space.low, self.space.high)

    def export(self):
        """Return a dict naming this approximation type."""
        description = {"Type": "Clip"}
        return description
# Single tiling implementation with equidistant spacing
class SingleTiling(FunctionApproximationBase):
    """Map a Box observation onto a single flat tile index.

    Every dimension of the box is cut into ``num_tiles`` equal-width tiles.
    The per-dimension tile numbers are then read as the digits of a
    base-``num_tiles`` number, which gives one integer per observation.

    Bounds are indexed as ``space.low[dim]`` / ``space.high[dim]``, so the
    box is expected to be one-dimensional.
    """

    def __init__(self, space=None, num_tiles=1):
        FunctionApproximationBase.__init__(self, space)
        self.num_tiles = num_tiles
        # Populated by configure().
        self.tiles = None
        self.tile_boundaries = None
        self.tile_hits = None

    def configure(self, space):
        """Attach ``space`` and build the value table and tile boundaries.

        Raises:
            TypeError: when ``space`` is not a Box.
        """
        self.space = space
        if self.space_type != 'B':
            raise TypeError("SingleTiling is only valid for box environments")
        self.tiles = np.zeros(self.n_total)
        self.tile_boundaries = self.__set_tile_boundaries()

    @property
    def n_total(self):
        """Number of distinct flat tile indices ``convert`` can produce.

        Each of the ``num_discrete`` dimensions contributes one digit in
        base ``num_tiles``, hence ``num_tiles ** num_discrete`` indices.
        """
        return self.num_tiles ** self.num_discrete

    def __set_tile_boundaries(self):
        """Return, per dimension, the ``num_tiles - 1`` interior boundaries."""
        tile_boundaries = []
        for dim in range(self.num_discrete):
            # If the upper bound is np.inf, tile the range +-1 (CartPole).
            if self.space.high[dim] == np.inf:
                lower = -1
            else:
                lower = self.space.low[dim]
            split = self.__get_split(dim)
            tile_boundaries.append(
                [lower + (i + 1) * split for i in range(self.num_tiles - 1)])
        return tile_boundaries

    def get_value(self, observation):
        """Return the entry of ``self.tiles`` for the observation's tile."""
        index = self.__convert_base10(self.__get_tile(observation))
        return self.tiles[index]

    def __get_split(self, obv_ind):
        """Return the width of one tile along dimension ``obv_ind``."""
        if self.space.high[obv_ind] == np.inf:
            # Unbounded dimensions are tiled over [-1, 1], i.e. a span of 2.
            return 2 / float(self.num_tiles)
        span = self.space.high[obv_ind] - self.space.low[obv_ind]
        return span / float(self.num_tiles)

    def __get_tile(self, observation):
        """Return the tile number of each component of ``observation``."""
        tile = []
        for i, obs in enumerate(observation):
            boundaries = self.tile_boundaries[i]
            for j, boundary in enumerate(boundaries):
                if obs < boundary:
                    tile.append(j)
                    break
            else:
                # Not below any boundary: the value sits in the last tile.
                # With a single tile there are no boundaries and this is 0.
                tile.append(len(boundaries))
        return tile

    def __convert_base10(self, tile):
        """Read ``tile`` as base-``num_tiles`` digits, most significant first."""
        n_digits = len(tile)
        return sum(val * self.num_tiles ** (n_digits - (i + 1))
                   for i, val in enumerate(tile))

    def convert(self, observation):
        """Return the flat tile index for ``observation``."""
        return self.__convert_base10(self.__get_tile(observation))

    def export(self):
        """Return a dict naming this approximation type and its tile count."""
        return {"Type": "Single Tiling",
                "Num Tiles": self.num_tiles}