-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpendulum_dci.py
122 lines (102 loc) · 3.79 KB
/
pendulum_dci.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from pendulum import Pendulum
import numpy as np
from numpy import pi
import time
import matplotlib.pyplot as plt
class Pendulum_dci:
    '''
    Continuous-state pendulum environment driven by a discrete control input.

    The joint torque is discretized into ``ndu`` evenly spaced levels that span
    [-uMax, uMax], both end points included. The state itself is whatever the
    wrapped ``Pendulum`` object returns; it is not discretized here.
    Gaussian noise can be added to the dynamics through ``noise_stddev``, which
    is forwarded to ``Pendulum``.

    NOTE(review): velocity/torque saturation and the cost are implemented by
    the external ``Pendulum`` class, not in this file. The original author
    describes the cost as -1 when the goal state has been reached and zero
    otherwise -- confirm against ``Pendulum.step``.
    '''

    # Slack added before flooring in c2du, so that a torque lying exactly on a
    # discretization level (e.g. a value produced by d2cu) is not pushed into
    # the bin below by floating-point rounding of the division.
    _INDEX_TOL = 1e-9

    def __init__(self, n_joint=1, ndu=11, vMax=5, uMax=5, dt=5e-2, ndt=1, noise_stddev=0):
        '''
        Build the wrapped pendulum and the torque discretization.

        n_joint      -- number of joints, forwarded to Pendulum
        ndu          -- number of torque levels; should be odd so that zero
                        torque is one of the levels, and must be at least 2
        vMax         -- max velocity (v in [-vMax, vMax]), forwarded to Pendulum
        uMax         -- max torque (u in [-uMax, uMax])
        dt           -- time step length
        ndt          -- number of Euler steps per integration (internal)
        noise_stddev -- noise standard deviation, forwarded to Pendulum

        Raises ValueError if ndu < 2 (the level spacing would be undefined).
        '''
        # Fail early with a clear message instead of a ZeroDivisionError when
        # computing DU below.
        if ndu < 2:
            raise ValueError("ndu must be at least 2, got %r" % (ndu,))
        self.njoint = n_joint
        self.pendulum = Pendulum(n_joint, noise_stddev, vMax, uMax)
        self.pendulum.DT = dt     # Time step length
        self.pendulum.NDT = ndt   # Number of Euler steps per integration (internal)
        self.ndu = ndu            # Number of discretization steps for joint torque
        self.vMax = vMax          # Max velocity (v in [-vmax,vmax])
        self.uMax = uMax          # Max torque (u in [-umax,umax])
        self.dt = dt              # time step
        self.DU = 2*uMax/(ndu-1)  # discretization resolution for joint torque

    def c2du(self, u):
        '''
        Continuous to discrete: map a scalar torque to a level index.

        The torque is clipped to [-uMax, uMax] and mapped to the index of the
        highest level that does not exceed it (floor). The result is an int in
        [0, ndu-1]. Values that sit on a level map to that level's own index,
        so c2du(d2cu(i)) == i for every valid index i.
        '''
        u = np.clip(u, -self.uMax, self.uMax)
        idx = int(np.floor((u + self.uMax)/self.DU + self._INDEX_TOL))
        # Guard against the tolerance/rounding pushing past the last level.
        return min(idx, self.ndu - 1)

    def d2cu(self, iu):
        '''
        Discrete to continuous: map a level index to its torque value.

        The index is clipped to [0, ndu-1]; index (ndu-1)/2 corresponds to zero
        torque. np.clip is used, so an array of indices is accepted as well.
        '''
        iu = np.clip(iu, 0, self.ndu-1) - (self.ndu-1)/2
        return iu*self.DU

    # use the continuous time reset
    def reset(self, x=None):
        ''' Reset the wrapped pendulum (optionally to state x) and return the state '''
        self.x = self.pendulum.reset(x)
        return self.x

    def step(self, iu):
        ''' Simulate one time step applying the torque level(s) iu; return (state, cost) '''
        u = self.d2cu(iu)
        self.x, cost = self.pendulum.step(u)
        return self.x, cost

    def render(self):
        ''' Render the wrapped pendulum, then sleep for one time step '''
        self.pendulum.render()
        time.sleep(self.pendulum.DT)

    def plot_V_table(self, V, x, i=0):
        ''' Plot the given Value table V over the grid x = (q values, dq values) '''
        import matplotlib.pyplot as plt
        plt.figure()
        # Colormap passed by name: plt.cm.get_cmap was removed in Matplotlib 3.9.
        plt.pcolormesh(x[0], x[1], V, cmap='Blues')
        plt.colorbar()
        plt.title("V table %d" %i)
        plt.xlabel("q")
        plt.ylabel("dq")
        plt.show(block=False)

    def plot_policy(self, pi, x, i=0):
        ''' Plot the given policy table pi over the grid x = (q values, dq values) '''
        import matplotlib.pyplot as plt
        plt.figure()
        # Colormap passed by name: plt.cm.get_cmap was removed in Matplotlib 3.9.
        plt.pcolormesh(x[0], x[1], pi, cmap='RdBu')
        plt.colorbar()
        plt.title("Policy %d" %i)
        plt.xlabel("q")
        plt.ylabel("dq")
        plt.show(block=False)
if __name__ == "__main__":
    # Demo: drive a 4-joint pendulum with a slowly increasing torque on the
    # first joint (zero torque on the others), render it, then plot the
    # running-average cost, joint positions, joint velocities and torques.
    N_STEPS = 100            # number of simulated time steps
    TORQUE_INCREMENT = 0.1   # torque added to the first joint at every step

    ### --- Random seed
    RANDOM_SEED = int((time.time()%10)*1000)
    print("Seed = %d" % RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    env = Pendulum_dci(4)
    nq = env.pendulum.nq
    nu = env.pendulum.nu

    x = env.reset(np.zeros(env.pendulum.nx))
    u = np.zeros(nu)    # continuous torque command
    iu = np.zeros(nu)   # corresponding discretized torque indices

    cost = []
    X = []
    V = []
    U = []
    for _ in range(N_STEPS):
        u[0] += TORQUE_INCREMENT
        u[1:] = 0           # only the first joint is actuated
        U.append(u.copy())  # copy: u is updated in place on the next step

        # c2du works on scalars, so discretize one joint at a time
        for k in range(nu):
            iu[k] = env.c2du(u[k])

        x, c = env.step(iu)
        X.append(x[:nq])    # first nq entries of the state: positions
        V.append(x[nq:])    # remaining entries of the state: velocities
        cost.append(c)
        env.render()

    plt.figure()
    # running average of the cost
    plt.plot(np.cumsum(cost)/np.arange(1, N_STEPS+1))
    plt.title("cost")

    plt.figure()
    plt.plot(np.reshape(X, (N_STEPS, nq)))
    plt.title("pos")

    plt.figure()
    plt.plot(np.reshape(V, (N_STEPS, nq)))
    plt.title("vel")

    plt.figure()
    plt.plot(np.reshape(U, (N_STEPS, nu)))
    plt.title("torque")

    plt.show()