"""
Adaptive Market Planning Driver Script
"""
from collections import namedtuple
from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel
from AdaptiveMarketPlanningPolicy import AdaptiveMarketPlanningPolicy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
if __name__ == "__main__":
    # this is an example of creating a model and running a simulation for a certain trial size

    # define state variables
    state_names = ['order_quantity', 'counter']
    init_state = {'order_quantity': 0, 'counter': 0}
    decision_names = ['step_size']
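    # (These names are passed straight into AdaptiveMarketPlanningModel below;
    # 'counter' is presumably a statistic the step-size policy tracks, but its
    # exact use is defined in the model class, not in this driver.)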
    # read in parameters from the Excel file
    file = 'Base parameters.xlsx'
    raw_data = pd.ExcelFile(file)
    data = raw_data.parse('parameters')
    cost = data.iat[1, 2]
    trial_size = np.rint(data.iat[2, 2]).astype(int)
    price = data.iat[3, 2]
    theta_step = data.iat[4, 2]
    T = data.iat[5, 2]
    reward_type = data.iat[6, 2]
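    # Note: the cell positions above (column index 2, row indices 1-6 of the
    # 'parameters' sheet) are assumed to match the layout of 'Base parameters.xlsx'.
    # reward_type is only compared against "Cumulative" below; any other value
    # skips the per-period normalization.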
    # initialize the model and the policy
    M = AdaptiveMarketPlanningModel(state_names, decision_names, init_state, T, reward_type, price, cost)
    P = AdaptiveMarketPlanningPolicy(M, theta_step)
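    # The plot label at the bottom of this script suggests the policy implements
    # Kesten's step-size rule, with theta_step as its tunable parameter; the
    # actual update lives in AdaptiveMarketPlanningPolicy.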
    # store the reward and the ordered quantities from each iteration
    rewards_per_iteration = []
    learning_list_per_iteration = []
    for ite in range(trial_size):
        print("Starting iteration ", ite)
        reward, learning_list = P.run_policy()
        # reset the model's learning list before the next run
        M.learning_list = []
        rewards_per_iteration.append(reward)
        learning_list_per_iteration.append(learning_list)
        print("Ending iteration ", ite, " Reward ", reward)
    nElem = np.arange(1, trial_size + 1)
    rewards_per_iteration = np.array(rewards_per_iteration)
    rewards_per_iteration_sum = rewards_per_iteration.cumsum()
    rewards_per_iteration_cum_avg = rewards_per_iteration_sum / nElem
    if reward_type == "Cumulative":
        rewards_per_iteration_cum_avg = rewards_per_iteration_cum_avg / T
        rewards_per_iteration = rewards_per_iteration / T
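    # For cumulative-reward runs, dividing by T above converts the total reward
    # accumulated over the horizon into an average per-period figure, presumably
    # so cumulative and terminal-reward runs are reported on a comparable scale.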
    optimal_order_quantity = -np.log(cost / price) * 100
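    # The value above is the analytical newsvendor benchmark: with unit cost c and
    # price p, the optimal quantity solves F(q*) = (p - c) / p; assuming the demand
    # in AdaptiveMarketPlanningModel is exponential with mean 100, this gives
    # q* = -100 * ln(c / p), which is the expression used here.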
print("Optimal order_quantity for price {} and cost {} is {}".format(price,cost,optimal_order_quantity))
print("Reward type: {}, theta_step: {}, T: {} - Average reward over {} iteratios is: {}".format(reward_type,theta_step,T,trial_size,rewards_per_iteration_cum_avg[-1]))
    # pick a random iteration and print its learned order quantities
    ite = np.random.randint(0, trial_size)
    order_quantity = learning_list_per_iteration[ite]
    print("Order quantity for iteration {}".format(ite))
    print(order_quantity)
    # plotting the reward
    fig1, axsubs = plt.subplots(1, 2, sharex=True, sharey=True)
    fig1.suptitle("Reward type: {}, theta_step: {}, T: {}".format(reward_type, theta_step, T))
    axsubs[0].plot(nElem, rewards_per_iteration_cum_avg, 'g')
    axsubs[0].set_title('Cumulative average reward')
    axsubs[1].plot(nElem, rewards_per_iteration, 'g')
    axsubs[1].set_title('Reward per iteration')

    # create a big subplot to hold the shared axis labels
    ax = fig1.add_subplot(111, frameon=False)
    # hide the ticks and tick labels of the big axes
    plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
    ax.set_ylabel('USD', labelpad=0)  # use `labelpad` to move the label downwards
    ax.set_xlabel('Iterations', labelpad=10)
    plt.show()
    # plotting the analytical solution against the learned order quantities
    plt.xlabel("Time")
    plt.ylabel("Order quantity")
    plt.title("Analytical vs learned order quantity - (iteration {})".format(ite))
    time = np.arange(0, len(order_quantity))
    plt.plot(time, time * 0 - np.log(cost / price) * 100, label="Analytical solution")
    plt.plot(time, order_quantity, label="Kesten's Rule for theta_step {}".format(theta_step))
    plt.legend()
    plt.show()