-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
203 lines (170 loc) · 7.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
from __future__ import division
from scipy.stats import pearsonr, spearmanr#, norm
# from scipy.integrate import quad
import numpy as np
import random as rando
import math
import matplotlib.pyplot as plt
import csv
import time
import pylab
# set these parameter values
N = 15 # number of commercial banks initially
c = [0,0,0] # expected payouts for each of the actions (vector of length D)
sig = [0.1,0.25,0.50] # expected volatilities for payouts of each of the actions (vector of length D)
T = 1000 # number of time steps in simulation
D = len(sig) # number of possible actions/investment packages
w_ext = 100 # number of iterations for convergence
record_mat = [] # matrix in which all the relevant records will be noted down
timebefore = time.time() # time before simulation
sysriskdamage = 0
corrmatrix = np.zeros([N,N])
bankrupt_debts = 0
alpha = 0.2 # coefficient of systemic risk
epsilon = 0 # initial exploration parameter for epsilon-greedy
theta = 0.1 # softmax probability distribution parameter
eta = 0.05 # learning rate
llambda1 = 1.0 # social learning parameter for bailout cases
llambda2 = 0 # discount parameter for epsilon
NW_init = 1 # initial net worth of banks
plot_yes = True # whether or not to plot net worths and actions
Q = np.zeros([N, D]) # initialize Q matrices
NW = np.zeros(N) + NW_init # net worth of banks
payouts = np.zeros([N,T]) # NxT matrix
bailout = np.zeros(N) # indicator for whether bank i was just bailed out
bailout_count = 0 # counter for number of bailouts over the course of the simulation
distress_count = 0 # counter for number of distress cases over the course of the simulation
bhist = [[] for a in range(0,N)] # record of bailout history for all banks
brupthist = [[] for a in range(0,N)] # record of bankruptcy history for all banks
bankrupt = np.zeros(N) # indicator for whether bank i is bankrupt and out of the simulation
CBM = np.zeros(N) # vector of amount spent by CB on each bank
probs_acts = np.zeros(D) # probabilities of actions for banks
acts = -1*np.ones([N,T]) # actions taken by banks
# CB Objective Function
beta0 = -6
beta1 = 4
beta2 = -7
beta3 = 1
def CBObj(C,R_s,H_m):# Cost, Systemic Risk, Moral Hazard
return 1/(1+math.exp(beta0+beta1*C+beta2*R_s+beta3*H_m))
# start time loop
for t in range(0,T):
bailout = np.zeros(N)
# choose actions and determine payouts
for i in range(0,N):
if rando.random() > epsilon: # softmax operator
# find sum of exponential/softmax forms
sum_exps = 0
for j in range(0,D):
sum_exps += math.exp(Q[i][j]/theta)
# find probability of each action
for j in range(0,D):
probs_acts[j] = math.exp(Q[i][j]/theta)/sum_exps
# select action
act_determ = rando.random()
cumul_sum = probs_acts[0]
counter = 0
while cumul_sum < act_determ:
cumul_sum += probs_acts[counter+1]
counter += 1
acts[i][t] = counter
else: # epsilon-greedy
acts[i][t] = rando.randint(0,D-1)
# determine payout
payouts[i][t] = np.random.normal(c[int(acts[i][t])],sig[int(acts[i][t])])
# determine new net worth
NW[i] += payouts[i][t]
# for all banks in bankruptcy
for i in range(0,N):
if NW[i] < 0: # CB decision on whether to bail out
cost = 0.3*NW_init - NW[i] # cost of bailing out
systemicrisk_hat = 0
denom = 0
for i2 in range(0,N):
if i2 != i:
if bankrupt[i2] == 0:
corrmatrix[i][i2] = max(pearsonr(acts[i][0:t+1],acts[i2][0:t+1])[0],0) # pearson coefficient between actions
if not math.isnan(corrmatrix[i][i2]):
systemicrisk_hat += corrmatrix[i][i2]
denom += 1
systemicrisk_hat = systemicrisk_hat/max(denom,1) # systemic risk proxy
moral_hazard_hat = bailout_count/max(distress_count,1) # moral hazard proxy
p_bailout = CBObj(cost, systemicrisk_hat, moral_hazard_hat) # CB Objective
if rando.random() < p_bailout: # if CB decided to bail out
bailout[i] += 0.3*NW_init - NW[i] # amount of money awarded in bailout
NW[i] = 0.3*NW_init # small amount of positive cash given back
bailout_count += 1 # increase number of bailouts
distress_count += 1 # increase number of distress cases
bhist[i].append(t) # make note of this bailout
else: # if CB decided not to bail out
brupthist[i].append(t) # make note of this bankruptcy
distress_count += 1 # increase number of distress cases
# rewards
for i in range(0,N):
Q[i][int(acts[i][t])] = (1-eta)*Q[i][int(acts[i][t])] + eta*(payouts[i][t]+bailout[i])
if NW[i] < 0 or bailout[i] != 0:
for i2 in range(0,N):
Q[i2][int(acts[i][t])] = (1-llambda1*eta)*Q[i2][int(acts[i][t])] + llambda1*eta*(payouts[i][t]+bailout[i])
# get rid of banks that went bankrupt without bailout
for i in range(0,N):
if NW[i] < 0:
bankrupt_debts += 1-NW[i] # replace with new bank
NW[i] = NW_init
for i2 in range(0,N): # systemic risk impact on other banks
if i2 != i:
if not math.isnan(corrmatrix[i][i2]):
NW[i2] += -alpha*corrmatrix[i][i2]*NW[i2]
sysriskdamage += alpha*corrmatrix[i][i2]*NW[i2]
else:
corrmatrix[i][i2] = 0
CBM = CBM + bailout # update amount of money spent by CB
epsilon = epsilon*math.exp(llambda2*t) # epsilon decay
finexps = np.zeros(D) # final expectations of counts for each action
for i in range(0,N):
# find sum of exponential/softmax forms
sum_exps = 0
for j in range(0,D):
sum_exps += math.exp(Q[i][j]/theta)
# find probability of each action
for j in range(0,D):
finexps[j] += math.exp(Q[i][j]/theta)/sum_exps
unique, counts = np.unique(acts, return_counts=True)
investriskmetric = (counts[0]*sig[0] + counts[1]*sig[1] + counts[2]*sig[2])/(w_ext*N*T) # investment risk metric
# Final Q Convergence
Q_avg = np.sum(Q,axis=0)/N
timeafter = time.time() # time after simulation
print "**************************************************"
print "Execution took", (timeafter-timebefore)/3600, "hours."
# Plotting
if plot_yes == True:
for wtr in range(0,N): # plot net worth over time for different banks
plt.plot(range(0,T),payouts[wtr],'b-',range(0,T),np.zeros(T),'k--')
plt.title("Payout for bank %s"%wtr)
plt.xlabel("Time step")
plt.ylabel("Payout")
for d in range(0,len(bhist[wtr])):
plt.plot((bhist[wtr][d],bhist[wtr][d]),(-2,2),'g--')
plt.annotate('bailout', xy=(bhist[wtr][d], -2), xytext=(bhist[wtr][d], -3),
arrowprops=dict(facecolor='black', shrink=0.05),
)
for d in range(0,len(brupthist[wtr])):
plt.plot((brupthist[wtr][d],brupthist[wtr][d]),(-2,2),'r--')
plt.annotate('bankruptcy', xy=(brupthist[wtr][d], -2), xytext=(brupthist[wtr][d], -3),
arrowprops=dict(facecolor='black', shrink=0.05),
)
plt.show()
plt.plot(range(0,T),acts[wtr],'b-') # plot actions over time for different banks
plt.title("Action of bank %s"%wtr)
plt.xlabel("Time step")
plt.ylabel("Action")
for d in range(0,len(bhist[wtr])):
plt.plot((bhist[wtr][d],bhist[wtr][d]),(0,D),'g--')
plt.annotate('bailout', xy=(bhist[wtr][d], 0), xytext=(bhist[wtr][d], -1),
arrowprops=dict(facecolor='black', shrink=0.05),
)
for d in range(0,len(brupthist[wtr])):
plt.plot((brupthist[wtr][d],brupthist[wtr][d]),(0,D),'r--')
plt.annotate('bankruptcy', xy=(brupthist[wtr][d], 0), xytext=(brupthist[wtr][d], -1),
arrowprops=dict(facecolor='black', shrink=0.05),
)
plt.show()