-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathifeh_mcmc_RRab.py
280 lines (220 loc) · 12 KB
/
ifeh_mcmc_RRab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import joblib
import pymc3 as pm
import pandas as pd
import os
import numpy as np
from sklearn.linear_model import LinearRegression
import arviz as az
import ifeh_io as io
# ======================================================================================================================
# PREAMBLE
# The root directory:
rootdir = "."
# Input data files:
inputfile1 = "CHR_X_phot_single.dat"
inputfile2 = "LIT_hires_feh_1_X_phot.dat"
inputfile3 = "LIT_hires_feh_2_X_phot.dat"
# inputfile2 = None
# inputfile3 = None
# The name of the plot showing the predicted vs ground truth values for the initial OLS fit:
ols_residual_figure = "ifeh_mc1_poly1.3CHR_RRab_ols_residual"
# The name of the plot showing the predicted vs ground truth values for the final Bayesian fit:
residual_plot_figure = "ifeh_mc1_poly1.3CHR_RRab_residual"
# The names of the traceplot and pairplot produced by arviz:
traceplot_figure = "traceplot1_poly1.3_RRab"
pairplot_figure = "pairplot1_poly1.3_RRab"
# Format of the output figures
figformat = 'png'
# The final predictive model will be saved to outputfile_model using joblib:
output_model_file = "model_mc1_poly1.3CHR_RRab.sav"
# Mark the Blazhko stars in the plots?
mark_blazhko = True
subset_expr = '(type=="RRab" or type=="RRab-BL") and keep==1 and snr>95 and phcov>0.8'
# subset_expr = 'type=="RRab" and keep==1 and snr>95 and phcov>0.8'
# The names of the input features:
input_feature_names = ['period', 'A1', 'A2', 'A3', 'phi21', 'phi31']
# The indices of the optimal feature set:
feature_indx = [0, 2, 5]
n_poly = 1 # polynomial order of the optimal feature set
# If trim_quantiles is provided, data beyond the lower and upper quantiles (qlo, qhi)
# of the listed column names will be trimmed.
trim_quantiles = None
qlo = 0.01 # lower percentile for data rejection
qhi = 0.99 # upper percentile for data rejection
# RRab:
mean_21_phase = 7.56
mean_31_phase = 5.88
# END OF PREAMBLE
# ======================================================================================================================
def shift_phase(phase, mean_phase=5.9):
shifted_phase = phase - np.floor((phase - mean_phase + np.pi) / 2.0 / np.pi) * 2.0 * np.pi
return shifted_phase
# ======================================================================================================================
# Read in the data:
X, y, yw, df, feature_names, df_orig = \
io.load_dataset(os.path.join(rootdir, inputfile1),
trim_quantiles=trim_quantiles,
n_poly=n_poly, plothist=False,
usecols=['name', 'use_id', 'source', 'period', 'totamp', 'A1', 'A2', 'A3', 'A1_e', 'A2_e', 'A3_e',
'phi1', 'phi2', 'phi3', 'phi1_e', 'phi2_e', 'phi3_e', 'phi21', 'phi31',
'phi21_e', 'phi31_e', 'phcov', 'snr', 'feh', 'e_feh', 'type', 'keep', 'shift'],
input_feature_names=input_feature_names, y_col='feh', output_feature_indices=feature_indx,
yerr_col='e_feh', subset_expr=subset_expr, dropna_cols=['use_id'])
if inputfile2 is not None:
X2, y2, yw2, df2, feature_names, df2_orig = \
io.load_dataset(os.path.join(rootdir, inputfile2),
trim_quantiles=trim_quantiles,
n_poly=n_poly, plothist=False,
usecols=['name', 'use_id', 'source', 'period', 'totamp', 'A1', 'A2', 'A3',
'A1_e', 'A2_e', 'A3_e', 'phi1', 'phi2', 'phi3', 'phi1_e', 'phi2_e', 'phi3_e',
'phi21', 'phi31', 'phi21_e', 'phi31_e',
'phcov', 'snr', 'feh', 'e_feh', 'type', 'keep', 'shift'],
input_feature_names=input_feature_names, y_col='feh', output_feature_indices=feature_indx,
yerr_col='e_feh', subset_expr=subset_expr, dropna_cols=['use_id'])
y2 = y2 + df2['shift'].to_numpy()
y = np.hstack((y, y2))
yw = np.hstack((yw, yw2))
X = np.vstack((X, X2))
df = pd.concat((df, df2))
df_orig = pd.concat((df_orig, df2_orig))
if inputfile3 is not None:
X3, y3, yw3, df3, feature_names, df3_orig = \
io.load_dataset(os.path.join(rootdir, inputfile3),
trim_quantiles=trim_quantiles,
n_poly=n_poly, plothist=False,
usecols=['name', 'use_id', 'source', 'period', 'totamp', 'A1', 'A2', 'A3',
'A1_e', 'A2_e', 'A3_e', 'phi1', 'phi2', 'phi3', 'phi1_e', 'phi2_e', 'phi3_e',
'phi21', 'phi31', 'phi21_e', 'phi31_e',
'phcov', 'snr', 'feh', 'e_feh', 'type', 'keep', 'shift'],
input_feature_names=input_feature_names, y_col='feh', output_feature_indices=feature_indx,
yerr_col='e_feh', subset_expr=subset_expr, dropna_cols=['use_id'])
y3 = y3 + df3['shift'].to_numpy()
y = np.hstack((y, y3))
yw = np.hstack((yw, yw3))
X = np.vstack((X, X3))
df = pd.concat((df, df3))
df_orig = pd.concat((df_orig, df3_orig))
df['A1_e'] = df['A1_e'].replace(0.0, 0.001)
df['A2_e'] = df['A2_e'].replace(0.0, 0.001)
df['A3_e'] = df['A3_e'].replace(0.0, 0.001)
df['phi1_e'] = df['phi1_e'].replace(0.0, 0.001)
df['phi2_e'] = df['phi2_e'].replace(0.0, 0.001)
df['phi3_e'] = df['phi3_e'].replace(0.0, 0.001)
df['phi31_e'] = df['phi31_e'].replace(0.0, 0.001)
df['phi21_e'] = df['phi21_e'].replace(0.0, 0.001)
blazhko_mask = (df['type'] == 'RRab-BL') | (df['type'] == 'RRc-BL')
print('Input features:')
print(feature_names)
# ======================================================================================================================
# Perform initial linear regression:
reg = LinearRegression(fit_intercept=True)
reg.fit(X, y, sample_weight=yw)
r2score = reg.score(X, y.reshape(-1, 1))
yhat = reg.predict(X)
residual_stdev = np.std(y - yhat)
print("R^2 score = {0:.3f}".format(r2score))
print("residual st.dev. = {0:.3f}".format(residual_stdev))
io.plot_residual(y, yhat, xlabel="$[Fe/H]_{HR}$", ylabel="$[Fe/H]_{pred.}$",
plotrange=(-3, 0.5), fname=ols_residual_figure)
print("intercept = {0:.3f}".format(reg.intercept_))
# print("coefficients: ")
for coef, fname in zip(reg.coef_, feature_names):
print("coef_{0}: {1:.4f}".format(fname, coef))
# ======================================================================================================================
# Define Bayesian model:
with pm.Model() as model:
# mildly informative prior on the linear function's parameters:
icept = pm.Normal('icept', reg.intercept_, sigma=10)
coefs = dict()
for coef, fname in zip(reg.coef_, feature_names):
coefs['coef_' + fname] = pm.Normal('coef_' + fname, coef, sigma=10)
# uninformative prior on the latent regressor (this would take a long time to train):
# phi1_latent = pm.Normal('phi1_latent', mu=6, sigma=10, shape=df['phi1'].shape)
# phi3_latent = pm.Normal('phi3_latent', mu=6, sigma=10, shape=df['phi3'].shape)
# a2_latent = pm.HalfNormal('a2_latent', mu=0, sigma=10, shape=a2.shape)
# informative priors on the latent regressors:
a2_latent = pm.Normal('a2_latent', mu=df['A2'], sigma=df['A2_e'], shape=df['A2'].shape)
phi1_latent = pm.Normal('phi1_latent', mu=df['phi1'], sigma=df['phi1_e'], shape=df['phi1'].shape)
phi3_latent = pm.Normal('phi3_latent', mu=df['phi3'], sigma=df['phi3_e'], shape=df['phi3'].shape)
phi31_latent = pm.Deterministic('phi31_latent', shift_phase(phi3_latent - 3 * phi1_latent, mean_31_phase))
# likelihoods of the regressors:
likelihood_phi1 = pm.Normal('lh_phi1', mu=phi1_latent, sigma=df['phi1_e'], observed=df['phi1'])
likelihood_phi3 = pm.Normal('lh_phi3', mu=phi3_latent, sigma=df['phi3_e'], observed=df['phi3'])
likelihood_a2 = pm.Normal('lh_A2', mu=a2_latent, sigma=df['A2_e'], observed=df['A2'])
features = dict()
features['period'] = df['period']
features['A2'] = a2_latent
features['phi31'] = phi31_latent
y_latent = pm.Deterministic('y_latent', icept + coefs['coef_period'] * features['period'] +
coefs['coef_A2'] * features['A2'] +
coefs['coef_phi31'] * features['phi31'])
likelihood_y = pm.Normal('lh_y', mu=y_latent, sigma=df['e_feh'], observed=y)
# Find maximum a posteriori estimate:
# (fast but gives only a point estimate)
# with model:
# map_estimate = pm.find_MAP()
# print(map_estimate)
#
# Compiute predictions from MAP estimate:
# yhat = map_estimate['icept'] + map_estimate['coef_period'] * df['period'] +
# map_estimate['coef_A1'] * df['A1'] + map_estimate['coef_A2'] * df['A2'] + map_estimate['coef_A3'] * df['A3'] +
# map_estimate['coef_phi21'] * df['phi21'] + map_estimate['coef_phi31'] * df['phi31']
#
# plot_residual(y, yhat, xlabel="$[Fe/H]_{HR}$", ylabel="$[Fe/H]_{pred.}$", fname=None)
# Sample posterior:
tuningsteps = 10000 # will be rejected from the trace
with model:
trace = pm.sample(50000, tune=tuningsteps, cores=4, chains=4, init='advi', discard_tuned_samples=True,
return_inferencedata=True, target_accept=0.95)
# this takes around 5 minutes using 4 cores of an Intel i7 processor
axes = az.plot_trace(trace, var_names=["icept", "coef"], filter_vars="like",
lines=tuple([(k, {}, [v['mean'], v['hdi_95%'], v['hdi_5%']])
for k, v in
az.summary(trace, var_names=["icept", "coef"], filter_vars="like",
hdi_prob=0.9).iterrows()]))
fig = axes.ravel()[0].figure
fig.savefig(traceplot_figure + '.' + figformat, format=figformat)
summary = az.summary(trace, var_names=["icept", "coef"], filter_vars="like", kind='diagnostics')
print(summary)
summary = az.summary(trace, var_names=["icept", "coef"], filter_vars="like", kind='stats', hdi_prob=0.90)
print(summary)
summary = az.summary(trace, var_names=["icept", "coef"], filter_vars="like", kind='stats', hdi_prob=0.95)
print(summary)
axes = az.plot_pair(trace, var_names=["icept", "coef"], filter_vars="like", marginals=True,
kind='kde', figsize=(15, 15))
fig = axes.ravel()[0].figure
fig.get_axes()[0].set_ylabel("$\\theta_0$")
fig.get_axes()[1].set_ylabel("$\\theta_P$")
fig.get_axes()[3].set_ylabel("$\\theta_{A_2}$")
fig.get_axes()[6].set_ylabel("$\\theta_{\phi_{31}}$")
fig.get_axes()[6].set_xlabel("$\\theta_0$")
fig.get_axes()[7].set_xlabel("$\\theta_P$")
fig.get_axes()[8].set_xlabel("$\\theta_{A_2}$")
fig.get_axes()[9].set_xlabel("$\\theta_{\phi_{31}}$")
fig.savefig(pairplot_figure + '.' + figformat, format=figformat)
params = dict()
params['icept'] = summary['mean'][0]
reg.intercept_ = summary['mean'][0]
for i, fname in enumerate(list(feature_names)):
params[fname] = summary['mean'][i + 1]
reg.coef_[i] = summary['mean'][i + 1]
yhat_mc = reg.predict(X)
residual_stdev = np.std(y - yhat_mc)
print("residual st.dev. = {0:.3f}".format(residual_stdev))
if mark_blazhko:
io.plot_residual(y, yhat_mc, highlight=blazhko_mask, xlabel="$[Fe/H]_{HR}$", ylabel="$[Fe/H]_{pred.}$",
plotrange=(-3, 0.5), fname=residual_plot_figure + '.' + figformat, format=figformat)
else:
io.plot_residual(y, yhat_mc, highlight=blazhko_mask, xlabel="$[Fe/H]_{HR}$", ylabel="$[Fe/H]_{pred.}$",
plotrange=(-3, 0.5), fname=residual_plot_figure + '.' + figformat, format=figformat)
print("intercept = {0:.3f}".format(reg.intercept_))
joblib.dump(reg, output_model_file)
icept_trace = trace['posterior'].icept.to_series().to_numpy()
coef_period_trace = trace['posterior'].coef_period.to_series().to_numpy()
coef_A2_trace = trace['posterior'].coef_A2.to_series().to_numpy()
coef_phi31_trace = trace['posterior'].coef_phi31.to_series().to_numpy()
np.set_printoptions(precision=4, suppress=True)
covmat = np.cov(np.vstack((icept_trace, coef_period_trace, coef_A2_trace, coef_phi31_trace)), ddof=0)
print(covmat)
corrmat = np.corrcoef(np.vstack((icept_trace, coef_period_trace, coef_A2_trace, coef_phi31_trace)))
print(corrmat)