Skip to content

Commit

Permalink
Merge pull request #14 from d2cml-ai/std_ipw
Browse files Browse the repository at this point in the history
Add files via upload
  • Loading branch information
TJhon authored Aug 2, 2024
2 parents d0113d7 + c1412b2 commit 2a16a97
Show file tree
Hide file tree
Showing 3 changed files with 2,052 additions and 94 deletions.
236 changes: 142 additions & 94 deletions drdid/ipwd_did.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,152 @@
from .utils import *

def std_ipw_did_panel(
y1: ndarray, y0: ndarray, D: ndarray,
covariates, i_weights, boot = False):

n = len(D)
delta_y = y1 - y0

int_cov = has_intercept(int_cov)
i_weights = has_weights(i_weights)

_, _, w_cont, _, _, asy_lin_rep_ps =\
fit_ps(D, int_cov, i_weights)

w_treat = i_weights * D

att_treat = w_treat * delta_y
att_cont = w_cont * delta_y

eta_treat = np.mean(att_treat) / np.mean(w_treat)
eta_cont = np.mean(att_cond) / np.mean(w_cont)

ipw_att = eta_treat - eta_cont

inf_treat = (att_treat - w_treat * eta_treat) / np.mean(w_treat)
inf_cont_1 = att_cont - w_cont * eta_cont
w_ref = w_cont * (delta_y - eta_cont)
M2 = np.mean(w_ref[:, n_x] * int_cov, axis=0)
inf_control = asy_lin_rep_ps * M2

att_inf_func = inf_treat - inf_control

se_att = None
if not boot:
se_att = np.std(att_inf_func) / np.sqrt(n)

return (ipw_att, att_inf_func, se_att)
# ----------------- RC

def std_ipw_did_rc(
y: ndarray, post: ndarray, D: ndarray, covariates = None, i_weights = None
, boot = False):

n = len(D)
post1 = (1 - post)
d1 = (1 - D)

int_cov = has_intercept(covariates, n)
i_weights = has_weights(i_weights, n)

ps_fit, w_cont_pre, _, w_cont_post, _, asy_lin_rep_ps = \
fit_ps(D, int_cov, i_weights, post)

w_i = i_weights * D
w_treat_pre = w_i * post1
w_treat_post = w_i * post

diff_w = i_weights * ps_fit * (1 - D)

def eta_form(vect, y_ = y):
return vect * y_ / np.mean(vect)

eta_treat_pre = eta_form(w_treat_pre)
eta_treat_post = eta_form(w_treat_post)
eta_cont_pre = eta_form(w_cont_pre)
eta_cont_post = eta_form(w_cont_post)

att_treat_pre = np.mean(eta_treat_pre)
att_treat_post = np.mean(eta_treat_post)
att_cont_pre = np.mean(eta_cont_pre)
att_cont_post = np.mean(eta_cont_post)

ipw_att = att_treat_post - att_treat_pre - (att_cont_post - att_cont_pre)

inf_treat_pre = eta_treat_pre - \
w_treat_pre * att_treat_pre / np.mean(w_treat_pre)
inf_treat_post = eta_treat_post -\
w_treat_post * att_treat_post / np.mean(w_treat_post)
inf_treat = inf_treat_post - inf_treat_pre

inf_cont_pre = eta_cont_pre - \
w_cont_pre * att_cont_pre / np.mean(w_cont_pre)
inf_cont_post = eta_cont_post -\
w_cont_post * att_cont_post / np.mean(w_cont_post)
def std_ipw_did_panel(y1, y0, D, covariates, i_weights = None):
D = np.asarray(D).flatten()
n = len(D)
delta_y = np.asarray(y1 - y0).flatten()
int_cov = np.ones((n, 1))

if covariates is not None:
covariates = np.asarray(covariates)
if np.all(covariates[:, 0] == 1):
int_cov = covariates
else:
int_cov = np.column_stack((np.ones(n), covariates))

if i_weights is None:
i_weights = np.ones(n)
elif np.min(i_weights) < 0:
raise ValueError("i_weights must be non-negative")

i_weights = i_weights / np.mean(i_weights)
pscore_model = sm.GLM(D, int_cov, family=sm.families.Binomial(), freq_weights=i_weights)
pscore_results = pscore_model.fit()
# print(D.mean())
# print(pscore_results.summary2())
if not pscore_results.converged:
print("Warning: glm algorithm did not converge")
if np.any(np.isnan(pscore_results.params)):
raise ValueError("Propensity score model coefficients have NA components. \n Multicollinearity (or lack of variation) of covariates is a likely reason.")
ps_fit = pscore_results.predict()
ps_fit = np.minimum(ps_fit, 1 - 1e-16)

w_treat = i_weights * D
w_cont = i_weights * ps_fit * (1 - D) / (1 - ps_fit)

att_treat = w_treat * delta_y
att_cont = w_cont * delta_y

eta_treat = mean(att_treat) / mean(w_treat)
eta_cont = mean(att_cont) / mean(w_cont)

ipw_att = eta_treat - eta_cont

score_ps = i_weights[:, np.newaxis] * (D - ps_fit)[:, np.newaxis] * int_cov
Hessian_ps = pscore_results.cov_params() * n
asy_lin_rep_ps = np.dot(score_ps, Hessian_ps)

inf_treat = (att_treat - w_treat * eta_treat) / mean(w_treat)
inf_cont_1 = att_cont - w_cont * eta_cont
pre_m2 = w_cont * (delta_y - eta_cont)
M2 = np.mean(pre_m2[:, np.newaxis] * int_cov, axis = 0)
print(M2)
inf_cont_2 = np.dot(asy_lin_rep_ps, M2)

inf_control = (inf_cont_1 + inf_cont_2) / np.mean(w_cont)
att_inf_func = inf_treat - inf_control
print(np.std(att_inf_func) / np.sqrt(n))
return ipw_att, att_inf_func

def std_ipw_did_rc(y, post, D, covariates, i_weights = None):
D = np.asarray(D).flatten()
y = np.asarray(y).flatten()
post = np.asarray(post).flatten()
n = len(D)
if covariates is None:
int_cov = np.ones((n, 1))
else:
covariates = np.asarray(covariates)
if np.all(covariates[:, 0] == 1):
int_cov = covariates
else:
int_cov = np.column_stack((np.ones(n), covariates))

# Pesos
if i_weights is None:
i_weights = np.ones(n)
else:
i_weights = np.asarray(i_weights)
if np.min(i_weights) < 0:
raise ValueError("i_weights must be non-negative")

# Normalizar pesos
i_weights = np.asarray(i_weights).flatten()
i_weights = i_weights / np.mean(i_weights)

pscore_model = sm.GLM(D, int_cov, family=sm.families.Binomial(), freq_weights=i_weights)
pscore_results = pscore_model.fit()
if not pscore_results.converged:
print("Warning: glm algorithm did not converge")
if np.any(np.isnan(pscore_results.params)):
raise ValueError("Propensity score model coefficients have NA components. \n Multicollinearity (or lack of variation) of covariates is a likely reason.")
ps_fit = pscore_results.predict()
ps_fit = np.minimum(ps_fit, 1 - 1e-16)


w_treat_pre = i_weights * D * (1 - post)
w_treat_post = i_weights * D * post
# print(np.mean(w_treat_pre))

w_cont_pre = i_weights * ps_fit * (1 - D) * (1 - post)/(1 - ps_fit)
w_cont_post = i_weights * ps_fit * (1 - D) * post/(1 - ps_fit)

# Elements of the influence function (summands)
eta_treat_pre = w_treat_pre * y / np.mean(w_treat_pre)
eta_treat_post = w_treat_post * y / np.mean(w_treat_post)
# print(eta_treat_pre)

eta_cont_pre = w_cont_pre * y / np.mean(w_cont_pre)
eta_cont_post = w_cont_post * y / np.mean(w_cont_post)

# Estimator of each component
att_treat_pre = np.mean(eta_treat_pre)
att_treat_post = np.mean(eta_treat_post)
att_cont_pre = np.mean(eta_cont_pre)
att_cont_post = np.mean(eta_cont_post)
ipw_att = (att_treat_post - att_treat_pre) - (att_cont_post - att_cont_pre)

score_ps = (i_weights * (D - ps_fit))[:, np.newaxis] * int_cov
Hessian_ps = pscore_results.cov_params() * n
asy_lyn_rep_ps = np.dot(score_ps, Hessian_ps)

inf_treat_pre = eta_treat_pre - w_treat_pre * att_treat_pre/np.mean(w_treat_pre)
inf_treat_post = eta_treat_post - w_treat_post * att_treat_post/np.mean(w_treat_post)
inf_treat = inf_treat_post - inf_treat_pre
# Now, get the influence function of control component
# Leading term of the influence function: no estimation effect
inf_cont_pre = eta_cont_pre - w_cont_pre * att_cont_pre/np.mean(w_cont_pre)
inf_cont_post = eta_cont_post - w_cont_post * att_cont_post/np.mean(w_cont_post)
inf_cont = inf_cont_post - inf_cont_pre

# Estimation effect from gamma hat (pscore)
# Derivative matrix (k x 1 vector)

def simple_rep(a, b, y_ = y, cov = int_cov):
return (a * (y - b))[:, n_x] * cov / np.mean(a)

M2_pre = np.mean(simple_rep(w_cont_pre, att_cont_pre), axis=0)
M2_post = np.mean(simple_rep(w_cont_post, att_treat_post), axis=0)
M2_pre = np.mean((w_cont_pre *(y - att_cont_pre))[:, np.newaxis] * int_cov, axis = 0)/np.mean(w_cont_pre)
M2_post = np.mean((w_cont_post *(y - att_cont_post))[:, np.newaxis] * int_cov, axis = 0)/np.mean(w_cont_post)

inf_cont_ps = np.dot(asy_lin_rep_ps, M2_post - M2_pre)
inf_cont = inf_cont + inf_cont_ps
att_inf_func = inf_treat - inf_cont
# Now the influence function related to estimation effect of pscores
M2 = M2_post - M2_pre
# print()

if not boot:
se_att = np.std(att_inf_func) / np.sqrt(n)
inf_cont_ps = np.dot(asy_lyn_rep_ps, M2)

return(ipw_att, att_inf_func, se_att)
# Influence function for the control component
inf_cont = inf_cont + inf_cont_ps

#get the influence function of the DR estimator (put all pieces together)
att_inf_func = inf_treat - inf_cont
# print(np.std(att_inf_func) / np.sqrt(n))
return ipw_att, att_inf_func



Expand Down
Loading

0 comments on commit 2a16a97

Please sign in to comment.