Skip to content

Commit

Permalink
Update inference
Browse files Browse the repository at this point in the history
Now it checks for multicollinearity and adjust the code accordingly.
  • Loading branch information
paulcorcuera committed Jan 4, 2024
1 parent 1cf9617 commit 249f230
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 59 deletions.
84 changes: 47 additions & 37 deletions src/VarianceComponentsHDFE.jl
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,33 @@ function real_main()
println(summary)
end

Z_lincom = nothing
if parsed_args["do_lincom"]
#Construct Z_lincom
if lincom_covariates == []
println("\n User asked for lincom but no covariates were specified. This step will not be performed.")
else
if typeof(lincom_covariates[1]) == String
#Build lincom controls matrix
lincom_labels = []
push!(lincom_labels,String.(lincom_covariates[1]))

Z_lincom = data[obs,lincom_covariates[1]]
if length(lincom_covariates)>=2
for k=2:length(lincom_covariates)
hcat(Z_lincom, data[obs,lincom_covariates[k]])
push!(lincom_labels,String.(lincom_covariates[k]))
end
end
else
println("WARNING: Elements of lincom covariates are not defined correctly. No inference will be performed.")
end
end

#Now subset to the Leave Out Sample
Z_lincom = Z_lincom[obs,:]
end

#Residualize outcome variable
if controls != nothing
println("\nPartialling out controls from $(settings.outcome_id_display)...")
Expand Down Expand Up @@ -313,46 +340,15 @@ function real_main()
# @unpack θ_first, θ_second, θCOV, obs, β, Dalpha, Fpsi, Pii, Bii_first, Bii_second, Bii_cov = compute_whole(y,first_id,second_id,controls,settings)
@unpack θ_first, θ_second, θCOV, β, Dalpha, Fpsi, Pii, Bii_first, Bii_second, Bii_cov, y, X, sigma_i = leave_out_estimation(y,first_id,second_id,controls,settings)

Z_lincom = nothing
if parsed_args["do_lincom"]
if parsed_args["do_lincom"]
#Construct Transform
F = sparse(collect(1:length(second_id)),second_id,1)
J = size(F,2)
S = sparse(1.0I, J-1, J-1)
S = vcat(S,sparse(-zeros(1,J-1)))
Transform = hcat(spzeros(length(second_id),maximum(first_id)), -F*S )

#Construct Z_lincom
if lincom_covariates == []
println("\n User asked for lincom but no covariates were specified. This step will not be performed.")
else
if typeof(lincom_covariates[1]) == String
#Build lincom controls matrix
lincom_labels = []
push!(lincom_labels,String.(lincom_covariates[1]))

Z_lincom = data[obs,lincom_covariates[1]]
if length(lincom_covariates)>=2
for k=2:length(lincom_covariates)
hcat(Z_lincom, data[obs,lincom_covariates[k]])
push!(lincom_labels,String.(lincom_covariates[k]))
end
end
else
println("WARNING: Elements of lincom covariates are not defined correctly. No inference will be performed.")
end
end
end

if Z_lincom != nothing
#Collapse and reweight to person-year observations
match_id = compute_matchid(second_id, first_id)
Z_lincom_col = ones(size(Z_lincom,1),1)
for i = 1:size(Z_lincom,2)
Z_lincom_col = hcat(Z_lincom_col,(transform(groupby(DataFrame(z = Z_lincom[:,i], match_id = match_id), :match_id), :z => mean => :z_py).z_py))
end

@unpack test_statistic, linear_combination , SE_linear_combination_KSS, SE_naive = lincom_KSS(y,X, Z_lincom_col, Transform, sigma_i; lincom_labels)
@unpack test_statistic, linear_combination , SE_linear_combination_KSS, SE_naive = lincom_KSS(y,X, Z_lincom, Transform, sigma_i; lincom_labels)
end

if parsed_args["write_detailed_csv"]
Expand Down Expand Up @@ -449,13 +445,25 @@ function real_main()


if Z_lincom != nothing
#Establish basis
zz = Z_lincom'*Z_lincom
invzz =invsym!(Symmetric(deepcopy(zz)), setzeros=true)
basis = diag(invzz).<0

# Modify labels if they are present
lincom_labels = lincom_labels == nothing ? nothing : lincom_labels[basis[2:end]]

#Check if the basis includes the intercept
has_cons = basis[1] == true ? 1 : 0

#Write the output of inference
r = size(Z_lincom_col,2)
write(io," Results of High Dimensional Lincom \n\n")
if lincom_labels == nothing
for q=2:r
start = has_cons == 1 ? 2 : 1
for q=start:r
if q <= r
ncol = q-1
ncol = has_cons == 1 ? q-1 : q
output_inference = """
Coefficient of Column $(ncol): $(linear_combination[q]) \n
Traditional HC Standard Error of Column $(ncol): $(SE_naive[q]) \n
Expand All @@ -466,8 +474,10 @@ function real_main()
end
end
else
for q=2:r
tell_me = lincom_labels[q-1]
start = has_cons == 1 ? 2 : 1
for q=start:r
pos = has_cons == 1 ? q-1 : q
tell_me = lincom_labels[pos]
output_inference = """
Coefficient on $(tell_me): $(linear_combination[q-1]) \n
Traditional HC Standard Error on $(tell_me): $(SE_naive[q-1]) \n
Expand Down
69 changes: 47 additions & 22 deletions src/leave_out_correction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -427,11 +427,16 @@ function leave_out_KSS(y,first_id,second_id;controls = nothing, do_lincom = fals
Number of $(settings.first_id_display_small)s: $(maximum(first_id))
Number of $(settings.second_id_display_small)s: $(maximum(second_id))
Number of Movers : $(num_movers)
Mean of $(settings.outcome_id_display): $(mean(y))
Variance of $(settings.outcome_id_display): $(var(y))
Mean of $(settings.outcome_id_display): $(mean(y_untransformed[obs]))
Variance of $(settings.outcome_id_display): $(var(y_untransformed[obs]))
"""
println(summary)
end

if do_lincom == true
#Subset to the Leave Out Sample
Z_lincom = Z_lincom[obs,:]
end

#Residualize outcome variable
if controls != nothing
Expand Down Expand Up @@ -492,20 +497,18 @@ function leave_out_KSS(y,first_id,second_id;controls = nothing, do_lincom = fals
@unpack θ_first, θ_second, θCOV, β, Dalpha, Fpsi, Pii, Bii_first, Bii_second, Bii_cov, y, X, sigma_i = leave_out_estimation(y,first_id,second_id,controls,settings)

if do_lincom == true
#Subset to the Leave Out Sample
Z_lincom = Z_lincom[obs,:]
F = sparse(collect(1:length(second_id)),second_id,1)
J = size(F,2)
S = sparse(1.0I, J-1, J-1)
S = vcat(S,sparse(-zeros(1,J-1)))
Transform = hcat(spzeros(length(second_id),maximum(first_id)), -F*S )

#Collapse and reweight to person-year observations
match_id = compute_matchid(second_id, first_id)
Z_lincom_col = ones(size(Z_lincom,1),1)
for i = 1:size(Z_lincom,2)
Z_lincom_col = hcat(Z_lincom_col,(transform(groupby(DataFrame(z = Z_lincom[:,i], match_id = match_id), :match_id), :z => mean => :z_py).z_py))
end
#match_id = compute_matchid(second_id, first_id)
#Z_lincom_col = ones(size(Z_lincom,1),1)
#for i = 1:size(Z_lincom,2)
# Z_lincom_col = hcat(Z_lincom_col,(transform(groupby(DataFrame(z = Z_lincom[:,i], match_id = match_id), :match_id), :z => mean => :z_py).z_py))
#end

#Run Inference
println("\nRegressing the $(settings.second_id_display_small) effects on observables Z.")
Expand Down Expand Up @@ -981,6 +984,24 @@ function lincom_KSS(y,X, Z, Transform, sigma_i; lincom_labels = nothing)
# positionvec = indexin(joint_test_regressors, labels) .+ 1
#end

#Add intercept
Z = hcat(ones(size(Z,1),1),Z)

#Establish the independent basis Z
zz = Z'*Z
invzz =invsym!(Symmetric(deepcopy(zz)), setzeros=true)
basis = diag(invzz).<0

if !all(basis)
Z = Z[:, basis]
zz = zz[basis,basis]
end

# Modify labels if they are present
lincom_labels = lincom_labels == nothing ? nothing : lincom_labels[basis[2:end]]

#Check if the basis includes the intercept
has_cons = basis[1] == true ? 1 : 0

#PART 1: COMPUTE FE
n = size(y,1)
Expand All @@ -993,7 +1014,7 @@ function lincom_KSS(y,X, Z, Transform, sigma_i; lincom_labels = nothing)

#PART 2: SET UP MATRIX FOR SANDWICH FORMULA
wy = Transform*beta
zz = Z'*Z
#zz = Z'*Z No need to compute anymore
numerator=Z\wy
sigma_i = sparse(collect(1:n),collect(1:n),[sigma_i ...],n,n)
sigma_i_naive = sparse(collect(1:n),collect(1:n),[(y-X*beta).^2 ...],n,n)
Expand Down Expand Up @@ -1021,32 +1042,36 @@ function lincom_KSS(y,X, Z, Transform, sigma_i; lincom_labels = nothing)
#PART 4: REPORT
println("Inference on Linear Combinations:")
if lincom_labels == nothing
for q=2:r
start = has_cons == 1 ? 2 : 1
for q=start:r
pos = has_cons == 1 ? q-1 : q
if q <= r
println("\nCoefficient of Column ", q-1,": ", numerator[q] )
println("Traditional HC Standard Error of Column ", q-1,": ", sqrt(denominator_naive[q]) )
println("KSS Standard Error of Column ", q-1,": ", sqrt(denominator[q]) )
println("T-statistic of Column ", q-1, ": ", test_statistic[q])
println("\nCoefficient of Column ", pos,": ", numerator[q] )
println("Traditional HC Standard Error of Column ", pos,": ", sqrt(denominator_naive[q]) )
println("KSS Standard Error of Column ", pos,": ", sqrt(denominator[q]) )
println("T-statistic of Column ", pos, ": ", test_statistic[q])
end
end
else
for q=2:r
tell_me = lincom_labels[q-1]
start = has_cons == 1 ? 2 : 1
for q=start:r
pos = has_cons == 1 ? q-1 : q
tell_me = lincom_labels[pos]
println("\nCoefficient on ", tell_me,": ", numerator[q] )
println("Traditional HC Standard Error on ", tell_me,": ", sqrt(denominator_naive[q]) )
println("KSS Standard Error on ", tell_me,": ", sqrt(denominator[q]) )
println("T-statistic on ", tell_me,": ", test_statistic[q])
end
end

test_statistic = test_statistic[2:end]
linear_combination = numerator[2:end]
SE_linear_combination_KSS = sqrt.(denominator[2:end])
SE_naive = sqrt.(denominator[2:end])
test_statistic = has_cons == 1 ? test_statistic[2:end] : test_statistic
linear_combination = has_cons == 1 ? numerator[2:end] : numerator
SE_linear_combination_KSS = has_cons == 1 ? sqrt.(denominator[2:end]) : sqrt.(denominator)
SE_naive = has_cons == 1 ? sqrt.(denominator[2:end]) : sqrt.(denominator)
return (test_statistic = test_statistic , linear_combination = linear_combination, SE_linear_combination_KSS = SE_linear_combination_KSS, SE_naive =SE_naive)
end

# # PART 5: Joint-test. Quadratic form beta'*A*beta MAYBE FOR FUTURE VERSION!
# # PART 5: Joint-test. Quadratic form beta'*A*beta MAYBE FOR FUTURE VERSION! This can be used to create a F-stat for the regression in lincom_KSS
# if joint_test ==false & (joint_test_regressors != nothing )
# println("Joint test will not be computed. You need to set the argument option joint_test to true.")
# end
Expand Down

0 comments on commit 249f230

Please sign in to comment.