Codes are debugged

sokbae · Oct 24, 2020 · c31c742 · c31c742
1 parent c1771db
commit c31c742
Show file tree

Hide file tree

Showing 8 changed files with 67 additions and 24 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,3 +5,4 @@ export(avg_RR_logit)
 export(cicc_AR)
 export(cicc_RR)
 export(cicc_plot)
+export(trim_pr)
diff --git a/R/avg_AR_logit.R b/R/avg_AR_logit.R
@@ -8,8 +8,10 @@
 #' @param x n by d matrix of covariates
 #' @param sampling 'cc' for case-control sampling; 'cp' for case-population sampling (default =  'cc')
 #' @param p_upper specified upper bound for the unknown true case probability (default = 1)
-#' @param length specified length of a sequence from 0 to p_upper (default = 20)
+#' @param length specified length of a sequence from 0 to p_upper (default = 21)
 #' @param interaction TRUE if there are interaction terms in the retrospective logistic model; FALSE if not (default = TRUE)
+#' @param eps a small constant that determines the trimming of the estimated probabilities.
+#' Specifically, the estimate probability is trimmed to be between eps and 1-eps (default = 1e-8).
 #'
 #' @return An S3 object of type "ciccr". The object has the following elements.
 #' \item{est}{(length)-dimensional vector of the average of the upper bound of causal attributable risk}
@@ -30,7 +32,7 @@
 #' Econometrica, 68(4), 997-1010.
 #'
 #' @export
-avg_AR_logit = function(y, t, x, sampling = 'cc', p_upper = 1L, length = 20L, interaction = TRUE){
+avg_AR_logit = function(y, t, x, sampling = 'cc', p_upper = 1L, length = 21L, interaction = TRUE, eps=1e-8){
 
   # Check whether y is either 0 or 1
   if ( sum( !(y %in% c(0,1)) ) > 0 ){
@@ -49,14 +51,12 @@ avg_AR_logit = function(y, t, x, sampling = 'cc', p_upper = 1L, length = 20L, in
 
   # Prospective logistic estimation of P(Y=1|X=x)
 
-  small_e = 1e-8
-
   lm_pro = stats::glm(y~x, family=stats::binomial("logit"))
   est_pro = stats::coef(lm_pro)
   fit_pro = stats::fitted.values(lm_pro)
   fit_pro = matrix(fit_pro,ncol=1)
 
-  fit_pro = fit_pro + small_e*(fit_pro < small_e) + (1-small_e)*(fit_pro > (1-small_e))
+  fit_pro = trim_pr(fit_pro)
 
   # Estimation of r(x,p)
 
@@ -67,7 +67,6 @@ avg_AR_logit = function(y, t, x, sampling = 'cc', p_upper = 1L, length = 20L, in
   if (sampling=='cc'){
     r_num = ((1-hhat)*fit_pro)%*%pseq
     r_den = r_num + (hhat*(1-fit_pro))%*%(1-pseq)
-    r_den = r_den + small_e*(r_den < small_e)
     r_cc = r_num/r_den
   }  else if (sampling=='cp'){
     r1 = (1-hhat)/hhat
@@ -97,13 +96,10 @@ avg_AR_logit = function(y, t, x, sampling = 'cc', p_upper = 1L, length = 20L, in
   fit_ret_y1 = exp(x_reg_y1%*%est_ret)/(1+exp(x_reg_y1%*%est_ret))
   fit_ret_y0 = exp(x_reg_y0%*%est_ret)/(1+exp(x_reg_y0%*%est_ret))
 
-  fit_ret_y1 = fit_ret_y1 + small_e*(fit_ret_y1 < small_e) + (1-small_e)*(fit_ret_y1 > (1-small_e))
-  fit_ret_y0 = fit_ret_y0 + small_e*(fit_ret_y0 < small_e) + (1-small_e)*(fit_ret_y0 > (1-small_e))
-
   # Estimation of Gamma_AR(x,p)
 
-  P11 = fit_ret_y1
-  P10 = fit_ret_y0
+  P11 = trim_pr(fit_ret_y1)
+  P10 = trim_pr(fit_ret_y0)
   P11 = matrix(P11, ncol=1)%*%matrix(1, nrow=1, ncol=ncol(pseq))
   P10 = matrix(P10, ncol=1)%*%matrix(1, nrow=1, ncol=ncol(pseq))
 
@@ -115,11 +111,9 @@ avg_AR_logit = function(y, t, x, sampling = 'cc', p_upper = 1L, length = 20L, in
   if (sampling=='cc'){
 
     term1_cc_den = P10 + r_cc*(P11-P10)
-    term1_cc_den = term1_cc_den + small_e*(term1_cc_den < small_e)
     term1_cc = P11/term1_cc_den
 
     term2_cc_den = P00 + r_cc*(P01-P00)
-    term2_cc_den = term2_cc_den + small_e*(term2_cc_den < small_e)
     term2_cc = P01/term2_cc_den
 
     GammaAR_cc = term1_cc - term2_cc
@@ -147,5 +141,3 @@ class(outputs) = "ciccr"
 outputs
 
 }
-
-
diff --git a/R/cicc_AR.R b/R/cicc_AR.R
@@ -12,6 +12,8 @@
 #' @param length specified length of a sequence from 0 to p_upper (default = 21)
 #' @param interaction TRUE if there are interaction terms in the retrospective logistic model; FALSE if not (default = TRUE)
 #' @param no_boot number of bootstrap repetitions to compute the confidence intervals (default = 0)
+#' @param eps a small constant that determines the trimming of the estimated probabilities.
+#' Specifically, the estimate probability is trimmed to be between eps and 1-eps (default = 1e-8).
 #'
 #' @return An S3 object of type "ciccr". The object has the following elements:
 #' \item{est}{(length)-dimensional vector of the upper bounds on the average of attributable risk}
@@ -35,7 +37,7 @@
 #' Econometrica, 68(4), 997-1010.
 #'
 #' @export
-cicc_AR = function(y, t, x, sampling = 'cc', p_upper = 1L, cov_prob = 0.95, length = 21L, interaction = TRUE, no_boot = 0L){
+cicc_AR = function(y, t, x, sampling = 'cc', p_upper = 1L, cov_prob = 0.95, length = 21L, interaction = TRUE, no_boot = 0L, eps=1e-8){
 
   # Check whether p_upper is in (0,1]
   if (p_upper <=0L || p_upper > 1L){
@@ -48,7 +50,7 @@ cicc_AR = function(y, t, x, sampling = 'cc', p_upper = 1L, cov_prob = 0.95, leng
   }
 
   n = length(y)
-  results = avg_AR_logit(y, t, x, sampling=sampling, p_upper=p_upper, length=length, interaction=interaction)
+  results = avg_AR_logit(y, t, x, sampling=sampling, p_upper=p_upper, length=length, interaction=interaction, eps=eps)
   est = results$est
   est = est*(est <= 1) + 1*(est > 1) # truncated at 1 since AR cannot be larger than 1
 
@@ -67,7 +69,7 @@ cicc_AR = function(y, t, x, sampling = 'cc', p_upper = 1L, cov_prob = 0.95, leng
   bt_t = bt_data[bt_i,2]
   bt_x = bt_data[bt_i,c(-1,-2)]
 
-  bt_results = avg_AR_logit(bt_y, bt_t, bt_x, sampling=sampling, p_upper=p_upper, length=length, interaction=interaction)
+  bt_results = avg_AR_logit(bt_y, bt_t, bt_x, sampling=sampling, p_upper=p_upper, length=length, interaction=interaction, eps=eps)
   bt_est = bt_results$est
   bt_est_matrix = rbind(bt_est_matrix,bt_est)
   }
@@ -82,9 +84,11 @@ cicc_AR = function(y, t, x, sampling = 'cc', p_upper = 1L, cov_prob = 0.95, leng
 
   tmp = (bt_est_matrix <= matrix(1,nrow=nrow(bt_est_matrix),ncol=1)%*%matrix(est,nrow=1))
   pstar = apply(tmp, 2, mean)
-  zstar = stats::qnorm(mean(pstar))
-  bc_prob = stats::pnorm(cov_prob + 2*zstar)
+  zstar = stats::qnorm(pstar)
+  z0 = stats::qnorm(cov_prob)
+  bc_prob = stats::pnorm(z0 + 2*zstar)
   bt_ci = apply(bt_est_matrix, 2, stats::quantile, prob=bc_prob) # bias-corrected percentile method
+  bt_ci = diag(bt_ci)
 
   bt_ci = bt_ci*(bt_ci <= 1) + 1*(bt_ci > 1) # truncated at 1 since AR cannot be larger than 1
   }

diff --git a/R/trim_pr.R b/R/trim_pr.R
@@ -0,0 +1,18 @@
+#' @title Trimming the estimates to be strictly between 0 and 1
+#'
+#' @description Trimming the estimates to be strictly between 0 and 1
+#'
+#' @param ps n-dimensional vector of estimated probabilities
+#' @param eps a small constant that determines the trimming of the estimated probabilities.
+#' Specifically, the estimate probability is trimmed to be between eps and 1-eps (default = 1e-8).
+#'
+#' @return
+#' \item{ps_tr}{n-dimensional trimmed estimates}
+#'
+#' @export
+trim_pr = function(ps, eps=1e-8){
+
+  ps_tr = eps*(ps < eps) + ps*(ps >= eps)*(ps <= 1-eps) + (1-eps)*(ps > 1-eps)
+
+  ps_tr
+}
diff --git a/man/avg_AR_logit.Rd b/man/avg_AR_logit.Rd
diff --git a/man/cicc_AR.Rd b/man/cicc_AR.Rd
diff --git a/man/trim_pr.Rd b/man/trim_pr.Rd