-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHazdecomp_Example.R
163 lines (141 loc) · 5.1 KB
/
Hazdecomp_Example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# HazdecompExample.R
rm(list=ls())
setwd("////")
require(tidyverse)
require(survey)
##########################################################################
# Notes:
# data should be constructed as person-epsiod/counting-process format
# model formulas should include offset()
# -set to 0 or log interval width in discrete-time models
# (if NULL will be set to 0)
# -set to log exposure: log(t[i] - t[i-1]) in continuous-time models
# -account for frailty
# offset(logexp + logv)
# where v is a known relative risk at the
# person level or higher (i.e., persons nested in clusters)
# (example uses sibling data identified by famid)
# details in Powers and Yun (2008) Soc. Methodology
# decompose differences in hazard of premarital birth betweeen
# Black and White women in the NLSY79 data.
############################################################################
# D A T A D E S C R I P T I O N
# data are structured as split-episodes based on age at risk of a first
# premarital birth: [12,16) [16,18) [18,20) [20,22) [22,24) [24,36)
# (mean-centered) :
# pctsmom = proportion of years from age 0-18 spent in single mother household
# nfamtran = number of family-structure changes from 0-18
# medu = respondent mother's years of schooling)
# inc1000 = adjusted family income in thousands of $
# nosibs = number of older siblings
# magebir = respondent mother's age at respondent's birth)
# age = factor for age-intervals
# race = factor for race
# famid = family identifier for sibling clusters
# iid = individual identifier
#############################################################################
df <- read.csv(file='hazdata.csv')
# libraries
require(survey)
# set survey design: required regardless of actual design:
# clusters, no weights
dfsvy <- svydesign(ids=~iid, weights=~1, cluster=~famid, nest=TRUE, data=df)
# no weights, no clusters
dfsvy <- svydesign(ids=~iid, weights=~1, nest=TRUE, data=df)
# OK (ignore warning)
dfsvy <- svydesign(ids=~iid, data=df)
# make subsets etc.
# check on values of grouping variable
table(df$race) #OK
# define subsets here
Asub <- subset(dfsvy, race == "Black")
Bsub <- subset(dfsvy, race == "White")
#
# source main subroutines
source('hazard_decomp_functions_svy.R')
# rate decomposition
#
# test calls
m1 <- decomp.pwcexp(devnt ~ age + pctsmom + nfamtran + medu +
inc1000 + nosibs + magebir + offset(logexp) - 1,
Asub, Bsub, scale=1000, printit=TRUE)
m2 <- decomp.dtlogit(devnt ~ age + pctsmom + nfamtran + medu +
inc1000 + nosibs + magebir - 1,
Asub, Bsub, scale=100, printit=TRUE)
m2 <- decomp.dtcloglog(devnt ~ age + pctsmom + nfamtran + medu +
inc1000 + nosibs + magebir - 1,
Asub, Bsub, scale=100, printit=TRUE)
require(tidyverse)
# average two decompositions: A-B and B-A (reversed)
m1a <- decomp.pwcexp(devnt ~ age + pctsmom + nfamtran + medu +
inc1000 + nosibs + magebir + offset(logexp) - 1,
Asub, Bsub, scale=1000, reverse=FALSE)
m1b <- decomp.pwcexp(devnt ~ age + pctsmom + nfamtran + medu +
inc1000 + nosibs + magebir + offset(logexp) - 1,
Asub, Bsub, scale=1000, reverse=TRUE)
E.ave <- ave.decompE(m1a, m1b, scale=1000)
C.ave <- ave.decompC(m1a, m1b, scale=1000)
# simple tables
E.ave[, -c(1)]
C.ave[, -c(1)]
# coef plots
pE <- ggplot(E.ave, aes(b, term)) +
geom_point(color="seagreen") +
geom_errorbarh(aes(xmin=b.lower, xmax=b.upper),
linewidth=.3,
height=.3,
linetype=1,
color="turquoise4") +
# add in a dotted line at zero
geom_vline(xintercept = 0, lty = 2, color="green") +
theme_gray() +
labs(
x = "E-coefs",
y = NULL,
title = "Plot of Characteristic Effects (averaged)"
)
pC <- ggplot(C.ave, aes(b, term)) +
geom_point(color="seagreen") +
geom_errorbarh(aes(xmin=b.lower, xmax=b.upper),
linewidth=.3,
height=.3,
linetype=1,
color="turquoise4") +
# add in a dotted line at zero
geom_vline(xintercept = 0, lty = 2, color="green") +
theme_gray() +
labs(
x = "C-coefs",
y = NULL,
title = "Plot of Coefficient Effects (averaged)"
)
gridExtra::grid.arrange(pE,pC, nrow=1, ncol=2)
# source all below here
ave.decompE <- function(D1, D2, scale) {
s <- scale
vn <- D1$vnames
b1 <- D1$bE
b2 <- D2$bE
v1 <- D1$varbE
v2 <- D2$varbE
b <- s * (b1 - b2)/2
v <- (v1 + v2)/4
se.b <- sqrt(diag(v)) * s
return(data.frame(term=vn, b=b, se=se.b, z=b/se.b,
b.lower = b-1.96*se.b,
b.upper = b+1.96*se.b))
}
ave.decompC <- function(D1, D2, scale) {
s <- scale
vn <- D1$vnames
b1 <- D1$bC
b2 <- D2$bC
v1 <- D1$varbC
v2 <- D2$varbC
b <- s * (b1 - b2)/2
v <- s^2 * (v1 + v2)/4
se.b <- sqrt(diag(v))
return(data.frame(term = vn, b=b, se=se.b, z=b/se.b,
b.lower = b-1.96*se.b,
b.upper = b+1.96*se.b))
}