-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathfunctions_rebecca.R
116 lines (90 loc) · 3.38 KB
/
functions_rebecca.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Functions (Rebecca):
# Calculate means for coded groups
# Runs all linear models
# Internalizing year 9 x # suspensions/expulsions in year 15
# Externalizing behaviors year 9 x # suspensions/expulsions in year 15
#Calculates subscores for internalizing and externalizing behaviors
ff_sub <- ff_sub %>%
mutate(int_scores = (k5g2a + k5g2c + k5g2e + k5g2g + k5g2i + k5g2j + k5g2k + k5g2l)/8, ext_scores = (k5g2b + k5g2d + k5g2f + k5g2h + k5g2m + k5g2n)/6) %>%
filter(int_scores >=0, ext_scores >= 0)
ff_sub_lm <- ff_sub %>%
rowwise() %>%
select(idnum, starts_with("k6d6"), starts_with("k5f1"), int_scores, ext_scores, cm1bsex, ck6ethrace, p5l12g, p6c21) %>%
filter(rowSums(across(where(is.numeric)))>=0 & ck6ethrace >=0 & p5l12g >=0 & p6c21 >=0) %>%
mutate(del_beh_9 = sum(c_across(starts_with("k5f1"))),
del_beh_15 = sum(c_across(starts_with("k6d6"))))
means_df <- function(df, ...) {
means <- map(df, mean, ...) # calculate means
nulls <- map_lgl(means, is.null) # find null values
means_l <- means[!nulls] # subset list to remove nulls
as.data.frame(means_l) # return a df
}
means_df(ff_sub)
ff_sub$idnum <- as.numeric(ff_sub$idnum)
ff_sub$ck6ethrace <- as.numeric(ff_sub$ck6ethrace)
summary(lm(del_beh_15 ~ ext_scores + cm1bsex + ck6ethrace, data = ff_sub)) #HK- should data be ff_sub_lm or ff_sub?
summary(lm(del_beh_15 ~ int_scores + cm1bsex + ck6ethrace , data = ff_sub))
mod_db_int <- ff_sub_lm %>%
group_by(idnum) %>%
nest() %>%
mutate(
model = map(
data, ~lm(del_beh_15 ~ int_scores + cm1bsex + ck6ethrace, data = .x)
)
)
mod_db_ext <- ff_sub %>%
group_by(idnum) %>%
nest() %>%
mutate(
model = map(
data, ~lm(del_beh_15 ~ ext_scores + cm1bsex + ck6ethrace, data = .x)
)
)
#HK - found that lm formula can be sent as a string.
#Thought an alternate would be to form a string formula which can be customized in many ways
# and then calling the lm function
# If we only care about the coef., we can also run the pull_coef in this step
# model_fit <- function(x){
# model_str <- paste0("del_beh_15 ~ ", x, " + cm1bsex + ck6ethrace")
#
# ff_sub_lm %>%
# group_by(idnum) %>%
# nest() %>%
# mutate(
# model = map(
# data, ~do.call("lm", list(as.formula(model_str), data = quote(.x)))
# ),
# intercept = map_dfr(model, pull_coef)
# )
# }
#
# mod_db_int <- model_fit("int_scores")
# mod_db_ext <- model_fit("ext_scores")
pull_coef <- function(model, coef_name) {
coef(model)[coef_name]
}
mod_db_ext %>%
mutate(intercept = map_dfr(model, pull_coef))
mod_db_int %>%
mutate(intercept = map_dfr(model, pull_coef))
mods <- function(data, x, y, points = FALSE, ...) {
p <- ggplot(data, aes({{x}}, {{y}}))
if (points) {
p <- p + geom_point(color = "gray80")
}
p +
geom_smooth(method = "lm",
color = "magenta",
...) +
geom_smooth(...)
}
mods(ff_sub, int_scores, del_beh_15) +
labs(title = "Checking linearity",
subtitle = "Linear vs LOESS fits",
x = "Engine Displacement",
y = "Miles Per gallon")
mods(ff_sub, ext_scores, del_beh_15) +
labs(title = "Checking linearity",
subtitle = "Linear vs LOESS fits",
x = "Engine Displacement",
y = "Miles Per gallon")