-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcognitive_domains.R
228 lines (203 loc) · 7.43 KB
/
cognitive_domains.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
library(tidyverse)
library(vroom)
#' Prepare Data for Analysis
#'
#' Reads a CSV file, keeps the rows that belong to the requested domains and
#' scales (rows whose `z_mean_domain` is missing are dropped), keeps a fixed
#' set of score columns, and writes the result to `paste0(pheno, ".csv")`.
#'
#' @param domains A character vector of domains to filter the data by.
#' @param pheno A string representing the phenotype; used as the base name of
#'   the output CSV file.
#' @param scales A character vector of scales to filter the data by.
#' @param type A string giving the name of the input CSV file. Default is
#'   "neurocog.csv".
#'
#' @return NULL (invisible) after writing the filtered data to a CSV file.
#'   The function stops with an error if any required argument is not
#'   supplied or if the input lacks one of the columns it writes out.
#' @export
#'
#' @examples
#' \dontrun{
#' domains <- c("Memory")
#' pheno <- "memory"
#' scales <- c("Scale1", "Scale2")
#' prepare_data(domains, pheno, scales)
#' }
prepare_data <- function(domains, pheno, scales, type = "neurocog.csv") {
  # Fail fast when a required argument was not supplied at all.
  if (missing(domains) || missing(pheno) || missing(scales)) {
    stop(
      "All arguments (domains, pheno, scales) must be provided.",
      call. = FALSE
    )
  }

  # Read the input CSV.
  data <- vroom::vroom(type)

  # Columns written to the output file, in output order. The same vector
  # drives validation and selection so the two can never drift apart
  # (previously `z_sd_domain` was selected but never validated).
  output_columns <- c(
    "test", "test_name", "scale", "raw_score", "score", "ci_95",
    "percentile", "range", "domain", "subdomain", "narrow", "pass",
    "verbal", "timed", "description", "result", "z",
    "z_mean_domain", "z_sd_domain",
    "z_mean_subdomain", "z_sd_subdomain",
    "z_mean_narrow", "z_sd_narrow",
    "z_mean_pass", "z_sd_pass",
    "z_mean_verbal", "z_sd_verbal",
    "z_mean_timed", "z_sd_timed"
  )

  # Report exactly which columns are absent instead of a generic failure.
  missing_columns <- setdiff(output_columns, colnames(data))
  if (length(missing_columns) > 0) {
    stop(
      "The data does not contain all required columns. Missing: ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # Keep rows for the requested domains and scales that have a domain-level
  # z-score mean. `.env$` pins `domains`/`scales` to the function arguments
  # even if the data happens to contain columns with those names.
  data <- data %>%
    dplyr::filter(
      domain %in% .env$domains,
      !is.na(z_mean_domain),
      scale %in% .env$scales
    ) %>%
    dplyr::select(dplyr::all_of(output_columns))

  # Write the filtered data to a CSV file named after the phenotype.
  readr::write_csv(
    data,
    paste0(pheno, ".csv"),
    na = "",
    col_names = TRUE,
    append = FALSE
  )

  # Return NULL invisibly to avoid printing the data frame.
  invisible(NULL)
}
# Inputs for the extraction: the domain to keep, the phenotype label that
# names the output file, and the scales to retain.
domains <- "General Cognitive Ability"
pheno <- "iq"
scales <- c(
  "Animal Coding", "Arithmetic", "Attention Domain",
  "Attention Index (ATT)", "Attention Index"
)
# Run the extraction. prepare_data() returns NULL invisibly, so `data` holds
# NULL after this call; the useful output is the CSV file it writes.
data <- prepare_data(domains = domains, pheno = pheno, scales = scales)
# Second function
library(ggplot2)
library(gt)
#' Visualize Data with Table and Dot Plot
#'
#' Builds a table and a dot plot for a dataset. The table is created with
#' `bwu::tbl_gt` from the rows whose `scale` is in `scales_to_keep`; the dot
#' plot is created with `bwu::dotplot` from the full, unfiltered `data`.
#'
#' @param data A data frame containing the data to be visualized. Must contain
#'   a `scale` column and the columns named by `x` and `y`.
#' @param scales_to_keep A vector of scales; only matching rows go into the
#'   table.
#' @param pheno A string representing the phenotype of interest. Used in the
#'   table name (`table_<pheno>`) and in the figure file name.
#' @param domain A string representing the domain of interest. Used in the
#'   figure file name (`fig_<pheno>_<domain>.svg`).
#' @param grp_pheno A list containing grouped phenotypic data; its
#'   `scaled_score`, `standard_score` and `t_score` elements are passed to
#'   `bwu::tbl_gt`. Default is NULL.
#' @param x A string naming the column of `data` to use as the x variable.
#' @param y A string naming the column of `data` to use as the y variable.
#'
#' @return A list with two elements: `tbl_gt` (the result of `bwu::tbl_gt`)
#'   and `dotplot` (the result of `bwu::dotplot`).
#' @export
#'
#' @examples
#' \dontrun{
#' data <- data.frame(scale = c("A", "B", "C"), x = rnorm(3), y = rnorm(3))
#' scales_to_keep <- c("A", "B")
#' pheno <- "example_pheno"
#' domain <- "example_domain"
#' visualize_data(data, scales_to_keep, pheno, domain)
#' }
visualize_data <- function(data,
                           scales_to_keep,
                           pheno,
                           domain,
                           grp_pheno = NULL,
                           x = "x",
                           y = "y") {
  # Fail fast when a required argument was not supplied at all.
  if (missing(data) || missing(scales_to_keep) || missing(pheno) || missing(domain)) {
    stop(
      "All arguments (data, scales_to_keep, pheno, domain) must be provided",
      call. = FALSE
    )
  }

  # `x` and `y` are used with `[[`, so each must be exactly one column name.
  if (!is.character(x) || length(x) != 1L || !is.character(y) || length(y) != 1L) {
    stop("'x' and 'y' must each be a single column name", call. = FALSE)
  }

  # Name the columns actually requested (not the literal defaults) and list
  # the ones that are absent, so the message is accurate for custom x/y.
  missing_cols <- setdiff(c("scale", x, y), colnames(data))
  if (length(missing_cols) > 0) {
    stop(
      "Data must contain 'scale', '", x, "', and '", y, "' columns (missing: ",
      paste(missing_cols, collapse = ", "), ")",
      call. = FALSE
    )
  }

  # Rows shown in the table: only the requested scales. `.env$` pins
  # `scales_to_keep` to the function argument.
  data_tbl <- data %>%
    dplyr::filter(scale %in% .env$scales_to_keep)

  # Table visualization parameters
  table_name <- paste0("table_", pheno)
  vertical_padding <- 0
  multiline <- TRUE

  # Notes and source information
  fn_scaled_score <- gt::md("Score = Scaled score (Mean = 10 [50th‰], SD ± 3 [16th‰, 84th‰])")
  fn_standard_score <- gt::md("Score = Index score (Mean = 100 [50th‰], SD ± 15 [16th‰, 84th‰])")
  fn_t_score <- gt::md("Score = T score (Mean = 50 [50th‰], SD ± 10 [16th‰, 84th‰])")
  fn_z_score <- gt::md("Score = z-score (Mean = 0 [50th‰], SD ± 1 [16th‰, 84th‰])")
  source_note <- gt::md("Score = _T_ score (Mean = 50 [50th‰], SD ± 10 [16th‰, 84th‰])")

  # Create the table using bwu::tbl_gt
  tbl_gt <- bwu::tbl_gt(
    data = data_tbl,
    pheno = pheno,
    table_name = table_name,
    source_note = source_note,
    fn_scaled_score = fn_scaled_score,
    fn_standard_score = fn_standard_score,
    fn_t_score = fn_t_score,
    fn_z_score = fn_z_score,
    grp_scaled_score = if (!is.null(grp_pheno)) grp_pheno[["scaled_score"]] else NULL,
    grp_standard_score = if (!is.null(grp_pheno)) grp_pheno[["standard_score"]] else NULL,
    grp_t_score = if (!is.null(grp_pheno)) grp_pheno[["t_score"]] else NULL,
    dynamic_grp = grp_pheno,
    vertical_padding = vertical_padding,
    multiline = multiline,
    na.rm = TRUE
  )

  # Dot plot visualization parameters
  filename <- paste0("fig_", pheno, "_", domain, ".svg")
  colors <- NULL
  return_plot <- TRUE

  # The dot plot is fed the unfiltered `data`, not `data_tbl`.
  # NOTE(review): presumably intentional (plot all rows, tabulate a subset);
  # confirm against the intended report layout.
  x_values <- data[[x]]
  y_values <- data[[y]]

  # Create the dot plot using bwu::dotplot
  dotplot <- bwu::dotplot(
    data = data,
    x = x_values,
    y = y_values,
    colors = colors,
    return_plot = return_plot,
    filename = filename,
    na.rm = TRUE
  )

  # Return both visualizations as a list
  list(tbl_gt = tbl_gt, dotplot = dotplot)
}
# Define the variables for domains, phenotype, and scales of interest
domains <- c("General Cognitive Ability")
pheno <- "iq"
scales_to_keep <- c(
  "Animal Coding",
  "Arithmetic",
  "Attention Domain",
  "Attention Index (ATT)",
  "Attention Index"
)
# Test batteries grouped by the score type they report; passed to the table.
grp_pheno <- list(
  scaled_score = c("WAIS-IV", "D-KEFS", "NEPSY-2", "WISC-5", "WISC-V", "WPPSI-IV", "RBANS"),
  standard_score = c("NAB", "NAB-S", "WISC-5", "WISC-V", "WAIS-IV", "WPPSI-IV", "WASI-II", "RBANS", "NAB Executive Functions", "NAB Attention"),
  t_score = c("NAB", "NAB-S", "NIH EXAMINER", "Trail Making Test", "Daily Living", "NAB Executive Functions", "NAB Attention")
)
# prepare_data() writes "<pheno>.csv" and returns NULL invisibly, so its
# result cannot be passed on directly: run it, then read the file it wrote.
prepare_data(domains, pheno, scales_to_keep)
data_pheno <- vroom::vroom(paste0(pheno, ".csv"))
# NOTE(review): x/y are assumed to be the subdomain-level z-score mean and the
# subdomain label (both are columns written by prepare_data); confirm these
# are the intended dot plot axes.
data_vis <- visualize_data(
  data = data_pheno,
  scales_to_keep = scales_to_keep,
  pheno = pheno,
  domain = domains,
  grp_pheno = grp_pheno,
  x = "z_mean_subdomain",
  y = "subdomain"
)