diff --git a/R/figures/exploratory-fig1.R b/R/figures/exploratory-fig1.R new file mode 100644 index 0000000..040afdc --- /dev/null +++ b/R/figures/exploratory-fig1.R @@ -0,0 +1,224 @@ +# Plot a summary figure with performance across all experiments. +setwd("git/DA-analysis") +options(stringsAsFactors = FALSE) +library(tidyverse) +library(magrittr) +library(effsize) +library(upstartr) +source("R/theme.R") + +remove_quantile_per_method = function(df, val_col, group_cols, quantile_range=c(0.01, 0.99)) { + df$idx = 1:nrow(df) + group_grid = data.frame() + for (group_col in group_cols) { + if (nrow(group_grid) == 0) { + group_grid = df[,group_col] + } else { + group_grid %<>% tidyr::crossing(., df[,group_col]) + } + } + + new_df = data.frame() + for (i in 1:nrow(group_grid)) { + tmp_row = group_grid[i,] + tmp_df = tmp_row %>% left_join(df) + quan_range = quantile(unlist(tmp_df[,val_col]), quantile_range) + tmp_df %<>% mutate( + filter_now = ifelse(get(val_col) < quan_range[2] & get(val_col) > quan_range[1], T, F) + ) + # if remove all -- probably means all 0 + if (sum(tmp_df$filter_now) == 0) tmp_df$filter_now = T + new_df %<>% rbind(tmp_df %>% dplyr::select(filter_now, idx)) + } + new_df = df %>% + left_join(new_df) %>% + filter(filter_now) %>% + dplyr::select(-filter_now, -idx) + return(new_df) +} + + +###############################################################################- +## bulk logFC plot #### +###############################################################################- + +# load data +dat = readRDS("data/summaries/concordance_res/matched_bulk/all_aucc_logFC_absolute_filter.rds") %>% + type_convert() %>% + # remove pseudoreplicates + filter(!sc_pseudo_repl) +# remove fixed columns +keep = map_lgl(dat, ~ n_distinct(.x) > 1) +dat %<>% extract(, keep) + +# restructure 'expr' column +dat %<>% mutate(expr_set = ifelse(grepl('pseudo', expr), 'Random', 'logFC') %>% + fct_relevel('Random', 'logFC'), + expr = gsub("_pseudo", "", expr)) + +# average over bulk methods +avg = dat %>% + group_by_at(vars(-bulk_da_method, -bulk_da_type, + -bulk_features, -overlap, -aucc)) %>% + summarise(aucc = mean(aucc), + n = n()) %>% + ungroup() + + # re-code x-values and colors +avg %<>% + mutate(method = paste0(sc_da_method, '-', sc_da_type) %>% + gsub("-singlecell|-binomial|-exact", "", .), + method = gsub("snapatac", "SnapATAC::findDAR", method) %>% + gsub("fisher", "FET", .) %>% + gsub("-LR_.*|-perm.*$", "", .), + color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + 'singlecell', sc_da_family) %>% + fct_recode('single-cell' = 'singlecell', + 'other' = 'non_libra') %>% + fct_relevel('single-cell', 'pseudobulk', 'other')) + +# re-code filtering +avg %<>% + mutate(method = fct_recode(method, + "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', + 'Permutation~test' = 'permutation', + 't~test' = 't', + 'LR[peaks]' = 'LR_peaks', + 'LR[clusters]' = 'LR', + 'Binomial' = 'binomial', + 'Wilcoxon~rank-sum~test' = 'wilcox', + 'Fisher~exact~test' = 'FET', + 'Negative~binomial' = 'negbinom') %>% + as.character()) %>% + # drop limma + filter(!grepl('limma', method)) + +avg1 = avg %>% + filter(!grepl('pseudo-replicates', method)) %>% + filter(!(paper == 'GonzalesBlas_2018' & !grepl('0 h', cell_type))) %>% + mutate(logFC = gsub("^.*_", "", expr) %>% + replace(. == 'all', 0)) + +dplyr::count(avg1, method) + +labs = avg1 %>% + group_by(logFC, expr_set, method, color) %>% + summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% + ungroup() %>% + mutate(label = formatC(median, format = 'f', digits = 2)) +pal = c('logFC' = 'black', + 'Random' = 'grey75') + +p1 = avg1 %>% + filter( + logFC != 1, logFC != 0 + ) %>% + filter( + !(method == 'Negative~binomial' & aucc > 0.1), + !(method != 'Negative~binomial' & aucc > 0.01) + ) %>% + ggplot(aes(x = logFC, y = aucc, + color = expr_set, fill = expr_set)) + + facet_wrap(~ method, scales = 'free', ncol = 7, labeller = label_parsed) + + geom_boxplot(aes(y = aucc), outlier.shape = NA, alpha = 0.4, width = 0.7, + size = 0.3) + + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + + # geom_text(data = labs, aes(y = -0.12, label = label), size = 1.75, hjust = 0, + # color = 'black') + + scale_color_manual('Filter by:', values = pal) + + scale_fill_manual('Filter by:', values = pal) + + scale_x_reordered(expression('Absolute fold-change filter')) + + scale_y_continuous('AUCC') + + boxed_theme(size_sm = 5, size_lg = 6) + + theme(legend.position = 'top', + legend.key.width = unit(0.4, 'lines'), + # aspect.ratio = 1.2, + # axis.text.x = element_text(angle=45, hjust=1) + ) +p1 +ggsave("fig/Exp_Fig1/bulk-concordance-absolute-logFC-filter-overview.pdf", p1, + width = 16, height = 6, units = "cm", useDingbats = FALSE) + +###############################################################################- +## AUCC: boxplot, enrichment over random #### +###############################################################################- + +enr = avg1 %>% + filter( + logFC != 1, logFC != 0 + ) %>% + group_by_at(vars(-expr_set, -aucc)) %>% + summarise(delta = ifelse(n() == 2, + aucc[expr_set == 'logFC'] - aucc[expr_set == 'Random'], + aucc), + nn = n()) %>% + ungroup() +pal = da_analysis_colors +p2 = enr %>% + filter(logFC != '0.0') %>% + remove_quantile_per_method(., val_col='delta', group_cols = c('method', 'logFC')) %>% + ggplot(aes(x = logFC, y = delta, + color = method, fill = method)) + + facet_wrap(~ method, scales = 'free', ncol = 7, labeller = label_parsed) + + geom_boxplot(aes(y = delta), outlier.shape = NA, alpha = 0.4, width = 0.6, + size = 0.4) + + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + + # geom_text(data = labs, aes(y = -0.12, label = label), size = 1.75, hjust = 0, + # color = 'black') + + scale_color_manual('', values = pal) + + scale_fill_manual('', values = pal) + + scale_x_reordered(expression('Absolute fold-change filter')) + + scale_y_continuous(expression(Delta~AUCC)) + + boxed_theme(size_sm = 5, size_lg = 6) + + theme(legend.position = 'none', + # aspect.ratio = 0.8, + # axis.text.x = element_text(angle=45, hjust=1), + axis.title.y = element_text(size=6) + ) +p2 +ggsave("fig/Exp_Fig1/bulk-concordance-absolute-logFC-filter-enrichment.pdf", p2, + width = 16, height = 6, units = "cm", useDingbats = FALSE) + +grid = distinct(enr, method, logFC) %>% filter(logFC != '0.0') +eff = pmap_dfr(grid, function(...) { + current = tibble(...) + df = filter(avg1, method == current$method, logFC == current$logFC) %>% + mutate(expr_set = factor(expr_set, levels = c('logFC', 'Random'))) + eff = cohen.d(formula = aucc ~ expr_set | Subject(paste(paper, cell_type)), + data = df, + paired = TRUE)$estimate + mutate(current, d = eff) +}) +labs = eff %>% + filter(!is.na(d)) %>% + group_by(logFC) %>% + summarise(mean = mean(d), n = n(), median = median(d), d = median) %>% + ungroup() %>% + mutate(label = format(median, digits = 2, scientific=T)) + +p3 = eff %>% + ggplot(aes(x = factor(logFC), y = d)) + + geom_boxplot(outlier.shape = NA, alpha = 0.4, width = 0.6, size = 0.35, + fill = 'grey82', color = 'grey20') + + geom_label(data = labs, aes(y = Inf, label = label), size = 1.5, hjust = 0.5, + vjust = 1, color = 'black', fill = NA, label.size = NA, + label.padding = unit(0.6, 'lines')) + + # scale_color_manual('', values = pal) + + # scale_fill_manual('', values = pal) + + scale_x_discrete(expression('Absolute fold-change filter')) + + scale_y_continuous('Cohen\'s d\n(vs. random peak removal)', limits = c(-1e-04, 1e-04)) + + ggtitle('Effect of fold-change filtering on concordance\nbetween scATAC-seq and matched bulk ATAC-seq') + + boxed_theme(size_sm = 5, size_lg = 6) + + theme( + # axis.text.x = element_text(angle = 45, hjust = 1), + aspect.ratio = 1, + plot.title = element_text(size=5, hjust=0.5) + ) +p3 +ggsave("fig/Exp_Fig1/bulk-concordance-absolute-logFC-filter-d.pdf", p3, + width = 4, height = 4, units = "cm", useDingbats = FALSE) + +p4 = wrap_plots(p1,p2,ncol=1) +ggsave("fig/Exp_Fig1/top-row.pdf", p4, + width = 16, height = 12, units = "cm", useDingbats = FALSE) + diff --git a/R/figures/exploratory-fig2.R b/R/figures/exploratory-fig2.R new file mode 100644 index 0000000..528d716 --- /dev/null +++ b/R/figures/exploratory-fig2.R @@ -0,0 +1,217 @@ +# Plot a summary figure with performance across all experiments. +setwd("git/DA-analysis") +options(stringsAsFactors = FALSE) +library(tidyverse) +library(magrittr) +library(effsize) +library(upstartr) +source("R/theme.R") + +remove_quantile_per_method = function(df, val_col, group_cols, quantile_range=c(0.01, 0.99)) { + df$idx = 1:nrow(df) + group_grid = data.frame() + for (group_col in group_cols) { + if (nrow(group_grid) == 0) { + group_grid = df[,group_col] + } else { + group_grid %<>% tidyr::crossing(., df[,group_col]) + } + } + + new_df = data.frame() + for (i in 1:nrow(group_grid)) { + tmp_row = group_grid[i,] + tmp_df = tmp_row %>% left_join(df) + quan_range = quantile(unlist(tmp_df[,val_col]), quantile_range) + tmp_df %<>% mutate( + filter_now = ifelse(get(val_col) < quan_range[2] & get(val_col) > quan_range[1], T, F) + ) + # if remove all -- probably means all 0 + if (sum(tmp_df$filter_now) == 0) tmp_df$filter_now = T + new_df %<>% rbind(tmp_df %>% dplyr::select(filter_now, idx)) + } + new_df = df %>% + left_join(new_df) %>% + filter(filter_now) %>% + dplyr::select(-filter_now, -idx) + return(new_df) +} + + +###############################################################################- +## bulk logFC plot #### +###############################################################################- + +# load data +dat = readRDS("data/summaries/concordance_res/multiome/all_aucc_logFC_absolute_filter.rds") %>% + type_convert() +# remove fixed columns +keep = map_lgl(dat, ~ n_distinct(.x) > 1) +dat %<>% extract(, keep) + +# restructure 'expr' column +dat %<>% mutate(expr_set = ifelse(grepl('pseudo', expr), 'Random', 'logFC') %>% + fct_relevel('Random', 'logFC'), + expr = gsub("_pseudo", "", expr)) + +# average over bulk methods +avg = dat %>% + group_by_at(vars(-scrna_da_method, -scrna_da_type, + -bulk_features, -overlap, -aucc)) %>% + summarise(aucc = mean(aucc), + n = n()) %>% + ungroup() + + # re-code x-values and colors +avg %<>% + mutate(method = paste0(scatac_da_method, '-', scatac_da_type) %>% + gsub("-singlecell|-binomial|-exact", "", .), + method = gsub("snapatac", "SnapATAC::findDAR", method) %>% + gsub("fisher", "FET", .) %>% + gsub("-LR_.*|-perm.*$", "", .), + color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + 'singlecell', scatac_da_family) %>% + fct_recode('single-cell' = 'singlecell', + 'other' = 'non_libra') %>% + fct_relevel('single-cell', 'pseudobulk', 'other')) + +# re-code filtering +avg %<>% + mutate(method = fct_recode(method, + "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', + 'Permutation~test' = 'permutation', + 't~test' = 't', + 'LR[peaks]' = 'LR_peaks', + 'LR[clusters]' = 'LR', + 'Binomial' = 'binomial', + 'Wilcoxon~rank-sum~test' = 'wilcox', + 'Fisher~exact~test' = 'FET', + 'Negative~binomial' = 'negbinom') %>% + as.character()) %>% + # drop limma + filter(!grepl('limma', method)) + +avg1 = avg %>% + filter(!grepl('pseudo-replicates', method)) %>% + filter(!(paper == 'GonzalesBlas_2018' & !grepl('0 h', cell_type))) %>% + mutate(logFC = gsub("^.*_", "", expr) %>% + replace(. == 'all', '0.0')) + +dplyr::count(avg1, method) +labs = avg1 %>% + group_by(logFC, expr_set, method, color) %>% + summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% + ungroup() %>% + mutate(label = formatC(median, format = 'f', digits = 2)) +pal = c('logFC' = 'black', + 'Random' = 'grey75') + +p1 = avg1 %>% + filter( + logFC != '0.0' & logFC != 1 + ) %>% + ggplot(aes(x = logFC, y = aucc, + color = expr_set, fill = expr_set)) + + facet_wrap(~ method, scales = 'free', ncol = 7, labeller = label_parsed) + + geom_boxplot(aes(y = aucc), outlier.shape = NA, alpha = 0.4, width = 0.7, + size = 0.3) + + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + + # geom_text(data = labs, aes(y = -0.12, label = label), size = 1.75, hjust = 0, + # color = 'black') + + scale_color_manual('Filter by:', values = pal) + + scale_fill_manual('Filter by:', values = pal) + + scale_x_reordered(expression('Absolute fold-change filter')) + + scale_y_continuous('AUCC') + + boxed_theme(size_sm = 5, size_lg = 6) + + theme(legend.position = 'top', + legend.key.width = unit(0.4, 'lines'), + # aspect.ratio = 1.2, + # axis.text.x = element_text(angle=45, hjust=1) + ) +p1 +ggsave("fig/Exp_Fig2/multiome-concordance-absolute-logFC-filter-overview.pdf", p1, + width = 16, height = 6, units = "cm", useDingbats = FALSE) + +###############################################################################- +## AUCC: boxplot, enrichment over random #### +###############################################################################- + +enr = avg1 %>% + filter( + logFC != '0.0' & logFC != 1 + ) %>% + group_by_at(vars(-expr_set, -aucc)) %>% + summarise(delta = ifelse(n() == 2, + aucc[expr_set == 'logFC'] - aucc[expr_set == 'Random'], + aucc), + nn = n()) %>% + ungroup() +pal = da_analysis_colors +p2 = enr %>% + filter(logFC != '0.0') %>% + # remove_quantile_per_method(., val_col='delta', group_cols = c('method', 'logFC')) %>% + ggplot(aes(x = logFC, y = delta, + color = method, fill = method)) + + facet_wrap(~ method, scales = 'free', ncol = 7, labeller = label_parsed) + + geom_boxplot(aes(y = delta), outlier.shape = NA, alpha = 0.4, width = 0.6, + size = 0.4) + + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + + # geom_text(data = labs, aes(y = -0.12, label = label), size = 1.75, hjust = 0, + # color = 'black') + + scale_color_manual('', values = pal) + + scale_fill_manual('', values = pal) + + scale_x_reordered(expression('Absolute fold-change filter')) + + scale_y_continuous(expression(Delta~AUCC)) + + boxed_theme(size_sm = 5, size_lg = 6) + + theme(legend.position = 'none', + # aspect.ratio = 0.8, + # axis.text.x = element_text(angle=45, hjust=1), + axis.title.y = element_text(size=6) + ) +p2 +ggsave("fig/Exp_Fig2/multiome-concordance-absolute-logFC-filter-enrichment.pdf", p2, + width = 16, height = 6, units = "cm", useDingbats = FALSE) + +grid = distinct(enr, method, logFC) %>% filter(logFC != '0.0') +eff = pmap_dfr(grid, function(...) { + current = tibble(...) + df = filter(avg1, method == current$method, logFC == current$logFC) %>% + mutate(expr_set = factor(expr_set, levels = c('logFC', 'Random'))) + eff = cohen.d(formula = aucc ~ expr_set | Subject(paste(paper, cell_type)), + data = df, + paired = TRUE)$estimate + mutate(current, d = eff) +}) +labs = eff %>% + filter(!is.na(d)) %>% + group_by(logFC) %>% + summarise(mean = mean(d), n = n(), median = median(d), d = median) %>% + ungroup() %>% + mutate(label = format(median, digits = 2, scientific=T)) + +p3 = eff %>% + ggplot(aes(x = factor(logFC), y = d)) + + geom_boxplot(outlier.shape = NA, alpha = 0.4, width = 0.6, size = 0.35, + fill = 'grey82', color = 'grey20') + + geom_label(data = labs, aes(y = Inf, label = label), size = 1.5, hjust = 0.5, + vjust = 1, color = 'black', fill = NA, label.size = NA, + label.padding = unit(0.6, 'lines')) + + # scale_color_manual('', values = pal) + + # scale_fill_manual('', values = pal) + + scale_x_discrete(expression('Absolute fold-change filter')) + + scale_y_continuous('Cohen\'s d\n(vs. random gene removal)') + + ggtitle('Effect of log-fold change filtering on concordance\nbetween ATAC and RNA modalities of multiome data') + + boxed_theme(size_sm = 5, size_lg = 6) + + theme( + # axis.text.x = element_text(angle = 45, hjust = 1), + aspect.ratio = 1, + plot.title = element_text(size=5, hjust=0.5) + ) +p3 +ggsave("fig/Exp_Fig2/multiome-concordance-absolute-logFC-filter-d.pdf", p3, + width = 4, height = 4, units = "cm", useDingbats = FALSE) + +p4 = wrap_plots(p1,p2,ncol=1) +ggsave("fig/Exp_Fig2/top-row.pdf", p4, + width = 16, height = 12, units = "cm", useDingbats = FALSE) + diff --git a/R/figures/exploratory-fig3.R b/R/figures/exploratory-fig3.R new file mode 100644 index 0000000..0fbdf1c --- /dev/null +++ b/R/figures/exploratory-fig3.R @@ -0,0 +1,216 @@ +# Plot a summary figure with performance across all experiments. +setwd("git/DA-analysis") +options(stringsAsFactors = FALSE) +library(tidyverse) +library(magrittr) +library(effsize) +library(upstartr) +source("R/theme.R") + +remove_quantile_per_method = function(df, val_col, group_cols, quantile_range=c(0.01, 0.99)) { + df$idx = 1:nrow(df) + group_grid = data.frame() + for (group_col in group_cols) { + if (nrow(group_grid) == 0) { + group_grid = df[,group_col] + } else { + group_grid %<>% tidyr::crossing(., df[,group_col]) + } + } + + new_df = data.frame() + for (i in 1:nrow(group_grid)) { + tmp_row = group_grid[i,] + tmp_df = tmp_row %>% left_join(df) + quan_range = quantile(unlist(tmp_df[,val_col]), quantile_range) + tmp_df %<>% mutate( + filter_now = ifelse(get(val_col) < quan_range[2] & get(val_col) > quan_range[1], T, F) + ) + # if remove all -- probably means all 0 + if (sum(tmp_df$filter_now) == 0) tmp_df$filter_now = T + new_df %<>% rbind(tmp_df %>% dplyr::select(filter_now, idx)) + } + new_df = df %>% + left_join(new_df) %>% + filter(filter_now) %>% + dplyr::select(-filter_now, -idx) + return(new_df) +} + + +###############################################################################- +## bulk logFC plot #### +###############################################################################- + +# load data +dat = readRDS("data/summaries/false_discoveries/false_discoveries_logFC_absolute.rds") %>% + type_convert() %>% + filter(is.na(latent_vars)) %>% + filter(da_family != 'mixedmodel', + pseudo_repl == FALSE) +# remove fixed columns +keep = map_lgl(dat, ~ n_distinct(.x) > 1) +dat %<>% extract(, keep) + +dat %<>% + mutate(expr = replace(percentile, is.na(percentile), '0.0')) %>% + mutate(expr_set = ifelse(grepl('pseudo', expr), 'Random', 'logFC'), + expr = gsub("_pseudo", "", expr)) + +# re-code x-values and colors +dat %<>% + mutate(method = paste0(da_method, '-', da_type) %>% + gsub("-singlecell|-binomial|-exact", "", .), + method = gsub("snapatac", "SnapATAC::findDAR", method) %>% + gsub("fisher", "FET", .) %>% + gsub("-LR_.*|-perm.*$", "", .) %>% + gsub("LR_", "LR", .), + color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + 'singlecell', da_family) %>% + fct_recode('single-cell' = 'singlecell', + 'other' = 'non_libra') %>% + fct_relevel('single-cell', 'pseudobulk', 'other'), + method = fct_recode(method, + "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', + 'Permutation~test' = 'permutation', + 't~test' = 't', + 'LR[peaks]' = 'LRpeaks', + 'LR[clusters]' = 'LR', + 'Binomial' = 'binomial', + 'Wilcoxon~rank-sum~test' = 'wilcox', + 'Fisher~exact~test' = 'FET', + 'Negative~binomial' = 'negbinom') %>% + as.character() + ) %>% + # drop limma + filter(!grepl('limma', method)) + +dat %<>% + mutate(logFC = gsub("^.*_", "", expr) %>% + replace(. == 'all', 0), + logFC = case_when( + logFC == 0 ~ 0, + logFC == 0.5 ~ 1, + logFC == 1 ~ 2, + logFC > 1 ~ 3 + )) %>% + mutate(logFC = as.factor(logFC)) + +dat$number_of_da_regions[is.na(dat$number_of_da_regions)] = 0 + +table(dat$logFC) +dplyr::count(dat, method) +labs = dat %>% + group_by(logFC, expr_set, method, color) %>% + summarise(mean = mean(number_of_da_regions), + n = n(), + median = median(number_of_da_regions), + number_of_da_regions = median) %>% + ungroup() %>% + mutate(label = formatC(median, format = 'f', digits = 2)) +pal = c('logFC' = 'black', + 'Random' = 'grey75') +p1 = dat %>% + filter(logFC != 0 & logFC != 1) %>% + # remove_quantile_per_method(., group_cols = c('method', 'logFC'), val_col='number_of_da_regions') %>% + ggplot(aes(x = logFC, y = number_of_da_regions, + color = expr_set, fill = expr_set)) + + facet_wrap(~ method, scales = 'free', ncol = 7, labeller = label_parsed) + + geom_boxplot(aes(y = number_of_da_regions), outlier.shape = NA, alpha = 0.4, + width = 0.7, size = 0.3) + + scale_color_manual('Filter by:', values = pal) + + scale_fill_manual('Filter by:', values = pal) + + scale_x_reordered(expression('Absolute fold-change filter')) + + scale_y_continuous('# of DA peaks', breaks=pretty_breaks(n=3)) + + boxed_theme(size_sm = 5, size_lg = 6) + + theme(legend.position = 'top', + legend.key.width = unit(0.4, 'lines'), + aspect.ratio = 0.7, + # axis.text.x = element_text(angle=45, hjust=1) + ) +p1 +ggsave("fig/Exp_Fig3/luecken-logFC-filter-overview.pdf", p1, + width = 18, height = 15, units = "cm", useDingbats = FALSE) + +enr = dat %>% + filter(logFC != 0 & logFC != 1) %>% + group_by_at(vars(-expr_set, -percentile, + -number_of_regions, -number_of_da_regions, + -number_of_nominal_da_regions)) %>% + summarise(delta = ifelse(n() == 2, + number_of_da_regions[expr_set == 'logFC'] - number_of_da_regions[expr_set == 'Random'], + number_of_da_regions), + nn = n()) %>% + ungroup() +pal = da_analysis_colors +p2 = enr %>% + filter(logFC != 0) %>% + ggplot(aes(x = logFC, y = delta, + color = method, fill = method)) + + facet_wrap(~ method, scales = 'free', ncol = 7, labeller = label_parsed) + + geom_boxplot(aes(y = delta), outlier.shape = NA, alpha = 0.4, width = 0.6, + size = 0.4) + + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + + # geom_text(data = labs, aes(y = -0.12, label = label), size = 1.75, hjust = 0, + # color = 'black') + + scale_color_manual('', values = pal) + + scale_fill_manual('', values = pal) + + scale_x_reordered(expression('Absolute fold-change filter')) + + scale_y_continuous(expression(Delta~'# of DA peaks'), breaks=pretty_breaks(3)) + + boxed_theme(size_sm = 5, size_lg = 6) + + theme(legend.position = 'none', + aspect.ratio = 0.8, + # axis.text.x = element_text(angle=45, hjust=1) + ) +p2 +ggsave("fig/Exp_Fig3/luecken-logFC-filter-enrichment.pdf", p2, + width = 18, height = 15, units = "cm", useDingbats = FALSE) + +grid = distinct(enr, method, logFC) +eff = pmap_dfr(grid, function(...) { + current = tibble(...) + df = filter(dat, method == current$method, logFC == current$logFC) %>% + mutate(expr_set = factor(expr_set, levels = c('logFC', 'Random'))) + eff = cohen.d(formula = number_of_da_regions ~ expr_set | Subject(label), + data = df, + paired = TRUE)$estimate + mutate(current, d = eff) +}) +eff$d[is.na(eff$d)] = 0 +labs = eff %>% + group_by(logFC) %>% + summarise(mean = mean(d), n = n(), median = median(d), d = median) %>% + ungroup() %>% + mutate( + median = ifelse(is.na(median), 0, median), + label = formatC(median, format = 'f', digits = 2) + ) + +p3 = eff %>% + ggplot(aes(x = factor(logFC), y = d)) + + geom_boxplot(outlier.shape = NA, alpha = 0.4, width = 0.6, size = 0.35, + fill = 'grey82', color = 'grey20') + + geom_label(data = labs, aes(y = Inf, label = label), size = 1.5, hjust = 0.5, + vjust = 1, color = 'black', fill = NA, label.size = NA, + label.padding = unit(0.6, 'lines')) + + # scale_color_manual('', values = pal) + + # scale_fill_manual('', values = pal) + + scale_x_discrete(expression('Absolute fold-change filter')) + + scale_y_continuous('Cohen\'s d\n(vs. random peak removal)') + + ggtitle('Effect of log-fold change filtering on false discoveries\nin null comparisons of real scATAC-seq data') + + coord_cartesian(ylim = c(0, 0.7)) + + boxed_theme(size_sm = 5, size_lg = 6) + + theme( + # axis.text.x = element_text(angle = 45, hjust = 1), + aspect.ratio = 1, + plot.title = element_text(size=5) + ) +p3 +ggsave("fig/Exp_Fig3/luecken-logFC-filter-d.pdf", p3, + width = 4, height = 4, units = "cm", useDingbats = FALSE) + +p4 = wrap_plots(p1,p2,ncol=1) +p4 +ggsave("fig/Exp_Fig3/top-row.pdf", p4, + width = 16, height = 14, units = "cm", useDingbats = FALSE) + diff --git a/R/figures/exploratory-fig4.R b/R/figures/exploratory-fig4.R new file mode 100644 index 0000000..af4abff --- /dev/null +++ b/R/figures/exploratory-fig4.R @@ -0,0 +1,151 @@ +# Plot a summary figure with performance across all experiments. +setwd("git/DA-analysis") +options(stringsAsFactors = FALSE) +library(tidyverse) +library(magrittr) +library(effsize) +library(upstartr) +library(ggpubr) +source("R/theme.R") + +dat = readRDS('data/metadata/enhancer-promoter-peak_stats.rds') %>% + type_convert() %>% + mutate( + peak_type = fct_recode( + peak_type, + 'Enhancer'='enhancer', + 'Promoter'='promoter' + ), + pct_open = pct_open*100 + ) + +dat0 = dat %>% + filter(data_type == 'sc') + +color_pal = c( + 'Enhancer'="#FFDCA4", + 'Promoter'="#887AA1" +) + +labs = dat0 %>% + group_by(peak_type) %>% + summarise( + mean_lab = round(median(mean), 3), + pct_open_lab = round(median(pct_open), 3), + peak_size_lab = round(median(peak_size), 3) + ) %>% + ungroup() + +p1 = dat0 %>% + mutate( + peak_type = factor(peak_type, levels=c("Enhancer", "Promoter")) + ) %>% + ggplot(aes(x=peak_type, y=mean, color=peak_type, fill=peak_type)) + + geom_boxplot(size = 0.35, alpha = 0.4, width = 0.6, outlier.shape = NA) + + geom_label(dat = labs, + aes(label = mean_lab, y= 0.18), color = 'black', + label.padding = unit(0.35, 'lines'), + label.size = NA, fill = NA, + size = 1.75, hjust = 0.5, vjust = 0, + show.legend = FALSE) + + scale_fill_manual(values = color_pal) + + scale_color_manual(values = color_pal) + + # geom_signif( + # comparisons = list(c("Enhancer", "Promoter")), + # map_signif_level = TRUE, + # color = 'black', + # tip_length = 0, + # size = 0.3, + # textsize = 3 + # ) + + boxed_theme() + + scale_y_continuous('Read depth', limits=c(0, 0.2), breaks=seq(0, 0.18, 0.06)) + + scale_x_discrete() + + theme( + axis.title.x = element_blank(), + axis.ticks.x = element_blank(), + aspect.ratio = 1.6, + legend.title = element_blank(), + legend.position = 'none', + axis.text.x = element_text(angle=45, hjust=1), + plot.title = element_text(size=5) + ) +p1 + +p2 = dat0 %>% + mutate( + peak_type = factor(peak_type, levels=c("Enhancer", "Promoter")) + ) %>% + ggplot(aes(x=peak_type, y=pct_open, color=peak_type, fill=peak_type)) + + geom_boxplot(size = 0.35, alpha = 0.4, width = 0.6, outlier.shape = NA) + + geom_label(dat = labs, + aes(label = pct_open_lab, y= 13.2), color = 'black', + label.padding = unit(0.35, 'lines'), + label.size = NA, fill = NA, + size = 1.75, hjust = 0.5, vjust = 0, + show.legend = FALSE) + + scale_fill_manual(values = color_pal) + + scale_color_manual(values = color_pal) + + # geom_signif( + # comparisons = list(c("Enhancer", "Promoter")), + # map_signif_level = TRUE, + # color = 'black', + # tip_length = 0, + # size = 0.3, + # textsize = 3, + # y_position = 0 + # ) + + boxed_theme() + + scale_y_continuous('% open', limits=c(0, 14.3), breaks=seq(0, 14, 4)) + + scale_x_discrete() + + theme( + axis.title.x = element_blank(), + axis.ticks.x = element_blank(), + aspect.ratio = 1.6, + legend.title = element_blank(), + legend.position = 'none', + axis.text.x = element_text(angle=45, hjust=1), + plot.title = element_text(size=5) + ) +p2 + +p3 = dat0 %>% + mutate( + peak_type = factor(peak_type, levels=c("Enhancer", "Promoter")) + ) %>% + ggplot(aes(x=peak_type, y=peak_size, color=peak_type, fill=peak_type)) + + geom_boxplot(size = 0.35, alpha = 0.4, width = 0.6, outlier.shape = NA) + + geom_label(dat = labs, + aes(label = peak_size_lab, y= 985), color = 'black', + label.padding = unit(0.35, 'lines'), + label.size = NA, fill = NA, + size = 1.75, hjust = 0.5, vjust = 0, + show.legend = FALSE) + + scale_fill_manual(values = color_pal) + + scale_color_manual(values = color_pal) + + # geom_signif( + # comparisons = list(c("Enhancer", "Promoter")), + # map_signif_level = TRUE, + # color = 'black', + # tip_length = 0, + # size = 0.3, + # textsize = 3, + # y_position = 0 + # ) + + boxed_theme() + + scale_y_continuous('Peak width (bp)', limits=c(0, 1100)) + + scale_x_discrete() + + theme( + axis.title.x = element_blank(), + axis.ticks.x = element_blank(), + aspect.ratio = 1.6, + legend.title = element_blank(), + legend.position = 'none', + axis.text.x = element_text(angle=45, hjust=1), + plot.title = element_text(size=5) + ) +p3 + +row = p1 | p2 | p3 +ggsave('fig/Exp_Fig4/row.pdf', row, width=9, height=5, units='cm') + diff --git a/R/figures/exploratory-fig5.R b/R/figures/exploratory-fig5.R new file mode 100644 index 0000000..e70cac6 --- /dev/null +++ b/R/figures/exploratory-fig5.R @@ -0,0 +1,392 @@ +setwd("git/DA-analysis") +options(stringsAsFactors = FALSE) +library(tidyverse) +library(magrittr) +source("R/theme.R") + +### PANEL A ### +bulk_aucc_rank1 = readRDS('data/summaries/meta_summaries/bulk_aucc-leave_out_bulk-summary.rds') +bulk_aucc_rank1 %<>% + group_by(bulk_da_method, bulk_da_type) %>% + mutate( + rank = rank(-aucc, ties.method='first'), + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method) + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(bulk_da_method, bulk_da_type, method, val, rank) %>% + arrange(bulk_da_method, bulk_da_type, rank) %>% + mutate( + type = paste0('Matched bulk (leave out: ', bulk_da_method, '-', bulk_da_type, ')'), + tercile = ntile(-rank, 3), + ) %>% + ungroup() %>% + dplyr::select(-bulk_da_method, -bulk_da_type) + +bulk_aucc_rank2 = readRDS('data/summaries/meta_summaries/bulk_aucc-k=1000-summary.rds') +bulk_aucc_rank2 %<>% + mutate( + rank = rank(-aucc, ties.method='first'), + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method) + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Matched bulk (k = 1000)', + tercile = ntile(-rank, 3) + ) + +bulk_aucc_rank3 = readRDS('data/summaries/meta_summaries/bulk_aucc-filter=5-summary.rds') +bulk_aucc_rank3 %<>% + mutate( + rank = rank(-aucc, ties.method='first'), + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method) + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Matched bulk (5% cell filter)', + tercile = ntile(-rank, 3) + ) + +bulk_aucc_rank4 = readRDS('data/summaries/meta_summaries/bulk_aucc-pseudobulk_peaks-summary.rds') +bulk_aucc_rank4 %<>% + mutate( + rank = rank(-aucc, ties.method='first'), + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method) + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Matched bulk (Pseudobulk peak calling)', + tercile = ntile(-rank, 3) + ) + + +bulk_aucc_rank5 = readRDS('data/summaries/meta_summaries/bulk_aucc-noisy_peaks-summary.rds') +bulk_aucc_rank5 %<>% + mutate( + rank = rank(-aucc, ties.method='first'), + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method) + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Matched bulk (Spurious peaks added)', + tercile = ntile(-rank, 3) + ) + +bulk_aucc_rank6 = readRDS('data/summaries/meta_summaries/bulk_aucc-enhancer_promoter-summary.rds') +bulk_aucc_rank6 %<>% + group_by(ann) %>% + mutate( + rank = rank(-aucc, ties.method='first'), + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method) + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(ann, method, val, rank) %>% + arrange(ann, rank) %>% + mutate( + type = paste0('Matched bulk: Bulk concordance (', str_to_title(ann), ' regions only)'), + tercile = ntile(-rank, 3) + ) %>% + ungroup() %>% + dplyr::select(-ann) + +bulk_aucc_rank = rbind( + bulk_aucc_rank1, + bulk_aucc_rank2, + bulk_aucc_rank3, + bulk_aucc_rank4, + bulk_aucc_rank5, + bulk_aucc_rank6 +) %>% + mutate( + experiment = 'Experiment 1: Bulk concordance' + ) + +multiome_aucc_rank1 = readRDS('data/summaries/meta_summaries/multiome_aucc-bidrectional_promoter-summary.rds') +multiome_aucc_rank1 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome (Removal of overlapping promoter regions)', + tercile = ntile(-rank, 3) + ) + + +multiome_aucc_rank2 = readRDS('data/summaries/meta_summaries/multiome_aucc-leave_out_bulk-summary.rds') +multiome_aucc_rank2 %<>% + group_by(scrna_da_method, scrna_da_type) %>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(scrna_da_method, scrna_da_type, method, val, rank) %>% + arrange(scrna_da_method, scrna_da_type, rank) %>% + mutate( + type = paste0('Multiome (leave out: ', scrna_da_method, '-', scrna_da_type, ')'), + tercile = ntile(-rank, 3) + ) %>% + ungroup() %>% + dplyr::select(-scrna_da_method, -scrna_da_type) + +multiome_aucc_rank3 = readRDS('data/summaries/meta_summaries/multiome_aucc-k=100-summary.rds') +multiome_aucc_rank3 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome (k = 100)', + tercile = ntile(-rank, 3) + ) + +multiome_aucc_rank4 = readRDS('data/summaries/meta_summaries/multiome_aucc-k=1000-summary.rds') +multiome_aucc_rank4 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome (k = 1000)', + tercile = ntile(-rank, 3) + ) + +multiome_aucc_rank5 = readRDS('data/summaries/meta_summaries/multiome_aucc-filter=1-summary.rds') +multiome_aucc_rank5 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome (1% cell filter)', + tercile = ntile(-rank, 3) + ) + +multiome_aucc_rank6 = readRDS('data/summaries/meta_summaries/multiome_aucc-scrna_terciles-summary.rds') +multiome_aucc_rank6 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome (Removal of lowly expressed genes)', + tercile = ntile(-rank, 3) + ) + +multiome_gsea_aucc_rank1 = readRDS('data/summaries/meta_summaries/multiome_gsea_aucc-bidirectional_promoter-summary.rds') +multiome_gsea_aucc_rank1 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome GO (Removal of overlapping promoter regions)', + tercile = ntile(-rank, 3) + ) + +multiome_gsea_aucc_rank2 = readRDS('data/summaries/meta_summaries/multiome_gsea_aucc-leave_bulk_out-summary.rds') +multiome_gsea_aucc_rank2 %<>% + group_by(scrna_da_method, scrna_da_type) %>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(scrna_da_method, scrna_da_type, method, val, rank) %>% + arrange(scrna_da_method, scrna_da_type, rank) %>% + mutate( + type = paste0('Multiome GO (leave out: ', scrna_da_method, '-', scrna_da_type, ')'), + tercile = ntile(-rank, 3) + ) %>% + ungroup() %>% + dplyr::select(-scrna_da_method, -scrna_da_type) + +multiome_gsea_aucc_rank3 = readRDS('data/summaries/meta_summaries/multiome_gsea_aucc-k=50-summary.rds') +multiome_gsea_aucc_rank3 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome GO (k = 50)', + tercile = ntile(-rank, 3) + ) + +multiome_gsea_aucc_rank4 = readRDS('data/summaries/meta_summaries/multiome_gsea_aucc-k=500-summary.rds') +multiome_gsea_aucc_rank4 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome GO (k = 500)', + tercile = ntile(-rank, 3) + ) + +multiome_gsea_aucc_rank5 = readRDS('data/summaries/meta_summaries/multiome_gsea_aucc-filter=1-summary.rds') +multiome_gsea_aucc_rank5 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome GO (1% cell filter)', + tercile = ntile(-rank, 3) + ) + +multiome_gsea_aucc_rank6 = readRDS('data/summaries/meta_summaries/multiome_gsea_aucc-scrna_terciles-summary.rds') +multiome_gsea_aucc_rank6 %<>% + mutate( + rank = rank(-aucc, ties.method='first') + ) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, rank) %>% + arrange(rank) %>% + mutate( + type = 'Multiome GO (Removal of lowly expressed genes)', + tercile = ntile(-rank, 3) + ) + +multiome_aucc_rank = rbind( + multiome_aucc_rank1, + multiome_aucc_rank2, + multiome_aucc_rank3, + multiome_aucc_rank4, + multiome_aucc_rank5, + multiome_aucc_rank6, + multiome_gsea_aucc_rank1, + multiome_gsea_aucc_rank2, + multiome_gsea_aucc_rank3, + multiome_gsea_aucc_rank4, + multiome_gsea_aucc_rank5, + multiome_gsea_aucc_rank6 +) %>% + mutate( + experiment = 'Experiment 2: Multiome concordance' + ) + +dat = rbind( + bulk_aucc_rank, + multiome_aucc_rank +) + +final_rank = dat %>% + group_by(method) %>% + summarise( + val = mean(tercile) + ) %>% + ungroup() %>% + mutate( + rank = rank(-val), + tercile = ntile(-rank, 3), + # type = '', + # experiment = 'Summary' + experiment = 'Summary', + type = 'Summary' + ) %>% + arrange(rank) + +# dat %<>% rbind(final_rank) +method_order = rev(unique(final_rank$method)) +dat$method = factor(dat$method, levels=method_order) + +factor_levels = rev(unique(dat$type)) +dat$type = factor(dat$type, levels=factor_levels) + +dat %<>% + mutate( + tercile = as.character(tercile), + tercile = fct_recode(tercile, + 'Top' = '3', + 'Middle' = '2', + 'Bottom' = '1' + ) + ) +dat$tercile = factor(dat$tercile, levels = c('Bottom', 'Middle', 'Top')) +# dat$experiment = factor(dat$experiment, levels = c('Matched bulk concordance', 'Multiome concordance', 'False discoveries', 'Biases', 'Summary')) +dat$experiment = factor(dat$experiment, levels = c( + 'Experiment 1: Bulk concordance', + 'Experiment 2: Multiome concordance') +) + +# [1] "#000000" "#0E0B0C" "#1C1717" "#292223" "#362D2E" "#423839" "#4E4244" "#594D4F" "#645759" "#6E6063" "#786A6C" "#827376" "#8B7C7F" "#938588" +# [15] "#9B8E91" "#A39699" "#AA9EA1" "#B1A6A9" "#B8AEB1" "#BEB5B8" "#C4BCBF" "#C9C3C6" "#CECACC" "#D3D0D2" "#D7D6D8" "#DBDCDE" "#DFE1E3" "#E3E7E8" +# [29] "#E6ECED" "#E9F0F2" "#EBF5F6" "#EEF9FA" "#F0FCFD" "#F2FFFF" "#F4FFFF" "#F5FFFF" "#F7FFFF" "#F8FFFF" "#F9FFFF" "#FAFFFF" "#FBFFFF" "#FBFFFF" +# [43] "#FCFFFF" "#FCFFFF" "#FDFFFF" "#FDFFFF" "#FEFFFF" "#FEFFFF" "#FEFFFF" "#FEFFFF" "#FFFDFD" "#FFF8F9" "#FFF4F4" "#FFEEEF" "#FFE9EA" "#FFE3E4" +# [57] "#FFDDDE" "#FFD6D8" "#FFCFD2" "#FFC8CB" "#FFC1C4" "#FFB9BD" "#FFB2B6" "#FFAAAE" "#FFA2A7" "#FF9A9F" "#FF9298" "#FF8A90" "#FF8288" "#FF7A81" +# [71] "#FE7279" "#FD6A72" "#FC626A" "#FB5B63" "#FA535C" "#F84C55" "#F7454F" "#F53F48" "#F33842" "#F1323C" "#EF2C37" "#ED2732" "#EA222D" "#E71D28" +# [85] "#E51924" "#E11620" "#DE121D" "#DB0F1A" "#D70D18" "#D30B16" "#CF0A14" "#CB0913" "#C70913" "#C20A13" "#BD0B14" "#B90C15" "#B30F17" "#AE1219" +# [99] "#A8161D" "#A31B21" +# pal = brewer.pal(3, 'Blues') +pal = colorRampPalette(c(brewer.pal(3, 'Reds')[1], "#B30F17"))(3) +# pal = colorRampPalette(c("#FF9298", "#DB0F1A"))(3) + +experiments = levels(dat$experiment) +table(dat$tercile) + +for (experiment in experiments) { + if (experiment == first(experiments)) out_plots = list() + dat1 = dat %>% + filter(experiment == !!experiment) + p1_1 = dat1 %>% + ggplot(aes(x = method, y = type, fill=tercile)) + + # facet_grid(experiment ~ ., scales = 'free', space = 'free') + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0), labels = scales::parse_format()) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Tercile', values = pal, + labels = c('1' = 'Bottom', '2' = 'Middle', '3' = 'Top')) + + # coord_fixed() + + ggtitle(experiment) + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.text.x = element_text(angle = 45, hjust = 1), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'bottom', + legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) + if (experiment != last(experiments)) { + p1_1 = p1_1 + theme( + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + legend.position = 'none' + ) + } + out_plots[[length(out_plots)+1]] = p1_1 +} + +p1 = wrap_plots(out_plots, ncol=1) +p1 +ggsave('fig/Exp_Fig5/sensitivity-terciles.pdf', p1, width=15, height=20, units='cm') + diff --git a/R/figures/exploratory-fig6.R b/R/figures/exploratory-fig6.R new file mode 100644 index 0000000..357eaf0 --- /dev/null +++ b/R/figures/exploratory-fig6.R @@ -0,0 +1,493 @@ +setwd("git/DA-analysis") +options(stringsAsFactors = FALSE) +library(tidyverse) +library(magrittr) +source("R/theme.R") + +bin_bulk_aucc_rank = readRDS('data/summaries/meta_summaries/bulk_aucc_binarized_summary.rds') +bin_bulk_aucc_rank %<>% + mutate(bin = ifelse(color == 'binarized', 'binarized', 'non-binarized')) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, bin) %>% + mutate( + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +bulk_aucc_rank = readRDS('data/summaries/meta_summaries/bulk_aucc_summary.rds') +bulk_aucc_rank %<>% + filter(!method %in% bin_bulk_aucc_rank$method) %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val) %>% + mutate( + bin = 'n/a', + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +norm_bulk_aucc_rank = readRDS('data/summaries/meta_summaries/bulk_aucc_normalized_summary.rds') +norm_bulk_aucc_rank %<>% + mutate(norm = as.character(normalization)) %>% + filter(norm != 'logTP(10K)') %>% + dplyr::rename(val = aucc) %>% + dplyr::select(method, val, norm) %>% + mutate( + bin = ifelse(method %in% bin_bulk_aucc_rank$method, 'non-binarized', 'n/a'), + method_full = paste0(method, '_', bin, '_', norm) + ) %>% + dplyr::relocate(bin, .before=norm) + +dat1 = rbind(bulk_aucc_rank, bin_bulk_aucc_rank, norm_bulk_aucc_rank) %>% + distinct() %>% + mutate( + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method), + norm = ifelse(norm == '', 'n/a', norm), + norm = ifelse(method %in% norm_bulk_aucc_rank$method & norm == 'n/a', 'logTP(10K)', norm)) %>% + distinct() %>% + mutate( + rank = rank(-val, ties.method='first'), + tercile = ntile(-rank, 3), + type = 'Matched bulk', + experiment = 'Experiment 1: Bulk concordance' + ) %>% + arrange(rank) + +luecken_false_discoveries_rank = readRDS('data/summaries/meta_summaries/luecken_false_discoveries_summary.rds') +luecken_false_discoveries_rank %<>% + filter(!method %in% bin_bulk_aucc_rank$method) %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val) %>% + mutate( + bin = 'n/a', + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +bin_luecken_false_discoveries_rank = readRDS('data/summaries/meta_summaries/luecken_false_discoveries_binarized_summary.rds') +bin_luecken_false_discoveries_rank %<>% + mutate(bin = ifelse(color == 'binarized', 'binarized', 'non-binarized')) %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val, bin) %>% + mutate( + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +norm_luecken_false_discoveries_rank = readRDS('data/summaries/meta_summaries/luecken_false_discoveries_normalized_summary.rds') +norm_luecken_false_discoveries_rank %<>% + mutate(norm = as.character(normalization)) %>% + filter(norm != 'logTP(10K)') %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val, norm) %>% + mutate( + bin = ifelse(method %in% bin_bulk_aucc_rank$method, 'non-binarized', 'n/a'), + method_full = paste0(method, '_', bin, '_', norm) + ) %>% + dplyr::relocate(bin, .before=norm) + +dat2 = rbind(luecken_false_discoveries_rank, bin_luecken_false_discoveries_rank, norm_luecken_false_discoveries_rank) %>% + distinct() %>% + mutate( + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method), + norm = ifelse(norm == '', 'n/a', norm), + norm = ifelse(method %in% norm_bulk_aucc_rank$method & norm == 'n/a', 'logTP(10K)', norm)) %>% + distinct() %>% + mutate( + rank = rank(val, ties.method='first'), + tercile = ntile(-rank, 3), + type = 'Published data', + experiment = 'Experiment 3: False discoveries' + ) %>% + arrange(rank) + +snapatac_false_discoveries_rank = readRDS('data/summaries/meta_summaries/snapatac_false_discoveries_summary.rds') +snapatac_false_discoveries_rank %<>% + filter(!method %in% bin_bulk_aucc_rank$method) %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val) %>% + mutate( + bin = 'n/a', + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +bin_snapatac_false_discoveries_rank = readRDS('data/summaries/meta_summaries/snapatac_false_discoveries_binarized_summary.rds') +bin_snapatac_false_discoveries_rank %<>% + mutate(bin = ifelse(color == 'binarized', 'binarized', 'non-binarized')) %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val, bin) %>% + mutate( + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +norm_snapatac_false_discoveries_rank = readRDS('data/summaries/meta_summaries/snapatac_false_discoveries_normalized_summary.rds') +norm_snapatac_false_discoveries_rank %<>% + mutate(norm = as.character(normalization)) %>% + filter(norm != 'logTP(10K)') %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val, norm) %>% + mutate( + bin = ifelse(method %in% bin_bulk_aucc_rank$method, 'non-binarized', 'n/a'), + method_full = paste0(method, '_', bin, '_', norm) + ) %>% + dplyr::relocate(bin, .before=norm) + +dat3 = rbind(snapatac_false_discoveries_rank, bin_snapatac_false_discoveries_rank, norm_snapatac_false_discoveries_rank) %>% + distinct() %>% + mutate( + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method), + norm = ifelse(norm == '', 'n/a', norm), + norm = ifelse(method %in% norm_bulk_aucc_rank$method & norm == 'n/a', 'logTP(10K)', norm)) %>% + distinct() %>% + mutate( + rank = rank(val, ties.method='first'), + tercile = ntile(-rank, 3), + type = 'Downsampled data', + experiment = 'Experiment 3: False discoveries' + ) %>% + arrange(rank) + +splatter_false_discoveries_rank = readRDS('data/summaries/meta_summaries/splatter_false_discoveries_summary.rds') +splatter_false_discoveries_rank %<>% + filter(!method %in% bin_bulk_aucc_rank$method) %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val) %>% + mutate( + bin = 'n/a', + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +bin_splatter_false_discoveries_rank = readRDS('data/summaries/meta_summaries/splatter_false_discoveries_binarized_summary.rds') +bin_splatter_false_discoveries_rank %<>% + mutate(bin = ifelse(color == 'binarized', 'binarized', 'non-binarized')) %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val, bin) %>% + mutate( + norm = '', + method_full = paste0(method, '_', bin, '_', norm) + ) + +norm_splatter_false_discoveries_rank = readRDS('data/summaries/meta_summaries/splatter_false_discoveries_normalized_summary.rds') +norm_splatter_false_discoveries_rank %<>% + mutate(norm = as.character(normalization)) %>% + filter(norm != 'logTP(10K)') %>% + dplyr::rename(val = median) %>% + dplyr::select(method, val, norm) %>% + mutate( + bin = ifelse(method %in% bin_bulk_aucc_rank$method, 'non-binarized', 'n/a'), + method_full = paste0(method, '_', bin, '_', norm) + ) %>% + dplyr::relocate(bin, .before=norm) + +dat4 = rbind(splatter_false_discoveries_rank, bin_splatter_false_discoveries_rank, norm_splatter_false_discoveries_rank) %>% + distinct() %>% + mutate( + method = ifelse(method == 'LR_peaks', 'LR[peaks]', method), + norm = ifelse(norm == '', 'n/a', norm), + norm = ifelse(method %in% norm_bulk_aucc_rank$method & norm == 'n/a', 'logTP(10K)', norm)) %>% + distinct() %>% + mutate( + rank = rank(val, ties.method='first'), + tercile = ntile(-rank, 3), + type = 'Simulations', + experiment = 'Experiment 3: False discoveries' + ) %>% + arrange(rank) + +dat = rbind( + dat1, + dat2, + dat3, + dat4 +) + +method_order = dat1 %>% arrange(rank) %>% pull(method_full) %>% rev() +dat$method_full = factor(dat$method_full, levels=method_order) +dat1$method_full = factor(dat1$method_full, levels=method_order) +# final_rank$method_bin = factor(final_rank$method_bin, levels=method_order) + +factor_levels = rev(unique(dat$type)) +dat$type = factor(dat$type, levels=factor_levels) + +dat %<>% + mutate( + tercile = as.character(tercile), + tercile = fct_recode(tercile, + 'Top' = '3', + 'Middle' = '2', + 'Bottom' = '1' + ) + ) +dat$tercile = factor(dat$tercile, levels = c('Bottom', 'Middle', 'Top')) +dat$experiment = factor(dat$experiment, levels = c( + 'Experiment 1: Bulk concordance', + 'Experiment 3: False discoveries' + ) +) + +pal = colorRampPalette(c(brewer.pal(3, 'Reds')[1], "#B30F17"))(3) + +experiments = levels(dat$experiment) +table(dat$tercile) +bin_colors = c('#F38400', '#2B3D26', 'white') %>% setNames(c('binarized', 'non-binarized', 'n/a')) +p0_1 = dat1 %>% + mutate(bin = factor(bin, levels = names(bin_colors))) %>% + ggplot(aes(x = method_full, y = 1, fill = bin)) + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Binarization', values = bin_colors) + + # coord_fixed() + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'bottom', + legend.justification = 'right', + legend.title = element_blank(), + # legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) + + guides(fill = guide_legend(nrow = 1)) +p0_1 +norm_colors = c( + pals::kelly(14)[8:14] %>% setNames(c('none', 'logTP(10K)', 'TP(median)', 'TP(10K)', 'logTP(median)', 'TF-IDF', 'smooth GC-full quantile')), + c('white') %>% setNames('n/a') +) +p0_2 = dat1 %>% + mutate(norm = factor(norm, levels = names(norm_colors))) %>% + ggplot(aes(x = method_full, y = 1, fill = norm)) + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Normalization', values = norm_colors) + + # coord_fixed() + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'bottom', + legend.justification = 'right', + legend.title = element_blank(), + # legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) + + guides(fill = guide_legend(nrow = 1)) +p0_2 + +for (experiment in experiments) { + if (experiment == first(experiments)) out_plots = list(p0_1, p0_2) + dat_tmp = dat %>% + filter(experiment == !!experiment) + p1_1 = dat_tmp %>% + ggplot(aes(x = method_full, y = type, fill=tercile)) + + # facet_grid(experiment ~ ., scales = 'free', space = 'free') + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0), labels = scales::parse_format()(rev(dat1$method))) + + # scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Tercile', values = pal, + labels = c('1' = 'Bottom', '2' = 'Middle', '3' = 'Top')) + + # coord_fixed() + + ggtitle(experiment) + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.text.x = element_text(angle = 45, hjust = 1), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'bottom', + legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) + if (experiment != last(experiments)) { + p1_1 = p1_1 + theme( + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + legend.position = 'none' + ) + } + out_plots[[length(out_plots)+1]] = p1_1 +} + +p1 = wrap_plots(out_plots, ncol=1) +p1 +ggsave('fig/Exp_Fig6/binarization-normalization-terciles.pdf', p1, width=15, height=15, units='cm') + +summary_rank = dat1 %>% + group_by(method) %>% + arrange(method, -val) %>% + dplyr::slice(1) %>% + ungroup() %>% + mutate( + rank = rank(-val, ties.method='first'), + tercile = ntile(-rank, 3), + type = 'Matched bulk', + experiment = 'Experiment 1: Bulk concordance' + ) %>% + arrange(rank) + +method_order = summary_rank %>% arrange(rank) %>% pull(method_full) %>% rev() +summary_dat = rbind( + dat1 %>% + filter(method_full %in% method_order) %>% + mutate( + rank = rank(-val, ties.method='first'), + tercile = ntile(-rank, 3) + ), + dat2 %>% + filter(method_full %in% method_order) %>% + mutate( + rank = rank(val, ties.method='first'), + tercile = ntile(-rank, 3) + ), + dat3 %>% + filter(method_full %in% method_order) %>% + mutate( + rank = rank(val, ties.method='first'), + tercile = ntile(-rank, 3) + ), + dat4 %>% + filter(method_full %in% method_order) %>% + mutate( + rank = rank(val, ties.method='first'), + tercile = ntile(-rank, 3) + ) +) +summary_dat$method_full = factor(summary_dat$method_full, levels=method_order) +summary_dat %<>% + mutate( + tercile = as.character(tercile), + tercile = fct_recode(tercile, + 'Top' = '3', + 'Middle' = '2', + 'Bottom' = '1' + ) + ) +summary_dat$tercile = factor(summary_dat$tercile, levels = c('Bottom', 'Middle', 'Top')) +summary_dat$experiment = factor(summary_dat$experiment, levels = c( + 'Experiment 1: Bulk concordance', + 'Experiment 3: False discoveries' + ) +) + +pal = colorRampPalette(c(brewer.pal(3, 'Reds')[1], "#B30F17"))(3) + +p0_1 = summary_rank %>% + mutate(bin = factor(bin, levels = names(bin_colors))) %>% + ggplot(aes(x = method_full, y = 1, fill = bin)) + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Binarization', values = bin_colors) + + # coord_fixed() + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'right', + legend.title = element_blank(), + # legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) +p0_1 + +p0_2 = summary_rank %>% + mutate(norm = factor(norm, levels = names(norm_colors))) %>% + ggplot(aes(x = method_full, y = 1, fill = norm)) + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Normalization', values = norm_colors) + + # coord_fixed() + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'right', + legend.title = element_blank(), + # legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) +p0_2 + +for (experiment in experiments) { + if (experiment == first(experiments)) out_plots = list(p0_1, p0_2) + dat_tmp = summary_dat %>% + filter(experiment == !!experiment) + p1_1 = dat_tmp %>% + ggplot(aes(x = method_full, y = type, fill=tercile)) + + # facet_grid(experiment ~ ., scales = 'free', space = 'free') + + geom_tile(color = 'white') + + scale_x_discrete(expand = c(0, 0), labels = scales::parse_format()(rev(summary_rank$method))) + + # scale_x_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + + scale_fill_manual('Tercile', values = pal, + labels = c('1' = 'Bottom', '2' = 'Middle', '3' = 'Top')) + + # coord_fixed() + + ggtitle(experiment) + + coord_fixed(ratio=1) + + boxed_theme() + + theme( + plot.margin = unit(c(0.01, 0.01, 0.2, 0.01), "cm"), + axis.text.x = element_text(angle = 45, hjust = 1), + axis.title.x = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + strip.text = element_text(face='bold'), + legend.position = 'bottom', + legend.justification = 'right', + legend.key.size = unit(0.35, 'lines'), + plot.title = element_text(size=7) + ) + if (experiment != last(experiments)) { + p1_1 = p1_1 + theme( + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + legend.position = 'none' + ) + } + out_plots[[length(out_plots)+1]] = p1_1 +} + +p2 = wrap_plots(out_plots, ncol=1) +p2 +ggsave('fig/Exp_Fig6/best-combination.pdf', p2, width=8, height=7, units='cm') diff --git a/R/figures/fig5.R b/R/figures/fig5.R index 872107f..c5137b9 100644 --- a/R/figures/fig5.R +++ b/R/figures/fig5.R @@ -109,8 +109,8 @@ p1 = eff %>% label.padding = unit(0.6, 'lines')) + # scale_color_manual('', values = pal) + # scale_fill_manual('', values = pal) + - scale_x_discrete('# of cells') + - scale_y_continuous('Cohen\'s d (vs. no filter)', limits = c(-0.65, 0.65)) + + scale_x_discrete('% of peaks removed') + + scale_y_continuous('Cohen\'s d (vs. random peak removal)', limits = c(-0.65, 0.65)) + ggtitle('Effect of log-fold change filtering on concordance\nbetween scATAC-seq and matched bulk ATAC-seq') + boxed_theme(size_sm = 5, size_lg = 6) + theme(axis.text.x = element_text(angle = 45, hjust = 1), @@ -230,8 +230,8 @@ p2 = eff %>% geom_label(data = labs, aes(y = Inf, label = label), size = 1.5, hjust = 0.5, vjust = 1, color = 'black', fill = NA, label.size = NA, label.padding = unit(0.6, 'lines')) + - scale_x_discrete('# of cells') + - scale_y_continuous('Cohen\'s d (vs. no filter)', limits=c(-0.8, 0.05)) + + scale_x_discrete('% of genes removed') + + scale_y_continuous('Cohen\'s d (vs. random gene removal)', limits=c(-0.8, 0.05)) + ggtitle('Effect of log-fold change filtering on concordance\nbetween ATAC and RNA modalities of multiome data') + boxed_theme(size_sm = 5, size_lg = 6) + theme(axis.text.x = element_text(angle = 45, hjust = 1), @@ -337,8 +337,8 @@ p3 = eff %>% label.padding = unit(0.6, 'lines')) + # scale_color_manual('', values = pal) + # scale_fill_manual('', values = pal) + - scale_x_discrete('# of cells') + - scale_y_continuous('Cohen\'s d (vs. no filter)') + + scale_x_discrete('% of peaks removed') + + scale_y_continuous('Cohen\'s d (vs. random peak removal)') + ggtitle('Effect of log-fold change filtering on false discoveries\nin null comparisons of real scATAC-seq data') + coord_cartesian(ylim = c(0, 1.3)) + boxed_theme(size_sm = 5, size_lg = 6) + diff --git a/R/figures/fig6.R b/R/figures/fig6.R index b325849..b38bf3a 100644 --- a/R/figures/fig6.R +++ b/R/figures/fig6.R @@ -30,12 +30,12 @@ dat %<>% gsub("fisher", "FET", .) %>% gsub("-LR_.*|-perm.*$", "", .) %>% gsub("LR_", "LR", .), - color = ifelse(sc_binarization, 'binarized', sc_da_family), - color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), - 'singlecell', color) %>% - fct_recode('single-cell' = 'singlecell', - 'other' = 'non_libra') %>% - fct_relevel('single-cell', 'pseudobulk', 'other'), + # color = ifelse(sc_binarization, 'binarized', sc_da_family), + # color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + # 'singlecell', color) %>% + # fct_recode('single-cell' = 'singlecell', + # 'other' = 'non_libra') %>% + # fct_relevel('single-cell', 'pseudobulk', 'other'), method = fct_recode(method, "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', 'Permutation~test' = 'permutation', @@ -46,7 +46,9 @@ dat %<>% 'Wilcoxon~rank-sum~test' = 'wilcox', 'Fisher~exact~test' = 'FET', 'Negative~binomial' = 'negbinom') %>% - as.character()) %>% + as.character(), + color = ifelse(!sc_binarization, method, 'binarized') + ) %>% # drop limma filter(!grepl('limma', method)) @@ -59,6 +61,11 @@ avg1 = dat %>% summarise(aucc = mean(aucc), n = n()) %>% ungroup() + +color_fct = unique(avg1$color) +color_fct = c(color_fct[color_fct != 'binarized'], 'binarized') +avg1$color = factor(avg1$color, levels = rev(color_fct)) + # plot labs = avg1 %>% group_by(k, method, color) %>% @@ -67,15 +74,21 @@ labs = avg1 %>% mutate(label = formatC(median, format = 'f', digits = 2)) pal = c(da_analysis_colors, 'binarized' = "#6E645F") +saveRDS(labs, 'data/summaries/meta_summaries/bulk_aucc_binarized_summary.rds') + +method_levels = avg1 %>% + group_by(method) %>% + summarise(aucc = median(aucc)) %>% + ungroup() %>% + arrange(aucc) %>% + pull(method) +avg1$method = factor(avg1$method, levels = method_levels) + p1 = avg1 %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% - ggplot(aes(x = reorder(method, aucc, stats::median), - color = color, fill = color)) + + ggplot(aes(x = method, color = color, fill = color)) + geom_boxplot(aes(y = aucc), outlier.shape = NA, alpha = 0.4, width = 0.65, size = 0.35) + - geom_text(data = labs, aes(y = -0.08, group = color, label = label), + geom_text(data = labs, aes(y = -0.08, group = color, label = label, color='black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -88,7 +101,8 @@ p1 = avg1 %>% # legend.position = 'none', legend.key.width = unit(0.18, 'lines'), legend.key.height = unit(0.14, 'lines'), - aspect.ratio = 1.4) + aspect.ratio = 1.4) + + ggtitle('Matched bulk concordance') p1 ggsave("fig/Fig6/bulk-aucc-binarization.pdf", p1, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) @@ -125,12 +139,12 @@ dat %<>% gsub("fisher", "FET", .) %>% gsub("-LR_.*|-perm.*$", "", .) %>% gsub("LR_", "LR", .), - color = ifelse(binarization, 'binarized', da_family), - color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), - 'singlecell', color) %>% - fct_recode('single-cell' = 'singlecell', - 'other' = 'non_libra') %>% - fct_relevel('single-cell', 'pseudobulk', 'other'), + # color = ifelse(binarization, 'binarized', da_family), + # color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + # 'singlecell', color) %>% + # fct_recode('single-cell' = 'singlecell', + # 'other' = 'non_libra') %>% + # fct_relevel('single-cell', 'pseudobulk', 'other'), method = fct_recode(method, "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', 'Permutation~test' = 'permutation', @@ -141,7 +155,9 @@ dat %<>% 'Wilcoxon~rank-sum~test' = 'wilcox', 'Fisher~exact~test' = 'FET', 'Negative~binomial' = 'negbinom') %>% - as.character()) %>% + as.character(), + color = ifelse(binarization, 'binarized', method) + ) %>% # drop limma filter(!grepl('limma', method)) @@ -152,6 +168,10 @@ dat %<>% ungroup() dplyr::count(dat, color, method) %>% arrange(method, color) +color_fct = unique(dat$color) +color_fct = c('binarized', color_fct[color_fct != 'binarized']) +dat$color = factor(dat$color, levels = color_fct) + labs = dat %>% group_by(method, color) %>% summarise(mean = mean(number_of_da_regions), @@ -160,11 +180,11 @@ labs = dat %>% number_of_da_regions = median) %>% ungroup() %>% mutate(label = round(median)) + +saveRDS(labs, 'data/summaries/meta_summaries/luecken_false_discoveries_binarized_summary.rds') + pal = c(da_analysis_colors, 'binarized' = "#6E645F") p2 = dat %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% ggplot(aes(x = reorder(method, number_of_da_regions, stats::median), color = color, fill = color)) + geom_boxplot(aes(y = number_of_da_regions), outlier.shape = NA, @@ -172,7 +192,7 @@ p2 = dat %>% # geom_jitter(aes(y = aucc), width = 0.2, height = 0, shape = 1, size = 0.5, # stroke = 0.3) + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + - geom_text(data = labs, aes(y = -18e3, group = color, label = label), + geom_text(data = labs, aes(y = -18e3, group = color, label = label, color='black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -186,12 +206,12 @@ p2 = dat %>% theme(axis.title.y = element_blank(), aspect.ratio = 1.4, legend.position = 'none' - ) + ) + + ggtitle('Published data') p2 ggsave("fig/Fig6/luecken-false-discoveries-binarization.pdf", p2, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) - ###############################################################################- ## snapatac binarization plot #### ###############################################################################- @@ -221,12 +241,12 @@ dat %<>% gsub("fisher", "FET", .) %>% gsub("-LR_.*|-perm.*$", "", .) %>% gsub("LR_", "LR", .), - color = ifelse(binarization, 'binarized', de_family), - color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), - 'singlecell', color) %>% - fct_recode('single-cell' = 'singlecell', - 'other' = 'non_libra') %>% - fct_relevel('single-cell', 'pseudobulk', 'other'), + # color = ifelse(binarization, 'binarized', de_family), + # color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + # 'singlecell', color) %>% + # fct_recode('single-cell' = 'singlecell', + # 'other' = 'non_libra') %>% + # fct_relevel('single-cell', 'pseudobulk', 'other'), method = fct_recode(method, "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', 'Permutation~test' = 'permutation', @@ -237,7 +257,8 @@ dat %<>% 'Wilcoxon~rank-sum~test' = 'wilcox', 'Fisher~exact~test' = 'FET', 'Negative~binomial' = 'negbinom') %>% - as.character()) %>% + as.character(), + color = ifelse(binarization, 'binarized', method)) %>% # drop limma filter(!grepl('limma', method)) @@ -247,6 +268,10 @@ dat %<>% filter(n_distinct(color) == 2) %>% ungroup() +color_fct = unique(dat$color) +color_fct = c('binarized', color_fct[color_fct != 'binarized']) +dat$color = factor(dat$color, levels = color_fct) + labs = dat %>% group_by(method, color) %>% summarise(mean = mean(number_of_da_regions), @@ -255,11 +280,11 @@ labs = dat %>% number_of_da_regions = median) %>% ungroup() %>% mutate(label = round(median)) + +saveRDS(labs, 'data/summaries/meta_summaries/snapatac_false_discoveries_binarized_summary.rds') + pal = c(da_analysis_colors, 'binarized' = "#6E645F") p3 = dat %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% ggplot(aes(x = reorder(method, number_of_da_regions, stats::median), color = color, fill = color)) + geom_boxplot(aes(y = number_of_da_regions), outlier.shape = NA, @@ -267,7 +292,7 @@ p3 = dat %>% # geom_jitter(aes(y = aucc), width = 0.2, height = 0, shape = 1, size = 0.5, # stroke = 0.3) + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + - geom_text(data = labs, aes(y = -20e3, group = color, label = label), + geom_text(data = labs, aes(y = -20e3, group = color, label = label, color='black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -281,7 +306,8 @@ p3 = dat %>% theme(axis.title.y = element_blank(), aspect.ratio = 1.4, legend.position = 'none' - ) + ) + + ggtitle('Downsampled data') p3 ggsave("fig/Fig6/snapatac-false-discoveries-binarization.pdf", p3, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) @@ -315,12 +341,12 @@ dat %<>% gsub("fisher", "FET", .) %>% gsub("-LR_.*|-perm.*$", "", .) %>% gsub("LR_", "LR", .), - color = ifelse(binarization, 'binarized', de_family), - color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), - 'singlecell', color) %>% - fct_recode('single-cell' = 'singlecell', - 'other' = 'non_libra') %>% - fct_relevel('single-cell', 'pseudobulk', 'other'), + # color = ifelse(binarization, 'binarized', de_family), + # color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + # 'singlecell', color) %>% + # fct_recode('single-cell' = 'singlecell', + # 'other' = 'non_libra') %>% + # fct_relevel('single-cell', 'pseudobulk', 'other'), method = fct_recode(method, "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', 'Permutation~test' = 'permutation', @@ -331,7 +357,9 @@ dat %<>% 'Wilcoxon~rank-sum~test' = 'wilcox', 'Fisher~exact~test' = 'FET', 'Negative~binomial' = 'negbinom') %>% - as.character()) %>% + as.character(), + color = ifelse(binarization, 'binarized', method) + ) %>% # drop limma filter(!grepl('limma', method)) @@ -340,6 +368,9 @@ dat %<>% group_by(method) %>% filter(n_distinct(color) == 2) %>% ungroup() +color_fct = unique(dat$color) +color_fct = c('binarized', color_fct[color_fct != 'binarized']) +dat$color = factor(dat$color, levels = color_fct) labs = dat %>% group_by(method, color) %>% @@ -349,11 +380,11 @@ labs = dat %>% number_of_da_regions = median) %>% ungroup() %>% mutate(label = round(median)) + +saveRDS(labs, 'data/summaries/meta_summaries/splatter_false_discoveries_binarized_summary.rds') + pal = c(da_analysis_colors, 'binarized' = "#6E645F") p4 = dat %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% ggplot(aes(x = reorder(method, number_of_da_regions, stats::median), color = color, fill = color)) + geom_boxplot(aes(y = number_of_da_regions), outlier.shape = NA, @@ -361,7 +392,7 @@ p4 = dat %>% # geom_jitter(aes(y = aucc), width = 0.2, height = 0, shape = 1, size = 0.5, # stroke = 0.3) + # geom_errorbar(data = labs, aes(ymin = mean, ymax = mean), width = 0.8) + - geom_text(data = labs, aes(y = -10e3, group = color, label = label), + geom_text(data = labs, aes(y = -10e3, group = color, label = label, color = 'black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -375,7 +406,8 @@ p4 = dat %>% theme(axis.title.y = element_blank(), aspect.ratio = 1.4, legend.position = 'none' - ) + )+ + ggtitle('Simulations') p4 ggsave("fig/Fig6/splatter-false-discoveries-binarization.pdf", p4, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) @@ -418,12 +450,12 @@ dat %<>% gsub("fisher", "FET", .) %>% gsub("-LR_.*|-perm.*$", "", .) %>% gsub("LR_", "LR", .), - color = ifelse(binarization, 'binarized', da_family), - color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), - 'singlecell', color) %>% - fct_recode('single-cell' = 'singlecell', - 'other' = 'non_libra') %>% - fct_relevel('single-cell', 'pseudobulk', 'other'), + # color = ifelse(binarization, 'binarized', da_family), + # color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + # 'singlecell', color) %>% + # fct_recode('single-cell' = 'singlecell', + # 'other' = 'non_libra') %>% + # fct_relevel('single-cell', 'pseudobulk', 'other'), method = fct_recode(method, "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', 'Permutation~test' = 'permutation', @@ -434,7 +466,8 @@ dat %<>% 'Wilcoxon~rank-sum~test' = 'wilcox', 'Fisher~exact~test' = 'FET', 'Negative~binomial' = 'negbinom') %>% - as.character()) %>% + as.character(), + color = ifelse(binarization, 'binarized', method)) %>% # drop limma filter(!grepl('limma', method)) @@ -443,6 +476,9 @@ dat %<>% group_by(method) %>% filter(n_distinct(color) == 2) %>% ungroup() +color_fct = unique(dat$color) +color_fct = c('binarized', color_fct[color_fct != 'binarized']) +dat$color = factor(dat$color, levels = color_fct) # average over bulk methods avg1 = dat %>% @@ -460,16 +496,14 @@ labs = avg1 %>% median = median(mean_expr), mean_expr = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) + pal = c(da_analysis_colors, 'binarized' = "#6E645F") p5 = avg1 %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% ggplot(aes(x = reorder(method, mean_expr, stats::median), color = color, fill = color)) + geom_boxplot(aes(y = mean_expr), outlier.shape = NA, alpha = 0.4, width = 0.6, size = 0.4) + - geom_text(data = labs, aes(y = -1.5, group = color, label = label), + geom_text(data = labs, aes(y = -1.5, group = color, label = label, color='black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -480,7 +514,8 @@ p5 = avg1 %>% boxed_theme(size_sm = 5, size_lg = 6) + theme(axis.title.y = element_blank(), legend.position = 'none' - ) + ) + + ggtitle('Matched bulk top 1000 DA peaks') p5 ggsave("fig/Fig6/bulk-mean-expr-binarization.pdf", p5, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) @@ -502,14 +537,11 @@ labs = avg3 %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) p6 = avg3 %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% ggplot(aes(x = reorder(method, percentage_open, stats::median), color = color, fill = color)) + geom_boxplot(aes(y = percentage_open), outlier.shape = NA, alpha = 0.4, width = 0.6, size = 0.4) + - geom_text(data = labs, aes(y = -0.05, group = color, label = label), + geom_text(data = labs, aes(y = -0.05, group = color, label = label, color='black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -519,7 +551,8 @@ p6 = avg3 %>% coord_flip() + boxed_theme(size_sm = 5, size_lg = 6) + theme(axis.title.y = element_blank(), - legend.position = 'none') + legend.position = 'none') + + ggtitle('Matched bulk top 1000 DA peaks') p6 ggsave("fig/Fig6/bulk-pct-open-binarization.pdf", p6, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) @@ -542,14 +575,11 @@ labs = avg5 %>% mutate(label = round(median)) pal = c(da_analysis_colors, 'binarized' = "#6E645F") p7 = avg5 %>% - mutate( - color = ifelse(color != 'binarized', method, color) - ) %>% ggplot(aes(x = reorder(method, peak_width, stats::median), color = color, fill = color)) + geom_boxplot(aes(y = peak_width), outlier.shape = NA, alpha = 0.4, width = 0.6, size = 0.4) + - geom_text(data = labs, aes(y = -0.05, group = color, label = label), + geom_text(data = labs, aes(y = -0.05, group = color, label = label, color='black'), position = position_dodge(width = 0.65), size = 1.75, hjust = 0, show.legend = FALSE) + scale_color_manual('', values = pal) + @@ -560,7 +590,8 @@ p7 = avg5 %>% boxed_theme(size_sm = 5, size_lg = 6) + theme(axis.title.y = element_blank(), legend.position = 'none', - aspect.ratio = 1.4) + aspect.ratio = 1.4)+ + ggtitle('Matched bulk top 1000 DA peaks') p7 ggsave("fig/Fig6/bulk-peak-width-binarization.pdf", p7, width = 5, height = 6.5, units = "cm", useDingbats = FALSE) @@ -661,7 +692,7 @@ p8 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, AUCC') + + ggtitle('Matched bulk\nCohen\'s d, AUCC') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + @@ -760,7 +791,7 @@ p9 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, false discoveries') + + ggtitle('Published data\nCohen\'s d, false discoveries') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + @@ -865,7 +896,7 @@ p10 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, false discoveries') + + ggtitle('Downsampled data\nCohen\'s d, false discoveries') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + @@ -972,7 +1003,7 @@ p11 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, false discoveries') + + ggtitle('Simulations\nCohen\'s d, false discoveries') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + @@ -1094,7 +1125,7 @@ p12 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, read depth') + + ggtitle('Matched bulk\nCohen\'s d, read depth') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + @@ -1153,7 +1184,7 @@ p13 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, % open') + + ggtitle('Matched bulk\nCohen\'s d, % open') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + @@ -1207,7 +1238,7 @@ p14 = eff %>% geom_errorbar(aes(ymin = 0, ymax = d, group = method), width = 0, size = 0.3, position = position_dodge(width = 0.6)) + scale_y_continuous('Cohen\'s d,\nrelative to log(TP10K)') + - ggtitle('Cohen\'s d, peak width (bp)') + + ggtitle('Matched bulk\nCohen\'s d, peak width (bp)') + scale_color_manual(values = color_pal,labels = parse_format()) + scale_fill_manual(values = color_pal,labels = parse_format()) + boxed_theme() + diff --git a/R/figures/supp-fig1.R b/R/figures/supp-fig1.R index cf8c2b8..b13782d 100644 --- a/R/figures/supp-fig1.R +++ b/R/figures/supp-fig1.R @@ -84,6 +84,8 @@ labs = avg3 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/bulk_aucc-k=1000-summary.rds') + pal = da_analysis_colors p1 = avg3 %>% ggplot(aes(x = reorder(method, aucc, stats::median), @@ -123,6 +125,8 @@ labs = avg4 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/bulk_aucc-filter=5-summary.rds') + p2 = avg4 %>% ggplot(aes(x = reorder(method, aucc, stats::median), color = method, fill = method)) + @@ -153,9 +157,10 @@ ggsave("fig/Supp_Fig1/bulk-aucc-filter=5pct.pdf", p2, ## AUCC: leave-bulk-out #### ###############################################################################- +full_labs = data.frame() bulk_grid = distinct(dat, bulk_da_method, bulk_da_type) -plots = map(seq_len(nrow(bulk_grid)), ~ { - bulk_idx = .x +plots = list() +for (bulk_idx in 1:nrow(bulk_grid)) { bulk_row = bulk_grid[bulk_idx, ] print(bulk_row) avg0 = dat %>% @@ -216,6 +221,10 @@ plots = map(seq_len(nrow(bulk_grid)), ~ { summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) + full_labs %<>% rbind( + labs %>% tidyr::crossing(bulk_row) + ) + pal = da_analysis_colors # plot @@ -241,8 +250,11 @@ plots = map(seq_len(nrow(bulk_grid)), ~ { legend.position = 'none', aspect.ratio = 1.4, plot.title = element_text(size = 5, hjust = 0.5)) - p -}) + plots[[length(plots)+1]] = p +} + +saveRDS(full_labs, 'data/summaries/meta_summaries/bulk_aucc-leave_out_bulk-summary.rds') + p3 = wrap_plots(plots, nrow = 2, ncol=3) p3 # ggsave("fig/Supp_Fig1/bulk-aucc-leave-bulk-out.pdf", p3, @@ -423,6 +435,8 @@ labs = avg2 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/bulk_aucc-pseudobulk_peaks-summary.rds') + pal = da_analysis_colors p5 = avg2 %>% @@ -600,6 +614,8 @@ labs = avg2 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/bulk_aucc-noisy_peaks-summary.rds') + pal = da_analysis_colors p6 = avg2 %>% ggplot(aes(x = reorder(method, aucc, stats::median), @@ -689,6 +705,7 @@ coefs = coef(summary(fit)) %>% mutate(padj = p.adjust(pval, 'BH')) filter(coefs, padj < 0.05) filter(coefs, pval < 0.05) + # load data dat = readRDS("data/summaries/concordance_res/matched_bulk/all_aucc_enhancer_promoter.rds") %>% type_convert() %>% @@ -758,6 +775,8 @@ labs = avg2 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/bulk_aucc-enhancer_promoter-summary.rds') + pal = da_analysis_colors p7_1 = avg2 %>% filter(ann == 'enhancer') %>% diff --git a/R/figures/supp-fig10.R b/R/figures/supp-fig10.R index 85dd141..eb7d0dd 100644 --- a/R/figures/supp-fig10.R +++ b/R/figures/supp-fig10.R @@ -143,7 +143,7 @@ p1 = avg1 %>% # color = 'black') + scale_color_manual('Filter by:', values = pal) + scale_fill_manual('Filter by:', values = pal) + - scale_x_reordered() + + scale_x_reordered('% of genes removed') + scale_y_continuous('AUCC') + boxed_theme(size_sm = 5, size_lg = 6) + theme(legend.position = 'top', @@ -178,7 +178,7 @@ p2 = enr %>% # color = 'black') + scale_color_manual('', values = pal) + scale_fill_manual('', values = pal) + - scale_x_reordered() + + scale_x_reordered('% of genes removed') + scale_y_continuous(expression(Delta~AUCC)) + boxed_theme(size_sm = 5, size_lg = 6) + theme(legend.position = 'none', diff --git a/R/figures/supp-fig11.R b/R/figures/supp-fig11.R index f622ff9..4e5c8b6 100644 --- a/R/figures/supp-fig11.R +++ b/R/figures/supp-fig11.R @@ -89,7 +89,7 @@ p1 = dat %>% width = 0.7, size = 0.3) + scale_color_manual('Filter by:', values = pal) + scale_fill_manual('Filter by:', values = pal) + - scale_x_reordered() + + scale_x_reordered('% of peaks removed') + scale_y_continuous('# of DA peaks') + boxed_theme(size_sm = 5, size_lg = 6) + theme(legend.position = 'top', @@ -126,7 +126,7 @@ p2 = enr %>% # color = 'black') + scale_color_manual('', values = pal) + scale_fill_manual('', values = pal) + - scale_x_reordered() + + scale_x_reordered('% of peaks removed') + scale_y_continuous(expression(Delta~'# of DA peaks')) + boxed_theme(size_sm = 5, size_lg = 6) + theme(legend.position = 'none', diff --git a/R/figures/supp-fig17.R b/R/figures/supp-fig17.R index 7f7444d..b93eb23 100644 --- a/R/figures/supp-fig17.R +++ b/R/figures/supp-fig17.R @@ -102,6 +102,7 @@ grey_pal = colorRampPalette(c('grey10', 'grey85'))(6) %>% setNames(c('none', 'TP(median)', 'TP(10K)', 'logTP(median)', 'TF-IDF', 'smooth GC-full quantile')) curr_out_plots = list() +full_labs = data.frame() for (method in methods) { avg2 = avg1 %>% filter(method == !!method) @@ -110,6 +111,8 @@ for (method in methods) { summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) + full_labs %<>% rbind(labs) + pal = c('', grey_pal) pal[1] = da_analysis_colors[method] pal_names = c('logTP(10K)', names(grey_pal)) @@ -138,6 +141,8 @@ for (method in methods) { curr_out_plots[[length(curr_out_plots)+1]] = p1_1 } +saveRDS(full_labs, 'data/summaries/meta_summaries/bulk_aucc_normalized_summary.rds') + p1 = wrap_plots(curr_out_plots, nrow=1) ggsave("fig/Supp_Fig17/bulk-aucc-normalization.pdf", p1, width = 18, height = 6.5, units = "cm", useDingbats = FALSE) @@ -187,6 +192,7 @@ dat %<>% 'TP(median)' = 'tp_median', 'TP(10K)' = 'tp10k')) curr_out_plots = list() +full_labs = data.frame() for (method in methods) { dat1 = dat %>% filter(method == !!method) @@ -198,6 +204,7 @@ for (method in methods) { number_of_da_regions = median) %>% ungroup() %>% mutate(label = round(median)) + full_labs %<>% rbind(labs) pal = c('', grey_pal) pal[1] = da_analysis_colors[method] pal_names = c('logTP(10K)', names(grey_pal)) @@ -230,6 +237,8 @@ for (method in methods) { curr_out_plots[[length(curr_out_plots)+1]] = p2_1 } +saveRDS(full_labs, 'data/summaries/meta_summaries/luecken_false_discoveries_normalized_summary.rds') + p2 = wrap_plots(curr_out_plots, nrow=1) ggsave("fig/Supp_Fig17/luecken-false-discoveries-normalization.pdf", p2, width = 18, height = 6.5, units = "cm", useDingbats = FALSE) @@ -287,6 +296,7 @@ dat %<>% 'TP(10K)' = 'tp10k')) curr_out_plots = list() +full_labs = data.frame() for (method in methods) { dat1 = dat %>% filter(method == !!method) @@ -298,6 +308,7 @@ for (method in methods) { number_of_da_regions = median) %>% ungroup() %>% mutate(label = round(median)) + full_labs %<>% rbind(labs) pal = c('', grey_pal) pal[1] = da_analysis_colors[method] pal_names = c('logTP(10K)', names(grey_pal)) @@ -330,6 +341,8 @@ for (method in methods) { curr_out_plots[[length(curr_out_plots)+1]] = p3_1 } +saveRDS(full_labs, 'data/summaries/meta_summaries/snapatac_false_discoveries_normalized_summary.rds') + p3 = wrap_plots(curr_out_plots, nrow=1) ggsave("fig/Supp_Fig17/snapatac-false-discoveries-normalization.pdf", p3, width = 18, height = 6.5, units = "cm", useDingbats = FALSE) @@ -388,6 +401,7 @@ dat %<>% 'TP(median)' = 'tp_median', 'TP(10K)' = 'tp10k')) curr_out_plots = list() +full_labs = data.frame() for (method in methods) { dat1 = dat %>% filter(method == !!method) @@ -399,6 +413,7 @@ for (method in methods) { number_of_da_regions = median) %>% ungroup() %>% mutate(label = round(median)) + full_labs %<>% rbind(labs) pal = c('', grey_pal) pal[1] = da_analysis_colors[method] pal_names = c('logTP(10K)', names(grey_pal)) @@ -430,6 +445,7 @@ for (method in methods) { out_plots[[length(out_plots)+1]] = p4_1 curr_out_plots[[length(curr_out_plots)+1]] = p4_1 } +saveRDS(full_labs, 'data/summaries/meta_summaries/splatter_false_discoveries_normalized_summary.rds') p4 = wrap_plots(curr_out_plots, nrow=1) ggsave("fig/Supp_Fig17/splatter-false-discoveries-normalization.pdf", p4, width = 18, height = 6.5, units = "cm", useDingbats = FALSE) diff --git a/R/figures/supp-fig2.R b/R/figures/supp-fig2.R index b2c3232..d9c7688 100644 --- a/R/figures/supp-fig2.R +++ b/R/figures/supp-fig2.R @@ -82,6 +82,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_aucc-bidrectional_promoter-summary.rds') + pal = da_analysis_colors p1 = avg0 %>% mutate(title = 'Removal of overlapping\npromoter regions') %>% @@ -111,7 +113,6 @@ p1 ggsave("fig/Supp_Fig2/multiome-aucc-bidirectional-promoters.pdf", p1, width = 4.5, height = 6.5, units = "cm", useDingbats = FALSE) - # load data dat = readRDS("data/summaries/concordance_res/multiome/all_aucc_skinny.rds") %>% type_convert() %>% @@ -181,8 +182,10 @@ avg %<>% filter(!filter %in% c('10%', '20%')) grid = distinct(dat, scrna_da_family, scrna_da_method, scrna_da_type) -plots = pmap(grid, function(...) { - current = tibble(...) +full_labs = data.frame() +plots = list() +for (i in 1:nrow(grid)) { + current = grid[i,] # average over bulk methods avg = dat %>% @@ -241,6 +244,10 @@ plots = pmap(grid, function(...) { summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) + full_labs %<>% rbind( + labs %>% tidyr::crossing(current[2:3]) + ) + pal = da_analysis_colors p = avg0 %>% mutate(title = held_out) %>% @@ -264,11 +271,13 @@ plots = pmap(grid, function(...) { plot.title = element_text(size = 6), aspect.ratio = 1.4, legend.position = 'none') - p -}) + plots[[length(plots)+1]] = p +} +saveRDS(full_labs, 'data/summaries/meta_summaries/multiome_aucc-leave_out_bulk-summary.rds') # plot p2 = wrap_plots(plots, nrow = 2, ncol=3) +p2 ggsave("fig/Supp_Fig2/multiome-aucc-leave-bulk-out.pdf", p2, width = 13.85, height = 15, units = "cm", useDingbats = FALSE) # correlation matrix @@ -375,12 +384,80 @@ p3 ggsave("fig/Supp_Fig2/multiome-aucc-leave-bulk-out-correlation.pdf", p3, width = 5, height = 5, units = "cm", useDingbats = FALSE) + +# load data +dat = readRDS("data/summaries/concordance_res/multiome/all_aucc_non_overlapping.rds") %>% + type_convert() %>% + filter(paper != 'Flochlay_2022') +# no percentiles +# no binarization or normalization +# filter Zhu, Anandon, atlas by label +dat %<>% + filter( + !grepl('Zhu', paper), + !grepl('Anadon', paper), + !(paper == 'Squair_2022' & grepl('7d_vs_Uninjured', cell_type)), + !(paper == 'Squair_2022' & grepl('2m_vs_Uninjured', cell_type)), + ) + +# remove mixed models and pseudoreplicates +dat %<>% filter(!scatac_pseudo_repl, scatac_da_family != 'mixedmodel') +# unique columns +dat %<>% extract(, map_lgl(., ~ n_distinct(.x) > 1)) + +# average over bulk methods +avg = dat %>% + group_by_at(vars(-scrna_da_method, -bulk_da_type, + -bulk_features, -overlap, -aucc)) %>% + summarise(aucc = mean(aucc), + n = n()) %>% + ungroup() + +# re-code x-values and colors +avg %<>% + mutate(method = paste0(scatac_da_method, '-', scatac_da_type) %>% + gsub("-singlecell|-binomial|-exact", "", .), + method = gsub("snapatac", "SnapATAC::findDAR", method) %>% + gsub("fisher", "FET", .) %>% + gsub("-LR_.*|-perm.*$", "", .) %>% + gsub("LR_", "LR", .), + color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + 'singlecell', scatac_da_family) %>% + fct_recode('single-cell' = 'singlecell', + 'other' = 'non_libra') %>% + fct_relevel('single-cell', 'pseudobulk', 'other'), + method = fct_recode(method, + "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', + 'Permutation~test' = 'permutation', + 't~test' = 't', + 'LR[peaks]' = 'LRpeaks', + 'LR[clusters]' = 'LR', + 'Binomial' = 'binomial', + 'Wilcoxon~rank-sum~test' = 'wilcox', + 'Fisher~exact~test' = 'FET', + 'Negative~binomial' = 'negbinom') %>% + as.character()) %>% + # drop limma + filter(!grepl('limma', method)) + +# re-code filtering +avg %<>% + mutate(filter = ifelse(scatac_percent_filter, + paste0(scatac_min_cell_filter, '%'), + paste0(scatac_min_cell_filter, ' cells')) %>% + fct_recode('1 cell' = '1 cells') %>% + fct_relevel('1 cell', '10 cells', '0.2%', '0.5%', '1%', '5%', + '10%', '20%')) %>% + filter(!filter %in% c('10%', '20%')) + avg0 = filter(avg, k == 100, scatac_min_cell_filter == 1, !scatac_percent_filter) labs = avg0 %>% group_by(method, color) %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_aucc-k=100-summary.rds') + pal = da_analysis_colors p4 = avg0 %>% mutate(title = paste('k = 100')) %>% @@ -414,6 +491,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_aucc-k=1000-summary.rds') + pal = da_analysis_colors p5 = avg0 %>% mutate(title = paste('k = 1000')) %>% @@ -447,6 +526,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_aucc-filter=1-summary.rds') + p6 = avg0 %>% mutate(title = paste('1% cell filter')) %>% ggplot(aes(x = reorder(method, aucc, stats::median), @@ -551,6 +632,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_aucc-scrna_terciles-summary.rds') + lvls = labs %>% filter(expr == 'all') %>% arrange(median) %>% diff --git a/R/figures/supp-fig3.R b/R/figures/supp-fig3.R index 7c01bc4..26babad 100644 --- a/R/figures/supp-fig3.R +++ b/R/figures/supp-fig3.R @@ -82,6 +82,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_gsea_aucc-bidirectional_promoter-summary.rds') + pal = da_analysis_colors p1 = avg0 %>% mutate(title = 'Removal of overlapping\npromoter regions') %>% @@ -179,9 +181,11 @@ avg %<>% filter(!filter %in% c('10%', '20%')) grid = distinct(dat, scrna_da_family, scrna_da_method, scrna_da_type) -plots = pmap(grid, function(...) { - current = tibble(...) - +full_labs = data.frame() +plots = list() + +for (i in 1:nrow(grid)) { + current = grid[i,] # average over bulk methods avg = dat %>% anti_join(current) %>% @@ -239,6 +243,10 @@ plots = pmap(grid, function(...) { summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) + full_labs %<>% + rbind(labs %>% tidyr::crossing(current[2:3])) + + pal = da_analysis_colors p = avg0 %>% mutate(title = held_out) %>% @@ -262,11 +270,12 @@ plots = pmap(grid, function(...) { plot.title = element_text(size = 6), aspect.ratio = 1.4, legend.position = 'none') - p -}) - + plots[[length(plots)+1]] = p +} # plot p2 = wrap_plots(plots, nrow = 2, ncol=3) +saveRDS(full_labs, 'data/summaries/meta_summaries/multiome_gsea_aucc-leave_bulk_out-summary.rds') +p2 ggsave("fig/Supp_Fig3/multiome-gsea-aucc-leave-bulk-out.pdf", p2, width = 13.85, height = 15, units = "cm", useDingbats = FALSE) # correlation matrix @@ -373,12 +382,81 @@ p3 ggsave("fig/Supp_Fig3/multiome-gsea-aucc-leave-bulk-out-correlation.pdf", p3, width = 5, height = 5, units = "cm", useDingbats = FALSE) +# load data +dat = readRDS("data/summaries/concordance_res/multiome/all_aucc_gsea.rds") %>% + type_convert() %>% + filter(paper != 'Flochlay_2022') +# no percentiles +# no binarization or normalization +table(dat$scatac_binarization, dat$scatac_normalization) + +# filter Zhu, Anandon, atlas by label +dat %<>% + filter( + !grepl('Zhu', paper), + !grepl('Anadon', paper), + !(paper == 'Squair_2022' & grepl('7d_vs_Uninjured', cell_type)), + !(paper == 'Squair_2022' & grepl('2m_vs_Uninjured', cell_type)), + ) +# remove mixed models and pseudoreplicates +dat %<>% filter(!scatac_pseudo_repl, scatac_da_family != 'mixedmodel') + +# average over bulk methods +avg = dat %>% + group_by_at(vars(-scrna_da_family, -scrna_da_method, -scrna_da_type, + -bulk_features, -overlap, -aucc)) %>% + summarise(aucc = mean(aucc), + n = n()) %>% + ungroup() + +# re-code x-values and colors +avg %<>% + mutate(method = paste0(scatac_da_method, '-', scatac_da_type) %>% + gsub("-singlecell|-binomial|-exact", "", .), + method = ifelse(scatac_pseudo_repl, + paste(method, '(pseudo-replicates)'), method), + method = gsub("snapatac", "SnapATAC::findDAR", method) %>% + gsub("fisher", "FET", .) %>% + gsub("-LR_.*|-perm.*$", "", .) %>% + gsub("LR_", "LR", .), + color = ifelse(scatac_pseudo_repl, 'pseudo-replicates', scatac_da_family), + color = ifelse(method %in% c('binomial', 'FET', 'LRpeaks', 'permutation'), + 'singlecell', color) %>% + fct_recode('single-cell' = 'singlecell', + 'other' = 'non_libra') %>% + fct_relevel('single-cell', 'pseudobulk', 'other'), + method = fct_recode(method, + "'SnapATAC::findDAR'" = 'SnapATAC::findDAR', + 'Permutation~test' = 'permutation', + 't~test' = 't', + 'LR[peaks]' = 'LRpeaks', + 'LR[clusters]' = 'LR', + 'Binomial' = 'binomial', + 'Wilcoxon~rank-sum~test' = 'wilcox', + 'Fisher~exact~test' = 'FET', + 'Negative~binomial' = 'negbinom') %>% + as.character()) %>% + # drop limma + filter(!grepl('limma', method)) + +# re-code filtering +avg %<>% + mutate(filter = ifelse(scatac_percent_filter, + paste0(scatac_min_cell_filter, '%'), + paste0(scatac_min_cell_filter, ' cells')) %>% + fct_recode('1 cell' = '1 cells') %>% + fct_relevel('1 cell', '10 cells', '0.2%', '0.5%', '1%', '5%', + '10%', '20%')) %>% + filter(!filter %in% c('10%', '20%')) + avg0 = filter(avg, k == 50, scatac_min_cell_filter == 1, !scatac_percent_filter) labs = avg0 %>% group_by(method, color) %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_gsea_aucc-k=50-summary.rds') + pal = da_analysis_colors p4 = avg0 %>% mutate(title = paste('k = 50')) %>% @@ -412,6 +490,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_gsea_aucc-k=500-summary.rds') + pal = da_analysis_colors p5 = avg0 %>% mutate(title = paste('k = 500')) %>% @@ -445,6 +525,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_gsea_aucc-filter=1-summary.rds') + p6 = avg0 %>% mutate(title = paste('1% cell filter')) %>% ggplot(aes(x = reorder(method, aucc, stats::median), @@ -551,6 +633,8 @@ labs = avg0 %>% summarise(mean = mean(aucc), n = n(), median = median(aucc), aucc = median) %>% ungroup() %>% mutate(label = formatC(median, format = 'f', digits = 2)) +saveRDS(labs, 'data/summaries/meta_summaries/multiome_gsea_aucc-scrna_terciles-summary.rds') + lvls = labs %>% filter(expr == 'all') %>% arrange(median) %>% diff --git a/R/figures/supp-fig9.R b/R/figures/supp-fig9.R index 9359a99..c0dd4c9 100644 --- a/R/figures/supp-fig9.R +++ b/R/figures/supp-fig9.R @@ -98,7 +98,7 @@ p1 = avg1 %>% # color = 'black') + scale_color_manual('Filter by:', values = pal) + scale_fill_manual('Filter by:', values = pal) + - scale_x_reordered() + + scale_x_reordered('% of peaks removed') + scale_y_continuous('AUCC') + boxed_theme(size_sm = 5, size_lg = 6) + theme(legend.position = 'top', @@ -133,7 +133,7 @@ p2 = enr %>% # color = 'black') + scale_color_manual('', values = pal) + scale_fill_manual('', values = pal) + - scale_x_reordered() + + scale_x_reordered('% of peaks removed') + scale_y_continuous(expression(Delta~AUCC)) + boxed_theme(size_sm = 5, size_lg = 6) + theme(legend.position = 'none', diff --git a/supp_tables/supp_table1.csv b/supp_tables/supp_table1.csv new file mode 100644 index 0000000..c4b762e --- /dev/null +++ b/supp_tables/supp_table1.csv @@ -0,0 +1,42 @@ +Figure,Feature,Binarization,Normalization +"2b, 5a, 7a, 8a, 8b, 8e, 8f, S1a, S1b, S1c, S1e, S1f, S9a, S9b, S27a, S28a, S28b, E1a, E1b, E1c, E4a, E4b, E4c",Peaks called in matched bulk ATAC-seq data,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +"2c, 5b, 7b, 8c, 8d, 8e, 8f, S2a, S2b, S2c, S2d, S2e, S10a, S10b, S27b, S28c, S28d, E2a, E2b, E2c",TSSwindows called with 10000 bp from transcription start sites,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +"2d, S3a, S3b, S3c, S3d, S3e",Gene set values were obtained from the TSSwindows-associated genes,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +"3b, 4a, 4b, 4c, 5c, S5a, S5b, S5c, S6a, S6b, S6c, S6d, S6e, S6f, S11a, S11b, E3a, E3b, E3c",Peaks called in snATAC-seq data,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +"3c, S7a, S7b, S7c, S7d, S7e, S7f",Peaks called in downsamped (“snATAC-seq”) data,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +"3d, 3e, S4a, S4b, S4c, S4d, S4e, S8a, S8b, S8c, S8d",Counts were simulated from the parameters estimated from peaks defined in (atlas) dataset ,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +"6a, S12a, S12b, S12c, S12d",Peaks called in matched bulk ATAC-seq data,"Binarized vs non-binarized in the following methods: DESeq2, edgeR, LRclusters, SnapATAC, Wilcox, t-test, Negative binomial","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly)" +"6b, 6e, 6f, 6g, S13a, S13b, S13c, S14a, S14b, S14c, S14d, S14e, S14f",Peaks called in snATAC-seq data,"Binarized vs non-binarized in the following methods: DESeq2, edgeR, LRclusters, SnapATAC, Wilcox, t-test, Negative binomial","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly)" +"6c, S15a, S15b, S15c, S15d, S15e, S15f",Peaks called in downsamped (“snATAC-seq”) data,"Binarized vs non-binarized in the following methods: DESeq2, edgeR, LRclusters, SnapATAC, Wilcox, t-test, Negative binomial","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly)" +"6d, S16a, S16b, S16c, S16d",Counts were simulated from the parameters estimated from peaks defined in (atlas) dataset ,"Binarized vs non-binarized in the following methods: DESeq2, edgeR, LRclusters, SnapATAC, Wilcox, t-test, Negative binomial","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly)" +"6h, 6l, 6m, 6n, S17a, S17b, S17c, S17d",Peaks called in matched bulk ATAC-seq data,No binarization,"Wilcox, LRclusters, t-test: +log-TP10K, TP10k, none, TF-IDF, log-TP(median), smoothGC" +"6i, S18a, S18b, S18c, S19a, S19b, S20a, S20b,S21a, S21b",Peaks called in snATAC-seq data,No binarization,"Wilcox, LRclusters, t-test: +log-TP10K, TP10k, none, TF-IDF, log-TP(median), smoothGC" +"6j, S22a, S22b,S23a, S23b, S24a, S24b",Peaks called in downsamped (“snATAC-seq”) data,No binarization,"Wilcox, LRclusters, t-test: +log-TP10K, TP10k, none, TF-IDF, log-TP(median), smoothGC" +"6k, S25a, S25b,S26a, S26b",Counts were simulated from the parameters estimated from peaks defined in (atlas) dataset ,No binarization,"Wilcox, LRclusters, t-test: +log-TP10K, TP10k, none, TF-IDF, log-TP(median), smoothGC" +"6o, 6p, 6q","Peaks called in matched bulk ATAC-seq data +background matching is defined using peaks defined in the same way","Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" +S1d,Peaks called in pseudobulk snATAC-seq data,"Only when required by DA method (e.g., binomial test)","Wilcox, LRclusters, t-test: log-TP10K +DESeq2, edgeR, NB, SnapATAC: none (model counts directly) +Permutation, binomial, LRpeaks, Fisher’s exact test: none (require binarization)" diff --git a/supp_tables/supp_table2.csv b/supp_tables/supp_table2.csv new file mode 100644 index 0000000..f76c30e --- /dev/null +++ b/supp_tables/supp_table2.csv @@ -0,0 +1,344 @@ +Experiment,Dataset,Comparison,compar1_nreps,compar2_nreps +Experiment 1,Buenrostro_2018,CMP_vs_GMP,4,3 +Experiment 1,Buenrostro_2018,CMP_vs_HSC,4,3 +Experiment 1,Buenrostro_2018,CMP_vs_MEP,4,2 +Experiment 1,Buenrostro_2018,CMP_vs_MPP,4,2 +Experiment 1,Buenrostro_2018,GMP_vs_HSC,3,3 +Experiment 1,Buenrostro_2018,GMP_vs_MEP,3,2 +Experiment 1,Buenrostro_2018,GMP_vs_MPP,3,2 +Experiment 1,Buenrostro_2018,HSC_vs_MEP,3,2 +Experiment 1,Buenrostro_2018,HSC_vs_MPP,3,2 +Experiment 1,Buenrostro_2018,MEP_vs_MPP,2,2 +Experiment 1,Corces_2016,Blast_vs_LSC,2,2 +Experiment 1,GonzalesBlas_2018,0 hours_vs_24 hours,2,2 +Experiment 1,GonzalesBlas_2018,0 hours_vs_48 hours,2,2 +Experiment 1,GonzalesBlas_2018,0 hours_vs_72 hours,2,2 +Experiment 1,Satpathy_2018,Naive T cell_vs_Th17 T cell,3,2 +Experiment 1,Pliner_2018,0_hours_vs_72_hours,2,2 +Experiment 2,Argelaguet_2022,NMP_vs_Notochord,6,7 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Surface_ectoderm,8,8 +Experiment 2,Argelaguet_2022,Erythroid3_vs_Surface_ectoderm,4,8 +Experiment 2,Argelaguet_2022,Erythroid1_vs_Paraxial_mesoderm,6,8 +Experiment 2,Argelaguet_2022,Pharyngeal_mesoderm_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,ExE_endoderm_vs_Spinal_cord,8,8 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_Pharyngeal_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_1_vs_Rostral_neurectoderm,8,6 +Experiment 2,Argelaguet_2022,Erythroid1_vs_Gut,6,8 +Experiment 2,Argelaguet_2022,Forebrain_Midbrain_Hindbrain_vs_Parietal_endoderm,8,7 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Gut,8,8 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_Erythroid3,8,4 +Experiment 2,Argelaguet_2022,Blood_progenitors_1_vs_Forebrain_Midbrain_Hindbrain,8,8 +Experiment 2,Argelaguet_2022,Endothelium_vs_Somitic_mesoderm,7,8 +Experiment 2,Argelaguet_2022,ExE_endoderm_vs_Gut,8,8 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_Neural_crest,8,6 +Experiment 2,Argelaguet_2022,Gut_vs_Notochord,8,7 +Experiment 2,Argelaguet_2022,Erythroid2_vs_Erythroid3,6,4 +Experiment 2,Argelaguet_2022,Intermediate_mesoderm_vs_Visceral_endoderm,8,8 +Experiment 2,Argelaguet_2022,Notochord_vs_Surface_ectoderm,7,8 +Experiment 2,Argelaguet_2022,Erythroid2_vs_Somitic_mesoderm,6,8 +Experiment 2,Argelaguet_2022,Endothelium_vs_Erythroid2,7,6 +Experiment 2,Argelaguet_2022,Parietal_endoderm_vs_Visceral_endoderm,7,8 +Experiment 2,Argelaguet_2022,Caudal_Mesoderm_vs_Notochord,7,7 +Experiment 2,Argelaguet_2022,Allantois_vs_Cardiomyocytes,7,6 +Experiment 2,Argelaguet_2022,Blood_progenitors_1_vs_Notochord,8,7 +Experiment 2,Argelaguet_2022,NMP_vs_Parietal_endoderm,6,7 +Experiment 2,Argelaguet_2022,Erythroid3_vs_Rostral_neurectoderm,4,6 +Experiment 2,Argelaguet_2022,Allantois_vs_Forebrain_Midbrain_Hindbrain,7,8 +Experiment 2,Argelaguet_2022,Intermediate_mesoderm_vs_Surface_ectoderm,8,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Neural_crest,8,6 +Experiment 2,Argelaguet_2022,Gut_vs_Paraxial_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Haematoendothelial_progenitors_vs_Surface_ectoderm,8,8 +Experiment 2,Argelaguet_2022,Neural_crest_vs_Spinal_cord,6,8 +Experiment 2,Argelaguet_2022,Forebrain_Midbrain_Hindbrain_vs_Paraxial_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Endothelium_vs_Gut,7,8 +Experiment 2,Argelaguet_2022,Rostral_neurectoderm_vs_Spinal_cord,6,8 +Experiment 2,Argelaguet_2022,Erythroid1_vs_Neural_crest,6,6 +Experiment 2,Argelaguet_2022,Erythroid2_vs_Intermediate_mesoderm,6,8 +Experiment 2,Argelaguet_2022,ExE_ectoderm_vs_NMP,6,6 +Experiment 2,Argelaguet_2022,Caudal_Mesoderm_vs_Intermediate_mesoderm,7,8 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_Intermediate_mesoderm,6,8 +Experiment 2,Argelaguet_2022,Intermediate_mesoderm_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Erythroid2_vs_Gut,6,8 +Experiment 2,Argelaguet_2022,Forebrain_Midbrain_Hindbrain_vs_Pharyngeal_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Erythroid1_vs_Rostral_neurectoderm,6,6 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_ExE_endoderm,8,8 +Experiment 2,Argelaguet_2022,ExE_ectoderm_vs_Surface_ectoderm,6,8 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_Erythroid1,6,6 +Experiment 2,Argelaguet_2022,ExE_endoderm_vs_Haematoendothelial_progenitors,8,8 +Experiment 2,Argelaguet_2022,Allantois_vs_ExE_endoderm,7,8 +Experiment 2,Argelaguet_2022,Gut_vs_Pharyngeal_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Endothelium_vs_Surface_ectoderm,7,8 +Experiment 2,Argelaguet_2022,Endothelium_vs_ExE_mesoderm,7,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Erythroid1,8,6 +Experiment 2,Argelaguet_2022,Erythroid1_vs_Surface_ectoderm,6,8 +Experiment 2,Argelaguet_2022,Forebrain_Midbrain_Hindbrain_vs_Haematoendothelial_progenitors,8,8 +Experiment 2,Argelaguet_2022,Allantois_vs_Erythroid2,7,6 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Rostral_neurectoderm,8,6 +Experiment 2,Argelaguet_2022,Gut_vs_Surface_ectoderm,8,8 +Experiment 2,Argelaguet_2022,ExE_mesoderm_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Paraxial_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Erythroid3,8,4 +Experiment 2,Argelaguet_2022,Gut_vs_Mesenchyme,8,8 +Experiment 2,Argelaguet_2022,Intermediate_mesoderm_vs_Spinal_cord,8,8 +Experiment 2,Argelaguet_2022,Gut_vs_Visceral_endoderm,8,8 +Experiment 2,Argelaguet_2022,ExE_endoderm_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Mesenchyme_vs_Rostral_neurectoderm,8,6 +Experiment 2,Argelaguet_2022,Spinal_cord_vs_Visceral_endoderm,8,8 +Experiment 2,Argelaguet_2022,Caudal_Mesoderm_vs_ExE_endoderm,7,8 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_ExE_ectoderm,8,6 +Experiment 2,Argelaguet_2022,Neural_crest_vs_Somitic_mesoderm,6,8 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_ExE_mesoderm,6,8 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_ExE_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Erythroid3_vs_Spinal_cord,4,8 +Experiment 2,Argelaguet_2022,ExE_ectoderm_vs_Mesenchyme,6,8 +Experiment 2,Argelaguet_2022,Caudal_Mesoderm_vs_NMP,7,6 +Experiment 2,Argelaguet_2022,Erythroid3_vs_Intermediate_mesoderm,4,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Def__endoderm,8,8 +Experiment 2,Argelaguet_2022,ExE_mesoderm_vs_Spinal_cord,8,8 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_Mesenchyme,6,8 +Experiment 2,Argelaguet_2022,ExE_mesoderm_vs_Gut,8,8 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_Visceral_endoderm,6,8 +Experiment 2,Argelaguet_2022,Caudal_Mesoderm_vs_Haematoendothelial_progenitors,7,8 +Experiment 2,Argelaguet_2022,Mesenchyme_vs_Neural_crest,8,6 +Experiment 2,Argelaguet_2022,Caudal_Mesoderm_vs_ExE_mesoderm,7,8 +Experiment 2,Argelaguet_2022,NMP_vs_Spinal_cord,6,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Gut_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,Blood_progenitors_1_vs_Cardiomyocytes,8,6 +Experiment 2,Argelaguet_2022,Erythroid3_vs_Neural_crest,4,6 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_Erythroid2,6,6 +Experiment 2,Argelaguet_2022,Blood_progenitors_1_vs_Somitic_mesoderm,8,8 +Experiment 2,Argelaguet_2022,ExE_ectoderm_vs_Parietal_endoderm,6,7 +Experiment 2,Argelaguet_2022,Cardiomyocytes_vs_Somitic_mesoderm,6,8 +Experiment 2,Argelaguet_2022,ExE_endoderm_vs_Notochord,8,7 +Experiment 2,Argelaguet_2022,ExE_mesoderm_vs_Haematoendothelial_progenitors,8,8 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_NMP,8,6 +Experiment 2,Argelaguet_2022,Blood_progenitors_2_vs_ExE_ectoderm,8,6 +Experiment 2,Argelaguet_2022,Def__endoderm_vs_Visceral_endoderm,8,8 +Experiment 2,Luecken_2021,CD16+ Mono_vs_HSC,13,13 +Experiment 2,Luecken_2021,Plasma cell_vs_Proerythroblast,8,13 +Experiment 2,Luecken_2021,CD4+ T activated_vs_Erythroblast,13,13 +Experiment 2,Luecken_2021,Erythroblast_vs_Transitional B,13,12 +Experiment 2,Luecken_2021,CD4+ T activated_vs_Normoblast,13,13 +Experiment 2,Luecken_2021,Erythroblast_vs_MK_E prog,13,12 +Experiment 2,Luecken_2021,CD8+ T naive_vs_Normoblast,4,13 +Experiment 2,Luecken_2021,Proerythroblast_vs_Transitional B,13,12 +Experiment 2,Luecken_2021,cDC2_vs_G_M prog,13,11 +Experiment 2,Luecken_2021,CD16+ Mono_vs_Erythroblast,13,13 +Experiment 2,Luecken_2021,CD14+ Mono_vs_CD4+ T naive,12,13 +Experiment 2,Luecken_2021,CD4+ T activated_vs_MK_E prog,13,12 +Experiment 2,Luecken_2021,CD8+ T_vs_HSC,13,13 +Experiment 2,Luecken_2021,CD4+ T naive_vs_Naive CD20+ B,13,13 +Experiment 2,Luecken_2021,G_M prog_vs_Transitional B,11,12 +Experiment 2,Luecken_2021,CD8+ T_vs_Proerythroblast,13,13 +Experiment 2,Luecken_2021,B1 B_vs_Proerythroblast,12,13 +Experiment 2,Luecken_2021,G_M prog_vs_HSC,11,13 +Experiment 2,Luecken_2021,CD8+ T naive_vs_G_M prog,4,11 +Experiment 2,Luecken_2021,CD8+ T_vs_Transitional B,13,12 +Experiment 2,Luecken_2021,cDC2_vs_ILC,13,6 +Experiment 2,Luecken_2021,CD14+ Mono_vs_Transitional B,12,12 +Experiment 2,Luecken_2021,NK_vs_pDC,13,13 +Experiment 2,Luecken_2021,CD14+ Mono_vs_cDC2,12,13 +Experiment 2,Luecken_2021,G_M prog_vs_Proerythroblast,11,13 +Experiment 2,Luecken_2021,CD8+ T_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,B1 B_vs_CD8+ T,12,13 +Experiment 2,Luecken_2021,G_M prog_vs_pDC,11,13 +Experiment 2,Luecken_2021,CD4+ T naive_vs_pDC,13,13 +Experiment 2,Luecken_2021,CD8+ T_vs_MK_E prog,13,12 +Experiment 2,Luecken_2021,B1 B_vs_CD4+ T activated,12,13 +Experiment 2,Luecken_2021,CD16+ Mono_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,CD16+ Mono_vs_CD4+ T activated,13,13 +Experiment 2,Luecken_2021,NK_vs_Transitional B,13,12 +Experiment 2,Luecken_2021,G_M prog_vs_MK_E prog,11,12 +Experiment 2,Luecken_2021,CD16+ Mono_vs_CD4+ T naive,13,13 +Experiment 2,Luecken_2021,Erythroblast_vs_ID2-hi myeloid prog,13,2 +Experiment 2,Luecken_2021,Erythroblast_vs_pDC,13,13 +Experiment 2,Luecken_2021,Lymph prog_vs_pDC,12,13 +Experiment 2,Luecken_2021,cDC2_vs_Normoblast,13,13 +Experiment 2,Luecken_2021,CD14+ Mono_vs_NK,12,13 +Experiment 2,Luecken_2021,CD4+ T activated_vs_ID2-hi myeloid prog,13,2 +Experiment 2,Luecken_2021,pDC_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,MK_E prog_vs_Normoblast,12,13 +Experiment 2,Luecken_2021,Lymph prog_vs_Transitional B,12,12 +Experiment 2,Luecken_2021,B1 B_vs_CD4+ T naive,12,13 +Experiment 2,Luecken_2021,ID2-hi myeloid prog_vs_Lymph prog,2,12 +Experiment 2,Luecken_2021,CD8+ T_vs_G_M prog,13,11 +Experiment 2,Luecken_2021,ID2-hi myeloid prog_vs_ILC,2,6 +Experiment 2,Luecken_2021,Naive CD20+ B_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,Naive CD20+ B_vs_pDC,13,13 +Experiment 2,Luecken_2021,B1 B_vs_CD8+ T naive,12,4 +Experiment 2,Luecken_2021,cDC2_vs_Naive CD20+ B,13,13 +Experiment 2,Luecken_2021,cDC2_vs_ID2-hi myeloid prog,13,2 +Experiment 2,Luecken_2021,CD8+ T naive_vs_ID2-hi myeloid prog,4,2 +Experiment 2,Luecken_2021,Lymph prog_vs_Plasma cell,12,8 +Experiment 2,Luecken_2021,CD8+ T naive_vs_Lymph prog,4,12 +Experiment 2,Luecken_2021,Erythroblast_vs_Normoblast,13,13 +Experiment 2,Luecken_2021,B1 B_vs_CD16+ Mono,12,13 +Experiment 2,Luecken_2021,CD8+ T_vs_ILC,13,6 +Experiment 2,Luecken_2021,cDC2_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,CD14+ Mono_vs_Proerythroblast,12,13 +Experiment 2,Luecken_2021,MK_E prog_vs_NK,12,13 +Experiment 2,Luecken_2021,CD14+ Mono_vs_Lymph prog,12,12 +Experiment 2,Luecken_2021,CD8+ T_vs_Lymph prog,13,12 +Experiment 2,Luecken_2021,ILC_vs_MK_E prog,6,12 +Experiment 2,Luecken_2021,CD4+ T activated_vs_NK,13,13 +Experiment 2,Luecken_2021,G_M prog_vs_Lymph prog,11,12 +Experiment 2,Luecken_2021,CD4+ T activated_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,B1 B_vs_G_M prog,12,11 +Experiment 2,Luecken_2021,CD14+ Mono_vs_Naive CD20+ B,12,13 +Experiment 2,Luecken_2021,B1 B_vs_NK,12,13 +Experiment 2,Luecken_2021,CD8+ T_vs_ID2-hi myeloid prog,13,2 +Experiment 2,Luecken_2021,CD4+ T activated_vs_ILC,13,6 +Experiment 2,Luecken_2021,HSC_vs_Proerythroblast,13,13 +Experiment 2,Luecken_2021,ID2-hi myeloid prog_vs_Proerythroblast,2,13 +Experiment 2,Luecken_2021,CD4+ T naive_vs_Erythroblast,13,13 +Experiment 2,Luecken_2021,Erythroblast_vs_Proerythroblast,13,13 +Experiment 2,Luecken_2021,CD8+ T naive_vs_Erythroblast,4,13 +Experiment 2,Luecken_2021,ILC_vs_Naive CD20+ B,6,13 +Experiment 2,Luecken_2021,Lymph prog_vs_Naive CD20+ B,12,13 +Experiment 2,Luecken_2021,Naive CD20+ B_vs_Transitional B,13,12 +Experiment 2,Luecken_2021,Lymph prog_vs_Proerythroblast,12,13 +Experiment 2,Luecken_2021,HSC_vs_Naive CD20+ B,13,13 +Experiment 2,Luecken_2021,CD16+ Mono_vs_pDC,13,13 +Experiment 2,Luecken_2021,NK_vs_Plasma cell,13,8 +Experiment 2,Luecken_2021,Lymph prog_vs_Normoblast,12,13 +Experiment 2,Luecken_2021,CD4+ T naive_vs_Normoblast,13,13 +Experiment 2,Luecken_2021,B1 B_vs_Lymph prog,12,12 +Experiment 2,Luecken_2021,CD16+ Mono_vs_Naive CD20+ B,13,13 +Experiment 2,Luecken_2021,CD4+ T naive_vs_G_M prog,13,11 +Experiment 2,Luecken_2021,CD14+ Mono_vs_ILC,12,6 +Experiment 2,Luecken_2021,CD16+ Mono_vs_Transitional B,13,12 +Experiment 2,Luecken_2021,CD14+ Mono_vs_G_M prog,12,11 +Experiment 2,Luecken_2021,CD4+ T naive_vs_cDC2,13,13 +Experiment 2,Luecken_2021,CD8+ T_vs_pDC,13,13 +Experiment 2,Luecken_2021,CD8+ T naive_vs_NK,4,13 +Experiment 2,Luecken_2021,CD4+ T naive_vs_ID2-hi myeloid prog,13,2 +Experiment 2,Luecken_2021,CD8+ T naive_vs_Transitional B,4,12 +Experiment 2,Luecken_2021,CD8+ T naive_vs_cDC2,4,13 +Experiment 2,Boukhaled_2022,CD4 Effector_vs_CD4 naive or mem,10,10 +Experiment 2,Boukhaled_2022,CD4 Effector_vs_CD4 TEMRA,10,10 +Experiment 2,Boukhaled_2022,CD4 Effector_vs_TREG,10,10 +Experiment 2,Boukhaled_2022,CD4 naive or mem_vs_CD4 TEMRA,10,10 +Experiment 2,Boukhaled_2022,CD4 naive or mem_vs_TREG,10,10 +Experiment 2,Boukhaled_2022,CD4 TEMRA_vs_TREG,10,10 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Mature_oligodendrocytes_(MOL),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_B_cells,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_Newly_formed_oligodendrocytes_(NFOL),6,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Microglia,8,8 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Vascular_cells_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Ependymal_cells,8,8 +Experiment 2,SCI_dataset,Newly_formed_oligodendrocytes_(NFOL)_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Myeloid_dividing,8,6 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Myelin_forming_oligodendrocytes_(MFOL),8,8 +Experiment 2,SCI_dataset,NK_T_cells_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Vascular_cells_vs_Ventral,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Microglia_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Myeloid_dividing,8,6 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_Peripheral_immune_cells,6,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Myelin_forming_oligodendrocytes_(MFOL),8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,NK_T_cells_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Microglia_vs_Myeloid_dividing,8,6 +Experiment 2,SCI_dataset,Dorsal_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Myelin_forming_oligodendrocytes_(MFOL),8,8 +Experiment 2,SCI_dataset,Dorsal_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_Vascular_endothelial_cells,6,8 +Experiment 2,SCI_dataset,Peripheral_immune_cells_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Dorsal,8,8 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_Ventral,8,8 +Experiment 2,SCI_dataset,NK_T_cells_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Microglia,8,8 +Experiment 2,SCI_dataset,Microglia_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,B_cells_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Vascular_endothelial_cells_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Newly_formed_oligodendrocytes_(NFOL)_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Ependymal_cells,8,8 +Experiment 2,SCI_dataset,Newly_formed_oligodendrocytes_(NFOL)_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_Ventral,6,8 +Experiment 2,SCI_dataset,Newly_formed_oligodendrocytes_(NFOL)_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Dorsal,8,8 +Experiment 2,SCI_dataset,Oligodendrocytes_precursor_cells_(OPC)_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Newly_formed_oligodendrocytes_(NFOL)_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,B_cells_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Microglia_vs_Myelin_forming_oligodendrocytes_(MFOL),8,8 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_NK_T_cells,6,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Myeloid_dividing,8,6 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Oligodendrocytes_precursor_cells_(OPC)_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Myelin_forming_oligodendrocytes_(MFOL),8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Microglia,8,8 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_Vascular_cells,6,8 +Experiment 2,SCI_dataset,Microglia_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Microglia_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Oligodendrocytes_precursor_cells_(OPC)_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Mature_oligodendrocytes_(MOL),8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Microglia,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Myeloid_dividing,8,6 +Experiment 2,SCI_dataset,Astrocytes_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Peripheral_immune_cells_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,Microglia_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Mature_oligodendrocytes_(MOL)_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Peripheral_immune_cells_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Dorsal_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Myeloid_dividing_vs_Oligodendrocytes_precursor_cells_(OPC),6,8 +Experiment 2,SCI_dataset,NK_T_cells_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,NK_T_cells_vs_Ventral,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Ependymal_cells,8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Vascular_cells,8,8 +Experiment 2,SCI_dataset,Newly_formed_oligodendrocytes_(NFOL)_vs_Vascular_endothelial_cells,8,8 +Experiment 2,SCI_dataset,Microglia_vs_Ventral,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Oligodendrocytes_precursor_cells_(OPC),8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Myelin_forming_oligodendrocytes_(MFOL),8,8 +Experiment 2,SCI_dataset,Ependymal_cells_vs_Mature_oligodendrocytes_(MOL),8,8 +Experiment 2,SCI_dataset,B_cells_vs_Microglia,8,8 +Experiment 2,SCI_dataset,B_cells_vs_Newly_formed_oligodendrocytes_(NFOL),8,8 +Experiment 2,SCI_dataset,Myelin_forming_oligodendrocytes_(MFOL)_vs_Peripheral_immune_cells,8,8 +Experiment 2,SCI_dataset,Astrocytes_vs_Myeloid_dividing,8,6 +Experiment 2,SCI_dataset,B_cells_vs_NK_T_cells,8,8 +Experiment 2,SCI_dataset,Oligodendrocytes_precursor_cells_(OPC)_vs_Vascular_cells,8,8 +Experiment 3,Published data,B1 B,6,6 +Experiment 3,Published data,CD14+ Mono,6,6 +Experiment 3,Published data,CD16+ Mono,6,7 +Experiment 3,Published data,CD4+ T activated,6,7 +Experiment 3,Published data,CD4+ T naive,6,7 +Experiment 3,Published data,CD8+ T,6,7 +Experiment 3,Published data,cDC2,6,7 +Experiment 3,Published data,Erythroblast,6,7 +Experiment 3,Published data,G_M prog,6,5 +Experiment 3,Published data,HSC,6,7 +Experiment 3,Published data,Lymph prog,6,6 +Experiment 3,Published data,MK_E prog,6,6 +Experiment 3,Published data,Naive CD20+ B,6,7 +Experiment 3,Published data,NK,6,7 +Experiment 3,Published data,Normoblast,6,7 +Experiment 3,Published data,pDC,6,7 +Experiment 3,Published data,Plasma cell,4,4 +Experiment 3,Published data,Proerythroblast,6,7 +Experiment 3,Published data,Transitional B,6,6 +Experiment 3,Downsampled data,,3,3 +Experiment 3,Simulations,,3,3