diff --git a/R/cbea_internals.R b/R/cbea_internals.R index cc9b41c..b84f384 100644 --- a/R/cbea_internals.R +++ b/R/cbea_internals.R @@ -163,21 +163,23 @@ get_diagnostics <- function(env = caller_env()){ req_objs <- c("output", "distr", "parametric", "raw_scores", "perm_scores", "adj") obj_names <- env_names(env) - sapply(req_objs, function(x){ + vapply(req_objs, function(x){ if(!x %in% obj_names){ stop(x, " not found") } - }) + return(0) + }, FUN.VALUE = 0) if (env$parametric == TRUE){ add_objs <- c("final_distr", "perm_distr") if (env$adj == TRUE){ add_objs <- c(add_objs, "unperm_distr") } - sapply(add_objs, function(x){ + vapply(add_objs, function(x){ if(!x %in% obj_names){ stop(x, " not found") } - }) + return(0) + }, FUN.VALUE = 0) } if (env$output == "raw" | env$parametric == FALSE){ @@ -544,7 +546,8 @@ get_adj_mnorm <- function(perm, unperm, verbose = FALSE, fix_comp = "none") { mu = perm$mu, mean = perm_mean ) if (verbose == TRUE) { - print(paste("Total sd is", unperm_sd, "and estimated sd is", estim_sd)) + message("Total sd is ", unperm_sd, " and estimated sd is ", + estim_sd) } param <- list(mu = perm$mu, sigma = est_sig, lambda = perm$lambda) return(param) diff --git a/R/utils.R b/R/utils.R index 65c1bed..6c96f19 100644 --- a/R/utils.R +++ b/R/utils.R @@ -37,7 +37,7 @@ pmnorm <- function(q, mu, sigma, lambda, log = FALSE, verbose = FALSE) { q <- as.vector(q) n_components <- length(sigma) if (verbose == TRUE) { - message(paste(n_components, "components!", "\n")) + message(n_components, " components!", "\n") } comp <- vector(mode = "list", length = n_components) for (i in seq_len(n_components)) { @@ -53,7 +53,7 @@ dmnorm <- function(x, mu, sigma, lambda, log = FALSE, verbose = FALSE) { x <- as.vector(x) n_components <- length(sigma) if (verbose == TRUE) { - message(paste(n_components, "components!", "\n")) + message(n_components, "components!", "\n") } comp <- vector(mode = "list", length = n_components) for (i in seq_len(n_components)) { diff --git 
a/README.Rmd b/README.Rmd index c5aed2a..26e3815 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,15 +18,13 @@ knitr::opts_chunk$set( [![Codecov test coverage](https://codecov.io/gh/qpmnguyen/CBEA/branch/master/graph/badge.svg)](https://codecov.io/gh/qpmnguyen/CBEA?branch=master) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![R-CMD-check](https://github.com/qpmnguyen/CBEA/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/qpmnguyen/CBEA/actions) -[![BioC status](http://www.bioconductor.org/shields/build/release/bioc/CBEA.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/CBEA) + ### Quang Nguyen The `CBEA` package provides basic functionality to perform taxonomic enrichment analysis in R. This package mainly supports the `CBEA` method, and provides additional support for generating sets for analyses using approaches commonly used in the gene set testing literature. -**This package is under ongoing development and might not be stable at the moment. Only install the development version if R CMD CHECK badge is green (passed) or if the error is due to dependency installation.** - ### Installation And the development version from [GitHub](https://github.com/) with: @@ -38,13 +36,12 @@ devtools::install_github("qpmnguyen/CBEA") ### Features -This package supports the implementation of the CBEA approach (formerly known as cILR) for taxonomic enrichment analysis. - -### Dependency Graph +This package implements the CBEA approach for performing set-based enrichment analysis for microbiome relative abundance data. A preprint of the package can be found [on bioRxiv](https://www.biorxiv.org/content/10.1101/2021.09.07.459294v1.full). 
In summary, CBEA (Competitive Balances for taxonomic Enrichment Analysis) provides an estimate of the activity of a set by transforming an input taxa-by-sample data matrix into a corresponding set-by-sample data matrix. The resulting output can be used for additional downstream analyses such as differential abundance, classification, clustering, etc. using set-based features instead of the original units. -```{r, include = TRUE, echo=FALSE} -#dd <- deepdep::deepdep(package = "CBEA", local = TRUE, depth = 2, dependency_type = "Imports") -#ggplot2::ggsave(plot = deepdep::plot_dependencies(dd), filename = "man/figures/dep_new.png") +The transformation that CBEA applies is based on the isometric log ratio transformation: +$$ +CBEA_{i,\mathbb{S}} = \sqrt{\frac{|\mathbb{S}||\mathbb{S}_C|}{|\mathbb{S}| + |\mathbb{S}_C|}} \ln \frac{g(X_{i,j | j\in \mathbb{S}})}{g(X_{i,j | j \notin \mathbb{S}})} +$$ +Where $\mathbb{S}$ is the set of interest, $\mathbb{S}_C$ is its complement, $g()$ is the geometric mean operation, and $X$ is the original data matrix where $i$ is the index representing samples and $j$ is the index representing variables (or taxa). -knitr::include_graphics("man/figures/dep_new.png") -``` +The inference procedure is performed through estimating the null distribution of the test statistic. This can be done either via permutations or a parametric fit of a distributional form on the permuted scores. Users can also adjust for variance inflation due to inter-taxa correlation. Please refer to the main manuscript for any additional details. 
diff --git a/README.md b/README.md index f165f48..99034b6 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,7 @@ coverage](https://codecov.io/gh/qpmnguyen/CBEA/branch/master/graph/badge.svg)](h state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![R-CMD-check](https://github.com/qpmnguyen/CBEA/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/qpmnguyen/CBEA/actions) -[![BioC -status](http://www.bioconductor.org/shields/build/release/bioc/CBEA.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/CBEA) + ### Quang Nguyen @@ -22,10 +21,6 @@ enrichment analysis in R. This package mainly supports the `CBEA` method, and provides additional support for generating sets for analyses using approaches commonly used in the gene set testing literature. -**This package is under ongoing development and might not be stable at -the moment. Only install the development version if R CMD CHECK badge is -green (passed) or if the error is due to dependency installation.** - ### Installation And the development version from [GitHub](https://github.com/) with: @@ -37,9 +32,43 @@ devtools::install_github("qpmnguyen/CBEA") ### Features -This package supports the implementation of the CBEA approach (formerly -known as cILR) for taxonomic enrichment analysis. +This package implements the CBEA approach for performing set-based +enrichment analysis for microbiome relative abundance data. A preprint +of the package can be found [on +bioRxiv](https://www.biorxiv.org/content/10.1101/2021.09.07.459294v1.full). +In summary, CBEA (Competitive Balances for taxonomic Enrichment +Analysis) provides an estimate of the activity of a set by transforming +an input taxa-by-sample data matrix into a corresponding set-by-sample +data matrix. The resulting output can be used for additional downstream +analyses such as differential abundance, classification, clustering, +etc. 
using set-based features instead of the original units. + +The transformation that CBEA applies is based on the isometric log ratio +transformation: + +![ +CBEA\_{i,\\mathbb{S}} = \\sqrt{\\frac{\|\\mathbb{S}\|\|\\mathbb{S}\_C\|}{\|\\mathbb{S}\| + \|\\mathbb{S}\_C\|}} \\ln \\frac{g(X\_{i,j \| j\\in \\mathbb{S}})}{g(X\_{i,j \| j \\notin \\mathbb{S}})} +](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;%0ACBEA_%7Bi%2C%5Cmathbb%7BS%7D%7D%20%3D%20%5Csqrt%7B%5Cfrac%7B%7C%5Cmathbb%7BS%7D%7C%7C%5Cmathbb%7BS%7D_C%7C%7D%7B%7C%5Cmathbb%7BS%7D%7C%20%2B%20%7C%5Cmathbb%7BS%7D_C%7C%7D%7D%20%5Cln%20%5Cfrac%7Bg%28X_%7Bi%2Cj%20%7C%20j%5Cin%20%5Cmathbb%7BS%7D%7D%29%7D%7Bg%28X_%7Bi%2Cj%20%7C%20j%20%5Cnotin%20%5Cmathbb%7BS%7D%7D%29%7D%0A " +CBEA_{i,\mathbb{S}} = \sqrt{\frac{|\mathbb{S}||\mathbb{S}_C|}{|\mathbb{S}| + |\mathbb{S}_C|}} \ln \frac{g(X_{i,j | j\in \mathbb{S}})}{g(X_{i,j | j \notin \mathbb{S}})} +") -### Dependency Graph +Where +![\\mathbb{S}](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;%5Cmathbb%7BS%7D "\mathbb{S}") +is the set of interest, +![\\mathbb{S}\_C](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;%5Cmathbb%7BS%7D_C "\mathbb{S}_C") +is its complement, +![g()](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;g%28%29 "g()") +is the geometric mean operation, and +![X](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;X "X") +is the original data matrix where +![i](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;i "i") +is the index representing samples and +![j](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;j "j") +is the index representing variables (or taxa). - +The inference procedure is performed through estimating the null +distribution of the test statistic. 
This can be done either via +permutations or a parametric fit of a distributional form on the +permuted scores. Users can also adjust for variance inflation due to +inter-taxa correlation. Please refer to the main manuscript for any +additional details. diff --git a/vignettes/basic_usage.Rmd b/vignettes/basic_usage.Rmd index 784a282..a10ed14 100644 --- a/vignettes/basic_usage.Rmd +++ b/vignettes/basic_usage.Rmd @@ -102,6 +102,7 @@ If there are any issues with the installation procedure or package features, the library("CBEA") library(BiocSet) library(tidyverse) +set.seed(1020) ``` ## Loading sample data