diff --git a/DESCRIPTION b/DESCRIPTION index 365d1d60..106dc06f 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,29 @@ -Package: liger +Package: rliger Version: 0.5.0 -Date: 2021-01-11 +Date: 2021-01-18 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. -Author: Joshua Welch, Chao Gao, Jialin Liu, Joshua Sodicoff +Authors@R: c( + person(given = 'Joshua', family = 'Welch', email = 'welchjd@umich.edu', role = c('aut', 'ctb')), + person(given = 'Chao', family = 'Gao', email = 'gchao@umich.edu', role = c('aut', 'ctb', 'cre')), + person(given = 'Jialin', family = 'Liu', email = 'alanliu@umich.edu', role = c('aut', 'ctb')), + person(given = 'Joshua', family = 'Sodicoff', email = 'sodicoff@umich.edu', role = c('aut', 'ctb')), + person(given = 'Velina', family = 'Kozareva', role = c('aut', 'ctb')), + person(given = 'Evan', family = 'Macosko', role = c('aut', 'ctb')), + person(given = 'Paul', family = 'Hoffman', role = 'ctb'), + person(given = 'Ilya', family = 'Korsunsky', role = 'ctb'), + person(given = 'Robert', family = 'Lee', role = 'ctb') + ) +Author: Joshua Welch [aut, ctb], + Chao Gao [aut, ctb, cre], + Jialin Liu [aut, ctb], + Joshua Sodicoff [aut, ctb], + Velina Kozareva [aut, ctb], + Evan Macosko [aut, ctb], + Paul Hoffman [ctb], + Ilya Korsunsky [ctb], + Robert Lee [ctb] Maintainer: Chao Gao BugReports: https://github.com/welch-lab/liger/issues URL: https://github.com/welch-lab/liger @@ -53,6 +72,5 @@ Suggests: org.Hs.eg.db, reactome.db, fgsea, - AnnotationDbi, - kBET + AnnotationDbi VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE old mode 100755 new mode 100644 index 91d8f651..55916a28 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,13 +4,11 @@ S3method(optimizeALS,liger) S3method(optimizeALS,list) S3method(quantile_norm,liger) S3method(quantile_norm,list) -export(Matrix.column_norm) export(calcARI) export(calcAgreement) export(calcAlignment) export(calcAlignmentPerCluster) export(calcDatasetSpecificity) -export(calcGeneVars) export(calcPurity) export(convertOldLiger) export(createLiger) @@ -61,8 +59,9 @@ export(suggestK) export(suggestLambda) exportClasses(liger) import(Matrix) -import(doSNOW) +import(doParallel) import(hdf5r) +import(parallel) import(patchwork) importFrom(FNN,get.knn) importFrom(FNN,get.knnx) @@ -147,8 +146,6 @@ importFrom(plyr,rbind.fill.matrix) importFrom(riverplot,makeRiver) importFrom(riverplot,riverplot) importFrom(rlang,.data) -importFrom(snow,makeCluster) -importFrom(snow,stopCluster) importFrom(stats,approxfun) importFrom(stats,complete.cases) importFrom(stats,heatmap) @@ -170,4 +167,4 @@ importFrom(utils,read.table) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) importFrom(utils,write.table) -useDynLib(liger) +useDynLib(rliger) diff --git a/R/RcppExports.R b/R/RcppExports.R index 17fc1a65..b08396de 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -2,67 +2,67 @@ # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) { - .Call('_liger_RunModularityClusteringCpp', PACKAGE = 'liger', SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) + .Call('_rliger_RunModularityClusteringCpp', PACKAGE = 'rliger', SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename) } scaleNotCenterFast <- function(x) { - .Call('_liger_scaleNotCenterFast', PACKAGE = 'liger', x) + .Call('_rliger_scaleNotCenterFast', PACKAGE = 'rliger', x) } rowMeansFast <- function(x) { - .Call('_liger_rowMeansFast', PACKAGE = 'liger', x) + .Call('_rliger_rowMeansFast', PACKAGE = 'rliger', x) } rowVarsFast <- function(x, means) { - .Call('_liger_rowVarsFast', PACKAGE = 'liger', x, means) + .Call('_rliger_rowVarsFast', PACKAGE = 'rliger', x, means) } sumSquaredDeviations <- function(x, means) { - .Call('_liger_sumSquaredDeviations', PACKAGE = 'liger', x, means) + .Call('_rliger_sumSquaredDeviations', PACKAGE = 'rliger', x, means) } cpp_sumGroups_dgc <- function(x, p, i, ncol, groups, ngroups) { - .Call('_liger_cpp_sumGroups_dgc', PACKAGE = 'liger', x, p, i, ncol, groups, ngroups) + .Call('_rliger_cpp_sumGroups_dgc', PACKAGE = 'rliger', x, p, i, ncol, groups, ngroups) } cpp_sumGroups_dgc_T <- function(x, p, i, ncol, nrow, groups, ngroups) { - .Call('_liger_cpp_sumGroups_dgc_T', PACKAGE = 'liger', x, p, i, ncol, nrow, groups, ngroups) + .Call('_rliger_cpp_sumGroups_dgc_T', PACKAGE = 'rliger', x, p, i, ncol, nrow, groups, ngroups) } cpp_sumGroups_dense <- function(X, groups, ngroups) { - .Call('_liger_cpp_sumGroups_dense', PACKAGE = 'liger', X, groups, ngroups) + .Call('_rliger_cpp_sumGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) } cpp_sumGroups_dense_T <- function(X, groups, ngroups) { - .Call('_liger_cpp_sumGroups_dense_T', PACKAGE = 'liger', X, groups, ngroups) + .Call('_rliger_cpp_sumGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) } cpp_nnzeroGroups_dense <- function(X, groups, ngroups) { - .Call('_liger_cpp_nnzeroGroups_dense', PACKAGE = 'liger', X, groups, ngroups) + .Call('_rliger_cpp_nnzeroGroups_dense', PACKAGE = 'rliger', X, groups, ngroups) } cpp_nnzeroGroups_dense_T <- function(X, groups, ngroups) { - .Call('_liger_cpp_nnzeroGroups_dense_T', PACKAGE = 'liger', X, groups, ngroups) + .Call('_rliger_cpp_nnzeroGroups_dense_T', PACKAGE = 'rliger', X, groups, ngroups) } cpp_nnzeroGroups_dgc <- function(p, i, ncol, groups, ngroups) { - .Call('_liger_cpp_nnzeroGroups_dgc', PACKAGE = 'liger', p, i, ncol, groups, ngroups) + .Call('_rliger_cpp_nnzeroGroups_dgc', PACKAGE = 'rliger', p, i, ncol, groups, ngroups) } cpp_in_place_rank_mean <- function(v_temp, idx_begin, idx_end) { - .Call('_liger_cpp_in_place_rank_mean', PACKAGE = 'liger', v_temp, idx_begin, idx_end) + .Call('_rliger_cpp_in_place_rank_mean', PACKAGE = 'rliger', v_temp, idx_begin, idx_end) } cpp_rank_matrix_dgc <- function(x, p, nrow, ncol) { - .Call('_liger_cpp_rank_matrix_dgc', PACKAGE = 'liger', x, p, nrow, ncol) + .Call('_rliger_cpp_rank_matrix_dgc', PACKAGE = 'rliger', x, p, nrow, ncol) } cpp_rank_matrix_dense <- function(X) { - .Call('_liger_cpp_rank_matrix_dense', PACKAGE = 'liger', X) + .Call('_rliger_cpp_rank_matrix_dense', PACKAGE = 'rliger', X) } cpp_nnzeroGroups_dgc_T <- function(p, i, ncol, nrow, groups, ngroups) { - .Call('_liger_cpp_nnzeroGroups_dgc_T', PACKAGE = 'liger', p, i, ncol, nrow, groups, ngroups) + .Call('_rliger_cpp_nnzeroGroups_dgc_T', PACKAGE = 'rliger', p, i, ncol, nrow, groups, ngroups) } #' Fast calculation of feature count matrix @@ -80,34 +80,34 @@ cpp_nnzeroGroups_dgc_T <- function(p, i, ncol, nrow, groups, ngroups) { #' samnple <- gene.counts + promoter.counts #' } makeFeatureMatrix <- function(bedmat, barcodes) { - .Call('_liger_makeFeatureMatrix', PACKAGE = 'liger', bedmat, barcodes) + .Call('_rliger_makeFeatureMatrix', PACKAGE = 'rliger', bedmat, barcodes) } cluster_vote <- function(nn_ranked, clusts) { - .Call('_liger_cluster_vote', PACKAGE = 'liger', nn_ranked, clusts) + .Call('_rliger_cluster_vote', PACKAGE = 'rliger', nn_ranked, clusts) } scale_columns_fast <- function(mat, scale = TRUE, center = TRUE) { - .Call('_liger_scale_columns_fast', PACKAGE = 'liger', mat, scale, center) + .Call('_rliger_scale_columns_fast', PACKAGE = 'rliger', mat, scale, center) } max_factor <- function(H, dims_use, center_cols = FALSE) { - .Call('_liger_max_factor', PACKAGE = 'liger', H, dims_use, center_cols) + .Call('_rliger_max_factor', PACKAGE = 'rliger', H, dims_use, center_cols) } solveNNLS <- function(C, B) { - .Call('_liger_solveNNLS', PACKAGE = 'liger', C, B) + .Call('_rliger_solveNNLS', PACKAGE = 'rliger', C, B) } ComputeSNN <- function(nn_ranked, prune) { - .Call('_liger_ComputeSNN', PACKAGE = 'liger', nn_ranked, prune) + .Call('_rliger_ComputeSNN', PACKAGE = 'rliger', nn_ranked, prune) } WriteEdgeFile <- function(snn, filename, display_progress) { - invisible(.Call('_liger_WriteEdgeFile', PACKAGE = 'liger', snn, filename, display_progress)) + invisible(.Call('_rliger_WriteEdgeFile', PACKAGE = 'rliger', snn, filename, display_progress)) } DirectSNNToFile <- function(nn_ranked, prune, display_progress, filename) { - .Call('_liger_DirectSNNToFile', PACKAGE = 'liger', nn_ranked, prune, display_progress, filename) + .Call('_rliger_DirectSNNToFile', PACKAGE = 'rliger', nn_ranked, prune, display_progress, filename) } diff --git a/R/liger.R b/R/rliger.R similarity index 88% rename from R/liger.R rename to R/rliger.R index 85a918a7..4bf1d3eb 100755 --- a/R/liger.R +++ b/R/rliger.R @@ -43,7 +43,7 @@ NULL #' @aliases liger-class #' @exportClass liger #' @importFrom Rcpp evalCpp -#' @useDynLib liger +#' @useDynLib rliger liger <- methods::setClass( "liger", @@ -121,11 +121,15 @@ setMethod( #' (default NULL) #' @param data.type Indicates the protocol of the input data. If not specified, input data will be #' considered scRNA-seq data (default 'rna', alternatives: 'atac'). +#' @param verbose Print messages (TRUE by default) +#' #' @return List of merged matrices across data types (returns sparse matrix if only one data type #' detected), or nested list of matrices organized by sample if merge=F. -#' @export +#' #' @import Matrix #' @importFrom utils read.delim read.table +#' +#' @export #' @examples #' \dontrun{ #' # 10X output directory V2 -- contains outs/raw_gene_bc_matrices//... @@ -136,8 +140,8 @@ setMethod( #' ligerex <- createLiger(expr = dges1[["Gene Expression"]], custom = dges1[["CUSTOM"]]) #' } -read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min.umis = 0, - use.filtered = F, reference = NULL, data.type = "rna") { +read10X <- function(sample.dirs, sample.names, merge = TRUE, num.cells = NULL, min.umis = 0, + use.filtered = FALSE, reference = NULL, data.type = "rna", verbose = TRUE) { datalist <- list() datatypes <- c("Gene Expression") @@ -157,8 +161,8 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. } else { if (is.null(reference)) { references <- list.dirs(paste0(sample.dir, "/raw_gene_bc_matrices"), - full.names = F, - recursive = F + full.names = FALSE, + recursive = FALSE ) if (length(references) > 1) { stop("Multiple reference genomes found. Please specify a single one.") @@ -193,9 +197,9 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. # filter for UMIs first to increase speed umi.pass <- which(colSums(rawdata) > min.umis) if (length(umi.pass) == 0) { - print("No cells pass UMI cutoff. Please lower it.") + message("No cells pass UMI cutoff. Please lower it.") } - rawdata <- rawdata[, umi.pass, drop = F] + rawdata <- rawdata[, umi.pass, drop = FALSE] barcodes <- readLines(barcodes.file)[umi.pass] # Remove -1 tag from barcodes @@ -205,10 +209,10 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. })) } if (data.type == "rna") { - features <- read.delim(features.file, header = F, stringsAsFactors = F) + features <- read.delim(features.file, header = FALSE, stringsAsFactors = FALSE) rownames(rawdata) <- make.unique(features[, 2]) } else if (data.type == "atac") { - features <- read.table(features.file, header = F) + features <- read.table(features.file, header = FALSE) features <- paste0(features[, 1], ":", features[, 2], "-", features[, 3]) rownames(rawdata) <- features } @@ -241,13 +245,13 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. cs <- colSums(samplelist[[data_label]]) limit <- ncol(samplelist[[data_label]]) if (num.cells[i] > limit) { - print(paste0( - "You selected more cells than are in matrix ", i, - ". Returning all ", limit, " cells." - )) + if (verbose) { + message("You selected more cells than are in matrix ", i, + ". Returning all ", limit, " cells.") + } num.cells[i] <- limit } - samplelist[[data_label]] <- samplelist[[data_label]][, order(cs, decreasing = T) + samplelist[[data_label]] <- samplelist[[data_label]][, order(cs, decreasing = TRUE) [1:num.cells[i]]] } @@ -260,14 +264,16 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. # )) # num.cells[i] <- limit # } - # samplelist[["Gene Expression"]] <- samplelist[["Gene Expression"]][, order(cs, decreasing = T) + # samplelist[["Gene Expression"]] <- samplelist[["Gene Expression"]][, order(cs, decreasing = TRUE) # [1:num.cells[i]]] } datalist[[i]] <- samplelist } if (merge) { - print("Merging samples") + if (verbose) { + message("Merging samples") + } return_dges <- lapply(datatypes, function(x) { mergelist <- lapply(datalist, function(d) { d[[x]] @@ -280,7 +286,9 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. # if only one type of data present if (length(return_dges) == 1) { - print(paste0("Returning ", datatypes, " data matrix")) + if (verbose){ + message("Returning ", datatypes, " data matrix") + } return(return_dges[[1]]) } return(return_dges) @@ -306,10 +314,16 @@ read10X <- function(sample.dirs, sample.names, merge = T, num.cells = NULL, min. #' @param barcodes.name Path to the barcodes stored in HDF5 file. #' #' @return Directly generates newly merged hdf5 file. -#' @export +#' #' @import hdf5r +#' +#' @export #' @examples #' \dontrun{ +#' # For instance, we want to merge two datasets saved in HDF5 files (10X CellRanger) +#' # paths to datasets: "library1.h5","library2.h5" +#' # dataset names: "lib1", "lib2" +#' # name for output HDF5 file: "merged.h5" #' mergeH5(list("library1.h5","library2.h5"), c("lib1","lib2"), "merged.h5") #' } @@ -388,10 +402,15 @@ mergeH5 <- function(file.list, #' @param file.path List of paths to hdf5 files. #' #' @return \code{liger} object with restored links. -#' @export +#' #' @import hdf5r +#' +#' @export #' @examples #' \dontrun{ +#' # We want to restore the ligerex (liger object based on HDF5 files) +#' # It has broken connections to HDF5 files +#' # Call the following function and provide the paths to the correspoinding files #' ligerex = restoreOnlineLiger(ligerex, file.path = list("path1/library1.h5", "path2/library2.h5")) #' } restoreOnlineLiger <- function(object, file.path = NULL) { @@ -421,9 +440,9 @@ restoreOnlineLiger <- function(object, file.path = NULL) { #' @param raw.data List of expression matrices (gene by cell). Should be named by dataset. #' @param make.sparse Whether to convert raw data into sparse matrices (default TRUE). #' @param take.gene.union Whether to fill out raw.data matrices with union of genes across all -#' datasets (filling in 0 for missing data) (requires make.sparse=T) (default FALSE). +#' datasets (filling in 0 for missing data) (requires make.sparse = TRUE) (default FALSE). #' @param remove.missing Whether to remove cells not expressing any measured genes, and genes not -#' expressed in any cells (if take.gene.union = T, removes only genes not expressed in any +#' expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any #' dataset) (default TRUE). #' @param format.type HDF5 format (10X CellRanger by default). #' @param data.name Path to the data values stored in HDF5 file. @@ -431,31 +450,35 @@ restoreOnlineLiger <- function(object, file.path = NULL) { #' @param indptr.name Path to the pointers stored in HDF5 file. #' @param genes.name Path to the gene names stored in HDF5 file. #' @param barcodes.name Path to the barcodes stored in HDF5 file. +#' @param verbose Print messages (TRUE by default) #' #' @return \code{liger} object with raw.data slot set. -#' @export +#' #' @import Matrix #' @import hdf5r +#' +#' @export #' @examples -#' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +#' # Demonstration using matrices with randomly generated numbers +#' Y <- matrix(runif(5000,0,2), 10,500) +#' Z <- matrix(runif(5000,0,2), 10,500) #' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' } + createLiger <- function(raw.data, - make.sparse = T, - take.gene.union = F, - remove.missing = T, + make.sparse = TRUE, + take.gene.union = FALSE, + remove.missing = TRUE, format.type = "10X", data.name = NULL, indices.name = NULL, indptr.name = NULL, genes.name = NULL, - barcodes.name = NULL) { - if (class(raw.data[[1]]) == "character") { #HDF5 filenames instead of in-memory matrices + barcodes.name = NULL, + verbose = TRUE) { + if (class(raw.data[[1]])[1] == "character") { #HDF5 filenames instead of in-memory matrices object <- methods::new(Class = "liger", raw.data = raw.data, - version = packageVersion("liger")) + version = packageVersion("rliger")) object@V = rep(list(NULL), length(raw.data)) object@H = rep(list(NULL), length(raw.data)) cell.data = list() @@ -547,12 +570,14 @@ createLiger <- function(raw.data, if (remove.missing) { missing_genes <- which(rowSums(merged.data) == 0) if (length(missing_genes) > 0) { - print( - paste0("Removing ", length(missing_genes), - " genes not expressed in any cells across merged datasets.") - ) + if (verbose) { + message("Removing ", length(missing_genes), + " genes not expressed in any cells across merged datasets.") + } if (length(missing_genes) < 25) { - print(rownames(merged.data)[missing_genes]) + if (verbose) { + message(rownames(merged.data)[missing_genes]) + } } merged.data <- merged.data[-missing_genes, ] } @@ -564,48 +589,48 @@ createLiger <- function(raw.data, object <- methods::new( Class = "liger", raw.data = raw.data, - version = packageVersion("liger") + version = packageVersion("rliger") ) # remove missing cells if (remove.missing) { - object <- removeMissingObs(object, use.cols = T) + object <- removeMissingObs(object, use.cols = TRUE, verbose = verbose) # remove missing genes if not already merged if (!take.gene.union) { - object <- removeMissingObs(object, use.cols = F) + object <- removeMissingObs(object, use.cols = FALSE, verbose = verbose) } } # Initialize cell.data for object with nUMI, nGene, and dataset nUMI <- unlist(lapply(object@raw.data, function(x) { colSums(x) - }), use.names = F) + }), use.names = FALSE) nGene <- unlist(lapply(object@raw.data, function(x) { colSums(x > 0) - }), use.names = F) + }), use.names = FALSE) dataset <- unlist(lapply(seq_along(object@raw.data), function(i) { rep(names(object@raw.data)[i], ncol(object@raw.data[[i]])) - }), use.names = F) + }), use.names = FALSE) object@cell.data <- data.frame(nUMI, nGene, dataset) rownames(object@cell.data) <- unlist(lapply(object@raw.data, function(x) { colnames(x) - }), use.names = F) + }), use.names = FALSE) return(object) } #create new dataset, first deleting existing record if dataset already exists -safe_h5_create = function(h5_object,dataset_name,dims,mode="double",chunk_size=dims, liger_object = object) +safe_h5_create = function(object, idx, dataset_name, dims, mode="double", chunk_size = dims) { - if (!h5_object$exists(dataset_name)) { - h5_object$create_dataset(name = dataset_name,dims = dims,dtype = mode, chunk_dims = chunk_size) + if (!object@raw.data[[idx]]$exists(dataset_name)) { + object@raw.data[[idx]]$create_dataset(name = dataset_name,dims = dims,dtype = mode, chunk_dims = chunk_size) } else { - if (h5_object$exists("scale.data")) { - if (h5_object[["scale.data"]]$dims[1] < length(liger_object@var.genes)){ - extendDataSet(h5_object[["scale.data"]], c(length(liger_object@var.genes), h5_object[["scale.data"]]$dims[2])) + if (object@raw.data[[idx]]$exists("scale.data")) { + if (object@raw.data[[idx]][["scale.data"]]$dims[1] < length(object@var.genes)){ + extendDataSet(object@raw.data[[idx]][["scale.data"]], c(length(object@var.genes), object@raw.data[[idx]][["scale.data"]]$dims[2])) } - } else if (h5_object$exists("gene_vars")) { - if (h5_object[["gene_vars"]]$dims[1] < length(liger_object@var.genes)){ - extendDataSet(h5_object[["gene_vars"]], length(liger_object@var.genes)) + } else if (object@raw.data[[idx]]$exists("gene_vars")) { + if (object@raw.data[[idx]][["gene_vars"]]$dims[1] < length(object@var.genes)){ + extendDataSet(object@raw.data[[idx]][["gene_vars"]], length(object@var.genes)) } } } @@ -619,31 +644,36 @@ safe_h5_create = function(h5_object,dataset_name,dims,mode="double",chunk_size=d #' @param chunk size of chunks in hdf5 file. (default 1000) #' @param format.type string of HDF5 format (10X CellRanger by default). #' @param remove.missing Whether to remove cells not expressing any measured genes, and genes not -#' expressed in any cells (if take.gene.union = T, removes only genes not expressed in any +#' expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any #' dataset) (default TRUE). +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with norm.data slot set. -#' @export +#' #' @import hdf5r +#' +#' @export #' @examples -#' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +#' # Demonstration using matrices with randomly generated numbers +#' Y <- matrix(runif(5000,0,2), 10,500) +#' Z <- matrix(runif(5000,0,2), 10,500) #' ligerex <- createLiger(list(y_set = Y, z_set = Z)) #' ligerex <- normalize(ligerex) -#' } normalize <- function(object, chunk = 1000, format.type = "10X", - remove.missing = TRUE) { + remove.missing = TRUE, + verbose = TRUE) { if (class(object@raw.data[[1]])[1] == "H5File") { hdf5_files = names(object@raw.data) nUMI = c() nGene = c() for (i in 1:length(hdf5_files)) { - print(hdf5_files[i]) + if (verbose) { + message(hdf5_files[i]) + } chunk_size = chunk #fname = hdf5_files[[i]] num_entries = object@h5file.info[[i]][["data"]]$dims @@ -658,12 +688,14 @@ normalize <- function(object, gene_means = rep(0,num_genes) #file.h5$close_all() - safe_h5_create(object@raw.data[[i]],"/norm.data",dims=num_entries,mode = h5types$double, chunk_size = chunk_size, liger_object = object) - safe_h5_create(object@raw.data[[i]],"/cell_sums",dims=num_cells,mode = h5types$int, chunk_size = chunk_size, liger_object = object) + safe_h5_create(object = object, idx = i, dataset_name = "/norm.data", dims = num_entries, mode = h5types$double, chunk_size = chunk_size) + safe_h5_create(object = object, idx = i, dataset_name = "/cell_sums", dims = num_cells, mode = h5types$int, chunk_size = chunk_size) #file.h5 = H5File$new(fname, mode="r+") num_chunks = ceiling(num_cells/chunk_size) - pb = txtProgressBar(0,num_chunks,style = 3) + if (verbose) { + pb = txtProgressBar(0,num_chunks,style = 3) + } ind = 0 while(prev_end_col < num_cells) { @@ -691,13 +723,17 @@ normalize <- function(object, row_sums = Matrix::rowSums(norm.data) gene_sum_sq = gene_sum_sq + rowSums(norm.data*norm.data) gene_means = gene_means + row_sums - setTxtProgressBar(pb,ind) + if (verbose) { + setTxtProgressBar(pb,ind) + } + } + if (verbose) { + setTxtProgressBar(pb,num_chunks) + cat("\n") } - setTxtProgressBar(pb,num_chunks) - cat("\n") gene_means = gene_means / num_cells - safe_h5_create(object@raw.data[[i]],"gene_means",dims=num_genes,mode=h5types$double, liger_object = object) - safe_h5_create(object@raw.data[[i]],"gene_sum_sq",dims=num_genes,mode=h5types$double, liger_object = object) + safe_h5_create(object = object, idx = i, dataset_name = "gene_means", dims=num_genes, mode=h5types$double) + safe_h5_create(object = object, idx = i, dataset_name = "gene_sum_sq", dims=num_genes, mode=h5types$double) object@raw.data[[i]][["gene_means"]][1:length(gene_means)] = gene_means object@raw.data[[i]][["gene_sum_sq"]][1:length(gene_sum_sq)] = gene_sum_sq object@norm.data[[i]] = object@raw.data[[i]][["norm.data"]] @@ -718,7 +754,7 @@ normalize <- function(object, names(object@norm.data) = names(object@raw.data) } else { if (remove.missing) { - object <- removeMissingObs(object, slot.use = "raw.data", use.cols = T) + object <- removeMissingObs(object, slot.use = "raw.data", use.cols = TRUE) } if (class(object@raw.data[[1]])[1] == "dgTMatrix" | class(object@raw.data[[1]])[1] == "dgCMatrix") { @@ -739,24 +775,19 @@ normalize <- function(object, #' @param object \code{liger} object. The input raw.data should be a list of hdf5 files. #' Should call normalize and selectGenes before calling. #' @param chunk size of chunks in hdf5 file. (default 1000) +#' @param verbose Print progress bar/messages (TRUE by default) +#' #' @return \code{liger} object with scale.data slot set. -#' @export +#' #' @import hdf5r -#' @examples -#' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' } -calcGeneVars = function (object, chunk = 1000) + +calcGeneVars = function (object, chunk = 1000, verbose = TRUE) { hdf5_files = names(object@raw.data) for (i in 1:length(hdf5_files)) { - print(hdf5_files[i]) + if (verbose) { + message(hdf5_files[i]) + } chunk_size = chunk num_cells = object@h5file.info[[i]][["barcodes"]]$dims num_genes = object@h5file.info[[i]][["genes"]]$dims @@ -770,7 +801,9 @@ calcGeneVars = function (object, chunk = 1000) gene_num_pos = rep(0,num_genes) num_chunks = ceiling(num_cells/chunk_size) - pb = txtProgressBar(0, num_chunks, style = 3) + if (verbose) { + pb = txtProgressBar(0, num_chunks, style = 3) + } ind = 0 while (prev_end_col < num_cells) { ind = ind + 1 @@ -787,13 +820,16 @@ calcGeneVars = function (object, chunk = 1000) prev_end_data = prev_end_data + num_read prev_end_ind = tail(start_inds, 1) gene_vars = gene_vars + sumSquaredDeviations(norm.data,gene_means) - setTxtProgressBar(pb, ind) + if (verbose) { + setTxtProgressBar(pb, ind) + } + } + if (verbose) { + setTxtProgressBar(pb, num_chunks) + cat("\n") } - setTxtProgressBar(pb, num_chunks) - cat("\n") gene_vars = gene_vars/(num_cells - 1) - safe_h5_create(object@raw.data[[i]], "/gene_vars", dims = num_genes, - mode = h5types$double, liger_object = object) + safe_h5_create(object = object, idx = i, dataset_name = "/gene_vars", dims = num_genes, mode = h5types$double) object@raw.data[[i]][["gene_vars"]][1:num_genes] = gene_vars } return(object) @@ -829,28 +865,29 @@ calcGeneVars = function (object, chunk = 1000) #' Selected genes are plotted in green. (default FALSE) #' @param cex.use Point size for plot. #' @param chunk size of chunks in hdf5 file. (default 1000) - +#' #' @return \code{liger} object with var.genes slot set. -#' @export +#' #' @import hdf5r #' @importFrom stats optimize #' @importFrom graphics abline plot points title #' @importFrom stats qnorm +#' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +#' # Given datasets Y and Z #' ligerex <- createLiger(list(y_set = Y, z_set = Z)) #' ligerex <- normalize(ligerex) -#' # use default selectGenes settings +#' # use default selectGenes settings (var.thresh = 0.1) #' ligerex <- selectGenes(ligerex) #' # select a smaller subset of genes -#' ligerex <- selectGenes(ligerex, var.thresh=0.8) +#' ligerex <- selectGenes(ligerex, var.thresh = 0.3) #' } selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes = NULL, tol = 0.0001, datasets.use = 1:length(object@raw.data), combine = "union", - capitalize = F, do.plot = F, cex.use = 0.3, chunk=1000) + capitalize = FALSE, do.plot = FALSE, cex.use = 0.3, chunk=1000) { if (class(object@raw.data[[1]])[1] == "H5File") { if (!object@raw.data[[1]]$exists("gene_vars")) { @@ -916,7 +953,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes if (length(genes.use) == 0) { warning("No genes were selected; lower var.thresh values or choose 'union' for combine parameter", - immediate. = T) + immediate. = TRUE) } object@var.genes = genes.use } else { @@ -1000,7 +1037,7 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes if (length(genes.use) == 0) { warning("No genes were selected; lower var.thresh values or choose 'union' for combine parameter", - immediate. = T) + immediate. = TRUE) } object@var.genes <- genes.use } @@ -1018,27 +1055,31 @@ selectGenes <- function(object, var.thresh = 0.1, alpha.thresh = 0.99, num.genes #' @param remove.missing Whether to remove cells from scale.data with no gene expression #' (default TRUE). #' @param chunk size of chunks in hdf5 file. (default 1000) +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with scale.data slot set. -#' @export +#' #' @import hdf5r +#' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +#' # Given datasets Y and Z #' ligerex <- createLiger(list(y_set = Y, z_set = Z)) #' ligerex <- normalize(ligerex) -#' # select genes +#' # use default selectGenes settings (var.thresh = 0.1) #' ligerex <- selectGenes(ligerex) #' ligerex <- scaleNotCenter(ligerex) #' } -scaleNotCenter <- function(object, remove.missing = T, chunk = 1000) { +scaleNotCenter <- function(object, remove.missing = TRUE, chunk = 1000, verbose = TRUE) { if (class(object@raw.data[[1]])[1] == "H5File") { hdf5_files = names(object@raw.data) vargenes = object@var.genes for (i in 1:length(hdf5_files)) { - print(hdf5_files[i]) + if (verbose) { + message(hdf5_files[i]) + } chunk_size = chunk if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ @@ -1059,9 +1100,11 @@ scaleNotCenter <- function(object, remove.missing = T, chunk = 1000) { gene_inds = which(genes %in% vargenes) gene_root_mean_sum_sq = sqrt(gene_sum_sq/(num_cells-1)) - safe_h5_create(object@raw.data[[i]], "scale.data", dims = c(length(vargenes), num_cells), mode = h5types$double, chunk_size = c(length(vargenes), chunk_size), liger_object = object) + safe_h5_create(object = object, idx = i, dataset_name = "scale.data", dims = c(length(vargenes), num_cells), mode = h5types$double, chunk_size = c(length(vargenes), chunk_size)) num_chunks = ceiling(num_cells/chunk_size) - pb = txtProgressBar(0, num_chunks, style = 3) + if (verbose) { + pb = txtProgressBar(0, num_chunks, style = 3) + } ind = 0 while (prev_end_col < num_cells) { ind = ind + 1 @@ -1085,11 +1128,15 @@ scaleNotCenter <- function(object, remove.missing = T, chunk = 1000) { prev_end_col = prev_end_col + chunk_size prev_end_data = prev_end_data + num_read prev_end_ind = tail(start_inds, 1) - setTxtProgressBar(pb, ind) + if (verbose) { + setTxtProgressBar(pb, ind) + } } object@scale.data[[i]] = object@raw.data[[i]][["scale.data"]] - setTxtProgressBar(pb, num_chunks) - cat("\n") + if (verbose) { + setTxtProgressBar(pb, num_chunks) + cat("\n") + } } names(object@scale.data) <- names(object@raw.data) } else { @@ -1104,7 +1151,7 @@ scaleNotCenter <- function(object, remove.missing = T, chunk = 1000) { }) } else { object@scale.data <- lapply(1:length(object@norm.data), function(i) { - scale(t(object@norm.data[[i]][object@var.genes, ]), center = F, scale = T) + scale(t(object@norm.data[[i]][object@var.genes, ]), center = FALSE, scale = TRUE) }) } names(object@scale.data) <- names(object@norm.data) @@ -1115,7 +1162,7 @@ scaleNotCenter <- function(object, remove.missing = T, chunk = 1000) { } # may want to remove such cells before scaling -- should not matter for large datasets? if (remove.missing) { - object <- removeMissingObs(object, slot.use = "scale.data", use.cols = F) + object <- removeMissingObs(object, slot.use = "scale.data", use.cols = FALSE, verbose = verbose) } } @@ -1129,19 +1176,20 @@ scaleNotCenter <- function(object, remove.missing = T, chunk = 1000) { #' @param object \code{liger} object (scale.data or norm.data must be set). #' @param slot.use The data slot to filter (takes "raw.data" and "scale.data") (default "raw.data"). #' @param use.cols Treat each column as a cell (default TRUE). +#' @param verbose Print messages (TRUE by default) #' #' @return \code{liger} object with modified raw.data (or chosen slot) (dataset names preserved). +#' #' @export #' @examples #' \dontrun{ -#' # liger object -#' ligerex +#' # liger object: ligerex #' ligerex <- removeMissingObs(ligerex) #' } -removeMissingObs <- function(object, slot.use = "raw.data", use.cols = T) { +removeMissingObs <- function(object, slot.use = "raw.data", use.cols = TRUE, verbose = TRUE) { filter.data <- slot(object, slot.use) - removed <- ifelse((slot.use %in% c("raw.data", "norm.data")) & (use.cols == T), + removed <- ifelse((slot.use %in% c("raw.data", "norm.data")) & (use.cols == TRUE), yes = "cells", no = "genes") expressed <- ifelse(removed == "cells", yes = " any genes", no = "") filter.data <- lapply(seq_along(filter.data), function(x) { @@ -1151,18 +1199,22 @@ removeMissingObs <- function(object, slot.use = "raw.data", use.cols = T) { missing <- which(rowSums(filter.data[[x]]) == 0) } if (length(missing) > 0) { - print(paste0( - "Removing ", length(missing), " ", removed, " not expressing", expressed, " in ", - names(object@raw.data)[x], "." - )) + if (verbose) { + message("Removing ", length(missing), " ", removed, " not expressing", expressed, " in ", + names(object@raw.data)[x], ".") + } if (use.cols) { if (length(missing) < 25) { - print(colnames(filter.data[[x]])[missing]) + if (verbose) { + message(colnames(filter.data[[x]])[missing]) + } } subset <- filter.data[[x]][, -missing] } else { if (length(missing) < 25) { - print(rownames(filter.data[[x]])[missing]) + if (verbose) { + message(rownames(filter.data[[x]])[missing]) + } } subset <- filter.data[[x]][-missing, ] } @@ -1183,14 +1235,17 @@ removeMissingObs <- function(object, slot.use = "raw.data", use.cols = T) { #balance="dataset" samples up to max_cells from each dataset #datasets.use uses only the specified datasets for sampling. Default is NULL (all datasets) #rand.seed for reproducibility (default 1). +#verbose for printing messages #Returns: vector of cell barcodes -downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed=1) +downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed=1, verbose = TRUE) { set.seed(seed) if(is.null(datasets.use)) { - datasets.use=names(object@H) - print(datasets.use) + datasets.use = names(object@H) + if (verbose) { + message(datasets.use) + } } inds = c() inds_ds = list() @@ -1221,12 +1276,12 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed { dataset <- unlist(lapply(seq_along(object@H), function(i) { rep(names(object@H)[i], nrow(object@H[[i]])) - }), use.names = F) + }), use.names = FALSE) object@cell.data <- data.frame(dataset) rownames(object@cell.data) <- unlist(lapply(object@H, function(x) { rownames(x) - }), use.names = F) + }), use.names = FALSE) } for (ds in 1:length(datasets.use)) { @@ -1257,20 +1312,17 @@ downsample <- function(object,balance=NULL,max_cells=1000,datasets.use=NULL,seed #' @param datasets.use uses only the specified datasets for sampling. Default is NULL (all datasets) #' @param genes.use samples from only the specified genes. Default is NULL (all genes) #' @param rand.seed for reproducibility (default 1). -#' #Note: +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with sample.data slot set. -#' @export +#' #' @import hdf5r +#' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) +#' # Only for online liger object (based on HDF5 files) +#' # Example: sample a total amount of 5000 cells from norm.data for downstream analysis #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) #' } @@ -1281,14 +1333,17 @@ readSubset <- function(object, chunk = 1000, datasets.use = NULL, genes.use = NULL, - rand.seed = 1) { + rand.seed = 1, + verbose = TRUE) { if (class(object@raw.data[[1]])[1] == "H5File") { - cat("Sampling\n") + if (verbose) { + message("Start sampling") + } if(is.null(datasets.use)) { datasets.use=names(object@H) } - cell_inds = downsample(object,balance=balance,max_cells=max.cells,datasets.use=datasets.use, seed=rand.seed) + cell_inds = downsample(object, balance = balance, max_cells = max.cells, datasets.use = datasets.use, seed = rand.seed, verbose = verbose) hdf5_files = names(object@raw.data) #vargenes = object@var.genes @@ -1314,11 +1369,13 @@ readSubset <- function(object, } for (i in 1:length(hdf5_files)) { - print(hdf5_files[i]) + if (verbose) { + message(hdf5_files[i]) + } if (slot.use == "scale.data") { data.subset = c() } else { - data.subset = Matrix(nrow=length(genes.use),ncol=0,sparse=T) + data.subset = Matrix(nrow=length(genes.use),ncol=0,sparse=TRUE) } chunk_size = chunk if (object@h5file.info[[i]][["format.type"]] == "AnnData"){ @@ -1339,7 +1396,9 @@ readSubset <- function(object, #gene_inds = which(genes %in% vargenes) num_chunks = ceiling(num_cells/chunk_size) - pb = txtProgressBar(0, num_chunks, style = 3) + if (verbose) { + pb = txtProgressBar(0, num_chunks, style = 3) + } ind = 0 while (prev_end_col < num_cells) { @@ -1379,7 +1438,9 @@ readSubset <- function(object, data.subset = cbind(data.subset,one_chunk) prev_end_col = prev_end_col + chunk_size - setTxtProgressBar(pb, ind) + if (verbose) { + setTxtProgressBar(pb, ind) + } } if (class(object@raw.data[[i]])[1] == "H5File") { object@sample.data[[i]] = data.subset @@ -1388,16 +1449,20 @@ readSubset <- function(object, } object@h5file.info[[i]][["sample.data.type"]] = slot.use } - setTxtProgressBar(pb, num_chunks) - cat("\n") + if (verbose) { + setTxtProgressBar(pb, num_chunks) + cat("\n") + } } } else { - cat("Sampling\n") + if (verbose) { + message("Start sampling") + } if(is.null(datasets.use)) { - datasets.use=names(object@H) + datasets.use = names(object@H) } - cell_inds = downsample(object,balance=balance,max_cells=max.cells,datasets.use=datasets.use) + cell_inds = downsample(object, balance = balance, max_cells = max.cells, datasets.use = datasets.use, verbose = verbose) files = names(object@raw.data) # find the intersect of genes from each input datasets @@ -1410,7 +1475,9 @@ readSubset <- function(object, { genes.use = genes } - pb = txtProgressBar(0, length(files), style = 3) + if (verbose) { + pb = txtProgressBar(0, length(files), style = 3) + } for (i in 1:length(files)){ if (slot.use=="raw.data") { @@ -1424,9 +1491,13 @@ readSubset <- function(object, { data.subset_i = t(object@scale.data[[i]][cell_inds[[i]], genes.use]) } - setTxtProgressBar(pb, i) + if (verbose) { + setTxtProgressBar(pb, i) + } + } + if (verbose){ + cat("\n") } - cat("\n") object@sample.data[[i]] = data.subset_i object@h5file.info[[i]][["sample.data.type"]] = slot.use } @@ -1473,40 +1544,37 @@ readSubset <- function(object, #' of cells in the smallest dataset. #' @param h5_chunk_size Chunk size of input hdf5 files (default 1000). The chunk size should be no larger than the batch size. #' @param seed Random seed to allow reproducible results (default 123). +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with H, W, V, A and B slots set. -#' @export +#' #' @import hdf5r #' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' # get factorization using 20 factors from scratch -#' ligerex <- online(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) +#' # Requires preprocessed liger object +#' # Get factorization using 20 factors and mini-batch of 5000 cells +#' # (default setting, can be adjusted for ideal results) +#' ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) #' } -#' online_iNMF <- function(object, X_new = NULL, projection = FALSE, - W.init=NULL, - V.init=NULL, - H.init=NULL, - A.init=NULL, - B.init=NULL, - k=20, - lambda=5, - max.epochs=5, - miniBatch_max_iters=1, - miniBatch_size=5000, - h5_chunk_size=1000, - seed=123){ + W.init = NULL, + V.init = NULL, + H.init = NULL, + A.init = NULL, + B.init = NULL, + k = 20, + lambda = 5, + max.epochs = 5, + miniBatch_max_iters = 1, + miniBatch_size = 5000, + h5_chunk_size = 1000, + seed = 123, + verbose = TRUE){ if (!is.null(X_new)){ # if there is new dataset raw.data_prev = object@raw.data norm.data_prev = object@norm.data @@ -1515,7 +1583,6 @@ online_iNMF <- function(object, cell.data_prev = object@cell.data names(raw.data_prev) = names(object@raw.data) - # assuming only one new dataset arrives at a time raw.data = c() norm.data = c() @@ -1544,12 +1611,18 @@ online_iNMF <- function(object, } if (processed) { - cat("New dataset", i, "already preprocessed.", "\n") + if (verbose) { + cat("New dataset", i, "already preprocessed.", "\n") + } } else { - cat("New dataset", i, "not preprocessed. Preprocessing...", "\n") + if (verbose) { + cat("New dataset", i, "not preprocessed. Preprocessing...", "\n") + } object = normalize(object, chunk = h5_chunk_size) - object = scaleNotCenter(object, remove.missing = T, chunk = h5_chunk_size) - cat("New dataset", i, "Processed.", "\n") + object = scaleNotCenter(object, remove.missing = TRUE, chunk = h5_chunk_size) + if (verbose) { + cat("New dataset", i, "Processed.", "\n") + } } } @@ -1579,7 +1652,9 @@ online_iNMF <- function(object, } else { num_new_files = length(X_new) num_prev_files = num_files - num_new_files - cat(num_new_files, "new datasets detected.", "\n") + if (verbose) { + cat(num_new_files, "new datasets detected.", "\n") + } } file_idx = 1:num_files # indices for all input files @@ -1709,9 +1784,12 @@ online_iNMF <- function(object, } } - cat("Starting Online iNMF...", "\n") total.iters = floor(sum(num_cells_new) * max.epochs / miniBatch_size) - pb <- txtProgressBar(min = 1, max = total.iters+1, style = 3) + if (verbose) { + cat("Starting Online iNMF...", "\n") + pb <- txtProgressBar(min = 1, max = total.iters+1, style = 3) + } + while(epoch[file_idx_new[1]] < max.epochs) { # track epochs minibatch_idx = rep(list(NULL), num_files) # indices of samples in each dataest used for this iteration @@ -1837,10 +1915,14 @@ online_iNMF <- function(object, } epoch_next = rep(FALSE, num_files) # reset epoch change indicator iter = iter + 1 - setTxtProgressBar(pb = pb, value = iter) + if (verbose) { + setTxtProgressBar(pb = pb, value = iter) + } } } - cat("\nCalculate metagene loadings...", "\n") + if (verbose) { + cat("\nCalculate metagene loadings...", "\n") + } object@H = rep(list(NULL), num_files) for (i in file_idx){ if (num_cells[i] %% miniBatch_size == 0) num_batch = num_cells[i] %/% miniBatch_size else num_batch = num_cells[i] %/% miniBatch_size + 1 @@ -1871,7 +1953,9 @@ online_iNMF <- function(object, } } else { - cat("Metagene projection", "\n") + if (verbose) { + cat("Metagene projection", "\n") + } object@W = if(!is.null(W.init)) W.init else object@W object@H[file_idx_new] = rep(list(NULL), num_new_files) object@V[file_idx_new] = rep(list(NULL), num_new_files) @@ -1920,7 +2004,7 @@ online_iNMF <- function(object, #' @param eps Threshold. Should be a small positive value. (default 1e-16) #' @return Dense matrix with smallest values equal to eps. -nonneg <- function(x, eps=1e-16) { +nonneg <- function(x, eps = 1e-16) { x[x < eps] = eps return(x) } @@ -1958,24 +2042,20 @@ nonneg <- function(x, eps=1e-16) { #' @param V.init Initial values to use for V matrices (default NULL) #' @param rand.seed Random seed to allow reproducible results (default 1). #' @param print.obj Print objective function values after convergence (default FALSE). +#' @param verbose Print progress bar/messages (TRUE by default) #' @param ... Arguments passed to other methods #' #' @return \code{liger} object with H, W, and V slots set. -#' @export #' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' # get factorization using three restarts and 20 factors -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) +#' # Requires preprocessed liger object (only for objected not based on HDF5 files) +#' # Get factorization using 20 factors and mini-batch of 5000 cells +#' # (default setting, can be adjusted for ideal results) +#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) #' } -#' + optimizeALS <- function( object, ... @@ -1984,9 +2064,10 @@ optimizeALS <- function( } #' @rdname optimizeALS -#' @export #' @importFrom stats runif #' @importFrom utils setTxtProgressBar txtProgressBar +#' +#' @export #' @method optimizeALS list #' optimizeALS.list <- function( @@ -2001,6 +2082,7 @@ optimizeALS.list <- function( V.init = NULL, rand.seed = 1, print.obj = FALSE, + verbose = TRUE, ... ) { if (!all(sapply(X = object, FUN = is.matrix))) { @@ -2154,27 +2236,32 @@ optimizeALS.list <- function( end_time <- difftime(time1 = Sys.time(), time2 = start_time, units = "auto") run_stats[i, 1] <- as.double(x = end_time) run_stats[i, 2] <- iters - cat( - "\nFinished in ", - run_stats[i, 1], - " ", - units(x = end_time), - ", ", - iters, - " iterations.\n", - "Max iterations set: ", - max.iters, - ".\n", - "Final objective delta: ", - delta, - '.\n', - sep = "" - ) - if (print.obj) { - cat("Objective:", obj, "\n") + if (verbose) { + cat( + "\nFinished in ", + run_stats[i, 1], + " ", + units(x = end_time), + ", ", + iters, + " iterations.\n", + "Max iterations set: ", + max.iters, + ".\n", + "Final objective delta: ", + delta, + '.\n', + sep = "" + ) + } + + if (verbose) { + if (print.obj) { + cat("Objective:", obj, "\n") + } + cat("Best results with seed ", best_seed, ".\n", sep = "") } } - cat("Best results with seed ", best_seed, ".\n", sep = "") out <- list() out$H <- H_m for (i in 1:length(x = object)) { @@ -2204,12 +2291,14 @@ optimizeALS.liger <- function( V.init = NULL, rand.seed = 1, print.obj = FALSE, + verbose = TRUE, ... ) { object <- removeMissingObs( object = object, slot.use = 'scale.data', - use.cols = FALSE + use.cols = FALSE, + verbose = TRUE ) out <- optimizeALS( object = object@scale.data, @@ -2222,7 +2311,8 @@ optimizeALS.liger <- function( W.init = W.init, V.init = V.init, rand.seed = rand.seed, - print.obj = print.obj + print.obj = print.obj, + verbose = verbose ) names(x = out$H) <- names(x = out$V) <- names(x = object@raw.data) for (i in 1:length(x = object@scale.data)) { @@ -2250,26 +2340,21 @@ optimizeALS.liger <- function( #' (default 1e-4). #' @param max.iters Maximum number of block coordinate descent iterations to perform (default 100). #' @param rand.seed Random seed to set. Only relevant if k.new > k. (default 1) +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with H, W, and V slots reset. -#' @export +#' #' @importFrom plyr rbind.fill.matrix +#' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' # get factorization using three restarts and 20 factors -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) #' # decide to run with k = 15 instead (keeping old lambda the same) #' ligerex <- optimizeNewK(ligerex, k.new = 15) #' } optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters = 100, - rand.seed = 1) { + rand.seed = 1, verbose = TRUE) { if (is.null(lambda)) { lambda <- object@parameters$lambda } @@ -2334,7 +2419,7 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters norm(H[[i]][, k] %*% t(W[k, ] + V[[i]][k, ]), "F") }) } - k.use <- order(deltas, decreasing = T)[1:k.new] + k.use <- order(deltas, decreasing = TRUE)[1:k.new] W <- W[k.use, ] H <- lapply(H, function(x) { x[, k.use] @@ -2344,8 +2429,8 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters }) } object <- optimizeALS(object, k.new, - lambda = lambda, thresh = thresh, max.iters = max.iters, - H.init = H, W.init = W, V.init = V, rand.seed = rand.seed) + lambda = lambda, thresh = thresh, max.iters = max.iters, H.init = H, + W.init = W, V.init = V, rand.seed = rand.seed, verbose = verbose) return(object) } @@ -2365,45 +2450,43 @@ optimizeNewK <- function(object, k.new, lambda = NULL, thresh = 1e-4, max.iters #' @param thresh Convergence threshold. Convergence occurs when |obj0-obj|/(mean(obj0,obj)) < thresh #' (default 1e-4). #' @param max.iters Maximum number of block coordinate descent iterations to perform (default 100). +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with H, W, and V slots reset. Raw.data, norm.data, and scale.data will #' also be updated to include the new data. +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) +#' # Given preprocessed liger object: ligerex (contains two datasets Y and Z) #' # get factorization using three restarts and 20 factors #' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) -#' # acquire new data from the same cell type, let's add it to existing datasets -#' Y_new <- matrix(41:52, nrow = 4, byrow = T) -#' Z_new <- matrix(c(51:57, 56:52), nrow = 4, byrow = T) +#' # acquire new data (Y_new, Z_new) from the same cell type, let's add it to existing datasets #' new_data <- list(Y_set = Y_new, Z_set = Z_new) #' ligerex2 <- optimizeNewData(ligerex, new.data = new_data, which.datasets = list('y_set', 'z_set')) -#' # acquire new data from different cell type, we'll just add another dataset -#' X <- matrix(35:46, nrow = 4, byrow = T) +#' # acquire new data from different cell type (X), we'll just add another dataset #' # it's probably most similar to y_set #' ligerex <- optimizeNewData(ligerex, new.data = list(x_set = X), which.datasets = list('y_set'), -#' add.to.existing = F) +#' add.to.existing = FALSE) #' } -optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = T, lambda = NULL, - thresh = 1e-4, max.iters = 100) { +optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = TRUE, lambda = NULL, + thresh = 1e-4, max.iters = 100, verbose = TRUE) { if (is.null(lambda)) { lambda <- object@parameters$lambda } if (add.to.existing) { for (i in 1:length(new.data)) { - print(dim(object@raw.data[[which.datasets[[i]]]])) + if (verbose) { + message(dim(object@raw.data[[which.datasets[[i]]]])) + } object@raw.data[[which.datasets[[i]]]] <- cbind( object@raw.data[[which.datasets[[i]]]], new.data[[i]] ) - print(dim(object@raw.data[[which.datasets[[i]]]])) + if (verbose) { + message(dim(object@raw.data[[which.datasets[[i]]]])) + } } object <- normalize(object) object <- scaleNotCenter(object) @@ -2460,7 +2543,7 @@ optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = k <- ncol(object@H[[1]]) object <- optimizeALS(object, k, lambda, thresh, max.iters, H.init = object@H, W.init = object@W, - V.init = object@V) + V.init = object@V, verbose = verbose) return(object) } @@ -2483,21 +2566,13 @@ optimizeNewData <- function(object, new.data, which.datasets, add.to.existing = #' #' @return \code{liger} object with H, W, and V slots reset. Scale.data #' (if desired) will also be updated to reflect the subset. +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' colnames(Y) <- c('a', 'b', 'c') -#' colnames(Z) <- c('p', 'q', 'r') -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' # get factorization using three restarts and 20 factors -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) #' # now want to look at only subset of data -#' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(c('a', 'b'), c('q'))) +#' # Requires a vector of cell names from data 1 and a vector of cell names from data 2 +#' ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2)) #' } optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, lambda = NULL, @@ -2526,12 +2601,11 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la object@norm.data[[i]] <- object@norm.data[[i]][, cell.subset[[i]]] if (names(object@norm.data)[i] %in% datasets.scale) { object@scale.data[[i]] <- scale(t(object@norm.data[[i]][object@var.genes, ]), - scale = T, center = F) + scale = TRUE, center = FALSE) object@scale.data[[i]][is.na(object@scale.data[[i]])] <- 0 } else { object@scale.data[[i]] <- t(object@norm.data[[i]][object@var.genes, ]) } - print(dim(object@scale.data[[i]])) } names(object@raw.data) <- names(object@norm.data) <- names(object@H) <- old_names @@ -2553,33 +2627,27 @@ optimizeSubset <- function(object, cell.subset = NULL, cluster.subset = NULL, la #' @param thresh Convergence threshold. Convergence occurs when |obj0-obj|/(mean(obj0,obj)) < thresh #' @param max.iters Maximum number of block coordinate descent iterations to perform (default 100). #' @param rand.seed Random seed for reproducibility (default 1). +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with optimized factorization values +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' # get factorization using three restarts and 20 factors -#' ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) #' # decide to run with lambda = 15 instead (keeping k the same) #' ligerex <- optimizeNewLambda(ligerex, new.lambda = 15) #' } -optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100, rand.seed = 1) { +optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100, rand.seed = 1, verbose = TRUE) { k <- ncol(object@H[[1]]) H <- object@H W <- object@W - if (new.lambda < object@parameters$lambda) { - print(paste("New lambda less than current lambda; new factorization may not be optimal.", - "Re-optimization with optimizeAlS recommended instead.")) + if (new.lambda < object@parameters$lambda && verbose) { + message("New lambda less than current lambda; new factorization may not be optimal. ", + "Re-optimization with optimizeAlS recommended instead.") } object <- optimizeALS(object, k, lambda = new.lambda, thresh = thresh, max.iters = max.iters, - H.init = H, W.init = W, rand.seed = rand.seed) + H.init = H, W.init = W, rand.seed = rand.seed, verbose = verbose) return(object) } @@ -2612,53 +2680,58 @@ optimizeNewLambda <- function(object, new.lambda, thresh = 1e-4, max.iters = 100 #' @param return.raw If return.results TRUE, whether to return raw data (in format described below), #' or dataframe used to produce ggplot object. Raw data is matrix of alignment values for each #' lambda value tested (each column represents a different rep for nrep).(default FALSE) +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return Matrix of results if indicated or ggplot object. Plots alignment vs. lambda to console. #' -#' @import doSNOW -#' @importFrom snow makeCluster stopCluster +#' @import doParallel +#' @import parallel #' @importFrom foreach foreach #' @importFrom foreach "%dopar%" #' @importFrom ggplot2 ggplot aes geom_point geom_line guides guide_legend labs theme theme_classic +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) +#' # Requires preprocessed liger object #' # examine plot for most appropriate lambda, use multiple cores for faster results #' suggestLambda(ligerex, k = 20, num.cores = 4) #' } -suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, - thresh = 1e-4, max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, - resolution = 1, gen.new = F, nrep = 1, return.data = F, return.raw = F) { +suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.cores = 1, thresh = 1e-4, + max.iters = 100, knn_k = 20, k2 = 500, ref_dataset = NULL, resolution = 1, + gen.new = FALSE, nrep = 1, return.data = FALSE, return.raw = FALSE, verbose = TRUE) { if (is.null(lambda.test)) { lambda.test <- c(seq(0.25, 1, 0.25), seq(2, 10, 1), seq(15, 60, 5)) } time_start <- Sys.time() # optimize smallest lambda value first to take advantage of efficient updating - print("This operation may take several minutes depending on number of values being tested") + if (verbose) { + message("This operation may take several minutes depending on number of values being tested") + } rep_data <- list() for (r in 1:nrep) { - print(paste0("Preprocessing for rep ", r, - ": optimizing initial factorization with smallest test lambda=", - lambda.test[1])) + if (verbose) { + message("Preprocessing for rep ", r, + ": optimizing initial factorization with smallest test lambda=", + lambda.test[1]) + } object <- optimizeALS(object, k = k, thresh = thresh, lambda = lambda.test[1], - max.iters = max.iters, nrep = 1, rand.seed = (rand.seed + r - 1)) - print('Progress now represents completed factorizations (out of total number of lambda values)') - cl <- makeCluster(num.cores) - registerDoSNOW(cl) - pb <- txtProgressBar(min = 0, max = length(lambda.test), style = 3, initial = 1, file = "") + max.iters = max.iters, nrep = 1, rand.seed = (rand.seed + r - 1), verbose = verbose) + if (verbose) { + message('Testing different choices of lambda values') + } + #cl <- makeCluster(num.cores) + cl <- parallel::makeCluster(num.cores) + #registerDoSNOW(cl) + doParallel::registerDoParallel(cl) + #pb <- txtProgressBar(min = 0, max = length(lambda.test), style = 3, initial = 1, file = "") # define progress bar function - progress <- function(n) setTxtProgressBar(pb, n) - opts <- list(progress = progress) + #progress <- function(n) setTxtProgressBar(pb, n) + #opts <- list(progress = progress) i <- 0 - data_matrix <- foreach(i = 1:length(lambda.test), .combine = "rbind", .options.snow = opts, - .packages = 'liger') %dopar% { + data_matrix <- foreach(i = 1:length(lambda.test), .combine = "rbind", + .packages = 'rliger') %dopar% { if (i != 1) { if (gen.new) { ob.test <- optimizeALS(object, @@ -2681,8 +2754,8 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core ) calcAlignment(ob.test) } - close(pb) - stopCluster(cl) + #close(pb) + parallel::stopCluster(cl) rep_data[[r]] <- data_matrix } @@ -2693,7 +2766,9 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core mean_aligns <- apply(aligns, 1, mean) time_elapsed <- difftime(Sys.time(), time_start, units = "auto") - cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) + if (verbose) { + cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) + } # make dataframe df_al <- data.frame(align = mean_aligns, lambda = lambda.test) @@ -2743,52 +2818,56 @@ suggestLambda <- function(object, k, lambda.test = NULL, rand.seed = 1, num.core #' @param return.raw If return.results TRUE, whether to return raw data (in format described below), #' or dataframe used to produce ggplot object. Raw data is list of matrices of K-L divergences #' (length(k.test) by n_cells). Length of list corresponds to nrep. (default FALSE) +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return Matrix of results if indicated or ggplot object. Plots K-L divergence vs. k to console. -#' @import doSNOW -#' @importFrom snow makeCluster stopCluster +#' +#' @import doParallel +#' @import parallel #' @importFrom foreach foreach #' @importFrom foreach "%dopar%" #' @importFrom ggplot2 ggplot aes geom_point geom_line guides guide_legend labs theme theme_classic +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z)) -#' ligerex <- normalize(ligerex) -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) +#' # Requires preprocessed liger object #' # examine plot for most appropriate k, use multiple cores for faster results #' suggestK(ligerex, num.cores = 4) #' } suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, max.iters = 100, - num.cores = 1, rand.seed = 1, gen.new = F, nrep = 1, plot.log2 = T, - return.data = F, return.raw = F) { + num.cores = 1, rand.seed = 1, gen.new = FALSE, nrep = 1, plot.log2 = TRUE, + return.data = FALSE, return.raw = FALSE, verbose = TRUE) { if (length(object@scale.data) == 0) { stop("scaleNotCenter should be run on the object before running suggestK.") } time_start <- Sys.time() # optimize largest k value first to take advantage of efficient updating - print("This operation may take several minutes depending on number of values being tested") + if (verbose) { + message("This operation may take several minutes depending on number of values being tested") + } rep_data <- list() for (r in 1:nrep) { - print(paste0("Preprocessing for rep ", r, - ": optimizing initial factorization with largest test k=", - k.test[length(k.test)])) + if (verbose) { + message("Preprocessing for rep ", r, + ": optimizing initial factorization with largest test k=", + k.test[length(k.test)]) + } object <- optimizeALS(object, k = k.test[length(k.test)], lambda = lambda, thresh = thresh, max.iters = max.iters, nrep = 1, rand.seed = (rand.seed + r - 1)) - print('Progress now represents completed factorizations (out of total number of k values)') - cl <- makeCluster(num.cores) - registerDoSNOW(cl) - pb <- txtProgressBar(min = 0, max = length(k.test), style = 3, initial = 1, file = "") + if (verbose) { + message('Testing different choices of k') + } + cl <- parallel::makeCluster(num.cores) + doParallel::registerDoParallel(cl) + #pb <- txtProgressBar(min = 0, max = length(k.test), style = 3, initial = 1, file = "") # define progress bar function - progress <- function(n) setTxtProgressBar(pb, n) - opts <- list(progress = progress) + #progress <- function(n) setTxtProgressBar(pb, n) + #opts <- list(progress = progress) i <- 0 - data_matrix <- foreach(i = length(k.test):1, .combine = "rbind", .options.snow = opts, - .packages = 'liger') %dopar% { + data_matrix <- foreach(i = length(k.test):1, .combine = "rbind", + .packages = 'rliger') %dopar% { if (i != length(k.test)) { if (gen.new) { ob.test <- optimizeALS(object, @@ -2807,8 +2886,8 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, dataset_split <- kl_divergence_uniform(ob.test) unlist(dataset_split) } - close(pb) - stopCluster(cl) + #close(pb) + parallel::stopCluster(cl) data_matrix <- data_matrix[nrow(data_matrix):1, ] rep_data[[r]] <- data_matrix } @@ -2820,7 +2899,9 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, mean_kls <- apply(medians, 1, mean) time_elapsed <- difftime(Sys.time(), time_start, units = "auto") - cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) + if (verbose) { + cat(paste("\nCompleted in:", as.double(time_elapsed), units(time_elapsed))) + } # make dataframe df_kl <- data.frame(median_kl = c(mean_kls, log2(k.test)), k = c(k.test, k.test), calc = c(rep('KL_div', length(k.test)), rep('log2(k)', length(k.test)))) @@ -2884,13 +2965,13 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, #' @param ... Arguments passed to other methods #' #' @return \code{liger} object with 'H.norm' and 'clusters' slot set. -#' @export +#' #' @importFrom stats approxfun #' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object), factorization complete #' # do basic quantile alignment #' ligerex <- quantile_norm(ligerex) #' # higher resolution for more clusters (note that SNF is conserved) @@ -2898,7 +2979,7 @@ suggestK <- function(object, k.test = seq(5, 50, 5), lambda = 5, thresh = 1e-4, #' # change knn_k for more fine-grained local clustering #' ligerex <- quantile_norm(ligerex, knn_k = 15, resolution = 1.2) #' } -#' + quantile_norm <- function( object, ... @@ -2917,10 +2998,10 @@ quantile_norm.list <- function( min_cells = 20, knn_k = 20, dims.use = NULL, - do.center = F, + do.center = FALSE, max_sample = 1000, eps = 0.9, - refine.knn = T, + refine.knn = TRUE, rand.seed = 1, ... ) { @@ -3009,10 +3090,10 @@ quantile_norm.liger <- function( min_cells = 20, knn_k = 20, dims.use = NULL, - do.center = F, + do.center = FALSE, max_sample = 1000, eps = 0.9, - refine.knn = T, + refine.knn = TRUE, rand.seed = 1, ... ) { @@ -3059,41 +3140,45 @@ quantile_norm.liger <- function( #' @param nRandomStarts Number of random starts. (default 10) #' @param nIterations Maximal number of iterations per random start. (default 100) #' @param random.seed Seed of the random number generator. (default 1) +#' @param verbose Print messages (TRUE by default) #' #' @return \code{liger} object with refined 'clusters' slot set. #' #' @export -#' #' @examples #' \dontrun{ -#' # liger object, factorization complete +#' # ligerex (liger object), factorization complete #' ligerex <- louvainCluster(ligerex, resulotion = 0.3) #' } #' louvainCluster <- function(object, resolution = 1.0, k = 20, prune = 1 / 15, eps = 0.1, nRandomStarts = 10, - nIterations = 100, random.seed = 1) { + nIterations = 100, random.seed = 1, verbose = TRUE) { output_path <- paste0('edge_', sub('\\s', '_', Sys.time()), '.txt') output_path = sub(":","_",output_path) output_path = sub(":","_",output_path) if (dim(object@H.norm)[1] == 0){ - print("Louvain Clustering on unnormalized cell factor loadings.") + if (verbose) { + message("Louvain Clustering on unnormalized cell factor loadings.") + } knn <- RANN::nn2(Reduce(rbind, object@H), k = k, eps = eps) } else { - print("Louvain Clustering on quantile normalized cell factor loadings.") + if (verbose) { + message("Louvain Clustering on quantile normalized cell factor loadings.") + } knn <- RANN::nn2(object@H.norm, k = k, eps = eps) } snn <- ComputeSNN(knn$nn.idx, prune = prune) - WriteEdgeFile(snn, output_path, display_progress = F) + WriteEdgeFile(snn, output_path, display_progress = FALSE) clusts <- RunModularityClusteringCpp(snn, modularityFunction = 1, resolution = resolution, nRandomStarts = nRandomStarts, - nIterations = nIterations, algorithm = 1, randomSeed = random.seed, printOutput = F, + nIterations = nIterations, algorithm = 1, randomSeed = random.seed, printOutput = FALSE, edgefilename = output_path ) names(clusts) = rownames(object@cell.data) rownames(snn) = rownames(object@cell.data) colnames(snn) = rownames(object@cell.data) - clusts <- GroupSingletons(ids = clusts, SNN = snn, verbose = F) + clusts <- GroupSingletons(ids = clusts, SNN = snn, verbose = FALSE) object@clusters = as.factor(clusts) unlink(output_path) return(object) @@ -3105,6 +3190,7 @@ louvainCluster <- function(object, resolution = 1.0, k = 20, prune = 1 / 15, eps # @param ids Named vector of cluster ids # @param SNN SNN graph used in clustering # @param group.singletons Group singletons into nearest cluster (TRUE by default). If FALSE, assign all singletons to +# @param verbose Print message # a "singleton" group # # @return Returns updated cluster assignment with all singletons merged with most connected cluster @@ -3137,7 +3223,7 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = FALSE) connectivity[j] <- mean(x = subSNN) } } - m <- max(connectivity, na.rm = T) + m <- max(connectivity, na.rm = TRUE) mi <- which(x = connectivity == m, arr.ind = TRUE) closest_cluster <- sample(x = names(x = connectivity[mi]), 1) ids[i.cells] <- closest_cluster @@ -3165,31 +3251,27 @@ GroupSingletons <- function(ids, SNN, group.singletons = TRUE, verbose = FALSE) #' @param weight Whether to use KNN distances as weight matrix (default FALSE). #' @param norm Whether normalize the imputed data with default parameters (default TRUE). #' @param scale Whether scale but not center the imputed data with default parameters (default TRUE). +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return \code{liger} object with raw data in raw.data slot replaced by imputed data (genes by cells) -#' @export +#' #' @importFrom FNN get.knnx +#' +#' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' X <- matrix(c(1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z, x_set = X)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 20) -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete #' # impute every dataset other than the reference dataset #' ligerex <- imputeKNN(ligerex, reference = "y_set", weight = FALSE) #' # impute only z_set dataset #' ligerex <- imputeKNN(ligerex, reference = "y_set", queries = list("z_set"), knn_k = 50) #' } -imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, norm = TRUE, scale = FALSE) { - cat("NOTE: This function will discard the raw data previously stored in the liger object and", - "replace the raw.data slot with the imputed data.\n\n") +imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, norm = TRUE, scale = FALSE, verbose = TRUE) { + if (verbose) { + cat("NOTE: This function will discard the raw data previously stored in the liger object and", + "replace the raw.data slot with the imputed data.\n\n") + } if (length(reference) > 1) { stop("Can only have ONE reference dataset") @@ -3197,13 +3279,15 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor if (missing(queries)) { # all datasets queries <- names(object@raw.data) queries <- as.list(queries[!queries %in% reference]) - cat( - "Imputing ALL the datasets except the reference dataset\n", - "Reference dataset:\n", - paste(" ", reference, "\n"), - "Query datasets:\n", - paste(" ", as.character(queries), "\n") - ) + if (verbose) { + cat( + "Imputing ALL the datasets except the reference dataset\n", + "Reference dataset:\n", + paste(" ", reference, "\n"), + "Query datasets:\n", + paste(" ", as.character(queries), "\n") + ) + } } else { # only given query datasets queries <- as.list(queries) @@ -3211,13 +3295,15 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor stop("Reference dataset CANNOT be inclued in the query datasets") } else { - cat( - "Imputing given query datasets\n", - "Reference dataset:\n", - paste(" ", reference, "\n"), - "Query datasets:\n", - paste(" ", as.character(queries), "\n") - ) + if (verbose) { + cat( + "Imputing given query datasets\n", + "Reference dataset:\n", + paste(" ", reference, "\n"), + "Query datasets:\n", + paste(" ", as.character(queries), "\n") + ) + } } } @@ -3258,12 +3344,16 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor } if (norm) { - cat('\nNormalizing data...\n') - object <- liger::normalize(object) + if (verbose) { + cat('\nNormalizing data...\n') + } + object <- rliger::normalize(object) } if (scale) { - cat('Scaling (but not centering) data...') - object <- liger::scaleNotCenter(object) + if (verbose) { + cat('Scaling (but not centering) data...') + } + object <- rliger::scaleNotCenter(object) } return(object) @@ -3278,28 +3368,20 @@ imputeKNN <- function(object, reference, queries, knn_k = 20, weight = TRUE, nor #' @param compare.method This indicates the metric of the test. Either 'clusters' or 'datasets'. #' #' @return A 10-columns data.frame with test results. +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' X <- matrix(c(1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z, x_set = X)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 20) -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object based on in-memory datasets), factorization complete #' wilcox.results <- runWilcoxon(ligerex, compare.method = "cluster") #' wilcox.results <- runWilcoxon(ligerex, compare.method = "datastes", data.use = c(1, 2)) #' # HDF5 input -#' ligerex <- online_iNMF(ligerex, k = 20) -#' ligerex <- quantile_norm(ligerex) +#' # ligerex (liger object based on datasets in HDF5 format), factorization complete +#' # Need to sample cells before implementing Wilcoxon test #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) #' de_genes <- runWilcoxon(ligerex, compare.method = "clusters") #' } -#' + runWilcoxon <- function(object, data.use = "all", compare.method) { # check parameter inputs if (missing(compare.method)) { @@ -3319,20 +3401,20 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { if (class(object@raw.data[[1]])[1] == "H5File"){ if (is.null(object@h5file.info[[1]][["sample.data.type"]])){ - print("Need to sample data before Wilcoxon test for HDF5 input.") + message("Need to sample data before Wilcoxon test for HDF5 input.") } else { - print(paste0("Running Wilcoxon test on ", object@h5file.info[[1]][["sample.data.type"]])) + message("Running Wilcoxon test on ", object@h5file.info[[1]][["sample.data.type"]]) } } ### create feature x sample matrix if (data.use == "all" | length(data.use) > 1) { # at least two datasets if (data.use == "all") { - print(paste0("Performing Wilcoxon test on ALL datasets: ", toString(names(object@norm.data)))) + message("Performing Wilcoxon test on ALL datasets: ", toString(names(object@norm.data))) sample.list <- attributes(object@norm.data)$names } else { - print(paste0("Performing Wilcoxon test on GIVEN datasets: ", toString(data.use))) + message("Performing Wilcoxon test on GIVEN datasets: ", toString(data.use)) sample.list <- data.use } # get all shared genes of every datasets @@ -3359,7 +3441,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { cell_source <- cell_source[colnames(feature_matrix), drop = TRUE] clusters <- object@clusters[colnames(feature_matrix), drop = TRUE] # from which cluster } else { # for one dataset only - print(paste0("Performing Wilcoxon test on GIVEN dataset: ", data.use)) + message("Performing Wilcoxon test on GIVEN dataset: ", data.use) if (class(object@norm.data[[data.use]])[[1]] == "dgCMatrix") { feature_matrix <- object@norm.data[[data.use]] clusters <- object@clusters[object@norm.data[[data.use]]@Dimnames[[2]], drop = TRUE] # from which cluster @@ -3373,7 +3455,7 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { if (compare.method == "clusters") { # compare between clusters across datasets len <- nrow(feature_matrix) if (len > 100000) { - print("Calculating Large-scale Input...") + message("Calculating Large-scale Input...") results <- Reduce(rbind, lapply(suppressWarnings(split(seq(len), seq(len / 100000))), function(index) { wilcoxauc(log(feature_matrix[index, ] + 1e-10), clusters) })) @@ -3388,13 +3470,12 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { sub_label <- paste0(cluster, "-", cell_source[sub_barcodes]) # data source for each cell sub_matrix <- feature_matrix[, sub_barcodes] if (length(unique(cell_source[sub_barcodes])) == 1) { # if cluster has only 1 data source - print(paste0("Note: Skip Cluster ", cluster, " since it has only ONE data source.")) + message("Note: Skip Cluster ", cluster, " since it has only ONE data source.") return() } return(wilcoxauc(log(sub_matrix + 1e-10), sub_label)) })) } - return(results) } @@ -3412,21 +3493,23 @@ runWilcoxon <- function(object, data.use = "all", compare.method) { #' this threshold are considered significant. The default is 0.05. #' @param dist This indicates the type of correlation to calculate -- one of “spearman” (default), "pearson", or "kendall". #' @param path_to_coords Path to the gene coordinates file. +#' @param verbose Print messages (TRUE by default) #' #' @return a sparse matrix with peak names as rows and gene symbols as columns, with each element indicating the #' correlation between peak i and gene j (or 0 if the gene and peak are not significantly linked). -#' @export +#' #' @importFrom utils read.csv2 +#' @export #' @examples #' \dontrun{ -#' gmat.small <- readRDS("../testdata/small_gmat.RDS") # some gene counts matrix -#' pmat.small <- readRDS("../testdata/small_pmat.RDS") # some peak counts matrix +#' # some gene counts matrix: gmat.small +#' # some peak counts matrix: pmat.small #' regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", #' alpha = 0.05, path_to_coords = 'some_path') #' } -#' + linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist = "spearman", - alpha = 0.05, path_to_coords) { + alpha = 0.05, path_to_coords, verbose = TRUE) { ## check dependency if (!requireNamespace("GenomicRanges", quietly = TRUE)) { stop("Package \"GenomicRanges\" needed for this function to work. Please install it by command:\n", @@ -3459,7 +3542,7 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist ) ### make Granges object for genes - gene.names <- read.csv2(path_to_coords, sep = "\t", header = FALSE, stringsAsFactors = F) + gene.names <- read.csv2(path_to_coords, sep = "\t", header = FALSE, stringsAsFactors = FALSE) gene.names <- gene.names[complete.cases(gene.names), ] genes.coords <- GenomicRanges::GRanges( seqnames = gene.names$V1, @@ -3476,18 +3559,15 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist genes.list <- colnames(gene_counts) } missing_genes <- !genes.list %in% names(genes.coords) - if (sum(missing_genes)!=0){ - print( - paste0( - "Removing ", sum(missing_genes), - " genes not found in given gene coordinates..." - ) - ) + if (sum(missing_genes)!=0 && verbose){ + message("Removing ", sum(missing_genes), " genes not found in given gene coordinates...") } genes.list <- genes.list[!missing_genes] genes.coords <- genes.coords[genes.list] - print("Calculating correlation for gene-peak pairs...") + if (verbose) { + message("Calculating correlation for gene-peak pairs...") + } each.len <- 0 # assign('each.len', 0, envir = globalenv()) @@ -3557,15 +3637,17 @@ linkGenesAndPeaks <- function(gene_counts, peak_counts, genes.list = NULL, dist #' @param path_to_coords Path to the gene coordinates file. #' #' @return An Interact Track file stored in the specified path. -#' @export +#' #' @importFrom stats complete.cases #' @importFrom utils write.table +#' +#' @export #' @examples #' \dontrun{ -#' regent <- readRDS("../testdata/small_gmat.RDS") # some gene-peak correlation matrix +#' # some gene-peak correlation matrix: regent #' makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') #' } -#' + makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) { # get genomic coordinates if (missing(path_to_coords)) { @@ -3574,7 +3656,7 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) ### make Granges object for genes genes.coords <- read.csv2(path_to_coords, - sep = "\t", header = F, colClasses = + sep = "\t", header = FALSE, colClasses = c("character", "integer", "integer", "character", "NULL", "NULL") ) genes.coords <- genes.coords[complete.cases(genes.coords$V4), ] @@ -3649,9 +3731,9 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) ) } - print(paste0("A total of ", genes_not_existed, " genes do not exist in input matrix.")) - print(paste0("A total of ", filtered_genes, " genes do not have significant correlated peaks.")) - print(paste0("The Interaction Track is stored in Path: ", output_path)) + message("A total of ", genes_not_existed, " genes do not exist in input matrix.") + message("A total of ", filtered_genes, " genes do not have significant correlated peaks.") + message("The Interaction Track is stored in Path: ", output_path) } @@ -3669,25 +3751,18 @@ makeInteractTrack <- function(corr.mat, genes.list, output_path, path_to_coords) #' this function will use all the gene symbols from the input matrix by default #' #' @return A list of matrices with GSEA analysis for each factor +#' #' @importFrom methods .hasSlot +#' #' @export #' @examples #' \dontrun{ -#' Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -#' Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -#' X <- matrix(c(1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6), nrow = 4, byrow = T) -#' ligerex <- createLiger(list(y_set = Y, z_set = Z, x_set = X)) -#' ligerex <- normalize(ligerex) -#' # select genes -#' ligerex <- selectGenes(ligerex) -#' ligerex <- scaleNotCenter(ligerex) -#' ligerex <- optimizeALS(ligerex, k = 20) -#' ligerex <- quantil_norm(ligerex) +#' # ligerex (liger object), factorization complete #' wilcox.results <- runGSEA(ligerex) #' wilcox.results <- runGSEA(ligerex, mat_v = c(1, 2)) #' } #' -runGSEA <- function(object, gene_sets = c(), mat_w = T, mat_v = 0, custom_gene_sets = c()) { +runGSEA <- function(object, gene_sets = c(), mat_w = TRUE, mat_v = 0, custom_gene_sets = c()) { if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) { stop("Package \"org.Hs.eg.db\" needed for this function to work. Please install it by command:\n", "BiocManager::install('org.Hs.eg.db')", @@ -3791,21 +3866,22 @@ runGSEA <- function(object, gene_sets = c(), mat_w = T, mat_v = 0, custom_gene_s #' @param rand.seed Random seed for reproducibility (default 42). #' #' @return \code{liger} object with tsne.coords slot set. +#' #' @importFrom Rtsne Rtsne +#' #' @export #' @examples #' \dontrun{ -#' # liger object -#' ligerex -#' # generate H.norm by quantile aligning factor loadings -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete +#' # generate H.norm by quantile normalizig factor loadings +#' ligerex <- quantile_norm(ligerex) #' # get tsne.coords for normalized data #' ligerex <- runTSNE(ligerex) #' # get tsne.coords for raw factor loadings -#' ligerex <- runTSNE(ligerex, use.raw = T) +#' ligerex <- runTSNE(ligerex, use.raw = TRUE) #' } -runTSNE <- function(object, use.raw = F, dims.use = 1:ncol(object@H.norm), use.pca = F, +runTSNE <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), use.pca = FALSE, perplexity = 30, theta = 0.5, method = "Rtsne", fitsne.path = NULL, rand.seed = 42) { if (use.raw) { @@ -3819,7 +3895,7 @@ runTSNE <- function(object, use.raw = F, dims.use = 1:ncol(object@H.norm), use.p if (method == "Rtsne") { set.seed(rand.seed) object@tsne.coords <- Rtsne(data.use[, dims.use], - pca = use.pca, check_duplicates = F, + pca = use.pca, check_duplicates = FALSE, theta = theta, perplexity = perplexity )$Y } else if (method == "fftRtsne") { @@ -3827,7 +3903,7 @@ runTSNE <- function(object, use.raw = F, dims.use = 1:ncol(object@H.norm), use.p # if (is.null(fitsne.path)) { # stop('Please pass in path to FIt-SNE directory as fitsne.path.') # } - # source(paste0(fitsne.path, '/fast_tsne.R'), chdir = T) + # source(paste0(fitsne.path, '/fast_tsne.R'), chdir = TRUE) # } # object@tsne.coords <- fftRtsne(data.use[, dims.use], rand_seed = rand.seed, # theta = theta, perplexity = perplexity) @@ -3872,20 +3948,20 @@ runTSNE <- function(object, use.raw = F, dims.use = 1:ncol(object@H.norm), use.p #' @param rand.seed Random seed for reproducibility (default 42). #' #' @return \code{liger} object with tsne.coords slot set. +#' #' @export #' @examples #' \dontrun{ -#' # liger object with factorization complete -#' ligerex -#' # generate H.norm by quantile aligning factor loadings +#' # ligerex (liger object), factorization complete +#' # generate H.norm by quantile normalizig factor loadings #' ligerex <- quantileAlignSNF(ligerex) #' # get tsne.coords for normalized data #' ligerex <- runUMAP(ligerex) #' # get tsne.coords for raw factor loadings -#' ligerex <- runUMAP(ligerex, use.raw = T) +#' ligerex <- runUMAP(ligerex, use.raw = TRUE) #' } -runUMAP <- function(object, use.raw = F, dims.use = 1:ncol(object@H.norm), k = 2, +runUMAP <- function(object, use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", n_neighbors = 10, min_dist = 0.1, rand.seed = 42) { set.seed(rand.seed) if (use.raw) { @@ -3928,17 +4004,19 @@ runUMAP <- function(object, use.raw = F, dims.use = 1:ncol(object@H.norm), k = 2 #' #' @return List containing three elements. First two elements are the norm of each metagene factor #' for each dataset. Last element is the vector of dataset specificity scores. +#' #' @importFrom graphics barplot +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete +#' # generate H.norm by quantile normalizig factor loadings +#' ligerex <- quantile_norm(ligerex) #' dataset_spec <- calcDatasetSpecificity(ligerex, do.plot = F) #' } -calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do.plot = T) { +calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do.plot = TRUE) { if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] @@ -3991,17 +4069,19 @@ calcDatasetSpecificity <- function(object, dataset1 = NULL, dataset2 = NULL, do. #' @param by.dataset Return agreement calculated for each dataset (default FALSE). #' #' @return Agreement metric (or vector of agreement per dataset). +#' #' @importFrom FNN get.knn #' @importFrom ica icafast #' @importFrom irlba prcomp_irlba +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object based on in-memory datasets), factorization complete +#' # generate H.norm by quantile normalizig factor loadings +#' ligerex <- quantile_norm(ligerex) #' agreement <- calcAgreement(ligerex, dr.method = "NMF") -#' # HDF5 input +#' # ligerex (liger object based on datasets in HDF5 format), factorization complete #' ligerex <- quantile_norm(ligerex) #' ligerex <- readSubset(ligerex, slot.use = "scale.data", max.cells = 5000) #' agreement <- calcAgreement(ligerex, dr.method = "NMF") @@ -4020,7 +4100,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali } } - print(paste("Reducing dimensionality using", dr.method)) + message("Reducing dimensionality using ", dr.method) set.seed(rand.seed) dr <- list() if (dr.method == "NMF") { @@ -4052,7 +4132,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali for (i in 1:length(object@H)){ dr[[i]] = suppressWarnings(prcomp_irlba(object@sample.data[[i]], n = ndims, - scale. = (colSums(t(object@sample.data[[i]])) > 0), center = F + scale. = (colSums(t(object@sample.data[[i]])) > 0), center = FALSE )$rotation) rownames(dr[[i]]) = colnames(object@sample.data[[i]]) } @@ -4061,7 +4141,7 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali dr <- lapply(object@scale.data, function(x) { suppressWarnings(prcomp_irlba(t(x), n = ndims, - scale. = (colSums(x) > 0), center = F + scale. = (colSums(x) > 0), center = FALSE )$rotation) }) for (i in 1:length(dr)) { @@ -4120,21 +4200,21 @@ calcAgreement <- function(object, dr.method = "NMF", ndims = 40, k = 15, use.ali #' @param clusters.use Names of clusters to use in calculating alignment (default NULL). #' @param by.cell Return alignment calculated individually for each cell (default FALSE). #' @param by.dataset Return alignment calculated for each dataset (default FALSE). -#' @param do.kbet Return kBET metric (default FALSE). #' #' @return Alignment metric. +#' #' @importFrom FNN get.knn +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object ), factorization complete #' ligerex <- quantile_norm(ligerex) #' alignment <- calcAlignment(ligerex) #' } calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cells.comp = NULL, - clusters.use = NULL, by.cell = F, by.dataset = F, do.kbet = F) { + clusters.use = NULL, by.cell = FALSE, by.dataset = FALSE) { if (is.null(cells.use)) { cells.use <- rownames(object@H.norm) } @@ -4146,7 +4226,7 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel num_cells <- length(c(cells.use, cells.comp)) func_H <- list(cells1 = nmf_factors[cells.use, ], cells2 = nmf_factors[cells.comp, ]) - print('Using designated sets cells.use and cells.comp as subsets to compare') + message('Using designated sets cells.use and cells.comp as subsets to compare') } else { nmf_factors <- object@H.norm[cells.use, ] num_cells <- length(cells.use) @@ -4156,7 +4236,7 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel object@H[[x]][cells.overlap, ] } else { warning(paste0("Selected subset eliminates dataset ", names(object@H)[x]), - immediate. = T + immediate. = TRUE ) return(NULL) } @@ -4166,7 +4246,7 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel num_factors <- ncol(object@H.norm) N <- length(func_H) if (N == 1) { - warning("Alignment null for single dataset", immediate. = T) + warning("Alignment null for single dataset", immediate. = TRUE) } set.seed(rand.seed) min_cells <- min(sapply(func_H, function(x) { @@ -4195,13 +4275,6 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel names(dataset) <- rownames(nmf_factors) dataset <- dataset[sampled_cells] - if(do.kbet) - { - kBET_res = kBET::kBET(nmf_factors[sampled_cells, 1:num_factors],dataset,k0=k, - knn=knn_graph,heuristic=F,n_repeat=100,do.pca=F,testSize=1000) - return(kBET_res) - } - num_sampled <- N * min_cells num_same_dataset <- rep(k, num_sampled) @@ -4238,24 +4311,25 @@ calcAlignment <- function(object, k = NULL, rand.seed = 1, cells.use = NULL, cel #' @param by.dataset Return alignment calculated for each dataset in cluster (default FALSE). #' #' @return Vector of alignment statistics (with names of clusters). +#' #' @importFrom FNN get.knn +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete +#' ligerex <- quantile_norm(ligerex) #' # get alignment for each cluster #' alignment_per_cluster <- calcAlignmentPerCluster(ligerex) #' } -calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset = F) { +calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset = FALSE) { clusters <- levels(object@clusters) if (typeof(k) == "double") { if (length(k) == 1) { k <- rep(k, length(clusters)) } else if (length(k) != length(clusters)) { - print("Length of k does not match length of clusters") + stop("Length of k does not match length of clusters") } } align_metrics <- sapply(seq_along(clusters), function(x) { @@ -4282,20 +4356,21 @@ calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset #' #' @param object \code{liger} object. Should run quantileAlignSNF before calling. #' @param clusters.compare Clustering with which to compare (named vector). -#' @return Value of ARI -#' @importFrom mclust adjustedRandIndex +#' @param verbose Print messages (TRUE by default) #' #' @return Adjusted Rand index value. +#' +#' @importFrom mclust adjustedRandIndex +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization done -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete +#' ligerex <- quantile_norm(ligerex) #' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = T) +#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) #' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = T) +#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) #' names(cluster2) <- colnames(ligerex@raw.data[[2]]) #' # get ARI for first clustering #' ari1 <- calcARI(ligerex, cluster1) @@ -4303,9 +4378,9 @@ calcAlignmentPerCluster <- function(object, rand.seed = 1, k = NULL, by.dataset #' ari2 <- calcARI(ligerex, cluster2) #' } -calcARI <- function(object, clusters.compare) { - if (length(clusters.compare) < length(object@clusters)) { - print("Calculating ARI for subset of full cells") +calcARI <- function(object, clusters.compare, verbose = TRUE) { + if (length(clusters.compare) < length(object@clusters) && verbose) { + message("Calculating ARI for subset of all cells") } return(adjustedRandIndex(object@clusters[names(clusters.compare)], clusters.compare)) @@ -4320,27 +4395,28 @@ calcARI <- function(object, clusters.compare) { #' #' @param object \code{liger} object. Should run quantileAlignSNF before calling. #' @param classes.compare Clustering with which to compare (named vector). +#' @param verbose Print messages (TRUE by default) #' #' @return Purity value. +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization done -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete +#' ligerex <- quantile_norm(ligerex) #' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = T) +#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) #' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = T) +#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) #' names(cluster2) <- colnames(ligerex@raw.data[[2]]) #' # get ARI for first clustering -#' ari1 <- calcARI(ligerex, cluster1) +#' ari1 <- calcPurity(ligerex, cluster1) #' # get ARI for second clustering -#' ari2 <- calcARI(ligerex, cluster2) +#' ari2 <- calcPurity(ligerex, cluster2) #' } -calcPurity <- function(object, classes.compare) { - if (length(classes.compare) < length(object@clusters)) { +calcPurity <- function(object, classes.compare, verbose = TRUE) { + if (length(classes.compare) < length(object@clusters) && verbose) { print("Calculating purity for subset of full cells") } clusters <- object@clusters[names(classes.compare)] @@ -4355,16 +4431,17 @@ calcPurity <- function(object, classes.compare) { #' #' @param object \code{liger} object. #' @param use.norm Whether to use cell normalized data in calculating contribution (default FALSE). +#' #' @return Named vector containing proportion of mitochondrial contribution for each cell. +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization done -#' ligerex +#' # ligerex (liger object), factorization complete #' ligerex@cell.data[["percent_mito"]] <- getProportionMito(ligerex) #' } -getProportionMito <- function(object, use.norm = F) { +getProportionMito <- function(object, use.norm = FALSE) { all.genes <- Reduce(union, lapply(object@raw.data, rownames)) mito.genes <- grep(pattern = "^mt-", x = all.genes, value = TRUE) data.use <- object@raw.data @@ -4373,7 +4450,7 @@ getProportionMito <- function(object, use.norm = F) { } percent_mito <- unlist(lapply(unname(data.use), function(x) { colSums(x[mito.genes, ]) / colSums(x) - }), use.names = T) + }), use.names = TRUE) return(percent_mito) } @@ -4408,26 +4485,27 @@ getProportionMito <- function(object, use.norm = F) { #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to #' console). -#' @export +#' #' @importFrom ggplot2 ggplot geom_point geom_text ggtitle guides guide_legend aes theme xlab ylab #' @importFrom dplyr %>% group_by summarize +#' +#' @export #' @examples #' \dontrun{ -#' # liger object with aligned factor loadings -#' ligerex +#' # ligerex (liger object), factorization complete #' # get tsne.coords for normalized data #' ligerex <- runTSNE(ligerex) #' # plot to console #' plotByDatasetAndCluster(ligerex) #' # return list of plots -#' plots <- plotByDatasetAndCluster(ligerex, return.plots = T) +#' plots <- plotByDatasetAndCluster(ligerex, return.plots = TRUE) #' } plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.size = 0.3, - text.size = 3, do.shuffle = T, rand.seed = 1, - axis.labels = NULL, do.legend = T, legend.size = 5, - reorder.idents = F, new.order = NULL, - return.plots = F, legend.fonts.size = 12) { + text.size = 3, do.shuffle = TRUE, rand.seed = 1, + axis.labels = NULL, do.legend = TRUE, legend.size = 5, + reorder.idents = FALSE, new.order = NULL, + return.plots = FALSE, legend.fonts.size = 12) { tsne_df <- data.frame(object@tsne.coords) colnames(tsne_df) <- c("tsne1", "tsne2") tsne_df[['Dataset']] <- unlist(lapply(1:length(object@H), function(x) { @@ -4475,12 +4553,12 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si p1 <- p1 + xlab(axis.labels[1]) + ylab(axis.labels[2]) p2 <- p2 + xlab(axis.labels[1]) + ylab(axis.labels[2]) } + p1 <- p1 + theme_cowplot(12) + p2 <- p2 + theme_cowplot(12) if (!do.legend) { p1 <- p1 + theme(legend.position = "none") p2 <- p2 + theme(legend.position = "none") } - p1 <- p1 + theme_cowplot(12) - p2 <- p2 + theme_cowplot(12) if (return.plots) { return(list(p1, p2)) } else { @@ -4493,7 +4571,7 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si #' #' Generates one plot for each dataset, colored by chosen feature (column) from cell.data slot. #' Feature can be categorical (factor) or continuous. -#' Can also plot all datasets combined with by.dataset = F. +#' Can also plot all datasets combined with by.dataset = FALSE. #' #' @param object \code{liger} object. Should call runTSNE or runUMAP before calling. #' @param feature Feature to plot (should be column from cell.data slot). @@ -4518,25 +4596,26 @@ plotByDatasetAndCluster <- function(object, clusters = NULL, title = NULL, pt.si #' #' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to #' console). -#' @export +#' #' @importFrom ggplot2 ggplot geom_point geom_text ggtitle aes guides guide_legend labs #' scale_color_viridis_c scale_color_gradientn theme xlab ylab #' @importFrom dplyr %>% group_by summarize #' @importFrom stats median +#' +#' @export #' @examples #' \dontrun{ -#' # liger object with aligned factor loadings -#' ligerex +#' # ligerex (liger object), factorization complete #' # get tsne.coords for normalized data #' ligerex <- runTSNE(ligerex) #' # plot nUMI to console #' plotFeature(ligerex, feature = 'nUMI') #' } -plotFeature <- function(object, feature, by.dataset = T, discrete = NULL, title = NULL, - pt.size = 0.3, text.size = 3, do.shuffle = T, rand.seed = 1, do.labels = F, - axis.labels = NULL, do.legend = T, legend.size = 5, option = 'plasma', - cols.use = NULL, zero.color = '#F5F5F5', return.plots = F) { +plotFeature <- function(object, feature, by.dataset = TRUE, discrete = NULL, title = NULL, + pt.size = 0.3, text.size = 3, do.shuffle = TRUE, rand.seed = 1, do.labels = FALSE, + axis.labels = NULL, do.legend = TRUE, legend.size = 5, option = 'plasma', + cols.use = NULL, zero.color = '#F5F5F5', return.plots = FALSE) { dr_df <- data.frame(object@tsne.coords) colnames(dr_df) <- c("dr1", "dr2") if (!(feature %in% colnames(object@cell.data))) { @@ -4639,16 +4718,18 @@ plotFeature <- function(object, feature, by.dataset = T, discrete = NULL, title #' @param num.genes Number of genes to display for each factor (default 10). #' @param cells.highlight Names of specific cells to highlight in plot (black) (default NULL). #' @param plot.tsne Plot t-SNE coordinates for each factor (default FALSE). +#' @param verbose Print messages (TRUE by default) #' #' @return Plots to console (1-2 pages per factor) +#' #' @importFrom graphics legend par plot #' @importFrom grDevices rainbow +#' #' @export #' @examples #' \dontrun{ -#' # liger object with factorization complete -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete +#' ligerex <- quantile_norm(ligerex) #' # get tsne.coords for normalized data #' ligerex <- runTSNE(ligerex) #' # factor plots into pdf file @@ -4657,16 +4738,20 @@ plotFeature <- function(object, feature, by.dataset = T, discrete = NULL, title #' # dev.off() #' } -plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsne = F) { +plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsne = FALSE, verbose = TRUE) { k <- ncol(object@H.norm) - pb <- txtProgressBar(min = 0, max = k, style = 3) - + if (verbose) { + pb <- txtProgressBar(min = 0, max = k, style = 3) + } W <- t(object@W) rownames(W) <- colnames(object@H[[1]]) Hs_norm <- object@H.norm + # restore default settings when the current function exits + init_par <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(init_par)) for (i in 1:k) { graphics::par(mfrow = c(2, 1)) - top_genes.W <- rownames(W)[order(W[, i], decreasing = T)[1:num.genes]] + top_genes.W <- rownames(W)[order(W[, i], decreasing = TRUE)[1:num.genes]] top_genes.W.string <- paste0(top_genes.W, collapse = ", ") factor_textstring <- paste0("Factor", i) @@ -4685,7 +4770,7 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn cex = 0.2, pch = 20, col = cols, main = plot_title1, xlab = "Cell", ylab = "Raw H Score" ) - graphics::legend("top", names(object@H), pch = 20, col = cols.use, horiz = T, cex = 0.75) + graphics::legend("top", names(object@H), pch = 20, col = cols.use, horiz = TRUE, cex = 0.75) graphics::plot(1:nrow(Hs_norm), object@H.norm[, i], pch = 20, cex = 0.2, col = cols, xlab = "Cell", ylab = "H_norm Score" @@ -4694,7 +4779,9 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn graphics::par(mfrow = c(1, 1)) fplot(object@tsne.coords, object@H.norm[, i], title = paste0("Factor ", i)) } - setTxtProgressBar(pb, i) + if (verbose) { + setTxtProgressBar(pb, i) + } } } @@ -4722,6 +4809,10 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' (default 0.05). #' @param do.spec.plot Include dataset specificity plot in printout (default TRUE). #' @param return.plots Return ggplot objects instead of printing directly (default FALSE). +#' @param verbose Print progress bar/messages (TRUE by default) +#' +#' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to +#' console). #' #' @importFrom ggrepel geom_text_repel #' @importFrom ggplot2 ggplot aes aes_string geom_point ggtitle scale_color_gradient scale_size @@ -4729,24 +4820,24 @@ plotFactors <- function(object, num.genes = 10, cells.highlight = NULL, plot.tsn #' @importFrom grid roundrectGrob #' @importFrom grid gpar #' @importFrom cowplot draw_grob +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object based on in-memory datasets), factorization complete +#' ligerex <- quantile_norm(ligerex) #' ligerex <- runTSNE(ligerex) #' # pdf('word_clouds.pdf') #' plotWordClouds(ligerex, num.genes = 20) #' # dev.off() -#' # HDF5 input +#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) #' plotWordClouds(ligerex, num.genes = 20) #' } plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = 30, min.size = 1, max.size = 4, factor.share.thresh = 10, log.fc.thresh = 1, pval.thresh = 0.05, - do.spec.plot = T, return.plots = F) { + do.spec.plot = TRUE, return.plots = FALSE, verbose = TRUE) { if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] @@ -4774,13 +4865,16 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = factor.share.thresh = factor.share.thresh, num.genes = num.genes, log.fc.thresh = log.fc.thresh, pval.thresh = pval.thresh, - dataset.specificity = dataset.specificity + dataset.specificity = dataset.specificity, + verbose = verbose ) rownames(W) <- rownames(V1) <- rownames(V2) <- object@var.genes loadings_list <- list(V1, W, V2) names_list <- list(dataset1, "Shared", dataset2) - pb <- txtProgressBar(min = 0, max = length(factors.use), style = 3) + if (verbose) { + pb <- txtProgressBar(min = 0, max = length(factors.use), style = 3) + } return_plots <- list() for (i in factors.use) { tsne_df <- data.frame(H_aligned[, i], tsne_coords) @@ -4826,7 +4920,9 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = if (!return.plots) { print(return_plots[[i]]) } - setTxtProgressBar(pb, i) + if (verbose) { + setTxtProgressBar(pb, i) + } } if (return.plots) { return(return_plots) @@ -4866,6 +4962,10 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' @param return.plots Return ggplot objects instead of printing directly (default FALSE). #' @param axis.labels Vector of two strings to use as x and y labels respectively (default NULL). #' @param do.title Include top title with cluster and Dataset Specificity (default FALSE). +#' @param verbose Print progress bar/messages (TRUE by default) +#' +#' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to +#' console). #' #' @importFrom ggplot2 aes aes_string annotate coord_cartesian element_blank ggplot geom_point #' ggtitle scale_color_viridis_c theme @@ -4874,27 +4974,27 @@ plotWordClouds <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes = #' @import patchwork #' @importFrom stats loadings #' @importFrom cowplot theme_cowplot +#' #' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object based on in-memory datasets), factorization complete #' ligerex <- quantile_norm(ligerex) #' ligerex <- runUMAP(ligerex) #' # pdf("gene_loadings.pdf") #' plotGeneLoadings(ligerex, num.genes = 20) #' # dev.off() -#' # HDF5 input +#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) #' plotGeneLoadings(ligerex, num.genes = 20) #' } #' plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes.show = 12, - num.genes = 30, mark.top.genes = T, factor.share.thresh = 10, + num.genes = 30, mark.top.genes = TRUE, factor.share.thresh = 10, log.fc.thresh = 1, umi.thresh = 30, frac.thresh = 0, - pval.thresh = 0.05, do.spec.plot = T, max.val = 0.1, pt.size = 0.1, - option = "plasma", zero.color = "#F5F5F5", return.plots = F, - axis.labels = NULL, do.title = F) { + pval.thresh = 0.05, do.spec.plot = TRUE, max.val = 0.1, pt.size = 0.1, + option = "plasma", zero.color = "#F5F5F5", return.plots = FALSE, + axis.labels = NULL, do.title = FALSE, verbose = TRUE) { if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] @@ -4927,13 +5027,16 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes factor.share.thresh = factor.share.thresh, num.genes = num.genes, log.fc.thresh = log.fc.thresh, pval.thresh = pval.thresh, - dataset.specificity = dataset.specificity + dataset.specificity = dataset.specificity, + verbose = verbose ) rownames(W) <- rownames(V1) <- rownames(V2) <- rownames(W_orig) <- object@var.genes loadings_list <- list(V1, W, V2) names_list <- list(dataset1, "Shared", dataset2) - pb <- txtProgressBar(min = 0, max = length(factors.use), style = 3) + if (verbose) { + pb <- txtProgressBar(min = 0, max = length(factors.use), style = 3) + } return_plots <- list() for (i in factors.use) { tsne_df <- data.frame(H_aligned[, i], tsne_coords) @@ -5042,7 +5145,9 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes if (!return.plots) { print(return_plots[[i]]) } - setTxtProgressBar(pb, i) + if (verbose) { + setTxtProgressBar(pb, i) + } } if (return.plots) { return(return_plots) @@ -5061,23 +5166,26 @@ plotGeneLoadings <- function(object, dataset1 = NULL, dataset2 = NULL, num.genes #' @param return.plots Return ggplot objects instead of printing directly to console (default #' FALSE). #' -#' @export +#' @return List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to +#' console). +#' #' @importFrom cowplot plot_grid #' @importFrom ggplot2 aes_string ggplot geom_point geom_boxplot geom_violin ggtitle labs #' scale_color_gradient2 theme +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object based on in-memory datasets), factorization complete #' # plot expression for CD4 and return plots -#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = T) -#' HDF5 input +#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) +#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = T) +#' violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) #' } plotGeneViolin <- function(object, gene, methylation.indices = NULL, - by.dataset = T, return.plots = F) { + by.dataset = TRUE, return.plots = FALSE) { if (class(object@raw.data[[1]])[1] == "H5File"){ if (object@h5file.info[[1]][["sample.data.type"]] != "norm.data"){ stop("norm.data should be sampled for making violin plots.") @@ -5131,8 +5239,8 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, gene_df.sub$Cluster <- object@clusters title <- "All Datasets" } - max_v <- max(gene_df.sub["gene"], na.rm = T) - min_v <- min(gene_df.sub["gene"], na.rm = T) + max_v <- max(gene_df.sub["gene"], na.rm = TRUE) + min_v <- min(gene_df.sub["gene"], na.rm = TRUE) midpoint <- (max_v - min_v) / 2 plot_i <- ggplot(gene_df.sub, aes_string(x = "Cluster", y = "gene", fill = "Cluster")) + geom_boxplot(position = "dodge", width = 0.4, outlier.shape = NA, alpha = 0.7) + @@ -5166,7 +5274,7 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' @param scale.by Grouping of cells by which to scale gene (can be any factor column in cell.data #' or 'none' for scaling across all cells) (default 'dataset'). #' @param log2scale Whether to show log2 transformed values or original normalized, raw, or scaled -#' values (as stored in object). Default value is FALSE if use.raw = T, otherwise TRUE. +#' values (as stored in object). Default value is FALSE if use.raw = TRUE, otherwise TRUE. #' @param methylation.indices Indices of datasets in object with methylation data (this data is not #' log transformed and must use normalized values). (default NULL) #' @param plot.by How to group cells for plotting (can be any factor column in cell.data or 'none' @@ -5176,11 +5284,11 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' plots created (default FALSE). #' @param pt.size Point size for plots (default 0.1). #' @param min.clip Minimum value for expression values plotted. Can pass in quantile (0-1) or -#' absolute cutoff (set clip.absolute = T). Can also pass in vector if expecting multiple plots; +#' absolute cutoff (set clip.absolute = TRUE). Can also pass in vector if expecting multiple plots; #' users are encouraged to pass in named vector (from levels of desired feature) to avoid #' mismatches in order (default NULL). #' @param max.clip Maximum value for expression values plotted. Can pass in quantile (0-1) or -#' absolute cutoff (set clip.absolute = T). Can also pass in vector if expecting multiple plots; +#' absolute cutoff (set clip.absolute = TRUE). Can also pass in vector if expecting multiple plots; #' users are encouraged to pass in named vector (from levels of desired feature) to avoid #' mismatches in order (default NULL). #' @param clip.absolute Whether to treat clip values as absolute cutoffs instead of quantiles @@ -5197,29 +5305,31 @@ plotGeneViolin <- function(object, gene, methylation.indices = NULL, #' #' @return If returning single plot, returns ggplot object; if returning multiple plots; returns #' list of ggplot objects. -#' @export +#' #' @importFrom dplyr %>% group_by mutate_at vars group_cols #' @importFrom ggplot2 ggplot geom_point aes_string element_blank ggtitle labs xlim ylim #' scale_color_viridis_c scale_color_gradientn theme #' @importFrom stats quantile +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete +#' # ligerex (liger object based on in-memory datasets), factorization complete #' ligerex #' ligerex <- runTSNE(ligerex) #' # plot expression for CD4 and return plots -#' gene_plots <- plotGene(ligerex, "CD4", return.plots = T) -#' HDF5 input +#' gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) +#' # ligerex (liger object based on datasets in HDF5 format), factorization complete input #' ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -#' gene_plots <- plotGene(ligerex, "CD4", return.plots = T) +#' gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) #' } -plotGene <- function(object, gene, use.raw = F, use.scaled = F, scale.by = 'dataset', +plotGene <- function(object, gene, use.raw = FALSE, use.scaled = FALSE, scale.by = 'dataset', log2scale = NULL, methylation.indices = NULL, plot.by = 'dataset', - set.dr.lims = F, pt.size = 0.1, min.clip = NULL, max.clip = NULL, - clip.absolute = F, points.only = F, option = 'plasma', cols.use = NULL, - zero.color = '#F5F5F5', axis.labels = NULL, do.legend = T, return.plots = F, - keep.scale = F) { + set.dr.lims = FALSE, pt.size = 0.1, min.clip = NULL, max.clip = NULL, + clip.absolute = FALSE, points.only = FALSE, option = 'plasma', cols.use = NULL, + zero.color = '#F5F5F5', axis.labels = NULL, do.legend = TRUE, return.plots = FALSE, + keep.scale = FALSE) { if ((plot.by != scale.by) & (use.scaled)) { warning("Provided values for plot.by and scale.by do not match; results may not be very interpretable.") @@ -5268,7 +5378,7 @@ plotGene <- function(object, gene, use.raw = F, use.scaled = F, scale.by = 'data gene_df1 <- gene_df %>% group_by(.data[['scaleby']]) %>% # scale by selected feature - mutate_at(vars(-group_cols()), function(x) { scale(x, center = F)}) + mutate_at(vars(-group_cols()), function(x) { scale(x, center = FALSE)}) gene_vals <- gene_df1$gene if (log2scale) { gene_vals <- log2(10000 * gene_vals + 1) @@ -5290,7 +5400,7 @@ plotGene <- function(object, gene, use.raw = F, use.scaled = F, scale.by = 'data } } gene_vals[gene_vals == 0] <- NA - # Extract min and max expression values for plot scaling if keep.scale = T + # Extract min and max expression values for plot scaling if keep.scale = TRUE if (keep.scale){ max_exp_val <- max(gene_vals, na.rm = TRUE) min_exp_val <- min(gene_vals, na.rm = TRUE) @@ -5348,8 +5458,8 @@ plotGene <- function(object, gene, use.raw = F, use.scaled = F, scale.by = 'data # maybe do quantile cutoff here group_name <- as.character(sub_df$plotby[1]) if (!clip.absolute) { - max_v <- quantile(sub_df$gene, probs = max.clip[group_name], na.rm = T) - min_v <- quantile(sub_df$gene, probs = min.clip[group_name], na.rm = T) + max_v <- quantile(sub_df$gene, probs = max.clip[group_name], na.rm = TRUE) + min_v <- quantile(sub_df$gene, probs = min.clip[group_name], na.rm = TRUE) } else { max_v <- max.clip[group_name] min_v <- min.clip[group_name] @@ -5435,14 +5545,17 @@ plotGene <- function(object, gene, use.raw = F, use.scaled = F, scale.by = 'data #' #' @param object \code{liger} object. Should call runTSNE before calling. #' @param genes Vector of gene names. -#' @param ... arguments passed from \code{\link[liger]{plotGene}} +#' @param ... arguments passed from \code{\link[rliger]{plotGene}} +#' +#' @return If returning single plot, returns ggplot object; if returning multiple plots; returns +#' list of ggplot objects. #' -#' @export #' @importFrom ggplot2 ggplot geom_point aes_string scale_color_gradient2 ggtitle +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object), factorization complete input #' ligerex <- runTSNE(ligerex) #' # plot expression for CD4 and FCGR3A #' # pdf("gene_plots.pdf") @@ -5484,21 +5597,22 @@ plotGenes <- function(object, genes, ...) { #' @param node.order Order of clusters in each set (list with three vectors of ordinal numbers). #' By default will try to automatically order them appropriately. #' -#' @export +#' @return A riverplot object +#' #' @importFrom plyr mapvalues #' @importFrom riverplot makeRiver #' @importFrom riverplot riverplot #' @importFrom grDevices hcl #' @importFrom utils capture.output +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization done -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete input #' # toy clusters -#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = T) +#' cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) #' names(cluster1) <- colnames(ligerex@raw.data[[1]]) -#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = T) +#' cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) #' names(cluster2) <- colnames(ligerex@raw.data[[2]]) #' # create riverplot #' makeRiverplot(ligerex, cluster1, cluster2) @@ -5517,7 +5631,7 @@ makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, if (length(intersect(levels(cluster1), levels(cluster2))) > 0 | length(intersect(levels(cluster1), levels(cluster_consensus))) > 0 | length(intersect(levels(cluster2), levels(cluster_consensus))) > 0) { - print("Duplicate cluster names detected. Adding 1- and 2- to make unique names.") + message("Duplicate cluster names detected. Adding 1- and 2- to make unique names.") cluster1 <- mapvalues(cluster1, from = levels(cluster1), to = paste("1", levels(cluster1), sep = "-")) cluster2 <- mapvalues(cluster2, from = levels(cluster2), @@ -5633,32 +5747,36 @@ makeRiverplot <- function(object, cluster1, cluster2, cluster_consensus = NULL, #' @param object \code{liger} object. Should call quantileAlignSNF before calling. #' @param return.plot Return ggplot object (default FALSE) #' -#' @export +#' @return print plot to console (return.plot = FALSE); ggplot object (return.plot = TRUE) +#' list of ggplot objects. +#' #' @importFrom grid unit #' @importFrom ggplot2 ggplot aes coord_fixed element_blank geom_point guides guide_legend #' scale_size scale_y_discrete theme +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex -#' ligerex <- quantileAlignSNF(ligerex) +#' # ligerex (liger object), factorization complete input +#' ligerex <- quantile_norm(ligerex) #' # plot cluster proportions #' plotClusterProportions(ligerex) #' } -plotClusterProportions <- function(object, return.plot = F) { +plotClusterProportions <- function(object, return.plot = FALSE) { + sample_names <- unlist(lapply(seq_along(object@H), function(i) { rep(names(object@H)[i], nrow(object@H[[i]])) })) - freq_table <- data.frame(Cluster = rep(object@clusters, length(object@scale.data)), - Sample = sample_names) - freq_table <- table(freq_table[['Cluster']], freq_table[['Sample']]) + freq_table <- data.frame(rep(object@clusters, length(object@scale.data)), + sample_names) + freq_table <- table(freq_table[,1], freq_table[,2]) for (i in 1:ncol(freq_table)) { freq_table[, i] <- freq_table[, i] / sum(freq_table[, i]) } - freq_table <- data.frame(freq_table) + freq_table <- as.data.frame(freq_table) colnames(freq_table) <- c("Cluster", "Sample", "Proportion") - p1 <- ggplot(freq_table, aes(x = Cluster, y = Sample)) + + p1 <- ggplot(freq_table, aes_string(x = "Cluster", y = "Sample")) + geom_point(aes_string(size = 'Proportion', fill = 'Cluster', color = 'Cluster')) + scale_size(guide = "none") + theme( axis.line = element_blank(), @@ -5699,25 +5817,26 @@ plotClusterProportions <- function(object, return.plot = F) { #' @param return.data Return matrix of total factor loadings for each cluster (default FALSE). #' @param ... Additional parameters to pass on to heatmap() #' -#' @export +#' @return If requested, matrix of size num_cluster x num_factor +#' #' @importFrom grDevices colorRampPalette #' @importFrom stats heatmap -#' @return If requested, matrix of size num_cluster x num_factor +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object), factorization complete input #' # plot expression for CD4 and return plots -#' loading.matrix <- plotClusterFactors(ligerex, return.data = T) +#' loading.matrix <- plotClusterFactors(ligerex, return.data = TRUE) #' } -plotClusterFactors <- function(object, use.aligned = F, Rowv = NA, Colv = "Rowv", col = NULL, - return.data = F, ...) { +plotClusterFactors <- function(object, use.aligned = FALSE, Rowv = NA, Colv = "Rowv", col = NULL, + return.data = FALSE, ...) { if (use.aligned) { data.mat <- object@H.norm } else { scaled <- lapply(object@H, function(i) { - scale(i, center = F, scale = T) + scale(i, center = FALSE, scale = TRUE) }) data.mat <- Reduce(rbind, scaled) } @@ -5769,16 +5888,18 @@ plotClusterFactors <- function(object, use.aligned = F, Rowv = NA, Colv = "Rowv" #' (default 0.05). #' @param num.genes Max number of genes to report for each dataset (default 30). #' @param print.genes Print ordered markers passing logfc, umi and frac thresholds (default FALSE). +#' @param verbose Print messages (TRUE by default) #' #' @return List of shared and specific factors. First three elements are dataframes of dataset1- #' specific, shared, and dataset2-specific markers. Last two elements are tables indicating the #' number of factors in which marker appears. -#' @export +#' #' @importFrom stats wilcox.test +#' +#' @export #' @examples #' \dontrun{ -#' # liger object, factorization complete -#' ligerex +#' # ligerex (liger object), factorization complete input #' markers <- getFactorMarkers(ligerex, num.genes = 10) #' # look at shared markers #' head(markers[[2]]) @@ -5786,7 +5907,7 @@ plotClusterFactors <- function(object, use.aligned = F, Rowv = NA, Colv = "Rowv" getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.share.thresh = 10, dataset.specificity = NULL, log.fc.thresh = 1, pval.thresh = 0.05, - num.genes = 30, print.genes = F) { + num.genes = 30, print.genes = FALSE, verbose = TRUE) { if (is.null(dataset1) | is.null(dataset2)) { dataset1 <- names(object@H)[1] dataset2 <- names(object@H)[2] @@ -5796,19 +5917,19 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh } if (is.null(dataset.specificity)) { dataset.specificity <- calcDatasetSpecificity(object, dataset1 = dataset1, - dataset2 = dataset2, do.plot = F) + dataset2 = dataset2, do.plot = FALSE) } factors.use <- which(abs(dataset.specificity[[3]]) <= factor.share.thresh) - if (length(factors.use) < 2) { - print(paste( - "Warning: only", length(factors.use), - "factors passed the dataset specificity threshold." - )) + if (length(factors.use) < 2 && verbose) { + message( + "Warning: only ", length(factors.use), + " factors passed the dataset specificity threshold." + ) } Hs_scaled <- lapply(object@H, function(x) { - scale(x, scale = T, center = T) + scale(x, scale = TRUE, center = TRUE) }) labels <- list() for (i in 1:length(Hs_scaled)) { @@ -5835,7 +5956,7 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh rownames(W) <- rownames(V1) <- rownames(V2) <- object@var.genes # if not max factor for any cell in either dataset if (sum(labels[[dataset1]] == i) <= 1 | sum(labels[[dataset2]] == i) <= 1) { - print(paste("Warning: factor", i, "did not appear as max in any cell in either dataset")) + message("Warning: factor", i, "did not appear as max in any cell in either dataset") next } # filter genes by gene_count and cell_frac thresholds @@ -5859,35 +5980,35 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh filtered_genes_V2 = wilcoxon_result[-wilcoxon_result$logFC > log.fc.thresh & wilcoxon_result$pval < pval.thresh, ]$feature W <- pmin(W + V1, W + V2) - V1 <- V1[filtered_genes_V1, , drop = F] - V2 <- V2[filtered_genes_V2, , drop = F] + V1 <- V1[filtered_genes_V1, , drop = FALSE] + V2 <- V2[filtered_genes_V2, , drop = FALSE] if (length(filtered_genes_V1) == 0) { top_genes_V1 <- character(0) } else { - top_genes_V1 <- row.names(V1)[order(V1[, i], decreasing = T)[1:num.genes] ] + top_genes_V1 <- row.names(V1)[order(V1[, i], decreasing = TRUE)[1:num.genes] ] top_genes_V1 <- top_genes_V1[!is.na(top_genes_V1)] top_genes_V1 <- top_genes_V1[which(V1[top_genes_V1, i] > 0)] } if (length(filtered_genes_V2) == 0) { top_genes_V2 <- character(0) } else { - top_genes_V2 <- row.names(V2)[order(V2[, i], decreasing = T)[1:num.genes] ] + top_genes_V2 <- row.names(V2)[order(V2[, i], decreasing = TRUE)[1:num.genes] ] top_genes_V2 <- top_genes_V2[!is.na(top_genes_V2)] top_genes_V2 <- top_genes_V2[which(V2[top_genes_V2, i] > 0)] } - top_genes_W <- row.names(W)[order(W[, i], decreasing = T)[1:num.genes] ] + top_genes_W <- row.names(W)[order(W[, i], decreasing = TRUE)[1:num.genes] ] top_genes_W <- top_genes_W[!is.na(top_genes_W)] top_genes_W <- top_genes_W[which(W[top_genes_W, i] > 0)] - if (print.genes) { - print(paste("Factor", i)) - print('Dataset 1') - print(top_genes_V1) - print('Shared') - print(top_genes_W) - print('Dataset 2') - print(top_genes_V2) + if (print.genes && verbose) { + message("Factor ", i) + message('Dataset 1') + message(top_genes_V1) + message('Shared') + message(top_genes_W) + message('Dataset 2') + message(top_genes_V2) } pvals <- list() # order is V1, V2, W @@ -5909,9 +6030,9 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh log2fc[top_genes_W], pvals[[3]] )) } - V1_genes <- data.frame(Reduce(rbind, V1_matrices), stringsAsFactors = F) - V2_genes <- data.frame(Reduce(rbind, V2_matrices), stringsAsFactors = F) - W_genes <- data.frame(Reduce(rbind, W_matrices), stringsAsFactors = F) + V1_genes <- data.frame(Reduce(rbind, V1_matrices), stringsAsFactors = FALSE) + V2_genes <- data.frame(Reduce(rbind, V2_matrices), stringsAsFactors = FALSE) + W_genes <- data.frame(Reduce(rbind, W_matrices), stringsAsFactors = FALSE) df_cols <- c("factor_num", "gene", "log2fc", "p_value") output_list <- list(V1_genes, W_genes, V2_genes) output_list <- lapply(seq_along(output_list), function(x) { @@ -5954,19 +6075,20 @@ getFactorMarkers <- function(object, dataset1 = NULL, dataset2 = NULL, factor.sh #' @param by.dataset Include dataset of origin in cluster identity in Seurat object (default FALSE). #' #' @return Seurat object with raw.data, scale.data, dr$tsne, dr$inmf, and ident slots set. -#' @export +#' #' @import Matrix #' @importFrom methods new #' @importFrom utils packageVersion +#' +#' @export #' @examples #' \dontrun{ -#' # liger object -#' ligerex +#' # ligerex (liger object based on in-memory datasets ONLY), factorization complete input #' s.object <- ligerToSeurat(ligerex) #' } -ligerToSeurat <- function(object, nms = names(object@H), renormalize = T, use.liger.genes = T, - by.dataset = F) { +ligerToSeurat <- function(object, nms = names(object@H), renormalize = TRUE, use.liger.genes = TRUE, + by.dataset = FALSE) { if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" needed for this function to work. Please install it.", call. = FALSE @@ -5997,7 +6119,7 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = T, use.li } else { var.genes <- object@var.genes if (any(grepl('_', var.genes))) { - print("Warning: Seurat v3 genes cannot have underscores, replacing with dashes ('-')") + message("Warning: Seurat v3 genes cannot have underscores, replacing with dashes ('-')") var.genes <- gsub("_", replacement = "-", var.genes) } inmf.loadings <- t(x = object@W) @@ -6093,10 +6215,12 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = T, use.li #' @param cca.to.H Carry over CCA (and aligned) loadings and insert them into H (and H.norm) slot in #' liger object (only meaningful for combined analysis Seurat object). Useful for plotting directly #' afterwards. (default FALSE) - +#' #' @return \code{liger} object. -#' @export +#' #' @import Matrix +#' +#' @export #' @examples #' \dontrun{ #' # Seurat objects for two pbmc datasets @@ -6109,13 +6233,13 @@ ligerToSeurat <- function(object, nms = names(object@H), renormalize = T, use.li #' # Seurat object for joint analysis #' pbmc <- readRDS('pbmc.RDS') #' # create liger object, using 'protocol' for dataset names -#' ligerex3 <- seuratToLiger(pbmc, combined.seurat = T, meta.var = 'protocol', num.hvg.info = 2000) +#' ligerex3 <- seuratToLiger(pbmc, combined.seurat = TRUE, meta.var = 'protocol', num.hvg.info = 2000) #' } -seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", meta.var = NULL, - assays.use = NULL, raw.assay = "RNA", remove.missing = T, renormalize = T, - use.seurat.genes = T, num.hvg.info = NULL, use.idents = T, use.tsne = T, - cca.to.H = F) { +seuratToLiger <- function(objects, combined.seurat = FALSE, names = "use-projects", meta.var = NULL, + assays.use = NULL, raw.assay = "RNA", remove.missing = TRUE, renormalize = TRUE, + use.seurat.genes = TRUE, num.hvg.info = NULL, use.idents = TRUE, use.tsne = TRUE, + cca.to.H = FALSE) { if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" needed for this function to work. Please install it.", call. = FALSE @@ -6124,7 +6248,7 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", # Remind to set combined.seurat if ((typeof(objects) != "list") & (!combined.seurat)) { - stop("Please pass a list of objects or set combined.seurat = T") + stop("Please pass a list of objects or set combined.seurat = TRUE") } # Get Seurat versions if (typeof(objects) != "list") { @@ -6154,7 +6278,7 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", object.raw <- objects@raw.data } if (nrow(objects@meta.data) != ncol(object.raw)) { - cat("Warning: Mismatch between meta.data and raw.data in this Seurat object. \nSome cells", + message("Warning: Mismatch between meta.data and raw.data in this Seurat object. \nSome cells", "will not be assigned to a raw dataset. \nRepeat Seurat analysis without filters to", "allow all cells to be assigned.\n") } @@ -6175,7 +6299,7 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", var.genes <- Seurat::VariableFeatures(objects) idents <- Seurat::Idents(objects) if (is.null(objects@reductions$tsne)) { - cat("Warning: no t-SNE coordinates available for this Seurat object.\n") + message("Warning: no t-SNE coordinates available for this Seurat object.") tsne.coords <- NULL } else { tsne.coords <- objects@reductions$tsne@cell.embeddings @@ -6187,7 +6311,7 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", idents <- objects@ident # Get tsne.coords if (is.null(objects@dr$tsne)) { - cat("Warning: no t-SNE coordinates available for this Seurat object.\n") + message("Warning: no t-SNE coordinates available for this Seurat object.") tsne.coords <- NULL } else { tsne.coords <- objects@dr$tsne@cell.embeddings @@ -6206,7 +6330,7 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", names(raw.data) <- lapply(seq_along(objects), function(x) { if (identical(names, "use-projects")) { if (!is.null(meta.var)) { - cat("Warning: meta.var value is set - set names = 'use-meta' to use meta.var for names.\n") + message("Warning: meta.var value is set - set names = 'use-meta' to use meta.var for names.\n") } objects[[x]]@project.name } else if (identical(names, "use-meta")) { @@ -6276,11 +6400,11 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", # Get CCA loadings if requested if (cca.to.H & combined.seurat) { if (version > 2) { - cat("Warning: no CCA loadings available for Seurat v3 objects.\n") + message("Warning: no CCA loadings available for Seurat v3 objects.\n") return(new.liger) } if (is.null(objects@dr$cca)) { - cat("Warning: no CCA loadings available for this Seurat object.\n") + message("Warning: no CCA loadings available for this Seurat object.\n") } else { new.liger@H <- lapply(unique(objects@meta.data[[meta.var]]), function(x) { cells <- rownames(objects@meta.data[objects@meta.data[[meta.var]] == x, ]) @@ -6292,11 +6416,11 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", names(new.liger@H) <- names(new.liger@raw.data) } if (is.null(objects@dr$cca.aligned)) { - cat("Warning: no aligned CCA loadings available for this Seurat object.\n") + message("Warning: no aligned CCA loadings available for this Seurat object.\n") } else { new.liger@H.norm <- objects@dr$cca.aligned@cell.embeddings new.liger@H.norm <- addMissingCells(Reduce(rbind, new.liger@H), new.liger@H.norm, - transpose = T) + transpose = TRUE) } } return(new.liger) @@ -6316,17 +6440,17 @@ seuratToLiger <- function(objects, combined.seurat = F, names = "use-projects", #' #' @return \code{liger} object with subsetting applied to raw.data, norm.data, scale.data, H, W, V, #' H.norm, tsne.coords, and clusters. +#' #' @export #' @examples #' \dontrun{ -#' # liger object, with clusters 0:10 +#' # ligerex (liger object based on in-memory datasets), with clusters 0:10 #' # factorization, alignment, and t-SNE calculation have been performed -#' ligerex #' # subset by clusters #' ligerex_subset <- subsetLiger(ligerex, clusters.use = c(1, 4, 5)) #' } -subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.missing = T) { +subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.missing = TRUE) { if (!is.null(clusters.use)) { cells.use <- names(object@clusters)[which(object@clusters %in% clusters.use)] } @@ -6336,9 +6460,9 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi if (length(cells < 25)) { warning("Number of subsetted cells too small (less than 25), please check cells.use!") } - object@raw.data[[q]][, cells, drop = F] + object@raw.data[[q]][, cells, drop = FALSE] } else { - warning(paste0("Selected subset eliminates dataset ", names(object@raw.data)[q])) + warning("Selected subset eliminates dataset ", names(object@raw.data)[q]) return(NULL) } }) @@ -6385,17 +6509,19 @@ subsetLiger <- function(object, clusters.use = NULL, cells.use = NULL, remove.mi #' @param ... Additional parameters passed on to createLiger. #' #' @return \code{liger} object with rearranged raw.data slot. -#' @export +#' #' @import Matrix +#' +#' @export #' @examples #' \dontrun{ -#' # liger object organized by species, with column designating sex in cell.data -#' ligerex +#' # ligerex (liger object based on in-memory objects) organized by species +#' # with column designating sex in cell.data #' # rearrange by sex #' ligerex_new <- reorganizeLiger(ligerex, by.feature = "sex", new.label = "species") #' } -reorganizeLiger <- function(object, by.feature, keep.meta = T, new.label = "orig.dataset", +reorganizeLiger <- function(object, by.feature, keep.meta = TRUE, new.label = "orig.dataset", ...) { if (!(by.feature %in% colnames(object@cell.data))) { stop("Please select existing feature in cell.data to reorganize by, or add it before calling.") @@ -6440,19 +6566,21 @@ reorganizeLiger <- function(object, by.feature, keep.meta = T, new.label = "orig #' @param object \code{liger} object. #' @param override.raw Keep original raw.data without any modifications (removing missing cells #' etc.) (defualt FALSE). +#' @param verbose Print progress bar/messages (TRUE by default) #' #' @return Updated \code{liger} object. -#' @export +#' #' @importFrom methods .hasSlot slot slotNames +#' +#' @export #' @examples #' \dontrun{ -#' # old Analogizer object -#' analogy +#' # analogy (old Analogizer object) #' # convert to latest class definition #' ligerex <- convertOldLiger(analogy) #' } -convertOldLiger = function(object, override.raw = F) { +convertOldLiger = function(object, override.raw = FALSE, verbose = TRUE) { new.liger <- createLiger(object@raw.data) slots_new <- slotNames(new.liger) slots_old <- slotNames(object) @@ -6466,9 +6594,11 @@ convertOldLiger = function(object, override.raw = F) { slot(new.liger, slotname) <- slot(object, slotname) } } - print(paste0('Old slots not transferred: ', setdiff(slots_old, slots_new))) - # compare to slots since it's possible that the analogizer object - # class has slots that this particular object does not - print(paste0('New slots not filled: ', setdiff(slots_new[slots_new != "cell.data"], slots))) + if (verbose) { + message('Old slots not transferred: ', setdiff(slots_old, slots_new)) + # compare to slots since it's possible that the analogizer object + # class has slots that this particular object does not + message('New slots not filled: ', setdiff(slots_new[slots_new != "cell.data"], slots)) + } return(new.liger) } diff --git a/R/utilities.R b/R/utilities.R index eb015b41..ac01d101 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -30,7 +30,7 @@ kl_divergence_uniform = function(object, Hs=NULL) n_factors = ncol(Hs[[1]]) dataset_list = list() for (i in 1:length(Hs)) { - scaled = scale(Hs[[i]], center=F, scale=T) + scaled = scale(Hs[[i]], center=FALSE, scale=TRUE) inflated = t(apply(scaled, 1, function(x) { replace(x, x == 0, 1e-20) @@ -98,18 +98,9 @@ MergeSparseDataAll <- function(datalist, library.names = NULL) { return(M) } -#' Perform fast and memory-efficient normalization operation on sparse matrix data. -#' -#' @param A Sparse matrix DGE. -#' @export -#' @examples -#' \dontrun{ -#' Y = matrix(c(1,2,3,4,5,6,7,8,9,10,11,12),nrow=4,byrow=T) -#' Z = matrix(c(1,2,3,4,5,6,7,6,5,4,3,2),nrow=4,byrow=T) -#' ligerex = createLiger(list(Y,Z)) -#' ligerex@var.genes = c(1,2,3,4) -#' ligerex = scaleNotCenter(ligerex) -#' } +# Perform fast and memory-efficient normalization operation on sparse matrix data. +# param A Sparse matrix DGE. + Matrix.column_norm <- function(A){ if (class(A)[1] == "dgTMatrix") { temp = summary(A) @@ -152,7 +143,7 @@ assign.singletons.list <- function(object, idents, k.use = 15, center = FALSE) { if (!is.list(x = object) || !all(sapply(X = object, FUN = is.matrix))) { stop("'assign.singletons.list' expects a list of matrices") } - print('Assigning singletons') + message('Assigning singletons') singleton.clusters <- names(x = table(idents))[which(x = table(idents) == 1)] singleton.cells <- names(x = idents)[which(x = idents %in% singleton.clusters)] if (length(x = singleton.cells) > 0) { @@ -204,9 +195,14 @@ SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R= # Prepare data for modularity based clustering edge = edge[which(edge[,3]>prune.thresh),] - print("making edge file.") + message("making edge file.") edge_file <- paste0("edge", id.number, fileext=".txt") # Make sure no scientific notation written to edge file + + # restore default settings when the current function exits + init_option <- options() + on.exit(options(init_option)) + saveScipen=options(scipen=10000)[[1]] write.table(x=edge,file=edge_file,sep="\t",row.names=F,col.names=F) options(scipen=saveScipen) @@ -215,12 +211,12 @@ SLMCluster<-function(edge,prune.thresh=0.2,nstart=100,iter.max=10,algorithm=1,R= # NULL random.seed disallowed for this program. random.seed = 0 } - liger.dir <- system.file(package = "liger") + liger.dir <- system.file(package = "rliger") ModularityJarFile <- paste0(liger.dir, "/java/ModularityOptimizer.jar") command <- paste("java -jar", ModularityJarFile, edge_file, output_file, modularity, R, algorithm, nstart, iter.max, random.seed, as.numeric(print.mod), sep = " ") - print ("Starting SLM") + message ("Starting SLM") ret = system(command, wait = TRUE) if (ret != 0) { stop(paste0(ret, " exit status from ", command)) @@ -248,7 +244,7 @@ getMode <- function(x, na.rm = FALSE) { # utility function for seuratToLiger function # Compares colnames in reference matrix1 and adds back any missing # column names to matrix.subset as rows -# Set transpose = T if rownames of matrix1 should be referenced +# Set transpose = TRUE if rownames of matrix1 should be referenced addMissingCells <- function(matrix1, matrix.subset, transpose = F) { if (transpose) { matrix1 <- t(matrix1) @@ -269,7 +265,7 @@ addMissingCells <- function(matrix1, matrix.subset, transpose = F) { #' #' @description #' Returns single vector of gene values across all datasets in list provided. Data can be in raw, -#' normalized or scaled form. If matrices are in cell x gene format, set use.cols = T. +#' normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. #' #' @param list List of gene x cell (or cell x gene) matrices #' @param gene Gene for which to return values (if gene is not found in appropriate dimnames will @@ -285,12 +281,12 @@ addMissingCells <- function(matrix1, matrix.subset, transpose = F) { #' @export #' @examples #' \dontrun{ -#' # liger object with factorization complete -#' ligerex +#' # liger object with factorization complete +#' # ligerex #' gene_values <- getGeneValues(ligerex@raw.data, 'MALAT1') #' } -getGeneValues <- function(list, gene, use.cols = F, methylation.indices = NULL, log2scale = F, +getGeneValues <- function(list, gene, use.cols = FALSE, methylation.indices = NULL, log2scale = FALSE, scale.factor = 10000) { gene_vals <- unlist(lapply(seq_along(list), function(i) { mtx <- unname(list)[[i]] @@ -308,7 +304,7 @@ getGeneValues <- function(list, gene, use.cols = F, methylation.indices = NULL, } return(gene_vals_int) }), - use.names = T) + use.names = TRUE) return(gene_vals) } @@ -416,20 +412,6 @@ wilcoxauc <- function(X, y, groups_use=NULL, verbose=TRUE) { } - -#' Pipe operator -#' -#' @name %>% -#' @rdname pipe -#' @importFrom dplyr %>% -#' @examples -#' \dontrun{ -#' x <- 5 %>% sum(10) -#' } -#' @return return value of rhs function. -NULL - - tidy_results <- function(wide_res, features, groups) { res <- Reduce(cbind, lapply(wide_res, as.numeric)) %>% data.frame() colnames(res) <- names(wide_res) @@ -487,28 +469,21 @@ compute_pval <- function(ustat, ties, N, n1n2) { #' #' @param X feature by observation matrix. #' -#' @examples -#' \dontrun{ -#' data(exprs) -#' rank_res <- rank_matrix(exprs) -#' } #' @return List with 2 items -#' \itemize{ -#' \item X_ranked - matrix of entry ranks -#' \item ties - list of tied group sizes -#' } + + rank_matrix <- function(X) { UseMethod('rank_matrix') } -#' @rdname rank_matrix +##' @rdname rank_matrix rank_matrix.dgCMatrix <- function(X) { Xr <- Matrix(X, sparse = TRUE) ties <- cpp_rank_matrix_dgc(Xr@x, Xr@p, nrow(Xr), ncol(Xr)) return(list(X_ranked = Xr, ties = ties)) } -#' @rdname rank_matrix +##' @rdname rank_matrix rank_matrix.matrix <- function(X) { cpp_rank_matrix_dense(X) } @@ -521,14 +496,8 @@ rank_matrix.matrix <- function(X) { #' @param y group labels #' @param MARGIN whether observations are rows (=2) or columns (=1) #' -#' @examples -#' \dontrun{ -#' data(exprs) -#' data(y) -#' sumGroups_res <- sumGroups(exprs, y, 1) -#' sumGroups_res <- sumGroups(t(exprs), y, 2) -#' } #' @return Matrix of groups by features + sumGroups <- function(X, y, MARGIN=2) { if (MARGIN == 2 & nrow(X) != length(y)) { stop('wrong dims') @@ -538,7 +507,7 @@ sumGroups <- function(X, y, MARGIN=2) { UseMethod('sumGroups') } -#' @rdname sumGroups +##' @rdname sumGroups sumGroups.dgCMatrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_sumGroups_dgc_T(X@x, X@p, X@i, ncol(X), nrow(X), as.integer(y) - 1, @@ -549,7 +518,7 @@ sumGroups.dgCMatrix <- function(X, y, MARGIN=2) { } } -#' @rdname sumGroups +##' @rdname sumGroups sumGroups.matrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_sumGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) @@ -568,15 +537,8 @@ sumGroups.matrix <- function(X, y, MARGIN=2) { #' @param y group labels #' @param MARGIN whether observations are rows (=2) or columns (=1) #' -#' @examples -#' -#' \dontrun{ -#' data(exprs) -#' data(y) -#' nnz_res <- nnzeroGroups(exprs, y, 1) -#' nnz_res <- nnzeroGroups(t(exprs), y, 2) -#' } #' @return Matrix of groups by features + nnzeroGroups <- function(X, y, MARGIN=2) { if (MARGIN == 2 & nrow(X) != length(y)) { stop('wrong dims') @@ -586,7 +548,7 @@ nnzeroGroups <- function(X, y, MARGIN=2) { UseMethod('nnzeroGroups') } -#' @rdname nnzeroGroups +##' @rdname nnzeroGroups nnzeroGroups.dgCMatrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_nnzeroGroups_dgc_T(X@p, X@i, ncol(X), nrow(X), as.integer(y) - 1, @@ -597,7 +559,7 @@ nnzeroGroups.dgCMatrix <- function(X, y, MARGIN=2) { } } -#' @rdname nnzeroGroups +##' @rdname nnzeroGroups nnzeroGroups.matrix <- function(X, y, MARGIN=2) { if (MARGIN == 1) { cpp_nnzeroGroups_dense_T(X, as.integer(y) - 1, length(unique(y))) diff --git a/README.md b/README.md index 4d5e10b0..9de879dc 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -[![Build Status](https://travis-ci.com/welch-lab/liger.svg?branch=master)](https://travis-ci.com/welch-lab/liger.svg?branch=master) +[![Build Status](https://api.travis-ci.com/welch-lab/liger.svg?branch=master)](https://api.travis-ci.com/welch-lab/liger.svg?branch=master) # LIGER (Linked Inference of Genomic Experimental Relationships) -LIGER (`liger`) is a package for integrating and analyzing multiple single-cell datasets, developed by the Macosko lab and maintained/extended by the Welch lab. It relies on integrative non-negative matrix factorization to identify shared and dataset-specific factors. +LIGER (installed as `rliger` ) is a package for integrating and analyzing multiple single-cell datasets, developed by the Macosko lab and maintained/extended by the Welch lab. It relies on integrative non-negative matrix factorization to identify shared and dataset-specific factors. Check out our [Cell paper](https://www.cell.com/cell/fulltext/S0092-8674%2819%2930504-5) for a more complete description of the methods and analyses. To access data used in our SN and BNST analyses, visit our [study](https://portals.broadinstitute.org/single_cell/study/SCP466) on the Single Cell Portal. @@ -48,7 +48,7 @@ For usage examples and guided walkthroughs, check the `vignettes` directory of t ## System Requirements ### Hardware requirements -The `liger` package requires only a standard computer with enough RAM to support the in-memory operations. For minimal performance, please make sure that the computer has at least about 2 GB of RAM. For optimal performance, we recommend a computer with the following specs: +The `rliger` package requires only a standard computer with enough RAM to support the in-memory operations. For minimal performance, please make sure that the computer has at least about 2 GB of RAM. For optimal performance, we recommend a computer with the following specs: * RAM: 16+ GB * CPU: 4+ cores, 2.3 GHz/core @@ -60,21 +60,20 @@ The package development version is tested on *Linux* operating systems and *Mac * Linux: CentOS 7, Manjaro 5.3.18 * Mac OSX: Mojave (10.14.1), Catalina (10.15.2) -The `liger` package should be compatible with Windows, Mac, and Linux operating systems. +The `rliger` package should be compatible with Windows, Mac, and Linux operating systems. -Before setting up the `liger` package, users should have R version 3.4.0 or higher, and several packages set up from CRAN and other repositories. The user can check the dependencies in `DESCRIPTION`. +Before setting up the `rliger` package, users should have R version 3.4.0 or higher, and several packages set up from CRAN and other repositories. The user can check the dependencies in `DESCRIPTION`. ## Installation -`liger` is written in R and is available on the Comprehensive R Archive Network (CRAN). Note that the package name on CRAN is `rliger` to avoid a naming conflict with an unrelated package. To install the version on CRAN, follow these instructions: +LIGER is written in R and is also available on the Comprehensive R Archive Network (CRAN). Note that the package name is `rliger` to avoid a naming conflict with an unrelated package. To install the version on CRAN, follow these instructions: 1. Install [R](https://www.r-project.org/) (>= 3.4) -2. Install [Rstudio](https://www.rstudio.com/products/rstudio/download/) (recommended) +2. Install [Rstudio](https://rstudio.com/products/rstudio/download/) (recommended) 3. Type the following R command: ``` install.packages('rliger') ``` - To install the latest development version directly from GitHub, type the following commands instead of step 3: ``` install.packages('devtools') @@ -88,7 +87,7 @@ Installation from CRAN is easy because pre-compiled binaries are available for W Installing RcppArmadillo on R>=3.4 requires Clang >= 4 and gfortran-6.1. For newer versions of R (R>=3.5), it's recommended to follow the instructions in this [post](https://thecoatlessprofessor.com/programming/r-compiler-tools-for-rcpp-on-macos/). Follow the instructions below if you have R version 3.4.0-3.4.4. 1. Install gfortran as suggested [here](https://gcc.gnu.org/wiki/GFortranBinaries) -2. Download clang4 from this [page](http://r.research.att.com/libs/clang-4.0.0-darwin15.6-Release.tar.gz) +2. Download clang4 from this [page](https://mac.R-project.org/libs/clang-4.0.0-darwin15.6-Release.tar.gz) 3. Uncompress the resulting zip file and type into Terminal (`sudo` if needed): ``` mv /path/to/clang4/ /usr/local/ @@ -113,8 +112,8 @@ nano Makevars ``` Paste in the required text above and save with `Ctrl-X`. -### Additional Installation Steps for Online Learning using Liger -The HDF5 library is required for implementing online learning in Liger on data files in HDF5 format. It can be installed via one of the following commands: +### Additional Installation Steps for Online Learning using LIGER +The HDF5 library is required for implementing online learning in LIGER on data files in HDF5 format. It can be installed via one of the following commands: | System | Command |:------------------------------------------|:---------------------------------| @@ -161,12 +160,12 @@ pwd ### Install Time and Expected Run Time -The installation process of `liger` should take less than 30 minutes. +The installation process of `rliger` should take less than 30 minutes. The expected run time is 1 - 4 hours depending on dataset size and downstream analysis of the user’s choice. ## Sample Datasets -The `liger` package provides a small sample dataset for basic demos of the functions. You can find it in folder `liger/tests/testdata/small_pbmc_data.RDS`. +The `rliger` package provides a small sample dataset for basic demos of the functions. You can find it in folder `liger/tests/testdata/small_pbmc_data.RDS`. We also provide a set of scRNA-seq and scATAC-seq datasets for real-world style demos. These datasets are as follows: diff --git a/inst/include/liger.h b/inst/include/rliger.h similarity index 100% rename from inst/include/liger.h rename to inst/include/rliger.h diff --git a/man/.Rapp.history b/man/.Rapp.history deleted file mode 100644 index e69de29b..00000000 diff --git a/man/Matrix.column_norm.Rd b/man/Matrix.column_norm.Rd deleted file mode 100755 index 67596c95..00000000 --- a/man/Matrix.column_norm.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{Matrix.column_norm} -\alias{Matrix.column_norm} -\title{Perform fast and memory-efficient normalization operation on sparse matrix data.} -\usage{ -Matrix.column_norm(A) -} -\arguments{ -\item{A}{Sparse matrix DGE.} -} -\description{ -Perform fast and memory-efficient normalization operation on sparse matrix data. -} -\examples{ -\dontrun{ -Y = matrix(c(1,2,3,4,5,6,7,8,9,10,11,12),nrow=4,byrow=T) -Z = matrix(c(1,2,3,4,5,6,7,6,5,4,3,2),nrow=4,byrow=T) -ligerex = createLiger(list(Y,Z)) -ligerex@var.genes = c(1,2,3,4) -ligerex = scaleNotCenter(ligerex) -} -} diff --git a/man/calcARI.Rd b/man/calcARI.Rd old mode 100755 new mode 100644 index 912f20c9..222711f6 --- a/man/calcARI.Rd +++ b/man/calcARI.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcARI} \alias{calcARI} \title{Calculate adjusted Rand index} \usage{ -calcARI(object, clusters.compare) +calcARI(object, clusters.compare, verbose = TRUE) } \arguments{ \item{object}{\code{liger} object. Should run quantileAlignSNF before calling.} \item{clusters.compare}{Clustering with which to compare (named vector).} + +\item{verbose}{Print messages (TRUE by default)} } \value{ -Value of ARI - Adjusted Rand index value. } \description{ @@ -23,13 +23,12 @@ indicating perfect agreement. } \examples{ \dontrun{ -# liger object, factorization done -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete +ligerex <- quantile_norm(ligerex) # toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = T) +cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = T) +cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) names(cluster2) <- colnames(ligerex@raw.data[[2]]) # get ARI for first clustering ari1 <- calcARI(ligerex, cluster1) diff --git a/man/calcAgreement.Rd b/man/calcAgreement.Rd old mode 100755 new mode 100644 index 853644bf..1ce3ad5a --- a/man/calcAgreement.Rd +++ b/man/calcAgreement.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcAgreement} \alias{calcAgreement} \title{Calculate agreement metric} @@ -50,11 +50,11 @@ it is usually no higher than 0.2-0.3 (particularly for non-deterministic approac } \examples{ \dontrun{ -# liger object, factorization complete -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object based on in-memory datasets), factorization complete +# generate H.norm by quantile normalizig factor loadings +ligerex <- quantile_norm(ligerex) agreement <- calcAgreement(ligerex, dr.method = "NMF") -# HDF5 input +# ligerex (liger object based on datasets in HDF5 format), factorization complete ligerex <- quantile_norm(ligerex) ligerex <- readSubset(ligerex, slot.use = "scale.data", max.cells = 5000) agreement <- calcAgreement(ligerex, dr.method = "NMF") diff --git a/man/calcAlignment.Rd b/man/calcAlignment.Rd old mode 100755 new mode 100644 index fef529e7..4b7bc143 --- a/man/calcAlignment.Rd +++ b/man/calcAlignment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcAlignment} \alias{calcAlignment} \title{Calculate alignment metric} @@ -11,9 +11,8 @@ calcAlignment( cells.use = NULL, cells.comp = NULL, clusters.use = NULL, - by.cell = F, - by.dataset = F, - do.kbet = F + by.cell = FALSE, + by.dataset = FALSE ) } \arguments{ @@ -36,8 +35,6 @@ alignment (instead of dataset designations). These can be from the same dataset \item{by.cell}{Return alignment calculated individually for each cell (default FALSE).} \item{by.dataset}{Return alignment calculated for each dataset (default FALSE).} - -\item{do.kbet}{Return kBET metric (default FALSE).} } \value{ Alignment metric. @@ -52,8 +49,7 @@ alignment can be greater than 1 occasionally. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object ), factorization complete ligerex <- quantile_norm(ligerex) alignment <- calcAlignment(ligerex) } diff --git a/man/calcAlignmentPerCluster.Rd b/man/calcAlignmentPerCluster.Rd old mode 100755 new mode 100644 index 7cdd9190..22890873 --- a/man/calcAlignmentPerCluster.Rd +++ b/man/calcAlignmentPerCluster.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcAlignmentPerCluster} \alias{calcAlignmentPerCluster} \title{Calculate alignment for each cluster} \usage{ -calcAlignmentPerCluster(object, rand.seed = 1, k = NULL, by.dataset = F) +calcAlignmentPerCluster(object, rand.seed = 1, k = NULL, by.dataset = FALSE) } \arguments{ \item{object}{\code{liger} object. Should call quantileAlignSNF before calling.} @@ -24,9 +24,8 @@ Returns alignment for each cluster in analysiss (see documentation for calcAlign } \examples{ \dontrun{ -# liger object, factorization complete -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete +ligerex <- quantile_norm(ligerex) # get alignment for each cluster alignment_per_cluster <- calcAlignmentPerCluster(ligerex) } diff --git a/man/calcDatasetSpecificity.Rd b/man/calcDatasetSpecificity.Rd old mode 100755 new mode 100644 index 7649bc4b..cc7f39a4 --- a/man/calcDatasetSpecificity.Rd +++ b/man/calcDatasetSpecificity.Rd @@ -1,10 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcDatasetSpecificity} \alias{calcDatasetSpecificity} \title{Calculate a dataset-specificity score for each factor} \usage{ -calcDatasetSpecificity(object, dataset1 = NULL, dataset2 = NULL, do.plot = T) +calcDatasetSpecificity( + object, + dataset1 = NULL, + dataset2 = NULL, + do.plot = TRUE +) } \arguments{ \item{object}{\code{liger} object. Should run optimizeALS before calling.} @@ -28,9 +33,9 @@ description. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete +# generate H.norm by quantile normalizig factor loadings +ligerex <- quantile_norm(ligerex) dataset_spec <- calcDatasetSpecificity(ligerex, do.plot = F) } } diff --git a/man/calcGeneVars.Rd b/man/calcGeneVars.Rd index 1d093138..cae36433 100644 --- a/man/calcGeneVars.Rd +++ b/man/calcGeneVars.Rd @@ -1,16 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcGeneVars} \alias{calcGeneVars} \title{Calculate variance of gene expression across cells in an online fashion} \usage{ -calcGeneVars(object, chunk = 1000) +calcGeneVars(object, chunk = 1000, verbose = TRUE) } \arguments{ \item{object}{\code{liger} object. The input raw.data should be a list of hdf5 files. Should call normalize and selectGenes before calling.} \item{chunk}{size of chunks in hdf5 file. (default 1000)} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with scale.data slot set. @@ -18,14 +20,3 @@ Should call normalize and selectGenes before calling.} \description{ This function calculates the variance of gene expression values across cells for hdf5 files. } -\examples{ -\dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -} -} diff --git a/man/calcPurity.Rd b/man/calcPurity.Rd old mode 100755 new mode 100644 index 91757d5e..4a466d5e --- a/man/calcPurity.Rd +++ b/man/calcPurity.Rd @@ -1,15 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{calcPurity} \alias{calcPurity} \title{Calculate purity} \usage{ -calcPurity(object, classes.compare) +calcPurity(object, classes.compare, verbose = TRUE) } \arguments{ \item{object}{\code{liger} object. Should run quantileAlignSNF before calling.} \item{classes.compare}{Clustering with which to compare (named vector).} + +\item{verbose}{Print messages (TRUE by default)} } \value{ Purity value. @@ -22,17 +24,16 @@ with a score of 1 representing a pure, or accurate, clustering. } \examples{ \dontrun{ -# liger object, factorization done -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete +ligerex <- quantile_norm(ligerex) # toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = T) +cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = T) +cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) names(cluster2) <- colnames(ligerex@raw.data[[2]]) # get ARI for first clustering -ari1 <- calcARI(ligerex, cluster1) +ari1 <- calcPurity(ligerex, cluster1) # get ARI for second clustering -ari2 <- calcARI(ligerex, cluster2) +ari2 <- calcPurity(ligerex, cluster2) } } diff --git a/man/convertOldLiger.Rd b/man/convertOldLiger.Rd old mode 100755 new mode 100644 index 256b7e86..88aa09f4 --- a/man/convertOldLiger.Rd +++ b/man/convertOldLiger.Rd @@ -1,16 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{convertOldLiger} \alias{convertOldLiger} \title{Convert older liger object into most current version (based on class definition)} \usage{ -convertOldLiger(object, override.raw = F) +convertOldLiger(object, override.raw = FALSE, verbose = TRUE) } \arguments{ \item{object}{\code{liger} object.} \item{override.raw}{Keep original raw.data without any modifications (removing missing cells etc.) (defualt FALSE).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ Updated \code{liger} object. @@ -22,8 +24,7 @@ class NULL. } \examples{ \dontrun{ -# old Analogizer object -analogy +# analogy (old Analogizer object) # convert to latest class definition ligerex <- convertOldLiger(analogy) } diff --git a/man/createLiger.Rd b/man/createLiger.Rd old mode 100755 new mode 100644 index 8d7e3c78..5e06d045 --- a/man/createLiger.Rd +++ b/man/createLiger.Rd @@ -1,20 +1,21 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{createLiger} \alias{createLiger} \title{Create a liger object.} \usage{ createLiger( raw.data, - make.sparse = T, - take.gene.union = F, - remove.missing = T, + make.sparse = TRUE, + take.gene.union = FALSE, + remove.missing = TRUE, format.type = "10X", data.name = NULL, indices.name = NULL, indptr.name = NULL, genes.name = NULL, - barcodes.name = NULL + barcodes.name = NULL, + verbose = TRUE ) } \arguments{ @@ -23,10 +24,10 @@ createLiger( \item{make.sparse}{Whether to convert raw data into sparse matrices (default TRUE).} \item{take.gene.union}{Whether to fill out raw.data matrices with union of genes across all -datasets (filling in 0 for missing data) (requires make.sparse=T) (default FALSE).} +datasets (filling in 0 for missing data) (requires make.sparse = TRUE) (default FALSE).} \item{remove.missing}{Whether to remove cells not expressing any measured genes, and genes not -expressed in any cells (if take.gene.union = T, removes only genes not expressed in any +expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE).} \item{format.type}{HDF5 format (10X CellRanger by default).} @@ -40,6 +41,8 @@ dataset) (default TRUE).} \item{genes.name}{Path to the gene names stored in HDF5 file.} \item{barcodes.name}{Path to the barcodes stored in HDF5 file.} + +\item{verbose}{Print messages (TRUE by default)} } \value{ \code{liger} object with raw.data slot set. @@ -51,9 +54,8 @@ By default, it converts all passed data into sparse matrices (dgCMatrix) to redu It initializes cell.data with nUMI and nGene calculated for every cell. } \examples{ -\dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +# Demonstration using matrices with randomly generated numbers +Y <- matrix(runif(5000,0,2), 10,500) +Z <- matrix(runif(5000,0,2), 10,500) ligerex <- createLiger(list(y_set = Y, z_set = Z)) } -} diff --git a/man/getFactorMarkers.Rd b/man/getFactorMarkers.Rd old mode 100755 new mode 100644 index e372aca1..84a47632 --- a/man/getFactorMarkers.Rd +++ b/man/getFactorMarkers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{getFactorMarkers} \alias{getFactorMarkers} \title{Find shared and dataset-specific markers} @@ -13,7 +13,8 @@ getFactorMarkers( log.fc.thresh = 1, pval.thresh = 0.05, num.genes = 30, - print.genes = F + print.genes = FALSE, + verbose = TRUE ) } \arguments{ @@ -38,6 +39,8 @@ available.} \item{num.genes}{Max number of genes to report for each dataset (default 30).} \item{print.genes}{Print ordered markers passing logfc, umi and frac thresholds (default FALSE).} + +\item{verbose}{Print messages (TRUE by default)} } \value{ List of shared and specific factors. First three elements are dataframes of dataset1- @@ -51,8 +54,7 @@ or dataset-specific way). } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object), factorization complete input markers <- getFactorMarkers(ligerex, num.genes = 10) # look at shared markers head(markers[[2]]) diff --git a/man/getGeneValues.Rd b/man/getGeneValues.Rd old mode 100755 new mode 100644 index cd334c16..cc72db70 --- a/man/getGeneValues.Rd +++ b/man/getGeneValues.Rd @@ -7,9 +7,9 @@ getGeneValues( list, gene, - use.cols = F, + use.cols = FALSE, methylation.indices = NULL, - log2scale = F, + log2scale = FALSE, scale.factor = 10000 ) } @@ -34,12 +34,12 @@ Plots to console (1-2 pages per factor) } \description{ Returns single vector of gene values across all datasets in list provided. Data can be in raw, -normalized or scaled form. If matrices are in cell x gene format, set use.cols = T. +normalized or scaled form. If matrices are in cell x gene format, set use.cols = TRUE. } \examples{ \dontrun{ - # liger object with factorization complete -ligerex +# liger object with factorization complete +# ligerex gene_values <- getGeneValues(ligerex@raw.data, 'MALAT1') } } diff --git a/man/getProportionMito.Rd b/man/getProportionMito.Rd old mode 100755 new mode 100644 index 571ff967..67e7e9e5 --- a/man/getProportionMito.Rd +++ b/man/getProportionMito.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{getProportionMito} \alias{getProportionMito} \title{Calculate proportion mitochondrial contribution} \usage{ -getProportionMito(object, use.norm = F) +getProportionMito(object, use.norm = FALSE) } \arguments{ \item{object}{\code{liger} object.} @@ -19,8 +19,7 @@ Calculates proportion of mitochondrial contribution based on raw or normalized d } \examples{ \dontrun{ -# liger object, factorization done -ligerex +# ligerex (liger object), factorization complete ligerex@cell.data[["percent_mito"]] <- getProportionMito(ligerex) } } diff --git a/man/imputeKNN.Rd b/man/imputeKNN.Rd old mode 100755 new mode 100644 index 5cf4f3db..861fb3fd --- a/man/imputeKNN.Rd +++ b/man/imputeKNN.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{imputeKNN} \alias{imputeKNN} \title{Impute the query cell expression matrix} @@ -11,7 +11,8 @@ imputeKNN( knn_k = 20, weight = TRUE, norm = TRUE, - scale = FALSE + scale = FALSE, + verbose = TRUE ) } \arguments{ @@ -28,6 +29,8 @@ imputeKNN( \item{norm}{Whether normalize the imputed data with default parameters (default TRUE).} \item{scale}{Whether scale but not center the imputed data with default parameters (default TRUE).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with raw data in raw.data slot replaced by imputed data (genes by cells) @@ -37,16 +40,7 @@ Impute query features from a reference dataset using KNN. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -X <- matrix(c(1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z, x_set = X)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 20) -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete # impute every dataset other than the reference dataset ligerex <- imputeKNN(ligerex, reference = "y_set", weight = FALSE) # impute only z_set dataset diff --git a/man/liger-class.Rd b/man/liger-class.Rd old mode 100755 new mode 100644 index 5f54aff5..515ce8f7 --- a/man/liger-class.Rd +++ b/man/liger-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \docType{class} \name{liger-class} \alias{liger-class} diff --git a/man/liger-package.Rd b/man/liger-package.Rd deleted file mode 100755 index 3ba8abd8..00000000 --- a/man/liger-package.Rd +++ /dev/null @@ -1,25 +0,0 @@ -\name{liger-package} -\alias{liger-package} -\docType{package} -\title{ -\packageTitle{liger} -} -\description{ -\packageDescription{liger} -} -\details{ - -The DESCRIPTION file: -\packageDESCRIPTION{liger} -\packageIndices{liger} - An overview of how to use the package, including the most important functions -} -\author{ -\packageAuthor{liger} - -Maintainer: \packageMaintainer{liger} -} -\references{ -Literature or other references for background information -} -\keyword{ package } \ No newline at end of file diff --git a/man/ligerToSeurat.Rd b/man/ligerToSeurat.Rd old mode 100755 new mode 100644 index c3598869..4129fa3b --- a/man/ligerToSeurat.Rd +++ b/man/ligerToSeurat.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{ligerToSeurat} \alias{ligerToSeurat} \title{Create a Seurat object containing the data from a liger object} @@ -7,9 +7,9 @@ ligerToSeurat( object, nms = names(object@H), - renormalize = T, - use.liger.genes = T, - by.dataset = F + renormalize = TRUE, + use.liger.genes = TRUE, + by.dataset = FALSE ) } \arguments{ @@ -38,8 +38,7 @@ in nms. iNMF factorization is stored in dim.reduction object with key "iNMF". } \examples{ \dontrun{ -# liger object -ligerex +# ligerex (liger object based on in-memory datasets ONLY), factorization complete input s.object <- ligerToSeurat(ligerex) } } diff --git a/man/linkGenesAndPeaks.Rd b/man/linkGenesAndPeaks.Rd old mode 100755 new mode 100644 index 1b020347..f09687b0 --- a/man/linkGenesAndPeaks.Rd +++ b/man/linkGenesAndPeaks.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{linkGenesAndPeaks} \alias{linkGenesAndPeaks} \title{Linking genes to putative regulatory elements} @@ -10,7 +10,8 @@ linkGenesAndPeaks( genes.list = NULL, dist = "spearman", alpha = 0.05, - path_to_coords + path_to_coords, + verbose = TRUE ) } \arguments{ @@ -29,6 +30,8 @@ this function will use all the gene symbols from the matrix passed to gmat by de this threshold are considered significant. The default is 0.05.} \item{path_to_coords}{Path to the gene coordinates file.} + +\item{verbose}{Print messages (TRUE by default)} } \value{ a sparse matrix with peak names as rows and gene symbols as columns, with each element indicating the @@ -39,10 +42,9 @@ Evaluate the relationships between pairs of genes and peaks based on specified d } \examples{ \dontrun{ -gmat.small <- readRDS("../testdata/small_gmat.RDS") # some gene counts matrix -pmat.small <- readRDS("../testdata/small_pmat.RDS") # some peak counts matrix +# some gene counts matrix: gmat.small +# some peak counts matrix: pmat.small regnet <- linkGenesAndPeaks(gmat.small, pmat.small, dist = "spearman", alpha = 0.05, path_to_coords = 'some_path') } - } diff --git a/man/louvainCluster.Rd b/man/louvainCluster.Rd old mode 100755 new mode 100644 index a6fde886..9d079fae --- a/man/louvainCluster.Rd +++ b/man/louvainCluster.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{louvainCluster} \alias{louvainCluster} \title{Louvain algorithm for community detection} @@ -12,7 +12,8 @@ louvainCluster( eps = 0.1, nRandomStarts = 10, nIterations = 100, - random.seed = 1 + random.seed = 1, + verbose = TRUE ) } \arguments{ @@ -36,6 +37,8 @@ prune everything). (default 1/15)} \item{nIterations}{Maximal number of iterations per random start. (default 100)} \item{random.seed}{Seed of the random number generator. (default 1)} + +\item{verbose}{Print messages (TRUE by default)} } \value{ \code{liger} object with refined 'clusters' slot set. @@ -47,7 +50,7 @@ small clusters into broad cell classes. } \examples{ \dontrun{ -# liger object, factorization complete +# ligerex (liger object), factorization complete ligerex <- louvainCluster(ligerex, resulotion = 0.3) } diff --git a/man/makeInteractTrack.Rd b/man/makeInteractTrack.Rd old mode 100755 new mode 100644 index a2774393..96faaeaa --- a/man/makeInteractTrack.Rd +++ b/man/makeInteractTrack.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{makeInteractTrack} \alias{makeInteractTrack} \title{Export predicted gene-pair interaction} @@ -25,8 +25,7 @@ Export the predicted gene-pair interactions calculated by upstream function 'lin } \examples{ \dontrun{ -regent <- readRDS("../testdata/small_gmat.RDS") # some gene-peak correlation matrix +# some gene-peak correlation matrix: regent makeInteractTrack(regnet, path_to_coords = 'some_path_to_gene_coordinates/hg19_genes.bed') } - } diff --git a/man/makeRiverplot.Rd b/man/makeRiverplot.Rd old mode 100755 new mode 100644 index 6d488cb3..4b1b0b3e --- a/man/makeRiverplot.Rd +++ b/man/makeRiverplot.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{makeRiverplot} \alias{makeRiverplot} \title{Generate a river (Sankey) plot} @@ -56,6 +56,9 @@ between the nodes (default 0.1).} \item{node.order}{Order of clusters in each set (list with three vectors of ordinal numbers). By default will try to automatically order them appropriately.} } +\value{ +A riverplot object +} \description{ Creates a riverplot to show how separate cluster assignments from two datasets map onto a joint clustering. The joint clustering is by default the object clustering, but an external one @@ -63,13 +66,11 @@ can also be passed in. Uses the riverplot package to construct riverplot object } \examples{ \dontrun{ -# liger object, factorization done -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete input # toy clusters -cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = T) +cluster1 <- sample(c('type1', 'type2', 'type3'), ncol(ligerex@raw.data[[1]]), replace = TRUE) names(cluster1) <- colnames(ligerex@raw.data[[1]]) -cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = T) +cluster2 <- sample(c('type4', 'type5', 'type6'), ncol(ligerex@raw.data[[2]]), replace = TRUE) names(cluster2) <- colnames(ligerex@raw.data[[2]]) # create riverplot makeRiverplot(ligerex, cluster1, cluster2) diff --git a/man/mergeH5.Rd b/man/mergeH5.Rd index aedc58ea..4befc43e 100644 --- a/man/mergeH5.Rd +++ b/man/mergeH5.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{mergeH5} \alias{mergeH5} \title{Merge hdf5 files} @@ -44,6 +44,10 @@ before they are preprocessed through Liger pipeline. } \examples{ \dontrun{ +# For instance, we want to merge two datasets saved in HDF5 files (10X CellRanger) +# paths to datasets: "library1.h5","library2.h5" +# dataset names: "lib1", "lib2" +# name for output HDF5 file: "merged.h5" mergeH5(list("library1.h5","library2.h5"), c("lib1","lib2"), "merged.h5") } } diff --git a/man/nnzeroGroups.Rd b/man/nnzeroGroups.Rd old mode 100755 new mode 100644 index d8a66478..1844d764 --- a/man/nnzeroGroups.Rd +++ b/man/nnzeroGroups.Rd @@ -25,12 +25,3 @@ Matrix of groups by features \description{ Utility function to compute number of zeros-per-feature within group } -\examples{ - -\dontrun{ -data(exprs) -data(y) -nnz_res <- nnzeroGroups(exprs, y, 1) -nnz_res <- nnzeroGroups(t(exprs), y, 2) -} -} diff --git a/man/nonneg.Rd b/man/nonneg.Rd index 05f5f3c7..9b82c0b2 100644 --- a/man/nonneg.Rd +++ b/man/nonneg.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{nonneg} \alias{nonneg} \title{Perform thresholding on dense matrix} diff --git a/man/normalize.Rd b/man/normalize.Rd old mode 100755 new mode 100644 index 580bff4e..daa761c7 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -1,10 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{normalize} \alias{normalize} \title{Normalize raw datasets to column sums} \usage{ -normalize(object, chunk = 1000, format.type = "10X", remove.missing = TRUE) +normalize( + object, + chunk = 1000, + format.type = "10X", + remove.missing = TRUE, + verbose = TRUE +) } \arguments{ \item{object}{\code{liger} object.} @@ -14,8 +20,10 @@ normalize(object, chunk = 1000, format.type = "10X", remove.missing = TRUE) \item{format.type}{string of HDF5 format (10X CellRanger by default).} \item{remove.missing}{Whether to remove cells not expressing any measured genes, and genes not -expressed in any cells (if take.gene.union = T, removes only genes not expressed in any +expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with norm.data slot set. @@ -24,10 +32,9 @@ dataset) (default TRUE).} This function normalizes data to account for total gene expression across a cell. } \examples{ -\dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +# Demonstration using matrices with randomly generated numbers +Y <- matrix(runif(5000,0,2), 10,500) +Z <- matrix(runif(5000,0,2), 10,500) ligerex <- createLiger(list(y_set = Y, z_set = Z)) ligerex <- normalize(ligerex) } -} diff --git a/man/online_iNMF.Rd b/man/online_iNMF.Rd index dcc568a0..ccb0f1e4 100644 --- a/man/online_iNMF.Rd +++ b/man/online_iNMF.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{online_iNMF} \alias{online_iNMF} \title{Perform online iNMF on scaled datasets} @@ -19,7 +19,8 @@ online_iNMF( miniBatch_max_iters = 1, miniBatch_size = 5000, h5_chunk_size = 1000, - seed = 123 + seed = 123, + verbose = TRUE ) } \arguments{ @@ -58,6 +59,8 @@ of cells in the smallest dataset.} \item{h5_chunk_size}{Chunk size of input hdf5 files (default 1000). The chunk size should be no larger than the batch size.} \item{seed}{Random seed to allow reproducible results (default 123).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with H, W, V, A and B slots set. @@ -77,15 +80,9 @@ across datasets. The V matrices represent the dataset-specific components of the } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -# get factorization using 20 factors from scratch -ligerex <- online(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) +# Requires preprocessed liger object +# Get factorization using 20 factors and mini-batch of 5000 cells +# (default setting, can be adjusted for ideal results) +ligerex <- online_iNMF(ligerex, k = 20, lambda = 5, miniBatch_size = 5000) } - } diff --git a/man/optimizeALS.Rd b/man/optimizeALS.Rd old mode 100755 new mode 100644 index 10a3e0f7..d0ab1e33 --- a/man/optimizeALS.Rd +++ b/man/optimizeALS.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{optimizeALS} \alias{optimizeALS} \alias{optimizeALS.list} @@ -20,6 +20,7 @@ optimizeALS(object, ...) V.init = NULL, rand.seed = 1, print.obj = FALSE, + verbose = TRUE, ... ) @@ -35,6 +36,7 @@ optimizeALS(object, ...) V.init = NULL, rand.seed = 1, print.obj = FALSE, + verbose = TRUE, ... ) } @@ -70,6 +72,8 @@ factorizations of the same dataset can be run with one rep if necessary. (defaul \item{rand.seed}{Random seed to allow reproducible results (default 1).} \item{print.obj}{Print objective function values after convergence (default FALSE).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with H, W, and V slots set. @@ -87,15 +91,9 @@ across datasets. The V matrices represent the dataset-specific components of the } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -# get factorization using three restarts and 20 factors -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) +# Requires preprocessed liger object (only for objected not based on HDF5 files) +# Get factorization using 20 factors and mini-batch of 5000 cells +# (default setting, can be adjusted for ideal results) +ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 1) } - } diff --git a/man/optimizeNewData.Rd b/man/optimizeNewData.Rd old mode 100755 new mode 100644 index 4f2668ce..e8877d1b --- a/man/optimizeNewData.Rd +++ b/man/optimizeNewData.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{optimizeNewData} \alias{optimizeNewData} \title{Perform factorization for new data} @@ -8,10 +8,11 @@ optimizeNewData( object, new.data, which.datasets, - add.to.existing = T, + add.to.existing = TRUE, lambda = NULL, thresh = 1e-04, - max.iters = 100 + max.iters = 100, + verbose = TRUE ) } \arguments{ @@ -32,6 +33,8 @@ optimizeALS.} (default 1e-4).} \item{max.iters}{Maximum number of block coordinate descent iterations to perform (default 100).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with H, W, and V slots reset. Raw.data, norm.data, and scale.data will @@ -43,23 +46,15 @@ factorization. Assumes that selected genes (var.genes) are represented in the ne } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) +# Given preprocessed liger object: ligerex (contains two datasets Y and Z) # get factorization using three restarts and 20 factors ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) -# acquire new data from the same cell type, let's add it to existing datasets -Y_new <- matrix(41:52, nrow = 4, byrow = T) -Z_new <- matrix(c(51:57, 56:52), nrow = 4, byrow = T) +# acquire new data (Y_new, Z_new) from the same cell type, let's add it to existing datasets new_data <- list(Y_set = Y_new, Z_set = Z_new) ligerex2 <- optimizeNewData(ligerex, new.data = new_data, which.datasets = list('y_set', 'z_set')) -# acquire new data from different cell type, we'll just add another dataset -X <- matrix(35:46, nrow = 4, byrow = T) +# acquire new data from different cell type (X), we'll just add another dataset # it's probably most similar to y_set ligerex <- optimizeNewData(ligerex, new.data = list(x_set = X), which.datasets = list('y_set'), - add.to.existing = F) + add.to.existing = FALSE) } } diff --git a/man/optimizeNewK.Rd b/man/optimizeNewK.Rd old mode 100755 new mode 100644 index fe5445d5..3279c5e7 --- a/man/optimizeNewK.Rd +++ b/man/optimizeNewK.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{optimizeNewK} \alias{optimizeNewK} \title{Perform factorization for new value of k} @@ -10,7 +10,8 @@ optimizeNewK( lambda = NULL, thresh = 1e-04, max.iters = 100, - rand.seed = 1 + rand.seed = 1, + verbose = TRUE ) } \arguments{ @@ -27,6 +28,8 @@ optimizeALS.} \item{max.iters}{Maximum number of block coordinate descent iterations to perform (default 100).} \item{rand.seed}{Random seed to set. Only relevant if k.new > k. (default 1)} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with H, W, and V slots reset. @@ -38,14 +41,6 @@ where it is more likely to speed up the factorization. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -# get factorization using three restarts and 20 factors -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) # decide to run with k = 15 instead (keeping old lambda the same) ligerex <- optimizeNewK(ligerex, k.new = 15) } diff --git a/man/optimizeNewLambda.Rd b/man/optimizeNewLambda.Rd old mode 100755 new mode 100644 index cb5fd30a..dd6f1f87 --- a/man/optimizeNewLambda.Rd +++ b/man/optimizeNewLambda.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{optimizeNewLambda} \alias{optimizeNewLambda} \title{Perform factorization for new lambda value} @@ -9,7 +9,8 @@ optimizeNewLambda( new.lambda, thresh = 1e-04, max.iters = 100, - rand.seed = 1 + rand.seed = 1, + verbose = TRUE ) } \arguments{ @@ -23,6 +24,8 @@ strongly.} \item{max.iters}{Maximum number of block coordinate descent iterations to perform (default 100).} \item{rand.seed}{Random seed for reproducibility (default 1).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with optimized factorization values @@ -34,14 +37,6 @@ new lambda value is significantly different; otherwise may not return optimal re } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -# get factorization using three restarts and 20 factors -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) # decide to run with lambda = 15 instead (keeping k the same) ligerex <- optimizeNewLambda(ligerex, new.lambda = 15) } diff --git a/man/optimizeSubset.Rd b/man/optimizeSubset.Rd old mode 100755 new mode 100644 index 46a743d1..7cc719ec --- a/man/optimizeSubset.Rd +++ b/man/optimizeSubset.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{optimizeSubset} \alias{optimizeSubset} \title{Perform factorization for subset of data} @@ -43,17 +43,8 @@ functionality (without automatic optimization), see subsetLiger. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -colnames(Y) <- c('a', 'b', 'c') -colnames(Z) <- c('p', 'q', 'r') -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -# get factorization using three restarts and 20 factors -ligerex <- optimizeALS(ligerex, k = 20, lambda = 5, nrep = 3) # now want to look at only subset of data -ligerex2 <- optimizeSubset(ligerex, cell.subset = list(c('a', 'b'), c('q'))) +# Requires a vector of cell names from data 1 and a vector of cell names from data 2 +ligerex2 <- optimizeSubset(ligerex, cell.subset = list(cell_names_1, cell_names_2)) } } diff --git a/man/pipe.Rd b/man/pipe.Rd deleted file mode 100755 index ba2176fd..00000000 --- a/man/pipe.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R -\name{\%>\%} -\alias{\%>\%} -\title{Pipe operator} -\value{ -return value of rhs function. -} -\description{ -Pipe operator -} -\examples{ -\dontrun{ -x <- 5 \%>\% sum(10) -} -} diff --git a/man/plotByDatasetAndCluster.Rd b/man/plotByDatasetAndCluster.Rd old mode 100755 new mode 100644 index 22d85f08..2512dd4d --- a/man/plotByDatasetAndCluster.Rd +++ b/man/plotByDatasetAndCluster.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotByDatasetAndCluster} \alias{plotByDatasetAndCluster} \title{Plot t-SNE coordinates of cells across datasets} @@ -10,14 +10,14 @@ plotByDatasetAndCluster( title = NULL, pt.size = 0.3, text.size = 3, - do.shuffle = T, + do.shuffle = TRUE, rand.seed = 1, axis.labels = NULL, - do.legend = T, + do.legend = TRUE, legend.size = 5, - reorder.idents = F, + reorder.idents = FALSE, new.order = NULL, - return.plots = F, + return.plots = FALSE, legend.fonts.size = 12 ) } @@ -65,13 +65,12 @@ names match those of cells). } \examples{ \dontrun{ - # liger object with aligned factor loadings -ligerex +# ligerex (liger object), factorization complete # get tsne.coords for normalized data ligerex <- runTSNE(ligerex) # plot to console plotByDatasetAndCluster(ligerex) # return list of plots -plots <- plotByDatasetAndCluster(ligerex, return.plots = T) +plots <- plotByDatasetAndCluster(ligerex, return.plots = TRUE) } } diff --git a/man/plotClusterFactors.Rd b/man/plotClusterFactors.Rd old mode 100755 new mode 100644 index da741ef5..6d813bea --- a/man/plotClusterFactors.Rd +++ b/man/plotClusterFactors.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotClusterFactors} \alias{plotClusterFactors} \title{Plot heatmap of cluster/factor correspondence} \usage{ plotClusterFactors( object, - use.aligned = F, + use.aligned = FALSE, Rowv = NA, Colv = "Rowv", col = NULL, - return.data = F, + return.data = FALSE, ... ) } @@ -43,9 +43,8 @@ factors and clusters. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object), factorization complete input # plot expression for CD4 and return plots -loading.matrix <- plotClusterFactors(ligerex, return.data = T) +loading.matrix <- plotClusterFactors(ligerex, return.data = TRUE) } } diff --git a/man/plotClusterProportions.Rd b/man/plotClusterProportions.Rd old mode 100755 new mode 100644 index ad4a8a83..5fbe344e --- a/man/plotClusterProportions.Rd +++ b/man/plotClusterProportions.Rd @@ -1,24 +1,27 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotClusterProportions} \alias{plotClusterProportions} \title{Plot cluster proportions by dataset} \usage{ -plotClusterProportions(object, return.plot = F) +plotClusterProportions(object, return.plot = FALSE) } \arguments{ \item{object}{\code{liger} object. Should call quantileAlignSNF before calling.} \item{return.plot}{Return ggplot object (default FALSE)} } +\value{ +print plot to console (return.plot = FALSE); ggplot object (return.plot = TRUE) + list of ggplot objects. +} \description{ Generates plot of clusters sized by the proportion of total cells } \examples{ \dontrun{ -# liger object, factorization complete -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete input +ligerex <- quantile_norm(ligerex) # plot cluster proportions plotClusterProportions(ligerex) } diff --git a/man/plotFactors.Rd b/man/plotFactors.Rd old mode 100755 new mode 100644 index dd05c58c..621c8858 --- a/man/plotFactors.Rd +++ b/man/plotFactors.Rd @@ -1,10 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotFactors} \alias{plotFactors} \title{Plot scatter plots of unaligned and aligned factor loadings} \usage{ -plotFactors(object, num.genes = 10, cells.highlight = NULL, plot.tsne = F) +plotFactors( + object, + num.genes = 10, + cells.highlight = NULL, + plot.tsne = FALSE, + verbose = TRUE +) } \arguments{ \item{object}{\code{liger} object. Should call quantileAlignSNF before calling.} @@ -14,6 +20,8 @@ plotFactors(object, num.genes = 10, cells.highlight = NULL, plot.tsne = F) \item{cells.highlight}{Names of specific cells to highlight in plot (black) (default NULL).} \item{plot.tsne}{Plot t-SNE coordinates for each factor (default FALSE).} + +\item{verbose}{Print messages (TRUE by default)} } \value{ Plots to console (1-2 pages per factor) @@ -29,9 +37,8 @@ plots produced. } \examples{ \dontrun{ - # liger object with factorization complete -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete +ligerex <- quantile_norm(ligerex) # get tsne.coords for normalized data ligerex <- runTSNE(ligerex) # factor plots into pdf file diff --git a/man/plotFeature.Rd b/man/plotFeature.Rd old mode 100755 new mode 100644 index 3cf52508..d9a1dbb0 --- a/man/plotFeature.Rd +++ b/man/plotFeature.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotFeature} \alias{plotFeature} \title{Plot specific feature on t-SNE coordinates} @@ -7,21 +7,21 @@ plotFeature( object, feature, - by.dataset = T, + by.dataset = TRUE, discrete = NULL, title = NULL, pt.size = 0.3, text.size = 3, - do.shuffle = T, + do.shuffle = TRUE, rand.seed = 1, - do.labels = F, + do.labels = FALSE, axis.labels = NULL, - do.legend = T, + do.legend = TRUE, legend.size = 5, option = "plasma", cols.use = NULL, zero.color = "#F5F5F5", - return.plots = F + return.plots = FALSE ) } \arguments{ @@ -69,12 +69,11 @@ List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots t \description{ Generates one plot for each dataset, colored by chosen feature (column) from cell.data slot. Feature can be categorical (factor) or continuous. -Can also plot all datasets combined with by.dataset = F. +Can also plot all datasets combined with by.dataset = FALSE. } \examples{ \dontrun{ - # liger object with aligned factor loadings -ligerex +# ligerex (liger object), factorization complete # get tsne.coords for normalized data ligerex <- runTSNE(ligerex) # plot nUMI to console diff --git a/man/plotGene.Rd b/man/plotGene.Rd old mode 100755 new mode 100644 index d3d8a1ec..cecec670 --- a/man/plotGene.Rd +++ b/man/plotGene.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotGene} \alias{plotGene} \title{Plot gene expression on dimensional reduction (t-SNE) coordinates} @@ -7,25 +7,25 @@ plotGene( object, gene, - use.raw = F, - use.scaled = F, + use.raw = FALSE, + use.scaled = FALSE, scale.by = "dataset", log2scale = NULL, methylation.indices = NULL, plot.by = "dataset", - set.dr.lims = F, + set.dr.lims = FALSE, pt.size = 0.1, min.clip = NULL, max.clip = NULL, - clip.absolute = F, - points.only = F, + clip.absolute = FALSE, + points.only = FALSE, option = "plasma", cols.use = NULL, zero.color = "#F5F5F5", axis.labels = NULL, - do.legend = T, - return.plots = F, - keep.scale = F + do.legend = TRUE, + return.plots = FALSE, + keep.scale = FALSE ) } \arguments{ @@ -42,7 +42,7 @@ plotGene( or 'none' for scaling across all cells) (default 'dataset').} \item{log2scale}{Whether to show log2 transformed values or original normalized, raw, or scaled -values (as stored in object). Default value is FALSE if use.raw = T, otherwise TRUE.} +values (as stored in object). Default value is FALSE if use.raw = TRUE, otherwise TRUE.} \item{methylation.indices}{Indices of datasets in object with methylation data (this data is not log transformed and must use normalized values). (default NULL)} @@ -57,12 +57,12 @@ plots created (default FALSE).} \item{pt.size}{Point size for plots (default 0.1).} \item{min.clip}{Minimum value for expression values plotted. Can pass in quantile (0-1) or -absolute cutoff (set clip.absolute = T). Can also pass in vector if expecting multiple plots; +absolute cutoff (set clip.absolute = TRUE). Can also pass in vector if expecting multiple plots; users are encouraged to pass in named vector (from levels of desired feature) to avoid mismatches in order (default NULL).} \item{max.clip}{Maximum value for expression values plotted. Can pass in quantile (0-1) or -absolute cutoff (set clip.absolute = T). Can also pass in vector if expecting multiple plots; +absolute cutoff (set clip.absolute = TRUE). Can also pass in vector if expecting multiple plots; users are encouraged to pass in named vector (from levels of desired feature) to avoid mismatches in order (default NULL).} @@ -97,13 +97,13 @@ all cells). Data plots can be split by feature. } \examples{ \dontrun{ -# liger object, factorization complete +# ligerex (liger object based on in-memory datasets), factorization complete ligerex ligerex <- runTSNE(ligerex) # plot expression for CD4 and return plots -gene_plots <- plotGene(ligerex, "CD4", return.plots = T) -HDF5 input +gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) +# ligerex (liger object based on datasets in HDF5 format), factorization complete input ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -gene_plots <- plotGene(ligerex, "CD4", return.plots = T) +gene_plots <- plotGene(ligerex, "CD4", return.plots = TRUE) } } diff --git a/man/plotGeneLoadings.Rd b/man/plotGeneLoadings.Rd old mode 100755 new mode 100644 index f7207f5e..264543a5 --- a/man/plotGeneLoadings.Rd +++ b/man/plotGeneLoadings.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotGeneLoadings} \alias{plotGeneLoadings} \title{Generate t-SNE plots and gene loading plots} @@ -10,20 +10,21 @@ plotGeneLoadings( dataset2 = NULL, num.genes.show = 12, num.genes = 30, - mark.top.genes = T, + mark.top.genes = TRUE, factor.share.thresh = 10, log.fc.thresh = 1, umi.thresh = 30, frac.thresh = 0, pval.thresh = 0.05, - do.spec.plot = T, + do.spec.plot = TRUE, max.val = 0.1, pt.size = 0.1, option = "plasma", zero.color = "#F5F5F5", - return.plots = F, + return.plots = FALSE, axis.labels = NULL, - do.title = F + do.title = FALSE, + verbose = TRUE ) } \arguments{ @@ -70,6 +71,12 @@ NULL to revert to default gradient scaling. (default 0.1)} \item{axis.labels}{Vector of two strings to use as x and y labels respectively (default NULL).} \item{do.title}{Include top title with cluster and Dataset Specificity (default FALSE).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} +} +\value{ +List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to + console). } \description{ Plots t-SNE coordinates of all cells by their loadings on each factor. Underneath it displays the @@ -81,14 +88,13 @@ plots produced. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object based on in-memory datasets), factorization complete ligerex <- quantile_norm(ligerex) ligerex <- runUMAP(ligerex) # pdf("gene_loadings.pdf") plotGeneLoadings(ligerex, num.genes = 20) # dev.off() -# HDF5 input +# ligerex (liger object based on datasets in HDF5 format), factorization complete input ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) plotGeneLoadings(ligerex, num.genes = 20) } diff --git a/man/plotGeneViolin.Rd b/man/plotGeneViolin.Rd old mode 100755 new mode 100644 index f774a30b..66e59c28 --- a/man/plotGeneViolin.Rd +++ b/man/plotGeneViolin.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotGeneViolin} \alias{plotGeneViolin} \title{Plot violin plots for gene expression} @@ -8,8 +8,8 @@ plotGeneViolin( object, gene, methylation.indices = NULL, - by.dataset = T, - return.plots = F + by.dataset = TRUE, + return.plots = FALSE ) } \arguments{ @@ -25,17 +25,20 @@ magnified and put on log scale).} \item{return.plots}{Return ggplot objects instead of printing directly to console (default FALSE).} } +\value{ +List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to + console). +} \description{ Generates violin plots of expression of specified gene for each dataset. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object based on in-memory datasets), factorization complete # plot expression for CD4 and return plots -violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = T) -HDF5 input +violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) +# ligerex (liger object based on datasets in HDF5 format), factorization complete input ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) -violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = T) +violin_plots <- plotGeneViolin(ligerex, "CD4", return.plots = TRUE) } } diff --git a/man/plotGenes.Rd b/man/plotGenes.Rd old mode 100755 new mode 100644 index d42ef0bf..22fec2d5 --- a/man/plotGenes.Rd +++ b/man/plotGenes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotGenes} \alias{plotGenes} \title{Plot expression of multiple genes} @@ -11,7 +11,11 @@ plotGenes(object, genes, ...) \item{genes}{Vector of gene names.} -\item{...}{arguments passed from \code{\link[liger]{plotGene}}} +\item{...}{arguments passed from \code{\link[rliger]{plotGene}}} +} +\value{ +If returning single plot, returns ggplot object; if returning multiple plots; returns + list of ggplot objects. } \description{ Uses plotGene to plot each gene (and dataset) on a separate page. It is recommended to call this @@ -19,8 +23,7 @@ function into a PDF due to the large number of plots produced. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object), factorization complete input ligerex <- runTSNE(ligerex) # plot expression for CD4 and FCGR3A # pdf("gene_plots.pdf") diff --git a/man/plotWordClouds.Rd b/man/plotWordClouds.Rd old mode 100755 new mode 100644 index 858f55b4..f946523a --- a/man/plotWordClouds.Rd +++ b/man/plotWordClouds.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{plotWordClouds} \alias{plotWordClouds} \title{Generate word clouds and t-SNE plots} @@ -14,8 +14,9 @@ plotWordClouds( factor.share.thresh = 10, log.fc.thresh = 1, pval.thresh = 0.05, - do.spec.plot = T, - return.plots = F + do.spec.plot = TRUE, + return.plots = FALSE, + verbose = TRUE ) } \arguments{ @@ -43,6 +44,12 @@ threshold (default 10).} \item{do.spec.plot}{Include dataset specificity plot in printout (default TRUE).} \item{return.plots}{Return ggplot objects instead of printing directly (default FALSE).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} +} +\value{ +List of ggplot plot objects (only if return.plots TRUE, otherwise prints plots to + console). } \description{ Plots t-SNE coordinates of all cells by their loadings on each factor. Underneath it displays the @@ -54,14 +61,13 @@ plots produced. } \examples{ \dontrun{ -# liger object, factorization complete -ligerex -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object based on in-memory datasets), factorization complete +ligerex <- quantile_norm(ligerex) ligerex <- runTSNE(ligerex) # pdf('word_clouds.pdf') plotWordClouds(ligerex, num.genes = 20) # dev.off() -# HDF5 input +# ligerex (liger object based on datasets in HDF5 format), factorization complete input ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) plotWordClouds(ligerex, num.genes = 20) } diff --git a/man/quantileAlignSNF.Rd b/man/quantileAlignSNF.Rd old mode 100755 new mode 100644 diff --git a/man/quantile_norm.Rd b/man/quantile_norm.Rd old mode 100755 new mode 100644 index b5a1661c..5f86098f --- a/man/quantile_norm.Rd +++ b/man/quantile_norm.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{quantile_norm} \alias{quantile_norm} \alias{quantile_norm.list} @@ -15,10 +15,10 @@ quantile_norm(object, ...) min_cells = 20, knn_k = 20, dims.use = NULL, - do.center = F, + do.center = FALSE, max_sample = 1000, eps = 0.9, - refine.knn = T, + refine.knn = TRUE, rand.seed = 1, ... ) @@ -30,10 +30,10 @@ quantile_norm(object, ...) min_cells = 20, knn_k = 20, dims.use = NULL, - do.center = F, + do.center = FALSE, max_sample = 1000, eps = 0.9, - refine.knn = T, + refine.knn = TRUE, rand.seed = 1, ... ) @@ -87,8 +87,7 @@ aligned factor loadings are combined into a single matrix and returned as H.norm } \examples{ \dontrun{ -# liger object, factorization complete -ligerex +# ligerex (liger object), factorization complete # do basic quantile alignment ligerex <- quantile_norm(ligerex) # higher resolution for more clusters (note that SNF is conserved) @@ -96,5 +95,4 @@ ligerex <- quantile_norm(ligerex, resolution = 1.2) # change knn_k for more fine-grained local clustering ligerex <- quantile_norm(ligerex, knn_k = 15, resolution = 1.2) } - } diff --git a/man/rank_matrix.Rd b/man/rank_matrix.Rd old mode 100755 new mode 100644 index 2970a5c2..77a78398 --- a/man/rank_matrix.Rd +++ b/man/rank_matrix.Rd @@ -17,17 +17,7 @@ rank_matrix(X) } \value{ List with 2 items -\itemize{ -\item X_ranked - matrix of entry ranks -\item ties - list of tied group sizes -} } \description{ Utility function to rank columns of matrix } -\examples{ -\dontrun{ -data(exprs) -rank_res <- rank_matrix(exprs) -} -} diff --git a/man/read10X.Rd b/man/read10X.Rd old mode 100755 new mode 100644 index 0d2d619c..62561ad6 --- a/man/read10X.Rd +++ b/man/read10X.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{read10X} \alias{read10X} \title{Read 10X alignment data (including V3)} @@ -7,12 +7,13 @@ read10X( sample.dirs, sample.names, - merge = T, + merge = TRUE, num.cells = NULL, min.umis = 0, - use.filtered = F, + use.filtered = FALSE, reference = NULL, - data.type = "rna" + data.type = "rna", + verbose = TRUE ) } \arguments{ @@ -38,6 +39,8 @@ level 10X directory (only necessary if more than one reference used for sequenci \item{data.type}{Indicates the protocol of the input data. If not specified, input data will be considered scRNA-seq data (default 'rna', alternatives: 'atac').} + +\item{verbose}{Print messages (TRUE by default)} } \value{ List of merged matrices across data types (returns sparse matrix if only one data type diff --git a/man/readSubset.Rd b/man/readSubset.Rd index 37fae09e..f0462a11 100644 --- a/man/readSubset.Rd +++ b/man/readSubset.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{readSubset} \alias{readSubset} \title{Sample data for plotting} @@ -12,7 +12,8 @@ readSubset( chunk = 1000, datasets.use = NULL, genes.use = NULL, - rand.seed = 1 + rand.seed = 1, + verbose = TRUE ) } \arguments{ @@ -32,8 +33,9 @@ balance="cluster" samples up to max_cells from each cluster.} \item{genes.use}{samples from only the specified genes. Default is NULL (all genes)} -\item{rand.seed}{for reproducibility (default 1). -#Note:} +\item{rand.seed}{for reproducibility (default 1).} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with sample.data slot set. @@ -44,13 +46,8 @@ This function assumes that the cell barcodes are unique across all datasets. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) +# Only for online liger object (based on HDF5 files) +# Example: sample a total amount of 5000 cells from norm.data for downstream analysis ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 5000) } } diff --git a/man/removeMissingObs.Rd b/man/removeMissingObs.Rd old mode 100755 new mode 100644 index 1a964cf3..1135dee1 --- a/man/removeMissingObs.Rd +++ b/man/removeMissingObs.Rd @@ -1,10 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{removeMissingObs} \alias{removeMissingObs} \title{Remove cells/genes with no expression across any genes/cells} \usage{ -removeMissingObs(object, slot.use = "raw.data", use.cols = T) +removeMissingObs( + object, + slot.use = "raw.data", + use.cols = TRUE, + verbose = TRUE +) } \arguments{ \item{object}{\code{liger} object (scale.data or norm.data must be set).} @@ -12,6 +17,8 @@ removeMissingObs(object, slot.use = "raw.data", use.cols = T) \item{slot.use}{The data slot to filter (takes "raw.data" and "scale.data") (default "raw.data").} \item{use.cols}{Treat each column as a cell (default TRUE).} + +\item{verbose}{Print messages (TRUE by default)} } \value{ \code{liger} object with modified raw.data (or chosen slot) (dataset names preserved). @@ -21,8 +28,7 @@ Removes cells/genes from chosen slot with no expression in any genes or cells re } \examples{ \dontrun{ -# liger object -ligerex +# liger object: ligerex ligerex <- removeMissingObs(ligerex) } } diff --git a/man/reorganizeLiger.Rd b/man/reorganizeLiger.Rd old mode 100755 new mode 100644 index 25ff0116..09174895 --- a/man/reorganizeLiger.Rd +++ b/man/reorganizeLiger.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{reorganizeLiger} \alias{reorganizeLiger} \title{Construct a liger object organized by another feature} @@ -7,7 +7,7 @@ reorganizeLiger( object, by.feature, - keep.meta = T, + keep.meta = TRUE, new.label = "orig.dataset", ... ) @@ -33,8 +33,8 @@ This removes most computed data slots, though cell.data and current clustering c } \examples{ \dontrun{ -# liger object organized by species, with column designating sex in cell.data -ligerex +# ligerex (liger object based on in-memory objects) organized by species +# with column designating sex in cell.data # rearrange by sex ligerex_new <- reorganizeLiger(ligerex, by.feature = "sex", new.label = "species") } diff --git a/man/restoreOnlineLiger.Rd b/man/restoreOnlineLiger.Rd index f67e19b4..6a49c74d 100644 --- a/man/restoreOnlineLiger.Rd +++ b/man/restoreOnlineLiger.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{restoreOnlineLiger} \alias{restoreOnlineLiger} \title{Restore links (to hdf5 files) for reloaded online Liger object} @@ -20,6 +20,9 @@ the restoration of those links so that new analyses can be carried out. } \examples{ \dontrun{ +# We want to restore the ligerex (liger object based on HDF5 files) +# It has broken connections to HDF5 files +# Call the following function and provide the paths to the correspoinding files ligerex = restoreOnlineLiger(ligerex, file.path = list("path1/library1.h5", "path2/library2.h5")) } } diff --git a/man/runGSEA.Rd b/man/runGSEA.Rd index dbfbfb91..a32e9ea9 100644 --- a/man/runGSEA.Rd +++ b/man/runGSEA.Rd @@ -1,10 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{runGSEA} \alias{runGSEA} \title{Analyze biological interpretations of metagene} \usage{ -runGSEA(object, gene_sets = c(), mat_w = T, mat_v = 0, custom_gene_sets = c()) +runGSEA( + object, + gene_sets = c(), + mat_w = TRUE, + mat_v = 0, + custom_gene_sets = c() +) } \arguments{ \item{object}{\code{liger} object.} @@ -28,16 +34,7 @@ Identify the biological pathways (gene sets from Reactome) that each metagene (f } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -X <- matrix(c(1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z, x_set = X)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 20) -ligerex <- quantil_norm(ligerex) +# ligerex (liger object), factorization complete wilcox.results <- runGSEA(ligerex) wilcox.results <- runGSEA(ligerex, mat_v = c(1, 2)) } diff --git a/man/runTSNE.Rd b/man/runTSNE.Rd old mode 100755 new mode 100644 index 91efbe82..b1590917 --- a/man/runTSNE.Rd +++ b/man/runTSNE.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{runTSNE} \alias{runTSNE} \title{Perform t-SNE dimensionality reduction} \usage{ runTSNE( object, - use.raw = F, + use.raw = FALSE, dims.use = 1:ncol(object@H.norm), - use.pca = F, + use.pca = FALSE, perplexity = 30, theta = 0.5, method = "Rtsne", @@ -55,13 +55,12 @@ to runTSNE. For more detailed FIt-SNE installation instructions, see the liger r } \examples{ \dontrun{ - # liger object -ligerex -# generate H.norm by quantile aligning factor loadings -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object), factorization complete +# generate H.norm by quantile normalizig factor loadings +ligerex <- quantile_norm(ligerex) # get tsne.coords for normalized data ligerex <- runTSNE(ligerex) # get tsne.coords for raw factor loadings -ligerex <- runTSNE(ligerex, use.raw = T) +ligerex <- runTSNE(ligerex, use.raw = TRUE) } } diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd old mode 100755 new mode 100644 index b1bdc5fc..00087453 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{runUMAP} \alias{runUMAP} \title{Perform UMAP dimensionality reduction} \usage{ runUMAP( object, - use.raw = F, + use.raw = FALSE, dims.use = 1:ncol(object@H.norm), k = 2, distance = "euclidean", @@ -54,13 +54,12 @@ on reticulate or python umap-learn. } \examples{ \dontrun{ - # liger object with factorization complete -ligerex -# generate H.norm by quantile aligning factor loadings +# ligerex (liger object), factorization complete +# generate H.norm by quantile normalizig factor loadings ligerex <- quantileAlignSNF(ligerex) # get tsne.coords for normalized data ligerex <- runUMAP(ligerex) # get tsne.coords for raw factor loadings -ligerex <- runUMAP(ligerex, use.raw = T) +ligerex <- runUMAP(ligerex, use.raw = TRUE) } } diff --git a/man/runWilcoxon.Rd b/man/runWilcoxon.Rd old mode 100755 new mode 100644 index eacd7da5..4dfbbdc2 --- a/man/runWilcoxon.Rd +++ b/man/runWilcoxon.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{runWilcoxon} \alias{runWilcoxon} \title{Perform Wilcoxon rank-sum test} @@ -21,23 +21,13 @@ Perform Wilcoxon rank-sum tests on specified dataset using given method. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -X <- matrix(c(1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z, x_set = X)) -ligerex <- normalize(ligerex) -# select genes -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) -ligerex <- optimizeALS(ligerex, k = 20) -ligerex <- quantileAlignSNF(ligerex) +# ligerex (liger object based on in-memory datasets), factorization complete wilcox.results <- runWilcoxon(ligerex, compare.method = "cluster") wilcox.results <- runWilcoxon(ligerex, compare.method = "datastes", data.use = c(1, 2)) # HDF5 input -ligerex <- online_iNMF(ligerex, k = 20) -ligerex <- quantile_norm(ligerex) +# ligerex (liger object based on datasets in HDF5 format), factorization complete +# Need to sample cells before implementing Wilcoxon test ligerex <- readSubset(ligerex, slot.use = "norm.data", max.cells = 1000) de_genes <- runWilcoxon(ligerex, compare.method = "clusters") } - } diff --git a/man/scaleNotCenter.Rd b/man/scaleNotCenter.Rd old mode 100755 new mode 100644 index 3a7aebfb..6082c595 --- a/man/scaleNotCenter.Rd +++ b/man/scaleNotCenter.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{scaleNotCenter} \alias{scaleNotCenter} \title{Scale genes by root-mean-square across cells} \usage{ -scaleNotCenter(object, remove.missing = T, chunk = 1000) +scaleNotCenter(object, remove.missing = TRUE, chunk = 1000, verbose = TRUE) } \arguments{ \item{object}{\code{liger} object. Should call normalize and selectGenes before calling.} @@ -13,6 +13,8 @@ scaleNotCenter(object, remove.missing = T, chunk = 1000) (default TRUE).} \item{chunk}{size of chunks in hdf5 file. (default 1000)} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ \code{liger} object with scale.data slot set. @@ -25,11 +27,10 @@ expression across the genes selected, by default. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +# Given datasets Y and Z ligerex <- createLiger(list(y_set = Y, z_set = Z)) ligerex <- normalize(ligerex) -# select genes +# use default selectGenes settings (var.thresh = 0.1) ligerex <- selectGenes(ligerex) ligerex <- scaleNotCenter(ligerex) } diff --git a/man/selectGenes.Rd b/man/selectGenes.Rd old mode 100755 new mode 100644 index ccc285fb..44665eaa --- a/man/selectGenes.Rd +++ b/man/selectGenes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{selectGenes} \alias{selectGenes} \title{Select a subset of informative genes} @@ -12,8 +12,8 @@ selectGenes( tol = 1e-04, datasets.use = 1:length(object@raw.data), combine = "union", - capitalize = F, - do.plot = F, + capitalize = FALSE, + do.plot = FALSE, cex.use = 0.3, chunk = 1000 ) @@ -64,13 +64,12 @@ expression across genes and cells). Selected genes are plotted in green. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) +# Given datasets Y and Z ligerex <- createLiger(list(y_set = Y, z_set = Z)) ligerex <- normalize(ligerex) -# use default selectGenes settings +# use default selectGenes settings (var.thresh = 0.1) ligerex <- selectGenes(ligerex) # select a smaller subset of genes -ligerex <- selectGenes(ligerex, var.thresh=0.8) +ligerex <- selectGenes(ligerex, var.thresh = 0.3) } } diff --git a/man/seuratToLiger.Rd b/man/seuratToLiger.Rd old mode 100755 new mode 100644 index 48f943cc..39a2e071 --- a/man/seuratToLiger.Rd +++ b/man/seuratToLiger.Rd @@ -1,23 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{seuratToLiger} \alias{seuratToLiger} \title{Create liger object from one or more Seurat objects} \usage{ seuratToLiger( objects, - combined.seurat = F, + combined.seurat = FALSE, names = "use-projects", meta.var = NULL, assays.use = NULL, raw.assay = "RNA", - remove.missing = T, - renormalize = T, - use.seurat.genes = T, + remove.missing = TRUE, + renormalize = TRUE, + use.seurat.genes = TRUE, num.hvg.info = NULL, - use.idents = T, - use.tsne = T, - cca.to.H = F + use.idents = TRUE, + use.tsne = TRUE, + cca.to.H = FALSE ) } \arguments{ @@ -87,6 +87,6 @@ ligerex2 <- seuratToLiger(list(tenx, seqwell), names = c('tenx', 'seqwell'), num # Seurat object for joint analysis pbmc <- readRDS('pbmc.RDS') # create liger object, using 'protocol' for dataset names -ligerex3 <- seuratToLiger(pbmc, combined.seurat = T, meta.var = 'protocol', num.hvg.info = 2000) +ligerex3 <- seuratToLiger(pbmc, combined.seurat = TRUE, meta.var = 'protocol', num.hvg.info = 2000) } } diff --git a/man/show-methods.Rd b/man/show-methods.Rd old mode 100755 new mode 100644 index 4618570e..55da3d41 --- a/man/show-methods.Rd +++ b/man/show-methods.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \docType{methods} \name{show} \alias{show} diff --git a/man/subsetLiger.Rd b/man/subsetLiger.Rd old mode 100755 new mode 100644 index 0f44f016..98c98480 --- a/man/subsetLiger.Rd +++ b/man/subsetLiger.Rd @@ -1,10 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{subsetLiger} \alias{subsetLiger} \title{Construct a liger object with a specified subset} \usage{ -subsetLiger(object, clusters.use = NULL, cells.use = NULL, remove.missing = T) +subsetLiger( + object, + clusters.use = NULL, + cells.use = NULL, + remove.missing = TRUE +) } \arguments{ \item{object}{\code{liger} object. Should run quantileAlignSNF and runTSNE before calling.} @@ -27,9 +32,8 @@ Note that it does NOT reoptimize the factorization. See optimizeSubset for this } \examples{ \dontrun{ -# liger object, with clusters 0:10 +# ligerex (liger object based on in-memory datasets), with clusters 0:10 # factorization, alignment, and t-SNE calculation have been performed -ligerex # subset by clusters ligerex_subset <- subsetLiger(ligerex, clusters.use = c(1, 4, 5)) } diff --git a/man/suggestK.Rd b/man/suggestK.Rd old mode 100755 new mode 100644 index ff1be553..09b2a1e9 --- a/man/suggestK.Rd +++ b/man/suggestK.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{suggestK} \alias{suggestK} \title{Visually suggest appropiate k value} @@ -12,11 +12,12 @@ suggestK( max.iters = 100, num.cores = 1, rand.seed = 1, - gen.new = F, + gen.new = FALSE, nrep = 1, - plot.log2 = T, - return.data = F, - return.raw = F + plot.log2 = TRUE, + return.data = FALSE, + return.raw = FALSE, + verbose = TRUE ) } \arguments{ @@ -49,6 +50,8 @@ instead of ggplot object (default FALSE).} \item{return.raw}{If return.results TRUE, whether to return raw data (in format described below), or dataframe used to produce ggplot object. Raw data is list of matrices of K-L divergences (length(k.test) by n_cells). Length of list corresponds to nrep. (default FALSE)} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ Matrix of results if indicated or ggplot object. Plots K-L divergence vs. k to console. @@ -64,12 +67,7 @@ Depending on number of cores used, this process can take 10-20 minutes. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) +# Requires preprocessed liger object # examine plot for most appropriate k, use multiple cores for faster results suggestK(ligerex, num.cores = 4) } diff --git a/man/suggestLambda.Rd b/man/suggestLambda.Rd old mode 100755 new mode 100644 index 4c353c52..f055850e --- a/man/suggestLambda.Rd +++ b/man/suggestLambda.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/liger.R +% Please edit documentation in R/rliger.R \name{suggestLambda} \alias{suggestLambda} \title{Visually suggest appropriate lambda value} @@ -16,10 +16,11 @@ suggestLambda( k2 = 500, ref_dataset = NULL, resolution = 1, - gen.new = F, + gen.new = FALSE, nrep = 1, - return.data = F, - return.raw = F + return.data = FALSE, + return.raw = FALSE, + verbose = TRUE ) } \arguments{ @@ -58,6 +59,8 @@ instead of ggplot object (default FALSE).} \item{return.raw}{If return.results TRUE, whether to return raw data (in format described below), or dataframe used to produce ggplot object. Raw data is matrix of alignment values for each lambda value tested (each column represents a different rep for nrep).(default FALSE)} + +\item{verbose}{Print progress bar/messages (TRUE by default)} } \value{ Matrix of results if indicated or ggplot object. Plots alignment vs. lambda to console. @@ -71,12 +74,7 @@ this process can take 10-20 minutes. } \examples{ \dontrun{ -Y <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), nrow = 4, byrow = T) -Z <- matrix(c(1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2), nrow = 4, byrow = T) -ligerex <- createLiger(list(y_set = Y, z_set = Z)) -ligerex <- normalize(ligerex) -ligerex <- selectGenes(ligerex) -ligerex <- scaleNotCenter(ligerex) +# Requires preprocessed liger object # examine plot for most appropriate lambda, use multiple cores for faster results suggestLambda(ligerex, k = 20, num.cores = 4) } diff --git a/man/sumGroups.Rd b/man/sumGroups.Rd old mode 100755 new mode 100644 index f6c18ec9..5572a97b --- a/man/sumGroups.Rd +++ b/man/sumGroups.Rd @@ -25,11 +25,3 @@ Matrix of groups by features \description{ Utility function to sum over group labels } -\examples{ -\dontrun{ -data(exprs) -data(y) -sumGroups_res <- sumGroups(exprs, y, 1) -sumGroups_res <- sumGroups(t(exprs), y, 2) -} -} diff --git a/liger.Rproj b/rliger.Rproj similarity index 100% rename from liger.Rproj rename to rliger.Rproj diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d9022f6a..f51c32b4 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,7 +1,7 @@ // Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -#include "../inst/include/liger.h" +#include "../inst/include/rliger.h" #include #include #include @@ -10,7 +10,7 @@ using namespace Rcpp; // RunModularityClusteringCpp IntegerVector RunModularityClusteringCpp(Eigen::SparseMatrix SNN, int modularityFunction, double resolution, int algorithm, int nRandomStarts, int nIterations, int randomSeed, bool printOutput, std::string edgefilename); -RcppExport SEXP _liger_RunModularityClusteringCpp(SEXP SNNSEXP, SEXP modularityFunctionSEXP, SEXP resolutionSEXP, SEXP algorithmSEXP, SEXP nRandomStartsSEXP, SEXP nIterationsSEXP, SEXP randomSeedSEXP, SEXP printOutputSEXP, SEXP edgefilenameSEXP) { +RcppExport SEXP _rliger_RunModularityClusteringCpp(SEXP SNNSEXP, SEXP modularityFunctionSEXP, SEXP resolutionSEXP, SEXP algorithmSEXP, SEXP nRandomStartsSEXP, SEXP nIterationsSEXP, SEXP randomSeedSEXP, SEXP printOutputSEXP, SEXP edgefilenameSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -29,7 +29,7 @@ END_RCPP } // scaleNotCenterFast arma::sp_mat scaleNotCenterFast(arma::sp_mat x); -RcppExport SEXP _liger_scaleNotCenterFast(SEXP xSEXP) { +RcppExport SEXP _rliger_scaleNotCenterFast(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -40,7 +40,7 @@ END_RCPP } // rowMeansFast NumericVector rowMeansFast(arma::sp_mat x); -RcppExport SEXP _liger_rowMeansFast(SEXP xSEXP) { +RcppExport SEXP _rliger_rowMeansFast(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -51,7 +51,7 @@ END_RCPP } // rowVarsFast NumericVector rowVarsFast(arma::sp_mat x, NumericVector means); -RcppExport SEXP _liger_rowVarsFast(SEXP xSEXP, SEXP meansSEXP) { +RcppExport SEXP _rliger_rowVarsFast(SEXP xSEXP, SEXP meansSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -63,7 +63,7 @@ END_RCPP } // sumSquaredDeviations NumericVector sumSquaredDeviations(arma::sp_mat x, NumericVector means); -RcppExport SEXP _liger_sumSquaredDeviations(SEXP xSEXP, SEXP meansSEXP) { +RcppExport SEXP _rliger_sumSquaredDeviations(SEXP xSEXP, SEXP meansSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -75,7 +75,7 @@ END_RCPP } // cpp_sumGroups_dgc arma::mat cpp_sumGroups_dgc(const arma::vec& x, const arma::uvec& p, const arma::vec& i, unsigned ncol, const arma::uvec& groups, unsigned ngroups); -RcppExport SEXP _liger_cpp_sumGroups_dgc(SEXP xSEXP, SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_sumGroups_dgc(SEXP xSEXP, SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -91,7 +91,7 @@ END_RCPP } // cpp_sumGroups_dgc_T arma::mat cpp_sumGroups_dgc_T(const arma::vec& x, const arma::vec& p, const arma::vec& i, int ncol, int nrow, const arma::uvec& groups, int ngroups); -RcppExport SEXP _liger_cpp_sumGroups_dgc_T(SEXP xSEXP, SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP nrowSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_sumGroups_dgc_T(SEXP xSEXP, SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP nrowSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -108,7 +108,7 @@ END_RCPP } // cpp_sumGroups_dense arma::mat cpp_sumGroups_dense(const arma::mat& X, const arma::uvec& groups, unsigned ngroups); -RcppExport SEXP _liger_cpp_sumGroups_dense(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_sumGroups_dense(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -121,7 +121,7 @@ END_RCPP } // cpp_sumGroups_dense_T arma::mat cpp_sumGroups_dense_T(const arma::mat& X, const arma::uvec& groups, unsigned ngroups); -RcppExport SEXP _liger_cpp_sumGroups_dense_T(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_sumGroups_dense_T(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -134,7 +134,7 @@ END_RCPP } // cpp_nnzeroGroups_dense arma::mat cpp_nnzeroGroups_dense(const arma::mat& X, const arma::uvec& groups, unsigned ngroups); -RcppExport SEXP _liger_cpp_nnzeroGroups_dense(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_nnzeroGroups_dense(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -147,7 +147,7 @@ END_RCPP } // cpp_nnzeroGroups_dense_T arma::mat cpp_nnzeroGroups_dense_T(const arma::mat& X, const arma::uvec& groups, unsigned ngroups); -RcppExport SEXP _liger_cpp_nnzeroGroups_dense_T(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_nnzeroGroups_dense_T(SEXP XSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -160,7 +160,7 @@ END_RCPP } // cpp_nnzeroGroups_dgc arma::mat cpp_nnzeroGroups_dgc(const arma::uvec& p, const arma::vec& i, unsigned ncol, const arma::uvec& groups, unsigned ngroups); -RcppExport SEXP _liger_cpp_nnzeroGroups_dgc(SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_nnzeroGroups_dgc(SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -175,7 +175,7 @@ END_RCPP } // cpp_in_place_rank_mean std::list cpp_in_place_rank_mean(arma::vec& v_temp, int idx_begin, int idx_end); -RcppExport SEXP _liger_cpp_in_place_rank_mean(SEXP v_tempSEXP, SEXP idx_beginSEXP, SEXP idx_endSEXP) { +RcppExport SEXP _rliger_cpp_in_place_rank_mean(SEXP v_tempSEXP, SEXP idx_beginSEXP, SEXP idx_endSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -188,7 +188,7 @@ END_RCPP } // cpp_rank_matrix_dgc std::vector > cpp_rank_matrix_dgc(arma::vec& x, const arma::vec& p, int nrow, int ncol); -RcppExport SEXP _liger_cpp_rank_matrix_dgc(SEXP xSEXP, SEXP pSEXP, SEXP nrowSEXP, SEXP ncolSEXP) { +RcppExport SEXP _rliger_cpp_rank_matrix_dgc(SEXP xSEXP, SEXP pSEXP, SEXP nrowSEXP, SEXP ncolSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -202,7 +202,7 @@ END_RCPP } // cpp_rank_matrix_dense Rcpp::List cpp_rank_matrix_dense(arma::mat& X); -RcppExport SEXP _liger_cpp_rank_matrix_dense(SEXP XSEXP) { +RcppExport SEXP _rliger_cpp_rank_matrix_dense(SEXP XSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -213,7 +213,7 @@ END_RCPP } // cpp_nnzeroGroups_dgc_T arma::mat cpp_nnzeroGroups_dgc_T(const arma::vec& p, const arma::vec& i, int ncol, int nrow, const arma::uvec& groups, int ngroups); -RcppExport SEXP _liger_cpp_nnzeroGroups_dgc_T(SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP nrowSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { +RcppExport SEXP _rliger_cpp_nnzeroGroups_dgc_T(SEXP pSEXP, SEXP iSEXP, SEXP ncolSEXP, SEXP nrowSEXP, SEXP groupsSEXP, SEXP ngroupsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -229,7 +229,7 @@ END_RCPP } // makeFeatureMatrix NumericMatrix makeFeatureMatrix(DataFrame& bedmat, StringVector& barcodes); -RcppExport SEXP _liger_makeFeatureMatrix(SEXP bedmatSEXP, SEXP barcodesSEXP) { +RcppExport SEXP _rliger_makeFeatureMatrix(SEXP bedmatSEXP, SEXP barcodesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -241,7 +241,7 @@ END_RCPP } // cluster_vote IntegerVector cluster_vote(const Eigen::MatrixXd& nn_ranked, IntegerVector clusts); -RcppExport SEXP _liger_cluster_vote(SEXP nn_rankedSEXP, SEXP clustsSEXP) { +RcppExport SEXP _rliger_cluster_vote(SEXP nn_rankedSEXP, SEXP clustsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -253,7 +253,7 @@ END_RCPP } // scale_columns_fast Eigen::MatrixXd scale_columns_fast(Eigen::MatrixXd mat, bool scale, bool center); -RcppExport SEXP _liger_scale_columns_fast(SEXP matSEXP, SEXP scaleSEXP, SEXP centerSEXP) { +RcppExport SEXP _rliger_scale_columns_fast(SEXP matSEXP, SEXP scaleSEXP, SEXP centerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -266,7 +266,7 @@ END_RCPP } // max_factor IntegerVector max_factor(Eigen::MatrixXd H, IntegerVector dims_use, bool center_cols); -RcppExport SEXP _liger_max_factor(SEXP HSEXP, SEXP dims_useSEXP, SEXP center_colsSEXP) { +RcppExport SEXP _rliger_max_factor(SEXP HSEXP, SEXP dims_useSEXP, SEXP center_colsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -279,7 +279,7 @@ END_RCPP } // solveNNLS arma::mat solveNNLS(const arma::mat& C, const arma::mat& B); -RcppExport SEXP _liger_solveNNLS(SEXP CSEXP, SEXP BSEXP) { +RcppExport SEXP _rliger_solveNNLS(SEXP CSEXP, SEXP BSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -291,7 +291,7 @@ END_RCPP } // ComputeSNN Eigen::SparseMatrix ComputeSNN(Eigen::MatrixXd nn_ranked, double prune); -RcppExport SEXP _liger_ComputeSNN(SEXP nn_rankedSEXP, SEXP pruneSEXP) { +RcppExport SEXP _rliger_ComputeSNN(SEXP nn_rankedSEXP, SEXP pruneSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -303,7 +303,7 @@ END_RCPP } // WriteEdgeFile void WriteEdgeFile(Eigen::SparseMatrix snn, String filename, bool display_progress); -RcppExport SEXP _liger_WriteEdgeFile(SEXP snnSEXP, SEXP filenameSEXP, SEXP display_progressSEXP) { +RcppExport SEXP _rliger_WriteEdgeFile(SEXP snnSEXP, SEXP filenameSEXP, SEXP display_progressSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Eigen::SparseMatrix >::type snn(snnSEXP); @@ -315,7 +315,7 @@ END_RCPP } // DirectSNNToFile Eigen::SparseMatrix DirectSNNToFile(Eigen::MatrixXd nn_ranked, double prune, bool display_progress, String filename); -RcppExport SEXP _liger_DirectSNNToFile(SEXP nn_rankedSEXP, SEXP pruneSEXP, SEXP display_progressSEXP, SEXP filenameSEXP) { +RcppExport SEXP _rliger_DirectSNNToFile(SEXP nn_rankedSEXP, SEXP pruneSEXP, SEXP display_progressSEXP, SEXP filenameSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -329,34 +329,34 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_liger_RunModularityClusteringCpp", (DL_FUNC) &_liger_RunModularityClusteringCpp, 9}, - {"_liger_scaleNotCenterFast", (DL_FUNC) &_liger_scaleNotCenterFast, 1}, - {"_liger_rowMeansFast", (DL_FUNC) &_liger_rowMeansFast, 1}, - {"_liger_rowVarsFast", (DL_FUNC) &_liger_rowVarsFast, 2}, - {"_liger_sumSquaredDeviations", (DL_FUNC) &_liger_sumSquaredDeviations, 2}, - {"_liger_cpp_sumGroups_dgc", (DL_FUNC) &_liger_cpp_sumGroups_dgc, 6}, - {"_liger_cpp_sumGroups_dgc_T", (DL_FUNC) &_liger_cpp_sumGroups_dgc_T, 7}, - {"_liger_cpp_sumGroups_dense", (DL_FUNC) &_liger_cpp_sumGroups_dense, 3}, - {"_liger_cpp_sumGroups_dense_T", (DL_FUNC) &_liger_cpp_sumGroups_dense_T, 3}, - {"_liger_cpp_nnzeroGroups_dense", (DL_FUNC) &_liger_cpp_nnzeroGroups_dense, 3}, - {"_liger_cpp_nnzeroGroups_dense_T", (DL_FUNC) &_liger_cpp_nnzeroGroups_dense_T, 3}, - {"_liger_cpp_nnzeroGroups_dgc", (DL_FUNC) &_liger_cpp_nnzeroGroups_dgc, 5}, - {"_liger_cpp_in_place_rank_mean", (DL_FUNC) &_liger_cpp_in_place_rank_mean, 3}, - {"_liger_cpp_rank_matrix_dgc", (DL_FUNC) &_liger_cpp_rank_matrix_dgc, 4}, - {"_liger_cpp_rank_matrix_dense", (DL_FUNC) &_liger_cpp_rank_matrix_dense, 1}, - {"_liger_cpp_nnzeroGroups_dgc_T", (DL_FUNC) &_liger_cpp_nnzeroGroups_dgc_T, 6}, - {"_liger_makeFeatureMatrix", (DL_FUNC) &_liger_makeFeatureMatrix, 2}, - {"_liger_cluster_vote", (DL_FUNC) &_liger_cluster_vote, 2}, - {"_liger_scale_columns_fast", (DL_FUNC) &_liger_scale_columns_fast, 3}, - {"_liger_max_factor", (DL_FUNC) &_liger_max_factor, 3}, - {"_liger_solveNNLS", (DL_FUNC) &_liger_solveNNLS, 2}, - {"_liger_ComputeSNN", (DL_FUNC) &_liger_ComputeSNN, 2}, - {"_liger_WriteEdgeFile", (DL_FUNC) &_liger_WriteEdgeFile, 3}, - {"_liger_DirectSNNToFile", (DL_FUNC) &_liger_DirectSNNToFile, 4}, + {"_rliger_RunModularityClusteringCpp", (DL_FUNC) &_rliger_RunModularityClusteringCpp, 9}, + {"_rliger_scaleNotCenterFast", (DL_FUNC) &_rliger_scaleNotCenterFast, 1}, + {"_rliger_rowMeansFast", (DL_FUNC) &_rliger_rowMeansFast, 1}, + {"_rliger_rowVarsFast", (DL_FUNC) &_rliger_rowVarsFast, 2}, + {"_rliger_sumSquaredDeviations", (DL_FUNC) &_rliger_sumSquaredDeviations, 2}, + {"_rliger_cpp_sumGroups_dgc", (DL_FUNC) &_rliger_cpp_sumGroups_dgc, 6}, + {"_rliger_cpp_sumGroups_dgc_T", (DL_FUNC) &_rliger_cpp_sumGroups_dgc_T, 7}, + {"_rliger_cpp_sumGroups_dense", (DL_FUNC) &_rliger_cpp_sumGroups_dense, 3}, + {"_rliger_cpp_sumGroups_dense_T", (DL_FUNC) &_rliger_cpp_sumGroups_dense_T, 3}, + {"_rliger_cpp_nnzeroGroups_dense", (DL_FUNC) &_rliger_cpp_nnzeroGroups_dense, 3}, + {"_rliger_cpp_nnzeroGroups_dense_T", (DL_FUNC) &_rliger_cpp_nnzeroGroups_dense_T, 3}, + {"_rliger_cpp_nnzeroGroups_dgc", (DL_FUNC) &_rliger_cpp_nnzeroGroups_dgc, 5}, + {"_rliger_cpp_in_place_rank_mean", (DL_FUNC) &_rliger_cpp_in_place_rank_mean, 3}, + {"_rliger_cpp_rank_matrix_dgc", (DL_FUNC) &_rliger_cpp_rank_matrix_dgc, 4}, + {"_rliger_cpp_rank_matrix_dense", (DL_FUNC) &_rliger_cpp_rank_matrix_dense, 1}, + {"_rliger_cpp_nnzeroGroups_dgc_T", (DL_FUNC) &_rliger_cpp_nnzeroGroups_dgc_T, 6}, + {"_rliger_makeFeatureMatrix", (DL_FUNC) &_rliger_makeFeatureMatrix, 2}, + {"_rliger_cluster_vote", (DL_FUNC) &_rliger_cluster_vote, 2}, + {"_rliger_scale_columns_fast", (DL_FUNC) &_rliger_scale_columns_fast, 3}, + {"_rliger_max_factor", (DL_FUNC) &_rliger_max_factor, 3}, + {"_rliger_solveNNLS", (DL_FUNC) &_rliger_solveNNLS, 2}, + {"_rliger_ComputeSNN", (DL_FUNC) &_rliger_ComputeSNN, 2}, + {"_rliger_WriteEdgeFile", (DL_FUNC) &_rliger_WriteEdgeFile, 3}, + {"_rliger_DirectSNNToFile", (DL_FUNC) &_rliger_DirectSNNToFile, 4}, {NULL, NULL, 0} }; -RcppExport void R_init_liger(DllInfo *dll) { +RcppExport void R_init_rliger(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } diff --git a/vignettes/online_iNMF_tutorial.Rmd b/vignettes/online_iNMF_tutorial.Rmd index bf47986d..0f608e02 100644 --- a/vignettes/online_iNMF_tutorial.Rmd +++ b/vignettes/online_iNMF_tutorial.Rmd @@ -1,7 +1,7 @@ --- title: "Iterative single-cell multi-omic integration using online learning" author: "Chao Gao and Joshua Welch" -date: "Dec 22, 2020" +date: "3/8/2020" output: html_document --- @@ -19,7 +19,7 @@ install_github("welch-lab/liger") We first create a Liger object by passing the filenames of HDF5 files containing the raw count data. The data can be downloaded [here](https://umich.box.com/s/m5cojytshxvk44pc4kki6clh6c9sv7ai). Liger assumes by default that the HDF5 files are formatted by the 10X CellRanger pipeline. Large datasets are often generated over multiple 10X runs (for example, multiple biological replicates). In such cases it may be necessary to merge the HDF5 files from each run into a single HDF5 file. We provide the `mergeH5` function for this purpose (see below for details). ```{r create.object, message=FALSE, results='hide'} -library(liger) +library(rliger) pbmcs = createLiger(list(stim = "pbmcs_stim.h5", ctrl = "pbmcs_ctrl.h5")) ``` @@ -52,13 +52,13 @@ calcAlignment(pbmcs) ``` With HDF5 files as input, we need to sample the raw, normalized, or scaled data from the full dataset on disk and load them in memory. Some plotting functions and downstream analyses are designed to operate on a subset of cells sampled from the full dataset. This enables rapid analysis using limited memory. The readSubset function allows either uniform random sampling or sampling balanced by cluster. Here we extract the normalized count data of 5000 sampled cells. -```{r subset, results='hide'} +```{r subset, message=FALSE, results='hide'} pbmcs = readSubset(pbmcs, slot.use = "norm.data", max.cells = 5000) ``` Using the sampled data stored in memory, we can now compare clusters or datasets (within each cluster) to identify differentially expressed genes. The runWilcoxon function performs differential expression analysis by performing an in-memory Wilcoxon rank-sum test on this subset. Thus, users can still analyze large datasets with a fixed amount of memory. -```{r wilcox 1, results='hide'} +```{r wilcox 1, message=FALSE, results='hide'} de_genes = runWilcoxon(pbmcs, compare.method = "datasets") ``` diff --git a/vignettes/online_iNMF_tutorial.html b/vignettes/online_iNMF_tutorial.html index 781a1456..cdaf0301 100644 --- a/vignettes/online_iNMF_tutorial.html +++ b/vignettes/online_iNMF_tutorial.html @@ -425,7 +425,7 @@

Iterative single-cell multi-omic integration using online learning

Chao Gao and Joshua Welch

-

Dec 22, 2020

+

3/8/2020

@@ -436,7 +436,7 @@

Scenario 1: sampling minibatches from fully observed datasets

library(devtools)
 install_github("welch-lab/liger")

We first create a Liger object by passing the filenames of HDF5 files containing the raw count data. The data can be downloaded here. Liger assumes by default that the HDF5 files are formatted by the 10X CellRanger pipeline. Large datasets are often generated over multiple 10X runs (for example, multiple biological replicates). In such cases it may be necessary to merge the HDF5 files from each run into a single HDF5 file. We provide the mergeH5 function for this purpose (see below for details).

-
library(liger)
+
library(rliger)
 pbmcs = createLiger(list(stim = "pbmcs_stim.h5", ctrl = "pbmcs_ctrl.h5"))

We then perform the normalization, gene selection, and gene scaling in an online fashion, reading the data from disk in small batches.

pbmcs = normalize(pbmcs)