diff --git a/DESCRIPTION b/DESCRIPTION index c049871..6305eb7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,6 +55,7 @@ Imports: lifecycle Collate: 'RcppExports.R' + 'bigwig.R' 'data.R' 'differential_accessibility.R' 'generics.R' diff --git a/NAMESPACE b/NAMESPACE index b7e06a8..2f0de2e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -126,6 +126,7 @@ export(CreateMotifObject) export(DensityScatter) export(DepthCor) export(DownsampleFeatures) +export(ExportGroupBW) export(ExpressionPlot) export(Extend) export(FRiP) @@ -227,6 +228,7 @@ importFrom(BiocGenerics,end) importFrom(BiocGenerics,sort) importFrom(BiocGenerics,start) importFrom(BiocGenerics,strand) +importFrom(BiocGenerics,which) importFrom(BiocGenerics,width) importFrom(GenomeInfoDb,"genome<-") importFrom(GenomeInfoDb,"isCircular<-") @@ -246,6 +248,7 @@ importFrom(GenomeInfoDb,seqlevels) importFrom(GenomeInfoDb,seqnames) importFrom(GenomeInfoDb,sortSeqlevels) importFrom(GenomicRanges,GRanges) +importFrom(GenomicRanges,coverage) importFrom(GenomicRanges,disjoin) importFrom(GenomicRanges,end) importFrom(GenomicRanges,findOverlaps) @@ -255,6 +258,7 @@ importFrom(GenomicRanges,makeGRangesFromDataFrame) importFrom(GenomicRanges,reduce) importFrom(GenomicRanges,resize) importFrom(GenomicRanges,seqnames) +importFrom(GenomicRanges,slidingWindows) importFrom(GenomicRanges,start) importFrom(GenomicRanges,strand) importFrom(GenomicRanges,tileGenome) @@ -301,6 +305,7 @@ importFrom(Rsamtools,scanTabix) importFrom(Rsamtools,seqnamesTabix) importFrom(S4Vectors,"mcols<-") importFrom(S4Vectors,elementNROWS) +importFrom(S4Vectors,match) importFrom(S4Vectors,mcols) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,split) @@ -337,6 +342,7 @@ importFrom(dplyr,top_n) importFrom(dplyr,ungroup) importFrom(fastmatch,fmatch) importFrom(future,nbrOfWorkers) +importFrom(future,plan) importFrom(future.apply,future_lapply) importFrom(ggplot2,aes) importFrom(ggplot2,aes_string) @@ -402,6 +408,7 @@ importFrom(methods,slotNames) importFrom(patchwork,guide_area) importFrom(patchwork,plot_layout) importFrom(patchwork,wrap_plots) +importFrom(pbapply,pbapply) importFrom(pbapply,pblapply) importFrom(rlang,.data) importFrom(scales,comma) diff --git a/R/bigwig.R b/R/bigwig.R new file mode 100644 index 0000000..0b55625 --- /dev/null +++ b/R/bigwig.R @@ -0,0 +1,242 @@ +#' Export bigwig files for groups of cells +#' +#' @param object A Seurat object +#' @param assay Name of assay to use +#' @param group.by The metadata variable used to group the cells +#' @param idents Identities to include (defined by group.by parameter) +#' @param normMethod Normalization method for the bigwig files. Deafult 'RC'. +#' 'RC' will divide the number of fragments in a tile by the total number of +#' fragments in the group. A scaling factor of 10^4 will be applied. +#' 'ncells' will divide the number of fragments in a tile by the number of cell +#' in the group. 'none' will apply no normalization method. +#' A vector of values for each cell can also be passed as a metadata column +#' name. A scaling factor of 10^4 will be applied +#' @param tileSize The size of the tiles in the bigwig file +#' @param minCells The minimum of cells in a group to be exported +#' @param cutoff The maximum number of fragments in a given genomic tile +#' @param chromosome A chromosomes vector to be exported +#' @param outdir Directory to write output files (splitted bed and bigwigs) +#' @param verbose Display messages +#' +#' @importFrom GenomicRanges GRanges slidingWindows +#' @importFrom future plan nbrOfWorkers +#' @importFrom future.apply future_lapply +#' @importFrom pbapply pbapply +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' ExportGroupBW(object, assay = "peaks") +#' } +ExportGroupBW <- function( + object, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = getwd(), + verbose = TRUE +) { + # Check if temporary directory exist + if (!dir.exists(outdir)){ + dir.create(outdir) + } + if (length(Fragments(object)) == 0) { + stop("This object does not have Fragments, cannot generate bigwig.") + } + # Get chromosome information + if(!is.null(x = chromosome)){ + seqlevels(object) <- chromosome + } + chromLengths <- seqlengths(object) + if (is.null(chromLengths)) { + stop("Object has no seqlength, bigwig coverages cannot be evaluated.") + } + availableChr <- names(chromLengths) + chromSizes <- GRanges( + seqnames = availableChr, + ranges = IRanges( + start = rep(1, length(x = availableChr)), + end = as.numeric(x = chromLengths) + ) + ) + assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) + obj.groups <- GetGroups( + object = object, + group.by = group.by, + idents = idents + ) + GroupsNames <- names(x = table(obj.groups)[table(obj.groups) > minCells]) + # Check if output files already exist + lapply(X = GroupsNames, FUN = function(x) { + fn <- paste0(outdir, .Platform$file.sep, x, ".bed") + if (file.exists(fn)) { + message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) + file.remove(fn) + } + }) + # Splitting fragments file for each idents in group.by + SplitFragments( + object = object, + assay = assay, + group.by = group.by, + idents = idents, + outdir = outdir, + file.suffix = "", + append = TRUE, + buffer_length = 256L, + verbose = verbose + ) + # Column to normalized by + if(!is.null(x = normMethod)) { + if (tolower(x = normMethod) %in% c('rc', 'ncells', 'none')){ + normBy <- normMethod + } else{ + normBy <- object[[normMethod, drop = FALSE]] + } + } + + if (verbose) { + message("Creating tiles") + } + # Create tiles for each chromosome, from GenomicRanges + tiles <- unlist( + x = slidingWindows(x = chromSizes, width = tileSize, step = tileSize) + ) + if (verbose) { + message("Creating bigwig files at ", outdir) + } + # Run the creation of bigwig for each cellgroups + if (nbrOfWorkers() > 1) { + mylapply <- future_lapply + } else { + mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) + } + covFiles <- mylapply( + GroupsNames, + FUN = CreateBWGroup, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir + ) + return(covFiles) +} + +# Helper function for ExportGroupBW +# +# @param groupNamei The group to be exported +# @param availableChr Chromosomes to be processed +# @param chromLengths Chromosomes lengths +# @param tiles The tiles object +# @param normBy A vector of values to normalized the cells by +# @param tileSize The size of the tiles in the bigwig file +# @param normMethod Normalization method for the bigwig files +# 'RC' will divide the number of fragments in a tile by the number of fragments +# in the group. A scaling factor of 10^4 will be applied +# 'ncells' will divide the number of fragments in a tile by the number of cell +# in the group. 'none' will apply no normalization method. A vector of values +# for each cell can also be passed as a meta.data column name. A scaling factor +# of 10^4 will be applied +# @param cutoff The maximum number of fragment in a given tile +# @param outdir The output directory for bigwig file +# +#' @importFrom GenomicRanges seqnames GRanges coverage +#' @importFrom BiocGenerics which +#' @importFrom S4Vectors match +#' @importFrom Matrix sparseMatrix rowSums +CreateBWGroup <- function( + groupNamei, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir +) { + if (!requireNamespace("rtracklayer", quietly = TRUE)) { + message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") + return(NULL) + } + normMethod <- tolower(x = normMethod) + # Read the fragments file associated to the group + fragi <- rtracklayer::import( + paste0(outdir, .Platform$file.sep, groupNamei, ".bed"), + format = "bed" + ) + cellGroupi <- unique(x = fragi$name) + # Open the writing bigwig file + covFile <- file.path( + outdir, + paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw") + ) + + covList <- lapply(X = seq_along(availableChr), FUN = function(k) { + fragik <- fragi[seqnames(fragi) == availableChr[k],] + tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] + if (length(x = fragik) == 0) { + tilesk$reads <- 0 + # If fragments + } else { + # N Tiles + nTiles <- chromLengths[availableChr[k]] / tileSize + # Add one tile if there is extra bases + if (nTiles%%1 != 0) { + nTiles <- trunc(x = nTiles) + 1 + } + # Create Sparse Matrix + matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) + + # For each tiles of this chromosome, create start tile and end tile row, + # set the associated counts matching with the fragments + # This changes compared to ArchR version 1.0.2 + # See https://github.com/GreenleafLab/ArchR/issues/2214 + mat <- sparseMatrix( + i = c(trunc(x = (start(x = fragik) - 1) / tileSize), + trunc(x = (end(x = fragik) - 1) / tileSize)) + 1, + j = as.vector(x = c(matchID, matchID)), + x = rep(1, 2*length(x = fragik)), + dims = c(nTiles, length(x = cellGroupi)) + ) + + # Max count for a cells in a tile is set to cutoff + if (!is.null(x = cutoff)){ + mat@x[mat@x > cutoff] <- cutoff + } + # Sums the cells + mat <- rowSums(x = mat) + tilesk$reads <- mat + # Normalization + if (!is.null(x = normMethod)) { + if (normMethod == "rc") { + tilesk$reads <- tilesk$reads * 10^4 / length(fragi$name) + } else if (normMethod == "ncells") { + tilesk$reads <- tilesk$reads / length(cellGroupi) + } else if (normMethod == "none") { + } else { + if (!is.null(x = normBy)){ + tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) + } + } + } + } + tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] + tilesk + }) + + names(covList) <- availableChr + covList <- as(object = covList, Class = "RleList") + rtracklayer::export.bw(object = covList, con = covFile) + return(covFile) +} diff --git a/R/utilities.R b/R/utilities.R index 148b7bb..e11c9fb 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -599,12 +599,12 @@ GetGRangesFromEnsDb <- function( # extract genes from each chromosome my_lapply <- ifelse(test = verbose, yes = pblapply, no = lapply) tx <- my_lapply(X = seq_along(whole.genome), FUN = function(x){ - suppressMessages(expr = biovizBase::crunch( - obj = ensdb, - which = whole.genome[x], - columns = c("tx_id", "gene_name", "gene_id", "gene_biotype"))) - }) - + suppressMessages(expr = biovizBase::crunch( + obj = ensdb, + which = whole.genome[x], + columns = c("tx_id", "gene_name", "gene_id", "gene_biotype"))) + }) + # combine tx <- do.call(what = c, args = tx) tx <- tx[tx$gene_biotype %in% biotypes] @@ -1714,7 +1714,7 @@ SingleFileCutMatrix <- function( ) cut.df <- cut.df[ (cut.df$position > 0) & (cut.df$position <= width(x = region)[[1]]), - ] + ] cell.vector <- seq_along(along.with = cells) names(x = cell.vector) <- cells cell.matrix.info <- cell.vector[cut.df$cell] @@ -2192,7 +2192,6 @@ MergeOverlappingRows <- function( for (i in seq_along(along.with = assay.list)) { # get count matrix counts <- GetAssayData(object = assay.list[[i]], layer = slot) - if (nrow(x = counts) == 0) { # no counts, only data # skip row merge and return empty counts matrices @@ -2443,8 +2442,8 @@ SparsifiedRanks <- function(X){ x = lapply( X = seq_along(col_lst), FUN = function(i) rank(x = col_lst[[i]]) + offsets[i] - ) ) + ) ## Create template rank matrix X.ranks <- X X.ranks@x <- sparsified_ranks @@ -2457,7 +2456,7 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { if (is.null(Y)){ # Calculate pearson correlation on rank matrices return (corSparse(X = rankX, cov = cov)) - } + } rankY <- SparsifiedRanks(X = Y) return(corSparse(X = rankX, Y = rankY, cov = cov)) } diff --git a/man/ExportGroupBW.Rd b/man/ExportGroupBW.Rd new file mode 100644 index 0000000..f8d34de --- /dev/null +++ b/man/ExportGroupBW.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bigwig.R +\name{ExportGroupBW} +\alias{ExportGroupBW} +\title{Export bigwig files for groups of cells} +\usage{ +ExportGroupBW( + object, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = getwd(), + verbose = TRUE +) +} +\arguments{ +\item{object}{A Seurat object} + +\item{assay}{Name of assay to use} + +\item{group.by}{The metadata variable used to group the cells} + +\item{idents}{Identities to include (defined by group.by parameter)} + +\item{normMethod}{Normalization method for the bigwig files. Deafult 'RC'. +'RC' will divide the number of fragments in a tile by the total number of +fragments in the group. A scaling factor of 10^4 will be applied. +'ncells' will divide the number of fragments in a tile by the number of cell +in the group. 'none' will apply no normalization method. +A vector of values for each cell can also be passed as a metadata column +name. A scaling factor of 10^4 will be applied} + +\item{tileSize}{The size of the tiles in the bigwig file} + +\item{minCells}{The minimum of cells in a group to be exported} + +\item{cutoff}{The maximum number of fragments in a given genomic tile} + +\item{chromosome}{A chromosomes vector to be exported} + +\item{outdir}{Directory to write output files (splitted bed and bigwigs)} + +\item{verbose}{Display messages} +} +\description{ +Export bigwig files for groups of cells +} +\examples{ +\dontrun{ +ExportGroupBW(object, assay = "peaks") +} +} diff --git a/tests/testthat/test-bigwig.R b/tests/testthat/test-bigwig.R new file mode 100644 index 0000000..c08633a --- /dev/null +++ b/tests/testthat/test-bigwig.R @@ -0,0 +1,139 @@ + +test_that("CreateBWGroup works with single tile", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "createBW") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 249250621), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = 1, end = 249250621)), + normBy = NULL, + tileSize = 249250621, + normMethod = 'RC', + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")) + expect_equal(object = bw$score, 20000) +}) + +test_that("CreateBWGroup works with 100bp tile", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "createBW2") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 249250621), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 249250621, 100), end = c(seq(100, 249250621, 100), 249250621))), + normBy = NULL, + tileSize = 100, + normMethod = "RC", + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = bw$score, c(6000, 8000, 2000, 0)) +}) + +test_that("CreateBWGroup works with seqlength equal to final pos", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "createBW2") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 500), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 499, 100), end = c(seq(100, 500, 100)))), + normBy = NULL, + tileSize = 100, + normMethod = "RC", + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = bw$score, c(6000, 8000, 2000)) +}) + +test_that("ExportGroupBW works", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "ExportGroupBW") + dir.create(outdir, showWarnings = FALSE) + fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") + cells <- colnames(x = atac_small) + names(x = cells) <- cells + frags <- CreateFragmentObject( + path = fpath, + cells = cells, + verbose = FALSE, + validate = FALSE + ) + Fragments(atac_small) <- frags + ExportGroupBW( + object = atac_small, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = outdir, + verbose = TRUE + ) + expect_equal(object = length(list.files(outdir)), expected = 4) + expect( + file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), + "File does not exist." + ) + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = length(seqlengths(bw)), 298) +}) \ No newline at end of file diff --git a/tests/testthat/test-preprocessing.R b/tests/testthat/test-preprocessing.R index 4a43f44..23e607a 100644 --- a/tests/testthat/test-preprocessing.R +++ b/tests/testthat/test-preprocessing.R @@ -118,18 +118,4 @@ test_that("CreateMotifMatrix works", { genome = genome )) expect_equal(dim(motif.matrix), c(323, 2)) - features <- suppressWarnings(c( - granges(atac_small), - GenomicRanges::GRanges( - seqnames = "fake_chr", - ranges = IRanges::IRanges(start = 1, end = 1000) - ) - )) - skip_if_not_installed("BSgenome.Hsapiens.UCSC.hg19") - motif.matrix <- suppressWarnings(CreateMotifMatrix( - features = features, - pwm = pwm, - genome = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19 - )) - expect_equal(dim(motif.matrix), c(324, 2)) }) \ No newline at end of file