From a4f9b7338a9ac517930c6ee079b0ef27b24e9702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20Herv=C3=A9?= Date: Wed, 19 Apr 2023 10:56:49 +0200 Subject: [PATCH 01/34] Update utilities.R Addition of ExportGroupBW, CreateBWGroup and '%bcin%', to export bigwig file from split fragments file --- R/utilities.R | 173 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 01671c09..300ce842 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2236,4 +2236,175 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { } rankY <- SparsifiedRanks(X = Y) return(qlcMatrix::corSparse(X = rankX, Y = rankY, cov = cov)) -} \ No newline at end of file +} + +# Export bigwig file for each group of bed file present in outdir +# @param object The seurat object +# @param assay The assay of the fragment file +# @param group.by The metadata used to split the fragment file +# @param idents The idents from the group.by to be exported +# @param normMethod Normalization method for the biwig files +# It can be the name of any quantitative column in the @meta.data +# or ncells as the number of cells +# @param tileSize The size of the tiles in the bigwig file +# @param minCells The minimum of cells in a group to be exported +# @param cutoff The maximum number of fragment in a given tile +# @param chromosome The chromosomes to be exported +# @param threads Number of threads +# @param outdir The output directory for bigwig file +# Also used as output directory for SlitFragment function +# @param verbose Display messages +#' @importFrom GenomicRanges seqlengths GRanges slidingWindows +#' @importFrom future plan +#' @importFrom future.apply future_lapply + +ExportGroupBW <- function( + object = NULL, + assay = "peaks", + group.by = "seurat_clusters", + idents = c(0,2), + normMethod = "TSS.enrichment", + tileSize = 100, + minCells = 5, + cutoff = 4, + chromosome = "primary", + threads = NULL, + outdir = NULL, + verbose=TRUE + ){ + + DefaultAssay(object) <- assay + + GroupsNames <- names(table(object@meta.data[,group.by])[table(object@meta.data[,group.by]) > minCells]) + GroupsNames <- GroupsNames[GroupsNames %in% idents] + + #Column to normalized by + if (normMethod == 'ncells'){ + normBy <- normMethod + } else{ + normBy <- object@meta.data[, normMethod, drop=FALSE] + } + + #Get chromosome information + if(chromosome=="primary"){ + prim_chr <- names(seqlengths(object)[!grepl("_alt|_fix|_random|chrUn", names(seqlengths(object)))]) + seqlevels(object) <- prim_chr + } + availableChr <- names(seqlengths(object)) + chromLengths <- seqlengths(object) + chromSizes <- GRanges(seqnames = availableChr, IRanges(start = rep(1, length(availableChr)), end = as.numeric(chromLengths))) + + if (verbose) { + message("Creation of tiles") + } + + #Create tiles for each chromosome, from GenomicRanges + tiles <- unlist(slidingWindows(chromSizes, width = tileSize, step = tileSize)) + + if (verbose) { + message("Bigwig files generation at ", outdir) + } + + #Set number of thread in future + plan("multicore", workers = threads) + + #Run the creation of bigwig for each cellgroups + covFiles <- future_lapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) + + covFiles + + } + +# Helper function for ExportGroupBW +# @param groupNamei The group to be exported +# @param availableChr Chromosomes to be processed +# @param chromLengths Chromosomes lengths +# @param tiles The tiles object +# @param normBy A vector of values to normalized the cells by +# or ncells as number of cells normalization +# @param tileSize The size of the tiles in the bigwig file +# @param normMethod Normalization method for the biwig files +# It can be the name of any quantitative column in the @meta.data +# or ncells as the number of cells +# @param cutoff The maximum number of fragment in a given tile +# @param outdir The output directory for bigwig file +# Also used as output directory for SlitFragment function +#' @importFrom rtracklayer import export.bw +#' @importFrom GenomicRanges seqnames seqlengths GRanges coverage +#' @importFrom BiocGenerics which +#' @importFrom S4Vectors match +#' @importFrom Matrix sparseMatrix rowSums +CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir){ + + #Read the fragments file associated to the group + fragi <- rtracklayer::import(paste0(outdir, "/", groupNamei, ".bed"),format = "bed") + + cellGroupi <- unique(fragi$name) + + #Open the writting bigwig file + covFile <- file.path(outdir, paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw")) + + covList <- lapply(seq_along(availableChr), function(k){ + + fragik <- fragi[seqnames(fragi) == availableChr[k],] + tilesk <- tiles[BiocGenerics::which(seqnames(tiles) %bcin% availableChr[k])] + + if(length(fragik) == 0){ + tilesk$reads <- 0 + #If fragments + }else{ + + #N Tiles + nTiles <- chromLengths[availableChr[k]] / tileSize + #Add one tile if there is extra bases + if (nTiles%%1 != 0) { + nTiles <- trunc(nTiles) + 1 + } + + #Create Sparse Matrix + matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) + + #For each tiles of this chromosome, create start tile and end tile row, set the associated counts matching with the fragments + mat <- Matrix::sparseMatrix( + i = c(trunc(start(fragik) / tileSize), trunc(end(fragik) / tileSize)) + 1, + j = as.vector(c(matchID, matchID)), + x = rep(1, 2*length(fragik)), + dims = c(nTiles, length(cellGroupi))) + + #Max count for a cells in a tile is set to cutoff (4) + if(!is.null(cutoff)){ + mat@x[mat@x > cutoff] <- cutoff + } + + #Sums the cells + mat <- Matrix::rowSums(mat) + + tilesk$reads <- mat + + #Normalization of counts by the sum of readsintss for each cells in group + if(normMethod == "ncells"){ + tilesk$reads <- tilesk$reads / length(cellGroupi) + }else if(tolower(normMethod) %in% c("none")){ + }else{ + tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) + } + } + + tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] + + tilesk + + }) + + names(covList) <- availableChr + + covList <- as(covList, "RleList") + + rtracklayer::export.bw(object = covList, con = covFile) + + covFile +} + +# Helper function for CreateBWGroup +#' @importFrom S4Vectors match +'%bcin%' <- function(x, table) S4Vectors::match(x, table, nomatch = 0) > 0 From 033b7224a9e0a1c17a95cb136e0e7b8ef5633e45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20Herv=C3=A9?= Date: Thu, 20 Apr 2023 11:55:27 +0200 Subject: [PATCH 02/34] Update utilities.R Some changes from @timoast comments on the PR --- R/utilities.R | 54 ++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 300ce842..c553afc4 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2249,7 +2249,8 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { # @param tileSize The size of the tiles in the bigwig file # @param minCells The minimum of cells in a group to be exported # @param cutoff The maximum number of fragment in a given tile -# @param chromosome The chromosomes to be exported +# @param chromosome A chromosomes vector to be exported, standard +# chromosomes can be fetched via standardChromosomes() # @param threads Number of threads # @param outdir The output directory for bigwig file # Also used as output directory for SlitFragment function @@ -2260,36 +2261,48 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { ExportGroupBW <- function( object = NULL, - assay = "peaks", - group.by = "seurat_clusters", - idents = c(0,2), - normMethod = "TSS.enrichment", + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "nCount_peaks", tileSize = 100, minCells = 5, - cutoff = 4, - chromosome = "primary", + cutoff = NULL, + chromosome = NULL, threads = NULL, outdir = NULL, verbose=TRUE ){ + if (!requireNamespace("rtracklayer", quietly = TRUE)) { + message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") + return(NULL) + } + + assay <- assay %||% DefaultAssay(object = object) DefaultAssay(object) <- assay - GroupsNames <- names(table(object@meta.data[,group.by])[table(object@meta.data[,group.by]) > minCells]) + group.by <- group.by %||% 'ident' + Idents(object = object) <- group.by + + idents <- idents %||% levels(object = object) + levels(object = object) <- idents + + GroupsNames <- names(table(object[[group.by]])[table(object[[group.by]]) > minCells]) GroupsNames <- GroupsNames[GroupsNames %in% idents] #Column to normalized by if (normMethod == 'ncells'){ normBy <- normMethod } else{ - normBy <- object@meta.data[, normMethod, drop=FALSE] + normBy <- object[[normMethod, drop=FALSE]] } #Get chromosome information - if(chromosome=="primary"){ - prim_chr <- names(seqlengths(object)[!grepl("_alt|_fix|_random|chrUn", names(seqlengths(object)))]) - seqlevels(object) <- prim_chr + if(!is.null(chromosome)){ + seqlevels(object) <- chromosome } + availableChr <- names(seqlengths(object)) chromLengths <- seqlengths(object) chromSizes <- GRanges(seqnames = availableChr, IRanges(start = rep(1, length(availableChr)), end = as.numeric(chromLengths))) @@ -2304,10 +2317,7 @@ ExportGroupBW <- function( if (verbose) { message("Bigwig files generation at ", outdir) } - - #Set number of thread in future - plan("multicore", workers = threads) - + #Run the creation of bigwig for each cellgroups covFiles <- future_lapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) @@ -2337,7 +2347,7 @@ ExportGroupBW <- function( CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir){ #Read the fragments file associated to the group - fragi <- rtracklayer::import(paste0(outdir, "/", groupNamei, ".bed"),format = "bed") + fragi <- rtracklayer::import(paste0(outdir, .Platform$file.sep, groupNamei, ".bed"),format = "bed") cellGroupi <- unique(fragi$name) @@ -2347,8 +2357,8 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, covList <- lapply(seq_along(availableChr), function(k){ fragik <- fragi[seqnames(fragi) == availableChr[k],] - tilesk <- tiles[BiocGenerics::which(seqnames(tiles) %bcin% availableChr[k])] - + tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] + if(length(fragik) == 0){ tilesk$reads <- 0 #If fragments @@ -2402,9 +2412,5 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, rtracklayer::export.bw(object = covList, con = covFile) - covFile + return(covFile) } - -# Helper function for CreateBWGroup -#' @importFrom S4Vectors match -'%bcin%' <- function(x, table) S4Vectors::match(x, table, nomatch = 0) > 0 From 081c8e1df3d4db37d8dbe3e3b77e5d2a75541e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20Herv=C3=A9?= Date: Wed, 3 May 2023 10:54:44 +0200 Subject: [PATCH 03/34] Update utilities.R SplitFragments inside the export function and check existing files --- R/utilities.R | 57 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index c553afc4..a3eac542 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2280,7 +2280,7 @@ ExportGroupBW <- function( } assay <- assay %||% DefaultAssay(object = object) - DefaultAssay(object) <- assay + DefaultAssay(object = object) <- assay group.by <- group.by %||% 'ident' Idents(object = object) <- group.by @@ -2290,12 +2290,34 @@ ExportGroupBW <- function( GroupsNames <- names(table(object[[group.by]])[table(object[[group.by]]) > minCells]) GroupsNames <- GroupsNames[GroupsNames %in% idents] + + #Check if output files already exist + lapply(GroupsNames, function(x){ + if (file.exists(paste0(outdir, .Platform$file.sep, x, ".bed"))){ + message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) + } + }) + + #Splitting fragments file for each idents in group.by + SplitFragments( + object = object, + assay = assay, + group.by = group.by, + idents = idents, + outdir = outdir, + file.suffix = "", + append = TRUE, + buffer_length = 256L, + verbose = verbose + ) #Column to normalized by - if (normMethod == 'ncells'){ - normBy <- normMethod - } else{ - normBy <- object[[normMethod, drop=FALSE]] + if(!is.null(normMethod)){ + if (normMethod == 'ncells'){ + normBy <- normMethod + } else{ + normBy <- object[[normMethod, drop=FALSE]] + } } #Get chromosome information @@ -2319,7 +2341,14 @@ ExportGroupBW <- function( } #Run the creation of bigwig for each cellgroups - covFiles <- future_lapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) + + if (nbrOfWorkers() > 1) { + mylapply <- future_lapply + } else { + mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) + } + + covFiles <- mylapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) covFiles @@ -2381,7 +2410,7 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, x = rep(1, 2*length(fragik)), dims = c(nTiles, length(cellGroupi))) - #Max count for a cells in a tile is set to cutoff (4) + #Max count for a cells in a tile is set to cutoff if(!is.null(cutoff)){ mat@x[mat@x > cutoff] <- cutoff } @@ -2392,11 +2421,15 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tilesk$reads <- mat #Normalization of counts by the sum of readsintss for each cells in group - if(normMethod == "ncells"){ - tilesk$reads <- tilesk$reads / length(cellGroupi) - }else if(tolower(normMethod) %in% c("none")){ - }else{ - tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) + if(!is.null(normMethod)){ + if(normMethod == "ncells"){ + tilesk$reads <- tilesk$reads / length(cellGroupi) + }else if(tolower(normMethod) %in% c("none")){ + }else{ + if(!is.null(normBy)){ + tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) + } + } } } From 27d4565994570e6981cda3bca004383415578955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20Herv=C3=A9?= Date: Wed, 3 May 2023 11:55:07 +0200 Subject: [PATCH 04/34] Update utilities.R Add relative count normalization per group per 10k --- R/utilities.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index a3eac542..dacd9227 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2244,8 +2244,8 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { # @param group.by The metadata used to split the fragment file # @param idents The idents from the group.by to be exported # @param normMethod Normalization method for the biwig files -# It can be the name of any quantitative column in the @meta.data -# or ncells as the number of cells +# It can be RC, ncells, none or the name of any quantitative column +# in the @meta.data # @param tileSize The size of the tiles in the bigwig file # @param minCells The minimum of cells in a group to be exported # @param cutoff The maximum number of fragment in a given tile @@ -2264,7 +2264,7 @@ ExportGroupBW <- function( assay = NULL, group.by = NULL, idents = NULL, - normMethod = "nCount_peaks", + normMethod = "RC", tileSize = 100, minCells = 5, cutoff = NULL, @@ -2313,7 +2313,7 @@ ExportGroupBW <- function( #Column to normalized by if(!is.null(normMethod)){ - if (normMethod == 'ncells'){ + if (tolower(normMethod) %in% ('rc', 'ncells', 'none')){ normBy <- normMethod } else{ normBy <- object[[normMethod, drop=FALSE]] @@ -2363,8 +2363,8 @@ ExportGroupBW <- function( # or ncells as number of cells normalization # @param tileSize The size of the tiles in the bigwig file # @param normMethod Normalization method for the biwig files -# It can be the name of any quantitative column in the @meta.data -# or ncells as the number of cells +# It can be RC, ncells, none or the name of any quantitative column +# in the @meta.data # @param cutoff The maximum number of fragment in a given tile # @param outdir The output directory for bigwig file # Also used as output directory for SlitFragment function @@ -2379,7 +2379,7 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, fragi <- rtracklayer::import(paste0(outdir, .Platform$file.sep, groupNamei, ".bed"),format = "bed") cellGroupi <- unique(fragi$name) - + #Open the writting bigwig file covFile <- file.path(outdir, paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw")) @@ -2420,11 +2420,13 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tilesk$reads <- mat - #Normalization of counts by the sum of readsintss for each cells in group + #Normalization if(!is.null(normMethod)){ - if(normMethod == "ncells"){ + if(tolower(normMethod) == "rc"){ + tilesk$reads <- tilesk$reads * 10^4 / length(fragi$name) + }else if(tolower(normMethod) == "ncells"){ tilesk$reads <- tilesk$reads / length(cellGroupi) - }else if(tolower(normMethod) %in% c("none")){ + }else if(tolower(normMethod) == "none"){ }else{ if(!is.null(normBy)){ tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) From 14171951014ddca15ca016dcc131f4949818a77b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20Herv=C3=A9?= Date: Thu, 4 May 2023 16:32:19 +0200 Subject: [PATCH 05/34] Update utilities.R Roxygen documentation and tests --- R/utilities.R | 105 +++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 43 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index dacd9227..b91297f2 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2238,29 +2238,36 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { return(qlcMatrix::corSparse(X = rankX, Y = rankY, cov = cov)) } -# Export bigwig file for each group of bed file present in outdir -# @param object The seurat object -# @param assay The assay of the fragment file -# @param group.by The metadata used to split the fragment file -# @param idents The idents from the group.by to be exported -# @param normMethod Normalization method for the biwig files -# It can be RC, ncells, none or the name of any quantitative column -# in the @meta.data -# @param tileSize The size of the tiles in the bigwig file -# @param minCells The minimum of cells in a group to be exported -# @param cutoff The maximum number of fragment in a given tile -# @param chromosome A chromosomes vector to be exported, standard -# chromosomes can be fetched via standardChromosomes() -# @param threads Number of threads -# @param outdir The output directory for bigwig file -# Also used as output directory for SlitFragment function -# @param verbose Display messages +#' Export bigwig file for each group of bed file present in outdir +#' +#' @param object The seurat object +#' @param assay The assay of the fragment file +#' @param group.by The metadata used to split the fragment file +#' @param idents The idents from the group.by to be exported +#' @param normMethod Normalization method for the biwig files. Deafult 'RC'. +'RC' will divide the number of fragments in a tile by the number of fragments in the group. A scaling factor of 10^4 will be applied +'ncells' will divide the number of fragments in a tile by the number of cell in the group. +'none' will apply no normalization method. +A vector of values for each cell can also be passed as a meta.data column name. A scaling factor of 10^4 will be applied +#' @param tileSize The size of the tiles in the bigwig file +#' @param minCells The minimum of cells in a group to be exported +#' @param cutoff The maximum number of fragment in a given tile +#' @param chromosome A chromosomes vector to be exported +#' @param outdir The output directory for bigwig file +#' @param verbose Display messages +#' #' @importFrom GenomicRanges seqlengths GRanges slidingWindows #' @importFrom future plan #' @importFrom future.apply future_lapply - +#' @importFrom pbapply pbapply +#' +#' @return +#' @export +#' +#' @examples +#' ExportGroupBW(object, assay = "peaks", group.by = "seurat_clusters", idents=NULL, normMethod = "RC", tileSize = 100, minCells = 5, cutoff = NULL, chromosome = NULL, outdir = getwd(), verbose=TRUE) ExportGroupBW <- function( - object = NULL, + object, assay = NULL, group.by = NULL, idents = NULL, @@ -2269,24 +2276,27 @@ ExportGroupBW <- function( minCells = 5, cutoff = NULL, chromosome = NULL, - threads = NULL, outdir = NULL, verbose=TRUE ){ + #Check if temporary directory exist + if (!dir.exists(outdir)){ + dir.create(outdir) + } + if (!requireNamespace("rtracklayer", quietly = TRUE)) { message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") return(NULL) - } + } assay <- assay %||% DefaultAssay(object = object) DefaultAssay(object = object) <- assay - + group.by <- group.by %||% 'ident' Idents(object = object) <- group.by - idents <- idents %||% levels(object = object) - levels(object = object) <- idents + idents <- idents %||% levels(object) GroupsNames <- names(table(object[[group.by]])[table(object[[group.by]]) > minCells]) GroupsNames <- GroupsNames[GroupsNames %in% idents] @@ -2295,6 +2305,7 @@ ExportGroupBW <- function( lapply(GroupsNames, function(x){ if (file.exists(paste0(outdir, .Platform$file.sep, x, ".bed"))){ message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) + file.remove(paste0(outdir, .Platform$file.sep, x, ".bed")) } }) @@ -2313,7 +2324,7 @@ ExportGroupBW <- function( #Column to normalized by if(!is.null(normMethod)){ - if (tolower(normMethod) %in% ('rc', 'ncells', 'none')){ + if (tolower(normMethod) %in% c('rc', 'ncells', 'none')){ normBy <- normMethod } else{ normBy <- object[[normMethod, drop=FALSE]] @@ -2328,7 +2339,7 @@ ExportGroupBW <- function( availableChr <- names(seqlengths(object)) chromLengths <- seqlengths(object) chromSizes <- GRanges(seqnames = availableChr, IRanges(start = rep(1, length(availableChr)), end = as.numeric(chromLengths))) - + if (verbose) { message("Creation of tiles") } @@ -2346,33 +2357,41 @@ ExportGroupBW <- function( mylapply <- future_lapply } else { mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) - } - + } + covFiles <- mylapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) covFiles } -# Helper function for ExportGroupBW -# @param groupNamei The group to be exported -# @param availableChr Chromosomes to be processed -# @param chromLengths Chromosomes lengths -# @param tiles The tiles object -# @param normBy A vector of values to normalized the cells by -# or ncells as number of cells normalization -# @param tileSize The size of the tiles in the bigwig file -# @param normMethod Normalization method for the biwig files -# It can be RC, ncells, none or the name of any quantitative column -# in the @meta.data -# @param cutoff The maximum number of fragment in a given tile -# @param outdir The output directory for bigwig file -# Also used as output directory for SlitFragment function +#' Helper function for ExportGroupBW +#' +#' @param groupNamei The group to be exported +#' @param availableChr Chromosomes to be processed +#' @param chromLengths Chromosomes lengths +#' @param tiles The tiles object +#' @param normBy A vector of values to normalized the cells by +#' @param tileSize The size of the tiles in the bigwig file +#' @param normMethod Normalization method for the biwig files +'RC' will divide the number of fragments in a tile by the number of fragments in the group. A scaling factor of 10^4 will be applied +'ncells' will divide the number of fragments in a tile by the number of cell in the group. +'none' will apply no normalization method. +A vector of values for each cell can also be passed as a meta.data column name. A scaling factor of 10^4 will be applied +#' @param cutoff The maximum number of fragment in a given tile +#' @param outdir The output directory for bigwig file +#' #' @importFrom rtracklayer import export.bw #' @importFrom GenomicRanges seqnames seqlengths GRanges coverage #' @importFrom BiocGenerics which #' @importFrom S4Vectors match #' @importFrom Matrix sparseMatrix rowSums +#' +#' @return The names of the exported files +#' @export +#' +#' @examples +#' CreateBWGroup(group, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, getwd()) CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir){ #Read the fragments file associated to the group @@ -2407,7 +2426,7 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, mat <- Matrix::sparseMatrix( i = c(trunc(start(fragik) / tileSize), trunc(end(fragik) / tileSize)) + 1, j = as.vector(c(matchID, matchID)), - x = rep(1, 2*length(fragik)), + x = rep(1, 2*length(fragik)), dims = c(nTiles, length(cellGroupi))) #Max count for a cells in a tile is set to cutoff From 6309c3bd2969e81b3856d0ed782ec2d7bfa2bdaf Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 23 Jun 2023 17:19:03 +0800 Subject: [PATCH 06/34] Update function documentation --- R/utilities.R | 608 +++++++++++++++++++++++++------------------------- 1 file changed, 304 insertions(+), 304 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index cb3d23a9..545d1e13 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -28,24 +28,24 @@ NULL #' @export #' @concept utilities AddChromatinModule <- function( - object, - features, - genome, - assay = NULL, - verbose = TRUE, - ... + object, + features, + genome, + assay = NULL, + verbose = TRUE, + ... ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) if (!inherits(x = object[[assay]], what = "ChromatinAssay")) { stop("The requested assay is not a ChromatinAssay.") } - + # first find index of each feature feat.idx <- sapply(X = features, FUN = fmatch, rownames(x = object[[assay]])) j <- sapply(X = seq_along(along.with = features), FUN = function(x) { rep(x = x, length(x = features[[x]])) }) - + # construct sparse matrix with features mat <- sparseMatrix( i = unlist(x = feat.idx, use.names = FALSE), @@ -55,7 +55,7 @@ AddChromatinModule <- function( ) rownames(x = mat) <- rownames(x = object[[assay]]) colnames(x = mat) <- names(x = features) - + # run chromVAR cv <- RunChromVAR( object = object[[assay]], @@ -64,7 +64,7 @@ AddChromatinModule <- function( verbose = verbose, ... ) - + # add module scores to metadata chromvar.data <- GetAssayData(object = cv, slot = "data") object <- AddMetaData( @@ -92,10 +92,10 @@ globalVariables(names = c("group", "readcount"), package = "Signac") #' @examples #' AverageCounts(atac_small) AverageCounts <- function( - object, - assay = NULL, - group.by = NULL, - verbose = TRUE + object, + assay = NULL, + group.by = NULL, + verbose = TRUE ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) if (is.null(x = group.by)) { @@ -135,11 +135,11 @@ AverageCounts <- function( #' @importFrom Matrix rowSums #' @return Returns a vector of peak names AccessiblePeaks <- function( - object, - assay = NULL, - idents = NULL, - cells = NULL, - min.cells = 10 + object, + assay = NULL, + idents = NULL, + cells = NULL, + min.cells = 10 ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) cells <- SetIfNull(x = cells, y = WhichCells(object, idents = idents)) @@ -165,8 +165,8 @@ AccessiblePeaks <- function( #' @examples #' CellsPerGroup(atac_small) CellsPerGroup <- function( - object, - group.by = NULL + object, + group.by = NULL ) { if (is.null(x = group.by)) { cellgroups <- Idents(object = object) @@ -207,10 +207,10 @@ CellsPerGroup <- function( #' ) #' } ClosestFeature <- function( - object, - regions, - annotation = NULL, - ... + object, + regions, + annotation = NULL, + ... ) { if (!is(object = regions, class2 = 'GRanges')) { regions <- StringToGRanges(regions = regions, ...) @@ -290,16 +290,16 @@ ClosestFeature <- function( #' Fragments(atac_small) <- fragments #' GeneActivity(atac_small) GeneActivity <- function( - object, - assay = NULL, - features = NULL, - extend.upstream = 2000, - extend.downstream = 0, - biotypes = "protein_coding", - max.width = 500000, - process_n = 2000, - gene.id = FALSE, - verbose = TRUE + object, + assay = NULL, + features = NULL, + extend.upstream = 2000, + extend.downstream = 0, + biotypes = "protein_coding", + max.width = 500000, + process_n = 2000, + gene.id = FALSE, + verbose = TRUE ) { if (!is.null(x = features)) { if (length(x = features) == 0) { @@ -334,7 +334,7 @@ GeneActivity <- function( stop("No genes remaining after filtering for requested biotypes") } } - + # filter genes if provided if (!is.null(x = features)) { transcripts <- transcripts[transcripts$gene_name %in% features] @@ -349,14 +349,14 @@ GeneActivity <- function( stop("No genes remaining after filtering for max.width") } } - + # extend to include promoters transcripts <- Extend( x = transcripts, upstream = extend.upstream, downstream = extend.downstream ) - + # quantify frags <- Fragments(object = object[[assay]]) if (length(x = frags) == 0) { @@ -375,7 +375,7 @@ GeneActivity <- function( names(x = gene.key) <- GRangesToString(grange = transcripts) rownames(x = counts) <- as.vector(x = gene.key[rownames(x = counts)]) counts <- counts[rownames(x = counts) != "", ] - + return(counts) } @@ -395,10 +395,10 @@ GeneActivity <- function( #' @concept utilities #' @export GetGRangesFromEnsDb <- function( - ensdb, - standard.chromosomes = TRUE, - biotypes = c("protein_coding", "lincRNA", "rRNA", "processed_transcript"), - verbose = TRUE + ensdb, + standard.chromosomes = TRUE, + biotypes = c("protein_coding", "lincRNA", "rRNA", "processed_transcript"), + verbose = TRUE ) { if (!requireNamespace("biovizBase", quietly = TRUE)) { stop("Please install biovizBase\n", @@ -409,15 +409,15 @@ GetGRangesFromEnsDb <- function( if (standard.chromosomes) { whole.genome <- keepStandardChromosomes(whole.genome, pruning.mode = "coarse") } - + # extract genes from each chromosome my_lapply <- ifelse(test = verbose, yes = pblapply, no = lapply) tx <- my_lapply(X = seq_along(whole.genome), FUN = function(x){ - suppressMessages(expr = biovizBase::crunch( - obj = ensdb, - which = whole.genome[x], - columns = c("tx_id", "gene_name", "gene_id", "gene_biotype"))) - }) + suppressMessages(expr = biovizBase::crunch( + obj = ensdb, + which = whole.genome[x], + columns = c("tx_id", "gene_name", "gene_id", "gene_biotype"))) + }) # combine tx <- do.call(what = c, args = tx) @@ -486,12 +486,12 @@ GetTSSPositions <- function(ranges, biotypes = "protein_coding") { #' assay.2 = 'bins' #' ) GetIntersectingFeatures <- function( - object.1, - object.2, - assay.1 = NULL, - assay.2 = NULL, - distance = 0, - verbose = TRUE + object.1, + object.2, + assay.1 = NULL, + assay.2 = NULL, + distance = 0, + verbose = TRUE ) { assay.1 <- SetIfNull(x = assay.1, y = DefaultAssay(object = object.1)) assay.2 <- SetIfNull(x = assay.2, y = DefaultAssay(object = object.2)) @@ -591,10 +591,10 @@ GRangesToString <- function(grange, sep = c("-", "-")) { #' @examples #' Extend(x = blacklist_hg19, upstream = 100, downstream = 100) Extend <- function( - x, - upstream = 0, - downstream = 0, - from.midpoint = FALSE + x, + upstream = 0, + downstream = 0, + from.midpoint = FALSE ) { if (any(strand(x = x) == "*")) { warning("'*' ranges were treated as '+'") @@ -683,10 +683,10 @@ GetCellsInRegion <- function(tabix, region, cells = NULL) { #' ) #' } CountsInRegion <- function( - object, - assay, - regions, - ... + object, + assay, + regions, + ... ) { if (!is(object = object[[assay]], class2 = "ChromatinAssay")) { stop("Must supply a ChromatinAssay") @@ -724,10 +724,10 @@ CountsInRegion <- function( #' ) #' } FractionCountsInRegion <- function( - object, - regions, - assay = NULL, - ... + object, + regions, + assay = NULL, + ... ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) reads.in.region <- CountsInRegion( @@ -772,12 +772,12 @@ FractionCountsInRegion <- function( #' "chr1-762106-763359","chr1-779589-780271") #' IntersectMatrix(matrix = counts, regions = blacklist_hg19) IntersectMatrix <- function( - matrix, - regions, - invert = FALSE, - sep = c("-", "-"), - verbose = TRUE, - ... + matrix, + regions, + invert = FALSE, + sep = c("-", "-"), + verbose = TRUE, + ... ) { if (is(object = regions, class2 = "character")) { regions <- StringToGRanges(regions = regions, sep = sep) @@ -880,12 +880,12 @@ LookupGeneCoords <- function(object, gene, assay = NULL) { #' n = 10 #' ) MatchRegionStats <- function( - meta.feature, - query.feature, - features.match = c("GC.percent"), - n = 10000, - verbose = TRUE, - ... + meta.feature, + query.feature, + features.match = c("GC.percent"), + n = 10000, + verbose = TRUE, + ... ) { if (!inherits(x = meta.feature, what = 'data.frame')) { stop("meta.feature should be a data.frame") @@ -997,11 +997,11 @@ UnifyPeaks <- function(object.list, mode = "reduce") { #' @examples #' SubsetMatrix(mat = volcano) SubsetMatrix <- function( - mat, - min.rows = 1, - min.cols = 1, - max.row.val = 10, - max.col.val = NULL + mat, + min.rows = 1, + min.cols = 1, + max.row.val = 10, + max.col.val = NULL ) { rowcount <- rowSums(mat > 0) colcount <- colSums(mat > 0) @@ -1053,10 +1053,10 @@ AddMissingCells <- function(x, cells) { #' @importFrom SeuratObject DefaultAssay GetAssayData #' @importFrom Matrix Diagonal tcrossprod rowSums AverageCountMatrix <- function( - object, - assay = NULL, - group.by = NULL, - idents = NULL + object, + assay = NULL, + group.by = NULL, + idents = NULL ) { assay = SetIfNull(x = assay, y = DefaultAssay(object = object)) countmatrix <- GetAssayData(object = object[[assay]], slot = "counts") @@ -1242,12 +1242,12 @@ ExtractFragments <- function(fragments, n = NULL, verbose = TRUE) { # convert region argument to genomic coordinates # region can be a string, name of a gene, or GRanges object FindRegion <- function( - object, - region, - sep = c("-", "-"), - assay = NULL, - extend.upstream = 0, - extend.downstream = 0 + object, + region, + sep = c("-", "-"), + assay = NULL, + extend.upstream = 0, + extend.downstream = 0 ) { if (!is(object = region, class2 = "GRanges")) { # first try to convert to coordinates, if not lookup gene @@ -1298,16 +1298,16 @@ FindRegion <- function( # # @return Returns a data frame GetReadsInRegion <- function( - cellmap, - region, - tabix.file, - cells = NULL, - verbose = TRUE, - ... + cellmap, + region, + tabix.file, + cells = NULL, + verbose = TRUE, + ... ) { file.to.object <- names(x = cellmap) names(x = file.to.object) <- cellmap - + if (verbose) { message("Extracting reads in requested region") } @@ -1362,9 +1362,9 @@ GetReadsInRegion <- function( # @return Returns a named vector #' @importFrom SeuratObject Idents GetGroups <- function( - object, - group.by, - idents + object, + group.by, + idents ) { if (is.null(x = group.by)) { obj.groups <- Idents(object = object) @@ -1414,11 +1414,11 @@ MergeMatrixParts <- function(mat.list, new.rownames) { #' @importFrom Rsamtools TabixFile #' @importFrom GenomeInfoDb keepSeqlevels MultiGetReadsInRegion <- function( - object, - region, - fragment.list = NULL, - assay = NULL, - ... + object, + region, + fragment.list = NULL, + assay = NULL, + ... ) { if (inherits(x = object, what = "Seurat")) { # pull the assay @@ -1471,11 +1471,11 @@ MultiGetReadsInRegion <- function( #' @importMethodsFrom GenomicRanges width start end # @return Returns a sparse matrix SingleFileCutMatrix <- function( - cellmap, - region, - cells = NULL, - tabix.file, - verbose = TRUE + cellmap, + region, + cells = NULL, + tabix.file, + verbose = TRUE ) { # if multiple regions supplied, must be the same width cells <- SetIfNull(x = cells, y = names(x = cellmap)) @@ -1508,7 +1508,7 @@ SingleFileCutMatrix <- function( ) cut.df <- cut.df[ (cut.df$position > 0) & (cut.df$position <= width(x = region)[[1]]), - ] + ] cell.vector <- seq_along(along.with = cells) names(x = cell.vector) <- cells cell.matrix.info <- cell.vector[cut.df$cell] @@ -1541,12 +1541,12 @@ SingleFileCutMatrix <- function( #' @importFrom Rsamtools TabixFile seqnamesTabix #' @importFrom GenomeInfoDb keepSeqlevels CutMatrix <- function( - object, - region, - group.by = NULL, - assay = NULL, - cells = NULL, - verbose = TRUE + object, + region, + group.by = NULL, + assay = NULL, + cells = NULL, + verbose = TRUE ) { # run SingleFileCutMatrix for each fragment file and combine results assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) @@ -1603,13 +1603,13 @@ CutMatrix <- function( #' @importFrom SeuratObject DefaultAssay #' @importFrom GenomeInfoDb keepSeqlevels MultiRegionCutMatrix <- function( - object, - regions, - group.by = NULL, - fragments = NULL, - assay = NULL, - cells = NULL, - verbose = FALSE + object, + regions, + group.by = NULL, + fragments = NULL, + assay = NULL, + cells = NULL, + verbose = FALSE ) { if (inherits(x = object, what = "Seurat")) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) @@ -1666,11 +1666,11 @@ MultiRegionCutMatrix <- function( # @param verbose Display messages #' @importMethodsFrom GenomicRanges strand CreateRegionPileupMatrix <- function( - object, - regions, - assay = NULL, - cells = NULL, - verbose = TRUE + object, + regions, + assay = NULL, + cells = NULL, + verbose = TRUE ) { if (length(x = regions) == 0) { stop("No regions supplied") @@ -1679,7 +1679,7 @@ CreateRegionPileupMatrix <- function( on_plus <- strand(x = regions) == "+" | strand(x = regions) == "*" plus.strand <- regions[on_plus, ] minus.strand <- regions[!on_plus, ] - + # get cut matrices for each strand if (verbose) { message("Finding + strand cut sites") @@ -1701,7 +1701,7 @@ CreateRegionPileupMatrix <- function( cells = cells, verbose = FALSE ) - + # reverse minus strand and add together if (is.null(x = cut.matrix.plus)) { full.matrix <- cut.matrix.minus[, rev(x = colnames(x = cut.matrix.minus))] @@ -1737,12 +1737,12 @@ CreateRegionPileupMatrix <- function( # @param scale.factor Scaling factor to use. If NULL (default), will use the # median normalization factor for all the groups. ApplyMatrixByGroup <- function( - mat, - groups, - fun, - normalize = TRUE, - group.scale.factors = NULL, - scale.factor = NULL + mat, + groups, + fun, + normalize = TRUE, + group.scale.factors = NULL, + scale.factor = NULL ) { if (normalize) { if (is.null(x = group.scale.factors) | is.null(x = scale.factor)) { @@ -1755,13 +1755,13 @@ ApplyMatrixByGroup <- function( } ngroup <- length(x = all.groups) npos <- ncol(x = mat) - + group <- unlist( x = lapply(X = all.groups, FUN = function(x) rep(x, npos)) ) position <- rep(x = as.numeric(x = colnames(x = mat)), ngroup) count <- vector(mode = "numeric", length = npos * ngroup) - + for (i in seq_along(along.with = all.groups)) { grp <- all.groups[[i]] if (is.na(x = grp)) { @@ -1776,13 +1776,13 @@ ApplyMatrixByGroup <- function( } count[((i - 1) * npos + 1):((i * npos))] <- totals } - + # construct dataframe coverages <- data.frame( "group" = group, "position" = position, "count" = count, stringsAsFactors = FALSE ) - + if (normalize) { scale.factor <- SetIfNull( x = scale.factor, y = median(x = group.scale.factors) @@ -1911,12 +1911,12 @@ IsMatrixEmpty <- function(x) { # information GetRowsToMerge <- function(assay.list, all.ranges, reduced.ranges) { revmap <- as.vector(x = reduced.ranges$revmap) - + # get indices of ranges that changed revmap.lengths <- sapply(X = revmap, FUN = length) changed.ranges <- which(x = revmap.lengths > 1) grange.string <- GRangesToString(grange = reduced.ranges[changed.ranges]) - + # preallocate offsets <- list() results <- list() @@ -1935,7 +1935,7 @@ GetRowsToMerge <- function(assay.list, all.ranges, reduced.ranges) { results[['matrix']][[i]] <- matrix.indices results[['grange']][[i]] <- granges } - + # find sets of ranges for each dataset counter <- vector(mode = "numeric", length = length(x = assay.list)) for (x in seq_along(along.with = changed.ranges)) { @@ -1971,16 +1971,16 @@ GetRowsToMerge <- function(assay.list, all.ranges, reduced.ranges) { #' @importFrom Matrix rowSums #' @importMethodsFrom Matrix t MergeOverlappingRows <- function( - mergeinfo, - assay.list, - slot = "counts", - verbose = TRUE + mergeinfo, + assay.list, + slot = "counts", + verbose = TRUE ) { merge.counts <- list() for (i in seq_along(along.with = assay.list)) { # get count matrix counts <- GetAssayData(object = assay.list[[i]], slot = slot) - + if (nrow(x = counts) == 0) { # no counts, only data # skip row merge and return empty counts matrices @@ -1992,15 +1992,15 @@ MergeOverlappingRows <- function( ) return(merge.counts) } - + # transpose for faster access since matrix is column major counts <- t(x = counts) - + # get rows to merge mrows <- mergeinfo$matrix[[i]] new.rownames <- mergeinfo$grange[[i]] nrep <- rle(x = new.rownames) - + # allocate todelete <- c() newmat <- vector( @@ -2058,13 +2058,13 @@ MergeOverlappingRows <- function( to.rename.names <- to.rename.names[1:idx.counter] newmat <- newmat[1:(y - 1)] newmat.names <- newmat.names[1:(y - 1)] - + # transpose back counts <- t(x = counts) - + # rename matrix rows that weren't merged rownames(counts)[to.rename.idx] <- to.rename.names - + if (y == 1) { # no rows were merged, can return counts merge.counts[[i]] <- counts @@ -2084,12 +2084,12 @@ MergeOverlappingRows <- function( merged.mat <- Reduce(f = rbind, x = newmat) rownames(merged.mat) <- newmat.names merged.mat <- as(object = merged.mat, Class = "CsparseMatrix") - + # remove rows from count matrix that were merged mat.rows <- seq_len(length.out = nrow(x = counts)) tokeep <- setdiff(mat.rows, todelete) counts <- counts[tokeep, ] - + # add new merged rows to counts counts <- rbind(counts, merged.mat) merge.counts[[i]] <- counts @@ -2231,8 +2231,8 @@ SparsifiedRanks <- function(X){ x = lapply( X = seq_along(col_lst), FUN = function(i) rank(x = col_lst[[i]]) + offsets[i] - ) ) + ) ## Create template rank matrix X.ranks <- X X.ranks@x <- sparsified_ranks @@ -2248,25 +2248,27 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { if (is.null(Y)){ # Calculate pearson correlation on rank matrices return (qlcMatrix::corSparse(X = rankX, cov = cov)) - } + } rankY <- SparsifiedRanks(X = Y) return(qlcMatrix::corSparse(X = rankX, Y = rankY, cov = cov)) } -#' Export bigwig file for each group of bed file present in outdir -#' -#' @param object The seurat object -#' @param assay The assay of the fragment file -#' @param group.by The metadata used to split the fragment file -#' @param idents The idents from the group.by to be exported -#' @param normMethod Normalization method for the biwig files. Deafult 'RC'. -'RC' will divide the number of fragments in a tile by the number of fragments in the group. A scaling factor of 10^4 will be applied -'ncells' will divide the number of fragments in a tile by the number of cell in the group. -'none' will apply no normalization method. -A vector of values for each cell can also be passed as a meta.data column name. A scaling factor of 10^4 will be applied +#' Export bigwig files for groups of cells +#' +#' @param object A Seurat object +#' @param assay Name of assay to use +#' @param group.by The metadata variable used to group the cells +#' @param idents Identities to include (defined by group.by parameter) +#' @param normMethod Normalization method for the bigwig files. Deafult 'RC'. +#' 'RC' will divide the number of fragments in a tile by the total number of +#' fragments in the group. A scaling factor of 10^4 will be applied. +#' 'ncells' will divide the number of fragments in a tile by the number of cell +#' in the group. 'none' will apply no normalization method. +#' A vector of values for each cell can also be passed as a metadata column +#' name. A scaling factor of 10^4 will be applied #' @param tileSize The size of the tiles in the bigwig file #' @param minCells The minimum of cells in a group to be exported -#' @param cutoff The maximum number of fragment in a given tile +#' @param cutoff The maximum number of fragments in a given genomic tile #' @param chromosome A chromosomes vector to be exported #' @param outdir The output directory for bigwig file #' @param verbose Display messages @@ -2280,180 +2282,178 @@ A vector of values for each cell can also be passed as a meta.data column name. #' @export #' #' @examples -#' ExportGroupBW(object, assay = "peaks", group.by = "seurat_clusters", idents=NULL, normMethod = "RC", tileSize = 100, minCells = 5, cutoff = NULL, chromosome = NULL, outdir = getwd(), verbose=TRUE) +#' \dontrun{ +#' ExportGroupBW(object, assay = "peaks") +#' } ExportGroupBW <- function( - object, - assay = NULL, - group.by = NULL, - idents = NULL, - normMethod = "RC", - tileSize = 100, - minCells = 5, - cutoff = NULL, - chromosome = NULL, - outdir = NULL, - verbose=TRUE - ){ + object, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = NULL, + verbose=TRUE +){ - #Check if temporary directory exist - if (!dir.exists(outdir)){ - dir.create(outdir) - } + #Check if temporary directory exist + if (!dir.exists(outdir)){ + dir.create(outdir) + } - if (!requireNamespace("rtracklayer", quietly = TRUE)) { - message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") - return(NULL) - } + if (!requireNamespace("rtracklayer", quietly = TRUE)) { + message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") + return(NULL) + } - assay <- assay %||% DefaultAssay(object = object) - DefaultAssay(object = object) <- assay - - group.by <- group.by %||% 'ident' - Idents(object = object) <- group.by + assay <- assay %||% DefaultAssay(object = object) + DefaultAssay(object = object) <- assay - idents <- idents %||% levels(object) + group.by <- group.by %||% 'ident' + Idents(object = object) <- group.by - GroupsNames <- names(table(object[[group.by]])[table(object[[group.by]]) > minCells]) - GroupsNames <- GroupsNames[GroupsNames %in% idents] + idents <- idents %||% levels(object) - #Check if output files already exist - lapply(GroupsNames, function(x){ - if (file.exists(paste0(outdir, .Platform$file.sep, x, ".bed"))){ - message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) - file.remove(paste0(outdir, .Platform$file.sep, x, ".bed")) - } - }) - - #Splitting fragments file for each idents in group.by - SplitFragments( - object = object, - assay = assay, - group.by = group.by, - idents = idents, - outdir = outdir, - file.suffix = "", - append = TRUE, - buffer_length = 256L, - verbose = verbose - ) - - #Column to normalized by - if(!is.null(normMethod)){ - if (tolower(normMethod) %in% c('rc', 'ncells', 'none')){ - normBy <- normMethod - } else{ - normBy <- object[[normMethod, drop=FALSE]] - } - } - - #Get chromosome information - if(!is.null(chromosome)){ - seqlevels(object) <- chromosome - } - - availableChr <- names(seqlengths(object)) - chromLengths <- seqlengths(object) - chromSizes <- GRanges(seqnames = availableChr, IRanges(start = rep(1, length(availableChr)), end = as.numeric(chromLengths))) - - if (verbose) { - message("Creation of tiles") - } - - #Create tiles for each chromosome, from GenomicRanges - tiles <- unlist(slidingWindows(chromSizes, width = tileSize, step = tileSize)) - - if (verbose) { - message("Bigwig files generation at ", outdir) + GroupsNames <- names(table(object[[group.by]])[table(object[[group.by]]) > minCells]) + GroupsNames <- GroupsNames[GroupsNames %in% idents] + + #Check if output files already exist + lapply(GroupsNames, function(x){ + if (file.exists(paste0(outdir, .Platform$file.sep, x, ".bed"))){ + message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) + file.remove(paste0(outdir, .Platform$file.sep, x, ".bed")) } - - #Run the creation of bigwig for each cellgroups + }) - if (nbrOfWorkers() > 1) { - mylapply <- future_lapply - } else { - mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) + #Splitting fragments file for each idents in group.by + SplitFragments( + object = object, + assay = assay, + group.by = group.by, + idents = idents, + outdir = outdir, + file.suffix = "", + append = TRUE, + buffer_length = 256L, + verbose = verbose + ) + + #Column to normalized by + if(!is.null(normMethod)){ + if (tolower(normMethod) %in% c('rc', 'ncells', 'none')){ + normBy <- normMethod + } else{ + normBy <- object[[normMethod, drop=FALSE]] } - - covFiles <- mylapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) - - covFiles - } + + #Get chromosome information + if(!is.null(chromosome)){ + seqlevels(object) <- chromosome + } + + availableChr <- names(seqlengths(object)) + chromLengths <- seqlengths(object) + chromSizes <- GRanges(seqnames = availableChr, IRanges(start = rep(1, length(availableChr)), end = as.numeric(chromLengths))) + + if (verbose) { + message("Creation of tiles") + } + + #Create tiles for each chromosome, from GenomicRanges + tiles <- unlist(slidingWindows(chromSizes, width = tileSize, step = tileSize)) + + if (verbose) { + message("Bigwig files generation at ", outdir) + } + + #Run the creation of bigwig for each cellgroups + + if (nbrOfWorkers() > 1) { + mylapply <- future_lapply + } else { + mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) + } + + covFiles <- mylapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) + + covFiles + +} -#' Helper function for ExportGroupBW -#' -#' @param groupNamei The group to be exported -#' @param availableChr Chromosomes to be processed -#' @param chromLengths Chromosomes lengths -#' @param tiles The tiles object -#' @param normBy A vector of values to normalized the cells by -#' @param tileSize The size of the tiles in the bigwig file -#' @param normMethod Normalization method for the biwig files -'RC' will divide the number of fragments in a tile by the number of fragments in the group. A scaling factor of 10^4 will be applied -'ncells' will divide the number of fragments in a tile by the number of cell in the group. -'none' will apply no normalization method. -A vector of values for each cell can also be passed as a meta.data column name. A scaling factor of 10^4 will be applied -#' @param cutoff The maximum number of fragment in a given tile -#' @param outdir The output directory for bigwig file -#' +# Helper function for ExportGroupBW +# +# @param groupNamei The group to be exported +# @param availableChr Chromosomes to be processed +# @param chromLengths Chromosomes lengths +# @param tiles The tiles object +# @param normBy A vector of values to normalized the cells by +# @param tileSize The size of the tiles in the bigwig file +# @param normMethod Normalization method for the bigwig files +# 'RC' will divide the number of fragments in a tile by the number of fragments +# in the group. A scaling factor of 10^4 will be applied +# 'ncells' will divide the number of fragments in a tile by the number of cell +# in the group. 'none' will apply no normalization method. A vector of values +# for each cell can also be passed as a meta.data column name. A scaling factor +# of 10^4 will be applied +# @param cutoff The maximum number of fragment in a given tile +# @param outdir The output directory for bigwig file +# #' @importFrom rtracklayer import export.bw #' @importFrom GenomicRanges seqnames seqlengths GRanges coverage #' @importFrom BiocGenerics which #' @importFrom S4Vectors match #' @importFrom Matrix sparseMatrix rowSums -#' -#' @return The names of the exported files -#' @export -#' -#' @examples -#' CreateBWGroup(group, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, getwd()) CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir){ - + #Read the fragments file associated to the group fragi <- rtracklayer::import(paste0(outdir, .Platform$file.sep, groupNamei, ".bed"),format = "bed") - + cellGroupi <- unique(fragi$name) #Open the writting bigwig file covFile <- file.path(outdir, paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw")) - + covList <- lapply(seq_along(availableChr), function(k){ - + fragik <- fragi[seqnames(fragi) == availableChr[k],] tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] if(length(fragik) == 0){ tilesk$reads <- 0 - #If fragments + #If fragments }else{ - + #N Tiles nTiles <- chromLengths[availableChr[k]] / tileSize #Add one tile if there is extra bases if (nTiles%%1 != 0) { nTiles <- trunc(nTiles) + 1 } - + #Create Sparse Matrix matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) - + #For each tiles of this chromosome, create start tile and end tile row, set the associated counts matching with the fragments mat <- Matrix::sparseMatrix( i = c(trunc(start(fragik) / tileSize), trunc(end(fragik) / tileSize)) + 1, j = as.vector(c(matchID, matchID)), x = rep(1, 2*length(fragik)), dims = c(nTiles, length(cellGroupi))) - + #Max count for a cells in a tile is set to cutoff if(!is.null(cutoff)){ mat@x[mat@x > cutoff] <- cutoff } - + #Sums the cells mat <- Matrix::rowSums(mat) - + tilesk$reads <- mat - + #Normalization if(!is.null(normMethod)){ if(tolower(normMethod) == "rc"){ @@ -2468,18 +2468,18 @@ CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, } } } - + tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] - + tilesk - + }) - + names(covList) <- availableChr - + covList <- as(covList, "RleList") - + rtracklayer::export.bw(object = covList, con = covFile) - + return(covFile) } \ No newline at end of file From 0d5eee5b3bd6fb2f5118494de14ed5304c398eb8 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 23 Jun 2023 17:37:23 +0800 Subject: [PATCH 07/34] Reformat --- R/utilities.R | 193 ++++++++++++++++++++++++++------------------------ 1 file changed, 101 insertions(+), 92 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 545d1e13..4854086d 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2297,38 +2297,31 @@ ExportGroupBW <- function( chromosome = NULL, outdir = NULL, verbose=TRUE -){ - - #Check if temporary directory exist +) { + # Check if temporary directory exist if (!dir.exists(outdir)){ dir.create(outdir) } - if (!requireNamespace("rtracklayer", quietly = TRUE)) { message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") return(NULL) } - - assay <- assay %||% DefaultAssay(object = object) + assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) DefaultAssay(object = object) <- assay - - group.by <- group.by %||% 'ident' + group.by <- SetIfNull(x = group.by, y = 'ident') Idents(object = object) <- group.by - - idents <- idents %||% levels(object) - - GroupsNames <- names(table(object[[group.by]])[table(object[[group.by]]) > minCells]) + idents <- SetIfNull(x = idents, y = levels(x = object)) + GroupsNames <- names(x = table(object[[group.by]])[table(object[[group.by]]) > minCells]) GroupsNames <- GroupsNames[GroupsNames %in% idents] - - #Check if output files already exist - lapply(GroupsNames, function(x){ - if (file.exists(paste0(outdir, .Platform$file.sep, x, ".bed"))){ + # Check if output files already exist + lapply(X = GroupsNames, FUN = function(x) { + fn <- paste0(outdir, .Platform$file.sep, x, ".bed") + if (file.exists(fn)) { message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) - file.remove(paste0(outdir, .Platform$file.sep, x, ".bed")) + file.remove(fn) } }) - - #Splitting fragments file for each idents in group.by + # Splitting fragments file for each idents in group.by SplitFragments( object = object, assay = assay, @@ -2340,48 +2333,57 @@ ExportGroupBW <- function( buffer_length = 256L, verbose = verbose ) - - #Column to normalized by - if(!is.null(normMethod)){ - if (tolower(normMethod) %in% c('rc', 'ncells', 'none')){ + # Column to normalized by + if(!is.null(x = normMethod)) { + if (tolower(x = normMethod) %in% c('rc', 'ncells', 'none')){ normBy <- normMethod } else{ - normBy <- object[[normMethod, drop=FALSE]] + normBy <- object[[normMethod, drop = FALSE]] } } - - #Get chromosome information - if(!is.null(chromosome)){ + # Get chromosome information + if(!is.null(x = chromosome)){ seqlevels(object) <- chromosome } - - availableChr <- names(seqlengths(object)) + availableChr <- names(x = seqlengths(object)) chromLengths <- seqlengths(object) - chromSizes <- GRanges(seqnames = availableChr, IRanges(start = rep(1, length(availableChr)), end = as.numeric(chromLengths))) + chromSizes <- GRanges( + seqnames = availableChr, + ranges = IRanges( + start = rep(1, length(x = availableChr)), + end = as.numeric(x = chromLengths) + ) + ) if (verbose) { - message("Creation of tiles") + message("Creating tiles") } - - #Create tiles for each chromosome, from GenomicRanges - tiles <- unlist(slidingWindows(chromSizes, width = tileSize, step = tileSize)) - + # Create tiles for each chromosome, from GenomicRanges + tiles <- unlist( + x = slidingWindows(x = chromSizes, width = tileSize, step = tileSize) + ) if (verbose) { - message("Bigwig files generation at ", outdir) + message("Creating bigwig files at ", outdir) } - - #Run the creation of bigwig for each cellgroups - + # Run the creation of bigwig for each cellgroups if (nbrOfWorkers() > 1) { mylapply <- future_lapply } else { mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) } - - covFiles <- mylapply(GroupsNames, CreateBWGroup, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir) - - covFiles - + covFiles <- mylapply( + x = GroupsNames, + FUN = CreateBWGroup, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir + ) + return(covFiles) } # Helper function for ExportGroupBW @@ -2407,79 +2409,86 @@ ExportGroupBW <- function( #' @importFrom BiocGenerics which #' @importFrom S4Vectors match #' @importFrom Matrix sparseMatrix rowSums -CreateBWGroup <- function(groupNamei, availableChr, chromLengths, tiles, normBy, tileSize, normMethod, cutoff, outdir){ - - #Read the fragments file associated to the group - fragi <- rtracklayer::import(paste0(outdir, .Platform$file.sep, groupNamei, ".bed"),format = "bed") - - cellGroupi <- unique(fragi$name) - - #Open the writting bigwig file - covFile <- file.path(outdir, paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw")) +CreateBWGroup <- function( + groupNamei, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir +) { + if (!requireNamespace("rtracklayer", quietly = TRUE)) { + message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") + return(NULL) + } + normMethod <- tolower(x = normMethod) + # Read the fragments file associated to the group + fragi <- rtracklayer::import( + paste0(outdir, .Platform$file.sep, groupNamei, ".bed"), format = "bed" + ) + cellGroupi <- unique(x = fragi$name) + # Open the writing bigwig file + covFile <- file.path( + outdir, + paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw") + ) - covList <- lapply(seq_along(availableChr), function(k){ - + covList <- lapply(X = seq_along(availableChr), FUN = function(k) { fragik <- fragi[seqnames(fragi) == availableChr[k],] tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] - - if(length(fragik) == 0){ + if (length(x = fragik) == 0) { tilesk$reads <- 0 - #If fragments - }else{ - - #N Tiles + # If fragments + } else { + # N Tiles nTiles <- chromLengths[availableChr[k]] / tileSize - #Add one tile if there is extra bases + # Add one tile if there is extra bases if (nTiles%%1 != 0) { - nTiles <- trunc(nTiles) + 1 + nTiles <- trunc(x = nTiles) + 1 } - - #Create Sparse Matrix + # Create Sparse Matrix matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) - #For each tiles of this chromosome, create start tile and end tile row, set the associated counts matching with the fragments - mat <- Matrix::sparseMatrix( - i = c(trunc(start(fragik) / tileSize), trunc(end(fragik) / tileSize)) + 1, - j = as.vector(c(matchID, matchID)), - x = rep(1, 2*length(fragik)), - dims = c(nTiles, length(cellGroupi))) + # For each tiles of this chromosome, create start tile and end tile row, + # set the associated counts matching with the fragments + mat <- sparseMatrix( + i = c(trunc(x = start(x = fragik) / tileSize), + trunc(x = end(x = fragik) / tileSize)) + 1, + j = as.vector(x = c(matchID, matchID)), + x = rep(1, 2*length(x = fragik)), + dims = c(nTiles, length(x = cellGroupi)) + ) - #Max count for a cells in a tile is set to cutoff - if(!is.null(cutoff)){ + # Max count for a cells in a tile is set to cutoff + if (!is.null(x = cutoff)){ mat@x[mat@x > cutoff] <- cutoff } - - #Sums the cells - mat <- Matrix::rowSums(mat) - + # Sums the cells + mat <- rowSums(x = mat) tilesk$reads <- mat - - #Normalization - if(!is.null(normMethod)){ - if(tolower(normMethod) == "rc"){ + # Normalization + if (!is.null(x = normMethod)) { + if (normMethod == "rc") { tilesk$reads <- tilesk$reads * 10^4 / length(fragi$name) - }else if(tolower(normMethod) == "ncells"){ + } else if (normMethod == "ncells") { tilesk$reads <- tilesk$reads / length(cellGroupi) - }else if(tolower(normMethod) == "none"){ - }else{ - if(!is.null(normBy)){ + } else if (normMethod == "none") { + } else { + if (!is.null(x = normBy)){ tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) } } } } - tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] - tilesk - }) names(covList) <- availableChr - - covList <- as(covList, "RleList") - + covList <- as(object = covList, Class = "RleList") rtracklayer::export.bw(object = covList, con = covFile) - return(covFile) } \ No newline at end of file From e65a74dbe8e489f92b01a2a9c049c67a317c83a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20Herv=C3=A9?= Date: Fri, 3 Nov 2023 10:56:37 +0100 Subject: [PATCH 08/34] Update utilities.R I have an error passing the GroupNames via x --- R/utilities.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 4854086d..70d6f336 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2372,7 +2372,7 @@ ExportGroupBW <- function( mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) } covFiles <- mylapply( - x = GroupsNames, + GroupsNames, FUN = CreateBWGroup, availableChr, chromLengths, @@ -2491,4 +2491,4 @@ CreateBWGroup <- function( covList <- as(object = covList, Class = "RleList") rtracklayer::export.bw(object = covList, con = covFile) return(covFile) -} \ No newline at end of file +} From 487bd46e971f4ab4d495f498857455318f7985fc Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 15:41:41 +0200 Subject: [PATCH 09/34] use GetGroups --- R/utilities.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 70d6f336..ad319abb 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2308,11 +2308,12 @@ ExportGroupBW <- function( } assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) DefaultAssay(object = object) <- assay - group.by <- SetIfNull(x = group.by, y = 'ident') - Idents(object = object) <- group.by - idents <- SetIfNull(x = idents, y = levels(x = object)) - GroupsNames <- names(x = table(object[[group.by]])[table(object[[group.by]]) > minCells]) - GroupsNames <- GroupsNames[GroupsNames %in% idents] + obj.groups <- GetGroups( + object = object, + group.by = group.by, + idents = idents + ) + GroupsNames <- names(x = table(obj.groups)[table(obj.groups) > minCells]) # Check if output files already exist lapply(X = GroupsNames, FUN = function(x) { fn <- paste0(outdir, .Platform$file.sep, x, ".bed") From 5271bbc7265049d6c32327e81ad6d4f207dbb4c4 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 15:45:43 +0200 Subject: [PATCH 10/34] raise error if outdir is not set --- R/utilities.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index ad319abb..e9c78f88 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2298,6 +2298,9 @@ ExportGroupBW <- function( outdir = NULL, verbose=TRUE ) { + if (is.null(outdir)) { + stop("Must supply the output directory") + } # Check if temporary directory exist if (!dir.exists(outdir)){ dir.create(outdir) From cc56669709d0604b74b7a61be98f88b72a027516 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 15:50:15 +0200 Subject: [PATCH 11/34] add an import for nbrOfWorkers --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index e9c78f88..ac53618d 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2274,7 +2274,7 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { #' @param verbose Display messages #' #' @importFrom GenomicRanges seqlengths GRanges slidingWindows -#' @importFrom future plan +#' @importFrom future plan nbrOfWorkers #' @importFrom future.apply future_lapply #' @importFrom pbapply pbapply #' From d72e1549c2c98c7c2db4299038a370a4e9f9f973 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 17:16:04 +0200 Subject: [PATCH 12/34] set outdir default value to wd --- R/utilities.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index ac53618d..8fff1a41 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2295,19 +2295,16 @@ ExportGroupBW <- function( minCells = 5, cutoff = NULL, chromosome = NULL, - outdir = NULL, - verbose=TRUE + outdir = getwd(), + verbose = TRUE ) { - if (is.null(outdir)) { - stop("Must supply the output directory") - } # Check if temporary directory exist if (!dir.exists(outdir)){ dir.create(outdir) } if (!requireNamespace("rtracklayer", quietly = TRUE)) { message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") - return(NULL) + return(NULL) } assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) DefaultAssay(object = object) <- assay From aad0a9b01992ab4b4f94aadce6a54c1f23bdab76 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 17:16:53 +0200 Subject: [PATCH 13/34] change help for outdir to match SplitFragments --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 8fff1a41..6c23ee34 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2270,7 +2270,7 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { #' @param minCells The minimum of cells in a group to be exported #' @param cutoff The maximum number of fragments in a given genomic tile #' @param chromosome A chromosomes vector to be exported -#' @param outdir The output directory for bigwig file +#' @param outdir Directory to write output files (splitted bed and bigwigs) #' @param verbose Display messages #' #' @importFrom GenomicRanges seqlengths GRanges slidingWindows From 093074df59e0611547b3a7b651f08938c05d493c Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 17:17:24 +0200 Subject: [PATCH 14/34] remove unneeded change of DefaultAssay --- R/utilities.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 6c23ee34..e98c519c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2307,7 +2307,6 @@ ExportGroupBW <- function( return(NULL) } assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) - DefaultAssay(object = object) <- assay obj.groups <- GetGroups( object = object, group.by = group.by, From c6e81e731676d6ac82e5730f3d125a1bd09aa5d6 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 17:17:46 +0200 Subject: [PATCH 15/34] raise an error if the object has no Fragments --- R/utilities.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index e98c519c..86b98cc1 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2306,6 +2306,9 @@ ExportGroupBW <- function( message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") return(NULL) } + if (length(Fragments(object)) == 0) { + stop("This object does not have Fragments, cannot generate bigwig.") + } assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) obj.groups <- GetGroups( object = object, From bfa0d217a3bfa8bef7061a011f451559b46e52dc Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 20:59:45 +0200 Subject: [PATCH 16/34] add tests --- tests/testthat/test-utilities.R | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index 45f69377..e0f0f3b1 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -67,3 +67,74 @@ test_that("ExtractCell works", { object = ExtractCell(x = "chr1\t1\t300\tTGCA\t1"), expected = "TGCA" ) }) + +test_that("CreateBWGroup works", { + outdir <- file.path(tempdir(), "createBW") + dir.create(outdir, showWarnings = FALSE) + fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") + cells <- colnames(x = atac_small) + names(x = cells) <- cells + frags <- CreateFragmentObject( + path = fpath, + cells = cells, + verbose = FALSE, + validate = FALSE + ) + Fragments(atac_small) <- frags + SplitFragments( + object = atac_small, + assay = "peaks", + group.by = "seurat_clusters", + outdir = outdir + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 249250621), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = 1, end = 249250621)), + normBy = NULL, + tileSize = 249250621, + normMethod = 'RC', + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 3) + expect(file.exists(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")), "File does not exist.") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")) + expect_equal(object = bw$score, 20000) +}) + +test_that("ExportGroupBW works", { + outdir <- file.path(tempdir(), "ExportGroupBW") + dir.create(outdir, showWarnings = FALSE) + fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") + cells <- colnames(x = atac_small) + names(x = cells) <- cells + frags <- CreateFragmentObject( + path = fpath, + cells = cells, + verbose = FALSE, + validate = FALSE + ) + Fragments(atac_small) <- frags + ExportGroupBW( + object = atac_small, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = outdir, + verbose = TRUE + ) + expect_equal(object = length(list.files(outdir)), expected = 4) + expect( + file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), + "File does not exist." + ) + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + assert_equal(object = length(seqlengths(bw)), 298) +}) \ No newline at end of file From f7f667b788bbcbb14a24d8d77fa44f083fdd1df7 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 21:17:09 +0200 Subject: [PATCH 17/34] run document --- NAMESPACE | 9 +++++++ R/utilities.R | 5 ++-- man/ExportGroupBW.Rd | 57 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 man/ExportGroupBW.Rd diff --git a/NAMESPACE b/NAMESPACE index 25899c45..218f91a6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -117,6 +117,7 @@ export(CreateMotifObject) export(DensityScatter) export(DepthCor) export(DownsampleFeatures) +export(ExportGroupBW) export(ExpressionPlot) export(Extend) export(FRiP) @@ -216,6 +217,7 @@ importFrom(BiocGenerics,end) importFrom(BiocGenerics,sort) importFrom(BiocGenerics,start) importFrom(BiocGenerics,strand) +importFrom(BiocGenerics,which) importFrom(BiocGenerics,width) importFrom(GenomeInfoDb,"genome<-") importFrom(GenomeInfoDb,"isCircular<-") @@ -235,6 +237,7 @@ importFrom(GenomeInfoDb,seqlevels) importFrom(GenomeInfoDb,seqnames) importFrom(GenomeInfoDb,sortSeqlevels) importFrom(GenomicRanges,GRanges) +importFrom(GenomicRanges,coverage) importFrom(GenomicRanges,disjoin) importFrom(GenomicRanges,end) importFrom(GenomicRanges,findOverlaps) @@ -244,6 +247,7 @@ importFrom(GenomicRanges,makeGRangesFromDataFrame) importFrom(GenomicRanges,reduce) importFrom(GenomicRanges,resize) importFrom(GenomicRanges,seqnames) +importFrom(GenomicRanges,slidingWindows) importFrom(GenomicRanges,start) importFrom(GenomicRanges,strand) importFrom(GenomicRanges,tileGenome) @@ -290,6 +294,7 @@ importFrom(Rsamtools,scanTabix) importFrom(Rsamtools,seqnamesTabix) importFrom(S4Vectors,"mcols<-") importFrom(S4Vectors,elementNROWS) +importFrom(S4Vectors,match) importFrom(S4Vectors,mcols) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,split) @@ -318,6 +323,7 @@ importFrom(dplyr,top_n) importFrom(dplyr,ungroup) importFrom(fastmatch,fmatch) importFrom(future,nbrOfWorkers) +importFrom(future,plan) importFrom(future.apply,future_lapply) importFrom(ggplot2,aes) importFrom(ggplot2,aes_string) @@ -381,8 +387,11 @@ importFrom(methods,slotNames) importFrom(patchwork,guide_area) importFrom(patchwork,plot_layout) importFrom(patchwork,wrap_plots) +importFrom(pbapply,pbapply) importFrom(pbapply,pblapply) importFrom(rlang,.data) +importFrom(rtracklayer,export.bw) +importFrom(rtracklayer,import) importFrom(scales,comma) importFrom(scales,hue_pal) importFrom(stats,approx) diff --git a/R/utilities.R b/R/utilities.R index 86b98cc1..102fbdd5 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2273,12 +2273,11 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { #' @param outdir Directory to write output files (splitted bed and bigwigs) #' @param verbose Display messages #' -#' @importFrom GenomicRanges seqlengths GRanges slidingWindows +#' @importFrom GenomicRanges GRanges slidingWindows #' @importFrom future plan nbrOfWorkers #' @importFrom future.apply future_lapply #' @importFrom pbapply pbapply #' -#' @return #' @export #' #' @examples @@ -2408,7 +2407,7 @@ ExportGroupBW <- function( # @param outdir The output directory for bigwig file # #' @importFrom rtracklayer import export.bw -#' @importFrom GenomicRanges seqnames seqlengths GRanges coverage +#' @importFrom GenomicRanges seqnames GRanges coverage #' @importFrom BiocGenerics which #' @importFrom S4Vectors match #' @importFrom Matrix sparseMatrix rowSums diff --git a/man/ExportGroupBW.Rd b/man/ExportGroupBW.Rd new file mode 100644 index 00000000..9a7c684a --- /dev/null +++ b/man/ExportGroupBW.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{ExportGroupBW} +\alias{ExportGroupBW} +\title{Export bigwig files for groups of cells} +\usage{ +ExportGroupBW( + object, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = getwd(), + verbose = TRUE +) +} +\arguments{ +\item{object}{A Seurat object} + +\item{assay}{Name of assay to use} + +\item{group.by}{The metadata variable used to group the cells} + +\item{idents}{Identities to include (defined by group.by parameter)} + +\item{normMethod}{Normalization method for the bigwig files. Deafult 'RC'. +'RC' will divide the number of fragments in a tile by the total number of +fragments in the group. A scaling factor of 10^4 will be applied. +'ncells' will divide the number of fragments in a tile by the number of cell +in the group. 'none' will apply no normalization method. +A vector of values for each cell can also be passed as a metadata column +name. A scaling factor of 10^4 will be applied} + +\item{tileSize}{The size of the tiles in the bigwig file} + +\item{minCells}{The minimum of cells in a group to be exported} + +\item{cutoff}{The maximum number of fragments in a given genomic tile} + +\item{chromosome}{A chromosomes vector to be exported} + +\item{outdir}{Directory to write output files (splitted bed and bigwigs)} + +\item{verbose}{Display messages} +} +\description{ +Export bigwig files for groups of cells +} +\examples{ +\dontrun{ +ExportGroupBW(object, assay = "peaks") +} +} From b849dab48bf3d547d644adc20199b25f396c16a9 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 21:24:50 +0200 Subject: [PATCH 18/34] Add rtracklayer to the Imports --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 55b27307..15a334fe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,8 @@ Imports: Rcpp, grid, tidyselect, - vctrs + vctrs, + rtracklayer Collate: 'RcppExports.R' 'data.R' @@ -88,7 +89,6 @@ Suggests: BSgenome, shiny, miniUI, - rtracklayer, biovizBase, Biostrings, lsa, From 74f5fae1646fc1661ca26d43c945ec029561245b Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 21:34:49 +0200 Subject: [PATCH 19/34] remove unneeded check for rtracklayer --- R/utilities.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 102fbdd5..c9cb86a4 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2301,10 +2301,6 @@ ExportGroupBW <- function( if (!dir.exists(outdir)){ dir.create(outdir) } - if (!requireNamespace("rtracklayer", quietly = TRUE)) { - message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") - return(NULL) - } if (length(Fragments(object)) == 0) { stop("This object does not have Fragments, cannot generate bigwig.") } @@ -2422,10 +2418,6 @@ CreateBWGroup <- function( cutoff, outdir ) { - if (!requireNamespace("rtracklayer", quietly = TRUE)) { - message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") - return(NULL) - } normMethod <- tolower(x = normMethod) # Read the fragments file associated to the group fragi <- rtracklayer::import( From c555f6cca5b529809f08b3f342fe66b224104cc9 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 21:34:56 +0200 Subject: [PATCH 20/34] fix assert to expect --- tests/testthat/test-utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index e0f0f3b1..8a1403a9 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -136,5 +136,5 @@ test_that("ExportGroupBW works", { "File does not exist." ) bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) - assert_equal(object = length(seqlengths(bw)), 298) + expect_equal(object = length(seqlengths(bw)), 298) }) \ No newline at end of file From 0109f1dc86334dfeb0c1ccd840227de7ff2792dc Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 23:00:11 +0200 Subject: [PATCH 21/34] use more controled tests for CreateBWGroup --- tests/testthat/test-utilities.R | 60 ++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index 8a1403a9..23d7390b 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -68,24 +68,20 @@ test_that("ExtractCell works", { ) }) -test_that("CreateBWGroup works", { +test_that("CreateBWGroup works with single tile", { outdir <- file.path(tempdir(), "createBW") dir.create(outdir, showWarnings = FALSE) - fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") - cells <- colnames(x = atac_small) - names(x = cells) <- cells - frags <- CreateFragmentObject( - path = fpath, - cells = cells, - verbose = FALSE, - validate = FALSE + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 ) - Fragments(atac_small) <- frags - SplitFragments( - object = atac_small, - assay = "peaks", - group.by = "seurat_clusters", - outdir = outdir + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE ) CreateBWGroup( groupNamei = "0", @@ -98,12 +94,44 @@ test_that("CreateBWGroup works", { cutoff = NULL, outdir = outdir ) - expect_equal(object = length(list.files(outdir)), expected = 3) + expect_equal(object = length(list.files(outdir)), expected = 2) expect(file.exists(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")), "File does not exist.") bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")) expect_equal(object = bw$score, 20000) }) +test_that("CreateBWGroup works with 100bp tile", { + outdir <- file.path(tempdir(), "createBW2") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 249250621), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 249250621, 100), end = c(seq(100, 249250621, 100), 249250621))), + normBy = NULL, + tileSize = 100, + normMethod = "RC", + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = bw$score, c(4000, 8000, 4000, 2000, 0, 2000, 0)) +}) + test_that("ExportGroupBW works", { outdir <- file.path(tempdir(), "ExportGroupBW") dir.create(outdir, showWarnings = FALSE) From 3b1cf35076eb3e3118e69a8bfb67043fd35f37b2 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 23:01:28 +0200 Subject: [PATCH 22/34] remove spaces in empty lines --- R/utilities.R | 66 +++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index c9cb86a4..b532df50 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -39,13 +39,13 @@ AddChromatinModule <- function( if (!inherits(x = object[[assay]], what = "ChromatinAssay")) { stop("The requested assay is not a ChromatinAssay.") } - + # first find index of each feature feat.idx <- sapply(X = features, FUN = fmatch, rownames(x = object[[assay]])) j <- sapply(X = seq_along(along.with = features), FUN = function(x) { rep(x = x, length(x = features[[x]])) }) - + # construct sparse matrix with features mat <- sparseMatrix( i = unlist(x = feat.idx, use.names = FALSE), @@ -55,7 +55,7 @@ AddChromatinModule <- function( ) rownames(x = mat) <- rownames(x = object[[assay]]) colnames(x = mat) <- names(x = features) - + # run chromVAR cv <- RunChromVAR( object = object[[assay]], @@ -64,7 +64,7 @@ AddChromatinModule <- function( verbose = verbose, ... ) - + # add module scores to metadata chromvar.data <- GetAssayData(object = cv, slot = "data") object <- AddMetaData( @@ -334,7 +334,7 @@ GeneActivity <- function( stop("No genes remaining after filtering for requested biotypes") } } - + # filter genes if provided if (!is.null(x = features)) { transcripts <- transcripts[transcripts$gene_name %in% features] @@ -349,14 +349,14 @@ GeneActivity <- function( stop("No genes remaining after filtering for max.width") } } - + # extend to include promoters transcripts <- Extend( x = transcripts, upstream = extend.upstream, downstream = extend.downstream ) - + # quantify frags <- Fragments(object = object[[assay]]) if (length(x = frags) == 0) { @@ -375,7 +375,7 @@ GeneActivity <- function( names(x = gene.key) <- GRangesToString(grange = transcripts) rownames(x = counts) <- as.vector(x = gene.key[rownames(x = counts)]) counts <- counts[rownames(x = counts) != "", ] - + return(counts) } @@ -409,7 +409,7 @@ GetGRangesFromEnsDb <- function( if (standard.chromosomes) { whole.genome <- keepStandardChromosomes(whole.genome, pruning.mode = "coarse") } - + # extract genes from each chromosome my_lapply <- ifelse(test = verbose, yes = pblapply, no = lapply) tx <- my_lapply(X = seq_along(whole.genome), FUN = function(x){ @@ -418,7 +418,7 @@ GetGRangesFromEnsDb <- function( which = whole.genome[x], columns = c("tx_id", "gene_name", "gene_id", "gene_biotype"))) }) - + # combine tx <- do.call(what = c, args = tx) tx <- tx[tx$gene_biotype %in% biotypes] @@ -1307,7 +1307,7 @@ GetReadsInRegion <- function( ) { file.to.object <- names(x = cellmap) names(x = file.to.object) <- cellmap - + if (verbose) { message("Extracting reads in requested region") } @@ -1679,7 +1679,7 @@ CreateRegionPileupMatrix <- function( on_plus <- strand(x = regions) == "+" | strand(x = regions) == "*" plus.strand <- regions[on_plus, ] minus.strand <- regions[!on_plus, ] - + # get cut matrices for each strand if (verbose) { message("Finding + strand cut sites") @@ -1701,7 +1701,7 @@ CreateRegionPileupMatrix <- function( cells = cells, verbose = FALSE ) - + # reverse minus strand and add together if (is.null(x = cut.matrix.plus)) { full.matrix <- cut.matrix.minus[, rev(x = colnames(x = cut.matrix.minus))] @@ -1755,13 +1755,13 @@ ApplyMatrixByGroup <- function( } ngroup <- length(x = all.groups) npos <- ncol(x = mat) - + group <- unlist( x = lapply(X = all.groups, FUN = function(x) rep(x, npos)) ) position <- rep(x = as.numeric(x = colnames(x = mat)), ngroup) count <- vector(mode = "numeric", length = npos * ngroup) - + for (i in seq_along(along.with = all.groups)) { grp <- all.groups[[i]] if (is.na(x = grp)) { @@ -1776,13 +1776,13 @@ ApplyMatrixByGroup <- function( } count[((i - 1) * npos + 1):((i * npos))] <- totals } - + # construct dataframe coverages <- data.frame( "group" = group, "position" = position, "count" = count, stringsAsFactors = FALSE ) - + if (normalize) { scale.factor <- SetIfNull( x = scale.factor, y = median(x = group.scale.factors) @@ -1911,12 +1911,12 @@ IsMatrixEmpty <- function(x) { # information GetRowsToMerge <- function(assay.list, all.ranges, reduced.ranges) { revmap <- as.vector(x = reduced.ranges$revmap) - + # get indices of ranges that changed revmap.lengths <- sapply(X = revmap, FUN = length) changed.ranges <- which(x = revmap.lengths > 1) grange.string <- GRangesToString(grange = reduced.ranges[changed.ranges]) - + # preallocate offsets <- list() results <- list() @@ -1935,7 +1935,7 @@ GetRowsToMerge <- function(assay.list, all.ranges, reduced.ranges) { results[['matrix']][[i]] <- matrix.indices results[['grange']][[i]] <- granges } - + # find sets of ranges for each dataset counter <- vector(mode = "numeric", length = length(x = assay.list)) for (x in seq_along(along.with = changed.ranges)) { @@ -1992,15 +1992,15 @@ MergeOverlappingRows <- function( ) return(merge.counts) } - + # transpose for faster access since matrix is column major counts <- t(x = counts) - + # get rows to merge mrows <- mergeinfo$matrix[[i]] new.rownames <- mergeinfo$grange[[i]] nrep <- rle(x = new.rownames) - + # allocate todelete <- c() newmat <- vector( @@ -2058,13 +2058,13 @@ MergeOverlappingRows <- function( to.rename.names <- to.rename.names[1:idx.counter] newmat <- newmat[1:(y - 1)] newmat.names <- newmat.names[1:(y - 1)] - + # transpose back counts <- t(x = counts) - + # rename matrix rows that weren't merged rownames(counts)[to.rename.idx] <- to.rename.names - + if (y == 1) { # no rows were merged, can return counts merge.counts[[i]] <- counts @@ -2084,12 +2084,12 @@ MergeOverlappingRows <- function( merged.mat <- Reduce(f = rbind, x = newmat) rownames(merged.mat) <- newmat.names merged.mat <- as(object = merged.mat, Class = "CsparseMatrix") - + # remove rows from count matrix that were merged mat.rows <- seq_len(length.out = nrow(x = counts)) tokeep <- setdiff(mat.rows, todelete) counts <- counts[tokeep, ] - + # add new merged rows to counts counts <- rbind(counts, merged.mat) merge.counts[[i]] <- counts @@ -2352,7 +2352,7 @@ ExportGroupBW <- function( end = as.numeric(x = chromLengths) ) ) - + if (verbose) { message("Creating tiles") } @@ -2429,7 +2429,7 @@ CreateBWGroup <- function( outdir, paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw") ) - + covList <- lapply(X = seq_along(availableChr), FUN = function(k) { fragik <- fragi[seqnames(fragi) == availableChr[k],] tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] @@ -2445,7 +2445,7 @@ CreateBWGroup <- function( } # Create Sparse Matrix matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) - + # For each tiles of this chromosome, create start tile and end tile row, # set the associated counts matching with the fragments mat <- sparseMatrix( @@ -2455,7 +2455,7 @@ CreateBWGroup <- function( x = rep(1, 2*length(x = fragik)), dims = c(nTiles, length(x = cellGroupi)) ) - + # Max count for a cells in a tile is set to cutoff if (!is.null(x = cutoff)){ mat@x[mat@x > cutoff] <- cutoff @@ -2480,7 +2480,7 @@ CreateBWGroup <- function( tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] tilesk }) - + names(covList) <- availableChr covList <- as(object = covList, Class = "RleList") rtracklayer::export.bw(object = covList, con = covFile) From 3f4ba003f445c98e4fcfdc99c0595a081796a4b4 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Wed, 2 Oct 2024 23:06:24 +0200 Subject: [PATCH 23/34] restore 2 space indentation for params --- R/utilities.R | 294 +++++++++++++++++++++++++------------------------- 1 file changed, 147 insertions(+), 147 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index b532df50..cd741aa3 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -28,12 +28,12 @@ NULL #' @export #' @concept utilities AddChromatinModule <- function( - object, - features, - genome, - assay = NULL, - verbose = TRUE, - ... + object, + features, + genome, + assay = NULL, + verbose = TRUE, + ... ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) if (!inherits(x = object[[assay]], what = "ChromatinAssay")) { @@ -92,10 +92,10 @@ globalVariables(names = c("group", "readcount"), package = "Signac") #' @examples #' AverageCounts(atac_small) AverageCounts <- function( - object, - assay = NULL, - group.by = NULL, - verbose = TRUE + object, + assay = NULL, + group.by = NULL, + verbose = TRUE ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) if (is.null(x = group.by)) { @@ -135,11 +135,11 @@ AverageCounts <- function( #' @importFrom Matrix rowSums #' @return Returns a vector of peak names AccessiblePeaks <- function( - object, - assay = NULL, - idents = NULL, - cells = NULL, - min.cells = 10 + object, + assay = NULL, + idents = NULL, + cells = NULL, + min.cells = 10 ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) cells <- SetIfNull(x = cells, y = WhichCells(object, idents = idents)) @@ -165,8 +165,8 @@ AccessiblePeaks <- function( #' @examples #' CellsPerGroup(atac_small) CellsPerGroup <- function( - object, - group.by = NULL + object, + group.by = NULL ) { if (is.null(x = group.by)) { cellgroups <- Idents(object = object) @@ -207,10 +207,10 @@ CellsPerGroup <- function( #' ) #' } ClosestFeature <- function( - object, - regions, - annotation = NULL, - ... + object, + regions, + annotation = NULL, + ... ) { if (!is(object = regions, class2 = 'GRanges')) { regions <- StringToGRanges(regions = regions, ...) @@ -290,16 +290,16 @@ ClosestFeature <- function( #' Fragments(atac_small) <- fragments #' GeneActivity(atac_small) GeneActivity <- function( - object, - assay = NULL, - features = NULL, - extend.upstream = 2000, - extend.downstream = 0, - biotypes = "protein_coding", - max.width = 500000, - process_n = 2000, - gene.id = FALSE, - verbose = TRUE + object, + assay = NULL, + features = NULL, + extend.upstream = 2000, + extend.downstream = 0, + biotypes = "protein_coding", + max.width = 500000, + process_n = 2000, + gene.id = FALSE, + verbose = TRUE ) { if (!is.null(x = features)) { if (length(x = features) == 0) { @@ -395,10 +395,10 @@ GeneActivity <- function( #' @concept utilities #' @export GetGRangesFromEnsDb <- function( - ensdb, - standard.chromosomes = TRUE, - biotypes = c("protein_coding", "lincRNA", "rRNA", "processed_transcript"), - verbose = TRUE + ensdb, + standard.chromosomes = TRUE, + biotypes = c("protein_coding", "lincRNA", "rRNA", "processed_transcript"), + verbose = TRUE ) { if (!requireNamespace("biovizBase", quietly = TRUE)) { stop("Please install biovizBase\n", @@ -486,12 +486,12 @@ GetTSSPositions <- function(ranges, biotypes = "protein_coding") { #' assay.2 = 'bins' #' ) GetIntersectingFeatures <- function( - object.1, - object.2, - assay.1 = NULL, - assay.2 = NULL, - distance = 0, - verbose = TRUE + object.1, + object.2, + assay.1 = NULL, + assay.2 = NULL, + distance = 0, + verbose = TRUE ) { assay.1 <- SetIfNull(x = assay.1, y = DefaultAssay(object = object.1)) assay.2 <- SetIfNull(x = assay.2, y = DefaultAssay(object = object.2)) @@ -591,10 +591,10 @@ GRangesToString <- function(grange, sep = c("-", "-")) { #' @examples #' Extend(x = blacklist_hg19, upstream = 100, downstream = 100) Extend <- function( - x, - upstream = 0, - downstream = 0, - from.midpoint = FALSE + x, + upstream = 0, + downstream = 0, + from.midpoint = FALSE ) { if (any(strand(x = x) == "*")) { warning("'*' ranges were treated as '+'") @@ -683,10 +683,10 @@ GetCellsInRegion <- function(tabix, region, cells = NULL) { #' ) #' } CountsInRegion <- function( - object, - assay, - regions, - ... + object, + assay, + regions, + ... ) { if (!is(object = object[[assay]], class2 = "ChromatinAssay")) { stop("Must supply a ChromatinAssay") @@ -724,10 +724,10 @@ CountsInRegion <- function( #' ) #' } FractionCountsInRegion <- function( - object, - regions, - assay = NULL, - ... + object, + regions, + assay = NULL, + ... ) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) reads.in.region <- CountsInRegion( @@ -772,12 +772,12 @@ FractionCountsInRegion <- function( #' "chr1-762106-763359","chr1-779589-780271") #' IntersectMatrix(matrix = counts, regions = blacklist_hg19) IntersectMatrix <- function( - matrix, - regions, - invert = FALSE, - sep = c("-", "-"), - verbose = TRUE, - ... + matrix, + regions, + invert = FALSE, + sep = c("-", "-"), + verbose = TRUE, + ... ) { if (is(object = regions, class2 = "character")) { regions <- StringToGRanges(regions = regions, sep = sep) @@ -880,12 +880,12 @@ LookupGeneCoords <- function(object, gene, assay = NULL) { #' n = 10 #' ) MatchRegionStats <- function( - meta.feature, - query.feature, - features.match = c("GC.percent"), - n = 10000, - verbose = TRUE, - ... + meta.feature, + query.feature, + features.match = c("GC.percent"), + n = 10000, + verbose = TRUE, + ... ) { if (!inherits(x = meta.feature, what = 'data.frame')) { stop("meta.feature should be a data.frame") @@ -997,11 +997,11 @@ UnifyPeaks <- function(object.list, mode = "reduce") { #' @examples #' SubsetMatrix(mat = volcano) SubsetMatrix <- function( - mat, - min.rows = 1, - min.cols = 1, - max.row.val = 10, - max.col.val = NULL + mat, + min.rows = 1, + min.cols = 1, + max.row.val = 10, + max.col.val = NULL ) { rowcount <- rowSums(mat > 0) colcount <- colSums(mat > 0) @@ -1053,10 +1053,10 @@ AddMissingCells <- function(x, cells) { #' @importFrom SeuratObject DefaultAssay GetAssayData #' @importFrom Matrix Diagonal tcrossprod rowSums AverageCountMatrix <- function( - object, - assay = NULL, - group.by = NULL, - idents = NULL + object, + assay = NULL, + group.by = NULL, + idents = NULL ) { assay = SetIfNull(x = assay, y = DefaultAssay(object = object)) countmatrix <- GetAssayData(object = object[[assay]], slot = "counts") @@ -1242,12 +1242,12 @@ ExtractFragments <- function(fragments, n = NULL, verbose = TRUE) { # convert region argument to genomic coordinates # region can be a string, name of a gene, or GRanges object FindRegion <- function( - object, - region, - sep = c("-", "-"), - assay = NULL, - extend.upstream = 0, - extend.downstream = 0 + object, + region, + sep = c("-", "-"), + assay = NULL, + extend.upstream = 0, + extend.downstream = 0 ) { if (!is(object = region, class2 = "GRanges")) { # first try to convert to coordinates, if not lookup gene @@ -1298,12 +1298,12 @@ FindRegion <- function( # # @return Returns a data frame GetReadsInRegion <- function( - cellmap, - region, - tabix.file, - cells = NULL, - verbose = TRUE, - ... + cellmap, + region, + tabix.file, + cells = NULL, + verbose = TRUE, + ... ) { file.to.object <- names(x = cellmap) names(x = file.to.object) <- cellmap @@ -1362,9 +1362,9 @@ GetReadsInRegion <- function( # @return Returns a named vector #' @importFrom SeuratObject Idents GetGroups <- function( - object, - group.by, - idents + object, + group.by, + idents ) { if (is.null(x = group.by)) { obj.groups <- Idents(object = object) @@ -1414,11 +1414,11 @@ MergeMatrixParts <- function(mat.list, new.rownames) { #' @importFrom Rsamtools TabixFile #' @importFrom GenomeInfoDb keepSeqlevels MultiGetReadsInRegion <- function( - object, - region, - fragment.list = NULL, - assay = NULL, - ... + object, + region, + fragment.list = NULL, + assay = NULL, + ... ) { if (inherits(x = object, what = "Seurat")) { # pull the assay @@ -1471,11 +1471,11 @@ MultiGetReadsInRegion <- function( #' @importMethodsFrom GenomicRanges width start end # @return Returns a sparse matrix SingleFileCutMatrix <- function( - cellmap, - region, - cells = NULL, - tabix.file, - verbose = TRUE + cellmap, + region, + cells = NULL, + tabix.file, + verbose = TRUE ) { # if multiple regions supplied, must be the same width cells <- SetIfNull(x = cells, y = names(x = cellmap)) @@ -1541,12 +1541,12 @@ SingleFileCutMatrix <- function( #' @importFrom Rsamtools TabixFile seqnamesTabix #' @importFrom GenomeInfoDb keepSeqlevels CutMatrix <- function( - object, - region, - group.by = NULL, - assay = NULL, - cells = NULL, - verbose = TRUE + object, + region, + group.by = NULL, + assay = NULL, + cells = NULL, + verbose = TRUE ) { # run SingleFileCutMatrix for each fragment file and combine results assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) @@ -1603,13 +1603,13 @@ CutMatrix <- function( #' @importFrom SeuratObject DefaultAssay #' @importFrom GenomeInfoDb keepSeqlevels MultiRegionCutMatrix <- function( - object, - regions, - group.by = NULL, - fragments = NULL, - assay = NULL, - cells = NULL, - verbose = FALSE + object, + regions, + group.by = NULL, + fragments = NULL, + assay = NULL, + cells = NULL, + verbose = FALSE ) { if (inherits(x = object, what = "Seurat")) { assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) @@ -1666,11 +1666,11 @@ MultiRegionCutMatrix <- function( # @param verbose Display messages #' @importMethodsFrom GenomicRanges strand CreateRegionPileupMatrix <- function( - object, - regions, - assay = NULL, - cells = NULL, - verbose = TRUE + object, + regions, + assay = NULL, + cells = NULL, + verbose = TRUE ) { if (length(x = regions) == 0) { stop("No regions supplied") @@ -1737,12 +1737,12 @@ CreateRegionPileupMatrix <- function( # @param scale.factor Scaling factor to use. If NULL (default), will use the # median normalization factor for all the groups. ApplyMatrixByGroup <- function( - mat, - groups, - fun, - normalize = TRUE, - group.scale.factors = NULL, - scale.factor = NULL + mat, + groups, + fun, + normalize = TRUE, + group.scale.factors = NULL, + scale.factor = NULL ) { if (normalize) { if (is.null(x = group.scale.factors) | is.null(x = scale.factor)) { @@ -1971,10 +1971,10 @@ GetRowsToMerge <- function(assay.list, all.ranges, reduced.ranges) { #' @importFrom Matrix rowSums #' @importMethodsFrom Matrix t MergeOverlappingRows <- function( - mergeinfo, - assay.list, - slot = "counts", - verbose = TRUE + mergeinfo, + assay.list, + slot = "counts", + verbose = TRUE ) { merge.counts <- list() for (i in seq_along(along.with = assay.list)) { @@ -2285,17 +2285,17 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { #' ExportGroupBW(object, assay = "peaks") #' } ExportGroupBW <- function( - object, - assay = NULL, - group.by = NULL, - idents = NULL, - normMethod = "RC", - tileSize = 100, - minCells = 5, - cutoff = NULL, - chromosome = NULL, - outdir = getwd(), - verbose = TRUE + object, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = getwd(), + verbose = TRUE ) { # Check if temporary directory exist if (!dir.exists(outdir)){ @@ -2408,15 +2408,15 @@ ExportGroupBW <- function( #' @importFrom S4Vectors match #' @importFrom Matrix sparseMatrix rowSums CreateBWGroup <- function( - groupNamei, - availableChr, - chromLengths, - tiles, - normBy, - tileSize, - normMethod, - cutoff, - outdir + groupNamei, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir ) { normMethod <- tolower(x = normMethod) # Read the fragments file associated to the group From 09203f8dfe69bd42d34926b13d842e5f912904c4 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 3 Oct 2024 06:56:22 +0200 Subject: [PATCH 24/34] change bin assignment --- R/utilities.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index cd741aa3..25979a4f 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2421,7 +2421,8 @@ CreateBWGroup <- function( normMethod <- tolower(x = normMethod) # Read the fragments file associated to the group fragi <- rtracklayer::import( - paste0(outdir, .Platform$file.sep, groupNamei, ".bed"), format = "bed" + paste0(outdir, .Platform$file.sep, groupNamei, ".bed"), + format = "bed" ) cellGroupi <- unique(x = fragi$name) # Open the writing bigwig file @@ -2448,9 +2449,11 @@ CreateBWGroup <- function( # For each tiles of this chromosome, create start tile and end tile row, # set the associated counts matching with the fragments + # This changes compared to ArchR version 1.0.2 + # See https://github.com/GreenleafLab/ArchR/issues/2214 mat <- sparseMatrix( - i = c(trunc(x = start(x = fragik) / tileSize), - trunc(x = end(x = fragik) / tileSize)) + 1, + i = c(trunc(x = (start(x = fragik) - 1) / tileSize), + trunc(x = (end(x = fragik) - 1) / tileSize)) + 1, j = as.vector(x = c(matchID, matchID)), x = rep(1, 2*length(x = fragik)), dims = c(nTiles, length(x = cellGroupi)) From 2a1d612bef828eb2d446354b32814756c2d0085a Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 3 Oct 2024 07:01:25 +0200 Subject: [PATCH 25/34] add test with fragment end matching chromosome length --- tests/testthat/test-utilities.R | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index 23d7390b..6b8e8d7d 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -132,6 +132,38 @@ test_that("CreateBWGroup works with 100bp tile", { expect_equal(object = bw$score, c(4000, 8000, 4000, 2000, 0, 2000, 0)) }) +test_that("CreateBWGroup works with seqlength equal to final pos", { + outdir <- file.path(tempdir(), "createBW2") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 500), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 499, 100), end = c(seq(100, 500, 100)))), + normBy = NULL, + tileSize = 100, + normMethod = "RC", + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = bw$score, c(6000, 8000, 2000)) +}) + test_that("ExportGroupBW works", { outdir <- file.path(tempdir(), "ExportGroupBW") dir.create(outdir, showWarnings = FALSE) From a884050f71c149dc619f7b060b6ced9a27980111 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 3 Oct 2024 07:01:38 +0200 Subject: [PATCH 26/34] guess chromosome lengths if not provided --- R/utilities.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 25979a4f..4b7cd6d9 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2343,8 +2343,14 @@ ExportGroupBW <- function( if(!is.null(x = chromosome)){ seqlevels(object) <- chromosome } - availableChr <- names(x = seqlengths(object)) chromLengths <- seqlengths(object) + if (is.null(chromLengths)) { + message("Object has no seqlength. They will be estimated.") + range_granges <- range(granges(object)) + chromLengths <- end(range_granges) + names(chromLengths) <- seqnames(range_granges) + } + availableChr <- names(chromLengths) chromSizes <- GRanges( seqnames = availableChr, ranges = IRanges( From 8a63452d37131645c8b6787171da3558156a369b Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 3 Oct 2024 07:11:51 +0200 Subject: [PATCH 27/34] fix test --- tests/testthat/test-utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index 6b8e8d7d..47feb355 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -129,7 +129,7 @@ test_that("CreateBWGroup works with 100bp tile", { expect_equal(object = length(list.files(outdir)), expected = 2) expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) - expect_equal(object = bw$score, c(4000, 8000, 4000, 2000, 0, 2000, 0)) + expect_equal(object = bw$score, c(6000, 8000, 2000, 0)) }) test_that("CreateBWGroup works with seqlength equal to final pos", { From 1c6e811bdb958b6827371245fc57f09078da7972 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 3 Oct 2024 09:04:09 +0200 Subject: [PATCH 28/34] impose seqlength to be specified --- R/utilities.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 4b7cd6d9..398b45ff 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2345,10 +2345,7 @@ ExportGroupBW <- function( } chromLengths <- seqlengths(object) if (is.null(chromLengths)) { - message("Object has no seqlength. They will be estimated.") - range_granges <- range(granges(object)) - chromLengths <- end(range_granges) - names(chromLengths) <- seqnames(range_granges) + stop("Object has no seqlength, bigwig coverages cannot be evaluated.") } availableChr <- names(chromLengths) chromSizes <- GRanges( From 5c1457abfc6465d0ccfa5329327f9379af10ebb3 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 3 Oct 2024 09:11:02 +0200 Subject: [PATCH 29/34] move the chromSize before split fragments --- R/utilities.R | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 398b45ff..b9362fc9 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2304,6 +2304,22 @@ ExportGroupBW <- function( if (length(Fragments(object)) == 0) { stop("This object does not have Fragments, cannot generate bigwig.") } + # Get chromosome information + if(!is.null(x = chromosome)){ + seqlevels(object) <- chromosome + } + chromLengths <- seqlengths(object) + if (is.null(chromLengths)) { + stop("Object has no seqlength, bigwig coverages cannot be evaluated.") + } + availableChr <- names(chromLengths) + chromSizes <- GRanges( + seqnames = availableChr, + ranges = IRanges( + start = rep(1, length(x = availableChr)), + end = as.numeric(x = chromLengths) + ) + ) assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) obj.groups <- GetGroups( object = object, @@ -2339,22 +2355,6 @@ ExportGroupBW <- function( normBy <- object[[normMethod, drop = FALSE]] } } - # Get chromosome information - if(!is.null(x = chromosome)){ - seqlevels(object) <- chromosome - } - chromLengths <- seqlengths(object) - if (is.null(chromLengths)) { - stop("Object has no seqlength, bigwig coverages cannot be evaluated.") - } - availableChr <- names(chromLengths) - chromSizes <- GRanges( - seqnames = availableChr, - ranges = IRanges( - start = rep(1, length(x = availableChr)), - end = as.numeric(x = chromLengths) - ) - ) if (verbose) { message("Creating tiles") From 2396fc8d620655d82479623a105706607905c434 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 17 Jan 2025 10:45:23 +0800 Subject: [PATCH 30/34] Skip test if rtracklayer not installed --- tests/testthat/test-utilities.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index cd63091b..112ed61f 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -112,6 +112,7 @@ test_that("CreateBWGroup works with single tile", { ) expect_equal(object = length(list.files(outdir)), expected = 2) expect(file.exists(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")) expect_equal(object = bw$score, 20000) }) @@ -144,6 +145,7 @@ test_that("CreateBWGroup works with 100bp tile", { ) expect_equal(object = length(list.files(outdir)), expected = 2) expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) expect_equal(object = bw$score, c(6000, 8000, 2000, 0)) }) @@ -176,6 +178,7 @@ test_that("CreateBWGroup works with seqlength equal to final pos", { ) expect_equal(object = length(list.files(outdir)), expected = 2) expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) expect_equal(object = bw$score, c(6000, 8000, 2000)) }) @@ -211,6 +214,7 @@ test_that("ExportGroupBW works", { file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist." ) + skip_if_not_installed("rtracklayer") bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) expect_equal(object = length(seqlengths(bw)), 298) }) \ No newline at end of file From 137abf1fa29ee820767c94cdd7708a2b8a58e0dd Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:25:06 +0800 Subject: [PATCH 31/34] move rtracklayer to suggests --- DESCRIPTION | 2 +- NAMESPACE | 2 -- R/utilities.R | 5 ++++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index aadc8725..c0498712 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,7 +52,6 @@ Imports: grid, tidyselect, vctrs, - rtracklayer, lifecycle Collate: 'RcppExports.R' @@ -90,6 +89,7 @@ Suggests: BSgenome, shiny, miniUI, + rtracklayer, biovizBase, Biostrings, lsa, diff --git a/NAMESPACE b/NAMESPACE index fb8478fc..2f0de2e1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -411,8 +411,6 @@ importFrom(patchwork,wrap_plots) importFrom(pbapply,pbapply) importFrom(pbapply,pblapply) importFrom(rlang,.data) -importFrom(rtracklayer,export.bw) -importFrom(rtracklayer,import) importFrom(scales,comma) importFrom(scales,hue_pal) importFrom(stats,approx) diff --git a/R/utilities.R b/R/utilities.R index 6b7cae67..19fed4d6 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2613,7 +2613,6 @@ ExportGroupBW <- function( # @param cutoff The maximum number of fragment in a given tile # @param outdir The output directory for bigwig file # -#' @importFrom rtracklayer import export.bw #' @importFrom GenomicRanges seqnames GRanges coverage #' @importFrom BiocGenerics which #' @importFrom S4Vectors match @@ -2629,6 +2628,10 @@ CreateBWGroup <- function( cutoff, outdir ) { + if (!requireNamespace("rtracklayer", quietly = TRUE)) { + message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") + return(NULL) + } normMethod <- tolower(x = normMethod) # Read the fragments file associated to the group fragi <- rtracklayer::import( From f73d9c70dfa83cc30c3b6aa1f7831a30c5cc3b73 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:51:31 +0800 Subject: [PATCH 32/34] skip tests if no rtracklayer --- tests/testthat/test-preprocessing.R | 14 -------------- tests/testthat/test-utilities.R | 4 ++++ 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/tests/testthat/test-preprocessing.R b/tests/testthat/test-preprocessing.R index 4a43f44b..23e607a2 100644 --- a/tests/testthat/test-preprocessing.R +++ b/tests/testthat/test-preprocessing.R @@ -118,18 +118,4 @@ test_that("CreateMotifMatrix works", { genome = genome )) expect_equal(dim(motif.matrix), c(323, 2)) - features <- suppressWarnings(c( - granges(atac_small), - GenomicRanges::GRanges( - seqnames = "fake_chr", - ranges = IRanges::IRanges(start = 1, end = 1000) - ) - )) - skip_if_not_installed("BSgenome.Hsapiens.UCSC.hg19") - motif.matrix <- suppressWarnings(CreateMotifMatrix( - features = features, - pwm = pwm, - genome = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19 - )) - expect_equal(dim(motif.matrix), c(324, 2)) }) \ No newline at end of file diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index 112ed61f..d874f91f 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -85,6 +85,7 @@ test_that("ExtractCell works", { }) test_that("CreateBWGroup works with single tile", { + skip_if_not_installed("rtracklayer") outdir <- file.path(tempdir(), "createBW") dir.create(outdir, showWarnings = FALSE) fake.bed.data <- data.frame( @@ -118,6 +119,7 @@ test_that("CreateBWGroup works with single tile", { }) test_that("CreateBWGroup works with 100bp tile", { + skip_if_not_installed("rtracklayer") outdir <- file.path(tempdir(), "createBW2") dir.create(outdir, showWarnings = FALSE) fake.bed.data <- data.frame( @@ -151,6 +153,7 @@ test_that("CreateBWGroup works with 100bp tile", { }) test_that("CreateBWGroup works with seqlength equal to final pos", { + skip_if_not_installed("rtracklayer") outdir <- file.path(tempdir(), "createBW2") dir.create(outdir, showWarnings = FALSE) fake.bed.data <- data.frame( @@ -184,6 +187,7 @@ test_that("CreateBWGroup works with seqlength equal to final pos", { }) test_that("ExportGroupBW works", { + skip_if_not_installed("rtracklayer") outdir <- file.path(tempdir(), "ExportGroupBW") dir.create(outdir, showWarnings = FALSE) fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") From b7d1e694ec4c80dff28a26cacba72cac82d4d421 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 17 Jan 2025 12:14:27 +0800 Subject: [PATCH 33/34] Reorganize bigwig --- DESCRIPTION | 1 + R/bigwig.R | 242 +++++++++++++++++++++++++++++++ R/utilities.R | 243 -------------------------------- tests/testthat/test-bigwig.R | 139 ++++++++++++++++++ tests/testthat/test-utilities.R | 139 ------------------ 5 files changed, 382 insertions(+), 382 deletions(-) create mode 100644 R/bigwig.R create mode 100644 tests/testthat/test-bigwig.R diff --git a/DESCRIPTION b/DESCRIPTION index c0498712..6305eb7e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,6 +55,7 @@ Imports: lifecycle Collate: 'RcppExports.R' + 'bigwig.R' 'data.R' 'differential_accessibility.R' 'generics.R' diff --git a/R/bigwig.R b/R/bigwig.R new file mode 100644 index 00000000..0b556254 --- /dev/null +++ b/R/bigwig.R @@ -0,0 +1,242 @@ +#' Export bigwig files for groups of cells +#' +#' @param object A Seurat object +#' @param assay Name of assay to use +#' @param group.by The metadata variable used to group the cells +#' @param idents Identities to include (defined by group.by parameter) +#' @param normMethod Normalization method for the bigwig files. Deafult 'RC'. +#' 'RC' will divide the number of fragments in a tile by the total number of +#' fragments in the group. A scaling factor of 10^4 will be applied. +#' 'ncells' will divide the number of fragments in a tile by the number of cell +#' in the group. 'none' will apply no normalization method. +#' A vector of values for each cell can also be passed as a metadata column +#' name. A scaling factor of 10^4 will be applied +#' @param tileSize The size of the tiles in the bigwig file +#' @param minCells The minimum of cells in a group to be exported +#' @param cutoff The maximum number of fragments in a given genomic tile +#' @param chromosome A chromosomes vector to be exported +#' @param outdir Directory to write output files (splitted bed and bigwigs) +#' @param verbose Display messages +#' +#' @importFrom GenomicRanges GRanges slidingWindows +#' @importFrom future plan nbrOfWorkers +#' @importFrom future.apply future_lapply +#' @importFrom pbapply pbapply +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' ExportGroupBW(object, assay = "peaks") +#' } +ExportGroupBW <- function( + object, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = getwd(), + verbose = TRUE +) { + # Check if temporary directory exist + if (!dir.exists(outdir)){ + dir.create(outdir) + } + if (length(Fragments(object)) == 0) { + stop("This object does not have Fragments, cannot generate bigwig.") + } + # Get chromosome information + if(!is.null(x = chromosome)){ + seqlevels(object) <- chromosome + } + chromLengths <- seqlengths(object) + if (is.null(chromLengths)) { + stop("Object has no seqlength, bigwig coverages cannot be evaluated.") + } + availableChr <- names(chromLengths) + chromSizes <- GRanges( + seqnames = availableChr, + ranges = IRanges( + start = rep(1, length(x = availableChr)), + end = as.numeric(x = chromLengths) + ) + ) + assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) + obj.groups <- GetGroups( + object = object, + group.by = group.by, + idents = idents + ) + GroupsNames <- names(x = table(obj.groups)[table(obj.groups) > minCells]) + # Check if output files already exist + lapply(X = GroupsNames, FUN = function(x) { + fn <- paste0(outdir, .Platform$file.sep, x, ".bed") + if (file.exists(fn)) { + message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) + file.remove(fn) + } + }) + # Splitting fragments file for each idents in group.by + SplitFragments( + object = object, + assay = assay, + group.by = group.by, + idents = idents, + outdir = outdir, + file.suffix = "", + append = TRUE, + buffer_length = 256L, + verbose = verbose + ) + # Column to normalized by + if(!is.null(x = normMethod)) { + if (tolower(x = normMethod) %in% c('rc', 'ncells', 'none')){ + normBy <- normMethod + } else{ + normBy <- object[[normMethod, drop = FALSE]] + } + } + + if (verbose) { + message("Creating tiles") + } + # Create tiles for each chromosome, from GenomicRanges + tiles <- unlist( + x = slidingWindows(x = chromSizes, width = tileSize, step = tileSize) + ) + if (verbose) { + message("Creating bigwig files at ", outdir) + } + # Run the creation of bigwig for each cellgroups + if (nbrOfWorkers() > 1) { + mylapply <- future_lapply + } else { + mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) + } + covFiles <- mylapply( + GroupsNames, + FUN = CreateBWGroup, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir + ) + return(covFiles) +} + +# Helper function for ExportGroupBW +# +# @param groupNamei The group to be exported +# @param availableChr Chromosomes to be processed +# @param chromLengths Chromosomes lengths +# @param tiles The tiles object +# @param normBy A vector of values to normalized the cells by +# @param tileSize The size of the tiles in the bigwig file +# @param normMethod Normalization method for the bigwig files +# 'RC' will divide the number of fragments in a tile by the number of fragments +# in the group. A scaling factor of 10^4 will be applied +# 'ncells' will divide the number of fragments in a tile by the number of cell +# in the group. 'none' will apply no normalization method. A vector of values +# for each cell can also be passed as a meta.data column name. A scaling factor +# of 10^4 will be applied +# @param cutoff The maximum number of fragment in a given tile +# @param outdir The output directory for bigwig file +# +#' @importFrom GenomicRanges seqnames GRanges coverage +#' @importFrom BiocGenerics which +#' @importFrom S4Vectors match +#' @importFrom Matrix sparseMatrix rowSums +CreateBWGroup <- function( + groupNamei, + availableChr, + chromLengths, + tiles, + normBy, + tileSize, + normMethod, + cutoff, + outdir +) { + if (!requireNamespace("rtracklayer", quietly = TRUE)) { + message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") + return(NULL) + } + normMethod <- tolower(x = normMethod) + # Read the fragments file associated to the group + fragi <- rtracklayer::import( + paste0(outdir, .Platform$file.sep, groupNamei, ".bed"), + format = "bed" + ) + cellGroupi <- unique(x = fragi$name) + # Open the writing bigwig file + covFile <- file.path( + outdir, + paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw") + ) + + covList <- lapply(X = seq_along(availableChr), FUN = function(k) { + fragik <- fragi[seqnames(fragi) == availableChr[k],] + tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] + if (length(x = fragik) == 0) { + tilesk$reads <- 0 + # If fragments + } else { + # N Tiles + nTiles <- chromLengths[availableChr[k]] / tileSize + # Add one tile if there is extra bases + if (nTiles%%1 != 0) { + nTiles <- trunc(x = nTiles) + 1 + } + # Create Sparse Matrix + matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) + + # For each tiles of this chromosome, create start tile and end tile row, + # set the associated counts matching with the fragments + # This changes compared to ArchR version 1.0.2 + # See https://github.com/GreenleafLab/ArchR/issues/2214 + mat <- sparseMatrix( + i = c(trunc(x = (start(x = fragik) - 1) / tileSize), + trunc(x = (end(x = fragik) - 1) / tileSize)) + 1, + j = as.vector(x = c(matchID, matchID)), + x = rep(1, 2*length(x = fragik)), + dims = c(nTiles, length(x = cellGroupi)) + ) + + # Max count for a cells in a tile is set to cutoff + if (!is.null(x = cutoff)){ + mat@x[mat@x > cutoff] <- cutoff + } + # Sums the cells + mat <- rowSums(x = mat) + tilesk$reads <- mat + # Normalization + if (!is.null(x = normMethod)) { + if (normMethod == "rc") { + tilesk$reads <- tilesk$reads * 10^4 / length(fragi$name) + } else if (normMethod == "ncells") { + tilesk$reads <- tilesk$reads / length(cellGroupi) + } else if (normMethod == "none") { + } else { + if (!is.null(x = normBy)){ + tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) + } + } + } + } + tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] + tilesk + }) + + names(covList) <- availableChr + covList <- as(object = covList, Class = "RleList") + rtracklayer::export.bw(object = covList, con = covFile) + return(covFile) +} diff --git a/R/utilities.R b/R/utilities.R index 19fed4d6..e11c9fb9 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2460,246 +2460,3 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) { rankY <- SparsifiedRanks(X = Y) return(corSparse(X = rankX, Y = rankY, cov = cov)) } - -#' Export bigwig files for groups of cells -#' -#' @param object A Seurat object -#' @param assay Name of assay to use -#' @param group.by The metadata variable used to group the cells -#' @param idents Identities to include (defined by group.by parameter) -#' @param normMethod Normalization method for the bigwig files. Deafult 'RC'. -#' 'RC' will divide the number of fragments in a tile by the total number of -#' fragments in the group. A scaling factor of 10^4 will be applied. -#' 'ncells' will divide the number of fragments in a tile by the number of cell -#' in the group. 'none' will apply no normalization method. -#' A vector of values for each cell can also be passed as a metadata column -#' name. A scaling factor of 10^4 will be applied -#' @param tileSize The size of the tiles in the bigwig file -#' @param minCells The minimum of cells in a group to be exported -#' @param cutoff The maximum number of fragments in a given genomic tile -#' @param chromosome A chromosomes vector to be exported -#' @param outdir Directory to write output files (splitted bed and bigwigs) -#' @param verbose Display messages -#' -#' @importFrom GenomicRanges GRanges slidingWindows -#' @importFrom future plan nbrOfWorkers -#' @importFrom future.apply future_lapply -#' @importFrom pbapply pbapply -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' ExportGroupBW(object, assay = "peaks") -#' } -ExportGroupBW <- function( - object, - assay = NULL, - group.by = NULL, - idents = NULL, - normMethod = "RC", - tileSize = 100, - minCells = 5, - cutoff = NULL, - chromosome = NULL, - outdir = getwd(), - verbose = TRUE -) { - # Check if temporary directory exist - if (!dir.exists(outdir)){ - dir.create(outdir) - } - if (length(Fragments(object)) == 0) { - stop("This object does not have Fragments, cannot generate bigwig.") - } - # Get chromosome information - if(!is.null(x = chromosome)){ - seqlevels(object) <- chromosome - } - chromLengths <- seqlengths(object) - if (is.null(chromLengths)) { - stop("Object has no seqlength, bigwig coverages cannot be evaluated.") - } - availableChr <- names(chromLengths) - chromSizes <- GRanges( - seqnames = availableChr, - ranges = IRanges( - start = rep(1, length(x = availableChr)), - end = as.numeric(x = chromLengths) - ) - ) - assay <- SetIfNull(x = assay, y = DefaultAssay(object = object)) - obj.groups <- GetGroups( - object = object, - group.by = group.by, - idents = idents - ) - GroupsNames <- names(x = table(obj.groups)[table(obj.groups) > minCells]) - # Check if output files already exist - lapply(X = GroupsNames, FUN = function(x) { - fn <- paste0(outdir, .Platform$file.sep, x, ".bed") - if (file.exists(fn)) { - message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x)) - file.remove(fn) - } - }) - # Splitting fragments file for each idents in group.by - SplitFragments( - object = object, - assay = assay, - group.by = group.by, - idents = idents, - outdir = outdir, - file.suffix = "", - append = TRUE, - buffer_length = 256L, - verbose = verbose - ) - # Column to normalized by - if(!is.null(x = normMethod)) { - if (tolower(x = normMethod) %in% c('rc', 'ncells', 'none')){ - normBy <- normMethod - } else{ - normBy <- object[[normMethod, drop = FALSE]] - } - } - - if (verbose) { - message("Creating tiles") - } - # Create tiles for each chromosome, from GenomicRanges - tiles <- unlist( - x = slidingWindows(x = chromSizes, width = tileSize, step = tileSize) - ) - if (verbose) { - message("Creating bigwig files at ", outdir) - } - # Run the creation of bigwig for each cellgroups - if (nbrOfWorkers() > 1) { - mylapply <- future_lapply - } else { - mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) - } - covFiles <- mylapply( - GroupsNames, - FUN = CreateBWGroup, - availableChr, - chromLengths, - tiles, - normBy, - tileSize, - normMethod, - cutoff, - outdir - ) - return(covFiles) -} - -# Helper function for ExportGroupBW -# -# @param groupNamei The group to be exported -# @param availableChr Chromosomes to be processed -# @param chromLengths Chromosomes lengths -# @param tiles The tiles object -# @param normBy A vector of values to normalized the cells by -# @param tileSize The size of the tiles in the bigwig file -# @param normMethod Normalization method for the bigwig files -# 'RC' will divide the number of fragments in a tile by the number of fragments -# in the group. A scaling factor of 10^4 will be applied -# 'ncells' will divide the number of fragments in a tile by the number of cell -# in the group. 'none' will apply no normalization method. A vector of values -# for each cell can also be passed as a meta.data column name. A scaling factor -# of 10^4 will be applied -# @param cutoff The maximum number of fragment in a given tile -# @param outdir The output directory for bigwig file -# -#' @importFrom GenomicRanges seqnames GRanges coverage -#' @importFrom BiocGenerics which -#' @importFrom S4Vectors match -#' @importFrom Matrix sparseMatrix rowSums -CreateBWGroup <- function( - groupNamei, - availableChr, - chromLengths, - tiles, - normBy, - tileSize, - normMethod, - cutoff, - outdir -) { - if (!requireNamespace("rtracklayer", quietly = TRUE)) { - message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/") - return(NULL) - } - normMethod <- tolower(x = normMethod) - # Read the fragments file associated to the group - fragi <- rtracklayer::import( - paste0(outdir, .Platform$file.sep, groupNamei, ".bed"), - format = "bed" - ) - cellGroupi <- unique(x = fragi$name) - # Open the writing bigwig file - covFile <- file.path( - outdir, - paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw") - ) - - covList <- lapply(X = seq_along(availableChr), FUN = function(k) { - fragik <- fragi[seqnames(fragi) == availableChr[k],] - tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)] - if (length(x = fragik) == 0) { - tilesk$reads <- 0 - # If fragments - } else { - # N Tiles - nTiles <- chromLengths[availableChr[k]] / tileSize - # Add one tile if there is extra bases - if (nTiles%%1 != 0) { - nTiles <- trunc(x = nTiles) + 1 - } - # Create Sparse Matrix - matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi) - - # For each tiles of this chromosome, create start tile and end tile row, - # set the associated counts matching with the fragments - # This changes compared to ArchR version 1.0.2 - # See https://github.com/GreenleafLab/ArchR/issues/2214 - mat <- sparseMatrix( - i = c(trunc(x = (start(x = fragik) - 1) / tileSize), - trunc(x = (end(x = fragik) - 1) / tileSize)) + 1, - j = as.vector(x = c(matchID, matchID)), - x = rep(1, 2*length(x = fragik)), - dims = c(nTiles, length(x = cellGroupi)) - ) - - # Max count for a cells in a tile is set to cutoff - if (!is.null(x = cutoff)){ - mat@x[mat@x > cutoff] <- cutoff - } - # Sums the cells - mat <- rowSums(x = mat) - tilesk$reads <- mat - # Normalization - if (!is.null(x = normMethod)) { - if (normMethod == "rc") { - tilesk$reads <- tilesk$reads * 10^4 / length(fragi$name) - } else if (normMethod == "ncells") { - tilesk$reads <- tilesk$reads / length(cellGroupi) - } else if (normMethod == "none") { - } else { - if (!is.null(x = normBy)){ - tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) - } - } - } - } - tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] - tilesk - }) - - names(covList) <- availableChr - covList <- as(object = covList, Class = "RleList") - rtracklayer::export.bw(object = covList, con = covFile) - return(covFile) -} diff --git a/tests/testthat/test-bigwig.R b/tests/testthat/test-bigwig.R new file mode 100644 index 00000000..c08633a7 --- /dev/null +++ b/tests/testthat/test-bigwig.R @@ -0,0 +1,139 @@ + +test_that("CreateBWGroup works with single tile", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "createBW") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 249250621), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = 1, end = 249250621)), + normBy = NULL, + tileSize = 249250621, + normMethod = 'RC', + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")) + expect_equal(object = bw$score, 20000) +}) + +test_that("CreateBWGroup works with 100bp tile", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "createBW2") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 249250621), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 249250621, 100), end = c(seq(100, 249250621, 100), 249250621))), + normBy = NULL, + tileSize = 100, + normMethod = "RC", + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = bw$score, c(6000, 8000, 2000, 0)) +}) + +test_that("CreateBWGroup works with seqlength equal to final pos", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "createBW2") + dir.create(outdir, showWarnings = FALSE) + fake.bed.data <- data.frame( + seqnames = rep("chr1", 5), + start = c(0, 10, 100, 110, 300), + end = c(100, 150, 200, 250, 500), + cell_name = rep("fake_cell", 5), + nb = 1:5 + ) + write.table( + fake.bed.data, file.path(outdir, "0.bed"), + col.names = FALSE, quote = FALSE, sep = "\t", + row.names = FALSE + ) + CreateBWGroup( + groupNamei = "0", + availableChr = "chr1", + chromLengths = c("chr1" = 500), + tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 499, 100), end = c(seq(100, 500, 100)))), + normBy = NULL, + tileSize = 100, + normMethod = "RC", + cutoff = NULL, + outdir = outdir + ) + expect_equal(object = length(list.files(outdir)), expected = 2) + expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = bw$score, c(6000, 8000, 2000)) +}) + +test_that("ExportGroupBW works", { + skip_if_not_installed("rtracklayer") + outdir <- file.path(tempdir(), "ExportGroupBW") + dir.create(outdir, showWarnings = FALSE) + fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") + cells <- colnames(x = atac_small) + names(x = cells) <- cells + frags <- CreateFragmentObject( + path = fpath, + cells = cells, + verbose = FALSE, + validate = FALSE + ) + Fragments(atac_small) <- frags + ExportGroupBW( + object = atac_small, + assay = NULL, + group.by = NULL, + idents = NULL, + normMethod = "RC", + tileSize = 100, + minCells = 5, + cutoff = NULL, + chromosome = NULL, + outdir = outdir, + verbose = TRUE + ) + expect_equal(object = length(list.files(outdir)), expected = 4) + expect( + file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), + "File does not exist." + ) + skip_if_not_installed("rtracklayer") + bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) + expect_equal(object = length(seqlengths(bw)), 298) +}) \ No newline at end of file diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R index d874f91f..a7bb8064 100644 --- a/tests/testthat/test-utilities.R +++ b/tests/testthat/test-utilities.R @@ -83,142 +83,3 @@ test_that("ExtractCell works", { object = ExtractCell(x = "chr1\t1\t300\tTGCA\t1"), expected = "TGCA" ) }) - -test_that("CreateBWGroup works with single tile", { - skip_if_not_installed("rtracklayer") - outdir <- file.path(tempdir(), "createBW") - dir.create(outdir, showWarnings = FALSE) - fake.bed.data <- data.frame( - seqnames = rep("chr1", 5), - start = c(0, 10, 100, 110, 300), - end = c(100, 150, 200, 250, 500), - cell_name = rep("fake_cell", 5), - nb = 1:5 - ) - write.table( - fake.bed.data, file.path(outdir, "0.bed"), - col.names = FALSE, quote = FALSE, sep = "\t", - row.names = FALSE - ) - CreateBWGroup( - groupNamei = "0", - availableChr = "chr1", - chromLengths = c("chr1" = 249250621), - tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = 1, end = 249250621)), - normBy = NULL, - tileSize = 249250621, - normMethod = 'RC', - cutoff = NULL, - outdir = outdir - ) - expect_equal(object = length(list.files(outdir)), expected = 2) - expect(file.exists(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")), "File does not exist.") - skip_if_not_installed("rtracklayer") - bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-249250621-normMethod-rc.bw")) - expect_equal(object = bw$score, 20000) -}) - -test_that("CreateBWGroup works with 100bp tile", { - skip_if_not_installed("rtracklayer") - outdir <- file.path(tempdir(), "createBW2") - dir.create(outdir, showWarnings = FALSE) - fake.bed.data <- data.frame( - seqnames = rep("chr1", 5), - start = c(0, 10, 100, 110, 300), - end = c(100, 150, 200, 250, 500), - cell_name = rep("fake_cell", 5), - nb = 1:5 - ) - write.table( - fake.bed.data, file.path(outdir, "0.bed"), - col.names = FALSE, quote = FALSE, sep = "\t", - row.names = FALSE - ) - CreateBWGroup( - groupNamei = "0", - availableChr = "chr1", - chromLengths = c("chr1" = 249250621), - tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 249250621, 100), end = c(seq(100, 249250621, 100), 249250621))), - normBy = NULL, - tileSize = 100, - normMethod = "RC", - cutoff = NULL, - outdir = outdir - ) - expect_equal(object = length(list.files(outdir)), expected = 2) - expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") - skip_if_not_installed("rtracklayer") - bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) - expect_equal(object = bw$score, c(6000, 8000, 2000, 0)) -}) - -test_that("CreateBWGroup works with seqlength equal to final pos", { - skip_if_not_installed("rtracklayer") - outdir <- file.path(tempdir(), "createBW2") - dir.create(outdir, showWarnings = FALSE) - fake.bed.data <- data.frame( - seqnames = rep("chr1", 5), - start = c(0, 10, 100, 110, 300), - end = c(100, 150, 200, 250, 500), - cell_name = rep("fake_cell", 5), - nb = 1:5 - ) - write.table( - fake.bed.data, file.path(outdir, "0.bed"), - col.names = FALSE, quote = FALSE, sep = "\t", - row.names = FALSE - ) - CreateBWGroup( - groupNamei = "0", - availableChr = "chr1", - chromLengths = c("chr1" = 500), - tiles = GRanges(seqnames = "chr1", ranges = IRanges(start = seq(1, 499, 100), end = c(seq(100, 500, 100)))), - normBy = NULL, - tileSize = 100, - normMethod = "RC", - cutoff = NULL, - outdir = outdir - ) - expect_equal(object = length(list.files(outdir)), expected = 2) - expect(file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), "File does not exist.") - skip_if_not_installed("rtracklayer") - bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) - expect_equal(object = bw$score, c(6000, 8000, 2000)) -}) - -test_that("ExportGroupBW works", { - skip_if_not_installed("rtracklayer") - outdir <- file.path(tempdir(), "ExportGroupBW") - dir.create(outdir, showWarnings = FALSE) - fpath <- system.file("extdata", "fragments.tsv.gz", package="Signac") - cells <- colnames(x = atac_small) - names(x = cells) <- cells - frags <- CreateFragmentObject( - path = fpath, - cells = cells, - verbose = FALSE, - validate = FALSE - ) - Fragments(atac_small) <- frags - ExportGroupBW( - object = atac_small, - assay = NULL, - group.by = NULL, - idents = NULL, - normMethod = "RC", - tileSize = 100, - minCells = 5, - cutoff = NULL, - chromosome = NULL, - outdir = outdir, - verbose = TRUE - ) - expect_equal(object = length(list.files(outdir)), expected = 4) - expect( - file.exists(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")), - "File does not exist." - ) - skip_if_not_installed("rtracklayer") - bw <- rtracklayer::import.bw(file.path(outdir, "0-TileSize-100-normMethod-rc.bw")) - expect_equal(object = length(seqlengths(bw)), 298) -}) \ No newline at end of file From 703d899e9c2baa06ac98ee2ed5722d7613b21452 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Fri, 17 Jan 2025 12:14:44 +0800 Subject: [PATCH 34/34] Update docs --- man/ExportGroupBW.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/ExportGroupBW.Rd b/man/ExportGroupBW.Rd index 9a7c684a..f8d34de6 100644 --- a/man/ExportGroupBW.Rd +++ b/man/ExportGroupBW.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utilities.R +% Please edit documentation in R/bigwig.R \name{ExportGroupBW} \alias{ExportGroupBW} \title{Export bigwig files for groups of cells}