Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export bigwig files from split fragments file with tests #1887

Open
wants to merge 37 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
a4f9b73
Update utilities.R
Baboon61 Apr 19, 2023
033b722
Update utilities.R
Baboon61 Apr 20, 2023
081c8e1
Update utilities.R
Baboon61 May 3, 2023
27d4565
Update utilities.R
Baboon61 May 3, 2023
1417195
Update utilities.R
Baboon61 May 4, 2023
57be95f
Merge branch 'develop' into master
timoast Jun 23, 2023
6309c3b
Update function documentation
timoast Jun 23, 2023
0d5eee5
Reformat
timoast Jun 23, 2023
e65a74d
Update utilities.R
Baboon61 Nov 3, 2023
487bd46
use GetGroups
lldelisle Oct 2, 2024
5271bbc
raise error if outdir is not set
lldelisle Oct 2, 2024
cc56669
add an import for nbrOfWorkers
lldelisle Oct 2, 2024
d72e154
set outdir default value to wd
lldelisle Oct 2, 2024
aad0a9b
change help for outdir to match SplitFragments
lldelisle Oct 2, 2024
093074d
remove unneeded change of DefaultAssay
lldelisle Oct 2, 2024
c6e81e7
raise an error if the object has no Fragments
lldelisle Oct 2, 2024
bfa0d21
add tests
lldelisle Oct 2, 2024
f7f667b
run document
lldelisle Oct 2, 2024
b849dab
Add rtracklayer to the Imports
lldelisle Oct 2, 2024
74f5fae
remove unneeded check for rtracklayer
lldelisle Oct 2, 2024
c555f6c
fix assert to expect
lldelisle Oct 2, 2024
0109f1d
use more controled tests for CreateBWGroup
lldelisle Oct 2, 2024
3b1cf35
remove spaces in empty lines
lldelisle Oct 2, 2024
3f4ba00
restore 2 space indentation for params
lldelisle Oct 2, 2024
09203f8
change bin assignment
lldelisle Oct 3, 2024
2a1d612
add test with fragment end matching chromosome length
lldelisle Oct 3, 2024
a884050
guess chromosome lengths if not provided
lldelisle Oct 3, 2024
8a63452
fix test
lldelisle Oct 3, 2024
1c6e811
impose seqlength to be specified
lldelisle Oct 3, 2024
5c1457a
move the chromSize before split fragments
lldelisle Oct 3, 2024
82c3f3a
Merge branch 'develop' into set_defaults
lldelisle Jan 16, 2025
2396fc8
Skip test if rtracklayer not installed
timoast Jan 17, 2025
137abf1
move rtracklayer to suggests
timoast Jan 17, 2025
f73d9c7
skip tests if no rtracklayer
timoast Jan 17, 2025
b7d1e69
Reorganize bigwig
timoast Jan 17, 2025
703d899
Update docs
timoast Jan 17, 2025
0adc45c
Merge branch 'develop' into set_defaults
timoast Jan 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Imports:
lifecycle
Collate:
'RcppExports.R'
'bigwig.R'
'data.R'
'differential_accessibility.R'
'generics.R'
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ export(CreateMotifObject)
export(DensityScatter)
export(DepthCor)
export(DownsampleFeatures)
export(ExportGroupBW)
export(ExpressionPlot)
export(Extend)
export(FRiP)
Expand Down Expand Up @@ -227,6 +228,7 @@ importFrom(BiocGenerics,end)
importFrom(BiocGenerics,sort)
importFrom(BiocGenerics,start)
importFrom(BiocGenerics,strand)
importFrom(BiocGenerics,which)
importFrom(BiocGenerics,width)
importFrom(GenomeInfoDb,"genome<-")
importFrom(GenomeInfoDb,"isCircular<-")
Expand All @@ -246,6 +248,7 @@ importFrom(GenomeInfoDb,seqlevels)
importFrom(GenomeInfoDb,seqnames)
importFrom(GenomeInfoDb,sortSeqlevels)
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,coverage)
importFrom(GenomicRanges,disjoin)
importFrom(GenomicRanges,end)
importFrom(GenomicRanges,findOverlaps)
Expand All @@ -255,6 +258,7 @@ importFrom(GenomicRanges,makeGRangesFromDataFrame)
importFrom(GenomicRanges,reduce)
importFrom(GenomicRanges,resize)
importFrom(GenomicRanges,seqnames)
importFrom(GenomicRanges,slidingWindows)
importFrom(GenomicRanges,start)
importFrom(GenomicRanges,strand)
importFrom(GenomicRanges,tileGenome)
Expand Down Expand Up @@ -301,6 +305,7 @@ importFrom(Rsamtools,scanTabix)
importFrom(Rsamtools,seqnamesTabix)
importFrom(S4Vectors,"mcols<-")
importFrom(S4Vectors,elementNROWS)
importFrom(S4Vectors,match)
importFrom(S4Vectors,mcols)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,split)
Expand Down Expand Up @@ -337,6 +342,7 @@ importFrom(dplyr,top_n)
importFrom(dplyr,ungroup)
importFrom(fastmatch,fmatch)
importFrom(future,nbrOfWorkers)
importFrom(future,plan)
importFrom(future.apply,future_lapply)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
Expand Down Expand Up @@ -402,6 +408,7 @@ importFrom(methods,slotNames)
importFrom(patchwork,guide_area)
importFrom(patchwork,plot_layout)
importFrom(patchwork,wrap_plots)
importFrom(pbapply,pbapply)
importFrom(pbapply,pblapply)
importFrom(rlang,.data)
importFrom(scales,comma)
Expand Down
242 changes: 242 additions & 0 deletions R/bigwig.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#' Export bigwig files for groups of cells
#'
#' @param object A Seurat object
#' @param assay Name of assay to use
#' @param group.by The metadata variable used to group the cells
#' @param idents Identities to include (defined by group.by parameter)
#' @param normMethod Normalization method for the bigwig files. Deafult 'RC'.
#' 'RC' will divide the number of fragments in a tile by the total number of
#' fragments in the group. A scaling factor of 10^4 will be applied.
#' 'ncells' will divide the number of fragments in a tile by the number of cell
#' in the group. 'none' will apply no normalization method.
#' A vector of values for each cell can also be passed as a metadata column
#' name. A scaling factor of 10^4 will be applied
#' @param tileSize The size of the tiles in the bigwig file
#' @param minCells The minimum of cells in a group to be exported
#' @param cutoff The maximum number of fragments in a given genomic tile
#' @param chromosome A chromosomes vector to be exported
#' @param outdir Directory to write output files (splitted bed and bigwigs)
#' @param verbose Display messages
#'
#' @importFrom GenomicRanges GRanges slidingWindows
#' @importFrom future plan nbrOfWorkers
#' @importFrom future.apply future_lapply
#' @importFrom pbapply pbapply
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ExportGroupBW(object, assay = "peaks")
#' }
ExportGroupBW <- function(
object,
assay = NULL,
group.by = NULL,
idents = NULL,
normMethod = "RC",
tileSize = 100,
minCells = 5,
cutoff = NULL,
chromosome = NULL,
outdir = getwd(),
verbose = TRUE
) {
# Check if temporary directory exist
if (!dir.exists(outdir)){
dir.create(outdir)
}
if (length(Fragments(object)) == 0) {
stop("This object does not have Fragments, cannot generate bigwig.")
}
# Get chromosome information
if(!is.null(x = chromosome)){
seqlevels(object) <- chromosome
}
chromLengths <- seqlengths(object)
if (is.null(chromLengths)) {
stop("Object has no seqlength, bigwig coverages cannot be evaluated.")
}
availableChr <- names(chromLengths)
chromSizes <- GRanges(
seqnames = availableChr,
ranges = IRanges(
start = rep(1, length(x = availableChr)),
end = as.numeric(x = chromLengths)
)
)
assay <- SetIfNull(x = assay, y = DefaultAssay(object = object))
obj.groups <- GetGroups(
object = object,
group.by = group.by,
idents = idents
)
GroupsNames <- names(x = table(obj.groups)[table(obj.groups) > minCells])
# Check if output files already exist
lapply(X = GroupsNames, FUN = function(x) {
fn <- paste0(outdir, .Platform$file.sep, x, ".bed")
if (file.exists(fn)) {
message(sprintf("The group \"%s\" is already present in the destination folder and will be overwritten !",x))
file.remove(fn)
}
})
# Splitting fragments file for each idents in group.by
SplitFragments(
object = object,
assay = assay,
group.by = group.by,
idents = idents,
outdir = outdir,
file.suffix = "",
append = TRUE,
buffer_length = 256L,
verbose = verbose
)
# Column to normalized by
if(!is.null(x = normMethod)) {
if (tolower(x = normMethod) %in% c('rc', 'ncells', 'none')){
normBy <- normMethod
} else{
normBy <- object[[normMethod, drop = FALSE]]
}
}

if (verbose) {
message("Creating tiles")
}
# Create tiles for each chromosome, from GenomicRanges
tiles <- unlist(
x = slidingWindows(x = chromSizes, width = tileSize, step = tileSize)
)
if (verbose) {
message("Creating bigwig files at ", outdir)
}
# Run the creation of bigwig for each cellgroups
if (nbrOfWorkers() > 1) {
mylapply <- future_lapply
} else {
mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply)
}
covFiles <- mylapply(
GroupsNames,
FUN = CreateBWGroup,
availableChr,
chromLengths,
tiles,
normBy,
tileSize,
normMethod,
cutoff,
outdir
)
return(covFiles)
}

# Helper function for ExportGroupBW
#
# @param groupNamei The group to be exported
# @param availableChr Chromosomes to be processed
# @param chromLengths Chromosomes lengths
# @param tiles The tiles object
# @param normBy A vector of values to normalized the cells by
# @param tileSize The size of the tiles in the bigwig file
# @param normMethod Normalization method for the bigwig files
# 'RC' will divide the number of fragments in a tile by the number of fragments
# in the group. A scaling factor of 10^4 will be applied
# 'ncells' will divide the number of fragments in a tile by the number of cell
# in the group. 'none' will apply no normalization method. A vector of values
# for each cell can also be passed as a meta.data column name. A scaling factor
# of 10^4 will be applied
# @param cutoff The maximum number of fragment in a given tile
# @param outdir The output directory for bigwig file
#
#' @importFrom GenomicRanges seqnames GRanges coverage
#' @importFrom BiocGenerics which
#' @importFrom S4Vectors match
#' @importFrom Matrix sparseMatrix rowSums
CreateBWGroup <- function(
groupNamei,
availableChr,
chromLengths,
tiles,
normBy,
tileSize,
normMethod,
cutoff,
outdir
) {
if (!requireNamespace("rtracklayer", quietly = TRUE)) {
message("Please install rtracklayer. http://www.bioconductor.org/packages/rtracklayer/")
return(NULL)
}
normMethod <- tolower(x = normMethod)
# Read the fragments file associated to the group
fragi <- rtracklayer::import(
paste0(outdir, .Platform$file.sep, groupNamei, ".bed"),
format = "bed"
)
cellGroupi <- unique(x = fragi$name)
# Open the writing bigwig file
covFile <- file.path(
outdir,
paste0(groupNamei, "-TileSize-",tileSize,"-normMethod-",normMethod,".bw")
)

covList <- lapply(X = seq_along(availableChr), FUN = function(k) {
fragik <- fragi[seqnames(fragi) == availableChr[k],]
tilesk <- tiles[BiocGenerics::which(S4Vectors::match(seqnames(tiles), availableChr[k], nomatch = 0) > 0)]
if (length(x = fragik) == 0) {
tilesk$reads <- 0
# If fragments
} else {
# N Tiles
nTiles <- chromLengths[availableChr[k]] / tileSize
# Add one tile if there is extra bases
if (nTiles%%1 != 0) {
nTiles <- trunc(x = nTiles) + 1
}
# Create Sparse Matrix
matchID <- S4Vectors::match(mcols(fragik)$name, cellGroupi)

# For each tiles of this chromosome, create start tile and end tile row,
# set the associated counts matching with the fragments
# This changes compared to ArchR version 1.0.2
# See https://github.com/GreenleafLab/ArchR/issues/2214
mat <- sparseMatrix(
i = c(trunc(x = (start(x = fragik) - 1) / tileSize),
trunc(x = (end(x = fragik) - 1) / tileSize)) + 1,
j = as.vector(x = c(matchID, matchID)),
x = rep(1, 2*length(x = fragik)),
dims = c(nTiles, length(x = cellGroupi))
)

# Max count for a cells in a tile is set to cutoff
if (!is.null(x = cutoff)){
mat@x[mat@x > cutoff] <- cutoff
}
# Sums the cells
mat <- rowSums(x = mat)
tilesk$reads <- mat
# Normalization
if (!is.null(x = normMethod)) {
if (normMethod == "rc") {
tilesk$reads <- tilesk$reads * 10^4 / length(fragi$name)
} else if (normMethod == "ncells") {
tilesk$reads <- tilesk$reads / length(cellGroupi)
} else if (normMethod == "none") {
} else {
if (!is.null(x = normBy)){
tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1])
}
}
}
}
tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]]
tilesk
})

names(covList) <- availableChr
covList <- as(object = covList, Class = "RleList")
rtracklayer::export.bw(object = covList, con = covFile)
return(covFile)
}
19 changes: 9 additions & 10 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -599,12 +599,12 @@ GetGRangesFromEnsDb <- function(
# extract genes from each chromosome
my_lapply <- ifelse(test = verbose, yes = pblapply, no = lapply)
tx <- my_lapply(X = seq_along(whole.genome), FUN = function(x){
suppressMessages(expr = biovizBase::crunch(
obj = ensdb,
which = whole.genome[x],
columns = c("tx_id", "gene_name", "gene_id", "gene_biotype")))
})
suppressMessages(expr = biovizBase::crunch(
obj = ensdb,
which = whole.genome[x],
columns = c("tx_id", "gene_name", "gene_id", "gene_biotype")))
})

# combine
tx <- do.call(what = c, args = tx)
tx <- tx[tx$gene_biotype %in% biotypes]
Expand Down Expand Up @@ -1714,7 +1714,7 @@ SingleFileCutMatrix <- function(
)
cut.df <- cut.df[
(cut.df$position > 0) & (cut.df$position <= width(x = region)[[1]]),
]
]
cell.vector <- seq_along(along.with = cells)
names(x = cell.vector) <- cells
cell.matrix.info <- cell.vector[cut.df$cell]
Expand Down Expand Up @@ -2192,7 +2192,6 @@ MergeOverlappingRows <- function(
for (i in seq_along(along.with = assay.list)) {
# get count matrix
counts <- GetAssayData(object = assay.list[[i]], layer = slot)

if (nrow(x = counts) == 0) {
# no counts, only data
# skip row merge and return empty counts matrices
Expand Down Expand Up @@ -2443,8 +2442,8 @@ SparsifiedRanks <- function(X){
x = lapply(
X = seq_along(col_lst),
FUN = function(i) rank(x = col_lst[[i]]) + offsets[i]
)
)
)
## Create template rank matrix
X.ranks <- X
X.ranks@x <- sparsified_ranks
Expand All @@ -2457,7 +2456,7 @@ SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) {
if (is.null(Y)){
# Calculate pearson correlation on rank matrices
return (corSparse(X = rankX, cov = cov))
}
}
rankY <- SparsifiedRanks(X = Y)
return(corSparse(X = rankX, Y = rankY, cov = cov))
}
Loading
Loading