Skip to content

Commit

Permalink
Merge pull request #1803 from lldelisle/addmotifs
Browse files Browse the repository at this point in the history
Better control the number of GRanges
  • Loading branch information
timoast authored Jan 16, 2025
2 parents 4fe0f8b + bdfd632 commit 51ceab1
Show file tree
Hide file tree
Showing 14 changed files with 200,210 additions and 34 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Signac
Title: Analysis of Single-Cell Chromatin Data
Version: 1.14.9001
Date: 2024-10-21
Version: 1.14.9002
Date: 2025-01-16
Authors@R: c(
person(given = 'Tim', family = 'Stuart', email = 'stuartt@gis.a-star.edu.sg', role = c('aut', 'cre'), comment = c(ORCID = '0000-0002-3044-0897')),
person(given = 'Avi', family = 'Srivastava', email = 'asrivastava@nygenome.org', role = 'aut', comment = c(ORCID = '0000-0001-9798-2079')),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ importFrom(SeuratObject,"VariableFeatures<-")
importFrom(SeuratObject,AddMetaData)
importFrom(SeuratObject,Assays)
importFrom(SeuratObject,Cells)
importFrom(SeuratObject,CheckFeaturesNames)
importFrom(SeuratObject,CreateAssayObject)
importFrom(SeuratObject,CreateDimReducObject)
importFrom(SeuratObject,CreateSeuratObject)
Expand Down
43 changes: 34 additions & 9 deletions R/objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,15 @@ CreateChromatinAssay <- function(
if (!is.null(x = annotation) & !inherits(x = annotation, what = "GRanges")) {
stop("Annotation must be a GRanges object.")
}
if (!any(c("tx_id", "transcript_id") %in% colnames(x = mcols(x = annotation)))) {
stop("Annotation must have transcript id stored in `tx_id` or `transcript_id`.")
}
if (any(!c("gene_name", "gene_id", "gene_biotype", "type") %in% colnames(x = mcols(x = annotation)))) {
stop("Annotation must have `gene_name`, `gene_id`, `gene_biotype` and `type`.")
if (!is.null(x = annotation)) {
if (!any(c("tx_id", "transcript_id") %in% colnames(x = mcols(x = annotation)))) {
stop("Annotation must have transcript id stored in `tx_id` or `transcript_id`.")
}
if (any(!c("gene_name", "gene_id", "gene_biotype", "type") %in% colnames(x = mcols(x = annotation)))) {
stop("Annotation must have `gene_name`, `gene_id`, `gene_biotype` and `type`.")
}
}

# remove low-count cells
ncount.cell <- colSums(x = data.use > 0)
data.use <- data.use[, ncount.cell >= min.features]
Expand Down Expand Up @@ -808,7 +811,7 @@ RenameCells.Fragment <- function(object, new.names, ...) {
return(object)
}

#' @importFrom SeuratObject SetAssayData
#' @importFrom SeuratObject SetAssayData CheckFeaturesNames
#' @importFrom GenomeInfoDb genome Seqinfo
#' @importFrom lifecycle deprecated is_present
#' @importFrom S4Vectors mcols
Expand Down Expand Up @@ -957,9 +960,19 @@ SetAssayData.ChromatinAssay <- function(
if (!inherits(x = new.data, what = "Motif")) {
stop("Must provide a Motif class object")
}
# Set the feature names compatible with Seurat
new.data <- SetMotifData(
object = new.data,
slot = "data",
new.data = CheckFeaturesNames(
data = GetMotifData(object = new.data, slot = "data")
)
)

# TODO allow mismatching row names, but check that the genomic ranges
# are equivalent. Requires adding a granges slot to the motif class
if (!all(rownames(x = object) == rownames(x = new.data))) {
if (nrow(x = object) != nrow(x = new.data) ||
!all(rownames(x = object) == rownames(x = new.data))) {
keep.features <- intersect(x = rownames(x = new.data),
y = rownames(x = object))
if (length(x = keep.features) == 0) {
Expand All @@ -968,8 +981,17 @@ SetAssayData.ChromatinAssay <- function(
}
else {
warning("Features do not match in ChromatinAssay and Motif object.
Subsetting the Motif object.")
Subsetting/Filling the Motif object.")
new.data <- new.data[keep.features, ]

new.data <- SetMotifData(
object = new.data,
slot = "data",
new.data = AddMissing(
GetMotifData(object = new.data, slot = "data"),
features = rownames(x = object)
)
)
}
}
methods::slot(object = object, name = layer) <- new.data
Expand Down Expand Up @@ -1023,8 +1045,11 @@ SetMotifData.Motif <- function(object, slot, new.data, ...) {
#' @export
#' @concept motifs
#' @examples
#' new.data <- matrix(sample(c(0, 1), size = nrow(atac_small[["peaks"]]) * 10,
#' replace = TRUE), nrow = nrow(atac_small[["peaks"]]))
#' rownames(new.data) <- rownames(atac_small[["peaks"]])
#' SetMotifData(
#' object = atac_small[['peaks']], slot = 'data', new.data = matrix(1:2)
#' object = atac_small[['peaks']], slot = 'data', new.data = new.data
#' )
#' @method SetMotifData ChromatinAssay
SetMotifData.ChromatinAssay <- function(object, slot, new.data, ...) {
Expand Down
35 changes: 29 additions & 6 deletions R/quantification.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,13 @@ GenomeBinMatrix <- function(
#' the fragment object will still be searched.
#' @param features A GRanges object containing a set of genomic intervals.
#' These will form the rows of the matrix, with each entry recording the number
#' of unique reads falling in the genomic region for each cell. If a genomic
#' region provided is on a chromosome that is not present in the fragment file,
#' it will not be included in the returned matrix.
#' of unique reads falling in the genomic region for each cell.
#' @param keep_all_features By default, if a genomic region provided is on a
#' chromosome that is not present in the fragment file,
#' it will not be included in the returned matrix. Set `keep_all_features` to
#' TRUE to force output to include all features in the input ranges. Note that
#' features on chromosomes that are not present in the fragment file will be
#' filled with zero counts.
#' @param cells Vector of cells to include. If NULL, include all cells found
#' in the fragments file
#' @param process_n Number of regions to load into memory at a time, per thread.
Expand All @@ -217,6 +221,7 @@ GenomeBinMatrix <- function(
FeatureMatrix <- function(
fragments,
features,
keep_all_features = FALSE,
cells = NULL,
process_n = 2000,
sep = c("-", "-"),
Expand Down Expand Up @@ -261,6 +266,7 @@ FeatureMatrix <- function(
SingleFeatureMatrix(
fragment = fragments[[x]],
features = features,
keep_all_features = keep_all_features,
cells = cells,
sep = sep,
verbose = verbose,
Expand Down Expand Up @@ -326,6 +332,7 @@ CombineTiles <- function(bins) {
SingleFeatureMatrix <- function(
fragment,
features,
keep_all_features = FALSE,
cells = NULL,
process_n = 2000,
sep = c("-", "-"),
Expand Down Expand Up @@ -359,6 +366,11 @@ SingleFeatureMatrix <- function(
index = GetIndexFile(fragment = fragment.path, verbose = FALSE)
)
n_feat_start <- length(x = features)
if (keep_all_features) {
features_to_get <- GRangesToString(grange = features, sep = sep)
} else {
features_to_get <- NULL
}
features <- keepSeqlevels(
x = features,
value = intersect(
Expand All @@ -371,7 +383,7 @@ SingleFeatureMatrix <- function(
stop("No matching chromosomes found in fragment file.")
}
n_removed <- n_feat_start - length(x = features)
if (n_removed > 0) {
if (n_removed > 0 && ! keep_all_features) {
if (n_removed == 1) {
warning(n_removed, " feature is on a seqname not present in ",
"the fragment file. This will be removed.")
Expand Down Expand Up @@ -418,7 +430,14 @@ SingleFeatureMatrix <- function(
X = matrix.parts,
FUN = AddMissing,
cells = all.cells,
features = NULL
features = features_to_get
)
} else if (keep_all_features) {
matrix.parts <- lapply(
X = matrix.parts,
FUN = AddMissing,
cells = NULL,
features = features_to_get
)
}
featmat <- do.call(what = rbind, args = matrix.parts)
Expand All @@ -429,7 +448,11 @@ SingleFeatureMatrix <- function(
colnames(x = featmat) <- unname(obj = cell.convert[colnames(x = featmat)])
}
# reorder features
feat.str <- GRangesToString(grange = features, sep = sep)
if (keep_all_features) {
feat.str <- features_to_get
} else {
feat.str <- GRangesToString(grange = features, sep = sep)
}
featmat <- featmat[feat.str, , drop=FALSE]
return(featmat)
}
26 changes: 14 additions & 12 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -1220,20 +1220,22 @@ NonOverlapping <- function(x, all.features) {
}

#' @importFrom Matrix sparseMatrix
AddMissing <- function(x, cells, features = NULL) {
AddMissing <- function(x, cells = NULL, features = NULL) {
# add columns with zeros for cells or features not in matrix
missing.cells <- setdiff(x = cells, y = colnames(x = x))
if (!(length(x = missing.cells) == 0)) {
null.mat <- sparseMatrix(
i = c(),
j = c(),
dims = c(nrow(x = x), length(x = missing.cells))
)
rownames(x = null.mat) <- rownames(x = x)
colnames(x = null.mat) <- missing.cells
x <- cbind(x, null.mat)
if (!is.null(cells)) {
missing.cells <- setdiff(x = cells, y = colnames(x = x))
if (!(length(x = missing.cells) == 0)) {
null.mat <- sparseMatrix(
i = c(),
j = c(),
dims = c(nrow(x = x), length(x = missing.cells))
)
rownames(x = null.mat) <- rownames(x = x)
colnames(x = null.mat) <- missing.cells
x <- cbind(x, null.mat)
}
x <- x[, cells, drop = FALSE]
}
x <- x[, cells, drop = FALSE]
if (!is.null(x = features)) {
missing.features <- setdiff(x = features, y = rownames(x = x))
if (!(length(x = missing.features) == 0)) {
Expand Down
Loading

0 comments on commit 51ceab1

Please sign in to comment.