From 4a5065350869fd96b8548421e169b2f86fd7a0ee Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Mon, 16 Dec 2024 15:01:39 -0800 Subject: [PATCH 1/4] Fix and clean up docs --- R/annotations.R | 29 +++++++++++++++++++++++++++-- R/nextflow_annotation_utils.R | 1 - man/copy_annotations.Rd | 11 ++++++++++- man/dot-dict_to_list.Rd | 14 ++++++++++++++ man/extract_syn_id_from_ss.Rd | 17 +++++++++++++++++ 5 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 man/dot-dict_to_list.Rd create mode 100644 man/extract_syn_id_from_ss.Rd diff --git a/R/annotations.R b/R/annotations.R index e331b28..ac816a2 100644 --- a/R/annotations.R +++ b/R/annotations.R @@ -53,11 +53,14 @@ annotate_with_manifest <- function(manifest, ignore_na = TRUE, ignore_blank = TR #' @param select Vector of properties to selectively copy if present on the entity. #' If not specified, will copy over everything, which may not be desirable. #' @param update Whether to immediately update or return annotation objects only. +#' @param as_list Only used when `update=FALSE`; for backwards-compatibility or when +#' downstream usage of `copy_annotations` expects an R list, return as an R list. #' @export copy_annotations <- function(entity_from, entity_to, select = NULL, - update = FALSE) { + update = FALSE, + as_list = TRUE) { .check_login() @@ -74,7 +77,29 @@ copy_annotations <- function(entity_from, for(k in select) { to_annotations[k] <- from_annotations[k] } - if(update) .syn$set_annotations(to_annotations) else return(to_annotations) + if(update) { + .syn$set_annotations(to_annotations) + } else if (as_list) { + .dict_to_list(to_annotations) + } else { + to_annotations + } } } + +#' Convert a flat Python Dict to R list +#' +#' An internal function used to convert Annotations objects returned by `get_annotations`. +#' +#' @param dict A flat Python Dict object. +.dict_to_list <- function(dict) { + if (is.null(names(dict))) { + stop("Input must be a named list representing a flat Python dictionary.") + } + l <- list() + for(k in names(dict)) { + l[k] <- dict[k] + } + l +} diff --git a/R/nextflow_annotation_utils.R b/R/nextflow_annotation_utils.R index e7bd610..498e41f 100644 --- a/R/nextflow_annotation_utils.R +++ b/R/nextflow_annotation_utils.R @@ -504,7 +504,6 @@ annotate_with_samtools_stats <- function(meta, #' Wrapper for all steps to get manifest for processed product #' #' @inheritParams map_sample_io -#' @param #' @param workflow_link Workflow link. #' @export #' @return List `manifest` with manifests for each processed dataset, diff --git a/man/copy_annotations.Rd b/man/copy_annotations.Rd index 9e6230d..ca562de 100644 --- a/man/copy_annotations.Rd +++ b/man/copy_annotations.Rd @@ -4,7 +4,13 @@ \alias{copy_annotations} \title{Copy annotations} \usage{ -copy_annotations(entity_from, entity_to, select = NULL, update = FALSE) +copy_annotations( + entity_from, + entity_to, + select = NULL, + update = FALSE, + as_list = TRUE +) } \arguments{ \item{entity_from}{Syn id from which to copy.} @@ -15,6 +21,9 @@ copy_annotations(entity_from, entity_to, select = NULL, update = FALSE) If not specified, will copy over everything, which may not be desirable.} \item{update}{Whether to immediately update or return annotation objects only.} + +\item{as_list}{Only used when \code{update=FALSE}; for backwards-compatibility or when +downstream usage of \code{copy_annotations} expects an R list, return as an R list.} } \description{ Copy annotations (all or selectively) from a source entity to one or more target entities. diff --git a/man/dot-dict_to_list.Rd b/man/dot-dict_to_list.Rd new file mode 100644 index 0000000..b2e6007 --- /dev/null +++ b/man/dot-dict_to_list.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/annotations.R +\name{.dict_to_list} +\alias{.dict_to_list} +\title{Convert a flat Python Dict to R list} +\usage{ +.dict_to_list(dict) +} +\arguments{ +\item{dict}{A flat Python Dict object.} +} +\description{ +An internal function used to convert Annotations objects returned by \code{get_annotations}. +} diff --git a/man/extract_syn_id_from_ss.Rd b/man/extract_syn_id_from_ss.Rd new file mode 100644 index 0000000..4811982 --- /dev/null +++ b/man/extract_syn_id_from_ss.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nextflow_annotation_utils.R +\name{extract_syn_id_from_ss} +\alias{extract_syn_id_from_ss} +\title{Extract Synapse id from URI} +\usage{ +extract_syn_id_from_ss(x) +} +\description{ +Given some vector \code{x} of URIs/file paths, +will try different methods to extract what's likely the needed Synapse id, +then sanity checks of results. +} +\seealso{ +\code{\link[=bare_syn_id]{bare_syn_id()}} +} +\keyword{internal} From 2450845994360338b4de6983531bd88a0b9b5ea3 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Mon, 16 Dec 2024 15:52:43 -0800 Subject: [PATCH 2/4] Update test and fix --- R/annotations.R | 6 +++--- tests/testthat/test_copy_annotations.R | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/R/annotations.R b/R/annotations.R index ac816a2..c05141f 100644 --- a/R/annotations.R +++ b/R/annotations.R @@ -80,9 +80,9 @@ copy_annotations <- function(entity_from, if(update) { .syn$set_annotations(to_annotations) } else if (as_list) { - .dict_to_list(to_annotations) + return(.dict_to_list(to_annotations)) } else { - to_annotations + return(to_annotations) } } } @@ -99,7 +99,7 @@ copy_annotations <- function(entity_from, } l <- list() for(k in names(dict)) { - l[k] <- dict[k] + l[[k]] <- dict[k] } l } diff --git a/tests/testthat/test_copy_annotations.R b/tests/testthat/test_copy_annotations.R index 9a2ed13..32de91a 100644 --- a/tests/testthat/test_copy_annotations.R +++ b/tests/testthat/test_copy_annotations.R @@ -1,4 +1,5 @@ -test_that("Copy annotations works", { +test_that("Copy annotations works to apply an immediate copy of annotations from one entity to another (update=TRUE) + as well as when simply getting a copy of the annotations present as a default R list to work with (update=FALSE)", { skip_if_no_synapseclient() skip_if_no_login() @@ -18,12 +19,24 @@ test_that("Copy annotations works", { parent = PARENT_TEST_PROJECT) entity_c <- .syn$store(entity_c) - # when copying all annotations from A->B (default) + # when getting a copy of all annotations from A->B (default) + copy_of_a_b <- copy_annotations(entity_from = entity_a$properties$id, + entity_to = entity_b$properties$id, + select = NULL, + update = FALSE) + + # when immediately copying all annotations from A->B (default) copy_annotations(entity_from = entity_a$properties$id, entity_to = entity_b$properties$id, select = NULL, update = TRUE) + # when getting a copy of selective annotations from A->C + copy_of_a_c <- copy_annotations(entity_from = entity_a$properties$id, + entity_to = entity_c$properties$id, + select = c("favorites", "key_not_on_a"), + update = FALSE) + # when copying selective annotations from A->C copy_annotations(entity_from = entity_a$properties$id, entity_to = entity_c$properties$id, @@ -35,6 +48,8 @@ test_that("Copy annotations works", { .syn$delete(entity_a) .syn$delete(entity_b) .syn$delete(entity_c) + testthat::expect_equal(copy_of_a_b, list(after_a = TRUE, favorites = c("raindrops", "whiskers"), foo = "bar")) + testthat::expect_equal(copy_of_a_c, list(favorites = c("raindrops", "whiskers"))) testthat::expect_equal(result_b$foo, "bar") testthat::expect_equal(result_b$favorites, c("raindrops", "whiskers")) testthat::expect_equal(result_b$after_a, TRUE) @@ -44,4 +59,3 @@ test_that("Copy annotations works", { }) - From d371983a180f6cfb59064c34bf92a9e9fb90df5d Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Mon, 16 Dec 2024 16:15:26 -0800 Subject: [PATCH 3/4] Update pkgdown index manually --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index ca2dbf8..2f50cbc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -164,6 +164,7 @@ reference: description: Mostly meant to be internal or experimental stuff - contents: - .delim_string_to_vector + - .dict_to_list - .replace_string_column_with_stringlist_column - .store_rows - missing_annotation_email From 73ef0959ec4f1f08b2f2c17ccdf4fdde55f30040 Mon Sep 17 00:00:00 2001 From: Anh Nguyet Vu Date: Tue, 17 Dec 2024 09:02:16 -0800 Subject: [PATCH 4/4] Fix some logic, including #208 --- R/nextflow_annotation_utils.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/R/nextflow_annotation_utils.R b/R/nextflow_annotation_utils.R index 498e41f..145c25e 100644 --- a/R/nextflow_annotation_utils.R +++ b/R/nextflow_annotation_utils.R @@ -28,10 +28,10 @@ annotation_rule <- function(outputFrom, which = c("format_as", "annotate_as", "t "featureCounts" = list(format_as = function(x) { "txt" }, annotate_as = "annotate_quantified_expression", template = "bts:ProcessedExpressionTemplate"), "SAMtools" = list(format_as = function(x) { substring(x, nchar(x)-2, nchar(x)) }, annotate_as = "annotate_aligned_reads", template = "bts:ProcessedAlignedReadsTemplate"), "CNVkit" = list(format_as = function(x) { substring(x, nchar(x)-2, nchar(x)) }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), - "DeepVariant" = list(format_as = function(x) { "vcf" }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), - "Strelka2" = list(format_as = function(x) { "vcf" }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), - "Mutect2" = list(format_as = function(x) { "vcf" }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), - "FreeBayes" = list(format_as = function(x) { "vcf" }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate")) + "DeepVariant" = list(format_as = function(x) { ifelse(grepl("tbi$", x), "tbi", "vcf") }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), + "Strelka2" = list(format_as = function(x) { ifelse(grepl("tbi$", x), "tbi", "vcf") }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), + "Mutect2" = list(format_as = function(x) { ifelse(grepl("tbi$", x), "tbi", "vcf") }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate"), + "FreeBayes" = list(format_as = function(x) { ifelse(grepl("tbi$", x), "tbi", "vcf") }, annotate_as = "annotate_called_variants", template = "bts:ProcessedVariantCallsTemplate")) switch( which, @@ -230,7 +230,7 @@ map_sample_output_sarek <- function(syn_out, index_fun <- function(x) { caller_index <- grep("cnvkit|deepvariant|strelka|mutect|freebayes", x, ignore.case = TRUE)[1] - if(!length(caller_index)) stop("Issue with figuring out sample output organization. Is there non-standard output?") + if(!length(caller_index)) stop("Issue with inferring sample output organization. Is this non-standard output?") file_index <- length(x) if((file_index - caller_index) == 1) { file_index - 2 # i.e. `VariantCalling//` @@ -243,7 +243,10 @@ map_sample_output_sarek <- function(syn_out, # For nf-sarek tumor somatic variant calling, sample references tumor vs normal from same indiv # The sample assigned to the processed data is the tumor sample result[, sample := path_extract(path, index_fun = index_fun)] - if(grepl("_vs_", first(result$output_name))) result[, sample := gsub("_vs.*", "", sample)] + if(.output %in% c("Strelka2", "FreeBayes", "Mutect2") && any(grepl("_vs_", result$sample))) { + message(" - Somatic data present.") + result[, sample := gsub("_vs.*", "", sample)] + } results[[.output]] <- result setattr(results[[.output]], "outputFrom", .output) @@ -450,6 +453,8 @@ annotate_called_variants <- function(metadata, "AnnotatedSomaticVariants" } else if(!grepl("_vs_", name) && format == "maf") { "AnnotatedGermlineVariants" + } else if(format == "tbi") { + "dataIndex" } else if(format %in% c("cns", "cnn", "cnr", "bed", "pdf", "png")) { "CopyNumberVariants" } else {