Skip to content

Commit

Permalink
Feat/pathfinder (#121)
Browse files Browse the repository at this point in the history
* Add find helpers

* Export

* Update pkgdown and docs
  • Loading branch information
anngvu authored Aug 28, 2023
1 parent 14eaa7b commit 38e939d
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 0 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ export(convert_to_stringlist)
export(copy_annotations)
export(data_curator_app_subpage)
export(delete_provenance)
export(find_child)
export(find_in)
export(from_pubmed)
export(get_by_prop_from_json_schema)
export(get_dependency_from_json_schema)
Expand All @@ -57,8 +59,10 @@ export(missing_annotation_email)
export(new_dataset)
export(new_project)
export(nf_cnv_dataset)
export(nf_find_asset)
export(nf_sarek_datasets)
export(nf_star_salmon_datasets)
export(nf_workflow_version)
export(processing_flowchart)
export(qc_manifest)
export(register_study)
Expand Down
90 changes: 90 additions & 0 deletions R/find.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Utils to help overcome nested folders

#' Find in path
#'
#' Get the Synapse id of an entity nested several folder layers deep without
#' having to click through the UI or create a fileview, as long as the
#' structure/path is known.
#'
#' The path is walked one element at a time: each element is looked up by name
#' among the children of the container found in the previous step.
#'
#' @param scope Id of the container (project or folder) to begin search.
#' @param path Path string in format "subdir1/subdir2/file.txt", where the
#'   last-level element will be the id returned. Empty elements (e.g. from a
#'   leading or trailing "/") are ignored.
#' @returns Id of the last path element, or `NULL` if the path is empty or any
#'   element along the path cannot be found.
#' @export
find_in <- function(scope, path) {
  path_list <- strsplit(path, split = "/", fixed = TRUE)[[1]]
  # An empty name can never match an entity, so "/a/b/" is treated as "a/b".
  path_list <- path_list[nzchar(path_list)]

  id <- NULL
  here <- scope
  for (child in path_list) {
    here <- find_child(child_name = child, parent = here)
    # Stop at the first missing element: descending further would query the
    # children of a NULL parent instead of reporting "not found".
    if (is.null(here)) {
      return(NULL)
    }
    id <- here
  }
  id
}

#' Find id of a child entity in a container
#'
#' Steps through the children of `parent` one at a time and stops at the first
#' child whose name equals `child_name`.
#'
#' @param child_name Name of child entity.
#' @param parent Parent container (project or folder).
#' @returns Id of the first matching child, or `NULL` if no child matches.
#' @export
find_child <- function(child_name, parent) {
  children <- .syn$getChildren(parent)
  while (TRUE) {
    entry <- reticulate::iter_next(children)
    # iter_next() yields NULL once the iterator is exhausted: nothing matched.
    if (is.null(entry)) {
      return(NULL)
    }
    if (entry$name == child_name) {
      return(entry$id)
    }
  }
}

# Find nextflow assets --------------------------------------------------------- #

# Convenience functions for getting Synapse ids of nextflow assets

#' Find a standard nextflow workflow output asset
#'
#' Looks up one of a fixed set of known output files by its path relative to
#' the workflow output folder.
#'
#' Note that samplesheets became part of the output only for newer versions of
#' nf-core/rna-seq; older runs may not find samplesheets.
#' Paths default to known working paths corresponding to the latest major
#' workflow version, but this may change and may need to be updated as part of
#' util maintenance.
#'
#' @param syn_out Id of top-level folder that corresponds to `publishDir` in a
#'   nextflow workflow.
#' @param asset Name of asset to find.
#' @returns Id of the requested asset. Errors if the asset is not found.
#' @export
nf_find_asset <- function(syn_out,
                          asset = c("software_versions", "multiqc_report", "samplesheet", "samtools_stats")) {
  asset <- match.arg(asset)

  # Relative location of each supported asset under the output folder.
  asset_paths <- c(
    software_versions = "pipeline_info/software_versions.yml",
    multiqc_report = "multiqc/star_salmon/multiqc_report.html",
    samplesheet = "pipeline_info/samplesheet.valid.csv",
    samtools_stats = "multiqc/star_salmon/multiqc_data/multiqc_samtools_stats.txt"
  )

  asset_id <- find_in(syn_out, asset_paths[[asset]])
  if (is.null(asset_id)) {
    stop("File not found. Is this the right output directory/path?")
  }
  asset_id
}


#' Return workflow version according to workflow meta
#'
#' Locates the `software_versions` asset under `syn_out`, downloads it, parses
#' it as YAML and returns the entry of its `Workflow` section whose name
#' contains "nf-core".
#'
#' @inheritParams nf_find_asset
#' @returns Version string.
#' @export
nf_workflow_version <- function(syn_out) {
  version_meta <- nf_find_asset(syn_out, asset = "software_versions")
  file <- .syn$get(version_meta, downloadFile = TRUE)
  yml <- yaml::read_yaml(file$path)

  workflow <- grep("nf-core", names(yml$Workflow), fixed = TRUE)
  if (length(workflow) == 0) {
    stop("No nf-core workflow entry found in the software versions file.")
  }
  # `[[` needs a single index; use the first match if several names contain
  # "nf-core".
  yml$Workflow[[workflow[1]]]
}

7 changes: 7 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ reference:
- annotate_cnv
- annotate_reports_sarek
- annotate_with_tool_stats
- nf_workflow_version

- title: Dataset Creation and Management
- subtitle: General dataset creation and citation
Expand Down Expand Up @@ -80,6 +81,12 @@ reference:
- summarize_file_access
- grant_specific_file_access

- title: Search Utils
desc: Help locate Synapse accessions, etc.
- contents:
- contains("find")
- nf_find_asset

- title: Provenance Utils
desc: Manage provenance metadata
- contents:
Expand Down
16 changes: 16 additions & 0 deletions man/find_child.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/find_in.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/nf_find_asset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/nf_workflow_version.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 38e939d

Please sign in to comment.