issues resolved

christinehou11 · Sep 4, 2024 · 66ac4ed · 66ac4ed
1 parent 9e7c461
commit 66ac4ed
Show file tree

Hide file tree

Showing 17 changed files with 132 additions and 97 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -52,7 +52,8 @@ Suggests:
     testthat (>= 3.0.0),
     knitr,
     rmarkdown,
-    BiocStyle
+    BiocStyle,
+    ggplot2
 VignetteBuilder: knitr
 biocViews: Software, SingleCell
 URL: https://christinehou11.github.io/HuBMAPR/, https://github.com/christinehou11/HuBMAPR

diff --git a/R/collection_information.R b/R/collection_information.R
@@ -21,9 +21,7 @@ collection_information <-
     function(uuid)
     {
 
-    stopifnot(
-    .is_uuid(uuid)
-    )
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Collection")
 
     option <- .list_to_option(
         path = "hits.hits[]._source",

diff --git a/R/collections.R b/R/collections.R
@@ -76,7 +76,7 @@ collections_default_columns <-
 collection_contacts <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Collection")
 
     option <- .list_to_option(
         path = "hits.hits[]._source.contacts[]",
@@ -91,7 +91,7 @@ collection_contacts <-
 #' @name collection_data
 #'
 #' @importFrom tidyr unnest everything
-#' @importFrom dplyr select
+#' @importFrom dplyr select rename
 #'
 #' @description `collection_data()` takes a unique collection_id and
 #' returns related datasets of one specified collection as a tibble
@@ -110,7 +110,7 @@ collection_contacts <-
 collection_data <-
     function(uuid) {
 
-    stopifnot( .is_uuid(uuid))
+    stopifnot( .is_uuid(uuid), .uuid_category(uuid) == "Collection")
 
     option <- .list_to_option(path = "hits.hits[]._source.datasets[]",
                                 fields = c("uuid", "hubmap_id", "data_types",
@@ -121,7 +121,9 @@ collection_data <-
     tbl <- .query_match(uuid, option) |> unnest(everything())
     tbl$organ <- .title_to_organ(tbl$title)
 
-    .unnest_mutate_relocate(tbl) |> select(-"title")
+    .unnest_mutate_relocate(tbl) |> 
+        select(-"title") |>
+        rename("dataset_type_additional_information" = "data_types")
 
     }
 
@@ -148,7 +150,7 @@ collection_data <-
 collection_contributors <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Collection")
 
     option <- .list_to_option(path = "hits.hits[]._source.creators[]",
                                 fields = c("name", "affiliation", "orcid_id"))

diff --git a/R/datasets.R b/R/datasets.R
@@ -72,7 +72,7 @@ datasets_default_columns <-
 dataset_detail <-
     function (uuid) {
 
-    stopifnot( .is_uuid(uuid))
+    stopifnot( .is_uuid(uuid), .uuid_category(uuid) == "Dataset")
 
     .query_match(uuid, option = "hits.hits[]._source")
 
@@ -106,7 +106,7 @@ dataset_detail <-
 dataset_derived <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Dataset")
 
     option <- .list_to_option(
         path = "hits.hits[]._source.descendants[]",
@@ -148,7 +148,7 @@ dataset_derived <-
 dataset_metadata <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Dataset")
 
     donor_uuid <- .query_match(uuid,
                     option = "hits.hits[]._source.ancestors[]") |>
@@ -208,7 +208,7 @@ dataset_metadata <-
 dataset_contributors <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Dataset")
 
     .query_match(uuid,
                 option = "hits.hits[]._source.contributors[]") |> 
@@ -228,12 +228,16 @@ dataset_contributors <-
         select(-"origin_samples.organ") |>
         rename("organ" = "name",
                 "analyte_class" = "metadata.metadata.analyte_class",
-                "sample_category" = "source_samples.sample_category") |>
+                "sample_category" = "source_samples.sample_category",
+                "dataset_type_additional_information" = "data_types",
+                "donor_hubmap_id" = "donor.hubmap_id") |>
         .dataset_processing_category() |>
         mutate(pipeline = str_extract(.data$dataset_type, 
                                     "(?<=\\[).*?(?=\\])"),
-                dataset_type = gsub("\\s*\\[.*?\\]", "", .data$dataset_type)) |>
-        relocate("uuid", "hubmap_id", "dataset_type", "data_types",
+            dataset_type = gsub("\\s*\\[.*?\\]", "",.data$dataset_type),
+            sample_category = str_extract(.data$sample_category, "^[^,]+")) |>
+        relocate("uuid", "hubmap_id", "dataset_type", 
+                "dataset_type_additional_information",
                 "organ", "analyte_class", "sample_category", "status",
                 "dataset_processing_category", "pipeline", everything())
 

diff --git a/R/donors.R b/R/donors.R
@@ -80,7 +80,7 @@ donor_detail <-
     function (uuid)
     {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Donor")
 
     .query_match(uuid, option = "hits.hits[]._source")
 
@@ -90,7 +90,7 @@ donor_detail <-
 #'
 #' @name donor_derived
 #'
-#' @importFrom dplyr select filter mutate any_of
+#' @importFrom dplyr select filter mutate any_of rename
 #' @importFrom purrr map_chr map_int
 #'
 #' @description `donor_derived()` takes a unique donor_id and
@@ -114,7 +114,7 @@ donor_detail <-
 donor_derived <-
     function(uuid, entity_type = c("Dataset", "Sample")) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Donor")
 
     entity <- match.arg(entity_type)
 
@@ -139,7 +139,7 @@ donor_derived <-
     else {
 
         tbl <- tbl |>
-            select(any_of(c("uuid", "hubmap_id", "data_types", "dataset_type",
+            select(any_of(c("uuid", "hubmap_id", "dataset_type",
                             "status", "last_modified_timestamp"))) |>
             .unnest_mutate_relocate() |>
             mutate(derived_dataset_count = map_int(uuid, ~{
@@ -174,15 +174,16 @@ donor_derived <-
 donor_metadata <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Donor")
 
     .donor_metadata(uuid)
 
     }
 
 ## helper function
-#' @importFrom dplyr coalesce mutate select rename_with rename
+#' @importFrom dplyr coalesce mutate select rename_with rename case_when
 #' @importFrom tidyr unnest_longer everything
+#' @importFrom rlang .data
 #'
 .donor_edit <-
     function(tbl) {
@@ -215,6 +216,15 @@ donor_metadata <-
         unnest_longer(c("data_value", "preferred_term",
                 "grouping_concept_preferred_term", "data_type")) |>
         .donor_matadata_modify() |>
-        .unnest_mutate_relocate()
+        .unnest_mutate_relocate() |>
+        mutate(Age = as.numeric(.data$Age),
+            `Body Mass Index` = as.numeric(.data$`Body Mass Index`),
+            `Body mass index` = as.numeric(.data$`Body mass index`),
+            `Body Mass Index` = case_when(
+                !is.na(.data$`Body Mass Index`) ~ .data$`Body Mass Index`,
+                is.na(.data$`Body Mass Index`) &
+                is.na(.data$`Body mass index`) ~ NA_real_,
+                TRUE ~ .data$`Body mass index`)) |>
+        select(-"Body mass index") 
 
     }
diff --git a/R/publication_information.R b/R/publication_information.R
@@ -23,7 +23,7 @@
 publication_information <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Publication")
 
     option <- .list_to_option(
         path = "hits.hits[]._source",

diff --git a/R/publications.R b/R/publications.R
@@ -62,7 +62,7 @@ publications_default_columns <-
 #'
 #' @name publication_data
 #'
-#' @importFrom dplyr select mutate
+#' @importFrom dplyr select mutate rename
 #' @importFrom tidyr unnest
 #' @importFrom purrr map
 #' @importFrom rlang .data
@@ -108,7 +108,9 @@ publication_data <-
 
         Dataset = entity_ids |>
                     mutate(organ = .title_to_organ(.data$title)) |>
-                    select(-"title"),
+                    select(-"title") |>
+                    rename(
+                        "dataset_type_additional_information" = "data_types"),
 
         Sample = entity_ids,
 

diff --git a/R/query.R b/R/query.R
@@ -120,13 +120,14 @@ SEARCH <- "https://search.api.hubmapconsortium.org/v3/search"
         view,
 
         Dataset = c("uuid", "hubmap_id", "group_name",
-                    "data_types", "dataset_type", "organ", "analyte_class",
+                    "dataset_type_additional_information", 
+                    "dataset_type", "organ", "analyte_class",
                     "dataset_processing_category", "sample_category",
                     "registered_by", "status", "pipeline",
-                    "last_modified_timestamp", "donor.hubmap_id"),
+                    "last_modified_timestamp", "donor_hubmap_id"),
 
         Sample = c("uuid", "hubmap_id", "group_name", "sample_category",
-                    "organ", "last_modified_timestamp", "donor.hubmap_id"),
+                    "organ", "last_modified_timestamp", "donor_hubmap_id"),
 
         Donor = c("hubmap_id", "uuid", "group_name", "Sex", "Age",
                     "Body Mass Index", "Race", "last_modified_timestamp"),

diff --git a/R/samples.R b/R/samples.R
@@ -5,7 +5,8 @@
 #' @title HuBMAP Samples
 #'
 #' @description `samples()` returns details about available samples, ordered by
-#' last modified dates
+#' last modified dates. The `sample_category` column reports the tissue
+#' sample type of each sample: block, organ, suspension, or section.
 #'
 #' @details Additional details are provided on the HuBMAP consortium
 #'     webpage, https://software.docs.hubmapconsortium.org/apis
@@ -76,7 +77,7 @@ samples_default_columns <-
 sample_detail <-
     function (uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Sample")
 
     .query_match(uuid, option = "hits.hits[]._source")
 
@@ -86,7 +87,7 @@ sample_detail <-
 #'
 #' @name sample_derived
 #'
-#' @importFrom dplyr select mutate filter any_of
+#' @importFrom dplyr select mutate filter any_of rename
 #' @importFrom purrr map_int map_chr
 #'
 #' @description `sample_derived()` takes a unique sample_id and
@@ -109,7 +110,7 @@ sample_detail <-
 sample_derived <-
     function(uuid, entity_type = c("Dataset", "Sample")) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Sample")
 
     entity <- match.arg(entity_type)
 
@@ -132,13 +133,12 @@ sample_derived <-
     else {
 
         tbl <- tbl |>
-            select(any_of(c("uuid", "hubmap_id", "data_types", "dataset_type",
+            select(any_of(c("uuid", "hubmap_id", "dataset_type",
                         "status", "last_modified_timestamp"))) |>
             .unnest_mutate_relocate() |>
             mutate(derived_dataset_count = map_int(uuid, ~{
                             nrow(.query_match(.x,
                             option = "hits.hits[]._source.descendants[]"))}))
-
     }
 
     tbl
@@ -170,7 +170,7 @@ sample_derived <-
 sample_metadata <-
     function(uuid) {
 
-    stopifnot(.is_uuid(uuid))
+    stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Sample")
 
     donor_uuid <- .query_match(uuid,
                     option = "hits.hits[]._source.ancestors[]") |>
@@ -183,13 +183,16 @@ sample_metadata <-
     }
 
 #' @importFrom dplyr left_join rename select
+#' @importFrom stringr str_extract
 .sample_edit <-
     function (tbl) {
 
     tbl |>
         .unnest_mutate_relocate() |>
         left_join(organ(), by = c("origin_samples.organ" = "abbreviation")) |>
         select(-"origin_samples.organ") |>
-        rename("organ" = "name")
+        rename("organ" = "name",
+                "donor_hubmap_id" = "donor.hubmap_id") |>
+        mutate(sample_category = str_extract(.data$sample_category, "^[^,]+"))
 
     }
diff --git a/R/utilities.R b/R/utilities.R
@@ -107,7 +107,7 @@
 
     }
 
-#' @importFrom dplyr mutate select case_when summarise group_by
+#' @importFrom dplyr mutate select summarise group_by
 #' @importFrom tidyr pivot_wider everything any_of
 #' @importFrom rlang .data
 .donor_matadata_modify <-
@@ -128,16 +128,7 @@
         group_by(.data$hubmap_id) |>
         select(any_of(c(.default_columns("Donor", "character"),
                         "Body mass index"))) |>
-        summarise(across(everything(), .concat_values), .groups = 'drop') |>
-        mutate(Age = as.numeric(.data$Age),
-                `Body Mass Index` = as.numeric(.data$`Body Mass Index`),
-                `Body mass index` = as.numeric(.data$`Body mass index`),
-                `Body Mass Index` = case_when(
-                    !is.na(.data$`Body Mass Index`) ~ .data$`Body Mass Index`,
-                    is.na(.data$`Body Mass Index`) &
-                    is.na(.data$`Body mass index`) ~ NA_real_,
-                    TRUE ~ .data$`Body mass index`)) |>
-                select(-"Body mass index")
+        summarise(across(everything(), .concat_values), .groups = 'drop')
 
     }
 
@@ -196,6 +187,18 @@
 
     }
 
+.uuid_category <-
+    function(uuid) {
+
+        stopifnot(.is_uuid(uuid))
+
+        type <- .query_match(uuid,
+                    option = "hits.hits[]._source.{entity_type: entity_type}")
+
+        type$entity_type
+    }
+
+
 ## .onLoad
 
 .onLoad <-

diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ Install additional required packages before running package codes in
 vignettes.
 
 ``` r
-pkgs <- c("dplyr", "tidyr")
+pkgs <- c("dplyr", "tidyr", "ggplot2")
 required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
 BiocManager::install(required_pkgs)
 ```

diff --git a/man/samples.Rd b/man/samples.Rd