Skip to content

Commit

Permalink
issues resolved
Browse files Browse the repository at this point in the history
  • Loading branch information
christinehou11 committed Sep 4, 2024
1 parent 9e7c461 commit 66ac4ed
Show file tree
Hide file tree
Showing 17 changed files with 132 additions and 97 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ Suggests:
testthat (>= 3.0.0),
knitr,
rmarkdown,
BiocStyle
BiocStyle,
ggplot2
VignetteBuilder: knitr
biocViews: Software, SingleCell
URL: https://christinehou11.github.io/HuBMAPR/, https://github.com/christinehou11/HuBMAPR
Expand Down
4 changes: 1 addition & 3 deletions R/collection_information.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ collection_information <-
function(uuid)
{

stopifnot(
.is_uuid(uuid)
)
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Collection")

option <- .list_to_option(
path = "hits.hits[]._source",
Expand Down
12 changes: 7 additions & 5 deletions R/collections.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ collections_default_columns <-
collection_contacts <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Collection")

option <- .list_to_option(
path = "hits.hits[]._source.contacts[]",
Expand All @@ -91,7 +91,7 @@ collection_contacts <-
#' @name collection_data
#'
#' @importFrom tidyr unnest everything
#' @importFrom dplyr select
#' @importFrom dplyr select rename
#'
#' @description `collection_data()` takes a unique collection_id and
#' returns related datasets of one specified collection as a tibble
Expand All @@ -110,7 +110,7 @@ collection_contacts <-
collection_data <-
function(uuid) {

stopifnot( .is_uuid(uuid))
stopifnot( .is_uuid(uuid), .uuid_category(uuid) == "Collection")

option <- .list_to_option(path = "hits.hits[]._source.datasets[]",
fields = c("uuid", "hubmap_id", "data_types",
Expand All @@ -121,7 +121,9 @@ collection_data <-
tbl <- .query_match(uuid, option) |> unnest(everything())
tbl$organ <- .title_to_organ(tbl$title)

.unnest_mutate_relocate(tbl) |> select(-"title")
.unnest_mutate_relocate(tbl) |>
select(-"title") |>
rename("dataset_type_additional_information" = "data_types")

}

Expand All @@ -148,7 +150,7 @@ collection_data <-
collection_contributors <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Collection")

option <- .list_to_option(path = "hits.hits[]._source.creators[]",
fields = c("name", "affiliation", "orcid_id"))
Expand Down
18 changes: 11 additions & 7 deletions R/datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ datasets_default_columns <-
dataset_detail <-
function (uuid) {

stopifnot( .is_uuid(uuid))
stopifnot( .is_uuid(uuid), .uuid_category(uuid) == "Dataset")

.query_match(uuid, option = "hits.hits[]._source")

Expand Down Expand Up @@ -106,7 +106,7 @@ dataset_detail <-
dataset_derived <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Dataset")

option <- .list_to_option(
path = "hits.hits[]._source.descendants[]",
Expand Down Expand Up @@ -148,7 +148,7 @@ dataset_derived <-
dataset_metadata <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Dataset")

donor_uuid <- .query_match(uuid,
option = "hits.hits[]._source.ancestors[]") |>
Expand Down Expand Up @@ -208,7 +208,7 @@ dataset_metadata <-
dataset_contributors <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Dataset")

.query_match(uuid,
option = "hits.hits[]._source.contributors[]") |>
Expand All @@ -228,12 +228,16 @@ dataset_contributors <-
select(-"origin_samples.organ") |>
rename("organ" = "name",
"analyte_class" = "metadata.metadata.analyte_class",
"sample_category" = "source_samples.sample_category") |>
"sample_category" = "source_samples.sample_category",
"dataset_type_additional_information" = "data_types",
"donor_hubmap_id" = "donor.hubmap_id") |>
.dataset_processing_category() |>
mutate(pipeline = str_extract(.data$dataset_type,
"(?<=\\[).*?(?=\\])"),
dataset_type = gsub("\\s*\\[.*?\\]", "", .data$dataset_type)) |>
relocate("uuid", "hubmap_id", "dataset_type", "data_types",
dataset_type = gsub("\\s*\\[.*?\\]", "",.data$dataset_type),
sample_category = str_extract(.data$sample_category, "^[^,]+")) |>
relocate("uuid", "hubmap_id", "dataset_type",
"dataset_type_additional_information",
"organ", "analyte_class", "sample_category", "status",
"dataset_processing_category", "pipeline", everything())

Expand Down
24 changes: 17 additions & 7 deletions R/donors.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ donor_detail <-
function (uuid)
{

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Donor")

.query_match(uuid, option = "hits.hits[]._source")

Expand All @@ -90,7 +90,7 @@ donor_detail <-
#'
#' @name donor_derived
#'
#' @importFrom dplyr select filter mutate any_of
#' @importFrom dplyr select filter mutate any_of rename
#' @importFrom purrr map_chr map_int
#'
#' @description `donor_derived()` takes a unique donor_id and
Expand All @@ -114,7 +114,7 @@ donor_detail <-
donor_derived <-
function(uuid, entity_type = c("Dataset", "Sample")) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Donor")

entity <- match.arg(entity_type)

Expand All @@ -139,7 +139,7 @@ donor_derived <-
else {

tbl <- tbl |>
select(any_of(c("uuid", "hubmap_id", "data_types", "dataset_type",
select(any_of(c("uuid", "hubmap_id", "dataset_type",
"status", "last_modified_timestamp"))) |>
.unnest_mutate_relocate() |>
mutate(derived_dataset_count = map_int(uuid, ~{
Expand Down Expand Up @@ -174,15 +174,16 @@ donor_derived <-
donor_metadata <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Donor")

.donor_metadata(uuid)

}

## helper function
#' @importFrom dplyr coalesce mutate select rename_with rename
#' @importFrom dplyr coalesce mutate select rename_with rename case_when
#' @importFrom tidyr unnest_longer everything
#' @importFrom rlang .data
#'
.donor_edit <-
function(tbl) {
Expand Down Expand Up @@ -215,6 +216,15 @@ donor_metadata <-
unnest_longer(c("data_value", "preferred_term",
"grouping_concept_preferred_term", "data_type")) |>
.donor_matadata_modify() |>
.unnest_mutate_relocate()
.unnest_mutate_relocate() |>
mutate(Age = as.numeric(.data$Age),
`Body Mass Index` = as.numeric(.data$`Body Mass Index`),
`Body mass index` = as.numeric(.data$`Body mass index`),
`Body Mass Index` = case_when(
!is.na(.data$`Body Mass Index`) ~ .data$`Body Mass Index`,
is.na(.data$`Body Mass Index`) &
is.na(.data$`Body mass index`) ~ NA_real_,
TRUE ~ .data$`Body mass index`)) |>
select(-"Body mass index")

}
2 changes: 1 addition & 1 deletion R/publication_information.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
publication_information <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Publication")

option <- .list_to_option(
path = "hits.hits[]._source",
Expand Down
6 changes: 4 additions & 2 deletions R/publications.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ publications_default_columns <-
#'
#' @name publication_data
#'
#' @importFrom dplyr select mutate
#' @importFrom dplyr select mutate rename
#' @importFrom tidyr unnest
#' @importFrom purrr map
#' @importFrom rlang .data
Expand Down Expand Up @@ -108,7 +108,9 @@ publication_data <-

Dataset = entity_ids |>
mutate(organ = .title_to_organ(.data$title)) |>
select(-"title"),
select(-"title") |>
rename(
"dataset_type_additional_information" = "data_types"),

Sample = entity_ids,

Expand Down
7 changes: 4 additions & 3 deletions R/query.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,14 @@ SEARCH <- "https://search.api.hubmapconsortium.org/v3/search"
view,

Dataset = c("uuid", "hubmap_id", "group_name",
"data_types", "dataset_type", "organ", "analyte_class",
"dataset_type_additional_information",
"dataset_type", "organ", "analyte_class",
"dataset_processing_category", "sample_category",
"registered_by", "status", "pipeline",
"last_modified_timestamp", "donor.hubmap_id"),
"last_modified_timestamp", "donor_hubmap_id"),

Sample = c("uuid", "hubmap_id", "group_name", "sample_category",
"organ", "last_modified_timestamp", "donor.hubmap_id"),
"organ", "last_modified_timestamp", "donor_hubmap_id"),

Donor = c("hubmap_id", "uuid", "group_name", "Sex", "Age",
"Body Mass Index", "Race", "last_modified_timestamp"),
Expand Down
19 changes: 11 additions & 8 deletions R/samples.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#' @title HuBMAP Samples
#'
#' @description `samples()` returns details about available samples, ordered by
#' last modified dates
#' last modified dates. The `sample_category` column reports the tissue
#' sample type, which is one of block, organ, suspension, or section.
#'
#' @details Additional details are provided on the HuBMAP consortium
#' webpage, https://software.docs.hubmapconsortium.org/apis
Expand Down Expand Up @@ -76,7 +77,7 @@ samples_default_columns <-
sample_detail <-
function (uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Sample")

.query_match(uuid, option = "hits.hits[]._source")

Expand All @@ -86,7 +87,7 @@ sample_detail <-
#'
#' @name sample_derived
#'
#' @importFrom dplyr select mutate filter any_of
#' @importFrom dplyr select mutate filter any_of rename
#' @importFrom purrr map_int map_chr
#'
#' @description `sample_derived()` takes a unique sample_id and
Expand All @@ -109,7 +110,7 @@ sample_detail <-
sample_derived <-
function(uuid, entity_type = c("Dataset", "Sample")) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Sample")

entity <- match.arg(entity_type)

Expand All @@ -132,13 +133,12 @@ sample_derived <-
else {

tbl <- tbl |>
select(any_of(c("uuid", "hubmap_id", "data_types", "dataset_type",
select(any_of(c("uuid", "hubmap_id", "dataset_type",
"status", "last_modified_timestamp"))) |>
.unnest_mutate_relocate() |>
mutate(derived_dataset_count = map_int(uuid, ~{
nrow(.query_match(.x,
option = "hits.hits[]._source.descendants[]"))}))

}

tbl
Expand Down Expand Up @@ -170,7 +170,7 @@ sample_derived <-
sample_metadata <-
function(uuid) {

stopifnot(.is_uuid(uuid))
stopifnot(.is_uuid(uuid), .uuid_category(uuid) == "Sample")

donor_uuid <- .query_match(uuid,
option = "hits.hits[]._source.ancestors[]") |>
Expand All @@ -183,13 +183,16 @@ sample_metadata <-
}

#' @importFrom dplyr left_join rename select
#' @importFrom stringr str_extract
.sample_edit <-
function (tbl) {

tbl |>
.unnest_mutate_relocate() |>
left_join(organ(), by = c("origin_samples.organ" = "abbreviation")) |>
select(-"origin_samples.organ") |>
rename("organ" = "name")
rename("organ" = "name",
"donor_hubmap_id" = "donor.hubmap_id") |>
mutate(sample_category = str_extract(.data$sample_category, "^[^,]+"))

}
25 changes: 14 additions & 11 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@

}

#' @importFrom dplyr mutate select case_when summarise group_by
#' @importFrom dplyr mutate select summarise group_by
#' @importFrom tidyr pivot_wider everything any_of
#' @importFrom rlang .data
.donor_matadata_modify <-
Expand All @@ -128,16 +128,7 @@
group_by(.data$hubmap_id) |>
select(any_of(c(.default_columns("Donor", "character"),
"Body mass index"))) |>
summarise(across(everything(), .concat_values), .groups = 'drop') |>
mutate(Age = as.numeric(.data$Age),
`Body Mass Index` = as.numeric(.data$`Body Mass Index`),
`Body mass index` = as.numeric(.data$`Body mass index`),
`Body Mass Index` = case_when(
!is.na(.data$`Body Mass Index`) ~ .data$`Body Mass Index`,
is.na(.data$`Body Mass Index`) &
is.na(.data$`Body mass index`) ~ NA_real_,
TRUE ~ .data$`Body mass index`)) |>
select(-"Body mass index")
summarise(across(everything(), .concat_values), .groups = 'drop')

}

Expand Down Expand Up @@ -196,6 +187,18 @@

}

## Look up the entity type recorded for a single uuid.
##
## @param uuid identifier to look up; validated with `.is_uuid()` first.
## @return the `entity_type` value(s) of the record(s) returned by
##   `.query_match()` for `uuid`.
## Signals an error when the lookup yields no `entity_type` at all, or a
## missing one, instead of returning an empty / NA value.
.uuid_category <-
    function(uuid) {

    stopifnot(.is_uuid(uuid))

    type <- .query_match(uuid,
        option = "hits.hits[]._source.{entity_type: entity_type}")

    category <- type[["entity_type"]]

    ## Callers check the result with
    ## `stopifnot(.uuid_category(uuid) == "<Type>")`. Comparing a
    ## zero-length (or NULL) value yields logical(0), which stopifnot()
    ## accepts as "all TRUE", so an unknown uuid would slip through the
    ## type check. Fail loudly here instead.
    if (length(category) == 0L || anyNA(category)) {
        stop(
            "no 'entity_type' found for uuid '", uuid, "'",
            call. = FALSE
        )
    }

    category
}


## .onLoad

.onLoad <-
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Install additional required packages before running package codes in
vignettes.

``` r
pkgs <- c("dplyr", "tidyr")
pkgs <- c("dplyr", "tidyr", "ggplot2")
required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
BiocManager::install(required_pkgs)
```
Expand Down
3 changes: 2 additions & 1 deletion man/samples.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 66ac4ed

Please sign in to comment.