Skip to content

Commit

Permalink
r: internal, notes
Browse files Browse the repository at this point in the history
- internal: add DAVIS_20230901, DAVIS_20231122 (but not to PACM yet)
- notes: export scripts
  • Loading branch information
jeffwalkernoaa committed Mar 13, 2024
1 parent 45feb53 commit 9a02f5e
Show file tree
Hide file tree
Showing 12 changed files with 1,372 additions and 4 deletions.
4 changes: 3 additions & 1 deletion r/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ data/templates/**/*.xlsx
data/templates/**/*.csv

_targets
.env
.env

notes/*/*
30 changes: 30 additions & 0 deletions r/R/database.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
targets_db <- list(
tar_target(db_file, "data/database/db-tables.rds", format = "file"),
tar_target(db_all, read_rds(db_file)),
tar_target(db_recordings, {
db_all$recording %>%
janitor::clean_names() %>%
mutate(recording_id = as.character(recording_id))
}),
tar_target(db_deployments, {
db_all$deployment %>%
janitor::clean_names() %>%
mutate(deployment_id = as.character(deployment_id))
}),
tar_target(db_sites, {
db_all$site %>%
janitor::clean_names()
}),
tar_target(db_projects, {
db_all$project %>%
janitor::clean_names()
}),
tar_target(db_inventory, {
db_all$inventory %>%
janitor::clean_names()
}),
tar_target(db_inventory_types, {
db_all$inventory_type %>%
janitor::clean_names()
})
)
File renamed without changes.
2 changes: 1 addition & 1 deletion r/R/external.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ targets_external <- list(
tar_target(external_db_tables, read_rds(external_db_tables_file)),
tar_target(external_submission_groups, {
tibble(
id = setdiff(list.dirs(external_dir, recursive = FALSE, full.names = FALSE), "_queue")
id = setdiff(list.dirs(external_dir, recursive = FALSE, full.names = FALSE), c("_queue", "_archive"))
) %>%
group_by(id) %>%
tar_group()
Expand Down
11 changes: 10 additions & 1 deletion r/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -853,9 +853,18 @@ qaqc_dataset <- function (deployments, detections) {
x_deployments <- tibble(deployments) %>%
select(-geometry)
x_detections <- detections %>%
rowwise() %>%
mutate(
n_locations = map_int(locations, ~ if_else(is.null(.), 0L, nrow(.)))
n_locations = {
if (is.null(locations)) {
n <- 0
} else {
n <- nrow(locations)
}
n
}
) %>%
ungroup() %>%
left_join(
x_deployments %>%
select(theme, id, deployment_type, platform_type),
Expand Down
2 changes: 1 addition & 1 deletion r/R/internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ targets_internal <- list(
detections = detections
)
})
)
)
239 changes: 239 additions & 0 deletions r/R/internal/davis-20230901.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
# DAVIS_20230901: 2004-2014 BLWH, FIWH, RIWH

targets_davis_20230901 <- list(
tar_target(davis_20230901_header_files, list.files("data/internal/DAVIS_20230901", pattern = "_HEADER_", full.names = TRUE), format = "file"),
tar_target(davis_20230901_header, {
read_csv(davis_20230901_header_files, col_types = cols(.default = col_character()), id = "file") %>%
remove_empty(which = "rows") %>%
clean_names() %>%
mutate(
theme = case_when(
str_detect(file, "BLWH") ~ "blue",
str_detect(file, "FIWH") ~ "fin",
str_detect(file, "RIWH") ~ "narw",
TRUE ~ NA_character_
),
.before = everything()
) %>%
select(-file)
}),
tar_target(davis_20230901_deployments, {
x <- davis_20230901_header %>%
left_join(
db_deployments %>%
select(
deployment_id,
inventory_id,
project_id,
site_id,
platform_no,
latitude_ddg_deployment,
longitude_ddg_deployment,
depth_water_meters,
depth_recorder_meters
),
by = "deployment_id"
) %>%
left_join(
db_inventory %>%
select(inventory_id, inventory_type_id, item_description),
by = "inventory_id"
) %>%
left_join(
db_inventory_types %>%
select(inventory_type_id, inventory_type_name),
by = "inventory_type_id"
) %>%
left_join(
db_sites %>%
select(site_id, site_name),
by = "site_id"
) %>%
left_join(
db_projects %>%
select(project_id, project_name),
by = "project_id"
) %>%
left_join(
db_recordings %>%
select(recording_id, sample_rate_khz, soundfiles_timezone, channel, recording_start_utc, recording_end_utc),
by = "recording_id"
) %>%
left_join(
davis_20230901_detail %>%
select(theme, detection_header_id, call_type = pacm_call_type_code) %>%
distinct(),
by = c("theme", "detection_header_id")
)

x2 <- x %>%
transmute(
theme,
id = toupper(glue("{project_name}_{site_name}")),
project = project_name,
site_id = site_name,
latitude = latitude_ddg_deployment,
longitude = longitude_ddg_deployment,

monitoring_start_datetime = ymd_hms(monitoring_start_datetime),
monitoring_end_datetime = ymd_hms(monitoring_end_datetime),

platform_type = case_when(
inventory_type_name == "RECORDING DEVICE (BOTTOM-MOUNTED)" ~ "mooring",
TRUE ~ NA_character_
),
platform_id = platform_no,

water_depth_meters = depth_water_meters,
recorder_depth_meters = parse_number(depth_recorder_meters),
instrument_type = item_description,
instrument_id = as.character(inventory_id),
sampling_rate_hz = sample_rate_khz * 1000,
soundfiles_timezone,
duty_cycle_seconds = NA_character_,
channel,

data_poc_name = "Genevieve Davis",
data_poc_affiliation = "NOAA NEFSC",
data_poc_email = "genevieve.davis@noaa.gov",

submitter_name = "Genevieve Davis",
submitter_affiliation = "NOAA NEFSC",
submitter_email = "genevieve.davis@noaa.gov",
submission_date = ymd(str_sub(submission_date, 1, 10)),

# species specific
detection_method,
protocol_reference,
call_type = call_type,
analyzed = TRUE,
qc_data = qc_processing,
detection_header_id
)
x2 %>%
add_count(theme, id) %>%
group_by(id) %>%
mutate(
id = if_else(n > 1, paste0(id, "_", row_number()), as.character(id))
) %>%
ungroup() %>%
select(-n)
}),

tar_target(davis_20230901_detail_files, list.files("data/internal/DAVIS_20230901", pattern = "_DETAIL_", full.names = TRUE), format = "file"),
tar_target(davis_20230901_detail, {
read_csv(davis_20230901_detail_files, col_types = cols(.default = col_character()), id = "file") %>%
remove_empty(which = "rows") %>%
clean_names() %>%
mutate(
theme = case_when(
str_detect(file, "BLWH") ~ "blue",
str_detect(file, "FIWH") ~ "fin",
str_detect(file, "RIWH") ~ "narw",
TRUE ~ NA_character_
),
.before = everything()
) %>%
select(-file)
}),
tar_target(davis_20230901_detections, {
davis_20230901_detail %>%
left_join(
davis_20230901_deployments %>%
transmute(
detection_header_id,
id
),
by = "detection_header_id"
) %>%
transmute(
theme,
id,
species = NA_character_,
date = as_date(ymd_hms(analysis_period_start_datetime)),
presence = case_when(
acoustic_presence == "0" ~ "n",
acoustic_presence == "1" ~ "y",
acoustic_presence == "2" ~ "m",
TRUE ~ NA_character_
)
) %>%
arrange(theme, id, date)
}),
tar_target(davis_20230901, {
detections <- davis_20230901_detections %>%
mutate(
locations = map(theme, ~ NULL)
)

analysis_periods <- detections %>%
group_by(id) %>%
summarise(
analysis_start_date = min(date),
analysis_end_date = max(date),
.groups = "drop"
)

deployments_analysis <- davis_20230901_deployments %>%
filter(!is.na(latitude), analyzed) %>%
left_join(analysis_periods, by = "id")

# qaqc: analysis period ---------------------------------------------------
#
# deployments_analysis %>%
# transmute(
# id,
# monitoring_start_date = as_date(monitoring_start_datetime),
# monitoring_end_date = as_date(monitoring_end_datetime),
# monitoring_n_days = as.numeric(monitoring_end_date - monitoring_start_date + 1),
# analysis_start_date,
# analysis_end_date,
# analysis_n_days = as.numeric(analysis_end_date - analysis_start_date + 1),
# delta_days = monitoring_n_days - analysis_n_days
# ) %>%
# left_join(
# count(detections, id, name = "n_detections"),
# by = "id"
# ) %>%
# view()

# summary -----------------------------------------------------------------

tabyl(detections, theme, presence)

# deployments geom --------------------------------------------------------

# no missing id, latitude, longitude
stopifnot(
all(
deployments_analysis %>%
distinct(id, latitude, longitude) %>%
complete.cases()
)
)

deployments_sf <- deployments_analysis %>%
bind_rows(filter(davis_20230901_deployments, !analyzed, !is.na(latitude))) %>%
distinct(id, latitude, longitude) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

mapview::mapview(deployments_sf, legend = FALSE)

deployments <- deployments_sf %>%
left_join(
deployments_analysis %>%
bind_rows(filter(davis_20230901_deployments, !analyzed, !is.na(latitude))),
by = "id"
) %>%
mutate(deployment_type = "stationary") %>%
relocate(deployment_type, geometry, .after = last_col()) %>%
select(-detection_header_id)

# export ------------------------------------------------------------------

list(
deployments = deployments,
detections = detections
)
})
)
Loading

0 comments on commit 9a02f5e

Please sign in to comment.