From 9f69e27805aa00d273e71e3646ab5fd2163e1516 Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu
Date: Tue, 9 Jul 2024 23:36:14 +0000
Subject: [PATCH] Remove existing folders in synapse before indexing new files

---
Review notes (this section is ignored when the patch is applied):
* Abort when `synapse manifest` exits non-zero, so a stale or missing
  current_manifest.tsv is never used to pick folders to delete.
* Exclude PARQUET_FOLDER_CURRENT from the IDs passed to synDelete, so the
  root folder cannot be removed if the manifest lists it as a parent.
* Reuse manifest_cmd for the second run instead of rebuilding the same string.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; check the context lines against the target file before
  applying. The index hashes are carried over from the original patch.

 scripts/main/archive-to-current.R | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/scripts/main/archive-to-current.R b/scripts/main/archive-to-current.R
index 272dbf1..e80db2c 100644
--- a/scripts/main/archive-to-current.R
+++ b/scripts/main/archive-to-current.R
@@ -66,11 +66,40 @@ if (!is.null(synFindEntityId(validated_date, config::get("PARQUET_FOLDER_ARCHIVE
   # Modify cohort identifier in dir name
   junk <- sapply(list.dirs(AWS_CURRENT_DOWNLOAD_LOCATION), replace_equal_with_underscore)

-  # Generate manifest of existing files
+  # Generate manifest of existing files and remove existing folders
   SYNAPSE_AUTH_TOKEN <- Sys.getenv('SYNAPSE_AUTH_TOKEN')
   manifest_cmd <- glue::glue('SYNAPSE_AUTH_TOKEN="{SYNAPSE_AUTH_TOKEN}" synapse manifest --parent-id {PARQUET_FOLDER_CURRENT} --manifest ./current_manifest.tsv {AWS_CURRENT_DOWNLOAD_LOCATION}')
-  system(manifest_cmd)
+  # Stop on a failed manifest run so a stale or missing current_manifest.tsv
+  # is never used to choose the folders to delete
+  manifest_status <- system(manifest_cmd)
+  if (manifest_status != 0) {
+    stop("synapse manifest failed with exit status ", manifest_status,
+         call. = FALSE)
+  }

+  # Parent folders named in the manifest; the root PARQUET_FOLDER_CURRENT is
+  # dropped so it is never deleted in case the manifest lists it as a parent
+  current_syn_folders <-
+    read_tsv(
+      file = "current_manifest.tsv",
+      show_col_types = FALSE
+    ) %>%
+    pull(parent) %>%
+    unique() %>%
+    setdiff(as.character(PARQUET_FOLDER_CURRENT))
+
+  syn_folders_removed <-
+    lapply(current_syn_folders, function(x) {
+      synapser::synDelete(x)
+    })
+
+  # Rebuild the manifest with the same command now that the folders are gone
+  manifest_status <- system(manifest_cmd)
+  if (manifest_status != 0) {
+    stop("synapse manifest failed with exit status ", manifest_status,
+         call. = FALSE)
+  }
+
   # Get a list of all files to upload and their synapse locations (parentId)
   STR_LEN_PARQUET_FINAL_LOCATION <- stringr::str_length(AWS_CURRENT_DOWNLOAD_LOCATION)
