From aefd121c955371040114305940d5708202bd4571 Mon Sep 17 00:00:00 2001 From: lvreynoso Date: Wed, 8 May 2024 14:44:59 -0700 Subject: [PATCH] Parameterize aligner wdl versions --- lib/idseq_utils/idseq_utils/batch_run_helpers.py | 11 +++++------ workflows/long-read-mngs/run.wdl | 9 +++++++++ workflows/short-read-mngs/non_host_alignment.wdl | 10 +++++++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/idseq_utils/idseq_utils/batch_run_helpers.py b/lib/idseq_utils/idseq_utils/batch_run_helpers.py index 1425d5723..7eab60b41 100644 --- a/lib/idseq_utils/idseq_utils/batch_run_helpers.py +++ b/lib/idseq_utils/idseq_utils/batch_run_helpers.py @@ -22,10 +22,6 @@ log = logging.getLogger(__name__) MAX_CHUNKS_IN_FLIGHT = 30 # TODO: remove this constant, currently does nothing since we have at most 30 index chunks -ALIGNMENT_WDL_VERSIONS: Dict[str, str] = { - "diamond": "v1.0.0", - "minimap2": "v1.0.0", -} # mitigation for TooManyRequestExceptions config = Config( @@ -158,6 +154,7 @@ def _run_chunk( chunk_dir: str, aligner: str, aligner_args: str, + aligner_wdl_version: str, queries: List[str], chunk_id: int, db_chunk: str, @@ -185,7 +182,7 @@ def _job_queue(provisioning_model: str): "query_0": query_uris[0], "extra_args": aligner_args, "db_chunk": db_chunk, - "docker_image_id": f"{account_id}.dkr.ecr.us-west-2.amazonaws.com/{aligner}:{ALIGNMENT_WDL_VERSIONS[aligner]}", + "docker_image_id": f"{account_id}.dkr.ecr.us-west-2.amazonaws.com/{aligner}:{aligner_wdl_version}", } if len(query_uris) > 1: @@ -193,7 +190,7 @@ def _job_queue(provisioning_model: str): wdl_input_uri = os.path.join(chunk_dir, f"{chunk_id}-input.json") wdl_output_uri = os.path.join(chunk_dir, f"{chunk_id}-output.json") - wdl_workflow_uri = f"s3://idseq-workflows/{aligner}-{ALIGNMENT_WDL_VERSIONS[aligner]}/{aligner}.wdl" + wdl_workflow_uri = f"s3://idseq-workflows/{aligner}-{aligner_wdl_version}/{aligner}.wdl" input_bucket, input_key = _bucket_and_key(wdl_input_uri) _s3_client.put_object( @@ -245,6 +242,7 @@ def run_alignment( result_path: str, aligner: str, aligner_args: str, + aligner_wdl_version: str, queries: List[str], ): bucket, prefix = _bucket_and_key(db_path) @@ -255,6 +253,7 @@ def run_alignment( chunk_dir, aligner, aligner_args, + aligner_wdl_version, queries, chunk_id, f"s3://{bucket}/{db_chunk}", diff --git a/workflows/long-read-mngs/run.wdl b/workflows/long-read-mngs/run.wdl index 820a5c85a..3e958e449 100644 --- a/workflows/long-read-mngs/run.wdl +++ b/workflows/long-read-mngs/run.wdl @@ -554,6 +554,7 @@ task RunNTAlignment { File? local_minimap2_index String prefix # only required for remote alignment + String minimap2_wdl_version String? s3_wd_uri String docker_image_id } @@ -573,6 +574,7 @@ task RunNTAlignment { result_path="gsnap.paf", aligner="minimap2", aligner_args="~{minimap2_args}", + aligner_wdl_version="~{minimap2_wdl_version}", queries=["~{all_sequences_to_align}"], ) CODE @@ -599,6 +601,7 @@ task RunNRAlignment { Boolean run_locally = false File? local_diamond_index # only required for remote alignment + String diamond_wdl_version String? s3_wd_uri String docker_image_id } @@ -624,6 +627,7 @@ task RunNRAlignment { result_path="diamond.m8", aligner="diamond", aligner_args="~{diamond_args}", + aligner_wdl_version="~{diamond_wdl_version}", queries=["~{assembled_reads_fa}"], ) CODE @@ -1299,6 +1303,9 @@ workflow czid_long_read_mngs { String? diamond_db String diamond_args = "long-reads" + String diamond_wdl_version = "v1.0.0" + String minimap2_wdl_version = "v1.0.0" + Boolean use_deuterostome_filter = true Boolean use_taxon_whitelist = false } @@ -1396,6 +1403,7 @@ workflow czid_long_read_mngs { run_locally = defined(minimap2_local_db_path), local_minimap2_index = minimap2_local_db_path, prefix= minimap2_prefix, + minimap2_wdl_version=minimap2_wdl_version, docker_image_id = docker_image_id, } @@ -1406,6 +1414,7 @@ workflow czid_long_read_mngs { diamond_args=diamond_args, run_locally=defined(diamond_local_db_path), local_diamond_index=diamond_local_db_path, + diamond_wdl_version=diamond_wdl_version, s3_wd_uri=s3_wd_uri, docker_image_id=docker_image_id, } diff --git a/workflows/short-read-mngs/non_host_alignment.wdl b/workflows/short-read-mngs/non_host_alignment.wdl index 9f60b721e..fe74ee6b8 100644 --- a/workflows/short-read-mngs/non_host_alignment.wdl +++ b/workflows/short-read-mngs/non_host_alignment.wdl @@ -77,6 +77,7 @@ task RunAlignment_minimap2_out { Boolean? run_locally = false File? local_minimap2_index String prefix + String minimap2_wdl_version } command <<< @@ -96,6 +97,7 @@ task RunAlignment_minimap2_out { result_path="gsnap.paf", aligner="minimap2", aligner_args="~{minimap2_args}", + aligner_wdl_version="~{minimap2_wdl_version}", queries=["~{sep='", "' fastas}"], ) CODE @@ -124,6 +126,7 @@ task RunAlignment_diamond_out { Boolean? run_locally = false File? local_diamond_index String prefix + String diamond_wdl_version } command <<< @@ -143,6 +146,7 @@ task RunAlignment_diamond_out { result_path="rapsearch2.m8", aligner="diamond", aligner_args="~{diamond_args}", + aligner_wdl_version="~{diamond_wdl_version}", queries=["~{sep='", "' fastas}"], ) CODE @@ -302,6 +306,8 @@ workflow czid_non_host_alignment { String diamond_args = "mid-sensitive" String minimap2_prefix = "gsnap" String diamond_prefix = "rapsearch2" + String minimap2_wdl_version = "v1.0.0" + String diamond_wdl_version = "v1.0.0" } call RunAlignment_minimap2_out { @@ -313,7 +319,8 @@ workflow czid_non_host_alignment { minimap2_args = minimap2_args, run_locally = defined(minimap2_local_db_path), local_minimap2_index = minimap2_local_db_path, - prefix= minimap2_prefix + prefix= minimap2_prefix, + minimap2_wdl_version=minimap2_wdl_version } call RunCallHitsMinimap2{ input: @@ -337,6 +344,7 @@ workflow czid_non_host_alignment { prefix = diamond_prefix, run_locally = defined(diamond_local_db_path), local_diamond_index = diamond_local_db_path, + diamond_wdl_version=diamond_wdl_version, docker_image_id = docker_image_id } call RunCallHitsDiamond {