Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove GenerateAlignmentViz task #374

Merged
merged 4 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 0 additions & 71 deletions workflows/short-read-mngs/experimental.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -71,51 +71,6 @@ task GenerateTaxidLocator {
}
}

# Runs the idseq-dag step class PipelineStepGenerateAlignmentViz (module
# idseq_dag.steps.generate_alignment_viz) through the `idseq-dag-run-step`
# command line, under the step name "alignment_viz_out" in the "experimental"
# workflow, inside the container image given by docker_image_id.
task GenerateAlignmentViz {
input {
# Container image for this task; consumed by the runtime section below.
String docker_image_id
# Forwarded verbatim to the step as --output-dir-s3.
String s3_wd_uri
# Sole member of the first --input-files group.
File gsnap_m8_gsnap_deduped_m8
# The thirteen files below form the second --input-files group, in this
# exact order (fasta/json pairs for nt and nr, then genus-level and
# family-level pairs, then the combined locations json).
# NOTE(review): the step presumably relies on this positional order — confirm
# against the step implementation before reordering.
File taxid_annot_sorted_nt_fasta
File taxid_locations_nt_json
File taxid_annot_sorted_nr_fasta
File taxid_locations_nr_json
File taxid_annot_sorted_genus_nt_fasta
File taxid_locations_genus_nt_json
File taxid_annot_sorted_genus_nr_fasta
File taxid_locations_genus_nr_json
File taxid_annot_sorted_family_nt_fasta
File taxid_locations_family_nt_json
File taxid_annot_sorted_family_nr_fasta
File taxid_locations_family_nr_json
File taxid_locations_combined_json
# Declared as a String (not a File), and passed to the step twice: once
# inside --additional-files and once inside --additional-attributes.
# NOTE(review): presumably a remote URI that the step resolves itself — confirm.
String nt_db
# Passed to the step inside --additional-files under the key "nt_loc_db".
File nt_loc_db
}
command<<<
set -euxo pipefail
idseq-dag-run-step --workflow-name experimental \
--step-module idseq_dag.steps.generate_alignment_viz \
--step-class PipelineStepGenerateAlignmentViz \
--step-name alignment_viz_out \
--input-files '[["~{gsnap_m8_gsnap_deduped_m8}"], ["~{taxid_annot_sorted_nt_fasta}", "~{taxid_locations_nt_json}", "~{taxid_annot_sorted_nr_fasta}", "~{taxid_locations_nr_json}", "~{taxid_annot_sorted_genus_nt_fasta}", "~{taxid_locations_genus_nt_json}", "~{taxid_annot_sorted_genus_nr_fasta}", "~{taxid_locations_genus_nr_json}", "~{taxid_annot_sorted_family_nt_fasta}", "~{taxid_locations_family_nt_json}", "~{taxid_annot_sorted_family_nr_fasta}", "~{taxid_locations_family_nr_json}", "~{taxid_locations_combined_json}"]]' \
--output-files '["align_viz.summary"]' \
--output-dir-s3 '~{s3_wd_uri}' \
--additional-files '{"nt_loc_db": "~{nt_loc_db}", "nt_db": "~{nt_db}"}' \
--additional-attributes '{"nt_db": "~{nt_db}"}'
>>>
output {
# Contents of alignment_viz_out.description.md, read into a string.
String step_description_md = read_string("alignment_viz_out.description.md")
# The single file named in --output-files above.
File align_viz_summary = "align_viz.summary"
# Optional: the task still succeeds if alignment_viz_out.count is absent.
File? output_read_count = "alignment_viz_out.count"
# Collected by glob; the command line above does not name these files.
# NOTE(review): presumably written by the step itself — confirm.
Array[File] align_viz = glob("align_viz/*.align_viz.json")
Array[File] longest_reads = glob("longest_reads/*.longest_5_reads.fasta")
}
runtime {
# The image is supplied by the caller rather than pinned in this task.
docker: docker_image_id
}
}

task GenerateCoverageViz {
input {
String docker_image_id
Expand Down Expand Up @@ -229,28 +184,6 @@ workflow czid_experimental {
taxid_annot_fasta = GenerateTaxidFasta.taxid_annot_fasta
}

call GenerateAlignmentViz {
input:
docker_image_id = docker_image_id,
s3_wd_uri = s3_wd_uri,
gsnap_m8_gsnap_deduped_m8 = gsnap_m8_gsnap_deduped_m8,
taxid_annot_sorted_nt_fasta = GenerateTaxidLocator.taxid_annot_sorted_nt_fasta,
taxid_locations_nt_json = GenerateTaxidLocator.taxid_locations_nt_json,
taxid_annot_sorted_nr_fasta = GenerateTaxidLocator.taxid_annot_sorted_nr_fasta,
taxid_locations_nr_json = GenerateTaxidLocator.taxid_locations_nr_json,
taxid_annot_sorted_genus_nt_fasta = GenerateTaxidLocator.taxid_annot_sorted_genus_nt_fasta,
taxid_locations_genus_nt_json = GenerateTaxidLocator.taxid_locations_genus_nt_json,
taxid_annot_sorted_genus_nr_fasta = GenerateTaxidLocator.taxid_annot_sorted_genus_nr_fasta,
taxid_locations_genus_nr_json = GenerateTaxidLocator.taxid_locations_genus_nr_json,
taxid_annot_sorted_family_nt_fasta = GenerateTaxidLocator.taxid_annot_sorted_family_nt_fasta,
taxid_locations_family_nt_json = GenerateTaxidLocator.taxid_locations_family_nt_json,
taxid_annot_sorted_family_nr_fasta = GenerateTaxidLocator.taxid_annot_sorted_family_nr_fasta,
taxid_locations_family_nr_json = GenerateTaxidLocator.taxid_locations_family_nr_json,
taxid_locations_combined_json = GenerateTaxidLocator.taxid_locations_combined_json,
nt_db = nt_db,
nt_loc_db = nt_loc_db
}

call GenerateCoverageViz {
input:
docker_image_id = docker_image_id,
Expand Down Expand Up @@ -279,15 +212,11 @@ workflow czid_experimental {
File taxid_fasta_out_taxid_annot_fasta = GenerateTaxidFasta.taxid_annot_fasta
File? taxid_fasta_out_count = GenerateTaxidFasta.output_read_count
File? taxid_locator_out_count = GenerateTaxidLocator.output_read_count
File alignment_viz_out_align_viz_summary = GenerateAlignmentViz.align_viz_summary
File? alignment_viz_out_count = GenerateAlignmentViz.output_read_count
File coverage_viz_out_coverage_viz_summary_json = GenerateCoverageViz.coverage_viz_summary_json
File? coverage_viz_out_count = GenerateCoverageViz.output_read_count
File nonhost_fastq_out_nonhost_R1_fastq = NonhostFastq.nonhost_R1_fastq
File? nonhost_fastq_out_nonhost_R2_fastq = NonhostFastq.nonhost_R2_fastq
File? nonhost_fastq_out_count = NonhostFastq.output_read_count
Array[File] align_viz = GenerateAlignmentViz.align_viz
Array[File] longest_reads = GenerateAlignmentViz.longest_reads
Array[File] coverage_viz = GenerateCoverageViz.coverage_viz
}
}
22 changes: 0 additions & 22 deletions workflows/short-read-mngs/test/test_short_read_mngs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import json
import atexit
import re
import os.path
from Bio import SeqIO


Expand Down Expand Up @@ -37,24 +35,4 @@ def test_bench3_viral(short_read_mngs_bench3_viral_outputs):
if filename.endswith(".fasta"):
assert is_valid_fasta(filename), f"{filename} is not a valid fasta file"

longest_reads = outp["outputs"]["czid_short_read_mngs.experimental.longest_reads"]
basenames = [os.path.basename(fn) for fn in longest_reads]
assert basenames, basenames
assert all(re.match(r"n[rt]\.[a-z]+\.-?[0-9]+\.longest_5_reads.fasta", fn) for fn in basenames), basenames
prefixes = set(fn[:2] for fn in basenames)
assert "nt" in prefixes, f"'nt' not found in {prefixes}"
assert "nr" in prefixes, f"'nr' not found in {prefixes}"

for fn in longest_reads:
with open(fn) as f:
lines = list(f)
assert 2 <= len(lines) <= 10, len(lines)
prev = None
for i, read in enumerate(lines):
if i % 2 == 0:
assert read[0] == ">", read
continue
assert prev is None or len(read) <= prev, (len(read), prev)
prev = len(read)
assert all(c in "ACTGUN" for c in read.strip()), read
# TODO: further correctness tests
Loading