Skip to content

Commit

Permalink
Add ConcatTextFiles wdl
Browse files Browse the repository at this point in the history
  • Loading branch information
mwalker174 committed Jan 9, 2024
1 parent 1b83623 commit 2e502e0
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 0 deletions.
38 changes: 38 additions & 0 deletions wdl/ConcatTextFiles.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
version 1.0

import "TasksMakeCohortVcf.wdl" as tasks

# Concatenates a list of plain-text files into a single output file.
#
# Inputs:
#   text_files           Files to concatenate, in order.
#   output_prefix        Prefix of the output file name.
#   output_suffix        Suffix of the output file name; a leading "." is optional.
#   headered             If true, every input is treated as having a one-line header:
#                        the header of the first file is kept and the first line of
#                        every other file is dropped. If false, files are concatenated
#                        as-is.
#   linux_docker         Image used for the headered concatenation task.
#   sv_base_mini_docker  Image used for the plain concatenation task.
#
# Output:
#   concatenated_files   The single concatenated file.
workflow ConcatTextFiles {

  input {
    Array[File] text_files
    String output_prefix
    String output_suffix = ".concat.txt"
    Boolean headered = false

    String linux_docker
    String sv_base_mini_docker
  }

  # Build the output name once so both branches agree. Strip one optional leading
  # "." from the suffix so the default ".concat.txt" gives "<prefix>.concat.txt"
  # instead of "<prefix>..concat.txt"; a suffix without the dot still works.
  String concat_filename = output_prefix + "." + sub(output_suffix, "^[.]", "")

  if (!headered) {
    call tasks.CatUncompressedFiles {
      input:
        shards=text_files,
        outfile_name=concat_filename,
        sv_base_mini_docker=sv_base_mini_docker
    }
  }

  if (headered) {
    call tasks.ConcatHeaderedTextFiles {
      input:
        text_files=text_files,
        output_filename=concat_filename,
        linux_docker=linux_docker
    }
  }

  output {
    # Exactly one of the two calls runs, so exactly one of these is defined.
    File concatenated_files = select_first([ConcatHeaderedTextFiles.out, CatUncompressedFiles.outfile])
  }
}
47 changes: 47 additions & 0 deletions wdl/TasksMakeCohortVcf.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,53 @@ task CatUncompressedFiles {
}
}


# Concatenates text files that each begin with a one-line header.
#
# The first line of the first file is written as the header of the output; the
# first line of every file (including the first) is then skipped and the remaining
# lines are appended in input order.
#
# Inputs:
#   text_files             Files to concatenate, in order.
#   output_filename        Name of the file to write.
#   linux_docker           Image to run in; the command needs a shell supporting
#                          `set -o pipefail`, plus awk.
#   runtime_attr_override  Optional replacement for the default runtime attributes.
#
# Output:
#   out                    The concatenated file.
task ConcatHeaderedTextFiles {
  input {
    Array[File] text_files
    String output_filename
    String linux_docker
    RuntimeAttr? runtime_attr_override
  }
  RuntimeAttr default_attr = object {
    cpu_cores: 1,
    mem_gb: 1,
    # Room for the localized inputs plus an output of roughly the same size.
    disk_gb: ceil(10 + 2 * size(text_files, "GB")),
    boot_disk_gb: 10,
    preemptible_tries: 1,
    max_retries: 1
  }
  RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
  output {
    File out = "~{output_filename}"
  }
  command <<<
    set -euo pipefail
    OUT_FILE="~{output_filename}"
    i=0
    # IFS= and -r keep each path byte-for-byte (no backslash processing or
    # whitespace trimming). The `|| [ -n "$path" ]` clause still processes the
    # final path when the list file does not end with a newline.
    while IFS= read -r path || [ -n "$path" ]; do
      if [ "$i" -eq 0 ]; then
        # Get header from first line of first file
        awk 'NR==1' "$path" > "$OUT_FILE"
      fi
      # Get data from each file, skipping header line
      awk 'NR>1' "$path" >> "$OUT_FILE"
      i=$((i+1))
    done < "~{write_lines(text_files)}"
  >>>
  runtime {
    cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
    memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
    disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
    bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
    docker: linux_docker
    preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
    maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
  }
}
task SortVcf {
input {
File vcf
Expand Down

0 comments on commit 2e502e0

Please sign in to comment.