Skip to content

Commit

Permalink
Created wdls to extract filter values into json
Browse files Browse the repository at this point in the history
  • Loading branch information
kjaisingh committed Feb 18, 2025
1 parent 6585a31 commit 186c4d9
Showing 1 changed file with 100 additions and 0 deletions.
100 changes: 100 additions & 0 deletions wdl/ExtractFiltersFromVcfs.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
version 1.0

# Builds one JSON file describing the non-PASS FILTER values found in a set of
# VCFs: each VCF is processed independently, then the per-VCF JSON files are
# combined by MergeJSONs.
#
# Inputs:
#   vcf_files           VCFs to scan, one ExtractSampleAndVariants call per file.
#   sv_pipeline_docker  Docker image used by every task in this workflow.
# Outputs:
#   filters_json        The merged JSON produced by MergeJSONs.
workflow ExtractFiltersFromVCFs {
  input {
    Array[File] vcf_files

    String sv_pipeline_docker
  }

  # Fan out: one extraction task per input VCF.
  scatter (single_vcf in vcf_files) {
    call ExtractSampleAndVariants {
      input:
        sv_pipeline_docker = sv_pipeline_docker,
        vcf_file = single_vcf
    }
  }

  # Fan in: the scattered call exposes output_json as an Array[File].
  call MergeJSONs {
    input:
      sv_pipeline_docker = sv_pipeline_docker,
      json_files = ExtractSampleAndVariants.output_json
  }

  output {
    File filters_json = MergeJSONs.merged_json
  }
}

# Writes a JSON file of the form
#   { "<sample_id>": { "<FILTER>": ["<variant_id>", ...], ... } }
# listing, for the first sample in the VCF header, every record whose FILTER
# column is not exactly "PASS", grouped by the FILTER string as reported by
# bcftools (a multi-filter value such as "A;B" is kept as one key).
#
# Inputs:
#   vcf_file            VCF to scan with bcftools.
#   sv_pipeline_docker  Docker image; must provide bcftools and awk.
# Outputs:
#   output_json         The JSON file described above.
task ExtractSampleAndVariants {
  input {
    File vcf_file
    String sv_pipeline_docker
  }

  # Deterministic output name known to both the command and the output section,
  # so the declared output always matches the file that is actually written.
  String output_name = basename(vcf_file) + ".filters.json"

  command <<<
    set -euo pipefail

    # awk (rather than head) consumes the whole sample list, so bcftools is
    # never killed by SIGPIPE under pipefail on multi-sample VCFs.
    sample_id=$(bcftools query -l "~{vcf_file}" | awk 'NR == 1')
    if [[ -z "$sample_id" ]]; then
      echo "ERROR: no sample found in ~{vcf_file}" >&2
      exit 1
    fi

    # IDs are accumulated per FILTER and emitted only in END, so the arrays are
    # well-formed JSON even when records sharing a FILTER are not contiguous.
    # NOTE(review): sample, FILTER and ID strings are emitted without JSON
    # escaping; this assumes they contain no double quotes or backslashes.
    bcftools query -f '%ID\t%FILTER\n' "~{vcf_file}" | \
      awk -F'\t' -v sample="$sample_id" '
        $2 != "PASS" {
          id = $1
          gsub(":", "_", id)
          # Remember first-seen order of FILTER values for stable output.
          if (!($2 in count)) order[++n_filters] = $2
          ids[$2, ++count[$2]] = id
        }
        END {
          printf "{ \"%s\": {", sample
          for (i = 1; i <= n_filters; i++) {
            f = order[i]
            printf "%s\"%s\": [", (i > 1 ? ", " : ""), f
            for (j = 1; j <= count[f]; j++)
              printf "%s\"%s\"", (j > 1 ? ", " : ""), ids[f, j]
            printf "]"
          }
          print "}}"
        }' > "~{output_name}"
  >>>

  output {
    File output_json = output_name
  }

  runtime {
    docker: sv_pipeline_docker
  }
}

# Merges per-sample filter JSON files into a single JSON object.
#
# Each input is expected to be an object of the form
#   { "<sample_id>": { "<FILTER>": ["<variant_id>", ...], ... } }
# The result is one object keyed by sample id. When the same sample appears in
# several inputs, the ID lists of matching FILTER keys are concatenated in
# input order instead of being overwritten.
#
# Inputs:
#   json_files          JSON files to merge; an empty array yields "{}".
#   sv_pipeline_docker  Docker image; must provide jq.
# Outputs:
#   merged_json         The merged object, written compactly to merged.json.
task MergeJSONs {
  input {
    Array[File] json_files

    String sv_pipeline_docker
  }

  command <<<
    set -euo pipefail

    # Paths are read from a manifest rather than interpolated into the script,
    # so an empty array or unusual characters in a path cannot break the shell
    # syntax. The `|| [[ -n ... ]]` handles a manifest with no trailing newline.
    while IFS= read -r json_path || [[ -n "$json_path" ]]; do
      cat "$json_path"
    done < "~{write_lines(json_files)}" | jq -c -s '
      reduce (.[] | to_entries[]) as $s ({};
        # Seed the sample key so samples with no filtered variants are kept.
        reduce ($s.value | to_entries[]) as $f (.[$s.key] |= (. // {});
          .[$s.key][$f.key] += $f.value))
    ' > merged.json
  >>>

  output {
    File merged_json = "merged.json"
  }

  runtime {
    docker: sv_pipeline_docker
  }
}

0 comments on commit 186c4d9

Please sign in to comment.