forked from ljdursi/consensus_call_docker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge-one-tumour-snv.sh
executable file
·144 lines (121 loc) · 3.29 KB
/
merge-one-tumour-snv.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/bin/bash
## merges SNV files together
## requires mergevcf, vcfanno, bgzip, tabix (plus zgrep for the .gz inputs)
function usage {
>&2 echo "usage: $0 -b [/path/to/broad.snv.vcf.gz] -d [dkfz] -m [muse] -s [sanger] -o [outfile: defaults to merged.vcf]"
>&2 echo " merges snv VCFs for sample"
>&2 echo " Input VCFs must be bgzip-ed and tabix-ed"
exit 1
}
function add_header_and_sort {
local file=$1
local header=$2
local grep_or_zgrep=grep
if [[ $file == *.gz ]]
then
local grep_or_zgrep=zgrep
fi
${grep_or_zgrep} "^##" "$file"
if [[ ! -z "$header" ]]
then
echo $header
fi
${grep_or_zgrep} -v "^##" "$file" \
| sort -k1,1 -k2,2n
}
function cleanup {
local file=$1
add_header_and_sort "$file" \
| grep -v '=$' \
| sed -e 's/Tier[0-9]/PASS/'
}
outfile=merged.vcf
while getopts "b:d:m:s:o:h" OPTION
do
case $OPTION in
b) readonly broadfile=${OPTARG}
;;
d) readonly dkfzfile=${OPTARG}
;;
m) readonly musefile=${OPTARG}
;;
s) readonly sangerfile=${OPTARG}
;;
h) usage
;;
o) outfile=${OPTARG}
;;
esac
done
if [ -z "$musefile" ] || [ -z "$broadfile" ] || [ -z "$dkfzfile" ] || [ -z "$sangerfile" ]
then
>&2 echo "required argument missing: need muse (-m), broad (-b), dkfz (-d) and sanger (-s) files"
usage
fi
if [ -z "${outfile}" ]
then
>&2 echo "Invalid empty output filename"
usage
fi
if [ ! -f "$musefile" ] || [ ! -f "$broadfile" ] || [ ! -f "$dkfzfile" ] || [ ! -f "$sangerfile" ]
then
>&2 echo "files missing: one of ${musefile} ${broadfile} ${dkfzfile} ${sangerfile} not found"
usage
fi
for file in "$musefile" "$broadfile" "$dkfzfile" "$sangerfile"
do
if [[ $file != *.gz ]]
then
>&2 echo "Input VCF files must be bgziped and tabixed."
usage
fi
done
newest=$musefile
if [[ $broadfile -nt $newest ]]; then newest=$broadfile; fi
if [[ $dkfzfile -nt $newest ]]; then newest=$dkfzfile; fi
if [[ $sangerfile -nt $newest ]]; then newest=$sangerfile; fi
if [[ -f "${outfile}.gz" ]] && [[ "${outfile}.gz" -nt "${newest}" ]]
then
>&2 echo "$0: ${outfile} exists and is newer than inputs; cowardly refusing to overwrite."
exit 1
fi
##
## merge input files into one VCF
##
readonly MERGEDFILE=/tmp/merged.vcf
mergevcf -l broad,dkfz,muse,sanger \
<( cleanup "${broadfile}" ) \
<( cleanup "${dkfzfile}" ) \
<( cleanup "${musefile}" ) \
<( cleanup "${sangerfile}" )\
--ncallers --mincallers 2 \
| grep -v "Callers=muse;" \
> ${MERGEDFILE}
add_header_and_sort ${MERGEDFILE} \
| bgzip > ${MERGEDFILE}.gz
tabix -p vcf ${MERGEDFILE}.gz
##
## annotate with a filter if there were OXOG info fields in any of the input files
##
# create vcfanno config file to run vcf against
readonly OXOGCONF=/tmp/oxog.conf
rm -f ${OXOGCONF}
touch ${OXOGCONF}
for file in "${broadfile}" "${dkfzfile}" "${musefile}" "${sangerfile}"
do
cat >> ${OXOGCONF} <<EOF
[[annotation]]
file = "$file"
fields = ["OXOG_Fail"]
names = ["OXOG_Fail"]
ops = ["self"]
EOF
done
vcfanno -p 1 ${OXOGCONF} ${MERGEDFILE}.gz 2> /dev/null \
> ${outfile}
bgzip -f ${outfile}
tabix -p vcf ${outfile}.gz
rm ${OXOGCONF}
rm ${MERGEDFILE}
rm ${MERGEDFILE}.gz
rm ${MERGEDFILE}.gz.tbi