-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbampeTobdg
executable file
·85 lines (69 loc) · 2.12 KB
/
bampeTobdg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#! /bin/bash
# Vivek Rai <vivek.rai@roche.com>
# May 7, 2023
#
# Convert paired-end BAM files to BedGraph file.
# Write to the stdout if output file is not specified.
usage() {
echo "Usage: $0 -i <input.bam> -o <output.bedgraph> -g <genome> -h <help>"
exit 1
}
error() {
echo "$@" 1>&2
}
while getopts "i:o:g:h" opt; do
case $opt in
i) input=$OPTARG ;;
o) output=$OPTARG ;;
g) genome=$OPTARG ;;
h) usage ;;
*) usage ;;
esac
done
if [ -z "$input" ]; then
usage
fi
if [ ! -f "$input" ]; then
echo "Input file $input does not exist"
exit 1
fi
if [ -f "$output" ]; then
echo "Output file $output already exists"
exit 1
fi
if [ ! -f "$genome" ]; then
echo "Genome file $genome does not exist"
exit 1
fi
# Get the sample name from input file bath by stripping .bam extension
sample=$(basename "$input" .bam)
original_input="$input"
# Check if BAM file is name sorted and offer to sort if not
if [ "$(samtools view -H "$input" | grep -c 'SO:queryname')" -eq 0 ]; then
error "BAM file is not name sorted. Sorting now..."
samtools sort -n "$input" -o "$sample".sorted.bam
input="$sample".sorted.bam
fi
# Convert BAM to BED
error "1/4 Converting BAM to BED..."
bedtools bamtobed -bedpe -i $input >"$sample".bed 2>"$sample.bampeTobdg.log"
# [...]then selecting the 5' and 3' coordinates of the read pair to generate a
# new BED3 file. Filter improper size fragments
error "2/4 Creating fragments BED file..."
awk '$1==$4 && ($6-$2 > 0) {print $0}' "$sample.bed" | cut -f1,2,6 |
sort -k1,1 -k2,2n -k3,3n >"$sample.fragments.bed"
# # Remove chromosomes that are not in the genome file
# error "3/4 Removing chromosomes that are not in the genome file..."
# awk 'NR==FNR{a[$1];next} ($1 in a)' "$genome" $sample.fragments.bed >$sample.clean.bed
# Convert BED to BedGraph and write to output if available in an idiomatic manner
error "3/4 Converting BED to BedGraph..."
if [ -z "$output" ]; then
bedtools genomecov -bg -i $sample.fragments.bed -g "$genome"
else
bedtools genomecov -bg -i $sample.fragments.bed -g "$genome" >"$output"
fi
error "4/4 Cleanup."
rm "$sample".fragments.bed
if [ "$input" != "$original_input" ]; then
rm "$input"
fi