-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathErrorRateCodeFunction
60 lines (45 loc) · 2.92 KB
/
ErrorRateCodeFunction
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
ErrorRateCalc () {
fqBase=$1
for barcode in CGATGCTCTGCA AAGCCGGTTGCA; do
for read in R1 R2; do
echo fqBase,read,barcode,P1del,P2del > ${fqBase}_${barcode}_${read}_Summary.csv #Finish adding summary statistics here and line 53
# Calculating number of delitions in position 1
TwoDels=$(agrep -3 -D100 -I100 "^${barcode}" ${fqBase}.${read}.fq | head -n -1 | wc -l)
OneDels=$(agrep -3 -D100 -I100 "^.${barcode}" ${fqBase}.${read}.fq | head -n -1 | wc -l)
NoDels=$(agrep -3 -D100 -I100 "^..${barcode}" ${fqBase}.${read}.fq | head -n -1 | wc -l)
OneIns=$(agrep -3 -D100 -I100 "^...${barcode}" ${fqBase}.${read}.fq | head -n -1 | wc -l)
TwoIns=$(agrep -3 -D100 -I100 "^....${barcode}" ${fqBase}.${read}.fq | head -n -1 | wc -l)
AllOutcomes=$(($TwoDels + $OneDels + $NoDels + $OneIns + $TwoIns))
echo $TwoDels $OneDelds $NoDels $OneIns $TwoIns
P1del=$(($TwoDels + $OneDels))
P2del=$OneDels
PropP1Del=$(echo $P1del / $AllOutcomes | bc -l)
PropP2Del=$(echo $P2del / $AllOutcomes | bc -l)
PropTwoDels=$(echo $TwoDels / $AllOutcomes | bc -l)
PropOneDels=$(echo $OneDels / $AllOutcomes | bc -l)
PropNoDels=$(echo $NoDels / $AllOutcomes | bc -l)
PropOneIns=$(echo $OneIns / $AllOutcomes | bc -l)
PropTwoIns=$(echo $TwoIns / $AllOutcomes | bc -l)
echo $PropP1Del $PropP2Del $PropNoDels $PropOneIns $PropTwoIns
echo $PropTwoDels + $PropOneDels + $PropNoDels + $PropOneIns + $PropTwoIns | bc -l
NoDelsSub1=$(agrep -3 -D100 -I100 "^..${barcode}" ${fqBase}.${read}.fq | head -n -1 | cut -c 1 | grep -cv G)
agrep -3 -D100 -I100 "^..${barcode}" ${fqBase}.${read}.fq | head -n -1 > ${fqBase}.${read}.${barcode}.NoDels.seq
agrep -3 -D100 -I100 "^...${barcode}" ${fqBase}.${read}.fq | head -n -1 | cut -c 2- >> ${fqBase}.${read}.${barcode}.NoDels.seq
agrep -3 -D100 -I100 "^....${barcode}" ${fqBase}.${read}.fq | head -n -1 | cut -c 3- >> ${fqBase}.${read}.${barcode}.NoDels.seq
P1Sub=$(cut -c 1 ${fqBase}.${read}.${barcode}.NoDels.seq | grep -cv G)
AllNoDelsOutcomes=$(wc -l ${fqBase}.${read}.${barcode}.NoDels.seq | cut -d " " -f1)
PropP1Sub=$(echo $P1Sub / $AllNoDelsOutcomes | bc -l)
agrep -3 -D100 -I100 "^....${barcode}" ${fqBase}.${read}.fq | head -n -1 | sed 's/^/z/g' >> ${fqBase}.${read}.${barcode}.NoDels.seq
P2Sub=$(cut -c 2 ${fqBase}.${read}.${barcode}.NoDels.seq | grep -cv G)
AllNoDelsOutcomes=$(wc -l ${fqBase}.${read}.${barcode}.NoDels.seq | cut -d " " -f1)
PropP2Sub=$(echo $P2Sub / $AllNoDelsOutcomes | bc -l)
cut -c 1-16 ${fqBase}.${read}.${barcode}.NoDels.seq | sed -e 's/\(.\)/\1,/g' | cut -c 1-31 | sed "s/^/$fqBase,$barcode,$read,/g" > ${fqBase}_${barcode}_${read}.csv #save to file
#Output Summary Stats
echo ${fqBase},${read},${barcode},$P1del,$P2del >> ${fqBase}_${barcode}_${read}_Summary.csv #add more
done
done
}
export -f ErrorRateCalc
#run function in parallel
ls *R1.fq | sed 's/\.R1\.fq//g' | parallel -j 7 --no-notice ErrorRateCalc {}