-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfunctions.R
124 lines (104 loc) · 3.77 KB
/
functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/echo source me dont execute me
#'
#'#' Rsamtoools unit tests: https://github.com/Bioconductor/Rsamtools/tree/master/inst/unitTests
#'
#'
#'#' bamCoverage by `which` region
#'
bamCoverage <- function(index, which, bamFile, orient="all", ...) {
which_element <- which[index]
param <- ScanBamParam(what=c('flag', 'pos', 'qwidth'), which=which_element,
flag=scanBamFlag(isUnmappedQuery=FALSE))
x <- scanBam(bamFile, ..., param=param)
if(orient == "all") {
ir <- IRanges(x[[1]][["pos"]], width=x[[1]][["qwidth"]])
} else if (orient == "fwd") {
ind <- which(x[[1]][["flag"]]==0)
ir <- IRanges(x[[1]][["pos"]][ind], width=x[[1]][["qwidth"]][ind])
} else if(orient == "revcomp") {
ind <- which(x[[1]][["flag"]]==16)
ir <- IRanges(x[[1]][["pos"]][ind], width=x[[1]][["qwidth"]][ind])
} else {
stop(" Arg 'orient' options: all | fwd | revcomp")
}
if(length(ir)) {
seqname <- as.character(GenomicRanges::seqnames(which_element))
start <- min(start(ir))
end <- max(end(ir))
return(list(seqname=seqname, start=start, end=end, cov=coverage(ir, shift= -start+1), ir=ir))
} else {
return(NULL)
}
}
#'
#' Extract ensembl coord system
#'
which_coords <- function(which){
paste0(GenomicRanges::seqnames(which), ":", GenomicRanges::start(which),"-", GenomicRanges::end(which))
}
#'
#' Split ensembl coords
#'
split_coords <- function(x){
parts <- strsplit(x, c("[:-]"))[[1]]
y <- list(seqname=parts[1], start=as.integer(parts[2]), end=as.integer(parts[3]))
return(y)
}
#'
#'#' KRISPR rna oligos
#'
krispr_rna_oligos <- function(){
rna <- c(crRNA_RF_1_F = "GTCATATCTAAGGACCCGCGTGG",
crRNA_RF_2_F = "TCTGTACTCCGTCTGTCGGTCGG", ## SNP at end of guide decreases efficiency - also interferes with Cas9 RE
crRNA_RF_3_R = "AGAAGACTGTCAATCCCGAGTGG",
crRNA_RF_4_F = "TGTCTGGAAAGTTTCTAACGCGG" ## SNP at beginning of guide descreases efficiency
)
return(DNAStringSet(rna))
}
#'
#'#' Median quality scores
#'
filter_quality <- function(index, FastqQual, fun=mean) {
if(missing(index)){
## Process all if index missing
index <- seq(length(FastqQual))
}
FastqQual_sub <- FastqQual[index]
metric <- rep(NA, length(FastqQual_sub))
for(i in seq(length(FastqQual_sub))) {
metric[i] <- fun(as(FastqQual_sub[i], "numeric"))
}
return(metric)
}
split_ids <- function(x) {
unname(sapply(as.character(x),
function(x)strsplit(x, " ")[[1]][1]))
}
ont_duplicates <- function(x) {
stopifnot(class(x)=="ShortReadQ")
ont_ids <- split_ids(ShortRead::id(x))
dups <- Biostrings::duplicated(ShortRead::id(x))
out <- list()
out$all_ids <- ont_ids
if(any(dups)) {
out$dup_ind <- Biostrings::duplicated(ShortRead::id(x))
out$dup_ids <- ont_ids[out$dup_ind]
}
return(out)
}
basecall_stats_versions <- function(){
#
## Checking if statistics summaries are identical between runs - _no they are not_
#
albacore_summary_hramwd <- read.table("/workspace/hramwd/github/analysis-workflows/Malus/Red_Flesh_ON/albacore2/Red_flesh_ON_run1_Cas9/all_summary_run1.txt", header=TRUE, as.is = TRUE)
albacore_summary_hrpelg <- read.table("/workspace/hrpelg/Red_Flesh_ON/albacore2/Red_flesh_ON_run1_Cas9/all_summary_run1.txt", header=TRUE, as.is = TRUE)
cat("[ hramwd run albacore dimensions ]\n")
print(dim(albacore_summary_hramwd))
cat("[ hrpelg run albacore dimensions ]\n")
print(dim(albacore_summary_hrpelg))
expect_equal(names(albacore_summary_hramwd), names(albacore_summary_hrpelg))
## Returned row order is different
o1 <- order(albacore_summary_hramwd$read_id)
o2 <- order(albacore_summary_hrpelg$read_id)
plot(o1, o2, xlab="hramwd run order", ylab="hrpelg run order", main="Albacore runs (log scale)", pch=16, cex=0.3, log="xy")
}