-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path1_preprocessing.R
157 lines (127 loc) · 5.54 KB
/
1_preprocessing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
##########################
# Submission FlowCAP IV #
# Step 1 - Preprocessing #
##########################
FCSloc <- "/group/irc/shared/data/FlowCAPIV"
dir.create(paste(FCSloc,"dambi",sep="/"),showWarnings=FALSE)
#############################################
# Load the required libraries and functions #
#############################################
library(flowCore)
library(flowDensity)
# Quality control function
medianTime_select <- function(flowFrame, parameter, nIntervals, maxDiff){
# Split the data in equal size intervals
cuts <- cut(exprs(flowFrame)[,"Time"],breaks=nIntervals)
splitted <- split(as.data.frame(exprs(flowFrame)),cuts)
# Count the number of cells in each interval
counts <- sapply(splitted,function(flowDataFrame){dim(flowDataFrame)[1]})
enoughCells <- counts > median(counts)-(2*sd(counts))
# Calculate the median values of the cells in each interval
medians <- sapply(splitted,function(flowDataFrame){median(flowDataFrame[,parameter])})
stableMedian <- abs(diff(medians))<maxDiff
stableMedian <- c(T,stableMedian) & c(stableMedian,T)
# Return only the cells which meet the conditions
selection <- which(enoughCells & stableMedian)
return(flowFrame[which(as.numeric(cuts) %in% selection),])
}
# Function to remove events on the margins
removeMargins <- function(flowFrame,dimensions){
# Look op the accepted ranges for the dimensions
meta <- pData(flowFrame@parameters)
rownames(meta) <- meta[,"name"]
# Initialize variables
selection <- rep(TRUE,times=dim(flowFrame)[1])
e <- exprs(flowFrame)
# Make selection
for(d in dimensions){
selection <- selection &
e[,d] > max(meta[d,"minRange"],min(e[,d])) &
e[,d] < min(meta[d,"maxRange"],max(e[,d]))
}
return(flowFrame[selection,])
}
# Function to select single cells
removeDoublets <- function(flowFrame, d1="FSC-A", d2="FSC-H", w=NULL,silent=TRUE){
# Calculate the ratios
ratio <- exprs(flowFrame)[,d1] / (1+ exprs(flowFrame)[,d2])
# Define the region that is accepted
r <- median(ratio)
if(is.null(w)){ w <- 2*sd(ratio) }
if(!silent){
print(r)
print(w)
}
# Make selection
selection <- which(ratio < r+w)
return(flowFrame[selection,])
}
######################
# Read the meta data #
######################
meta_patients <- read.csv(paste(FCSloc,"MetaDataTrain.csv",sep="/"),as.is=TRUE)
filenames <- c(rbind(meta_patients[,"Stim"],meta_patients[,"Unstim"]))
#############################
# Initialize some variables #
#############################
colnames_meta <- c("File","Original","Bad quality","On margins","doublets","Alive T cells",
"% Good quality","% Not on margins","% Single cells","% Alive T cells")
meta <- matrix(0,nrow=length(filenames),ncol=10,dimnames=list(filenames,colnames_meta))
#######################
# Preprocess the data #
#######################
for(file in filenames){
flowFrame <- read.FCS(paste(FCSloc,file,sep="/"))
# a. Quality Control
flowFrame_q <- medianTime_select(flowFrame,"FSC-A",100,10000)
# b. Remove margin events
flowFrame_m <- removeMargins(flowFrame_q,dimensions=colnames(flowFrame))
# c. Select single cells
flowFrame_d <- removeDoublets(flowFrame_m,d1="FSC-A",d2="FSC-H")
# d. Compensate
flowFrame_c <- compensate(flowFrame_d,flowFrame@description$SPILL)
# e. Transform
flowFrame_t <- transform(flowFrame_c,transformList(c("SSC-A",colnames(flowFrame@description$SPILL)), logicleTransform()))
# f. Select alive T-cells
flowFrame_a <- tryCatch({ # lots of try catch situations to make sure there are cells selected
print("flowFrame_tmp")
flowFrame_tmp <- flowFrame_t[(flowDensity(flowFrame_t,channels=c("V450-A","R780-A"),position=c(FALSE,TRUE)))@index,]
if(dim(flowFrame_tmp)[1]<100) stop("No cells selected")
flowFrame_tmp
}, error = function(e){
print("Failed on combined directions")
return(NULL)
})
if(is.null(flowFrame_a)){
flowFrame_tmp <- tryCatch({
print("flowFrame_tmp2")
flowFrame_tmp2 <- flowFrame_t[(flowDensity(flowFrame_t,channels=c("V450-A","R780-A"),position=c(FALSE,NA)))@index,]
if(dim(flowFrame_tmp2)[1]<1) stop("No cells selected")
flowFrame_tmp2
}, error = function(e){
print("Failed on V450-A")
flowFrame_tmp2 <- flowFrame_t[(flowDensity(flowFrame_t,channels=c("V450-A","R780-A"),position=c(FALSE,NA),upper=c(FALSE,NA)))@index,]
return(flowFrame_tmp2)
})
flowFrame_a <- tryCatch({
print("flowFrame_tmp3")
flowFrame_tmp3 <- flowFrame_tmp[(flowDensity(flowFrame_tmp,channels=c("V450-A","R780-A"),position=c(NA,TRUE)))@index,]
if(dim(flowFrame_tmp3)[1]<1) stop("No cells selected")
flowFrame_tmp3
}, error = function(e){
print("Failed on R780-A")
flowFrame_tmp3 <- flowFrame_tmp[(flowDensity(flowFrame_tmp,channels=c("V450-A","R780-A"),position=c(NA,TRUE),percentile=c(NA,0.5)))@index,]
return(flowFrame_tmp3)
})
}
# Save the result
write.FCS(flowFrame_a,paste(FCSloc,"/dambi/",gsub(".fcs","",file),"_preprocessed.fcs",sep=""))
# Save some metadata
meta[file,] <- c(file,dim(flowFrame)[1],
dim(flowFrame)[1]-dim(flowFrame_q)[1],dim(flowFrame_q)[1]-dim(flowFrame_m)[1],
dim(flowFrame_m)[1]-dim(flowFrame_d)[1],dim(flowFrame_a)[1],
dim(flowFrame_q)[1]/dim(flowFrame)[1],dim(flowFrame_m)[1]/dim(flowFrame_q)[1],
dim(flowFrame_d)[1]/dim(flowFrame_m)[1],dim(flowFrame_a)[1]/dim(flowFrame_t)[1])
print(meta[file,])
}
write.csv(meta,file=paste(FCSloc,"/dambi/MetaData_preprocessing.csv", sep=""))