-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscTerra.R
133 lines (109 loc) · 4.79 KB
/
scTerra.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
devtools::install_github('kevinblighe/EnhancedVolcano')
require(data.table)
library(tidyr)
library(dplyr)
library(DESeq2)
library(EnhancedVolcano)
setwd("~/GitHub/scTerra")
sample_name <- "SRR10018151"
#use to generate variable for naming later, only use sample name that matches
# the one from line 19
snv_input <- read.delim("inputs/example.txt")
#file needs to have column with SNV and row with SNV location/coordinates
snv_list <- snv_input$SNV
express_mat_51 <- read.delim("inputs/SRR10018151_GE_matrix_filtered.txt", row.names = 1)
redi_cos_51 <- read.delim("inputs/SRR10018151_ind_ss_redi_cos.txt")
barcodelist <- colnames(express_mat_51)
#list of barcodes to use for filtering
##redi_cos_51 <- subset(redi_cos_51, subset=(N_cells >= 10))
##remove low cell count (10)
redi_separate_51 <- separate_rows(redi_cos_51, Cells, convert = TRUE)
#separate out/expand the Cells column
#note this breaks the N_cells column for now
filt_sub_redi_51 <- redi_separate_51 %>% filter(Cells %in% barcodelist)
#keep only cells found in the GE barcode list
####
#DESEQ2 GE using cells with specific SNVs
for (input in snv_list) {
snv <- filt_sub_redi_51[filt_sub_redi_51$SNV %in% input, ]
snv_cells_A <- snv$Cells
#A is chosen SNV
snv_cells_B <- barcodelist[!barcodelist %in% snv_cells_A]
#B is all other SNVs
#filtering out only those snvs selected and setting the condition for deseq as A
snv_cells_A <- as.data.frame(snv_cells_A)
snv_cells_B <- as.data.frame(snv_cells_B)
snv_cells_A$condition <- "A"
snv_cells_B$condition <- "B"
names(snv_cells_A)[1] <- "barcode"
names(snv_cells_B)[1] <- "barcode"
dseq_meta <- rbind(snv_cells_A, snv_cells_B)
#binding the two conditional dataframes together
dseq_meta$condition <- as.factor(dseq_meta$condition)
coldata <- data.frame(barcode = barcodelist, row.names = "barcode")
dseq_meta_index <- match(rownames(coldata), dseq_meta$barcode)
#create the GE matrix barcode dataframe for reordering
#match the GE barcode to the metadata barcode and get an index for reordering
dseq_meta_reorder <- as.data.frame(dseq_meta[dseq_meta_index, ])
dseq_meta_reorder <- data.frame(dseq_meta_reorder, row.names = "barcode")
#reordering and dataframe manipulation to get into deseq2 format
#metadata creation
dds <- DESeqDataSetFromMatrix(countData = express_mat_51, colData = dseq_meta_reorder,
design = ~ condition)
#create the deseq object
dds$condition <- relevel(dds$condition, ref = "B")
#set reference condition
#here we set B reference/control
dds <- DESeq(dds)
#initialize the differential expression
res <- results(dds, independentFiltering = FALSE)
#initialize the results, with no filtering
res <- res[order(res$padj),]
#results by order of adjusted p-value
summary(results(dds, alpha=0.05))
#view summary of results that match alpha <0.05
normalized_counts <- counts(dds, normalized=TRUE)
#normalize counts variable
head(normalized_counts)
#view the normalized counts
output_gene <- as.data.frame(res[order(res$log2FoldChange, decreasing=TRUE),])
#output genes ordered by upregulated genes, set decreasing=FALSE for downregulated
file_name_genes <- paste0("outputs/", sample_name, "_DE_genes_", input, ".tsv")
file_name_genes <- gsub(":|>", "_", file_name_genes)
file_name_plotMA_shrink <- paste0("outputs/", sample_name, "_DE_MA_Shrink_", input, ".pdf")
file_name_plotMA_shrink <- gsub(":|>", "_",file_name_plotMA_shrink)
file_name_plotMA <- paste0("outputs/", sample_name, "_DE_MA_", input, ".pdf")
file_name_plotMA <- gsub(":|>", "_", file_name_plotMA)
file_name_volcano_shrink <- paste0("outputs/", sample_name, "_DE_Volcano_", input, ".pdf")
file_name_volcano_shrink <- gsub(":|>", "_", file_name_volcano_shrink)
#file name format for all plots and gene table, removing ":" and ">"
write.table(x = output_gene, file = file_name_genes, quote = FALSE, sep = "\t", col.names = NA)
#write gene table to file
resLFC<- lfcShrink(dds, coef="condition_A_vs_B", type = "normal", lfcThreshold = 1)
#apply lfcShrink to results
pdf(file = file_name_volcano_shrink)
p <- EnhancedVolcano(resLFC, lab=rownames(resLFC), x="log2FoldChange", y= "pvalue",
pCutoff = 1, title = "Normal vs. SNV" ,xlab = bquote(~Log[2]~ "fold change"),
colAlpha = .5, pointSize = 1.0, labSize = 3.0, col = c("black", "blue", "green", "red"),
legendPosition = "right",
legendLabSize = 10.0,
legendIconSize = 3.0)
print(p)
dev.off()
pdf(file = file_name_plotMA_shrink)
plotMA(resLFC, ylim=c(-2,2), cex=.4)
dev.off()
pdf(file = file_name_plotMA)
plotMA(res, ylim=c(-2,2),cex=.4)
dev.off()
print(paste0("SNV ", input, " finished."))
#visualize with MA plot
#logfold change shrinkage vs un-shrunk
#volcanoPlots
#write plots to pdf format
}
####
print("DONE")