-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCSmiRsyn.R
141 lines (110 loc) · 5.66 KB
/
CSmiRsyn.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
## Function for discovering cell-specific miRNA-miRNA positive correlation
# miR1 and miR2: Gene expression values of two miRNAs in single cells
# cell_index: Index of single cells
# boxsize: Size of neighborhood (0.1 in default)
# interp_betw_point: The number of interpolation points between each cell (5 in default), interp_betw_point = 0 is used to compute
# the normalized statistic of edge gx-gy in large number of cells (more than 100)
# bootstrap_num: The number of bootstrapping for interpolating pseudo-cells
# Output: res is a list of cell-specific miRNA-miRNA positive correlation
CSmiRsyn_edge_bootstrap <- function(miR1, miR2, cell_index, boxsize = 0.1, bootstrap_betw_point = 5, bootstrap_num = 100) {
cell_num <- length(miR1)
set.seed(123)
bootstrap_sample <- lapply(seq(bootstrap_num), function(i) sample(seq(cell_num), bootstrap_betw_point * (cell_num - 1), replace = TRUE))
miR1_bootstrap <- lapply(seq(bootstrap_num), function(i) c(miR1, miR1[bootstrap_sample[[i]]]))
miR2_bootstrap <- lapply(seq(bootstrap_num), function(i) c(miR2, miR2[bootstrap_sample[[i]]]))
res <- do.call(pmedian, lapply(seq(bootstrap_num),
function(k) csn_edge(miR1_bootstrap[[k]],
miR2_bootstrap[[k]],
boxsize = boxsize)[seq(cell_num)]))
return(res[cell_index])
}
## Identifying cell-specific miRNA synergistic network
CSmiRsyn_net <- function(miRTarget, cell_index, miRExp, mRExp, minSharedmR = 1, p.value.cutoff = 0.05) {
miRTarget <- as.matrix(miRTarget)
miRExpNames <- as.matrix(colnames(miRExp))
miR <- miRTarget[, 1]
mR <- miRTarget[, 2]
miRSym <- unique(miR)
mRSym <- unique(mR)
m2 <- length(miRSym)
# Initialize variables
miRInt <- matrix(NA, m2 * (m2 - 1)/2, 2)
C <- matrix(NA, m2 * (m2 - 1)/2, 3)
for (i in seq_len(m2 - 1)) {
for (j in seq(i + 1, m2)) {
Interin1 <- miRTarget[which(miRTarget[, 1] %in% miRSym[i]), 2]
Interin2 <- miRTarget[which(miRTarget[, 1] %in% miRSym[j]), 2]
M1 <- length(Interin1)
M2 <- length(Interin2)
M3 <- length(intersect(Interin1, Interin2))
M4 <- length(mRSym)
M5 <- 1 - phyper(M3 - 1, M2, M4 - M2, M1)
if (M3 >= minSharedmR & M5 < p.value.cutoff) {
miRInt[(i - 1) * m2 + j - sum(seq_len(i)), 1] <- miRSym[i]
miRInt[(i - 1) * m2 + j - sum(seq_len(i)), 2] <- miRSym[j]
miRExpIdx1 <- which(miRExpNames %in% miRSym[i])
miRExpIdx2 <- which(miRExpNames %in% miRSym[j])
# Calculate cell-specific correlation of each miRNA-miRNA pair
M6 <- CSmiRsyn_edge_bootstrap(miRExp[, miRExpIdx1], miRExp[, miRExpIdx2], cell_index)
M7 <- pnorm(-M6)
C[(i - 1) * m2 + j - sum(seq_len(i)), 1] <- M5
C[(i - 1) * m2 + j - sum(seq_len(i)), 2] <- M6
C[(i - 1) * m2 + j - sum(seq_len(i)), 3] <- M7
}
}
}
# Extract miRNA-miRNA pairs
miRInt <- miRInt[which((C[, 1] < p.value.cutoff & C[, 3] < p.value.cutoff) == "TRUE"), ]
C <- C[which((C[, 1] < p.value.cutoff & C[, 3] < p.value.cutoff) == "TRUE"), ]
if (is.vector(C)) {
res_miRInt <- c(miRInt, C)
names(res_miRInt) <- c("miRNA_1", "miRNA_2", "p_value of shared mRNAs", "correlation", "p_value of correlation")
} else {
res_miRInt <- cbind(miRInt, C)
colnames(res_miRInt) <- c("miRNA_1", "miRNA_2", "p_value of shared mRNAs", "correlation", "p_value of correlation")
}
return(res_miRInt)
}
## Function for calculating similarity matrix between two list of module groups
# Module.group1: List object, the first list of module group
# Module.group2: List object, the second list of module group
# Output: Sim is a similarity matrix between two list of module groups
Sim.module.group <- function(Module.group1, Module.group2){
if(class(Module.group1)!="list" | class(Module.group2)!="list") {
stop("Please check your input module group! The input module group should be list object! \n")
}
m <- length(Module.group1)
n <- length(Module.group2)
Sim <- matrix(NA, m, n)
for (i in seq(m)){
for (j in seq(n)){
overlap_interin <- length(intersect(Module.group1[[i]], Module.group2[[j]]))
Sim[i, j] <- overlap_interin/min(length(Module.group1[[i]]), length(Module.group2[[j]]))
}
}
if (m < n) {
GS <- mean(unlist(lapply(seq(m), function(i) Sim[i, max.col(Sim)[i]])))*m/n
} else if (m == n) {
GS <- mean(c(unlist(lapply(seq(m), function(i) Sim[i, max.col(Sim)[i]])),
unlist(lapply(seq(n), function(i) Sim[max.col(t(Sim))[i], i]))))
} else if (m > n) {
GS <- mean(unlist(lapply(seq(n), function(i) Sim[max.col(t(Sim))[i], i])))*n/m
}
return(GS)
}
## Function for calculating cluster coefficients in random networks
# nodes.num: The number of nodes
# edges.num: The number of edges
# perm: The number of permutation
# directed: Logical value, false or true
# Output: Mean and std of cluster coefficients in random networks
Random_net_clusterCoeff <- function(nodes.num, edges.num, perm = 10000, directed = FALSE) {
set.seed(123)
res <- c()
for (i in seq(perm)) {
g <- sample_pa(n = nodes.num, m = edges.num, directed = directed)
g <- delete_edges(g, sample(1:gsize(g), size = gsize(g) - edges.num))
res[i] <- transitivity(g, type="average")
}
return(list(mean(res), sd(res)))
}