-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path04_resampling-analysis.Rmd
140 lines (111 loc) · 5.99 KB
/
04_resampling-analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
---
editor_options:
chunk_output_type: console
---
# Resampling analysis
Using the previously generated .Rdata file, we resample checklists for three levels of sampling effort at different elevational bands.
## Load .Rdata file containing elevation data across eBird sampling locations
```{r}
load("results/eBird_elev.RData")
dat <- as.data.frame(dat)
dat <- dat [,-27] # removing unnecessary columns
## include only full species
dat <- dat %>%
filter(CATEGORY == "species" | CATEGORY == "issf")
# dividing eastern and western himalayas by the 83E longitude
datWest <- dat %>% filter (LONGITUDE < 83)
datEast <- dat %>% filter (LONGITUDE > 83)
```
## Create resampled datasets
```{r}
# Creating resampled Dataset for the centre, lower, and upper limit of a species' elevational distribution for 3 levels of sampling effort (number of checklists)- separately for east and west
## Eastern Himalayas
## Number of checklists in either season in each elevational band
Checklists <- datEast[!duplicated(datEast$group.id), ]
Checklists$elev_level <- cut(Checklists$elevation, breaks = c(-Inf, 500, 1000, 1500, 2000, 2500, 3000, Inf), labels = 1:7)
Checklists.S <- subset(Checklists, month %in% 5:8) # summer
Checklists.W <- subset(Checklists, month %in% c(1, 2, 12)) # winter
summer <- Checklists.S %>%
group_by(elev_level) %>%
summarise(summer = n_distinct(group.id))
winter <- Checklists.W %>%
group_by(elev_level) %>%
summarise(winter = n_distinct(group.id))
## if you want to output a table the number of checklists at each elevation band and season
CL_ES <- left_join(summer, winter, by = "elev_level")
levels(CL_ES$elev_level) <- c("0-500","500-1000","1000-1500","1500-2000","2000-2500","2500-3000",">3000")
write.csv(CL_ES, "results/ChecklistNo_SeasonElevation_East.csv", row.names = F)
## Sampling event IDs in each season and in each elevation band
ID.S <- lapply(1:7, function(x) Checklists.S$group.id[Checklists.S$elev_level == x])
ID.W <- lapply(1:7, function(x) Checklists.W$group.id[Checklists.W$elev_level == x])
## Get unique species list
uniSpe <- datEast %>% filter(CATEGORY == "species" | CATEGORY == "issf")
uniSpe <- unique(uniSpe[, "COMMON.NAME"]) %>% data.frame()
## Get first second and third quartile of effort (number of checklists) across season and elevation
efforts.S <- summary(Checklists.S$elev_level)
efforts.W <- summary(Checklists.W$elev_level)
qEffort <- quantile(c(efforts.S, efforts.W), c(0.25,0.50,0.75))
# resample for the lower limit (5th percentile), center (median), and upper limit (95th percentile) for a species elevational distribution
for (qt in c(0.05, 0.50, 0.95)) {
#resample for the levels of sampling effort
for (i in 1:3) {
## sample an equal number of checklists (3 levels of effort) from each elevation band
set.seed(56789)
sampleID.S <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.S[[x]], qEffort[i], replace=T)))})
set.seed(56789)
sampleID.W <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.W[[x]], qEffort[i], replace=T)))})
# calculate the lower, median and upper elevation distribution for each bird species in the two seasons
# This step takes awhile
# We used parallel processing
rs<- mclapply(1:1000, function(y) {
m <- t(sapply(uniSpe[, 1], function(x){
occs.S <- subset(datEast, COMMON.NAME ==x & group.id %in% sampleID.S[[y]], select="elevation")
occs.W <- subset(datEast, COMMON.NAME==x & group.id %in% sampleID.W[[y]], select="elevation")
B.n <- nrow(occs.S)
W.n <- nrow(occs.W)
B.elev <- if (B.n > 0) quantile(occs.S$elevation, qt) else NA
W.elev <- if (W.n > 0) quantile(occs.W$elevation, qt) else NA
return(c(B.elev = B.elev, B.n = B.n, W.elev = W.elev, W.n = W.n))
}))
}, mc.cores = 7)
save(rs,file = paste0("results/eBird_woNepal_resampled_east_", sprintf("%02d", qt*100),".q", i ,".RData"))
}
}
## Repeat the above process for Western Himalayas
Checklists <- datWest[!duplicated(datWest$group.id), ]
Checklists$elev_level <- cut(Checklists$elevation, breaks = c(-Inf, 500, 1000, 1500, 2000, 2500, 3000, Inf), labels = 1:7)
Checklists.S <- subset(Checklists, month %in% 5:8)
Checklists.W <- subset(Checklists, month %in% c(1, 2, 12))
summer <- Checklists.S %>% group_by(elev_level) %>% summarise(summer = n_distinct(group.id))
winter <- Checklists.W %>% group_by(elev_level) %>% summarise(winter = n_distinct(group.id))
CL_ES <- left_join(summer,winter, by = "elev_level")
levels(CL_ES$elev_level) <-c("0-500","500-1000","1000-1500","1500-2000","2000-2500","2500-3000",">3000")
write.csv(CL_ES, "results/ChecklistNo_SeasonElevation_West.csv", row.names = F )
ID.S <- lapply(1:7, function(x) Checklists.S$group.id[Checklists.S$elev_level == x])
ID.W <- lapply(1:7, function(x) Checklists.W$group.id[Checklists.W$elev_level == x])
uniSpe <- datWest %>% filter(CATEGORY == "species" | CATEGORY == "issf")
uniSpe <- unique(uniSpe[, "COMMON.NAME"]) %>% data.frame()
efforts.S <- summary(Checklists.S$elev_level)
efforts.W <- summary(Checklists.W$elev_level)
qEffort <- quantile(c(efforts.S, efforts.W), c(0.25,0.50,0.75))
for (qt in c(0.05, 0.50, 0.95)) {
for (i in 1:3) {
set.seed(56789)
sampleID.S <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.S[[x]], qEffort[i], replace=T)))})
set.seed(56789)
sampleID.W <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.W[[x]], qEffort[i], replace=T)))})
rs<- mclapply(1:1000, function(y) {
m <- t(sapply(uniSpe[, 1], function(x){
occs.S <- subset(datWest, COMMON.NAME ==x & group.id %in% sampleID.S[[y]], select="elevation")
occs.W <- subset(datWest, COMMON.NAME==x & group.id %in% sampleID.W[[y]], select="elevation")
B.n <- nrow(occs.S)
W.n <- nrow(occs.W)
B.elev <- if (B.n > 0) quantile(occs.S$elevation, qt) else NA
W.elev <- if (W.n > 0) quantile(occs.W$elevation, qt) else NA
return(c(B.elev = B.elev, B.n = B.n, W.elev = W.elev, W.n = W.n))
}))
}, mc.cores = 7 )
save(rs,file = paste0("results/eBird_woNepal_resampled_west_", sprintf("%02d", qt*100),".q", i,".RData"))
}
}
```