-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path03_eBird-data-processing.Rmd
126 lines (96 loc) · 4.8 KB
/
03_eBird-data-processing.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
editor_options:
  chunk_output_type: console
---
# Processing eBird data
In this script, we will process the community science data across the Eastern and Western Himalayas.
## Load necessary libraries
```{r}
library(tidyverse)
library(sf)
library(raster)
library(parallel)
```
## Loading custom functions to process eBird data
```{r}
# This function reads a raw eBird data file from the path given in `rawpath`, filters it, and returns the cleaned records
#' Read and clean a raw eBird data (EBD) text file
#'
#' Reads only the columns needed for the analysis from a tab-separated eBird
#' download, applies the effort/quality filters listed in the body, keeps one
#' record per species per (group) checklist, and adds date-derived columns.
#'
#' @param rawpath Path to a tab-separated eBird data file with a header row
#'   (e.g. "ebd_BT_relAug-2022.txt").
#' @return An ungrouped data frame holding the columns named in `imp` plus
#'   `month`, `year`, `day` (day of year from a non-leap calendar), `week`
#'   and `fort` (14-day period of the year), restricted to years after 2010.
readcleanrawdata <- function(rawpath)
{
  # library() instead of require(): a missing hard dependency now stops here
  # rather than surfacing later as "could not find function".
  library(lubridate)
  library(tidyverse)
  # NOTE(review): cowplot is not used anywhere in this function. The call is
  # retained only so that the set of attached packages is unchanged for any
  # later code that relies on it - confirm and drop if nothing does.
  require(cowplot)

  # Columns to read from the raw file; every other column is skipped on import.
  preimp <- c("COMMON.NAME", "OBSERVATION.COUNT",
              "LOCALITY.ID", "LOCALITY.TYPE", "STATE", "COUNTRY",
              "LATITUDE", "LONGITUDE", "OBSERVATION.DATE",
              "TIME.OBSERVATIONS.STARTED", "OBSERVER.ID",
              "PROTOCOL.TYPE", "DURATION.MINUTES", "EFFORT.DISTANCE.KM",
              "REVIEWED", "NUMBER.OBSERVERS", "ALL.SPECIES.REPORTED",
              "GROUP.IDENTIFIER", "SAMPLING.EVENT.IDENTIFIER", "APPROVED",
              "CATEGORY")

  # Read a single row just to obtain the (syntactically valid) column names.
  header <- read.delim(rawpath, nrows = 1, sep = "\t", header = TRUE,
                       quote = "", stringsAsFactors = FALSE,
                       na.strings = c("", " ", NA))
  nms <- names(header)

  # Every column in `preimp` is used below, so fail early with a clear message.
  missing_cols <- setdiff(preimp, nms)
  if (length(missing_cols) > 0) {
    stop("Columns missing from '", rawpath, "': ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # colClasses: NA = let read.delim guess the type, "NULL" = skip the column.
  col_classes <- ifelse(nms %in% preimp, NA_character_, "NULL")
  data <- read.delim(rawpath, colClasses = col_classes, sep = "\t",
                     header = TRUE, quote = "", stringsAsFactors = FALSE,
                     na.strings = c("", " ", NA))

  ## choosing important variables (columns kept in the output, in this order)
  imp <- c("COMMON.NAME", "OBSERVATION.COUNT",
           "LOCALITY.ID", "LOCALITY.TYPE", "STATE", "COUNTRY",
           "LATITUDE", "LONGITUDE", "OBSERVATION.DATE",
           "TIME.OBSERVATIONS.STARTED", "OBSERVER.ID",
           "PROTOCOL.TYPE", "DURATION.MINUTES", "EFFORT.DISTANCE.KM",
           "SAMPLING.EVENT.IDENTIFIER",
           "NUMBER.OBSERVERS", "ALL.SPECIES.REPORTED", "group.id",
           "CATEGORY", "no.sp")

  # Days elapsed before the first of each month (non-leap year); added to the
  # day of month to obtain a day-of-year value.
  cdays <- c(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334)

  ## setup eBird data ##
  data <- data %>%
    # keep records that are unreviewed or approved
    filter(REVIEWED == 0 | APPROVED == 1) %>%
    # shared checklists carry a GROUP.IDENTIFIER; fall back to the checklist's
    # own SAMPLING.EVENT.IDENTIFIER when there is none
    mutate(group.id = ifelse(is.na(GROUP.IDENTIFIER),
                             SAMPLING.EVENT.IDENTIFIER,
                             GROUP.IDENTIFIER)) %>%
    # complete checklists following the Stationary or Traveling protocol
    filter(ALL.SPECIES.REPORTED == 1) %>%
    filter(PROTOCOL.TYPE == "Stationary" | PROTOCOL.TYPE == "Traveling") %>%
    # effort limits: <= 2.5 km (missing distance allowed), <= 120 minutes,
    # <= 10 observers; filter() drops rows where the condition is NA, so
    # missing duration / observer counts are excluded
    filter(EFFORT.DISTANCE.KM <= 2.5 | is.na(EFFORT.DISTANCE.KM)) %>%
    filter(DURATION.MINUTES <= 120) %>%
    filter(NUMBER.OBSERVERS <= 10) %>%
    # checklists started strictly between 04:00 and 19:00
    mutate(Time = hms(TIME.OBSERVATIONS.STARTED)) %>%
    filter(Time > hms("4:00:00") & Time < hms("19:00:00")) %>%
    # remove repeats: one record per species per group checklist
    group_by(group.id, COMMON.NAME) %>%
    slice(1) %>%
    ungroup() %>%
    # add number of species column (no.sp) per group checklist
    group_by(group.id) %>%
    mutate(no.sp = n_distinct(COMMON.NAME)) %>%
    # drop the grouping so the returned data frame is a plain table; the
    # original left a dangling `ungroup` outside the pipe and returned data
    # still grouped by group.id
    ungroup() %>%
    # dplyr:: prefix because raster (attached by this file) masks select();
    # all_of() makes the external character vector explicit
    dplyr::select(dplyr::all_of(imp)) %>%
    ## set date, add month, year and day columns using package LUBRIDATE
    mutate(OBSERVATION.DATE = as.Date(OBSERVATION.DATE),
           month = month(OBSERVATION.DATE),
           year = year(OBSERVATION.DATE),
           day = day(OBSERVATION.DATE) + cdays[month],
           week = week(OBSERVATION.DATE),
           fort = ceiling(day / 14)) %>%
    filter(year > 2010)

  data
}
```
## Use the function written above to extract eBird data
```{r}
# Please download the latest versions of the eBird data from https://ebird.org/data/download and set the file paths accordingly. Because these datasets are extremely large, we have not uploaded them to GitHub.
# In this study, the latest version of the data corresponds to August 31st 2022
# extract data for the following list of countries
# Read and clean each regional download. ungroup() is applied defensively so
# that rbind() and distinct() below always operate on plain, ungrouped rows
# (it is a no-op when the data are already ungrouped).
Bhutan <- ungroup(readcleanrawdata("ebd_BT_relAug-2022.txt"))

# Indian state-level files, row-bound in this order
india_files <- c("ebd_IN-JK_relAug-2022.txt",
                 "ebd_IN-LA_relAug-2022.txt",
                 "ebd_IN-HP_relAug-2022.txt",
                 "ebd_IN-AR_relAug-2022.txt",
                 "ebd_IN-WB_relAug-2022.txt",
                 "ebd_IN-UL_relAug-2022.txt",
                 "ebd_IN-SK_relAug-2022.txt")
# rbind() (not bind_rows()) is kept on purpose: column types are guessed per
# file by read.delim and rbind() coerces them where they differ.
India <- do.call(
  rbind,
  lapply(india_files, function(path) ungroup(readcleanrawdata(path)))
)

## Removing non himalayan regions
India <- India %>% filter(LATITUDE > 26, LONGITUDE < 100)
dat <- rbind(India, Bhutan)

# Keep only unique locations used (one row per LATITUDE/LONGITUDE pair among
# records from months 1, 2, 5, 6, 7, 8 and 12)
datll <- dat %>%
  filter(month %in% c(1, 2, 5, 6, 7, 8, 12)) %>%
  distinct(LATITUDE, LONGITUDE, .keep_all = TRUE) %>%
  # dplyr:: prefix is required: raster is attached after tidyverse in this
  # file, so a bare select() resolves to raster::select()
  dplyr::select(LOCALITY.ID, LATITUDE, LONGITUDE)

# make sure the output directory exists before writing
dir.create("results", showWarnings = FALSE, recursive = TRUE)
write.csv(datll, "results/unique_loc.csv", row.names = FALSE)
```
## Extract elevation at unique locations
```{r}
# Convert the records to an sf point layer in EPSG:4326.
# remove = FALSE keeps the LONGITUDE/LATITUDE columns alongside the geometry;
# the original passed the string "F", which only worked through implicit
# character-to-logical coercion.
dat <- st_as_sf(dat, coords = c("LONGITUDE", "LATITUDE"), crs = 4326,
                remove = FALSE)

# Loading the elevation data
# (SRTM DEM; must be downloaded separately as the file is too big for GitHub)
elev <- raster("data/elevation/alt")

# extract the raster value at each point
elevation <- raster::extract(elev, dat)

# cbind elevation back to dataframe (adds a column named `elevation`)
dat <- cbind(dat, elevation)

# save Rdata file (uploaded to GitHub); create the output directory if needed
dir.create("results", showWarnings = FALSE, recursive = TRUE)
save(dat, file = "results/eBird_elev.RData")
```