-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_load_data.R
82 lines (65 loc) · 2.81 KB
/
01_load_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
## OTN SYMPOSIUM 2024
## Hugo Flávio (hugoflavio.com)
## 2024-07-28
## Script 1: Importing our data
# What's going on here?
#
# This script will import the datasets generated by actel when we ran the
# analyses for each year inside the dataset folders. Then, we'll combine the
# data we want into a single table, and extract only the wild animals.
#
# This script works as a preface for the remaining scripts we'll work with next.
# Set your working directory to the folder containing the workshop data and
# scripts. The "dataset/..." paths used further down are relative, so they
# are resolved against the directory chosen here.
setwd("your/path/to/workshop/folder") # modify this line before running!
# 1.1) load needed packages/scripts -----------------------
source("https://raw.githubusercontent.com/hugomflavio/effective-fiesta/main/Random_R_Functions/mean_table.R")
library("actel")
library("ggplot2")
# 1.2) load the actel results -----------------------------
# Read one results file per study year (2014, 2017, 2018) from the dataset
# folder. The paths are relative to the working directory set above.
# NOTE(review): dataToList() is presumably provided by actel and returns the
# saved objects as a list; the code below reads `$status.df` from each
# result -- confirm against the actel documentation.
bush14 <- dataToList("dataset/2014/actel_migration_results.RData")
bush17 <- dataToList("dataset/2017/actel_migration_results.RData")
bush18 <- dataToList("dataset/2018/actel_migration_results.RData")
# 1.3) preliminary adjustments ----------------------------
# Goal of this section: stack the three yearly status tables into a single
# data frame (my_data) holding only the columns shared by every year, then
# add the Year factor and a logical Succeeded column.

# include year for each set of results, so we know
# who is who once we merge everything together.
bush14$status.df$Year <- 2014
bush17$status.df$Year <- 2017
bush18$status.df$Year <- 2018

# Check that the number of columns is the same.
ncol(bush14$status.df)
ncol(bush17$status.df)
ncol(bush18$status.df)

# They're not, so who's different? List the columns of 2014 and of 2018
# that are absent from 2017.
# Always spell out `status.df` in full: `$status` would only work through
# partial matching of list names, which breaks silently (returns NULL) as
# soon as the match is no longer unique.
setdiff(colnames(bush14$status.df), colnames(bush17$status.df))
setdiff(colnames(bush18$status.df), colnames(bush17$status.df))

# We'll proceed with the columns that
# are common to everyone
common_cols <- Reduce(
  intersect,
  list(
    colnames(bush14$status.df),
    colnames(bush17$status.df),
    colnames(bush18$status.df)
  )
)

# combine the datasets
my_data <- rbind(
  bush14$status.df[common_cols],
  bush17$status.df[common_cols],
  bush18$status.df[common_cols]
)
rm(common_cols)

# quick check
# str(my_data)

# turn year into a factor for later analyses.
my_data$Year <- as.factor(my_data$Year)

# make a logical success column
# (TRUE where Status is exactly "Succeeded")
my_data$Succeeded <- my_data$Status == "Succeeded"
table(my_data$Succeeded)
# 1.4) Keep only the data for wild animals ----------------
# check group levels
levels(my_data$Group)

# hm... there's an "All" group. Those are wild from 2014.
# Change them to wild. Renaming a level to a name that already
# exists merges the two levels into one.
levels(my_data$Group)[levels(my_data$Group) == "All"] <- "Wild"

# check again
levels(my_data$Group)

# keep only wild animals
# which() discards rows where Group is NA; indexing with the bare
# logical vector would turn each of those into a row full of NAs.
wilds <- my_data[which(my_data$Group == "Wild"), ]