# 01_load_data.R

## OTN SYMPOSIUM 2024
## Hugo Flávio (hugoflavio.com)
## 2024-07-28
## Script 1: Importing our data

# What's going on here?
#
# This script will import the datasets generated by actel when we ran the
# analyses for each year inside the dataset folders. Then, we'll combine the
# data we want into a single table, and extract only the wild animals.
#
# This script works as a preface for the remaining scripts we'll work with next.

# Point R at the folder that holds the workshop data and scripts.
# The path below is only a placeholder: edit it before running!
setwd("your/path/to/workshop/folder")

# 1.1) load needed packages/scripts -----------------------
  # First run a helper script straight from GitHub (presumably it defines
  # helper function(s) used by the later workshop scripts), then attach the
  # two packages, in the same order as always.
  source(file = "https://raw.githubusercontent.com/hugomflavio/effective-fiesta/main/Random_R_Functions/mean_table.R")
  library(actel)
  library(ggplot2)

# 1.2) load the actel results -----------------------------
  # One results file per study year, each stored in its own dataset folder.
  result_files <- c(
    bush14 = "dataset/2014/actel_migration_results.RData",
    bush17 = "dataset/2017/actel_migration_results.RData",
    bush18 = "dataset/2018/actel_migration_results.RData"
  )

  # Read each file into its own object, named after the year it refers to.
  bush14 <- dataToList(result_files[["bush14"]])
  bush17 <- dataToList(result_files[["bush17"]])
  bush18 <- dataToList(result_files[["bush18"]])
  rm(result_files)

# 1.3) preliminary adjustments ----------------------------
  # include year for each set of results, so we know
  # who is who once we merge everything together.
  bush14$status.df$Year <- 2014
  bush17$status.df$Year <- 2017
  bush18$status.df$Year <- 2018

  # Check that the number of columns is the same.
  ncol(bush14$status.df)
  ncol(bush17$status.df)
  ncol(bush18$status.df)

  # They're not, so who's different?
  # List the columns that 2014 and 2018 have but 2017 lacks. Always spell
  # out the full element name (status.df): `$` on a list accepts partial
  # names, so an abbreviation like `$status` only works by accident and
  # silently returns NULL as soon as it stops being unambiguous.
  setdiff(colnames(bush14$status.df), colnames(bush17$status.df))

  setdiff(colnames(bush18$status.df), colnames(bush17$status.df))

  # Only the columns present in every yearly table can be stacked,
  # so work out which column names all three tables share.
  yearly_status <- list(bush14$status.df,
                        bush17$status.df,
                        bush18$status.df)
  common_cols <- Reduce(intersect, lapply(yearly_status, colnames))

  # Trim each table down to the shared columns and stack them,
  # in year order, into one dataset.
  my_data <- do.call(rbind,
                     lapply(yearly_status, function(x) x[common_cols]))
  rm(common_cols, yearly_status)

  # quick check (uncomment to inspect the structure of the merged table)
  # str(my_data)

  # Year is a label for each study season rather than a quantity,
  # so store it as a factor for the later analyses.
  my_data[["Year"]] <- as.factor(my_data[["Year"]])

  # Flag (TRUE/FALSE) the animals whose Status is exactly "Succeeded",
  # then count how many fall on each side.
  my_data[["Succeeded"]] <- my_data[["Status"]] == "Succeeded"
  table(my_data[["Succeeded"]])

# 1.4) Keep only the data for wild animals ----------------
  # check group levels
  levels(my_data$Group)

  # hm... there's an "All" group. Those are wild from 2014.
  # Change them to wild.
  levels(my_data$Group)[levels(my_data$Group) == "All"] <- "Wild"

  # check again
  levels(my_data$Group)

  # keep only wild animals.
  # which() keeps just the rows where the comparison is TRUE. Indexing with
  # the bare logical vector would turn every animal with a missing Group
  # into a row filled with NAs instead of dropping it.
  wilds <- my_data[which(my_data$Group == "Wild"), ]