-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExploratoryDataAnalysis.Rmd
143 lines (118 loc) · 6.01 KB
/
ExploratoryDataAnalysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
---
title: "Exploratory Data Analysis"
author: "Justin Priest"
date: "September 25, 2018"
output: html_document
---
```{r setup, include=FALSE}
# Show chunk source in the rendered output unless a chunk opts out
# (the chunks in this document set echo=FALSE individually).
knitr::opts_chunk$set(
  echo = TRUE
)

# Run the project's data-import script. Its contents are not visible here;
# presumably it creates the data frames used by the chunks in this
# document and attaches the plotting/wrangling packages -- verify there.
source("Fish692-PrudhoeFish_dataimport.R")
```
## Intro
This document summarizes exploratory data analysis of the Prudhoe Bay nearshore
fisheries data, prepared by Justin Priest for Franz Mueter's Data Wrangling Seminar (Fish 692):
<https://github.com/justinpriest/Fish692-DataWranglingSeminar>
## Site Locations
```{r locations, echo=FALSE, warning=FALSE, message=FALSE}
library(leaflet)

# Sampling stations: ID (kept as character), display name, and coordinates
# in decimal degrees.
pru_loc <- data.frame(
  Station = as.character(c(214, 218, 220, 230)),
  Name = c("Niakuk", "West Beach", "West Dock", "Endicott"),
  Lat = c(70.347, 70.358, 70.3826, 70.309),
  Long = c(-148.210, -148.473, -148.558, -147.884)
)

# Geographic features drawn as text-only labels for orientation.
places <- data.frame(
  Place = c("Prudhoe Bay", "Sag River Delta"),
  Lat = c(70.345, 70.31),
  Long = c(-148.38, -148.1)
)

# Interactive map: one clickable marker per station plus the place labels.
# lng/lat are mapped explicitly rather than relying on leaflet guessing the
# coordinate columns from their names.
leaflet(pru_loc) %>%
  addTiles() %>%
  setView(-148.2, 70.35, zoom = 10) %>%
  addMarkers(
    lng = ~Long, lat = ~Lat,
    popup = ~paste0(Station, " - ", Name)
  ) %>%
  addLabelOnlyMarkers(
    data = places, lng = ~Long, lat = ~Lat, label = ~Place,
    labelOptions = labelOptions(
      # TRUE rather than T: T is an ordinary variable that can be reassigned.
      noHide = TRUE, textOnly = TRUE,
      textsize = "16px", direction = "bottom"
    )
  )
```
The two eastern sites are 230 and 214, which are within the zone of influence of the Sag River.
## Catch Histograms for select species (Response Variables)
Note that the catch distributions for most species are strongly right-skewed.
```{r responseplots, echo=FALSE, warning=FALSE, message=FALSE}
###########################
### Explore Response Variables ####

# Histograms of non-zero catch counts, one species code at a time.
# The aesthetics are written without wrapping parentheses: ggplot2 derives the
# default axis title from the expression text, so aes(x = (totcount)) would
# title the axis "(totcount)" and be inconsistent with the other plots.
ggplot(allcatch %>% filter(Species == "ARCS", totcount > 0), aes(x = totcount)) +
  geom_histogram(bins = 30) +
  ggtitle("Arctic Cisco catch histogram (excluding zeros)")

ggplot(allcatch %>% filter(Species == "ARCD", totcount > 0), aes(x = totcount)) +
  geom_histogram(bins = 30) +
  ggtitle("Arctic Cod catch histogram (excluding zeros)")

ggplot(allcatch %>% filter(Species == "BDWF", totcount > 0), aes(x = totcount)) +
  geom_histogram(bins = 30) +
  ggtitle("Broad Whitefish catch histogram (excluding zeros)")

# Same Broad Whitefish data on the natural-log scale; zeros are already
# excluded by the filter, so log() never sees a zero count here.
ggplot(
  allcatch %>% filter(Species == "BDWF", totcount > 0),
  aes(x = log(totcount))
) +
  geom_histogram(bins = 30) +
  ggtitle("Broad Whitefish catch histogram, log transformed (excluding zeros)")
# note the slightly better distribution with the log transform

# Least Cisco counts (zeros included) against day of year, one panel per
# year with independent axis scales.
ggplot(allcatch %>% filter(Species == "LSCS"), aes(x = day.of.year, y = totcount)) +
  geom_point() +
  facet_wrap(~Year, scales = "free") +
  ggtitle("Least Cisco catch by day of year") +
  ylab("Count")
# summary: most catches are of zeros or very low catch abundance, very right skewed
```
******
## Environmental Histograms (explanatory variables)
\newline
```{r explanplots1, echo=FALSE, warning=FALSE, message=FALSE}
###########################
### Explore Explanatory Variables ####

# Mean annual salinity, pooling the top, mid and bottom readings per year.
ggplot(
  watersalin %>%
    group_by(Year) %>%
    summarise(annsal = mean(c(Salin_Top, Salin_Mid, Salin_Bot), na.rm = TRUE)),
  aes(x = Year, y = annsal)
) +
  geom_line() +
  ylab("Mean annual salinity (ppt)")

# Mean annual temperature, pooled the same way.
ggplot(
  watertemps %>%
    group_by(Year) %>%
    summarise(anntemp = mean(c(Temp_Top, Temp_Mid, Temp_Bot), na.rm = TRUE)),
  aes(x = Year, y = anntemp)
) +
  geom_line() +
  ylab("Mean annual temperature (C)")

# show how salinity has been changing
# One facet label per station: the slope of Salin_Mid ~ Year (second
# coefficient of the fit), rounded to 2 significant digits. A single vapply
# replaces four copy-pasted lm() calls and yields the same labels.
salin_stations <- c(214, 218, 220, 230)
salin_slopes <- vapply(
  salin_stations,
  function(station_id) {
    fit <- lm(
      Salin_Mid ~ Year,
      data = watersalin %>% filter(Station == station_id)
    )
    coef(summary(fit))[2, 1]
  },
  numeric(1)
)
plottext_salin <- data.frame(
  label = paste0("annual change: ", signif(salin_slopes, 2)),
  Station = salin_stations
)

# Yearly boxplots of mid-depth salinity per station, with a linear trend line
# and the slope label pinned via -Inf coordinates and nudged by hjust/vjust.
ggplot(
  watersalin %>% filter(Station != 231),
  aes(as.factor(Year), Salin_Mid)
) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 35)) +
  facet_wrap(~Station, nrow = 2) +
  # group = 1 fits one line across the discrete year axis instead of one
  # group per year
  geom_smooth(method = "lm", aes(group = 1)) +
  geom_text(
    data = plottext_salin,
    mapping = aes(x = -Inf, y = -Inf, label = label),
    hjust = -0.2, vjust = -15
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# salinity has significantly dropped for the eastern sites (230&214)
# and increased at the western sites (220&218)
```
\newline
Note that salinity has dropped at the eastern sites (230 & 214)
and increased at the western sites (220 & 218).
\newline
```{r explanplots2, echo=FALSE, warning=FALSE, message=FALSE}
# show how temps have been increasing:
# One facet label per station: the slope of Temp_Mid ~ Year (second
# coefficient of the fit), rounded to 2 significant digits. A single vapply
# replaces four copy-pasted lm() calls and yields the same labels.
temp_stations <- c(214, 218, 220, 230)
temp_slopes <- vapply(
  temp_stations,
  function(station_id) {
    fit <- lm(
      Temp_Mid ~ Year,
      data = watertemps %>% filter(Station == station_id)
    )
    coef(summary(fit))[2, 1]
  },
  numeric(1)
)
plottext_temp <- data.frame(
  label = paste0("annual change: ", signif(temp_slopes, 2)),
  Station = temp_stations
)

# Yearly boxplots of mid-depth temperature per station, with a linear trend
# line and the slope label pinned via -Inf coordinates and nudged by
# hjust/vjust.
ggplot(
  watertemps %>% filter(Station != 231),
  aes(as.factor(Year), Temp_Mid)
) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 16)) +
  facet_wrap(~Station, nrow = 2) +
  # group = 1 fits one line across the discrete year axis instead of one
  # group per year
  geom_smooth(method = "lm", aes(group = 1)) +
  geom_text(
    data = plottext_temp,
    mapping = aes(x = -Inf, y = -Inf, label = label),
    hjust = -0.2, vjust = -15
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# all 4 sites have had a significant increase in temperatures
```
\newline
Note that temperature has significantly increased at all sites!
\newline
```{r explanplots3, echo=FALSE, warning=FALSE, message=FALSE}
# Wind
# Circular histogram of daily mean wind direction, restricted to rows whose
# month is 7 or 8.
ggplot(
  deadhorsewind %>% filter(month %in% c(7, 8)),
  aes(x = dailymeandir)
) +
  geom_histogram(bins = 40, col = "black") +
  coord_polar() +
  xlab("sustained wind direction") +
  ggtitle("polar histogram plot of wind directions")
```