-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path5sp_EDA.R
2819 lines (2417 loc) · 120 KB
/
5sp_EDA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#' ---
#' title: Exploratory Data Analysis for 5Sp Dataset
#' author: Melissa Chen
#' output: github_document
#'
#' ---
#'
#' ## First, load required packages\
#### Load packages ####
library(tidyverse) # for data manipulation
library(MASS) # For fitting distributions and NMDS
library(rstanarm) # For calculated expected alpha and beta diversity
library(gridExtra) # For arranging ggplots
library(mgcv) # For beta distribution (beta diversity)
library(vegan) # for permanova
library(car) # for type III Anova
library(RColorBrewer) # colors for barplots
library(grid) # for text grobs in gridExtra
#' Define pathways for input files
#'
# Should I re-run all models? Set to FALSE if you don't want to waste time and already have the .RData files saved
RERUN_RICH=FALSE
RERUN_INHIBRICH=FALSE
RERUN_PERCINHIB=FALSE
RERUN_DISP=FALSE
RERUN_DIST=FALSE
#### Pathways ####
# OTU table in text format; rarefied to 'rared' OTUs
oturarePWD="../MF_and_OTU_edited/otu_table_r5000.txt"
# rarefaction depth
rared <- 5000
# OTU table in text format; not rarefied
otuPWD = "../MF_and_OTU_edited/otu_table_text.txt"
# FULL Mapping file; with alpha diversity added-- must have all columns described in aMetrics
mfPWD = "../MF_and_OTU_edited/MF_withalpha.txt"
# aMetrics
aMetrics <- c("observed_otus","shannon")
# Get header names for all alpha metrics
aHeader <- paste0(aMetrics,"_even_",rared,"_alpha")
# Inhibitory metadata; columns must be "host","inhib status","isolate number","OTU ID", "fulldescrp","OTU taxonomy"
inhibPWD <- "../ANTIFUNGAL/inhibitory_metadata_MANUAL.txt"
# folder with distance matrices
dmPWD <- "../beta_div/"
# distance metrics
dmMetrics <- c("bray_curtis")
minOTUSample = 5 # min number of reads per sample for the OTU to be non-zero
minOTUTab = 100 # min number of OTUs found in table for OTU to be kept in table
otuCutoff = 5000 # cutoff to discard sample;
#### FILTERING BD NOTES:
#' We filtered BD two ways. First, we fit a poisson distribution to BD loads, and tested to see if expected value was significantly larger than zero.
#' Then, we also checks all numbers to see if less than indivTHRESH. If less than indivTHRESH, it is changed to '0'
#' Then, it see is if at least 2 are NOT zero and the third is more than 50.
#' If the third is less than 50 AND the other two measurements are zero, they are all changed to zeros./
#' For the most part, we will see that these two methods yield similar results.
#### LOAD DATA ####
# Load mappingfile
mf <- read.delim(paste0(mfPWD), header=TRUE, as.is=TRUE)
# Load full OTU table
otu <- read.delim(paste0(otuPWD), header=TRUE, as.is=TRUE, skip=1)
# Load rarefied OTU table
otu_rare <- read.delim(paste0(oturarePWD), header=TRUE, as.is=TRUE, skip=1)
# Loop through DMs
dm_all <- list()
for ( d in dmMetrics) {
dm_all[[d]] <- read.delim(paste0(dmPWD,dmMetrics,"_dm.txt"), header=TRUE)
}
inhib <- read.delim(paste0(inhibPWD), header=FALSE, as.is=TRUE)
#' ## Determining BD infection loads ## \
#' \
#' One of the problems with the BD qPCR results is that we get very irregular results. Thus, each individual measurement
#' is unreliable. Here, I use a parameterized model to predict the "true" Bd load given the measurements taken.
#' I would expect BD load to be modelled by an approximately poisson process; here, we check if this is true.
#' \
#' \
#' A poisson distribution models a process where there is an expected "distance" or "time" between events,
#' and you want to model how many events occur in a certain "distance" or "timespan". In the Bd system, an approximately
#' equal amount of Bd is applied to each individual, and we measure the intensity of Bd through qPCR of a Bd-specific amplicon.
#' One of the assumptions of the poisson distribution is that events are independent, and that the probability for an event over short
#' intervals is the same as over long intervals. The Bd intensity is bound by zero, and has a hypothetical upper limit since after a
#' certain infeciton intensity, amphibians will die. Lambda (the rate at which an event occurs) can be thought of as the number of Bd amplicons
#' per "unit" area. In our methods, we assume that we swab all individuals equally. Thus, the "area swabbed" is
#' the same across all individual amphibians. The Bd load is thus essentially a measurement of how many "events" there are in an unknown but constant
#' swabbing area. While samples of the sample individual over time violate one of the assumptions of the Poisson distribution,
#' we are simply using all the samples to see (roughly) whether a poisson distribution is a good fit for the Bd intensity. We then
#' use multiple qPCR results from ONE sample to estimate the "true" intensity of Bd on an individual sample. The poisson model
#' is therefore not really modelling Bd infection, per say, but rather the accuracy of the qPCR process.
#'
# Filter out all information except BD-positive scenarios
BD <- mf %>%
as_tibble() %>%
dplyr::select(X.SampleID, Bd_Run_1,Bd_Run_2, Bd_Average_Run_3) %>%
filter(!is.na(Bd_Average_Run_3), !is.na(Bd_Run_1), !is.na(Bd_Run_2))
samples_bd <- BD$X.SampleID
BD <- BD %>%
dplyr::select(-X.SampleID)
BD12 <- c(BD$Bd_Run_1, BD$Bd_Run_2, BD$Bd_Average_Run_3)
# Get rid of zeros due to overinflation in poisson model
BD12 <- BD12[BD12!=0]
# Fit a poisson model to log BD
poisfit <- MASS::fitdistr(round(log(BD12)), densfun = "Poisson")
# Set new range of X's to test
xfit <- seq(0,(max(log(BD12))))
# Predict y
pred.y <- data.frame(y.pred=dpois(x=xfit, poisfit$estimate), xfit=xfit)
# Plot histogram with poisson distribution fit
BD12 %>%
as_tibble() %>%
rename(BDload=value)%>%
ggplot(aes(x=log(BDload))) +
geom_histogram(aes(y=..density..), bins=8) +
geom_line(data=pred.y,aes(x=xfit, y=y.pred), col="red")
#' What we see above is that BD load (when not zero) is, in fact, modelled well by a poisson process. This means we could fit a poisson
#' model to the aPCR results to estimate the "true" infection load (lambda) for each toad.
# Model true infection load for each toad
BD_lambda_est <- t(apply(round(log(BD+1)), MARGIN=1, FUN=function(x) {
temp <- fitdistr(x, densfun="Poisson")
return(c(temp$estimate,temp$sd))
})) %>%
as_tibble() %>%
rename(sd=lambda1) %>%
mutate(pval=dnorm(0,mean=lambda,sd=sd)) %>% # To estimate if lambda is significantly different from zero, we see if pval for parameter estimate different from zero
mutate(sig=pval<=0.10)
final_BD <- cbind(BD,BD_lambda_est)
# To compare, let's see what a different filtering method yields:
# If 2 BD samples are 0 and the third is less than 50, then set to zero.
# Also, if anything is less than 5, make it zero anyway.
# BD filtering notes and thresholds:
indivTHRESH = 5 # BD individual threshold
thirdTHRESH = 50 # BD 3rd sample threshold
BD_alt <- BD
for ( r in 1:nrow(BD_alt) ) {
for ( c in 1:ncol(BD_alt) ) {
if ( (BD_alt[r,c] < indivTHRESH) | is.na(BD_alt[r,c]) ) {
BD_alt[r,c] <- 0
}
}
if ( (sum(BD_alt[r,] == 0) == 2) & (max(BD_alt[r,],na.rm=TRUE) < thirdTHRESH) ) {
BD_alt[r,] <- c(0,0,0)
}
}
BD_alt$infected <- rowSums(BD_alt) >0
cbind(final_BD[,c("Bd_Run_1","Bd_Run_2","Bd_Average_Run_3","pval","sig")], alt_infect =BD_alt$infected)
#' What we see is that by using a significant threshold of p=0.1 (which is fairly relaxed), the poisson model method is less stringent
#' than the straight threshold method. I believe the poisson model method is likely more reliable since it is able to detect cases where infection
#' is truely low, but consistent. For example, there are some cases where all 3/3 samples were BD-positive but at low abundances; however, the
#' straight threshold model does not recognize it as a 'positive' since the abundances are below the individual threshold. I believe the poisson model
#' method uses more of the information in the qPCR methods than the straight threshold method.\
#' From here, we will add in the "expected" BD load (lambda) into the mapping file for each toad.
# Insert expected BD loads for each sample
final_BD <- cbind(samples_bd, final_BD)
mf$eBD <- 0
mf[match(final_BD$samples_bd, mf$X.SampleID),"eBD"] <- final_BD$lambda
##### ADJUSTING DATA: Make same order, make 'time' variable ######
#### Edit metadata/mapping file #####
toKeepMF <- c("X.SampleID"
,"eBD" # infection levels
,"SPEC" # Species code
,"TREATMENT_GROUP" # whether it was pre or post; should rename
,"BD100KZSP_3122011" # whether or not individual was exposed to BD: will rename
,"ANONYMIZED_NAME" # individual frog ID
, "PRE_POST_BD_NUM" # timepoint
, aHeader
)
newNames <- c("SampleID"
, "eBD_log"
, "species"
, "prepost"
, "BD_infected"
, "toadID"
, "timepoint"
, aMetrics
)
mf <- mf %>% # mapping file with controls still in it
as_tibble() %>% # make into tibble
dplyr::select(one_of(toKeepMF)) %>% # filter to only relevant variables
rename_at(vars(toKeepMF), ~ newNames) %>% #rename variable names
filter(prepost == "Pre" | prepost == "Pos") %>% # get rid of things in "tre", which is a different experiment
separate(timepoint, into=c("exposure","time"), sep="_", convert=TRUE) %>% # create a time variable
mutate( time = ifelse(exposure == "Pre", time, time + 5)) %>% # time variable "restarts" at BD exposure point, but I want it to be continuous
mutate(species=ifelse(species=="Bubo","Anbo",ifelse(species=="Buma","Rhma",species))) %>%
separate(toadID, into=c("todelete","ID"), sep="_",remove=TRUE) %>%
unite(toadID, species,ID, sep = "_", remove = FALSE) %>%
dplyr::select(-todelete, -ID) %>%
mutate(species=factor(species, levels=c("Anbo","Rhma","Osse","Raca","Rapi")))
#### Adjusting values in mf for Osse and Anbo ####
#' OSSE QUIRK\
#' \
#' So it turns out that the Osse data has this weird quirk where they weren't sampled in the first timepoint. Then,
#' in the mapping file they were given time points 1-4 instead of 2-5, which screws up how the sampling
#' lines up. So, I'm going to
#' change the "pre" numbers so that they line up nicely with the rest of the samples.
# rows to change
addTime1 <- which((mf$species=="Osse") & (mf$time %in% c(1,2,3,4,5)))
mf[addTime1, "time"] <- mf[addTime1, "time"] +1 # add 1 to all pre time points so that time point 5 lines up with all other species
#' BUBO QUIRK\
#' \
#' Additionally, there is this strange quirk with the Anbo dataset (or rather, strange quirk with every OTHER dataset) where nothing but
#' Anbo was sampled at time point 10. Thus, let's add +1 to all time point 10's for all samples except Anbo
addTime1_notanbo <- which((mf$species!="Anbo") & (mf$time == 15))
mf[addTime1_notanbo,"time"] <- mf[addTime1_notanbo,"time"] + 1
#### filtering mapping files
mf.wcon <- mf
mf.tb <- mf.wcon %>%
filter(species != "None") # get rid of controls
# Number of controls lost
c(nrow(mf.wcon)-nrow(mf.tb))
### Check to make sure there are no duplicates
#+ message=FALSE, echo=FALSE, warnings=FALSE, include=FALSE
any(table(dplyr::select(mf.tb, toadID, time)) > 1)
# Oh no! There is a duplicate! Check which one it is.
#+ message=FALSE, echo=FALSE, warnings=FALSE, include=FALSE
table(dplyr::select(mf.tb, toadID, time))>1
# Lica10 has 2 timepoint 3's
# Rhma11 has 2 time point 3's
# I think this is just a mistake in the data entry. I've gone through the rest of the metadata and determined that
# it is probably mis-labeled so:
# I'm going to manually change:
# I think mck115Pre.3Buma.11.452405 Rhma11 timepoint 3 is actually Rhma9, time 3
# I think mck105Pre.3Raca.10.451190 Lica10 timepoint 3 is actually Lica9, time 3
mf.tb[mf.tb$SampleID == "mck115Pre.3Buma.11.452405",c("toadID")] <- "Rhma_9"
mf.tb[mf.tb$SampleID == "mck105Pre.3Raca.10.451190", c("toadID")] <- "Raca_9"
#### Adjusting for individuals who were contaminated at the beginning
#' Finally, there were certain individuals who actually tested positive for BD when they arrived.
#' These were unknown until later because the PCR process takes a while, so they were included in the experiment.
#' However, let us identify these individuals and remove them. The list below was manually curated from the spreadsheet
#' provided by Val Mckenzie.
#'
BD_contam_upon_arrival <- c("Rhma_4"
, "Rhma_6"
, "Rhma_7"
, "Rhma_10"
, "Rhma_11"
, "Rapi_1"
, "Rapi_2"
, "Rapi_5"
, "Rapi_7"
, "Rapi_8"
, "Rapi_9"
, "Rapi_10"
, "Rapi_11"
, "Rapi_12")
# Add new column for individuals that were originally contamined
mf.tb$orig_contam <- 0
mf.tb[which(mf.tb$toadID %in% BD_contam_upon_arrival),"orig_contam"] <- 1
#### Other adjustments
# Make a PABD column as well as a "raw" BD counts column
mf.tb <- mf.tb %>%
mutate(PABD=eBD_log>0, eBD_raw=exp(eBD_log)-1)
# # make diversity metrics numeric
mf.tb$shannon <- as.numeric(mf.tb$shannon)
mf.tb$observed_otus <- as.numeric(mf.tb$observed_otus)
mf.tb$logRich <- log(mf.tb$observed_otus)
# View(mf.tb %>%
# dplyr::select(SampleID, shannon))
# filter to ONLY include those things in otu table (Raw)
mf.raw <- mf.tb %>%
filter(SampleID %in% colnames(otu))
# How many things did we lose? Hopefully none, so that means the OTU table is complete.
c(nrow(mf.raw), nrow(mf.tb))
# Note: This ALSO means that controls were filtered out before making the OTU table.
# Samples to keep in otu table (raw)
keepSamples <- colnames(otu_rare)[-match(c("X.OTU.ID","taxonomy"),colnames(otu_rare))]
### ** Note, this assumes that there are no samples in OTU table that are NOT in mapping file!
# If you get random warnings later on, you should come back here.
OTU_names <- otu$X.OTU.ID
otu.filt <- otu %>%
as_tibble() %>%
dplyr::select(one_of(keepSamples)) %>% # Use rare because it naturally cuts off for <5000
replace(.<minOTUSample, 0) %>%
mutate(rowsums=rowSums(.)) %>%
mutate(OTUID = OTU_names) %>%
filter(rowsums > minOTUTab) %>%
dplyr::select(-rowsums)
# Samples to keep in otu table (rarefied)
OTU_names_rare <- otu_rare$X.OTU.ID
otu.filt_rare <- otu_rare %>%
as_tibble() %>%
dplyr::select(one_of(keepSamples)) %>% # Use rare
replace(.<minOTUSample, 0) %>%
mutate(rowsums=rowSums(.)) %>%
mutate(OTUID = OTU_names_rare) %>%
filter(rowsums > minOTUTab) %>%
dplyr::select(-rowsums)
# View(otu.filt_rare)
# Check that each have the same number of samples-- should be the same.
ncol(otu.filt_rare)
ncol(otu.filt)
#### The mapping file already has diversity in it, so I need to add inihibitory bacterial diversity and beta diversity
#### Inhibitory bacteria ####
### Get proportion and count of inhibitory otus from inhibitory otu metadata
inhib.tb <- inhib %>%
as_tibble() %>%
rename(Name=V1, inhib=V2, num=V3, seq=V4,sampleID=V5, taxa=V6)
otu.tb.inhib <- otu.filt %>%
mutate(inhib = (inhib.tb[match(otu.filt$OTUID, inhib.tb$seq),"inhib"])$inhib)
# transpose it, and then make column for "percent inhib" and "count inhib"
# total counts
sampleCounts <- otu.tb.inhib %>%
dplyr::select(-c("inhib","OTUID")) %>%
colSums()
# counts of total inhibitory sequences
inhibCounts <- otu.tb.inhib %>%
filter(inhib=="inhibitory") %>%
dplyr::select(-c("inhib","OTUID")) %>%
colSums()
# richness of inhibitory sequences
inhibRich <- otu.tb.inhib %>%
filter(inhib=="inhibitory") %>%
dplyr::select(-c("inhib","OTUID")) %>%
replace(.>0, 1) %>%
colSums()
## Now, add this information into the mf
mf.raw <- mf.raw %>%
mutate(n=sampleCounts[match(SampleID, names(sampleCounts))]
,inhibCounts =inhibCounts[match(SampleID, names(inhibCounts))]
, inhibRich=inhibRich[match(SampleID, names(inhibRich))]) %>%
mutate(percInhib = inhibCounts/n)
##### Adding Beta diversity turnover #####
#' Get distance to sample directly before for every sample
#' AKA: How similar was the current sample to the sample JUST before that time point for that individual toad?
#' This is different from beta DISPERSION. In cases where there is no sample before that sample, I omit it using NA
#' The distance from the previous sample is also done BEFORE I filter for "contaminated" individuals-- but this should
#' not matter because it is only dependent on the same indivdiual and all of those will be filtered out when we get rid of Bd contaminated individuals.
# Note; tried to do this with dplyr but it is VERY slow. Just used base R subsetting instead
for ( name in names(dm_all) ) {
mf.raw[,paste0("distance_",name)] <- NA
dm <- dm_all[[name]]
dm <- data.frame(dm, row.names = 1)
for ( i in 1:nrow(mf.raw) ) {
currentSample <- mf.raw$SampleID[i]
current <- mf.raw[i, c("toadID","time")]
prevSample <- mf.raw$SampleID[mf.raw$toadID==as.character(current[1,1]) & mf.raw$time == as.numeric(current[1,2]-1)]
if ( length(prevSample) > 0 & (currentSample %in% rownames(dm))) {
distTemp <- dm[currentSample,as.character(prevSample)]
if ( length(distTemp) > 0) {
mf.raw[i,paste0("distance_",name)] <- distTemp
}
}
# print(paste0("done",i," out of ",nrow(mf.tb)))
}
}
##### Adding Beta dispersion #####
#' Another aspect of beta diversity that might change between species and individuals is the dispersion of
#' an individual relative to all other individuals. That is, how much different is an individual from the centroid
#' of all samples at that time point of that species?
#'
# Now, filter mf to rarefied OTU tables
mf.rare <- mf.raw %>%
filter(SampleID %in% colnames(otu_rare))
for ( name in names(dm_all) ) {
disper <- vector()
for ( sp in levels(factor(mf.rare$species))) {
current.samps <- mf.rare %>%
filter(species==sp) %>%
dplyr::select(SampleID) %>%
pull()
current.dm <- dm_all$bray_curtis %>%
filter(X %in% current.samps) %>%
dplyr::select(one_of(c("X",current.samps)))
current.mf <- mf.rare %>%
filter(SampleID %in% current.samps)
# Same order?
# colnames(current.dm)[-1] == current.mf$SampleID
# There might be a warning that we are missing certain samples-- this is fine.
disp.temp <- betadisper(dist(data.frame(current.dm, row.names = 1)), group = (current.mf$time), type = "centroid")
disper <- c(disper, disp.temp$distances)
}
# add to mf.rare and mf.raw
mf.rare[,paste0("disper_",name)] <- data.frame(disper)[match(mf.rare$SampleID, rownames(data.frame(disper))),]
mf.raw[,paste0("disper_",name)] <- data.frame(disper)[match(mf.raw$SampleID, rownames(data.frame(disper))),]
}
#### Adding NMDS ####
# MAKE NMDS
dm.filt <- dm[mf.rare$SampleID,mf.rare$SampleID]
# Make NMDS of distance matrix
set.seed(1017937)
nmds.all <- isoMDS(dist(dm.filt), k = 2)
nmds <- nmds.all$points
colnames(nmds) <- c("NMDS1","NMDS2")
mf.rare <- cbind(mf.rare, nmds)
#### Created filtered mapping files with all extra information
# Note which controls are contaminated; get rid of them.
# Also: check to see if any controls were infected.
controls_infected <- mf.raw %>%
filter(BD_infected == "n", prepost == "Pos", PABD == TRUE) %>%
dplyr::select(toadID, time) %>%
group_by(toadID) %>%
summarize(firstInfect = min(time))
controls_infected
# above are the first time points that controls were found to be infected; need to get rid of samples after this point.
for ( r in 1:nrow(controls_infected)) {
indiv <- as.character(controls_infected[r,1])
firsttime <- as.numeric(controls_infected[r,2])
mf.rare[which( (mf.rare$toadID == indiv) & (mf.rare$time %in% firsttime:16) ), "orig_contam"] <- 1
mf.raw[which( (mf.raw$toadID == indiv) & (mf.raw$time %in% firsttime:16) ), "orig_contam"] <- 1
}
# Mapping file with only controls (training dataset)
# Save Rdata
save(mf.raw, file="mf.raw.RData")
save(mf.rare, file="mf.rare.RData")
# How many samples did we lose due to rarefaction?
c(nrow(mf.rare), nrow(mf.tb))
# Which samples did we lose due to rarefaction?
mf.tb %>%
filter(!(SampleID %in% colnames(otu_rare))) %>%
dplyr::select(SampleID) %>%
pull()
# View(mf_con)
mf_con <- mf.rare %>%
filter(BD_infected=="n")
# were any controls contaminated at any point?
con_contam <- mf_con %>%
filter(PABD==TRUE) %>%
dplyr::select(toadID, time) %>%
group_by(toadID) %>%
summarise(firstinfect=min(time))
# get all time points after that
toDel <- c()
for ( n in 1:length(con_contam$toadID)) {
toad <- con_contam$toadID[n]
minTime <- con_contam$firstinfect[n]
temp <- mf_con %>%
filter(toadID==toad, time>=minTime) %>%
dplyr::select(SampleID) %>%
pull()
toDel <- c(toDel, temp)
}
# Get rid of post-infected ones that weren't supposed to be infected
mf_con_without_init_infect <- mf_con %>%
filter(orig_contam == 0, !(SampleID %in% toDel))
# Mapping file with only treatment (testing dataset)
mf_treat <- mf.rare %>%
filter(BD_infected=="y")
mf_treat_without_init_infect <- mf_treat %>%
filter(orig_contam == 0)
save(mf_con_without_init_infect, file="mf_con_without_init_infect.RData")
save(mf_treat_without_init_infect, file="mf_treat_without_init_infect.RData")
#### Make OTU table of JUST inhibitory bacteria ####
# Should double check this at some point
temp_char_otu <- otu.tb.inhib %>%
dplyr::select(c(OTUID, inhib))
# Make con and treat with just inhibitory with taxa names
otu.temp <- t(otu.tb.inhib %>%
dplyr::select(-c(OTUID, inhib)))
otu.temp <- cbind(t(otu.temp/sampleCounts), temp_char_otu)
otu.inhibOnly.con <- otu.temp %>%
filter(inhib=="inhibitory") %>%
dplyr::select(mf_con_without_init_infect$SampleID, "OTUID")
otu.inhibOnly.treat <- otu.temp %>%
filter(inhib=="inhibitory") %>%
dplyr::select(mf_treat_without_init_infect$SampleID, "OTUID")
# save otu tables
save(otu.inhibOnly.con, file="otu.inhibOnly.con.RData")
save(otu.inhibOnly.treat, file="otu.inhibOnly.treat.RData")
#### PLOTTING EXP DESIGN ####
#+ fig.height=12, fig.width=5
mf.rare %>%
separate(toadID, into=c("sp2", "indiv"), remove = FALSE) %>%
mutate(indiv = factor(indiv, levels=c("1","2","3","4","5","6","7","8","9","10","11","12"))) %>%
mutate(Treatment=ifelse(BD_infected=="y","Bd-exposed","Control"), "LnBd_load" = eBD_log) %>%
mutate(Contaminated = factor(ifelse(orig_contam ==1, "!Contaminated upon arrival",NA), levels=c("!Contaminated upon arrival"))) %>%
ggplot(aes(x=time, y=indiv)) +
geom_line(aes(group=toadID, col=Treatment)) +
geom_point(aes(group=toadID,bg=LnBd_load), cex=4, pch=21)+
scale_color_manual(values=c("black","blue","orange")) +
scale_fill_gradient(low = "white", high = "red") +
geom_vline(aes(xintercept=5.5), col="orange")+
geom_point(aes(group=toadID, col=Contaminated), cex=1, pch=19)+ ## NEW LINE
facet_wrap(~species, nrow=5) +
xlab("Time") +
ylab("Individual Toad")
#### PLOTTING BETA COMPOSITION PLOTS ####
#+
# What do different species look like? (Control only)
mf_con_without_init_infect %>%
ggplot(aes(x=NMDS1, y=NMDS2)) +
geom_point(aes(col=species), cex=3)
# What do different species look like? (ALL data)
mf.rare %>%
ggplot(aes(x=NMDS1,y=NMDS2)) +
geom_point(aes(col=species), cex=2)
# Color dots by time
mf_con_without_init_infect %>%
ggplot(aes(x=NMDS1, y=NMDS2)) +
geom_point(aes(col=time), cex=3)
# Filtering dm to include only controls
dm.filt.con <- dm.filt[mf_con_without_init_infect$SampleID,mf_con_without_init_infect$SampleID ]
save(dm.filt.con, file="dm.filt.con.RData")
adonis2(dm.filt.con ~ species:time, data=mf_con_without_init_infect, by="margin")
adonis2(dm.filt.con ~ species + time + species:time, data=mf_con_without_init_infect)
#' There is a significant effect of species AND time AND interaction on COMPOSITION
rbind(mf_con_without_init_infect, mf_treat_without_init_infect) %>%
mutate(BD_infected=ifelse(BD_infected=="y","Treatment","Control")
, exposure = factor(exposure, levels=c("Pre","Post"))) %>%
ggplot(aes(x=NMDS1,y=NMDS2)) +
geom_point(aes(bg=exposure, col=PABD), cex=2, alpha=0.8, pch=21) +
facet_grid(BD_infected ~ species) +
scale_color_manual(values=c("white","black")) +
scale_fill_manual(values=c("blue","red"))
mf_treat_without_init_infect %>%
filter(exposure=="Post") %>%
ggplot(aes(x=NMDS1, y=NMDS2)) +
geom_point(aes(bg=time, col=PABD), cex=3, pch=21) +
scale_color_manual(values=c("white","red"))+
facet_wrap(~species, nrow=1)
mf_treat_without_init_infect_post <- mf_treat_without_init_infect %>%
filter(exposure=="Post")
dm.filt.treat <- dm.filt[mf_treat_without_init_infect_post$SampleID, mf_treat_without_init_infect_post$SampleID]
save(dm.filt.treat, file="dm.filt.treat.RData")
#### ALPHA DIVERISTY ####
#' ### ALPHA DIVERSITY PLOTTING
#' \
#' First thing to do is calculated expected alpha diversity for each individual, and then
#' calculate how for that particular sample was from the "expected" diversity. Before
#' we do that though, let's plot alpha diversity to see how it looks.
# Fit normal and lognormal distributions to each of these
shannon_normal_param <- fitdistr(mf_con_without_init_infect$shannon, densfun="Normal")
x.fit.shannon <- seq(min(mf_con_without_init_infect$shannon)-sd(mf_con_without_init_infect$shannon)
, max(mf_con_without_init_infect$shannon)+sd(mf_con_without_init_infect$shannon)
, length.out = 100)
y.pred.shannon <- dnorm(x.fit.shannon, mean = shannon_normal_param$estimate[1], sd = shannon_normal_param$estimate[2])
obsotu_lognormal_param <- fitdistr(mf_con_without_init_infect$logRich, densfun="Normal")
x.fit.obsotu <- seq(min(mf_con_without_init_infect$logRich)-sd(mf_con_without_init_infect$logRich)
, max(mf_con_without_init_infect$logRich)+sd(mf_con_without_init_infect$logRich)
, length.out = 100)
y.pred.obsotu <- dnorm(x.fit.obsotu, mean = (obsotu_lognormal_param$estimate[1]), sd = (obsotu_lognormal_param$estimate[2]))
### new: lognormal ###
obsotu_lognormal_param <- fitdistr(mf_con_without_init_infect$observed_otus, densfun="log-normal")
obsotu_normal_param <- fitdistr(mf_con_without_init_infect$observed_otus, densfun="Normal")
x.fit.obsotu <- seq(min(mf_con_without_init_infect$observed_otus)-sd(mf_con_without_init_infect$observed_otus)
, max(mf_con_without_init_infect$observed_otus)+sd(mf_con_without_init_infect$observed_otus)
, length.out = 100)
y.pred.obsotu <- dlnorm(x.fit.obsotu, meanlog = (obsotu_lognormal_param$estimate[1]), sdlog = (obsotu_lognormal_param$estimate[2]))
###
gg_shannon_all <- ggplot(data=mf_con_without_init_infect, aes(x=shannon)) +
geom_histogram(aes(y=..density..), bins=20) +
geom_line(data=data.frame(x=x.fit.shannon, y=y.pred.shannon), aes(x=x, y=y), col="red")
gg_obsotu_all <- ggplot(data=mf_con_without_init_infect, aes(x=observed_otus)) +
geom_histogram(aes(y=..density..), bins=20) +
geom_line(data=data.frame(x=x.fit.obsotu, y=y.pred.obsotu), aes(x=x, y=y), col="red")
grid.arrange(gg_shannon_all, gg_obsotu_all, nrow=1)
# Check to see if turnover is changing with time significantly
gg_divtime_con <- mf_con_without_init_infect %>%
filter(!is.na(shannon)) %>%
ggplot(aes(x=time, y=shannon)) +
geom_line(aes(group=toadID)) +
geom_point(aes(col=PABD)) +
scale_color_manual(values="blue")+
facet_grid(~species)
gg_divtime_treat <- mf_treat_without_init_infect %>%
filter(!is.na(shannon)) %>%
ggplot(aes(x=time, y=shannon)) +
geom_line(aes(group=toadID)) +
geom_point(aes(group=toadID, col=PABD)) +
scale_color_manual(values=c("blue","red"))+
geom_vline(aes(xintercept=5.5)) +
facet_grid(~species)
grid.arrange(gg_divtime_con, gg_divtime_treat, nrow=2)
#' Does diversity change over time in control individuals?
# Type I ANOVA to test for interaction-- (AB | A, B)
anova(lm(shannon ~ species*time, data=mf_con_without_init_infect))
# Use Type II ANOVA (no interaction present)
Anova(lm(shannon ~ species + time, data=mf_con_without_init_infect), type = 2)
# There is a significant effect of species but not time or interaction
#' Does richness change over time in treatment individuals?
# Type I ANOVA to test for interaction (AB | A,B)
anova(lm(shannon ~ species*time, data=mf_treat_without_init_infect))
# Type III ANOVA (valid in presence of interaction)
Anova(lm(shannon ~ species * time, data=mf_treat_without_init_infect, contrasts=list(species=contr.sum)), type=3)
gg_richtime_con <- mf_con_without_init_infect %>%
filter(!is.na(observed_otus)) %>%
ggplot(aes(x=time, y=log(observed_otus))) +
geom_line(aes(group=toadID)) +
geom_point(aes(group=toadID, col=PABD)) +
scale_color_manual(values=c("blue","red"))+
facet_grid(~species)
gg_richtime_treat <- mf_treat_without_init_infect %>%
filter(!is.na(observed_otus)) %>%
ggplot(aes(x=time, y=log(observed_otus))) +
geom_line(aes(group=toadID)) +
geom_point(aes(group=toadID, col=PABD)) +
geom_vline(aes(xintercept=5.5)) +
scale_color_manual(values=c("blue","red"))+
facet_grid(~species)
grid.arrange(gg_richtime_con, gg_richtime_treat, nrow=2)
#' Does richness change over time in control individuals?
# Type I ANOVA to test for interaction-- (AB | A, B)
anova(lm(logRich ~ species*time, data=mf_con_without_init_infect))
# Use Type II ANOVA (no interaction present)
Anova(lm(logRich ~ species + time, data=mf_con_without_init_infect), type = 2)
# There is a significant effect of species but not time or interaction
#' Does richness change over time in treatment individuals?
# Type I ANOVA to test for interaction (AB | A,B)
anova(lm(logRich ~ species*time, data=mf_treat_without_init_infect))
# Type III ANOVA (valid in presence of interaction)
Anova(lm(logRich ~ species * time, data=mf_treat_without_init_infect, contrasts=list(species=contr.sum)), type=3)
#### SHANNON ####
#' We see that shannon and log of observed otus look approximately normal. Great!
#' Now, let's fit some models to this data. For shannon and logRich, we should use a LMM
#' to find out the average richness for each species and the average richness for each individual through time.
#' \
#' u ~ N(u_i, sigma_i)\
#' u_i = a_j\
#' a_j ~ N(u_sp, sigma_sp)\
#' where i = sample, j = individual, sp = species
#' Below, we use the dataset with JUST the controls.
# There was no effect of time
if ( RERUN_RICH ) {
lmer_shannon <- stan_lmer(shannon ~ -1 + species + (1|toadID), data=mf_con_without_init_infect
, prior = normal(0, 10, autoscale = TRUE)
, seed = 98374)
save(lmer_shannon, file="lmer_shannon.RData")
} else {
load("lmer_shannon.RData")
}
prior_summary(lmer_shannon)
# Look at distributions according to models
samps_lmer_shannon <- rstan::extract(lmer_shannon$stanfit)
pre_test_set <- mf_treat_without_init_infect %>%
filter(time<6)
## PLOT OF EXPECTED RCIHNESS FOR EACH SPECIES
samps_lmer_shannon$beta %>%
as.data.frame() %>%
rename(Anbo=V1, Rhma=V2, Osse=V3, Raca=V4, Rapi=V5) %>%
dplyr::select(Anbo,Rhma,Osse,Raca,Rapi) %>%
gather(key=species, value=shannon) %>%
mutate(species=factor(species, levels=c("Anbo","Rhma","Osse","Raca","Rapi"))) %>%
ggplot(mapping=aes(x=species, y=shannon))+
geom_violin() +
geom_point(data=mf_con_without_init_infect, aes(y=shannon, x=species), position = position_jitter(width = 0.1, height=0), col="blue") +
geom_point(data=pre_test_set, aes(y=shannon, x=species), position=position_jitter(width = 0.1, height=0), col="red")
# Get standard deviation between toad individuals and samples
samp_sigma <- samps_lmer_shannon$aux
toadID_sigma <- sd(samps_lmer_shannon$b[,ncol(samps_lmer_shannon$b)])
# Now, we can calculate the probability that the "test" dataset values come from this distribution
# List of individuals
treat_indiv <- unique(mf_treat_without_init_infect$toadID)
# List of each species
species_list <- levels(factor(mf_con_without_init_infect$species))
exp_distr <- as.data.frame(matrix(ncol=length(species_list), nrow=4000, dimnames = list(1:4000, species_list)))
for ( num_sp in 1:length(species_list)) {
exp_distr[,num_sp] <- rnorm(length(samps_lmer_shannon$beta[,num_sp]), mean=samps_lmer_shannon$beta[,num_sp], sd=toadID_sigma)
}
# Loop through and calculate probability of having diversity at that level
pre_exp_indiv <- data.frame(toadID=treat_indiv, exp_shannon=rep(NA, length(treat_indiv)), p_shannon=rep(NA, length(treat_indiv)), infect=rep(NA, length(treat_indiv)))
for ( i in treat_indiv ) {
n_row <- match(i, treat_indiv)
sp <- unlist(strsplit(i,"_"))
num_sp <- match(sp[1], levels(factor(mf_con_without_init_infect$species)))
temp_shannon <- mf_treat_without_init_infect %>%
filter(toadID==i, time <=5 ) %>%
dplyr::select(shannon) %>%
pull()
if ( length(temp_shannon)>1 ) {
exp_shannon <- fitdistr(temp_shannon, "normal")$estimate[1]
} else {
exp_shannon <- temp_shannon
}
# pred_distr <- rnorm(length(samps_lmer_shannon$beta[,num_sp]), mean=rnorm(length(samps_lmer_shannon$beta[,num_sp]), mean=samps_lmer_shannon$beta[,num_sp], sd=toadID_sigma), sd=samp_sigma)
p_shannon <- sum(exp_distr[,sp[1]]<exp_shannon)/length(exp_distr[,sp[1]])
### Did they get infected?
infect <- max(mf_treat_without_init_infect %>%
filter(toadID==i) %>%
dplyr::select(eBD_raw) %>%
pull()
)
pre_exp_indiv[n_row,c("exp_shannon","p_shannon","infect")] <- c(exp_shannon, p_shannon, infect)
}
# Get estimates for control toads
shannon_species_exp <- fixef(lmer_shannon) %>%
as_tibble() %>%
mutate(species=c("Anbo", "Rhma","Osse","Raca","Rapi"))
con_toad_est_shannon <- ranef(lmer_shannon)$toadID %>%
rename(sp_est="(Intercept)") %>%
mutate(toadID=rownames(ranef(lmer_shannon)$toadID)) %>%
separate(toadID, into=c("species","num"), sep="_",remove=FALSE) %>%
dplyr::select(-num) %>%
left_join(shannon_species_exp, by = "species") %>%
mutate(est_shannon=sp_est+value)
con_exp_indiv_shannon <- data.frame(toadID=con_toad_est_shannon$toadID, exp_shannon=rep(NA, length(con_toad_est_shannon$toadID)), p_shannon=rep(NA, length(con_toad_est_shannon$toadID)))
for ( i in 1:nrow(con_toad_est_shannon) ) {
s <- con_toad_est_shannon[i,"species"]
exp_shannon <- con_toad_est_shannon[i,"est_shannon"]
p_shannon <- sum(exp_distr[,s]<exp_shannon)/length(exp_distr[,s])
con_exp_indiv_shannon[i,c("exp_shannon","p_shannon")] <- c(exp_shannon, p_shannon)
}
# create species column
pre_exp_indiv <- pre_exp_indiv %>%
separate(toadID, into=c("species","indiv"), remove = FALSE) %>%
mutate(species=factor(species, levels=c("Anbo","Rhma","Osse","Raca","Rapi")))
# Plot results
gg_shan_p <- ggplot(pre_exp_indiv, aes(x=p_shannon, y=log(infect+1))) +
geom_point(aes(color=species), cex=4) +
geom_smooth(aes(color=species),method=lm, se = FALSE) +
geom_smooth(method=lm, se=FALSE, col="black")
# if we'd JUST plotted raw values
gg_shan_raw <- ggplot(pre_exp_indiv, aes(x=exp_shannon, y=log(infect+1)))+
geom_point(aes(color=species), cex=4) +
geom_smooth(method=lm, se=FALSE) +
geom_smooth(aes(color=species),method=lm, se = FALSE)
grid.arrange(gg_shan_p, gg_shan_raw, nrow=1)
exp_distr %>%
gather(key=species, value=shannon) %>%
mutate(species=factor(species,levels=c("Anbo","Rhma","Osse","Raca","Rapi")))%>%
ggplot(aes(x=species, y=shannon)) +
geom_violin() +
geom_point(data=pre_exp_indiv, aes(x=species, y=exp_shannon, col=log(infect+1)), cex=4, position=position_jitter(height=0, width=0.1))
all_p <- pre_exp_indiv %>%
dplyr::select(toadID, exp_shannon, p_shannon,infect)
#### RICHNESS (observed OTUs) ####
if (RERUN_RICH) {
lmer_logRich <- stan_lmer(logRich ~ -1 + species + (1|toadID), data=mf_con_without_init_infect
, prior = normal(0, 10, autoscale = TRUE)
, seed = 98374
, adapt_delta = 0.96)
save(lmer_logRich, file="lmer_logRich.RData")
} else {
load(file="lmer_logRich.RData")
}
prior_summary(lmer_logRich)
# Look at distributions according to models
samps_lmer_logRich <- rstan::extract(lmer_logRich$stanfit)
pre_test_set <- mf_treat_without_init_infect %>%
filter(time<6)
# Plot of expected by species, and real samples
samps_lmer_logRich$beta %>%
as.data.frame() %>%
rename(Anbo=V1, Rhma=V2, Osse=V3, Raca=V4, Rapi=V5) %>%
dplyr::select(Anbo,Rhma,Osse,Raca,Rapi) %>%
gather(key=species, value=logRich) %>%
mutate(species=factor(species, levels=c("Anbo","Rhma","Osse","Raca","Rapi"))) %>%
ggplot(mapping=aes(x=species, y=logRich))+
geom_violin() +
geom_point(data=mf_con_without_init_infect, aes(y=logRich, x=species), position = position_jitter(width = 0.1, height=0), col="blue") +
geom_point(data=pre_test_set, aes(y=logRich, x=species), position=position_jitter(width = 0.1, height=0), col="red")
# Get standard deviation between toad individuals and samples
samp_sigma <- samps_lmer_logRich$aux
toadID_sigma <- sd(samps_lmer_logRich$b[,ncol(samps_lmer_logRich$b)])
# Now, we can calculate the probability that the "test" dataset values come from this distribution
# List of individuals
treat_indiv <- unique(mf_treat_without_init_infect$toadID)
# List of each species
species_list <- levels(factor(mf_con_without_init_infect$species))
exp_distr <- as.data.frame(matrix(ncol=length(species_list), nrow=4000, dimnames = list(1:4000, species_list)))
for ( num_sp in 1:length(species_list)) {
exp_distr[,num_sp] <- rnorm(length(samps_lmer_logRich$beta[,num_sp]), mean=samps_lmer_logRich$beta[,num_sp], sd=toadID_sigma)
# exp_distr[,num_sp] <- rnorm(length(samps_lmer_logRich$beta[,num_sp]), mean=rnorm(length(samps_lmer_logRich$beta[,num_sp]), mean=samps_lmer_logRich$beta[,num_sp], sd=toadID_sigma), sd=samp_sigma)
}
# Loop through and calculate probability of having diversity at that level
pre_exp_indiv <- data.frame(toadID=treat_indiv, exp_logRich=rep(NA, length(treat_indiv)), p_logRich=rep(NA, length(treat_indiv)), infect=rep(NA, length(treat_indiv)))
for ( i in treat_indiv ) {
n_row <- match(i, treat_indiv)
sp <- unlist(strsplit(i,"_"))
num_sp <- match(sp[1], levels(factor(mf_con_without_init_infect$species)))
temp_logRich <- mf_treat_without_init_infect %>%
filter(toadID==i, time <=5 ) %>%
dplyr::select(logRich) %>%
pull()
if ( length(temp_logRich)>1 ) {
exp_logRich <- fitdistr(temp_logRich, "normal")$estimate[1]
} else {
exp_logRich <- temp_logRich
}
# pred_distr <- rnorm(length(samps_lmer_logRich$beta[,num_sp]), mean=rnorm(length(samps_lmer_logRich$beta[,num_sp]), mean=samps_lmer_logRich$beta[,num_sp], sd=toadID_sigma), sd=samp_sigma)
p_logRich <- sum(exp_distr[,sp[1]]<exp_logRich)/length(exp_distr[,sp[1]])
### Did they get infected?
infect <- max(mf_treat_without_init_infect %>%
filter(toadID==i) %>%
dplyr::select(eBD_raw) %>%
pull()
)
pre_exp_indiv[n_row,c("exp_logRich","p_logRich","infect")] <- c(exp_logRich, p_logRich, infect)
}
# Get estimates for control toads
logRich_species_exp <- fixef(lmer_logRich) %>%
as_tibble() %>%
mutate(species=c("Anbo", "Rhma","Osse","Raca","Rapi"))
con_toad_est_logRich <- ranef(lmer_logRich)$toadID %>%
rename(sp_est="(Intercept)") %>%
mutate(toadID=rownames(ranef(lmer_logRich)$toadID)) %>%
separate(toadID, into=c("species","num"), sep="_",remove=FALSE) %>%
dplyr::select(-num) %>%
left_join(logRich_species_exp, by = "species") %>%
mutate(est_logRich=sp_est+value)
con_exp_indiv_logRich <- data.frame(toadID=con_toad_est_logRich$toadID, exp_logRich=rep(NA, length(con_toad_est_logRich$toadID)), p_logRich=rep(NA, length(con_toad_est_logRich$toadID)))
for ( i in 1:nrow(con_toad_est_logRich) ) {
s <- con_toad_est_logRich[i,"species"]
exp_logRich <- con_toad_est_logRich[i,"est_logRich"]
p_logRich <- sum(exp_distr[,s]<exp_logRich)/length(exp_distr[,s])
con_exp_indiv_logRich[i,c("exp_logRich","p_logRich")] <- c(exp_logRich, p_logRich)
}
# create species column
pre_exp_indiv <- pre_exp_indiv %>%
separate(toadID, into=c("species","indiv"), remove = FALSE) %>%
mutate(species=factor(species,levels=c("Anbo","Rhma","Osse","Raca","Rapi")))
# Plot results
gg_logRich_p <- ggplot(pre_exp_indiv, aes(x=p_logRich, y=log(infect+1))) +
geom_point(aes(color=species), cex=4) +
geom_smooth(aes(color=species),method=lm, se = FALSE) +
geom_smooth(method=lm, se=FALSE, col="black")
# if we'd JUST plotted raw values
gg_logRich_raw <- ggplot(pre_exp_indiv, aes(x=exp_logRich, y=log(infect+1)))+
geom_point(aes(color=species), cex=4) +
geom_smooth(aes(color=species),method=lm, se = FALSE) +
geom_smooth(method=lm, se=FALSE)
grid.arrange(gg_logRich_p, gg_logRich_raw, nrow=1)
exp_distr %>%
gather(key=species, value=logRich) %>%
mutate(species=factor(species,levels=c("Anbo","Rhma","Osse","Raca","Rapi")))%>%
ggplot(aes(x=species, y=logRich)) +
geom_violin() +
geom_point(data=pre_exp_indiv, aes(x=species, y=exp_logRich, col=log(infect+1)), cex=4, position=position_jitter(height=0, width=0.1))
all_p <- pre_exp_indiv %>%
dplyr::select(toadID, exp_logRich, p_logRich) %>%
full_join(all_p, by="toadID")
#### BETA DIVERSTY ####
#### Dispersion ####
#### Beta plot
# First, fit a beta distribution
x.fit.dist <- seq(min((mf_con_without_init_infect$disper_bray_curtis), na.rm = TRUE)-sd((mf_con_without_init_infect$disper_bray_curtis), na.rm = TRUE)
, max((mf_con_without_init_infect$disper_bray_curtis), na.rm = TRUE)+sd((mf_con_without_init_infect$disper_bray_curtis), na.rm = TRUE)
, length.out = 100)
dist.fit.lognorm <- fitdistr(mf_con_without_init_infect$disper_bray_curtis[!is.na(mf_con_without_init_infect$disper_bray_curtis)]
, densfun="lognormal")
y.pred.dist.lognorm <- dlnorm(x.fit.dist, meanlog = dist.fit.lognorm$estimate[1], sdlog = dist.fit.lognorm$estimate[2])
# Try a normal too?
dist.fit.norm <- fitdistr((mf_con_without_init_infect$disper_bray_curtis[!is.na(mf_con_without_init_infect$disper_bray_curtis)])
, densfun="Normal")
y.pred.dist.norm <- dnorm(x.fit.dist, mean = dist.fit.norm$estimate[1], sd = dist.fit.norm$estimate[2])
mf_con_without_init_infect %>%
filter(!is.na(disper_bray_curtis)) %>%
ggplot(aes(x=disper_bray_curtis)) +
geom_histogram(aes(y=..density..), bins=25) +
geom_line(data=data.frame(x=x.fit.dist, y=y.pred.dist.lognorm), aes(x=x, y=y), col="red") +
geom_line(data=data.frame(x=x.fit.dist, y=y.pred.dist.norm), aes(x=x, y=y), col="blue")
#' lognorml fits better.
# Check to see if turnover is changing with time significantly
gg_disttime_con <- mf_con_without_init_infect %>%
filter(!is.na(disper_bray_curtis)) %>%
ggplot(aes(x=time, y=disper_bray_curtis)) +
geom_line(aes(group=toadID)) +
geom_point(aes(group=toadID, col=PABD)) +
scale_color_manual(values=c("blue","red"))+
facet_grid(~species)
gg_disttime_treat <- mf_treat_without_init_infect %>%
filter(!is.na(disper_bray_curtis)) %>%
ggplot(aes(x=time, y=disper_bray_curtis)) +
geom_line(aes(group=toadID)) +
geom_point(aes(group=toadID, col=PABD)) +
scale_color_manual(values=c("blue","red"))+
geom_vline(aes(xintercept=5.5))+
facet_grid(~species)
grid.arrange(gg_disttime_con, gg_disttime_treat, nrow=2)
# Is there an effect of species and time on controls?
# Type I ANOVA (to check for interaction) (AB | A,B)
anova(lm(log(disper_bray_curtis) ~ species*time, data=mf_con_without_init_infect))
# Type II ANOVA with no interaction
Anova(lm(log(disper_bray_curtis) ~ species + time, data=mf_con_without_init_infect), type = 2)
# Is there an effect of species and time on treatment??