library(reshape2)
library(ggplot2)
library(OpasnetUtils)
library(vegan)
# Sp accumulation ichno simple
# version 2014-11-09 by Hanna Tuomisto
# PART 1
# open a data tables with data of species per trap sample, environmental data and literature data
# combine samples from the same trap, by soil type and by region
# combine data within traps by sampling season
# export the resulting data tables to text files
# PART 2
# open data saved in PART 1
# calculate number of species and its chao and ACE estimates, number of individuals, diversity
# save resulting table to a new file
# PART 3
# open data saved in PART 2 and construct and save species-accumulation graphs
# PART 4
# run NMDS with trapwise data and save the graphs
########## PART 1: Open data files and combine data ##########
#### check default directory
getwd()
setwd("/Users/hantuo/Documents/j1 Anun koinobiontit") # for iMac
setwd("/Users/hantuo/Documents/j1 Isrraelin iknot") # for iMac
setwd("/Users/hannatuomisto/Documents/j1 Isrraelin iknot") # for Air
setwd("/Users/hannatuomisto/Documents/Dokumentit/j1 Anun koinobiontit") # for Pro
setwd("/Users/hannatuomisto/Documents/Dokumentit/j1 Isrraelin iknot") # for Pro
###### open raw data files ######
# open sites by species data file for the own field data
# columns should be as follows: "TrapID","SeqInTrap","SampleCode","StartYear","StartMonth","StartDay",
# "EndYear","EndMonth","EndDay", further columns each corresponding to one species
path <- file.choose()
spraw <- read.csv(path, header=TRUE, stringsAsFactors = FALSE)
spname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
# open file with trap locality info
# columns should be as follows: "TrapID","SoilType","Region","Latitude"
path <- file.choose()
trapraw <- read.csv(path, header=TRUE, stringsAsFactors = FALSE)
trapname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
###### ALLA OLEVA HOITUISI MERGELLÄ. JOS HALUTAAN, VOI LISÄTÄ SKIRPTIN JOKA KERTOO MITÄ TIPUTETTIIN.
a <- merge(spraw, trapraw, by = "TrapID")
# eliminate traps that are only found in one of the data files
sptable <- spraw[spraw$TrapID %in% trapraw$TrapID,]
traptable <- trapraw[trapraw$TrapID %in% spraw$TrapID,]
if((el=nrow(spraw) - nrow(sptable)) != 0) {
print(paste(el, "species data rows will be omitted from analyses because trap info is not available for:",
paste0(spraw[!spraw$TrapID %in% trapraw$TrapID,"TrapID"], collapse=", ")))
}
if((el=nrow(trapraw) - nrow(traptable)) != 0) {
print(paste(el, "traps had no species data:", paste0(trapraw[!trapraw$TrapID %in% spraw$TrapID,"TrapID"], collapse=", ")))
}
# open file with literature data
# columns should contain: "MTM", "Species", "Individuals", "Latitude", "Longitude", Elev.min", "Elev.max"
# coordinates can also be: "Lat.deg", "Lat.min"
path <- file.choose()
litraw <- read.csv(path, header=TRUE)
litname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
if(FALSE) { # if needed, convert latitudes to decimal degrees by running the next line:
litraw[,ncol(litraw+1)] <- litraw$Lat.deg + litraw$Lat.min/60
}
###### choose options and prepare the data ######
#### choose what combinations of sampling periods to use
comb <- c("TrapID", "SoilType", "Region")
comb <- c("TrapID", "Region")
comb <- c("TrapID")
comb <- c("Region")
comb <- c("SoilType")
#### set name for file with species accumulation data
# info on combination will be added automatically
# !!! any file with the same name in the working directory will be overwritten without warning !!!
filename <- paste(spname,"sp-accum", sep="_")
#### Calculate catch for different sampling periods and traps
#### ALLA OLEVAN VOISI KORVATA POSIXct:llä
# convert start and end date to sequential day
days <- cbind(1:12, c(31,28,31,30,31,30,31,31,30,31,30,31), c(rep(0,12)))
colnames(days) <- c("month", "monthlength", "cumdays")
for(i in 2:nrow(days)) days[i,3] <- days[i-1,2]+days[i-1,3]
StartDay <- (sptable$StartYear-min(sptable$StartYear))*365+sptable$StartDay+days[sptable$StartMonth,3]
EndDay <- (sptable$EndYear-min(sptable$StartYear))*365+sptable$EndDay+days[sptable$EndMonth,3]
NumDays <- EndDay-StartDay
SamplingGap <- 0
# create a new data matrix with sequential days
data <- cbind(TrapID=sptable$TrapID, StartDay, EndDay, NumDays, SamplingGap, sptable[,10:ncol(sptable)])
data <- na.omit(data) # eliminate rows with missing dates
# aggregate data by trap
data_tr <- aggregate(data[,-1], by=list(data$TrapID), FUN=sum, simplify=TRUE)
colnames(data_tr)[1] <- colnames(data)[1]
# add SoilType and Region (but not Latitude)
data_sub <- merge(traptable[,-4], data)
data_tr <- merge(traptable[,-4], data_tr)
# fix start and end days
data_tr$StartDay <- aggregate(data$StartDay, by=data["TrapID"], FUN=min, simplify=TRUE)[,2]
data_tr$EndDay <- aggregate(data$EndDay, by=list(data$TrapID), FUN=max, simplify=TRUE)[,2]
data_tr$SamplingGap <- data_tr$EndDay - data_tr$StartDay - data_tr$NumDays
#### Code below can be used after data is managed with functions longfo and prepare.
temp <- aggregate(a$Indiv_cum, INDEX = a[c("Time_cum", "Trap")], FUN = sum)
temp2 <- aggregate(a$Spec_cum, INDEX = a[c("Time_cum", "Trap")], FUN = sum)
ggplot(subset(temp, Trap == "a"), aes(x = Time_cum, y = Freq, colour = Trap))+geom_step()
ggplot(a,aes(x=Cum,color=Trap, group = Trap)) +
stat_bin(data=a, aes(y=cumsum(..count..)),geom="step")+
stat_bin(data=subset(a,Trap=="b"),aes(y=cumsum(..count..)),geom="step")
ggplot(a, aes(x=Cum, y = cumsum(Individuals))) + geom_line()
# run data aggregation within and across traps
for(m in seq(length(comb))) { # loop through the aggregation criteria
if(comb[m]=="TrapID") {
data_m <- data_sub
} else { # use data aggregated by trap
data_m <- data_tr
if(comb[m]=="SoilType") data_m <- data_m[data_m$SoilType != "",] # exclude traps with no SoilType
}
# create matrix to compile data on sampling times and numbers of species
sp_time <- data_m
for(n in seq(unique(data_m$Region))) { # repeat for each region separately
comb_n <- data_m[data_m$Region==unique(data_m$Region)[n],]
for(p in if(comb[m]=="TrapID") 1 else 1:10) {
for(i in seq(unique(comb_n[,comb[m]]))) { # loop through the unique values of the m:th aggregation criterion
comb_i <- comb_n[comb_n[,comb[m]]==unique(comb_n[,comb[m]])[i],]
if(nrow(comb_i) > 1) {
if(comb[m]=="TrapID") {
comb_i <- comb_i[order(comb_i$StartDay, comb_i$EndDay),] # sort rows by date
} else {
comb_i <- comb_i[sample(nrow(comb_i), nrow(comb_i)),] # randomize row order
}
first <- comb_i[1,]
for(k in 2:(nrow(comb_i))) {
second <- comb_i[k,]
new <- nrow(sp_time)+1
sp_time[new,"Region"] <- first$Region # this creates a new row
sp_time[new,"SoilType"] <- if(first$SoilType==second$SoilType) {first$SoilType} else {paste(first$SoilType,second$SoilType,sep=" ")}
sp_time[new,"TrapID"] <- if(first$TrapID==second$TrapID) {first$TrapID} else {paste(first$TrapID,second$TrapID,sep=" ")}
sp_time[new,"StartDay"] <- min(first$StartDay,second$StartDay)
sp_time[new,"EndDay"] <- max(first$EndDay,second$EndDay)
sp_time[new,"NumDays"] <- sum(first$NumDays,second$NumDays)
if(comb[m]=="TrapID") {
sp_time[new,"SamplingGap"] <- max(first$StartDay,second$StartDay)-min(first$EndDay,second$EndDay)+first$SamplingGap+second$SamplingGap
} else {sp_time[new,"SamplingGap"] <- NA}
sp_time[new,8:ncol(sp_time)] <- first[,8:ncol(first)]+second[,8:ncol(second)]
# redefine first to be the combination of first and second
first <- sp_time[new,]
}
}
}
}
}
write.csv(sp_time, file=paste(filename, "_", comb[m], ".csv", sep=""))
}
########## PART 2: Calculate number of individuals and species, estimates of S and diversity ##########
library(vegan)
#### open the file with aggregated sampling periods to be used in calculations
path <- file.choose()
to_est <- read.csv(path, row.names=1, stringsAsFactors = FALSE)
to_estname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
#### calculate individuals, species, estimates and diversity and save as table
estS <- t(estimateR(to_est[,8:ncol(to_est)]))
estSadd <- cbind("ChaoAddRel"=100*(estS[,"S.chao1"]/estS[,"S.obs"]-1),
"ChaoAddAbs"=estS[,"S.chao1"]-estS[,"S.obs"])
renyiD <- renyi(to_est[,8:ncol(to_est)], scales=c(0,1,2), hill=TRUE)
colnames(renyiD) <- paste("Div.q", colnames(renyiD), sep="")
estSdiv <- cbind(
Region = to_est$Region,
MTM = to_est$NumDays/30,
StartDay = to_est$StartDay,
Individuals = rowSums(to_est[,8:ncol(to_est)]),
estS[,-(4:5)],
estSadd,
renyiD,
TrapID = to_est$TrapID,
SoilType = to_est$SoilType
)
colnames(estSdiv)[which(colnames(estSdiv)=="S.obs")] <- "Species"
write.csv(estSdiv, file=paste(to_estname, "_S_est_D.csv", sep=""))
########## PART 3: Make species accumulation graphs ##########
#### open the result file for plotting, name ends in "_S_est_D.csv" ####
path <- file.choose()
toplot <- read.csv(path, row.names=1, stringsAsFactors = FALSE)
toplotname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
if(any("Species" %in% colnames(toplot))) {"OK"} else {"The opened file has no Species column!"}
#### choose if source file name is used as title for the plot ####
title <- toplotname
# title <- "" # choose this for publication quality
#### choose the color scheme ####
col_choice <- 1 # 6 colors by soil type; optimised for Clay, Floodplain, Loam, Sand, Secondary, Terrace
# col_choice <- 5 # as 1, but add abbreviation of Region to legend
# col_choice <- 2 # 2 colours by Region
# col_choice <- 6 # all symbols black
# col_choice <- 3 # 4 colours by SoilType (Loreto) NOT FUNCTIONAL YET
# col_choice <- 4 # 1 colour (all sites the same) NOT FUNCTIONAL YET
# the colors are automatically defined here on the basis of the above choice
if((col_choice==1) | (col_choice==5)) {
col_lut <- cbind("SoilType"=c(sort(unique(toplot$SoilType))), "Color"=c("darkgoldenrod","blue","brown4","red","darkgreen","darkblue")
, "Rank"=c(1,5,2,3,4,6))
col_lut <- col_lut[order(col_lut[,"Rank"]),]
col_source <- "SoilType"
if(col_choice==5) {
col_lut <- cbind(col_lut, "Reg.abbr"=col_lut[,"SoilType"])
for (i in seq(nrow(col_lut))) {
col_lut[i,"Reg.abbr"] <- gsub("[:a-z: :blank:]","",toplot$Region[which(toplot$SoilType==col_lut[i,1])[1]])
}
}
}
if(col_choice==2) {
col_lut <- cbind(c(sort(unique(toplot$Region))), c("black", "blue"))
col_source <- "Region"
}
if(col_choice==3) { # this will only work for Anu's data
toplot <- toplot[which(toplot$Region %in% c("NRAM", "ACC")),]
col_lut <- cbind(c(sort(unique(toplot$SoilType))), c("darkblue", "blue", "red", "darkgoldenrod"))
col_source <- "SoilType"
}
if(col_choice==4) { # this will only work for Anu's data
toplot <- toplot[which(toplot$Region %in% c("OnkoneGare", "Tiputini")),]
col_lut <- cbind(c(sort(unique(toplot$Region))), c("black"))
col_source <- "Region"
}
# legend specifications for figures
col_legend <- ifelse(rbind(col_lut[,1])!="", rbind(col_lut[,1]), "Unspecified")
if(col_choice == 5) {
col_legend <- paste(col_lut[,"Reg.abbr", drop=FALSE], col_legend)
}
if((col_choice==1) | (col_choice==5)) {col_lit <- "black"} else {col_lit <- "red"}
# check on screen that the colours are as they should
print(col_lut)
#### Choose one of the plot types: ####
figure <- "Traplines" # FIG 1: trap-wise species accumulation lines
figure <- "Estimates" # FIG 2: plot with estimated number and percentage of unseen species
figure <- "Effort" # FIG 3: Plot with individuals and spp vs MTM
figure <- "Diversity" # FIG 4: Plot with richness and diversity vs individuals
figure <- "Latitude" # FIG 5: Plot with richness against latitude
#### these plot options are set automatically on the basis of choices made above ####
# FIG 1: Trap-wise species accumulation lines
if(figure=="Traplines") {
mfrow <- c(1,3) # defines the number of panels
hw <- 2.7 # defines the dimensions of each panel
x_own <- cbind(toplot$MTM, toplot$MTM, toplot$Individuals)
xlab <- c("Trap months", "Trap months", "Individuals")
y_own <- cbind(toplot$Individuals, toplot$Species, toplot$Species)
ylab <- c("Individuals", "Species", "Species")
lit <- FALSE
cex_lat <- FALSE
log <- ""
col_pos <- "topleft"
col_panel <- 1
panel_pos <- "bottomright"
}
# FIG 2: Plot with estimated number and percentage of unseen species
if(figure=="Estimates") {
mfrow <- c(1,2) # defines the number of panels
hw <- 4.2 # defines the dimensions of each panel
x_own <- cbind(toplot$Individuals, toplot$Individuals)
xlab <- c("Individuals", "Individuals")
y_own <- cbind(toplot$ChaoAddAbs, toplot$ChaoAddRel)
ylab <- c("Estimated increase in richness (species)", "Estimated increase in richness (%)")
# remove infinite values of ChaoAddRel
elim <- which(is.na(y_own[,2]))
if(length(elim > 0)) {
y_own <- y_own[-elim,]
x_own <- x_own[-elim,]
if(cex_lat==TRUE) { cex1 <- cex1[-elim,] }
col <- col[-elim]
}
lit <- FALSE
cex_lat <- FALSE
log <- "xy"
col_pos <- "bottomleft"
col_panel <- 2
panel_pos <- "topright"
}
# FIG 3: Plot with individuals and spp vs MTM
if(figure=="Effort") {
mfrow <- c(1,2) # defines the number of panels
hw <- 4.2 # defines the dimensions of each panel
x_own <- cbind(toplot$MTM, toplot$MTM)
xlab <- c("Trap months", "Trap months")
y_own <- cbind(toplot$Individuals, toplot$Species)
ylab <- c("Individuals", "Species")
lit <- TRUE
x_lit <- cbind(litraw$MTM, litraw$MTM)
y_lit <- cbind(litraw$Individuals, litraw$Species)
cex_lat <- TRUE
log <- "xy"
col_pos <- "bottomright"
col_panel <- 1
panel_pos <- "topleft"
}
# FIG 4: Plot with richness and diversity vs individuals
if(figure=="Diversity") {
mfrow <- c(1,3) # defines the number of panels
hw <- 2.9 # defines the dimensions of each panel
x_own <- cbind(toplot$Individuals, toplot$Individuals, toplot$Individuals)
xlab <- c("Individuals", "Individuals", "Individuals")
y_own <- cbind(toplot$Div.q0, toplot$Div.q1, toplot$Div.q2)
ylab <- c("Richness", "Diversity q=1", "Diversity q=2")
lit <- TRUE
x_lit <- cbind(litraw$Individuals)
y_lit <- cbind(litraw$Species)
cex_lat <- TRUE
log <- "xy"
col_pos <- "bottomright"
col_panel <- 2
panel_pos <- "topleft"
}
# colour vector for plotting
if(col_choice == 6) {col <- "black"} else {
col <- c(rep("", nrow(toplot)))
for(a in seq(length(col))) col[a] <- col_lut[which(col_lut[,1]==toplot[a,col_source]),2]
}
# symbol size set to either constant or scaled by latitude
if(cex_lat == FALSE) {
cex1 <- cex2 <- 1 } else {
cex1 <- c(rep(0, dim(toplot)[1]))
for(a in seq(length(cex1))) cex1[a] <- traptable$Latitude[toplot$Region[a]==traptable$Region][1]
cex1 <- 2.5*sqrt(cex1/max(abs(litraw$Latitude)))
}
# if data are log-transformed, zeroes are removed from the data
if(charmatch("x", log, nomatch=-1)>=0) {
col <- col[x_own[,2]>0]
if(cex_lat==TRUE) { cex1 <- cex1[x_own[,2]>0] }
y_own <- y_own[x_own[,2]>0,]
x_own <- x_own[x_own[,2]>0,]
}
if(grep("y", log)>0) {
col <- col[y_own[,2]>0]
if(cex_lat==TRUE) { cex1 <- cex1[y_own[,2]>0] }
x_own <- x_own[y_own[,2]>0,]
y_own <- y_own[y_own[,2]>0,]
}
# axis limits
x_lower <- apply(x_own,2,min)
x_upper <- apply(x_own,2,max)
if(figure=="Diversity") {
y_lower <- rep(min(y_own), dim(y_own)[2])
y_upper <- rep(max(y_own), dim(y_own)[2])
} else {
y_lower <- apply(y_own,2,min)
y_upper <- apply(y_own,2,max)
}
# parameters for literature data
################# HUOM! make those sites gray that do not have Rhyssinae data!! ####################
if(lit) {
x_upper <- apply(rbind(apply(x_lit,2,max, na.rm=TRUE), x_upper), 2, max)
y_upper <- apply(rbind(apply(y_lit,2,max, na.rm=TRUE), y_upper), 2, max)
if(cex_lat) {
cex2 <- 2.5*sqrt(abs(litraw$Latitude)/max(abs(litraw$Latitude)))
} else { cex2 <- 1 }
pch2 <- ifelse(litraw$Elev.max<1000, 1, 19)
}
# lettering for the panels
panel <- LETTERS[1:(dim(x_own)[2]*dim(y_own)[2])]
# name for the output file
if(log != "") logged <- paste("-log",log,sep="") else logged <- ""
filename <- paste(toplotname,"-", figure, "-lit", lit, logged,
"-lat", cex_lat, "-col",col_source, sep="")
#### end of automatic plot options ####
#### Draw the graphs
pdf(file=paste(toplotname, "-", figure, ".pdf", sep=""), height=hw*mfrow[1], width=hw*mfrow[2])
par(mfrow=mfrow, mar=c(4,4,1,1), oma=c(1,1,1,1))
for(i in seq(dim(x_own)[2])) { # loop through x variables
plot(x_own[,i], y_own[,i], type="n", log=log,
xlim=c(if(figure!="Effort") {x_lower[i]} else {x_lower[i]/2}, x_upper[i]),
ylim=c(y_lower[i], y_upper[i]), xlab=xlab[i], ylab=ylab[i], cex=cex1, col=col)
title(main=title, cex=0.5, outer=T)
legend(panel_pos, panel[i], bty="n", cex=1.5)
if(i==col_panel) {
if(lit) {
legend(col_pos, c(col_legend, "Literature"), text.col=c(col_lut[,2], col_lit), bty="n", cex=1)
} else {
legend(col_pos, col_legend, text.col=col_lut[,2], bty="n", cex=1)
}
}
if(figure=="Traplines") {
for(j in seq(unique(toplot$TrapID))) {
sel_j <- which(toplot$TrapID==unique(toplot$TrapID)[j])
min_j <- min(toplot[sel_j,"StartDay"])
sel_j <- intersect(sel_j, which(toplot$StartDay==min(toplot[sel_j,"StartDay"])))
col1 <- col[sel_j]
points(x_own[sel_j,i], y_own[sel_j,i], type="l", col=col1)
}
}
if(figure=="Estimates") {
points(x_own[,i], y_own[,i], type="p", col=col)
}
if(figure=="Effort" ) {
points(x_own[,i], y_own[,i], type="p", col=col)
if(lit) {
points(x_lit[,i], y_lit[,i], type="p", col=col_lit, cex=cex2, pch=pch2)
}}
if(figure=="Diversity") {
points(x_own[,i], y_own[,i], type="p", col=col)
if(lit & i==1) {
points(x_lit[,i], y_lit[,i], type="p", col=col_lit, cex=cex2, pch=pch2)
}}
if(figure=="Latitude") {
points(x_own[,i], y_own[,i], type="p", col=col)
if(lit & i==1) {
points(x_lit[,i], y_lit[,i], type="p", col=col_lit, cex=cex2, pch=pch2)
}}
}
dev.off()
##################################### hätäratkaisu
# plot the figure=="Latitude" graph
lat <- 0
for (i in seq(nrow(toplot))) {
lat <- c(lat, trapraw[which(toplot$Region[i] == trapraw$Region)[1],"Latitude"])}
lat <- as.numeric(lat[-1])
toplot2 <- as.data.frame(cbind("Latitude"=lat, "Species"=toplot$Species, "MTM"=toplot$MTM))
pdf(file=paste(toplotname, "-", figure, ".pdf", sep=""), height=4, width=4.8)
par(mfrow=c(1,1), mar=c(4,4,1,1), oma=c(1,1,1,1))
plot(toplot2$Latitude, toplot2$Species, type="n", xlab="Latitude", ylab="Species",
xlim=c(min(litraw$Latitude), max(litraw$Latitude)), ylim=c(0,max(toplot2$Species)+5))
for(i in seq(unique(toplot2$Latitude))) {
dots <- toplot2[toplot2$Latitude==unique(toplot2$Latitude)[i],]
dots2 <- dots[dots$Species<=max(dots$Species)/2,]
dots2 <- dots2[sample(seq(nrow(dots2)), 10, replace = FALSE, prob = NULL),]
points(dots2$Latitude, dots2$Species, cex=3*(dots2$MTM/max(litraw$MTM))^(1/3))
dots2 <- dots[dots$Species>max(dots$Species)/2,]
dots2 <- dots[sample(seq(nrow(dots2)), 10, replace = FALSE, prob = NULL),]
points(dots2$Latitude, dots2$Species, cex=3*(dots2$MTM/max(litraw$MTM))^(1/3))
dots2 <- (dots[dots$Species==max(dots$Species),][1,])
points(dots2$Latitude, dots2$Species, cex=3*(dots2$MTM/max(litraw$MTM))^(1/3))
}
points(litraw$Latitude, litraw$Species, pch=pch2, cex=3*(litraw$MTM/max(litraw$MTM))^(1/3))
dev.off()
#################
########## PART 4: Run and plot NMDS with trapwise data ##########
library(vegan)
library(MASS)
#### aggregate species data by trap
trapdata <- aggregate(sptable[,-(1:11)], by=list(sptable$TrapID), FUN=sum, simplify=TRUE)
colnames(trapdata)[1] <- "TrapID"
trapdata <- trapdata[match(traptable$TrapID, trapdata$TrapID),]
rownames(trapdata) <- trapdata$TrapID
trapdata <- trapdata[,-1]
#### run NMDS using both presence-absence data (Soerensen index) and abundance data (Bray-Curtis relative)
sp.sor <- vegdist(trapdata, method="bray", binary=TRUE) # change here for other dissimilarity
fileout <- paste(spname, "Soerensendist.txt", sep="_")
write.table(as.matrix(sp.sor), file=fileout, sep="\t")
ord_pa <- metaMDS(sp.sor)
sp.bc <- vegdist(decostand(trapdata,"total"), method="bray", binary=FALSE) # change here for other dissimilarity
fileout <- paste(spname, "BrayCurtdist.txt", sep="_")
write.table(as.matrix(sp.bc), file=fileout, sep="\t")
ord_ab <- metaMDS(sp.bc)
#### define colors for the plots
col_source <- traptable$SoilType
if(length(unique(col_source))==5) {
col_lut <- cbind(c(sort(unique(col_source))), c("black", "darkblue", "blue", "red", "darkgoldenrod"))
} else {
col_lut <- cbind(c(sort(unique(col_source))), c("darkblue", "blue", "red", "darkgoldenrod"))
}
# check that the colors are as they should
print(col_lut)
col <- c(rep("",length(col_source)))
for(a in seq(length(col))) col[a] <- col_lut[which(col_lut[,1]==col_source[a]),2]
# set symbols by Region
pch_source <- traptable$Region
pch_lut <- as.data.frame(cbind(c(sort(unique(pch_source))), c(1, 0, 2, 6)), stringsAsFactors = FALSE)
# check that the symbols are as they should
print(pch_lut)
pch <- c(rep(0,length(pch_source)))
for(a in seq(length(pch))) pch[a] <- as.numeric(pch_lut[which(pch_lut[,1]==pch_source[a]),2])
# plot the ordination result and save it as a pdf file
pdf(file=paste(spname,"NMDS.pdf", sep="_"), height=4, width=8)
par(mfrow=c(1,2), mar=c(4,4,1,1), oma=c(1,1,1,1))
# presence-absence ordination
plot(ord_pa, display="sites", type="n", ylim=c(-0.4,0.4), xlim=c(-0.6, 0.6),
xlab="Axis 1", ylab="Axis 2")
points(ord_pa, col=col, pch=pch)
legend("topleft", ifelse(rbind(col_lut[,1])!="", rbind(col_lut[,1]),"unspecified"), text.col=col_lut[,2], bty="n", cex=0.7)
title(main="Presence-absence")
# abundance ordination
plot(ord_ab, display="sites", type="n", ylim=c(-0.4,0.4), xlim=c(-0.6, 0.6),
xlab="Axis 1", ylab="Axis 2")
points(ord_ab, col=col, pch=pch)
legend("topleft", legend=pch_lut[,1], bty="n", cex=0.7, pch=as.numeric(pch_lut[,2]))
title(main="Relative abundance")
dev.off()
########## PART 5: Run and plot NMDS with seasonal data ##########
TARKISTETTAVA
library(vegan)
library(MASS)
#### aggregate species data by season within trap
sp_data <- sptable[,-5]
sp_data <- na.omit(sp_data)
months <- list(c(12,1,2), c(3:5), c(6:8), c(9:11))
months.names <- c("Dec-Feb", "Mar-May", "Jun-Aug", "Sep-Nov")
for(i in 1:4) {
sp_season <- sp_data[which(sp_data$StartMonth %in% months[[i]]),]
sp_season <- aggregate(sp_season[,-(1:10)], by=list(sp_season$TrapID), FUN=sum, simplify=TRUE)
colnames(sp_season)[1] <- "TrapID"
if(i==1) {
trapdata <- sp_season
season <- rep(months.names[i], nrow(sp_season))
soil <- rep("", nrow(sp_season))
for(k in seq(length(soil))) soil[k] <- traptable[which(trapdata$TrapID[k]==traptable$TrapID),2]
} else {
trapdata <- rbind(trapdata, sp_season)
season <- c(season, rep(months.names[i], nrow(sp_season)))
soil.t <- rep("", nrow(sp_season))
for(k in seq(length(soil.t))) soil.t[k] <- traptable[which(trapdata$TrapID[k]==traptable$TrapID),2]
soil <- c(soil, soil.t)
}
}
trap <- trapdata$TrapID
rownames(trapdata) <- paste(season, trapdata$TrapID, sep=".")
trapdata <- trapdata[,-1]
min.ind <- 5
season <- season[which(rowSums(trapdata)>=min.ind)]
soil <- soil[which(rowSums(trapdata)>=min.ind)]
trapdata <- trapdata[which(rowSums(trapdata)>=min.ind),]
file <- paste(spname, "_SeasonInd", min.ind, sep="")
#### run NMDS using both presence-absence data (Soerensen index) and abundance data (Bray-Curtis relative)
sp.sor <- vegdist(trapdata, method="bray", binary=TRUE) # change here for other dissimilarity
fileout <- paste(file, "Soerensendist.txt", sep="_")
write.table(as.matrix(sp.sor), file=fileout, sep="\t")
ord_pa <- metaMDS(sp.sor)
sp.bc <- vegdist(decostand(trapdata,"total"), method="bray", binary=FALSE) # change here for other dissimilarity
fileout <- paste(file, "BrayCurtdist.txt", sep="_")
write.table(as.matrix(sp.bc), file=fileout, sep="\t")
ord_ab <- metaMDS(sp.bc)
#### define colors for the plots
col_source <- list(soil, season)
col_lut <- list()
if(length(unique(col_source[[1]]))==5) {
col_lut[[1]] <- cbind(c(sort(unique(col_source[[1]]))), c("black", "darkblue", "blue", "red", "darkgoldenrod"))
} else {
col_lut[[1]] <- cbind(c(sort(unique(col_source[[1]]))), c("darkblue", "blue", "red", "darkgoldenrod"))
}
col_lut[[2]] <- cbind(c(unique(col_source[[2]])), c("black", "green", "blue", "brown"))
# check that the colors are as they should
print(col_lut)
col <- cbind(rep("",length(col_source[[1]])), rep("",length(col_source[[2]])))
for(a in seq(nrow(col))) {
col[a,1] <- col_lut[[1]][which(col_lut[[1]][,1]==col_source[[1]][a]),2]
col[a,2] <- col_lut[[2]][which(col_lut[[2]][,1]==col_source[[2]][a]),2]
}
col_lut[[1]][1,1] <- "undefined"
# set symbols by Region
pch_source <- traptable$Region
pch_lut <- as.data.frame(cbind(c(sort(unique(pch_source))), c(1, 0, 2, 6)), stringsAsFactors = FALSE)
# check that the symbols are as they should
print(pch_lut)
pch <- c(rep(0,length(pch_source)))
for(a in seq(length(pch))) pch[a] <- as.numeric(pch_lut[which(pch_lut[,1]==pch_source[a]),2])
############ KESKEN #####################
# plot the ordination result and save it as a pdf file
pdf(file=paste(file, "NMDS.pdf", sep="_"), height=7, width=7)
par(mfrow=c(ncol(col),2), mar=c(4,4,1,1), oma=c(1,1,1,1))
for(j in seq(ncol(col))) {
# presence-absence ordination
plot(ord_pa, display="sites", type="n", xlab="Axis 1", ylab="Axis 2")
points(ord_pa, col=col[,j], pch=pch)
legend("topleft", rbind(col_lut[[j]][,1]), text.col=col_lut[[j]][,2], bty="n", cex=0.7)
title(main="Presence-absence")
# abundance ordination
plot(ord_ab, display="sites", type="n", xlab="Axis 1", ylab="Axis 2")
points(ord_ab, col=col[,j], pch=pch)
legend("topleft", legend=pch_lut[,1], bty="n", cex=0.7, pch=as.numeric(pch_lut[,2]))
title(main="Relative abundance")
}
dev.off()
|