Loading libraries needed for analyses
library(BiodiversityR)
library(ggplot2)
library(reshape2)
library(cowplot)
library(plyr)
library(dplyr)
library(ggrepel)
library(ggpubr)
Verify active working directory
getwd()
Open and read the csv files in the input folder. There are five input datasets with occurrences information of alien and native species in Italy.
freshwater = read.csv2(file ='/home/jovyan/work/input/Dataset_Biodiversity_Freshwaters_LifeWatch_2015.csv', header = TRUE, stringsAsFactors = T)
head(freshwater)
marine = read.csv(file ='/home/jovyan/work/input/Dataset_Biodiversity_Marine_Lifewatch_2015.csv', header = TRUE, stringsAsFactors = T)
head(marine)
transitional = read.csv(file ='/home/jovyan/work/input/Dataset_Biodiversity_TransitionalWaters_LifeWatch_2015.csv', header = TRUE, stringsAsFactors = T)
head(transitional)
Aterrestrial = read.csv2(file ='/home/jovyan/work/input/Dataset_Biodiversity_Terrestrial_Habitats_LifeWatch_2015.csv', header = TRUE, stringsAsFactors = T)
head(Aterrestrial)
PFterrestrial = read.csv2(file ='/home/jovyan/work/input/Dataset_Biodiversity_Terrestrial_Plants_and_Fungi_LifeWatch_2015.csv', header = TRUE, stringsAsFactors = T)
head(PFterrestrial)
Merge the five datasets and work on a unique dataframe. Add a column to the original datasets to specify their provenance.
freshwater$dataset <- c("Freshwater")
marine$dataset <- c("Marine")
transitional$dataset <- c("Transitional")
Aterrestrial$dataset <- c("Terrestrial_A")
PFterrestrial$dataset <- c("Terrestrial_PF")
Check column names in each dataset and keep only columns needed for analyses
names(freshwater)
names(marine)
names(transitional)
names(Aterrestrial)
names(PFterrestrial)
#Select the following common variables: eventdate, decimallatitude, decimallongitude, phylum, class, order, family, genus,
#scientificname, scientificnameauthorship, eunishabitatstypecode, alien, eunisspeciesgroups and dataset
BiotopeVars <- c("eventdate", "decimallatitude", "decimallongitude", "phylum", "class", "order", "family",
"genus", "scientificname", "scientificnameauthorship", "eunishabitatstypecode", "alien",
"eunisspeciesgroups", "dataset")
freshwater_var <- freshwater[BiotopeVars]
marine_var <- marine[BiotopeVars]
transitional_var <- transitional[BiotopeVars]
Aterrestrial_var <- Aterrestrial[BiotopeVars]
PFterrestrial_var <- PFterrestrial[BiotopeVars]
#Check if selection worked correctly
names(freshwater_var)
names(marine_var)
names(transitional_var)
names(Aterrestrial_var)
names(PFterrestrial_var)
#Merge datasets into a unique dataframe
fullDB <- rbind(marine_var, transitional_var, freshwater_var, Aterrestrial_var, PFterrestrial_var)
dim(fullDB)
head(fullDB)
#The column "Alien" specifies the species status by assigning numerical values to native species ("0") and alien species ("1").
#To facilitate the analysis change numerical values with character values.
fullDB <- fullDB %>% mutate(alien=recode(alien, "1"="yes", "0"="no"))
#Check classes
str(fullDB)
#Check values for variable "Alien"
fullDB[is.na(fullDB$alien),]
#There are six records without information about the "alien" status
#Search species in EASIN and assign species status accordingly.
fullDB$alien[fullDB$scientificname=="Sisymbrium orientale"] <- "no"
fullDB$alien[fullDB$scientificname=="Papaver hybridum"] <- "no"
fullDB$alien[fullDB$scientificname=="Papaver dubium"] <- "no"
fullDB$alien[fullDB$scientificname=="Crepis sancta"] <- "yes"
#Delete remaining "NA" values in variable "Alien" if no information about the species status could be found in EASIN.
fullDB <- fullDB[!is.na(fullDB$alien),]
Check if total number of species per dataset match number of total number of alien and native species per dataset
fullDB %>%
group_by(dataset) %>%
summarise(
totSpecies=length(unique(scientificname))
)
fullDB %>%
group_by(dataset, alien) %>%
summarise(
totSpecies=length(unique(scientificname))
)
#Total species numbers do not match. This means that the status alien and native have been assigned to the same species
#problably for an error. Check which are these species.
fullDB %>%
group_by(scientificname) %>%
filter(
n_distinct(alien)>1)
#After a search on EASIN and GRIIS, the species status was ascertained it can be now amended.
fullDB$alien[fullDB$scientificname=="Monocorophium sextonae"] <- "yes"
fullDB$alien[fullDB$scientificname=="Ebria tripartita"] <- "no"
fullDB$alien[fullDB$scientificname=="Amphibalanus eburneus"] <- "yes"
fullDB$alien[fullDB$scientificname=="Hipparchia (Neohipparchia) statilinus"] <- "no"
fullDB$alien[fullDB$scientificname=="Acanthophora nayadiformis"] <- "yes"
fullDB$alien[fullDB$scientificname=="Knipowitschia panizzae"] <- "no"
fullDB$alien[fullDB$scientificname=="Cyprinus carpio"] <- "yes"
fullDB$alien[fullDB$scientificname=="Anas platyrhynchos"] <- "no"
fullDB$alien[fullDB$scientificname=="Erythrocladia irregularis"] <- "no"
fullDB$alien[fullDB$scientificname=="Alburnus alburnus"] <- "yes"
fullDB$alien[fullDB$scientificname=="Cobitis taenia"] <- "no"
fullDB$alien[fullDB$scientificname=="Cyclops vicinus"] <- "yes"
fullDB$alien[fullDB$scientificname=="Girardia tigrina"] <- "yes"
fullDB$alien[fullDB$scientificname=="Gobio gobio"] <- "yes"
fullDB$alien[fullDB$scientificname=="Paspalum distichum"] <- "yes"
fullDB$alien[fullDB$scientificname=="Rutilus rutilus"] <- "yes"
fullDB$alien[fullDB$scientificname=="Salmo trutta"] <- "no"
fullDB$alien[fullDB$scientificname=="Scardinius erythrophthalmus"] <- "yes"
fullDB$alien[fullDB$scientificname=="Xylosandrus germanus"] <- "yes"
fullDB$alien[fullDB$scientificname=="Chamaecyparis lawsoniana"] <- "yes"
fullDB$alien[fullDB$scientificname=="Crataegus germanica"] <- "yes"
fullDB$alien[fullDB$scientificname=="Epilobium montanum"] <- "no"
fullDB$alien[fullDB$scientificname=="Lilium martagon"] <- "no"
fullDB$alien[fullDB$scientificname=="Pterogonium gracile"] <- "yes"
fullDB$alien[fullDB$scientificname=="Vitis vinifera"] <- "no"
#check values of EUNIS habitats
levels(fullDB$eunishabitatstypecode)
#there are duplications in column eunishabitatstype because some values are in capital letters and others in lowercase
#transform all values in capital
fullDB$eunishabitatstypecode <- toupper(fullDB$eunishabitatstypecode)
fullDB$eunishabitatstypecode <- as.factor(fullDB$eunishabitatstypecode)
levels(fullDB$eunishabitatstypecode)
#The detailed classification of habitats could be problematic for downstream analyses because
#1) not all habitats are classified till the thirth or forth level of EUNIS classification (comparison issues)
#2) because many of these habitats are underrepresented
#For such a reason it is better to work on a upper level of habitat classification (e.g. first level; A1, B1, C1, etc.)
#add a column with this upper level classification for EUNIS habitats
fullDB$eunishabitatsLevel1 <- sub("(^[^.]+)\\..*", "\\1", fullDB$eunishabitatstypecode)
# check result
fullDB$eunishabitatsLevel1 <- as.factor(fullDB$eunishabitatsLevel1)
levels(fullDB$eunishabitatsLevel1)
#Even at the first level of classification it will be difficult to compare habitats across environmental domains because
#there are huge differences in numbers. For example there are only 3 habitats in freshwater ecosystems (C) with many records
#and 18 habitats in terrestrial ecosystems with few records each.
#Further merge habitats based on EUNIS classification
fullDB$eunisL0 <- plyr::revalue(fullDB$eunishabitatsLevel1, c("A1"="Marine benthic",
"A2"="Marine benthic",
"A3"="Marine benthic",
"A4"="Marine benthic",
"A5"="Marine benthic",
"A6"="Marine benthic",
"A7"="Marine pelagic",
"B1"="Coastal",
"E1"="Grasslands",
"E2"="Grasslands",
"E3"="Grasslands",
"E4"="Grasslands",
"E5"="Grasslands",
"F1"="Heathland, scrub and tundra",
"F2"="Heathland, scrub and tundra",
"F4"="Heathland, scrub and tundra",
"F5"="Heathland, scrub and tundra",
"F6"="Heathland, scrub and tundra",
"G1"="Woodlands",
"G2"="Woodlands",
"G3"="Woodlands",
"G4"="Woodlands",
"H1"="Inland habitats with sparse vegetation",
"H2"="Inland habitats with sparse vegetation",
"H3"="Inland habitats with sparse vegetation",
"H5"="Inland habitats with sparse vegetation",
"J4"="Artificial terrestrial",
"J5"="Artificial waters",
"X11"="Artificial terrestrial",
"C1"="Standing waters",
"C2"="Running waters",
"C3"="Water-fringing vegetation",
"X02"="Saline lagoons",
"X03"="Brackish lagoons"))
#check macro-habitats levels
levels(fullDB$eunisL0)
#Add column to specify environmental domains of habitats
fullDB$domain <- plyr::revalue(fullDB$eunishabitatsLevel1, c("A1"="Marine",
"A2"="Marine",
"A3"="Marine",
"A4"="Marine",
"A5"="Marine",
"A6"="Marine",
"A7"="Marine",
"B1"="Marine",
"E1"="Terrestrial",
"E2"="Terrestrial",
"E3"="Terrestrial",
"E4"="Terrestrial",
"E5"="Terrestrial",
"F1"="Terrestrial",
"F2"="Terrestrial",
"F4"="Terrestrial",
"F5"="Terrestrial",
"F6"="Terrestrial",
"G1"="Terrestrial",
"G2"="Terrestrial",
"G3"="Terrestrial",
"G4"="Terrestrial",
"H1"="Terrestrial",
"H2"="Terrestrial",
"H3"="Terrestrial",
"H5"="Terrestrial",
"J4"="Artificial",
"J5"="Artificial",
"X11"="Artificial",
"C1"="Freshwater",
"C2"="Freshwater",
"C3"="Freshwater",
"X02"="Transitional",
"X03"="Transitional"))
#check levels of environmental domains
levels(fullDB$domain)
#Apply a treshold to the dataset based on number of records (occurrences) per macro-habitat
#Keep only macro-habitats with number of records >5% of the total occurrences in the related domain
#Assign a number to each row in the dataset representing the occurrences and then aggregate the dataset
fullDB$occ <- c(1)
ThresholdDB <- aggregate(fullDB$occ,
by=list(fullDB$eunisL0,
fullDB$domain), FUN="sum")
head(ThresholdDB)
ThresholdDB <- plyr::rename(ThresholdDB, c("Group.1"="eunisL0", "Group.2"="domain", "x"="occEunis"))
#Add total number of occurrences per domain
ThresholdDB$occDomain <- plyr::revalue(ThresholdDB$domain, c("Transitional"=3744,
"Marine"=12781,
"Terrestrial"=8537,
"Freshwater"=6542,
"Artificial"=452))
#Calculate ratio
str(ThresholdDB)
ThresholdDB$ratio <- ThresholdDB$occEunis/as.numeric(as.character(ThresholdDB$occDomain))
Threshold <- 0.05
for (b in 1:nrow(ThresholdDB)) {
if (!is.na(ThresholdDB$ratio[b])) {
if(ThresholdDB$ratio[b] < Threshold){ThresholdDB$occEunis[b] <- 0}
}
}
#By applying a threshold of 0.05 the "water-fringing vegetation", "coastal habitats", "Heathland, scrub and tundra"
#and "Inland habitats with sparse vegetation" habitats will be cutted-off
#Create "clean" dataset after treshold to be used for downstream analyses
cleanDB <- fullDB[which(fullDB$eunisL0=="Marine benthic"|
fullDB$eunisL0=="Marine pelagic"|
fullDB$eunisL0=="Standing waters"|
fullDB$eunisL0=="Running waters"|
fullDB$eunisL0=="Woodlands"|
fullDB$eunisL0=="Grasslands"|
fullDB$eunisL0=="Artificial waters"|
fullDB$eunisL0=="Artificial terrestrial"|
fullDB$eunisL0=="Saline lagoons"|
fullDB$eunisL0=="Brackish lagoons"),]
dim(cleanDB)
head(cleanDB)
#Abundance analyses - number of alien and native species per macro-habitat and domain
#Calculate number of species per environmental domain
speciesNumb <- cleanDB %>%
group_by(domain, alien) %>%
summarise(
totSpecies=length(unique(scientificname))
)
head(speciesNumb)
# Plot 1 - species numbers by domains
speciesNumb <- speciesNumb %>% mutate(alienInv=ifelse(alien=="no", totSpecies, totSpecies*-1))
barplot1 <- ggplot(speciesNumb, aes(x=domain, y=alienInv))+
geom_bar(aes(fill=alien), stat = "identity", position = "identity", alpha=0.8, color="black", width = 0.5)+
geom_label(aes(label=format(totSpecies, big.mark = ",")), vjust=ifelse(speciesNumb$alienInv<0, 1.20, -0.2))+
scale_fill_manual(labels=c("no"="Native", "yes"="Alien"), values = c("no"="aquamarine4", "yes"="black"))+
scale_x_discrete(limits=c("Artificial", "Freshwater", "Transitional", "Marine", "Terrestrial"))+
geom_hline(yintercept=0, size=1)+
theme_classic()+ labs(title="Number of native and alien species in environmental domains")+
theme(axis.ticks = element_blank(), axis.line = element_blank(), axis.text.y = element_blank(),
axis.text.x = element_text(color="black", size = 14), axis.title = element_blank(), legend.position = "null",
legend.title = element_blank(), legend.text = element_text(size = 14))
barplot1
#Save plot as png
png("/home/jovyan/work/output/SpeciesNumbByDomains.png", width = 500, height = 600, units = "px")
barplot1
dev.off()
#Calculate number of species per environmental domain
speciesNumb2 <- cleanDB %>%
group_by(eunisL0, alien) %>%
summarise(
totSpecies=length(unique(scientificname))
)
head(speciesNumb2)
# Plot 2 - species number by EUNIS macro-habitats
speciesNumb2 <- speciesNumb2 %>% mutate(alienInv=ifelse(alien=="no", totSpecies, totSpecies*-1))
barplot2 <- ggplot(speciesNumb2, aes(x=eunisL0, y=alienInv))+
geom_bar(aes(fill=alien), stat = "identity", position = "identity", alpha=0.8, color="black", width = 0.7)+
geom_label(aes(label=format(totSpecies, big.mark = ",")), hjust=ifelse(speciesNumb2$alienInv<0, 1.20, -0.2))+
scale_fill_manual(labels=c("no"="Native", "yes"="Alien"), values = c("no"="aquamarine4", "yes"="black"))+
scale_x_discrete(limits=c("Artificial waters","Artificial terrestrial", "Grasslands","Woodlands", "Running waters","Standing waters",
"Saline lagoons","Brackish lagoons","Marine pelagic","Marine benthic"))+
coord_flip()+
geom_hline(yintercept=0, size=1)+
theme_classic()+ labs(title="Number of native and alien species in EUNIS habitats")+
theme(axis.ticks = element_blank(), axis.line = element_blank(), axis.text.x = element_blank(),
axis.text.y = element_text(color="black", size = 12), axis.title = element_blank(), legend.position = "bottom",
legend.title = element_blank(), legend.text = element_text(size = 12))
barplot2
#Save plot as png
png("/home/jovyan/work/output/SpeciesNumbByMacro-Habitsts.png", width = 8000, height = 5000, units = "px", res = 500)
barplot2
dev.off()
#Calculate the species incidence on macro-habitats and domains
speciesNumb3 <- cleanDB %>%
group_by(domain,eunisL0, alien) %>%
summarise(
totSpecies=length(unique(scientificname))
)
head(speciesNumb3)
comando utilizzato per pulire spazzatura che occupa la memoria da usare quando la RAM è piena
SpeciesInc <- dcast(speciesNumb3, eunisL0+domain~alien, value.var = "totSpecies")
head(SpeciesInc)
SpeciesInc$incidence <- SpeciesInc$yes/SpeciesInc$no
## Plot 3 - Alien incidence across environmental domains
box1 <- ggboxplot(SpeciesInc, x="domain", y="incidence", xlab = FALSE, ylab = "Incidence\n",
width = 0.9, size = 0.7, font.label = list(size=14), fill = "domain", alpha=0.7)+
scale_fill_manual(values = c("Marine"="darkslategray2", "Terrestrial"="indianred4", "Freshwater"="palegreen4",
"Transitional"="steelblue4", "Artificial"="gray"))+
scale_x_discrete(limits=c("Marine","Transitional", "Freshwater", "Terrestrial",
"Artificial"))+
labs(title="Alien species incidence in environmental domains")+
theme_pubclean()+theme(axis.text = element_text(colour = "black", size = 11), axis.ticks.x = element_blank(),
axis.title.x = element_blank(), legend.position = "null")
box1
#Save plot as png
png("/home/jovyan/work/output/AlienIncidenceDomains.png", width = 1500, height = 1500, units = "px", res = 200)
box1
dev.off()
## Plot 4 - Alien incidence in EUNIS macro-habitats
barplot3 <- ggplot(SpeciesInc, aes(x=eunisL0, y=incidence))+
geom_bar(aes(fill=domain), stat = "identity", width = .5, alpha=.7, color="black")+
ylab("Incidence\n")+
scale_fill_manual(values = c("Terrestrial"="indianred4","Marine" ="darkslategray2",
"Transitional"="steelblue4", "Freshwater"="palegreen4", "Artificial"="gray"))+
scale_x_discrete(limits=c("Marine pelagic","Marine benthic", "Saline lagoons", "Brackish lagoons",
"Standing waters","Running waters", "Grasslands","Woodlands",
"Artificial terrestrial", "Artificial waters"))+
labs(title="Alien species incidence in EUNIS habitats")+
theme_pubclean()+theme(axis.text = element_text(colour = "black", size = 11),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(), legend.position = "null")
barplot3
#Save plot as png
png("/home/jovyan/work/output/AlienIncidenceHabitats.png", width = 1500, height = 1500, units = "px", res = 200)
barplot3
dev.off()
#Chi-Square Test of Independence
#The chi-square test evaluates whether there is a significant association between the categories (EUNIS habitats)
#of two variables (alien species vs. native species).
#Prepare dataset for analysis
chiHab <- SpeciesInc[,-1]
rownames(chiHab) <- SpeciesInc[,1]
#Run chi-square test
chisq <- chisq.test(chiHab[,2:3], simulate.p.value=T)
#Simulate.p.value added because of the small sample size
#Check test results
chisq
The results indicated that habitats and number of alien/native species are statistically significantly associated (p-value < 0.5).
Chi-square statistic is 88.58
#Extract and inspect observed and expected counts
chisq$observed
round(chisq$expected, 2)
#Extract chi-square residuals
round(chisq$residuals, 3)
#Residuals can be used to evaluate the cells of the contingency table that contribute the most to the total Chi-square score.
#A correlation plot can be used to visualise the value of residuals contributing the most to the chi-square score
#as well as the negative or positive associations between categorical variables
corrplot(chisq$residuals, is.corr = F, addCoef.col = "black", col = COL2("BrBG"), tl.col = "black", cl.pos = "n")
#Analysis of variance
#The anova test shows if there is any significant difference between the average incidence among domains
specinc.aov <- aov(incidence~domain, data = SpeciesInc)
#Summary of the analysis
summary(specinc.aov)
#p-value is less than the significance level 0.05 hence differences in incidence between domains is significant
#Tukey test for multiple pairwise comparisons
TukeyHSD(specinc.aov)
#Abundance analyses - number of alien and native species records per macro-habitat and domain
#Calculate and plot number of alien and native occurrences per macro-habitat
#Aggregate dataset and rename variables
subDB1 <- aggregate(cleanDB$occ, by=list(cleanDB$eunisL0, cleanDB$alien), FUN="sum")
subDB1 <- plyr::rename(subDB1, c("Group.1"="eunisL0", "Group.2"="alien", "x"="occ"))
head(subDB1)
#Generate barplot for visualisation
subDB1 <- subDB1 %>% mutate(alienInv=ifelse(alien=="no", occ, occ*-1))
ggplot(subDB1, aes(x=eunisL0, y=alienInv))+
geom_bar(aes(fill=alien), stat = "identity", position = "identity", alpha=0.8, color="black", width = 0.7)+
geom_label(aes(label=format(occ, big.mark = ",")), hjust=ifelse(subDB1$alienInv<0, 1.20, -0.2))+
scale_fill_manual(labels=c("no"="Native", "yes"="Alien"), values = c("no"="aquamarine4", "yes"="black"))+
scale_x_discrete(limits=c("Artificial waters","Artificial terrestrial", "Grasslands","Woodlands", "Running waters","Standing waters",
"Saline lagoons","Brackish lagoons","Marine pelagic","Marine benthic"))+
coord_flip()+
geom_hline(yintercept=0, size=1)+
theme_classic()+ labs(title="Number of native and alien species occurrences in EUNIS habitats")+
theme(axis.ticks = element_blank(), axis.line = element_blank(), axis.text.x = element_blank(),
axis.text.y = element_text(color="black", size = 12), axis.title = element_blank(), legend.position = "bottom",
legend.title = element_blank(), legend.text = element_text(size = 12))
#Calculate and plot number of alien and native occurrences per domain
#Aggregate dataset and rename variables
subDB2 <- aggregate(cleanDB$occ, by=list(cleanDB$domain, cleanDB$alien), FUN="sum")
subDB2 <- plyr::rename(subDB2, c("Group.1"="domain", "Group.2"="alien", "x"="occ"))
head(subDB2)
#Generate barplot for visualisation
subDB2 <- subDB2 %>% mutate(alienInv=ifelse(alien=="no", occ, occ*-1))
ggplot(subDB2, aes(x=domain, y=alienInv))+
geom_bar(aes(fill=alien), stat = "identity", position = "identity", alpha=0.8, color="black", width = 0.5)+
geom_label(aes(label=format(occ, big.mark = ",")), vjust=ifelse(subDB2$alienInv<0, 1.20, -0.2))+
scale_fill_manual(labels=c("no"="Native", "yes"="Alien"), values = c("no"="aquamarine4", "yes"="black"))+
scale_x_discrete(limits=c("Artificial","Transitional", "Freshwater", "Terrestrial", "Marine"))+
geom_hline(yintercept=0, size=1)+
theme_classic()+labs(title="Number of native and alien species occurrences in environmental domains")+
theme(axis.ticks = element_blank(), axis.line = element_blank(), axis.text.y = element_blank(),
axis.text.x = element_text(color="black", size = 12), axis.title = element_blank(), legend.position = "null",
legend.title = element_blank(), legend.text = element_text(size = 12))
#Calculate the species incidence on macro-habitats and domains
HabInc <- dcast(cleanDB, eunisL0+domain~alien, value.var = "occ", fun.aggregate = sum)
head(HabInc)
HabInc$incidence <- HabInc$yes/HabInc$no
#Plot alien species incidence in environmental domains
ggboxplot(HabInc, x="domain", y="incidence", xlab = FALSE, ylab = "Incidence\n",
width = 0.9, size = 0.7, font.label = list(size=14), fill = "domain", alpha=0.7)+
scale_fill_manual(values = c("Marine"="darkslategray2", "Terrestrial"="indianred4", "Freshwater"="palegreen4",
"Transitional"="steelblue4", "Artificial"="gray"))+
scale_x_discrete(limits=c("Marine","Transitional", "Freshwater", "Terrestrial",
"Artificial"))+
labs(title="Alien species incidence in environmental domains")+
theme_pubclean()+theme(axis.text = element_text(colour = "black", size = 11), axis.ticks.x = element_blank(),
axis.title.x = element_blank(), legend.position = "null")
Incidence values remain consistent between number of species and species occurrences!
#Plot alien species incidence in EUNIS habitats
ggplot(HabInc, aes(x=eunisL0, y=incidence))+
geom_bar(aes(fill=domain), stat = "identity", width = .5, alpha=.7, color="black")+
ylab("Incidence\n")+
scale_fill_manual(values = c("Terrestrial"="indianred4","Marine" ="darkslategray2",
"Transitional"="steelblue4", "Freshwater"="palegreen4", "Artificial"="gray"))+
scale_x_discrete(limits=c("Marine pelagic","Marine benthic", "Saline lagoons", "Brackish lagoons",
"Standing waters","Running waters", "Grasslands","Woodlands",
"Artificial terrestrial", "Artificial waters"))+
labs(title="Alien species incidence in EUNIS habitats")+
theme_pubclean()+theme(axis.text = element_text(colour = "black", size = 11),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(), legend.position = "null")
#Generate dataframe for analysis
alienDB_domain = dcast(cleanDB[which(cleanDB$alien=="yes"),], domain+eunisL0~eunisspeciesgroups, value.var = "occ", fun.aggregate = sum)
head(alienDB_domain)
dim(alienDB_domain)
#To run the analysis two datasets should be used with (1) taxonomic groups abundances and (2) environmental information
alienData <- alienDB_domain[,3:ncol(alienDB_domain)]
head(alienData)
#Compute rank abundances for alien species for all domains and taxonomic groups
RankAbu <- rankabundance(alienData)
RankAbu
#Plot rank abundance curve
rankabunplot(RankAbu, scale = "abundance", specnames = c(1:6))
#Generate environmental dataset
domain <- as.data.frame(alienDB_domain[,1:2])
head(domain)
str(domain)
#PLot rank abundance curves across environmental domains
alienrank <- rankabuncomp(alienData, y=domain, factor = "domain", scale = "abundance",
return.data = T,specnames = c(1:5), legend = FALSE)
#Use ggplot
alienRankPlot <- ggplot(data = alienrank[which(alienrank$Grouping!="Artificial"),], aes(x=rank, y=abundance))+
scale_x_continuous(expand = c(0,1), sec.axis = dup_axis(labels = NULL, name = NULL))+
scale_y_continuous(expand = c(0,1), sec.axis = dup_axis(labels = NULL, name = NULL))+
geom_line(aes(color=Grouping), size=1, alpha=0.7, linetype="dashed")+
geom_point(aes(color=Grouping), size=4)+
geom_text_repel(data = subset(alienrank[which(alienrank$Grouping!="Artificial"),], labelit==T),
aes(label=species), angle=0, nudge_x = 1.5, nudge_y = 0, show.legend = F)+
facet_wrap(~factor(Grouping, levels=c("Marine", "Transitional", "Freshwater", "Terrestrial")))+
theme_pubclean()+
scale_color_manual(values = c("Terrestrial"="indianred4","Marine" ="darkslategray3",
"Transitional"="steelblue4", "Freshwater"="palegreen4"))+
labs(x="\nTaxonomic groups rank", y="Alien abundance\n")+
theme(panel.grid = element_blank(), legend.position = "none", axis.ticks.y.right = element_blank(),
axis.text = element_text(colour = "black"),
axis.ticks.x.top = element_blank(), strip.background = element_rect(fill = "transparent",
color = "black"),
strip.placement = "outside",
strip.text = element_text(size=12))
alienRankPlot
png("/home/jovyan/work/output/AlienRankAbundanceCurves.png", width = 4000, height = 4000, units = "px", res = 500)
alienRankPlot
dev.off()
#gc()