# REQUIREMENTS: # Programs: RStudio # R packages: vegan, dplyr # Files: deadwood.csv file. This dataest is a modified version of the following dataset: # Shumskaya M, Zambell C, Mishra S, Bell E, Blue S, Yearwood-Marut J, Marut W, Vindas-Cruz A, # Jennings A, Hylton N, Burghardt J (2019). Survey of saproxylic fungi across parks of New Jersey. # Kean University. Occurrence dataset https://doi.org/10.15468/ngpb5m accessed via GBIF.org on 2019-07-26 # # INSTRUCTIONS: # # Download the deadwood.csv file and store it in the directory you can find later. # # Open R Studio, then use the "+" symbol under file to begin a new R script. # # Using the bottom right window Packages tab to search for and # Install packages vegan and dplyr if they are not already installed. # Next click the boxes next to these two packages to attach them in R. # Alternatively, to attach the packages (once they are already downloaded) you could type: library(vegan) library(dplyr) # In the bottom right frame of R Studio, go to the "Files" tab and navigate to your recently # downloaded file deadwood.csv. Left click on the file name with the mouse, then choose "Import Dataset". # After importing the file, the "View()" command will be automatically executed, causing the # dataset to appear in a tab in the top left quarter of R studio (right above this text). # Click on the tab and note the spreadsheet format of the data. If the deadwood dataset tab is # closed (go ahead and close it) you can access it again by typing: View(deadwood) # Some other useful data exploration commands to explore the deadwood dataset are: # names(), nrow(), ncol(). # USE THESE COMMANDS AND THE VIEW TAB TO ANSWER QUESTIONS 1-4 IN THE HANDOUT. # For Metric or Non-metric multidimensional scaling ordination methods, we have to # choose a distance metric with which to compare the different communities. The # ordination procedure will then attempt to represent these distances by plotting # the communities as points in either 2 or 3 directions on a graph. Those # communities that are most similar to each other in species composition should # be spatially closest to each other in the graph. # In vegan, you can generate distance matrices using different distance metrics, # but first you will have to prepare the data. # To prepare the data for vegan we must remove any columns that aren't NUMERICAL species abundance, # or presence/absence data. Let's create a second dataset that only has the species occurrence data # by removing the descriptive columns. The "select" command (of dplyr package) will do so. # COMPLETE THE COMMAND BELOW to remove not only locality and habitat columns, but also # decimalLatitude, decimalLongitude, geodeticDatum, eventDate, basisOfRecord, countryCode, stateProvince, kingdom, and recordedBy columns # (note R is very spelling and case sensitive; column names must be exact): deadwood2<-select(deadwood,-locality,-habitat, COMPLETE_COMMAND_TO_REMOVE_ALL_COLUMNS_UNRELATED_TO_SPECIES_OCCURRENCE) # View the resulting dataset (TYPE THE APPROPRIATE COMMAND BELOW): View(deadwood2) # Make sure that all the columns that are not species names have been removed, and redo if they are not. # Now we can explore distance matrices. Run the following command, but FILL IN THE METHOD as "jaccard" wood.dis<-vegdist(deadwood2, method="INSERT_DISTANCE_METHOD_HERE") wood.dis # Note the distance matrix has appeared. It is on a scale of 0 to 1. # The distance matrix just constructed was for demonstration. In the next # command, metaMDS, you will execute the NMDS ordination, and the distance # matrix will be made again (but not viewed this time) as part of the procedure. # Now perform the metaMDS procedure, which will run a chosen number of iterations # until it hopefully reaches a low "stress" level, in other words a good fit. # A stress level above 0.2 is considered unacceptable, 0.1 is fair, less than 0.1 is good. wood.mds <- metaMDS(deadwood2, try=1000, distance="jaccard") wood.mds # ANSWER QUESTIONS 5 & 6 IN THE HANDOUT. #Also check the goodness of fit using a Shepard plot where graphical distances are plotted #against community dissimilarity. If the fit is good, the dots should follow the curve. stressplot(wood.mds) # ANSWER QUESTION 7 ON THE HANDOUT. # Now let's see the NMDS plot, type="t" means to plot the text rather than points. ordiplot(wood.mds, type="t") # All the species and site names are shown but it's too messy to read. # Lets just plot the points so we can add locality names and a few species names later. ordiplot(wood.mds, type="points") # The black circles represent sites. The red crosses are the species. # Create this ordiplot object which stores information for adding species labels later on. pl<-ordiplot(wood.mds) # Display different versions of the plot: ordiplot(wood.mds, type="points", cex=1.5) ordiplot(wood.mds, type="points", cex=1) ordiplot(wood.mds, type="points", cex=0.3) #ANSWER QUESTION 8 # cex=1 seems suitable, so execute the following command then move on: ordiplot(wood.mds, type="points", cex=1) # Let's add hulls (boundaries) around the Hardwood and Pine Barrens habitats to see # if they form different groups. # The green hull surrounds the hardwood habitats, the brown hull surrounds the pine sites. ordihull(wood.mds, deadwood$habitat, col =c("green","brown")) #Add the labels for the sites, using that pl object created earlier. text(pl,what="sites", deadwood$locality,cex=1) # We can see the site names are excessively long. # Lets make a vector of shorter site names to map onto the points in the ordination. # These must be in the same order as they are by row in the deadwood dataset. Sites<-c("Wawayanda","Stokes","Weiss","Stephens","Meadowood","Teethertown","Schiff","Thompson","Ocean County","Forest REC" ,"Rancocas","Brendon Byrne","Franklin Parker","Wells Mills","Belleplain","Cattus Island") # Now make a fresh ordiplot and add the new labels. ordiplot(wood.mds, type="points", cex=1) ordihull(wood.mds, deadwood$habitat, col =c("green","brown")) text(pl,what="sites", Sites, cex=1) # You can use the "Zoom" button in the bottom right window to enlarge the graph. # ANSWER QUESTION 9. # Now we are going to map a variable onto the ordination, to see its relationship # to the communities. Let's look at decimalLatitude and decimalLongitude from the deadwood dataset. # View the dataset and note that these two variables are in columns 3 & 4. Extract columns 3 & 4 # into a new dataframe called "variables" and then view it with the following commands: variables<-deadwood[,3:4] variables # Now we will use vegan's command envfit to examine whether latitude or longitude have a # statistically significant relationship to dead wood fungal community composition. # We will store the result as ef. ef<-envfit(wood.mds,variables,permu=999) ef # ANSWER QUESTION 10 ON THE HANDOUT. # Plot any variable with p value below 0.1 onto the ordination: plot(ef,p.max=0.1) #ANSWER QUESTION 11 ON THE HANDOUT. # Now let's identify some of the species and see how they are plotted in the ordination, # and what their placement means. Again, we are using the pl object we created earlier # using ordiplot. We will use vegan's identify command to identify selected species. # Label 3 crosses from different parts of the plot. # Run the following line, then read below how to select crosses. identify(pl, "species", col="purple" ) # After you hit the command, a stop sign appears. Enlarge the plot by stretching its margins so # that it is easier to see the points. Select points by clicking on them. When finished, hit the # "Finish" button in the top right corner of the plot. # ANSWER QUESTION 12 and 13. # Let's investigate which sites these species were found in. # ANSWER QUESTION 14 USING THE FOLLOWING COMMANDS. # You will have to use cbind to pair site titles to species. # Use the following script but insert your own species name. cbind(deadwood$locality,deadwood$habitat, deadwood$INSERT_SPECIES_NAME_HERE) cbind(deadwood$locality,deadwood$habitat, deadwood$INSERT_SPECIES_NAME_HERE) cbind(deadwood$locality,deadwood$habitat, deadwood$INSERT_SPECIES_NAME_HERE) # ANSWER QUESTIONS 15 & 16 ON THE HANDOUT. # END