#STEP 1: Set your working directory under Session/Set Working Directory/Choose Directory
#        You might like to choose the downloads folder

#STEP 2: Download your two data sets (saved as *.csv) into your Working Directory 
#        Use a short, meaningful file name for each one
#        Import your two data sets and save as a named "dataframe" using the code below.

#Import species #1 as a dataframe.
#The text in quotation marks must be your file's name.
#You can re-name the left hand side ("NYBG_SP1") to whatever you want,
#but you will then have to change all of the following code as well.
NYBG_SP1 <- read.csv(file = "zizia_sampleData.csv")
                     

#Import species #2 as a dataframe.
#The text in quotation marks must be your file's name.
#You can re-name the left hand side ("NYBG_SP2") to whatever you want,
#but you will then have to change all of the following code as well.
NYBG_SP2 <- read.csv(file = "alliaria_sampleData.csv")

#STEP 3: Do a bit of investigating for the years represented in your data
#        Use the code below

#Sample size: counts the rows whose "year" entry is not missing (NA)
sum(!is.na(NYBG_SP1$year))

#Overview of the variable "year" within the dataframe "NYBG_SP1":
#for a numeric column this reports the lowest value, quartiles, median,
#mean, and maximum value
summary(object = NYBG_SP1$year)

#Figure out how many decades are represented in your data
  #(Max - Min)/10 = number of decades
  #Use this for the "breaks" below

#STEP 4: Make a histogram of specimen collection years for species #1
#        Use the code below

#Draw the histogram of collection years and keep the result in "h";
#the later steps read the bar heights and bar midpoints from it
h <- hist(
  x = NYBG_SP1$year,
  #Experiment with this number, perhaps replace it with your number of
  #represented decades (R treats a single number here as a suggestion
  #and picks "pretty" break points close to it)
  breaks = 17,
  #Replace these numbers with your min and max years
  xlim = c(1800, 2020),
  #You'll come back to this
  ylim = c(0, 20),
  #Replace "Title" with a meaningful header for your figure
  main = "Title",
  #Replace "x-axis label" with an appropriate x-axis label
  xlab = "x-axis label"
)

#Select and run everything within "h <- hist(...)"

#STEP 5: Figure out how many observations are in the largest category
#        Use the code below

#Prints a vector holding the number of observations within each category
#(one entry per histogram bar). In the output, find the largest number.
h[["counts"]]

#STEP 6: Go back to the above histogram code ["h <- hist(...)"], and alter the ylim parameter to fit your tallest bar

#STEP 7: Re-run the histogram code with your altered y-axis limits

#STEP 8: Add labels to your histogram
#        Use the code below

#Write each bar's count at the bar's midpoint (x) and height (y);
#adj centers the label horizontally and lifts it just above the bar top
text(
  x = h$mids,
  y = h$counts,
  labels = h$counts,
  adj = c(0.5, -0.5)
)

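#STEP 9: Export your histogram as an image for use in your report
#        In the Plots pane, use Export/Save as Image...
#        (Alternatively, run png("my_histogram.png"), re-run the histogram and label code, then run dev.off())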

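#STEP 10: Repeat the above with your second species: Change every instance of "NYBG_SP1" to "NYBG_SP2" 
#         (remember to update the title, axis limits, and breaks in the histogram code to fit the second data set)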
#         Then, use these representations of the data (two histograms) to answer your research question.