# Stream quality: total nitrogen (NTL) by EPA region ----
#
# Import StreamQuality_all.xlsx through 'Import Dataset' before running this
# script. The code below expects a data frame named `StreamQuality_all` with
# the columns `EPAREGION` and `NTL`.

# View the first few lines of data to make sure that the dataset imported
# correctly.
head(StreamQuality_all)

# Add libraries ----
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)

# Nitrogen trend per EPA region? ----

# First calculate the mean N of each region. Missing NTL measurements are
# dropped (na.rm = TRUE) so that a single NA does not turn a whole region's
# mean into NA.
stream_qualityN <- StreamQuality_all %>%
  group_by(EPAREGION) %>%
  summarise(avgNTL = mean(NTL, na.rm = TRUE))
stream_qualityN

# Next calculate the median N of each region, with the same NA handling as
# the mean above.
stream_qualityN_median <- StreamQuality_all %>%
  group_by(EPAREGION) %>%
  summarise(medNTL = median(NTL, na.rm = TRUE))
stream_qualityN_median

# Create a box plot to compare across regions. The x axis labels are rotated
# 30 degrees and right-aligned.
p <- ggplot(data = StreamQuality_all, aes(x = EPAREGION, y = NTL)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(x = "EPA Region", y = "Nitrogen Total Load (ug/L)")
p

# Get the quartile values R computed to create the box plot.
ggplot_build(p)

# Create a new box plot with a log base 10 y axis. Reusing `p` keeps the
# data, aesthetics, theme and labels identical to the linear-scale plot.
# NOTE(review): non-positive NTL values cannot be placed on a log axis and
# would be dropped from this plot -- confirm the data has none.
p + scale_y_log10()