# Load the data file ## Install readxl package install.packages("readxl") ## load readxl package library("readxl") ## Read the excel file my_data <- read_excel("streamdata.xls") ## If you prefer, you can run this with a .csv file ## my_data <- read_excel("streamdata.csv") #Check data table if it is loaded my_data #Convert data table to dataframe my_data <- as.data.frame(my_data) #data of region 7 and region 10 my_data_region7 <- my_data[my_data["EPAREGION"]=="REGION__7",] my_data_region10 <- my_data[my_data["EPAREGION"]=="REGION_10",] #Calculate nitrogen (5th column) statistics summary(my_data_region7[,5]) summary(my_data_region10[,5]) #Calculate quantiles quantile(my_data_region7[,5]) quant_reg7 = unname(quantile(my_data_region7[,5])) quantile(my_data_region10[,5]) quant_reg10 = unname(quantile(my_data_region10[,5])) #plot a boxplot of nitrogen data boxplot(my_data_region7[,5]) boxplot(my_data_region10[,5]) #(Q3-Q1) quant_reg7[4]-quant_reg7[2] quant_reg10[4]-quant_reg10[2] #(Q3-Q1)*1.5 (quant_reg7[4]-quant_reg7[2])*1.5 (quant_reg10[4]-quant_reg10[2])*1.5 #upper extreme boundaries (quant_reg7[4]-quant_reg7[2])*1.5+quant_reg7[4] (quant_reg10[4]-quant_reg10[2])*1.5 + quant_reg10[4] #actual upper extreme boundaries boxplot.stats(my_data_region7[,5])$stats[5] boxplot.stats(my_data_region10[,5])$stats[5] #actual lower extrme boundaries boxplot.stats(my_data_region7[,5])$stats[1] boxplot.stats(my_data_region10[,5])$stats[1] #lower extreme boundaries quant_reg7[2]-(quant_reg7[4]-quant_reg7[2])*1.5 quant_reg10[2]-(quant_reg10[4]-quant_reg10[2])*1.5 #outliers outliers_reg7 = boxplot.stats(my_data_region7[,5])$out outliers_reg10 = boxplot.stats(my_data_region10[,5])$out #number of lower outliers sum(outliers_reg7 < boxplot.stats(my_data_region7[,5])$stats[1]) #boxplot.stats(x)$stats[1] is the min value of boxplot sum(outliers_reg10 < boxplot.stats(my_data_region10[,5])$stats[1]) #number of upper outliers sum(outliers_reg7 > boxplot.stats(my_data_region7[,5])$stats[5]) #boxplot.stats(x)$stats[5] is the max value of boxplot sum(outliers_reg10 > boxplot.stats(my_data_region10[,5])$stats[5]) #plot total nitrogen vs. mmi (7th column) on all data plot(my_data[,5], my_data[,7], main ="Total nitrogen vs. MMI", xlab="Total nitrogen (ug/L)", ylab="MMI WSABEST") abline(lm(my_data[,7] ~ my_data[,5])) #linear regression on graph model1 = lm(my_data[,7] ~ my_data[,5]) summary(model1) # R2 is in this summary #plot total ptl (6th column) vs mmi plot(my_data[,6], my_data[,7], main ="PTL vs. MMI", xlab="PTL (ug/L)", ylab="MMI") abline(lm(my_data[,7] ~ my_data[,6])) model2 = lm(my_data[,7] ~ my_data[,6]) summary(model2) #plot total log(ntl) vs mmi plot(log10(my_data[,5]), my_data[,7], xlim=c(0,5),main ="Total nitrogen vs. MMI", xlab="Log Total nitrogen (ug/L)", ylab="MMI WSABEST") abline(lm(my_data[,7] ~ log10(my_data[,5]))) model3 = lm(my_data[,7] ~ log10(my_data[,5])) summary(model3) #plot total log(ntl) vs mmi in Region 7 plot(log10(my_data_region7[,5]), my_data_region7[,7], xlim=c(0,5), main ="Log Total nitrogen vs. MMI in Region 7", xlab="Log Total nitrogen (ug/L)", ylab="MMI WSABEST") abline(lm(my_data_region7[,7] ~ log10(my_data_region7[,5]))) model_reg7 = lm(my_data_region7[,7] ~ log10(my_data_region7[,5])) summary(model_reg7) #plot total log(ntl) vs mmi in Region 10 plot(log10(my_data_region10[,5]), my_data_region10[,7], xlim=c(0,5), main ="Log Total nitrogen vs. MMI in Region 10", xlab="Log Total nitrogen (ug/L)", ylab="MMI WSABEST") abline(lm(my_data_region10[,7] ~ log10(my_data_region10[,5]))) model_reg10 = lm(my_data_region10[,7] ~ log10(my_data_region10[,5])) summary(model_reg10)