---
title: "Data Nugget 3"
author: "Elli Bosch"
date: "4/4/2019"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Loading the Data into R

```{r}
library(readxl)

# Read the workbook (read_excel() uses the first sheet by default).
Data <- read_excel("Data - Nutrients in the James River.xlsx")

# View() opens a spreadsheet viewer, which only makes sense in an interactive
# session; skip it when the document is knitted.
if (interactive()) {
  View(Data)
}

summary(Data)
```

# Installing necessary packages

```{r}
# Install only the packages that are missing, so knitting does not reinstall
# them on every run. An explicit CRAN mirror is given because a non-interactive
# knit cannot prompt for one.
required_packages <- c("scales", "tidyverse")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "https://cloud.r-project.org")
}

library(scales)
library(tidyverse)

# Raise the penalty for scientific notation so large values print as plain
# digits.
options(scipen = 5)
```

# Scatterplot of Nutrients v. Discharge

```{r}
# One point per row of Data, coloured by the Nutrient column; both axes use
# comma-separated thousands.
nutrientsvdischarge <- ggplot(
  data = Data,
  aes(x = Discharge2, y = Kgperyear, colour = Nutrient)
) +
  geom_point() +
  labs(
    x = "Discharge (m3/y)",
    y = "Nutrient Concentration (kg/y)",
    title = "Nutrient Concentration (kg/y) vs. Discharge (m3/y)"
  ) +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = comma)

nutrientsvdischarge
```

# Looking at Nitrogen

# Testing for Normality of Nitrogen

```{r}
# Shapiro-Wilk test; the null hypothesis is that TN is normally distributed.
shapiro.test(Data$TN)
```

*Teacher note: this p-value means that we fail to reject the null hypothesis that our data for nitrogen is normally distributed*

## Nitrogen Linear Regression Plot and Summary (with diagnostic plots)

```{r}
# Base-graphics scatterplot of TN against Discharge with the fitted
# least-squares line drawn on top.
plot(TN ~ Discharge, data = Data)
mod <- lm(TN ~ Discharge, data = Data)
abline(mod)
```

```{r}
# Coefficient table and fit statistics, followed by the standard lm
# diagnostic plots.
summary(mod)
plot(mod)
```

```{r}
# ggplot version of the nitrogen regression. Columns are referenced by bare
# name inside aes() so ggplot2 looks them up in Data.
# Note: the y-axis limits remove any observations outside the range before the
# regression line is fitted; use coord_cartesian() instead if only a visual
# zoom is intended.
mod2 <- ggplot(Data, aes(x = Discharge, y = TN)) +
  geom_point(shape = 16, colour = "#CC6666") +
  geom_smooth(method = "lm", formula = y ~ x, colour = "#CC6666") +
  labs(
    title = "Linear model of total nitrogen concentration as a function of discharge",
    x = "Discharge (m3/y)",
    y = "Total Nitrogen (kg/y)"
  ) +
  theme_classic() +
  scale_y_continuous(limits = c(4000000, 12000000), labels = comma) +
  scale_x_continuous(labels = comma)
```

```{r}
mod2
```

# Looking at Phosphorus

# Testing for Normality of Phosphorus

```{r}
# Shapiro-Wilk test; the null hypothesis is that TP is normally distributed.
shapiro.test(Data$TP)
```

*Teacher note: this p-value means that we fail to reject the null hypothesis that our data for phosphorus is normally distributed*

## Phosphorus Linear Regression Plot and Summary (with diagnostic plots)

```{r}
# Base-graphics scatterplot of TP against Discharge with the fitted
# least-squares line drawn on top.
plot(TP ~ Discharge, data = Data)
mod3 <- lm(TP ~ Discharge, data = Data)
abline(mod3)
```

```{r}
# Coefficient table and fit statistics, followed by the standard lm
# diagnostic plots.
summary(mod3)
plot(mod3)
```

## Linear model graph made nice

```{r}
# ggplot version of the phosphorus regression. Columns are referenced by bare
# name inside aes() so ggplot2 looks them up in Data.
# Note: the y-axis limits remove any observations outside the range before the
# regression line is fitted; use coord_cartesian() instead if only a visual
# zoom is intended.
mod4 <- ggplot(Data, aes(x = Discharge, y = TP)) +
  geom_point(shape = 16, colour = "#56B4E9") +
  geom_smooth(method = "lm", formula = y ~ x, colour = "#56B4E9") +
  labs(
    title = "Linear model of total phosphorus concentration as a function of discharge",
    x = "Discharge (m3/y)",
    y = "Total Phosphorus (kg/y)"
  ) +
  theme_classic() +
  scale_y_continuous(limits = c(200000, 1600000), labels = comma) +
  scale_x_continuous(labels = comma)
```

```{r}
mod4
```