# R Code for Grouped Graphs Swirl Lesson
library(readr) ##for importing data
library(ggplot2) ##for graphing
library(dplyr) ## for filtering, summarising, and other data wrangling
# Read the hemlock data from the published Google Sheet (CSV export).
# SamplingDate is parsed as a month/day/year date; Location becomes a factor.
hemlock <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vT-Uo5Gs2dcR6f6_PFrZwkaSrojsBCFt1qvVNU0PXn4RHVe3_GDzNL3BCxkkp6eIhjkfKw3S6YcX6wz/pub?output=csv",
  col_types = cols(
    Location = col_factor(),
    SamplingDate = col_date(format = "%m/%d/%Y")
  )
)
# Descriptive statistics for EHS density at each location: mean, standard
# deviation, sample size (n), and standard error of the mean (sd / sqrt(n)).
# Missing EHS values are dropped from every statistic, and n counts only the
# non-missing observations, so a single NA cannot turn a location's mean, sd,
# and sem into NA. With no missing values the results are unchanged.
EHS.sum <- hemlock %>%
  group_by(Location) %>%
  summarise(
    mean = mean(EHS, na.rm = TRUE),
    sd = sd(EHS, na.rm = TRUE),
    n = sum(!is.na(EHS))
  ) %>%
  mutate(sem = sd / sqrt(n))
print(EHS.sum)
# Bar plot of the mean EHS density per location with error bars of +/- 1 SE.
# Only x and y are mapped: the bar colour is a fixed "steelblue" set in the
# layer, so no fill aesthetic belongs in aes().
g.bar <- ggplot(EHS.sum, aes(x = Location, y = mean)) +
  # geom_col() takes bar heights directly from the y values
  geom_col(width = 0.5, fill = "steelblue", show.legend = FALSE) +
  geom_errorbar(
    aes(ymin = mean - sem, ymax = mean + sem),
    width = 0.1, size = 1
  ) +
  ylab("EHS Density (insects/cm)") +
  xlab("Location") +
  # xlim covers the four location positions; expand = FALSE removes the
  # default padding around the axis limits
  coord_cartesian(xlim = c(0.5, 4.5), expand = FALSE) +
  theme_classic(base_size = 16)
print(g.bar)
# A histogram for a single location needs only that location's rows,
# so subset the data to FLH first
FLH.data <- filter(hemlock, Location == "FLH")
# Histogram of EHS density at the FLH location
FLH.hist <- ggplot(FLH.data, aes(x = EHS)) +
  geom_histogram(
    binwidth = 0.4,
    fill = "steelblue",
    color = "white",
    show.legend = FALSE
  ) +
  labs(x = "EHS Density (insects/cm)", y = "Count") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 14)
print(FLH.hist)
# Histograms of EHS density for all 4 locations, one facet row per location,
# with bars filled by location
g.hist <- ggplot(hemlock, aes(x = EHS, fill = Location)) +
  geom_histogram(binwidth = 0.4, color = "white", show.legend = FALSE) +
  facet_grid(Location ~ .) +
  labs(x = "EHS Density (insects/cm)", y = "Count") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 14)
print(g.hist)
# Box plot of EHS density at all 4 locations, coloured by location.
# The black X marks each location's mean; outliers are drawn as points.
EHS.box <- ggplot(hemlock, aes(x = Location, y = EHS, color = Location)) +
  # whisker end caps, drawn underneath the boxes
  stat_boxplot(
    geom = "errorbar", width = 0.1, lwd = 0.75,
    na.rm = TRUE, show.legend = FALSE
  ) +
  geom_boxplot(width = 0.5, lwd = 0.75, na.rm = TRUE, show.legend = FALSE) +
  # mean marker (shape 4 is an X)
  stat_summary(
    fun = mean, geom = "point",
    shape = 4, size = 2, stroke = 2, colour = "black",
    na.rm = TRUE, show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(ylim = c(0, 11.5), expand = TRUE) +
  theme_classic(base_size = 20)
print(EHS.box)
# Box plot of EHS density at all 4 locations with every observation shown as
# a jittered point. The black X marks each location's mean; the box plot's own
# outlier points are suppressed because all points are already plotted.
EHS.box2 <- ggplot(hemlock, aes(x = Location, y = EHS)) +
  # x and y are inherited from the plot; only fill is added for the points
  geom_point(
    aes(fill = Location),
    shape = 21, size = 1, alpha = 0.5,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  stat_boxplot(geom = "errorbar", width = 0.1, lwd = 0.75, na.rm = TRUE) +
  geom_boxplot(
    width = 0.5, lwd = 0.75, alpha = 0.1,
    outlier.shape = NA, na.rm = TRUE
  ) +
  # mean marker (shape 4 is an X)
  stat_summary(
    fun = mean, geom = "point",
    shape = 4, size = 2, stroke = 2, colour = "black",
    na.rm = TRUE, show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20)
print(EHS.box2)
# Number of observations at each location
sample_size <- hemlock %>%
  group_by(Location) %>%
  summarise(num = n())
# Violin plot with embedded box plots; each x-axis label shows the location
# name and its sample size on a second line ("<Location>\nn=<num>").
EHS.violin <- hemlock %>%
  # join on an explicit key so the match does not depend on whichever column
  # names the two tables happen to share (and no "Joining with" message prints)
  left_join(sample_size, by = "Location") %>%
  mutate(myaxis = paste0(Location, "\n", "n=", num)) %>%
  ggplot(aes(x = myaxis, y = EHS, fill = Location)) +
  geom_violin(width = 1.4) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.8) +
  ylab("EHS Density (insects/cm)") +
  xlab("Location") +
  theme_classic(base_size = 18) +
  theme(legend.position = "none")
print(EHS.violin)
# Subset to one semester (Winter 2019): samples taken after 20 November 2018.
# NOTE(review): SamplingDate is read with col_date() at import, so this
# conversion looks redundant - kept as-is; confirm before removing.
hemlock[["SamplingDate"]] <- as.Date(hemlock[["SamplingDate"]], "%m/%d/%Y")
small <- filter(hemlock, SamplingDate > as.Date("2018-11-20"))
# Dot plot of the single-semester data: jittered points per location plus a
# horizontal black bar at each location's mean
dot.mean <- ggplot(small, aes(x = Location, y = EHS)) +
  # x and y are inherited from the plot; only fill is added for the points
  geom_point(
    aes(fill = Location),
    shape = 21, size = 3, alpha = 0.75,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  # fun, fun.min and fun.max are all the mean, so the crossbar collapses
  # to a single line at the mean
  stat_summary(
    geom = "crossbar",
    fun = mean, fun.min = mean, fun.max = mean,
    width = 0.5, size = 0.75, colour = "black",
    na.rm = TRUE, show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20) +
  theme(legend.position = "none")
print(dot.mean)
# Dot plot of the single-semester data: jittered points per location plus a
# horizontal black bar at each location's median
dot.median <- ggplot(small, aes(x = Location, y = EHS)) +
  # x and y are inherited from the plot; only fill is added for the points
  geom_point(
    aes(fill = Location),
    shape = 21, size = 3, alpha = 0.75,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  # fun, fun.min and fun.max are all the median, so the crossbar collapses
  # to a single line at the median
  stat_summary(
    geom = "crossbar",
    fun = median, fun.min = median, fun.max = median,
    width = 0.5, size = 0.75, colour = "black",
    na.rm = TRUE, show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20) +
  theme(legend.position = "none")
print(dot.median)