################################################################################
##  malaise_data_visualization.R: Making graphs to visualize data from Malaise samples
##
##  Author: Kimberly Komatsu
################################################################################

#### Setting up script ####

library(readxl)
library(ggsci)
library(tidyverse)

# NOTE(review): hard-coded, machine-specific working directory. Kept unchanged
# because the data import below resolves its file name relative to it; consider
# an RStudio project or a configurable path so the script runs elsewhere.
setwd('C:\\Users\\kjkomatsu\\OneDrive - UNCG\\malaise trap network') #set working directory


#### Graph Settings #####

theme_set(theme_bw())
theme_update(axis.title.x=element_text(size=20, vjust=-0.35, margin=margin(t=15)),
             axis.text.x=element_text(size=16),
             axis.title.y=element_text(size=20, angle=90, vjust=0.5, margin=margin(r=15)),
             axis.text.y=element_text(size=16),
             plot.title=element_text(size=24, vjust=2),
             panel.grid.major=element_blank(),
             panel.grid.minor=element_blank(),
             legend.title=element_blank(),
             legend.text=element_text(size=20))

# Colorblind friendly palette applied to sample_location in every scatter plot
# below. Two colors are supplied, so this presumably assumes two sample
# locations -- confirm against the data.
locationColors <- c('#FFC20A', '#0C7BDC')


#### Homemade functions ####

# barGraphStats(data=, variable="", byFactorNames=c(""))
#
# Summarizes one column of `data` within every combination of the grouping
# columns, for plotting means with error bars.
#
# Arguments:
#   data          - data frame (or tibble) holding the columns named below
#   variable      - name (string) of the column to summarize
#   byFactorNames - character vector of grouping column names
#
# Returns a data frame with one row per group containing the grouping columns
# followed by N (number of values), mean, sd and se (sd / sqrt(N)).
# Missing values are not removed: they are counted in N and make the mean and
# sd of their group NA.
barGraphStats <- function(data, variable, byFactorNames) {
  groups <- data[byFactorNames]

  # Applies one statistic per group. Columns are named by position rather than
  # by pattern-matching aggregate()'s default "x" column, so a grouping column
  # that happens to be called "x" is left alone.
  summarizeBy <- function(fun, statName) {
    out <- aggregate(data[[variable]], groups, FUN=fun)
    names(out) <- c(byFactorNames, statName)
    out
  }

  counts <- summarizeBy(length, 'N')
  means <- summarizeBy(mean, 'mean')
  sds <- summarizeBy(sd, 'sd')

  summaryStats <- merge(merge(counts, means, by=byFactorNames), sds, by=byFactorNames)
  summaryStats$se <- summaryStats$sd / sqrt(summaryStats$N)
  summaryStats
}


#### Import data ####

data2024 <- read_xlsx('BIO444_malaiseNetwork_finalData_2024.xlsx', sheet='composition')


#### Dot plot by Order #####

# Counts per order (rows) and sample period (columns); the two 'unknown larva'
# categories are left out.
ggplot(data=filter(data2024, !(order %in% c('unknown larva', 'unknown larva #2'))),
       aes(x=sample_year, y=count, color=sample_location)) +
  geom_point(size=3) +
  scale_color_manual(values=locationColors) + #colorblind friendly palette
  xlab('Sample Year') + ylab('Total Count') +
  facet_grid(cols=vars(sample_period), rows=vars(order), scales='free')
#not a great way to visualize this data until you have multiple years


#### Pie charts ####

# Proportion of the total count contributed by each row, computed within each
# sample location x sample period combination.
countProportions <- data2024 %>%
  # filter(sample_location=='A', sample_period==1) %>%
  group_by(sample_location, sample_period) %>%
  mutate(proportion=round((count/sum(count)), digits=3)) %>%
  # arrange(proportion) %>%
  mutate(labels=scales::percent(proportion)) %>%
  ungroup() #drop the grouping so it does not leak into later operations

# A stacked bar drawn in polar coordinates gives one pie per location x period.
ggplot(data=countProportions, aes(x="", y=proportion, fill=order)) +
  geom_col() +
  coord_polar(theta="y") +
  scale_fill_d3(palette='category20c') +
  theme(axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks=element_blank(),
        panel.grid=element_blank(),
        plot.title=element_text(vjust=0.5),
        legend.position='bottom') +
  facet_grid(rows=vars(sample_location), cols=vars(sample_period))


#### Total abundance and biomass ####

# Summed count and air dry weight per location, period and year.
total2024 <- data2024 %>%
  group_by(sample_location, sample_period, sample_year) %>%
  summarize(total_count=sum(count),
            total_biomass=sum(air_dry_weight)) %>%
  ungroup()

#total abundance over sample periods
ggplot(data=total2024, aes(x=sample_period, y=total_count, color=sample_location)) +
  geom_point(size=3) +
  geom_smooth(method='lm', formula = y ~ x, se=FALSE) + #straight-line fit per location
  scale_color_manual(values=locationColors) + #colorblind friendly palette
  xlab('Sample Period') + ylab('Total Count') +
  scale_x_continuous(breaks=c(1,2,3))

#total abundance across years
ggplot(data=total2024, aes(x=sample_year, y=total_count, color=sample_location)) +
  geom_point(size=3) +
  scale_color_manual(values=locationColors) + #colorblind friendly palette
  xlab('Sample Year') + ylab('Total Count') +
  facet_wrap(~sample_period)
#not a great way to visualize this data until you have multiple years

#total biomass over sample periods
ggplot(data=total2024, aes(x=sample_period, y=total_biomass, color=sample_location)) +
  geom_point(size=3) +
  geom_smooth(method='lm', formula = y ~ poly(x,2), se=FALSE) + #quadratic fit per location
  scale_color_manual(values=locationColors) + #colorblind friendly palette
  xlab('Sample Period') + ylab('Total Biomass (g)') +
  scale_x_continuous(breaks=c(1,2,3))

#total biomass across years
ggplot(data=total2024, aes(x=sample_year, y=total_biomass, color=sample_location)) +
  geom_point(size=3) +
  scale_color_manual(values=locationColors) + #colorblind friendly palette
  xlab('Sample Year') + ylab('Total Biomass (g)') +
  facet_wrap(~sample_period)
#not a great way to visualize this data until you have multiple years