# Code for generating Figures 1, 5, and 6 in the # QUBES Comparative Genomics Figure of the Week # Author: Robert Furrow # Date modified: 17 June 2020 # file paths (and/or working directory) need to be # adjusted to correctly load the csv files. # all csv files are available in the resource on QUBES. # tidyverse package needed for all figures # checking and installing if needed if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse") # loading tidyverse library(tidyverse) ##------------------ ##------------------ # Figure 1: GC content X contig length # scatterplot of gc content by contig length df_scatterplot <- read_csv("scatterplot.csv") # using ggplot2 for a scatterplot with the bw theme p1 <- ggplot(data=df_scatterplot,aes(x=contig_length,y=gc)) + geom_point() + labs(x = "contig length", y = "GC content", title = "Strain 1926: gc X contig length") + theme_bw() # saving the plot ggsave("gc_scatterplot.png",p1,width = 8, height = 5, units = "in", dpi = 600) ##------------------ ##------------------ # Figure 5 (not used in pilot implementation of this Figure of the Week) # plot of GC content along sliding window of contig df_sliding <- read_csv("sliding_window.csv") mean_gc <- 0.630125 sd_gc <- 0.04059668 # filtering to the start of the contig, # color coding by points outside of 2 standard deviations # adding horizontal lines at +/- 2 standard deviations p5 <- df_sliding %>% dplyr::filter(window < 3e5) %>% ggplot(aes(x = window, y = gc)) + geom_point(aes(shape = extreme_region, color = extreme_region), size = 3) + geom_hline(yintercept = mean_gc+2*sd_gc, color = "red") + geom_hline(yintercept = mean_gc-2*sd_gc, color = "red") + theme_minimal() + ylim(c(0,1)) + scale_color_manual(limits = c("TRUE","FALSE"), values = c("red","black")) + scale_shape_manual(limits = c("TRUE","FALSE"), values = c(18,16)) + labs(x = "window start position", y = "GC content") + theme(legend.position = "none", text = element_text(size = 20)) # saving the plot ggsave("sliding_window.png",p5,width = 11, height = 6, units = "in", dpi = 600) ##------------------ ##------------------ # Figure 6 (not used in pilot implementation of this Figure of the Week) # heatmap of relative amino acid proportions within proteome if (!requireNamespace("viridis", quietly = TRUE)) install.packages("viridis") library(viridis) df_heatmap <- read_csv("heatmap.csv") # selecting only certain genera, getting mean amino acid # (relative proportions) within each genus, # reordering axis values, using viridis color scale p6 <- df_heatmap %>% dplyr::filter(genus %in% c("Halorubrum", "Bacillus","Virgibacillus", "Halobacillus", "Pontibacillus", "Halomonas")) %>% group_by(genus,amino_acid) %>% summarize(genus_mean = mean(relative_aa_use)) %>% ungroup() %>% mutate(genus_sort = factor(genus,levels = rev(c("Halorubrum", "Halomonas", "Bacillus", "Virgibacillus", "Halobacillus","Pontibacillus"))), amino_acid = factor(amino_acid, levels = c("E","D","K","R","P","H","G","C","A","V", "Y","W","F","M","L","I","T","S","Q","N"))) %>% ggplot(aes(x = amino_acid, y = genus_sort, fill = genus_mean)) + geom_tile(color = "white", size = 0.4)+ scale_fill_viridis(option = "viridis", breaks = c(0.6, 1, 1.4)) + labs(x = "amino acid" , y = "", fill = "relative \nproportion") + theme_classic() + theme(axis.line = element_blank(), text = element_text(size = 20), axis.ticks = element_blank()) #saving plot ggsave("aa_heatmap.png",p6,width = 11, height = 6, units = "in", dpi = 600)