if(!"stringr" %in% installed.packages()[,"Package"]) install.packages("stringr") library(stringr) library(ggplot2) library(DescTools) library(patchwork) draw_wenxiang <- function(file = NULL, sequence, labels = FALSE, label.col = "black", col = c("red", "blue", "yellow", "gray"), legend = FALSE, size = FALSE, comp_label = FALSE, fixed = TRUE, curves = 0, shapes = FALSE){ residue_col <- function(resid) { # 1 = nonpolar, 2 = polar, 3 = basic, 4 = acidic resid.vals <- c(1, 3, 2, 4, 2, 2, 4, 1, 3, 1, 1, 3, 1, 1, 1, 2, 2, 1, 1, 1) resid.vals <- as.integer(resid.vals) names(resid.vals) <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") # error if `resid` is not a valid one-letter code if (!(resid %in% names(resid.vals))) { stop(paste("ERROR:", resid, "is not a valid one-letter code for an amino acid.")) } # return color index based on name resid.vals[resid] } check_seq_length <- function(sequence){ if (nchar(sequence) > 18) { stop("ERROR: sequence must have less than or equal to 18 characters.") } else if (nchar(sequence) < 2) { stop("ERROR: sequence must have at least 2 characters.") } } check_labels <- function(labels){ if (!is.logical(labels)) { stop("labels parameter must be logical (either TRUE or FALSE)") } if (!(label.col %in% grDevices::colors())) { stop("label.col parameter must be a valid color (see list of valid colors using grDevices::colors())") } } check_col <- function(col) { if("consurf" %in% tolower(col) & length(col) > 1){ stop("Please make sure your col variable is c(\"consurf\") if you'd like to use Consurf results") } if(tolower(col)[1] != "consurf"){ NUM.COLORS <- 4 # hydrophobic, hydrophilic, basic, acidic if (sum(col %in% grDevices::colors()) != NUM.COLORS) { stop("ERROR: parameter `col` has invalid, missing, or too many colors.") } } } prepare_spiral_df <- function(){ # prepare data frame for spiral BETWEEN.DISTANCE <- 0.042 START.RADIUS <- 0.0625 CENTER.X <- 0.5 CENTER.Y <- 0.52 df.spiral <- data.frame(start.angle = rep(c(0, pi), 5), end.angle = rep(c(pi, 2 * pi), 5), center.y = rep(c(CENTER.X, CENTER.X + BETWEEN.DISTANCE), 5), center.x = rep(CENTER.Y, 10)) df.spiral$end.angle[10] <- 260 / 180 * pi curr.radius <- START.RADIUS for (i in 1:10) { df.spiral$radius[i] <- curr.radius curr.radius <- curr.radius + BETWEEN.DISTANCE } return(df.spiral) } subset_spirals <- function(df.spiral){ resid.spiral <- c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 9, 9, 10) df.spiral <- df.spiral[1:resid.spiral[num.resid], ] df.spiral$end.angle[nrow(df.spiral)] <- df.spiral$start.angle[nrow(df.spiral)] + (((num.resid - 1) * 100 / 180 * pi - pi * (resid.spiral[num.resid] - 1)) %% (2 * pi)) return(df.spiral) } create_consurf_table <- function(file, sequence){ #imports GRADES file table. Skips first 13 lines because first 13 lines of GRADES file contains #stuff we don't need table <- read.table(file, header = TRUE, skip = 13, fill = TRUE) #Last four lines of GRADES file contains irrelevant stuff last_row = nrow(table) - 4 #Clean table to include only relevant rows AND columns from GRADES file table <- table[2:last_row, c("POS","X3LATOM","SEQ","COLOR")] rownames(table) <- NULL #Combines all characters from "SEQ" column of GRADES file. This is the full sequence according to the #GRADES file return(table) } decide_fill.col <- function(col, table){ if(identical(col, "consurf")){ scores <- as.numeric(str_replace(table$COLOR[begin:end], "\\d\\*", "10")) names(scores) <- table[begin:end, "SEQ"] fill.col <- scores } else{ fill.col <- vapply(X = strsplit(sequence, "")[[1]], FUN.VALUE = integer(1), FUN = function(curr.resid) { residue_col(curr.resid) }) } return(fill.col) } decide_scale_fill_values <- function(col){ #Used to color the residues if(identical(col, "consurf")){ values <- c("Insufficient Data" = "#ffff80", "9 - conserved" = "#9c245c", "8" = "#ec7ca4", "7" = "#f4c4dc", "6" = "#fcecf4", "5" = "#fcfcfc", "4" = "#dffcfc", "3" = "#d4fcfc", "2" = "#8cfcfc", "1 - variable" = "#0cc4cc") } else { values <- c("hydrophobic" = col[1], "polar" = col[2], "basic" = col[3], "acidic" = col[4]) } } decide_scale_fill_name <- function(col){ if(identical(col, "consurf")){ name <- "Conservation" } else { name <- "Residue Types" } } decide_scale_fill_limits <- function(col){ if(identical(col, "consurf")){ limits <- c("9 - conserved", "8", "7", "6", "5", "4", "3", "2", "1 - variable","Insufficient Data") } else { limits <- NULL } } determine_icon_radii <- function(df.resid){ if(size){ residues <- c("A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V") residue_volumes <- c(88.6,173.4,114.1,111.1,108.5,143.8,138.4,60.1,153.2,166.7,166.7,168.6,162.9, 189.9,112.7,89,116.1,227.8,193.6,140) # Returns a vector that contains index values for where residues from input sequence is found in # the residues vector above. The index will be used to return the residue volume specific to that residue residue_index <- match(df.resid$lettername, residues) # Returns vector of residue volumes specific to each residue found from input specific_residue_volumes <- residue_volumes[residue_index] mean_res_vol <- mean(specific_residue_volumes) #calculates some proportion of 0.04 (original size for all residue icons) # used to convert specific volumes to icon sizes vol_to_radii_converter <- 0.04 / mean_res_vol #converts specific volumes to residue icon size #result is a vector of icon sizes residue_icon_radii <- specific_residue_volumes * vol_to_radii_converter } else{ residue_icon_radii <- 0.04 } } decide_resid.types <- function(col){ if(identical(col, "consurf")){ return(c("1 - variable", "2", "3", "4", "5", "6", "7", "8", "9 - conserved", "Insufficient Data")) }else{ return(c("hydrophobic", "polar", "basic", "acidic")) } } check_begin <- function(begin){ if(begin == -1){ stop("The sequence you put in can not be found in the Consurf GRADES file you selected.") } } check_seq_match <- function(full_seq, begin, end, sequence){ if(!substr(full_seq, begin, end) == sequence) stop("The input sequence isn't found in the GRADES file") } check_seq_length(sequence) check_col(col) num.resid <- nchar(sequence) df.spiral <- prepare_spiral_df() df.spiral <- subset_spirals(df.spiral) if(size){ if(shapes){ stop("Size and shape can't be used together yet") } } if(!(is.character(file))){ if(("consurf" %in% col)|comp_label){ stop("You need a CONSURF Grades File in order to use the Consurf coloring and the residue positions") } } df.resid <- data.frame(y = c(0.5625, 0.4891, 0.4438, 0.5943, 0.6122, 0.3878, 0.4478, 0.7191, 0.5400, 0.2695, 0.5893, 0.7955, 0.3428, 0.2689, 0.8151, 0.6993, 0.1255, 0.4655), x = c(0.5200, 0.5816, 0.4843, 0.4295, 0.6142, 0.6142, 0.3568, 0.4555, 0.7470, 0.5200, 0.2516, 0.6276, 0.7924, 0.2908, 0.2908, 0.8651, 0.6563, 0.0862)) df.resid <- df.resid[1:num.resid, ] if(is.character(file)){ print("This indicates that the file exists") table <- create_consurf_table(file, sequence) full_seq <- paste(table$SEQ, collapse = "") begin <- as.numeric(regexpr(sequence, full_seq)[1]) check_begin(begin) end <- begin + nchar(sequence) - 1 check_seq_match(full_seq, begin, end, sequence) } resid.types <- decide_resid.types(col) fill.col <- decide_fill.col(col = col, table = table) df.resid$fill.col <- resid.types[fill.col] df.resid$lettername <- vapply(X = 1:nchar(sequence), FUN.VALUE = character(1), FUN = function(i) substr(sequence, i, i)) df.resid$complete_label <- df.resid$lettername if(comp_label == TRUE){ #The first CONSURF/GRADES file position isn't necessarily the first PDB position #This inds first position where the ATOM field has a residue #The ATOM field of the CONSURF GRADES file relates directly to the PDB file #This is used to fix the inconsistency between CONSURF residue positions and PDB residue positions first_pdb_residue_index <- which(table$X3LATOM != "-")[1] #Extracts the first PDB position (represented through the ATOM field/column of GRADES file) first_pdb_residue_position <- as.numeric(str_extract(table$X3LATOM[first_pdb_residue_index], "[0-9]+")) #Finds the differences between CONSURF(aka GRADES file) position and PDB position file_position_to_seq_position <- first_pdb_residue_index - first_pdb_residue_position #Converts CONSURF/GRADES file numbering to PDB numbering df.resid$positions <- c(begin:end) - file_position_to_seq_position #Combines Residue 1 letter names and PDB numbering positions df.resid$complete_label <- paste(df.resid$lettername, df.resid$positions, sep = "") } df.resid$radius <- determine_icon_radii(df.resid) first_set_residues <- list(c(1,4,7,10,13,16),c(2,5,8,11,14,17),c(3,6,9,12,15,18)) first_coord_list <- list() first_coord_list<- lapply(first_set_residues, function(x) data.frame( x = df.resid[x,"x"], y = df.resid[x,"y"])) first_spline_list <- lapply(first_coord_list, function(first_coord_list) as.data.frame(with(first_coord_list, list(x = spline(x,n = 100)$y, y = spline(y, n = 100)$y) ))) second_set_residues <- list(c(1,5,9,13,17), c(2,6,10,14,18), c(3,7,11,15), c(4,8,12,16)) second_coord_list <- list() second_coord_list<- lapply(second_set_residues, function(x) data.frame( x = df.resid[x,"x"], y = df.resid[x,"y"])) second_spline_list <- lapply(second_coord_list, function(second_coord_list) as.data.frame(with(second_coord_list, list(x = spline(x,n = 100)$y, y = spline(y, n = 100)$y) ))) second_list<- list(c(1,6,11,16),c(2,7,12,17),c(3,8,13,18),c(4,9,14),c(5,10,15)) xy_list <- list() xy_list<- lapply(second_list, function(x) data.frame( x = df.resid[x,"x"], y = df.resid[x,"y"])) spline_list <- lapply(xy_list, function(xy_list) as.data.frame(with(xy_list, list(x = spline(x,n = 100)$y, y = spline(y, n = 100)$y) ))) color_vector <- c("green","orange","blue","red","purple","green","orange","blue","red","purple","green","orange","blue", "red","purple","green","orange","blue") #decide circles <- if multiple shapes selected, return filtered data.frame() else return df.resid ############given an element of a vector of letters: "A", output shape "triangle" shape_options <- c(2, 5, 6, 6, 4, 6, 6, 2, 5, 2, 2, 5, 1, 3, 2, 4, 4, 3, 3, 2) shape_options <- as.integer(shape_options) letter_options <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") names(shape_options) <- letter_options shapes_to_index <- c("circle", "triangle", "square", "diamond", "pentagon", "hexagon") shape_vector <- shapes_to_index[shape_options[df.resid$lettername]] #given an element "triangle" from a vector and coordinates output x & y coordinates of vertices df.resid$shape <- shape_vector vertex_df <- data.frame() for(index in 1:nrow(df.resid)){ shape <- df.resid$shape[index] x <- df.resid$x[index] y <- df.resid$y[index] if (shape == "circle") { list <- list( x = x, y = y) } if (shape == "triangle") { list <- DrawRegPolygon(x = x, y = y,radius.x = 0.04,rot = pi/2, nv = 3, plot = FALSE) } if (shape == "square") { list <- DrawRegPolygon(x = x, y=y,radius.x = 0.04, rot = pi/4, nv = 4, plot = FALSE) } if(shape == "diamond"){ list <- DrawRegPolygon(x = x, y = y, radius.x = 0.04, radius.y = 0.036, nv=4, plot = FALSE) } if(shape == "pentagon"){ list <- DrawRegPolygon(x = x, y = y, radius.x = 0.04,rot = pi/2, nv = 5, plot = FALSE) } if(shape == "hexagon"){ list <- DrawRegPolygon(x =x, y=y, radius.x = 0.04,nv = 6, plot = FALSE) } print(list$x) print(list$y) index_vertex_df <- data.frame(x_coords = list$x,y_coords = list$y, index = rep(index, length(list$x) )) vertex_df <- rbind(vertex_df, index_vertex_df) } g <- ggplot2::ggplot() if (curves != 0) { if (curves == 1) { g <- g + ggplot2::geom_path(data =first_spline_list[[1]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("green"), size = I(1.25))) + ggplot2::geom_path(data =first_spline_list[[2]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("orange"), size = I(1.25))) + ggplot2::geom_path(data =first_spline_list[[3]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("blue"), size = I(1.25))) } if (curves == 2) { g <- g + ggplot2::geom_path(data =second_spline_list[[1]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("green"), size = I(1.25))) + ggplot2::geom_path(data =second_spline_list[[2]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("orange"), size = I(1.25))) + ggplot2::geom_path(data =second_spline_list[[3]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("blue"), size = I(1.25))) + ggplot2::geom_path(data =second_spline_list[[4]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("red"), size = I(1.25))) } if (curves == 3 ) { g <- g + ggplot2::geom_path(data =spline_list[[1]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("green"), size = I(1.25))) + ggplot2::geom_path(data =spline_list[[2]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("orange"), size = I(1.25))) + ggplot2::geom_path(data =spline_list[[3]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("blue"), size = I(1.25))) + ggplot2::geom_path(data =spline_list[[4]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("red"), size = I(1.25))) + ggplot2::geom_path(data =spline_list[[5]], ggplot2::aes(x = .data$x, y = .data$y,linetype = I("dashed"), color = I("purple"), size = I(1.25))) } } g <- g + ggforce::geom_arc(data = df.spiral, ggplot2::aes(x0 = .data$center.x, y0 = .data$center.y, r = .data$radius, start = .data$start.angle, end = .data$end.angle)) if (shapes) { g <- g + geom_polygon(data = vertex_df, aes(x = x_coords, y = y_coords, group = index, fill = df.resid$fill.col[.data$index]), colour = "black") } else { g <- g + ggforce::geom_circle(data = df.resid, ggplot2::aes(x0 = .data$x, y0 = .data$y, r = .data$radius, fill = .data$fill.col)) } g <- g + ggplot2::xlim(c(0, 1)) + ggplot2::ylim(c(0, 1)) + ggplot2::scale_fill_manual(values = decide_scale_fill_values(col), name = decide_scale_fill_name(col), limits = decide_scale_fill_limits(col), drop = FALSE, na.translate = TRUE) + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), panel.background = ggplot2::element_blank(), panel.border = ggplot2::element_blank(), axis.title = ggplot2::element_blank(), axis.text = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank(), legend.position = "none") if (labels) { g <- g + ggplot2::geom_text(data = df.resid, ggplot2::aes(x = .data$x, y = .data$y, label = complete_label, colour = I(label.col))) } # fixed coordinates if user desires if (fixed) { g <- g + ggplot2::coord_fixed() } # legend if user desires if (legend) { g <- g + ggplot2::theme(legend.position = "right") } plot(g) } draw_wenxiang(file = NULL, sequence = "LDQLRKLLSY", col = c("red", "blue", "yellow", "gray"), labels = TRUE, label.col = "black", legend = TRUE, size = FALSE, comp_label = FALSE, fixed = TRUE, curves = 1, shapes = TRUE)