---
title: "Slide Deck 2"
format: html
editor: visual
editor_options:
  chunk_output_type: console
---

```{r}
library("ggradar")
library("ggrepel")
library("tidyverse")
```

```{r}
# TidyTuesday data sets used by the pay-gap and census plots below.
paygap <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-06-28/paygap.csv")
juneteenth_census <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-16/census.csv")
```

```{r}
d12_data <- readr::read_csv("colleges.csv")

# fix column names
colnames(d12_data) <- c("index", "institution", "tuition", "enrollment")

# rankings started at zero??
d12_data <- d12_data |>
  mutate(
    ranking = index + 1,
    # TRUE only for institutions whose name contains "Merced"; a missing
    # institution name is treated as "no label" (FALSE) rather than NA
    forLabel = coalesce(stringr::str_detect(institution, "Merced"), FALSE)
  )

d12_data_for_labels <- d12_data |>
  filter(forLabel)
```

```{r, message = FALSE, warning = FALSE}
# https://www.reddit.com/r/comicbooks/comments/g925r3/marvel_official_power_grid_ratings_sortable_sheet/
d15_data_raw <- readxl::read_xlsx("Marvel Powergrid Ratings.xlsx")

# call on the Fantastic Four!
# there were several superheroes and super-villains with some similar names
# so it was faster to code the search with their real names
fantastic_four_real_names <- c(
  "Richards, Reed",
  "Storm-Richards, Susan",
  "Storm, Jonathon",
  "Grimm, Benjamin"
)

# work on a cleaned copy; the raw import stays untouched in d15_data_raw
d15_data <- d15_data_raw |>
  janitor::clean_names() |>
  rename(fighting = fighting_skills, energy = energy_projection) |>
  # keep a row when real_name contains any one of the four names
  filter(stringr::str_detect(
    real_name,
    stringr::str_c(fantastic_four_real_names, collapse = "|")
  )) |>
  # focus on columns with hero names and stats
  select(1, 6:11) |>
  # ensure numeric columns
  mutate(across(
    c(intelligence, strength, speed, durability, energy, fighting),
    as.numeric
  ))
```

## Man versus Woman

```{r}
# Bar chart counting rows of `data` by direction of the mean hourly pay gap.
# `author` is shown as the subtitle and is the only thing that differs
# between the A and B versions of the plot.
plot_pay_bias <- function(data, author) {
  data |>
    select(diff_mean_hourly_percent) |>
    mutate(pay_bias = case_when(
      diff_mean_hourly_percent > 0 ~ "Men paid more",
      diff_mean_hourly_percent < 0 ~ "Women paid more",
      # NOTE: rows where diff_mean_hourly_percent is NA also land here
      TRUE ~ "Equal pay"
    )) |>
    ggplot(aes(x = pay_bias)) +
    geom_bar(stat = "count") +
    labs(
      title = "Pay Gap Data",
      subtitle = author,
      caption = "Source: gender-pay-gap.service.gov.uk",
      x = "",
      y = "number of jobs"
    )
}

plot1A <- plot_pay_bias(paygap, author = "Samuel Mendez")
```

```{r}
plot1B <- plot_pay_bias(paygap, author = "Ruby Garcia")
```

## White vs Hispanic

```{r}
# Box plot of the mean hourly pay gap for each employer-size band.
# `author` is shown as the subtitle and is the only thing that differs
# between the A and B versions of the plot.
plot_gap_by_employer_size <- function(data, author) {
  data |>
    select(diff_mean_hourly_percent, employer_size) |>
    ggplot() +
    geom_boxplot(
      aes(
        x = employer_size,
        y = diff_mean_hourly_percent,
        fill = employer_size
      ),
      color = "black"
    ) +
    labs(
      title = "Pay Gap Data",
      subtitle = author,
      caption = "Source: gender-pay-gap.service.gov.uk",
      x = "company size (number of employees)",
      y = "difference in wages (percentage)"
    )
}

plot2A <- plot_gap_by_employer_size(paygap, author = "Hector Gomez")
```

```{r}
plot2B <- plot_gap_by_employer_size(paygap, author = "Hunter Gibson")
```

## Masculine vs Feminine

```{r}
# Line chart of the black_free and black_slaves columns over time for the
# "USA Total" region. `line_colours` is a vector of two colours passed to
# scale_color_manual(); it is the only thing that differs between the A and B
# versions of the plot.
plot_census_lines <- function(data, line_colours) {
  data |>
    filter(region == "USA Total") |>
    select(black_free, black_slaves, year) |>
    pivot_longer(
      cols = c(black_free, black_slaves),
      names_to = "classification",
      values_to = "population"
    ) |>
    ggplot() +
    geom_line(
      aes(
        x = year,
        y = population,
        color = classification,
        group = classification
      ),
      size = 3
    ) +
    scale_color_manual(values = line_colours) +
    theme_minimal() +
    labs(
      title = "US Slave Population",
      subtitle = "Samuel Klindermann",
      caption = "Source: US Census, BlackPast.org"
    )
}

plot3A <- plot_census_lines(juneteenth_census, c("blue", "green"))
```

```{r}
plot3B <- plot_census_lines(juneteenth_census, c("pink", "purple"))
```

## Man vs Woman (as subjects of the graph)

```{r}
# Radar chart of the power-grid stats of a single character.
#   data      data frame whose `character` column identifies each hero
#   hero      value of `character` to plot
#   colour    colour used for the polygon, the title and the subtitle
#   title, subtitle   text shown above the chart
plot_hero_radar <- function(data, hero, colour, title, subtitle) {
  data |>
    filter(character == hero) |>
    ggradar(
      grid.min = 0, grid.mid = 3, grid.max = 7,
      # Polygons
      group.line.width = 2,
      group.point.size = 7,
      group.colours = c(colour),
      # Background and grid lines
      background.circle.colour = "white",
      gridline.mid.colour = "grey",
      legend.position = "bottom"
    ) +
    labs(title = title, subtitle = subtitle) +
    theme(
      plot.subtitle = element_text(color = colour, hjust = 0.5, size = 20),
      plot.title = element_text(color = colour, hjust = 0.5, size = 25)
    )
}

# Mr Fantastic
plot4A <- plot_hero_radar(
  d15_data,
  hero = "Mister Fantastic",
  colour = "blue",
  title = "Mr Fantastic",
  subtitle = "Reed Richards"
)
```

```{r}
# Invisible Woman
plot4B <- plot_hero_radar(
  d15_data,
  hero = "Invisible Woman",
  colour = "red",
  title = "Invisible Woman",
  subtitle = "Sue Storm-Richards"
)
```

## Emoji or not

```{r, eval = FALSE}
# Scatter plot of tuition against ranking with UC Merced called out by a
# text label only (no logo).
plot5A <- d12_data |>
  ggplot(aes(x = ranking, y = tuition)) +
  geom_point(color = "#00588D") +
  geom_label_repel(
    aes(x = ranking, y = tuition, label = "University of California Merced"),
    data = d12_data_for_labels,
    fill = "#EBB434",
    # nudge_x = 100,
    nudge_y = -33000,
    # point.padding = NA,
    segment.color = "#EBB434",
    segment.size = 2,
    arrow = arrow(ends = "last", length = unit(0.03, "npc"), type = "closed")
  ) +
  labs(
    title = "UC Merced Entered the Top 100",
    subtitle = "30 Day Chart Challenge, Day 12: Emulating The Economist",
    x = "US News Ranking (2022)",
    y = "Out-of-State Tuition (US dollars)",
    caption = "Source: Kaggle, Neel Gajare"
  ) +
  ggthemes::theme_economist() +
  theme(
    panel.grid.major = element_line(color = "#B7C6CF"),
    plot.caption = element_text(hjust = 0.0)
  )
```

```{r, eval = FALSE}
# Same chart as plot5A with two logo images drawn on top: a small one at the
# data point and a larger one at a fixed position.
# NOTE(review): `logo_path` is not defined anywhere in this document; it has
# to be supplied (as a column of d12_data_for_labels or a variable) before
# this chunk can be enabled.
plot5B <- plot5A +
  ggimage::geom_image(
    aes(x = ranking, y = tuition, image = logo_path),
    data = d12_data_for_labels,
    size = 0.05,
    by = "width",
    asp = 1.618
  ) +
  ggimage::geom_image(
    aes(x = 350, y = 57500, image = logo_path),
    data = d12_data_for_labels,
    size = 0.2,
    by = "width",
    asp = 1.618
  )
```

```{r}
# https://www.khstats.com/blog/trt-timelines/multiple-vars/
# id, steroids, death and severe are read as character columns.
dat_long <- read_csv(
  "https://raw.githubusercontent.com/kathoffman/steroids-trial-emulation/main/data/dat_trt_timeline.csv",
  col_types = list(id = "c", steroids = "c", death = "c", severe = "c")
)

# define colors for all geometries with a color argument
cols <- c(
  "Severe hypoxia" = "#b24745", # red
  "Intubated" = "darkslateblue", # navy
  "Not intubated" = "#74aaff", # lighter blue
  "Steroids" = "#ffd966", # gold
  "Death" = "black"
)

shapes <- c(
  "Severe hypoxia" = 21, # empty circle (control inside with fill argument if desired)
  "Steroids" = 15, # square
  "Death" = 4 # cross
)

# legend overrides; order matches `cols`:
# severe, intubation (yes/no), steroids, death
shape_override <- c(21, NA, NA, 15, 4)
line_override <- c(NA, 1, 1, NA, NA)
stroke_override <- c(.8, 1, 1, 1, 1)
size_override <- c(2.5, 2.5, 2.6, 2, 2)

# modify swimmer data to
# 1) only show events if yes
# 2) have an id ordered by max follow up
dat_swim <- dat_long |>
  mutate(
    # day of the event when the flag equals 1, NA otherwise
    severe_this_day = case_when(severe == 1 ~ day),
    steroids_this_day = case_when(steroids == 1 ~ day),
    death_this_day = case_when(death == 1 ~ day)
  ) |>
  group_by(id) |>
  mutate(max_day = max(day)) |>
  ungroup() |>
  # collapse to one row per patient so row_number() ranks patients, not days;
  # the list-column is called `timeline` to avoid clashing with the colour
  # vector `cols` defined above
  nest(timeline = day:death_this_day) |>
  arrange(max_day) |>
  mutate(id_sorted = factor(row_number())) |>
  unnest(cols = timeline)

plot6A <- dat_swim |>
  ggplot() +
  geom_line(
    aes(x = day, y = id_sorted, col = intubation_status, group = id_sorted),
    size = 1.8
  ) +
  geom_point(
    aes(x = steroids_this_day, y = id_sorted, col = "Steroids", shape = "Steroids"),
    stroke = 2
  ) +
  geom_point(
    aes(
      x = severe_this_day, y = id_sorted,
      col = "Severe hypoxia", shape = "Severe hypoxia"
    ),
    size = 2, stroke = 1.5
  ) +
  geom_point(
    aes(x = death_this_day, y = id_sorted, col = "Death", shape = "Death"),
    size = 2, stroke = 1.5
  ) +
  theme_bw() +
  scale_color_manual(values = cols, name = "Patient Status") +
  scale_shape_manual(values = shapes, name = "Patient Status") +
  # show a single combined legend: colour keys drawn with the matching
  # shape / line type, separate shape legend hidden
  guides(
    color = guide_legend(
      override.aes = list(
        shape = shape_override,
        linetype = line_override
      )
    ),
    shape = "none"
  ) +
  labs(
    x = "Days since hospitalization",
    y = "Patient\nnumber",
    title = "Treatment Timeline for N=30 Patients",
    subtitle = "Katherine Hoffman"
  ) +
  scale_x_continuous(expand = c(0, 0)) + # remove extra white space
  theme(
    text = element_text(family = "Poppins", size = 11),
    title = element_text(angle = 0, vjust = .5, size = 12, face = "bold"),
    axis.title.y = element_text(angle = 0, vjust = .5, size = 12, face = "bold"),
    axis.title.x = element_text(size = 15, face = "bold", vjust = -0.5, hjust = 0),
    axis.text.y = element_text(size = 6, hjust = 1.5),
    axis.ticks.y = element_blank(),
    legend.position = c(0.8, 0.3),
    legend.title = element_text(colour = "black", size = 13, face = 4),
    legend.text = element_text(colour = "black", size = 10),
    legend.background = element_rect(size = 0.5, linetype = "solid", colour = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

# identical chart, only the subtitle differs
plot6B <- plot6A +
  labs(
    x = "Days since hospitalization",
    y = "Patient\nnumber",
    title = "Treatment Timeline for N=30 Patients",
    subtitle = "Jose Manuel Vera"
  )
```

```{r}
# Write every plot to "<name>.png" at 1600 x 1200 px.
# plot5A and plot5B are left out because their chunks are not evaluated
# (eval = FALSE), so those objects do not exist when this chunk runs.
plots_to_save <- list(
  plot1A = plot1A,
  plot1B = plot1B,
  plot2A = plot2A,
  plot2B = plot2B,
  plot3A = plot3A,
  plot3B = plot3B,
  plot4A = plot4A,
  plot4B = plot4B,
  plot6A = plot6A,
  plot6B = plot6B
)

purrr::iwalk(plots_to_save, \(plot, name) {
  ggsave(
    paste0(name, ".png"), plot,
    device = "png",
    height = 1200, width = 1600,
    units = "px"
  )
})
```