---
title: "Slide Deck 2"
format: html
editor: visual
editor_options:
  chunk_output_type: console
---
```{r}
library("ggradar")
library("ggrepel")
library("tidyverse")
```
```{r}
# Source tables are read directly from the TidyTuesday GitHub repository
# (network access is required when this chunk runs).
paygap <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-06-28/paygap.csv"
)
juneteenth_census <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-16/census.csv"
)
```
```{r}
# College table from a local CSV. The file's own headers are overwritten with
# short names (assumes the file has exactly these four columns, in this
# order -- TODO confirm against colleges.csv).
d12_data <- readr::read_csv("colleges.csv")
names(d12_data) <- c("index", "institution", "tuition", "enrollment")

# `index` appears to start at zero, so shift it by one to get the ranking.
# `forLabel` flags rows whose institution name contains "Merced"; `%in% TRUE`
# maps a missing match result to FALSE rather than NA.
d12_data <- d12_data |>
  mutate(
    ranking = index + 1,
    forLabel = stringr::str_detect(institution, "Merced") %in% TRUE
  )

# Just the flagged rows, kept separately for use as plot annotations.
d12_data_for_labels <- filter(d12_data, forLabel)
```
```{r, message = FALSE, warning = FALSE}
# https://www.reddit.com/r/comicbooks/comments/g925r3/marvel_official_power_grid_ratings_sortable_sheet/
d15_data_raw <- readxl::read_xlsx("Marvel Powergrid Ratings.xlsx")

# Build the working copy in one pipeline; the raw sheet stays untouched in
# `d15_data_raw`.
d15_data <- d15_data_raw |>
  # snake_case column names, plus shorter names for two of the stats
  janitor::clean_names() |>
  rename(
    fighting = fighting_skills,
    energy = energy_projection
  ) |>
  # Call on the Fantastic Four!
  # Several superheroes and super-villains have similar names, so the search
  # is done on their real names instead.
  filter(
    stringr::str_detect(real_name, "Richards, Reed") |
      stringr::str_detect(real_name, "Storm-Richards, Susan") |
      stringr::str_detect(real_name, "Storm, Jonathon") |
      stringr::str_detect(real_name, "Grimm, Benjamin")
  ) |>
  # keep the first column and columns 6 to 11, selected by position
  # (presumably the hero name and the six stats converted below -- verify
  # against the sheet layout)
  select(1, 6:11) |>
  # make sure every stat column is numeric
  mutate(
    across(
      c(intelligence, strength, speed, durability, energy, fighting),
      as.numeric
    )
  )
```
## Man versus Woman
```{r}
# Bar chart counting paygap rows per pay-bias category. The category comes
# from the sign of diff_mean_hourly_percent; any row that matches neither
# comparison (zero or a missing value) falls through to "Equal pay".
plot1A <- paygap |>
  select(diff_mean_hourly_percent) |>
  mutate(
    pay_bias = case_when(
      diff_mean_hourly_percent < 0 ~ "Women paid more",
      diff_mean_hourly_percent > 0 ~ "Men paid more",
      TRUE ~ "Equal pay"
    )
  ) |>
  ggplot(mapping = aes(x = pay_bias)) +
  geom_bar() + # default stat is "count"
  labs(
    x = "",
    y = "number of jobs",
    title = "Pay Gap Data",
    subtitle = "Samuel Mendez",
    caption = "Source: gender-pay-gap.service.gov.uk"
  )
```
```{r}
# Same bar chart as plot1A; only the subtitle differs. The category comes
# from the sign of diff_mean_hourly_percent; any row that matches neither
# comparison (zero or a missing value) falls through to "Equal pay".
plot1B <- paygap |>
  select(diff_mean_hourly_percent) |>
  mutate(
    pay_bias = case_when(
      diff_mean_hourly_percent < 0 ~ "Women paid more",
      diff_mean_hourly_percent > 0 ~ "Men paid more",
      TRUE ~ "Equal pay"
    )
  ) |>
  ggplot(mapping = aes(x = pay_bias)) +
  geom_bar() + # default stat is "count"
  labs(
    x = "",
    y = "number of jobs",
    title = "Pay Gap Data",
    subtitle = "Ruby Garcia",
    caption = "Source: gender-pay-gap.service.gov.uk"
  )
```
## White vs Hispanic
```{r}
# Box plots of diff_mean_hourly_percent, one box per employer_size value,
# filled by employer_size and outlined in black.
plot2A <- paygap |>
  select(diff_mean_hourly_percent, employer_size) |>
  ggplot(
    mapping = aes(
      x = employer_size,
      y = diff_mean_hourly_percent,
      fill = employer_size
    )
  ) +
  geom_boxplot(color = "black") +
  labs(
    x = "company size (number of employees)",
    y = "difference in wages (percentage)",
    title = "Pay Gap Data",
    subtitle = "Hector Gomez",
    caption = "Source: gender-pay-gap.service.gov.uk"
  )
```
```{r}
# Same box plots as plot2A; only the subtitle differs. One box per
# employer_size value, filled by employer_size and outlined in black.
plot2B <- paygap |>
  select(diff_mean_hourly_percent, employer_size) |>
  ggplot(
    mapping = aes(
      x = employer_size,
      y = diff_mean_hourly_percent,
      fill = employer_size
    )
  ) +
  geom_boxplot(color = "black") +
  labs(
    x = "company size (number of employees)",
    y = "difference in wages (percentage)",
    title = "Pay Gap Data",
    subtitle = "Hunter Gibson",
    caption = "Source: gender-pay-gap.service.gov.uk"
  )
```
## Masculine vs Feminine
```{r}
# Line chart of the black_free and black_slaves columns over year, restricted
# to rows whose region is "USA Total". This variant uses a blue/green palette.
plot3A <- juneteenth_census |>
  filter(region == "USA Total") |>
  select(black_free, black_slaves, year) |>
  # long format: one row per (year, classification) pair
  pivot_longer(
    cols = -year,
    names_to = "classification",
    values_to = "population"
  ) |>
  ggplot(
    mapping = aes(
      x = year,
      y = population,
      color = classification,
      group = classification
    )
  ) +
  geom_line(size = 3) +
  scale_color_manual(values = c("blue", "green")) +
  theme_minimal() +
  labs(
    title = "US Slave Population",
    subtitle = "Samuel Klindermann",
    caption = "Source: US Census, BlackPast.org"
  )
```
```{r}
# Same line chart as plot3A (black_free and black_slaves over year for the
# "USA Total" region); only the palette differs, here pink/purple.
plot3B <- juneteenth_census |>
  filter(region == "USA Total") |>
  select(black_free, black_slaves, year) |>
  # long format: one row per (year, classification) pair
  pivot_longer(
    cols = -year,
    names_to = "classification",
    values_to = "population"
  ) |>
  ggplot(
    mapping = aes(
      x = year,
      y = population,
      color = classification,
      group = classification
    )
  ) +
  geom_line(size = 3) +
  scale_color_manual(values = c("pink", "purple")) +
  theme_minimal() +
  labs(
    title = "US Slave Population",
    subtitle = "Samuel Klindermann",
    caption = "Source: US Census, BlackPast.org"
  )
```
## Man vs Woman (as subjects of the graph)
```{r}
# Mr Fantastic: radar chart of the single d15_data row whose character is
# "Mister Fantastic", drawn in blue on a 0-7 grid.
plot4A <- ggradar(
  filter(d15_data, character == "Mister Fantastic"),
  # Grid range
  grid.min = 0,
  grid.mid = 3,
  grid.max = 7,
  # Polygons
  group.line.width = 2,
  group.point.size = 7,
  group.colours = "blue",
  # Background and grid lines
  background.circle.colour = "white",
  gridline.mid.colour = "grey",
  legend.position = "bottom"
) +
  labs(title = "Mr Fantastic", subtitle = "Reed Richards") +
  # centred title and subtitle in the same colour as the polygon
  theme(
    plot.title = element_text(color = "blue", hjust = 0.5, size = 25),
    plot.subtitle = element_text(color = "blue", hjust = 0.5, size = 20)
  )
```
```{r}
# Invisible Woman: radar chart of the single d15_data row whose character is
# "Invisible Woman", drawn in red on a 0-7 grid.
plot4B <- ggradar(
  filter(d15_data, character == "Invisible Woman"),
  # Grid range
  grid.min = 0,
  grid.mid = 3,
  grid.max = 7,
  # Polygons
  group.line.width = 2,
  group.point.size = 7,
  group.colours = "red",
  # Background and grid lines
  background.circle.colour = "white",
  gridline.mid.colour = "grey",
  legend.position = "bottom"
) +
  labs(title = "Invisible Woman", subtitle = "Sue Storm-Richards") +
  # centred title and subtitle in the same colour as the polygon
  theme(
    plot.title = element_text(color = "red", hjust = 0.5, size = 25),
    plot.subtitle = element_text(color = "red", hjust = 0.5, size = 20)
  )
```
## Emoji or not
```{r, eval = FALSE}
# Tuition-vs-ranking scatter in an Economist-like theme, with a repelled label
# attached to the flagged row(s) in d12_data_for_labels.
# This is the variant WITHOUT logo images: the two ggimage::geom_image()
# layers that plot5B adds are intentionally left out here.
plot5A <- ggplot(d12_data, aes(x = ranking, y = tuition)) +
  geom_point(color = "#00588D") +
  geom_label_repel(
    data = d12_data_for_labels,
    mapping = aes(
      x = ranking,
      y = tuition,
      label = "University of California Merced"
    ),
    fill = "#EBB434",
    # push the label down, away from the point it annotates
    nudge_y = -33000,
    segment.color = "#EBB434",
    segment.size = 2,
    arrow = arrow(
      type = "closed",
      ends = "last",
      length = unit(0.03, "npc")
    )
  ) +
  labs(
    x = "US News Ranking (2022)",
    y = "Out-of-State Tuition (US dollars)",
    title = "UC Merced Entered the Top 100",
    subtitle = "30 Day Chart Challenge, Day 12: Emulating The Economist",
    caption = "Source: Kaggle, Neel Gajare"
  ) +
  ggthemes::theme_economist() +
  theme(
    plot.caption = element_text(hjust = 0.0),
    panel.grid.major = element_line(color = "#B7C6CF")
  )
```
```{r, eval = FALSE}
# Tuition-vs-ranking scatter in an Economist-like theme, with a repelled label
# attached to the flagged row(s) in d12_data_for_labels.
# This is the variant WITH logo images: one small image at the flagged point
# and one larger image at a fixed position (x = 350, y = 57500).
# NOTE(review): `logo_path` is not created anywhere in the visible part of
# this file -- confirm where it comes from before enabling this chunk.
plot5B <- ggplot(d12_data, aes(x = ranking, y = tuition)) +
  geom_point(color = "#00588D") +
  geom_label_repel(
    data = d12_data_for_labels,
    mapping = aes(
      x = ranking,
      y = tuition,
      label = "University of California Merced"
    ),
    fill = "#EBB434",
    # push the label down, away from the point it annotates
    nudge_y = -33000,
    segment.color = "#EBB434",
    segment.size = 2,
    arrow = arrow(
      type = "closed",
      ends = "last",
      length = unit(0.03, "npc")
    )
  ) +
  # small logo on the data point itself
  ggimage::geom_image(
    data = d12_data_for_labels,
    mapping = aes(x = ranking, y = tuition, image = logo_path),
    size = 0.05,
    by = "width",
    asp = 1.618
  ) +
  # larger logo at a fixed spot on the panel
  ggimage::geom_image(
    data = d12_data_for_labels,
    mapping = aes(x = 350, y = 57500, image = logo_path),
    size = 0.2,
    by = "width",
    asp = 1.618
  ) +
  labs(
    x = "US News Ranking (2022)",
    y = "Out-of-State Tuition (US dollars)",
    title = "UC Merced Entered the Top 100",
    subtitle = "30 Day Chart Challenge, Day 12: Emulating The Economist",
    caption = "Source: Kaggle, Neel Gajare"
  ) +
  ggthemes::theme_economist() +
  theme(
    plot.caption = element_text(hjust = 0.0),
    panel.grid.major = element_line(color = "#B7C6CF")
  )
```
```{r}
# Swimmer-style treatment timeline, adapted from
# https://www.khstats.com/blog/trt-timelines/multiple-vars/
# id and the three event flags are read as character columns.
dat_long <- read_csv(
  "https://raw.githubusercontent.com/kathoffman/steroids-trial-emulation/main/data/dat_trt_timeline.csv",
  col_types = list(id = "c", steroids = "c", death = "c", severe = "c")
)

# define colors for all geometries with a color argument
cols <- c(
  "Severe hypoxia" = "#b24745", # red
  "Intubated" = "darkslateblue", # navy
  "Not intubated" = "#74aaff", # lighter blue
  "Steroids" = "#ffd966", # gold
  "Death" = "black"
)

# point shapes for the three event types
shapes <- c(
  "Severe hypoxia" = 21, # empty circle (control inside with fill argument if desired)
  "Steroids" = 15, # square
  "Death" = 4 # cross
)

# Legend overrides. Order matches `cols`:
# severe, intubation (yes/no), steroids, death
shape_override <- c(21, NA, NA, 15, 4)
line_override <- c(NA, 1, 1, NA, NA)
# NOTE(review): the next two vectors are defined but not referenced anywhere
# below in this file; kept in case they are meant for `override.aes`.
stroke_override <- c(.8, 1, 1, 1, 1)
size_override <- c(2.5, 2.5, 2.6, 2, 2)

# modify swimmer data to 1) only show events if yes 2) have an id ordered by
# max follow up
dat_swim <-
  dat_long |>
  # event columns hold `day` where the flag equals 1 and NA everywhere else
  # (case_when without a fallback branch)
  mutate(
    severe_this_day = case_when(severe == 1 ~ day),
    steroids_this_day = case_when(steroids == 1 ~ day),
    death_this_day = case_when(death == 1 ~ day)
  ) |>
  group_by(id) |>
  mutate(max_day = max(day)) |>
  ungroup() |>
  # collapse the per-day columns into a list-column so that rows can be
  # sorted and numbered by max_day, then expand back to one row per day
  nest(cols = day:death_this_day) |>
  arrange(max_day) |>
  mutate(id_sorted = factor(row_number())) |>
  # name the list-column explicitly: calling unnest() without `cols` is
  # deprecated and emits a warning
  unnest(cols = cols)

plot6A <- dat_swim |>
  ggplot() +
  # one horizontal line per patient, colored by intubation status
  geom_line(
    aes(x = day, y = id_sorted, col = intubation_status, group = id_sorted),
    size = 1.8
  ) +
  # event markers; rows with NA in the *_this_day column have no position
  geom_point(
    aes(x = steroids_this_day, y = id_sorted, col = "Steroids", shape = "Steroids"),
    stroke = 2
  ) +
  geom_point(
    aes(x = severe_this_day, y = id_sorted, col = "Severe hypoxia", shape = "Severe hypoxia"),
    size = 2, stroke = 1.5
  ) +
  geom_point(
    aes(x = death_this_day, y = id_sorted, col = "Death", shape = "Death"),
    size = 2, stroke = 1.5
  ) +
  theme_bw() +
  scale_color_manual(values = cols, name = "Patient Status") +
  scale_shape_manual(values = shapes, name = "Patient Status") +
  # single combined legend: the color legend takes over shape and linetype,
  # and the separate shape legend is hidden
  guides(
    color = guide_legend(
      override.aes = list(
        shape = shape_override,
        linetype = line_override
      )
    ),
    shape = "none"
  ) +
  labs(
    x = "Days since hospitalization",
    y = "Patient\nnumber",
    title = "Treatment Timeline for N=30 Patients",
    subtitle = "Katherine Hoffman"
  ) +
  scale_x_continuous(expand = c(0, 0)) + # remove extra white space
  theme(
    text = element_text(family = "Poppins", size = 11),
    title = element_text(angle = 0, vjust = .5, size = 12, face = "bold"),
    axis.title.y = element_text(angle = 0, vjust = .5, size = 12, face = "bold"),
    axis.title.x = element_text(size = 15, face = "bold", vjust = -0.5, hjust = 0),
    axis.text.y = element_text(size = 6, hjust = 1.5),
    axis.ticks.y = element_blank(),
    legend.position = c(0.8, 0.3),
    legend.title = element_text(colour = "black", size = 13, face = 4),
    legend.text = element_text(colour = "black", size = 10),
    legend.background = element_rect(size = 0.5, linetype = "solid", colour = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

# Same plot as plot6A; only the subtitle differs.
plot6B <- plot6A +
  labs(
    x = "Days since hospitalization",
    y = "Patient\nnumber",
    title = "Treatment Timeline for N=30 Patients",
    subtitle = "Jose Manuel Vera"
  )
```
```{r}
# Write every finished plot to "<object name>.png" in the working directory,
# 1600 x 1200 pixels each. plot5A and plot5B are not saved: their chunks are
# marked eval = FALSE, so those objects are never created.
tibble::lst(
  plot1A, plot1B,
  plot2A, plot2B,
  plot3A, plot3B,
  plot4A, plot4B,
  plot6A, plot6B
) |>
  purrr::iwalk(\(plot_obj, plot_name) {
    ggsave(
      paste0(plot_name, ".png"),
      plot_obj,
      device = "png",
      height = 1200,
      width = 1600,
      units = "px"
    )
  })
```