GE02

Baseball Graphics

library(tidyverse)
## -- Attaching packages ---------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.1     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## -- Conflicts ------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the four CSV tables that the graphs below are built from.
# Paths are relative to the current working directory.
inflation_index <- read.csv(file = "inflation.csv")
salary_data <- read.csv(file = "Salaries.csv")
pitching_data <- read.csv(file = "Pitching.csv")
player_data <- read.csv(file = "Master.csv")

Graph 1: Box Plot

# Treat the year as a categorical variable so that geom_boxplot() draws one
# box per year instead of a single box over a continuous axis.
pitching_data$yearID <- as.factor(pitching_data$yearID)

# Per-year summary of ERA (quartiles, median and extremes); missing ERA values
# are dropped inside each statistic.
# Only summary expressions are passed to summarize(): any other named argument
# (such as `na.rm = TRUE`) would be turned into a constant column of its own.
summary_ERA <- pitching_data %>%
  group_by(yearID) %>%
  summarize(
    Q1 = quantile(ERA, 0.25, na.rm = TRUE),
    median = median(ERA, na.rm = TRUE),
    Q3 = quantile(ERA, 0.75, na.rm = TRUE),
    min = min(ERA, na.rm = TRUE),
    max = max(ERA, na.rm = TRUE)
  )

# Distribution of ERA by year; na.rm = TRUE silences the warning about rows
# with a missing ERA being removed.
ggplot(pitching_data) +
  geom_boxplot(aes(x = yearID, y = ERA), na.rm = TRUE)

Graph 2: Line Plot

# Turn the year factor back into numbers so it can be used on a continuous
# axis. Going through as.character() converts the year labels themselves
# rather than the factor's internal level codes.
summary_ERA[["yearID"]] <- as.numeric(as.character(summary_ERA[["yearID"]]))

# Median ERA over time.
ggplot(summary_ERA, aes(x = yearID, y = median)) +
  geom_line()

Graph 3: Ribbon Plot

# Interquartile band (Q1 to Q3) of ERA per year with the median drawn on top.
ggplot(summary_ERA, aes(x = yearID)) +
  geom_ribbon(aes(ymin = Q1, ymax = Q3), fill = "lightgreen") +
  geom_line(aes(y = median), color = "darkblue")

Graph 4: Line Plot

# Keep only rows with at least 10 games (G), then store G as a double.
pitching_data_filtered <- pitching_data %>%
  filter(G >= 10) %>%
  mutate(G = as.numeric(G))

# For every year, the share of the remaining rows with an ERA of 6 or more and
# the share with an ERA of 3 or less (missing ERAs are ignored). The year is
# converted to a number at the end so it can go on a continuous axis.
summary_pitching_data <- pitching_data_filtered %>%
  group_by(yearID) %>%
  summarize(
    ERA_six_proportion = mean(ERA >= 6, na.rm = TRUE),
    ERA_three_proportion = mean(ERA <= 3, na.rm = TRUE)
  ) %>%
  mutate(yearID = as.numeric(as.character(yearID)))

# One line per proportion. The constant strings mapped to `color` become the
# legend keys, which scale_color_manual() then assigns actual colors to.
ggplot(summary_pitching_data, aes(x = yearID)) +
  geom_line(aes(y = ERA_six_proportion, color = "6 or higher")) +
  geom_line(aes(y = ERA_three_proportion, color = "3 or under")) +
  labs(
    x = "Year",
    y = "Proportion",
    title = "Proportion of Pitchers (pitching at least 10 games)\n With Low and High ERAs by Year"
  ) +
  scale_color_manual(
    name = "ERA",
    values = c("6 or higher" = "red", "3 or under" = "darkblue")
  ) +
  theme_classic()

Graph 5: Ribbon and Line Plot

# Label every player by birthplace. ifelse() propagates NA, so players with a
# missing birthCountry get an NA label.
player_data_2 <- mutate(
  player_data,
  usa = ifelse(birthCountry == "USA", "Born in US", "Born outside USA")
)

# Rename the first column of the inflation table so it can be joined by year.
names(inflation_index)[1] <- "yearID"
head(inflation_index)
##   yearID inflation2015
## 1   1980          2.88
## 2   1981          2.61
## 3   1982          2.46
## 4   1983          2.38
## 5   1984          2.28
## 6   1985          2.20
tail(inflation_index)
##    yearID inflation2015
## 30   2009          1.10
## 31   2010          1.09
## 32   2011          1.05
## 33   2012          1.03
## 34   2013          1.02
## 35   2014          1.00
# Force the join key to character on both sides so the types match
# (presumably read.csv() may have read them as factors -- TODO confirm).
player_data_2$playerID <- as.character(player_data_2$playerID)
salary_data$playerID <- as.character(salary_data$playerID)

# Attach the birthplace label to every salary record. This table has its own
# name so that `summary_salary_inner` only ever holds the inflation join below.
salary_with_birthplace <- inner_join(salary_data, player_data_2, by = "playerID")

# Salary quartiles, median and extremes per year and birthplace group.
summary_salary <- salary_with_birthplace %>%
  group_by(yearID, usa) %>%
  summarize(
    Q1 = quantile(salary, 0.25, na.rm = TRUE),
    median = median(salary, na.rm = TRUE),
    Q3 = quantile(salary, 0.75, na.rm = TRUE),
    min = min(salary, na.rm = TRUE),
    max = max(salary, na.rm = TRUE)
  ) %>%
  ungroup()

# The four ways of combining the salary summary with the inflation table.
summary_salary_inner <- inner_join(summary_salary, inflation_index, by = "yearID")
summary_salary_left <- left_join(summary_salary, inflation_index, by = "yearID")
summary_salary_right <- right_join(summary_salary, inflation_index, by = "yearID")
summary_salary_full <- full_join(summary_salary, inflation_index, by = "yearID")

# The inflation table printed above stops at 2014, so after the left join any
# 2015 rows have no multiplier. Salaries are expressed in 2015 dollars, hence
# the multiplier for 2015 itself is 1.
summary_salary_left[summary_salary_left$yearID == 2015, "inflation2015"] <- 1

# Scale every salary statistic by the year's inflation multiplier.
add_inflation_adjusted <- function(salary_summary) {
  mutate(
    salary_summary,
    median_inflation_adjusted = median * inflation2015,
    Q1_inflation_adjusted = Q1 * inflation2015,
    Q3_inflation_adjusted = Q3 * inflation2015,
    min_inflation_adjusted = min * inflation2015,
    max_inflation_adjusted = max * inflation2015
  )
}

summary_salary_inner <- add_inflation_adjusted(summary_salary_inner)

# Plot from the patched left join rather than the inner join: the inner join
# discards every year that is absent from the inflation table, which would
# throw away the 2015 rows the patch above is meant to keep. Years that still
# have no multiplier are dropped because they cannot be adjusted.
summary_salary_plot <- summary_salary_left %>%
  filter(!is.na(inflation2015)) %>%
  add_inflation_adjusted()

# Ribbon = inflation-adjusted Q1..Q3 per birthplace group, line = adjusted
# median. Fill and color use different legend titles, so two legends are drawn.
ggplot(summary_salary_plot) +
  geom_ribbon(
    aes(
      x = yearID,
      ymin = Q1_inflation_adjusted,
      ymax = Q3_inflation_adjusted,
      fill = usa
    ),
    alpha = 0.4
  ) +
  geom_line(aes(x = yearID, y = median_inflation_adjusted, color = usa)) +
  scale_color_manual(
    values = c("Born in US" = "red", "Born outside USA" = "darkblue"),
    name = "Median Salary"
  ) +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    y = "Annual Salary  \n (Adjusted for Inflation)",
    x = "Year",
    title = "Salaries of Middle 50% of Earners in Major League Baseball"
  ) +
  scale_fill_manual(
    name = "Middle 50% of Earners",
    values = c("Born in US" = "red", "Born outside USA" = "turquoise1")
  ) +
  theme_classic()

css.php