Here are two plots to consider for this year’s (2018) NCAA tournament. Both use data for the 68 teams featured in the bracket.

library(ggplot2)
library(RCurl)
library(ggrepel)
library(dplyr)

# Download the seeds CSV as text and parse it into a data frame.
df = "https://raw.githubusercontent.com/jskaza/data/master/ncaa2018/seeds2018.csv" %>%
  getURL() %>%
  read.csv(text = .)

# Scatter plot of AdjO (x) against AdjD (y), coloured by Region.
# The y axis is reversed, presumably so that lower (better) defensive
# ratings sit towards the top of the panel.
ggplot(df, aes(AdjO, AdjD, colour = Region)) +
  geom_point(alpha = 0.7, size = 2) +
  # Only teams at the extremes get a name; everyone else gets an empty label.
  geom_text_repel(
    aes(
      label = ifelse(
        AdjD < 90 | AdjO > 122 | AdjD > 110,
        as.character(Team),
        ""
      )
    ),
    show.legend = FALSE
  ) +
  scale_y_reverse() +
  labs(
    x = "Points scored per 100 possessions",
    y = "Points allowed per 100 possessions",
    title = "Adjusted Defense vs. Adjusted Offense",
    subtitle = "Adjusted for Schedule",
    caption = "Source: KenPom"
  ) +
  theme_bw(base_size = 18)

[Figure: "Adjusted Defense vs. Adjusted Offense" scatter plot produced by the code above]

# Flag the elements of `x` that fall outside its Tukey fences.
#
# An element is flagged when it is below Q1 - coef * IQR or above
# Q3 + coef * IQR, where Q1 and Q3 are the 25th and 75th percentiles of `x`
# and IQR = Q3 - Q1.
#
# Args:
#   x:     Numeric vector to test.
#   coef:  Fence multiplier. The default of 1.5 is the rule this function
#          has always applied.
#   na.rm: If TRUE (default), missing values are ignored when computing the
#          quartiles and come back as NA in the result. If FALSE, any
#          missing value makes quantile() raise an error.
#
# Returns:
#   A logical vector the same length as `x`; TRUE marks an outlier.
is_outlier = function(x, coef = 1.5, na.rm = TRUE) {
  # Compute both quartiles in one call; IQR(x) is defined as their difference,
  # so there is no need to call quantile()/IQR() repeatedly.
  quartiles = quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, names = FALSE)
  fence_width = coef * (quartiles[2] - quartiles[1])
  x < quartiles[1] - fence_width | x > quartiles[2] + fence_width
}
# Add an `outlier` label column: the team name where the team's AdjT is an
# outlier within its own Region, and an empty string otherwise.
# is_outlier() is evaluated per Region because of group_by(); ungroup()
# afterwards so df2 does not carry the grouping into any later step.
df2 = df %>%
  group_by(Region) %>%
  mutate(outlier = ifelse(is_outlier(AdjT), as.character(Team), "")) %>%
  ungroup()

# Box plot of AdjT for each Region, with the outlier teams labelled by name.
ggplot(data = df2, aes(x = Region, y = AdjT)) +
  geom_boxplot() +
  geom_text_repel(aes(label = outlier), na.rm = TRUE) +
  labs(
    title = "Tempo by Region",
    subtitle = "Adjusted for Schedule",
    caption = "Source: KenPom"
  ) +
  theme_bw(base_size = 18)

[Figure: "Tempo by Region" box plot produced by the code above]