086: NCAAT WAB

cbbplotR
gt
Published

March 26, 2024

Load data

Code
library(cbbplotR)

# TourneyWAB
# Idea is we don't need home or away because all games are neutral 
# Semi-home would be interesting, but not sure that is worth sorting out

# Add the efficiency ratings for the neutral 
barts <- cbbdata::cbd_torvik_ratings(year = "2024") |>
  dplyr::select(team, barthag, adj_o, adj_d) |>
  dplyr::mutate(
    N = (adj_o ^ 11.5) / (adj_o ^ 11.5 + adj_d ^ 11.5)
  ) |>
  tidyr::pivot_longer(cols = c(N),
                      names_to = "location",
                      values_to = "rtg")

# NCAA Tournament seeds and teams 
# Excluding the First Four because none of those teams advanced 
seeds <- readr::read_csv("https://gist.githubusercontent.com/gallochris/86fc2a21dd4b30e3dd9ea79516594f05/raw/1e2dfa7eae2d196f5dee9ecf5fe95e5f76ba827d/ncaat_seeds.csv") |> 
  dplyr::select(seed = Seed, team = Team, region = Region)

ncaat_teams <- seeds |> 
  dplyr::filter(!team %in% c("Virginia", "Boise St.", "Montana St.", "Howard")) |> 
  dplyr::pull(team) 


# Add the played schedule for the tournament teams only 
ncaat_sched <- cbbdata::cbd_torvik_game_stats(year = 2024, type = "post") |> 
  dplyr::filter(date > "2024-03-20") |> 
  dplyr::filter(team %in% ncaat_teams) |> 
  dplyr::left_join(seeds, by ="team") |> 
  dplyr::select(seed, region, date, game_id, team, conf, opp, opp_conf, 
                location, result, pts_scored, pts_allowed, tempo, 
                game_score, off_ppp, off_efg,off_to, off_or, 
                off_ftr, def_ppp, def_efg,def_to, def_or, def_ftr) |> 

  dplyr::mutate(opp_location = dplyr::case_match(location,
                                                 "N" ~ "N")) |>
  dplyr::arrange(date)

# Use the worst 4 seed by Alabama
barts |>
  dplyr::filter(team == "Alabama" & location == "N") |>
  dplyr::pull(rtg) -> bubbleNeut 
  

# Calculate the WabW 
ncaat_with_rtg <- ncaat_sched |>
  dplyr::left_join(barts, by = c("team" = "team",
                                 "location" = "location")) |>
  dplyr::left_join(barts, by = c("opp" = "team",
                                 "opp_location" = "location"))  |>
  dplyr::rename(team_rtg = rtg.x, opp_rtg = rtg.y)  |>
  dplyr::mutate(
    bub_rtg = dplyr::case_when(
      location == "N" ~ bubbleNeut
    )
  ) |>
  dplyr::mutate(
    #bub_win_prob = log(bub_rtg / (1 - opp_rtg), base = 5),
    bub_win_prob = (bub_rtg - bub_rtg * opp_rtg) / (bub_rtg + opp_rtg - 2 * bub_rtg * opp_rtg),
    # team A is bubble, team B is opponent
    wab = dplyr::case_when(
      result == "W" ~ (1 - bub_win_prob),
      result == "L" ~ (0 - bub_win_prob),
    ),
    wab_opp = 1 - bub_win_prob,
    score = paste0(pts_scored, "-", pts_allowed),
  ) |>
  dplyr::select(
    date,
    seed,
    region,
    team,
    opp,
    conf,
    opp_conf,
    location,
    result,
    score,
    opp_location,
    team_rtg,
    opp_rtg,
    bub_rtg,
    bub_win_prob,
    wab,
    wab_opp
  ) 

# now pull in the sweet 16 
sweet16 <- c("North Carolina", "Arizona", "Clemson", "Alabama", 
             "Purdue", "Gonzaga", "Tennessee", "Creighton", "North Carolina St.", 
             "Marquette", "Duke", "Houston", "Connecticut", "San Diego St.", 
             "Illinois", "Iowa St.")

ncaat_with_rtg |> 
  dplyr::group_by(team) |> 
  dplyr::summarize(wins = sum(result == "W")) |> 
  dplyr::filter(wins == 2) |> 
  dplyr::pull(team) -> s16list

ncaat_with_rtg |> 
  dplyr::group_by(team) |> 
  dplyr::summarize(wins = sum(result == "W")) |> 
  dplyr::filter(wins == 1) |> dplyr::pull(team) -> r32loserlist


# sweet 16 list 
ncaat_with_rtg |> 
  dplyr::filter(team %in% c(r32loserlist,s16list)) |> 
  dplyr::left_join(seeds, by = c("opp" = "team")) |> 
  dplyr::rename(opp_seed = seed.y, seed = seed.x, region = region.x) |> 
  dplyr::group_by(team,region,conf,seed) |> 
  dplyr::select(seed,region,team,conf,bub_win_prob,wab, opp_seed) |> 
  dplyr::summarize(bwp1 = dplyr::first(bub_win_prob),
                   bwp2 = dplyr::last(bub_win_prob),
                   wab1 = dplyr::first(wab),
                   wab2 = dplyr::last(wab),
                   opp_seed_list = list(opp_seed)) |> 
  dplyr::mutate(wabT = bwp1 * bwp2,
                sumWAB = wab1 + wab2) |> 
  dplyr::arrange(wabT) |> 
  dplyr::filter(team %in% s16list) |> 
  dplyr::select(team, conf, seed, region, opp_seed_list, wabT) |> 
  dplyr::ungroup() -> s16_tbl

Sweet 16 WAB Table

Code
 s16_tbl |> 
  cbbplotR::gt_cbb_teams(team, team) |> 
  dplyr::mutate(conf = dplyr::case_match(conf,
                  "BE" ~ "Big East",
                  "B12" ~ "Big 12", 
                  "B10" ~ "Big Ten",
                  "P12" ~ "Pac-12", 
                  "MWC" ~ "MW",
                  conf ~ conf)) |> 
  gt::gt() |>  
  gt::fmt_markdown(team) |> 
  gt::fmt_percent(columns = (wabT), decimals = 1) |> 
  gt::cols_label(
    team = "", 
    conf = "", 
    seed = "Seed", 
    region = "Region", 
    opp_seed_list = gt::html("Opponent<br>Seeds"),
    wabT = gt::html("WAB <br>Bracket Luck"),
  ) |> 
  cbbplotR::gt_theme_athletic() |>
  gt::cols_align(align = "left", columns = c(team, conf)) |>
  gt::data_color(
    columns = c(wabT), 
    palette = "BuGn"
  ) |> 
  gtExtras::gt_add_divider(columns = c(conf), sides = "right", color = "black") |> 
  gt::tab_header(title = "Sweet 16: WAB-determined Bracket Luck",
                 subtitle = gt::html("Shows the
                                     percent chance the <b>overall 16 seed</b> would win the two games each team played to earn a trip to the Sweet 16. The <b>higher</b> the percentage, the <em>easier</em> the Sweet 16 path.")) |>
  gt::tab_source_note(source_note = gt::html("<hr>Wins-above-baseline is defined by the lowest rated 4 seed (Alabama) or the worst team <em>expected</em> to make the Sweet 16. A lower percentage means a more difficult Sweet 16 path.<br> <hr><b>Data by Ryan (@5th_Factor) and Chris (@dadgumboxscores)<br>
                                               Data from Bart Torvik, cbbdata, and cbbplotR")) |> 
  gt::tab_style(
    locations = gt::cells_title(groups = "subtitle"),
    style = gt::cell_text(
      size = "small"
    )
  ) |> 
  gt::tab_style(
    locations = gt::cells_source_notes(),
    style = gt::cell_text(
      size = "x-small"
    )
  ) |> 
  gt::tab_style(
    style = gt::cell_borders(sides = "top", color = 'black', weight = gt::px(1.5), style = 'solid'),
    locations = gt::cells_body(
      rows = gt::everything()
    )
  ) -> wabBL_tbl


gtExtras::gtsave_extra(wabBL_tbl,
                       filename = "wabBL.png",
                       vheight = 950,
                       vwidth = 650)

wabBL_tbl
Sweet 16: WAB-determined Bracket Luck
Shows the percent chance the overall 16 seed would win the two games each team played to earn a trip to the Sweet 16. The higher the percentage, the easier the Sweet 16 path.
Seed Region Opponent
Seeds
WAB
Bracket Luck

Clemson

ACC 6 West 11, 3 30.9%

Gonzaga

WCC 5 Midwest 12, 4 43.1%

Marquette

Big East 2 South 15, 10 49.1%

Tennessee

SEC 2 Midwest 15, 7 51.3%

North Carolina

ACC 1 West 16, 9 51.6%

North Carolina St.

ACC 11 South 6, 14 53.6%

Iowa St.

Big 12 2 East 15, 7 56.1%

Houston

Big 12 1 South 16, 9 57.2%

Alabama

SEC 4 West 13, 12 60.1%

Connecticut

Big East 1 East 16, 9 60.2%

Creighton

Big East 3 Midwest 14, 11 60.3%

Arizona

Pac-12 2 West 15, 7 60.7%

Duke

ACC 4 South 13, 12 64.4%

San Diego St.

MW 5 East 12, 13 67.1%

Purdue

Big Ten 1 Midwest 16, 8 67.6%

Illinois

Big Ten 3 East 14, 11 67.8%

Wins-above-baseline is defined by the lowest rated 4 seed (Alabama) or the worst team expected to make the Sweet 16. A lower percentage means a more difficult Sweet 16 path.

Data by Ryan (@5th_Factor) and Chris (@dadgumboxscores)
Data from Bart Torvik, cbbdata, and cbbplotR