library(cbbplotR)# load data sched_with_rtg <- readr::read_csv("https://raw.githubusercontent.com/gallochris/wab_data_job/main/data/played_sched_rtg.csv")future_sched_with_ratings <- readr::read_csv("https://raw.githubusercontent.com/gallochris/wab_data_job/main/data/future_sched_rtg.csv")wabRank <- readr::read_csv("https://raw.githubusercontent.com/gallochris/wab_data_job/main/data/wab_rank.csv")clean_played_tbl <- readr::read_csv("https://raw.githubusercontent.com/gallochris/wab_data_job/main/data/clean_played_tbl.csv")# find conference infoconfInfo <- sched_with_rtg |> dplyr::select(team,conf) |>unique()# this is the part to functionize # confWAB <- "ACC" run_confWAB <-function(confWAB) { confSched <- sched_with_rtg |> dplyr::filter(conf %in% confWAB) |> dplyr::filter(conf == opp_conf)#find the median team confTeams <- confInfo |> dplyr::filter(conf %in% confWAB) |> dplyr::pull(team) wabRank |> dplyr::filter(team %in% confTeams) |> dplyr::arrange(trank_rank) |> dplyr::pull(team) -> confTeamsRanked#median team (or if there are an even number, the team worse than median, so team 6 of 10) medianTeam <- confTeamsRanked[ceiling(length(confTeamsRanked) /2)] hcMultiplier <-0.014# Add the efficiency ratings - these are updated via cbbdata every 30 minutes or so? barts <- cbbdata::cbd_torvik_ratings(year ="2024") |> dplyr::select(team, barthag, adj_o, adj_d) |> dplyr::mutate(oHome = adj_o * (1+ hcMultiplier),dHome = adj_d * (1- hcMultiplier),oAway = adj_o * (1- hcMultiplier),dAway = adj_d * (1+ hcMultiplier) ) |> dplyr::mutate(H = (oHome ^11.5) / (oHome ^11.5+ dHome ^11.5),N = (adj_o ^11.5) / (adj_o ^11.5+ adj_d ^11.5),A = (oAway ^11.5) / (oAway ^11.5+ dAway ^11.5) ) |> tidyr::pivot_longer(cols =c(N, H, A),names_to ="location",values_to ="rtg")# Now add entire played schedule for DI teams only all_team_sched <- cbbdata::cbd_torvik_game_stats(year ="2024") |> dplyr::select(game_id, date, team, opp, conf, opp_conf, location, result, pts_scored, pts_allowed) |> dplyr::mutate(opp_location = dplyr::case_match(location,"H"~"A","A"~"H","N"~"N")) |> dplyr::arrange(date)# Define the bubble team and location ratings# Now with Median Conf team bubTeam <- medianTeam# Define second default team - use 55th ranking as a proxy two_bubTeam <- barts |> dplyr::select(team, barthag) |> dplyr::arrange(-barthag) |> dplyr::distinct(team, .keep_all =TRUE) |> dplyr::slice(42) |> dplyr::pull(team)# Bubble location metrics barts |> dplyr::filter(team == bubTeam & location =="H") |> dplyr::pull(rtg) -> bubbleHome barts |> dplyr::filter(team == bubTeam & location =="N") |> dplyr::pull(rtg) -> bubbleNeut barts |> dplyr::filter(team == bubTeam & location =="A") |> dplyr::pull(rtg) -> bubbleAway# Add played schedule with ratings, wab, and NET# The cbbdata::cbd_add_net_quad function adds the opponent's NET# and the Quadrant of the game, which is fun sched_with_rtg <- all_team_sched |> dplyr::left_join(barts, by =c("team"="team","location"="location")) |> dplyr::left_join(barts, by =c("opp"="team","opp_location"="location")) |> dplyr::rename(team_rtg = rtg.x, opp_rtg = rtg.y) |> dplyr::mutate(bub_rtg = dplyr::case_when( location =="H"~ bubbleHome, location =="A"~ bubbleAway, location =="N"~ bubbleNeut ) ) |> dplyr::mutate(#bub_win_prob = log(bub_rtg / (1 - opp_rtg), base = 5),bub_win_prob = (bub_rtg - bub_rtg * opp_rtg) / (bub_rtg + opp_rtg -2* bub_rtg * opp_rtg),# team A is bubble, team B is opponentwab = dplyr::case_when( result =="W"~ (1- bub_win_prob), result =="L"~ (0- bub_win_prob), ),wab_opp =1- bub_win_prob,score = dplyr::if_else( result =="W",paste0(pts_scored, "-", pts_allowed),paste0(pts_allowed, "-", pts_scored) ) ) |> dplyr::select( date, team, opp, conf, opp_conf, location, result, score, opp_location, team_rtg, opp_rtg, bub_rtg, bub_win_prob, wab, wab_opp ) |> cbbdata::cbd_add_net_quad() playedGames <- cbbdata::cbd_torvik_game_stats(year ="2024")$game_id |>unique()#"South Dakota MinesUtah St.11-6" %in% playedGames all_team_future <- cbbdata::cbd_torvik_season_schedule(year ="2024") |> dplyr::filter(!game_id %in% playedGames & type !="nond1") |> dplyr::mutate(team = home, opp = away)### Try to replicate chris' above work for the future games:# Add played schedule with ratings, wab, and NET# The cbbdata::cbd_add_net_quad function adds the opponent's NET# and the Quadrant of the game, which is fun## try this, mutate all_team_future to include "team" as home, opponent as away -> save as homeTeam## then mutate a column that is team as away, opp as home -> save as awayTeam## rbind## then use the bubble ratings file above to pull in the WAB value of "opponent" all_team_future_visitors <- cbbdata::cbd_torvik_season_schedule(year ="2024") |> dplyr::filter(!game_id %in% playedGames & type !="nond1") |> dplyr::mutate(team = away, opp = home) future_sched_with_ratings <-rbind.data.frame(all_team_future, all_team_future_visitors) |> dplyr::mutate(location =ifelse(neutral, "N",ifelse(team == home, "H", "A"))) |> dplyr::mutate(opp_location =ifelse(neutral, "N",ifelse(team == home, "A", "H"))) |> dplyr::arrange(date) |> dplyr::left_join(barts, by =c("team"="team","location"="location")) |> dplyr::left_join(barts, by =c("opp"="team","opp_location"="location")) |> dplyr::rename(team_rtg = rtg.x, opp_rtg = rtg.y) |> dplyr::mutate(bub_rtg = dplyr::case_when( location =="H"~ bubbleHome, location =="A"~ bubbleAway, location =="N"~ bubbleNeut ) ) |> dplyr::mutate(#bub_win_prob = log(bub_rtg / (1 - opp_rtg), base = 5),bub_win_prob = (bub_rtg - bub_rtg * opp_rtg) / (bub_rtg + opp_rtg -2* bub_rtg * opp_rtg)) |> dplyr::mutate(wabW = (1- bub_win_prob),wabL = (-bub_win_prob)) |> dplyr::select( date, team, opp, location, opp_location, team_rtg, opp_rtg, bub_rtg, bub_win_prob, wabW, wabL ) |> cbbdata::cbd_add_net_quad() confSched <- sched_with_rtg |> dplyr::filter(conf == confWAB & opp_conf == confWAB) |> dplyr::mutate(wins =ifelse(result =="W", 1, 0),losses =ifelse(result =="W", 0, 1)) confSched |> dplyr::group_by(team) |> dplyr::summarize(confWAB =sum(wab),possWAB =sum(wab_opp),confWins =sum(wins),confLosses =sum(losses) ) |>## summarize in conference WAB, possible WAB dplyr::arrange(-confWAB) |> dplyr::mutate(confGames = confWins + confLosses) |> dplyr::mutate(confWP = confWins / confGames,bubbleWins = confWins - confWAB,bubbleLosses = confGames - (confWins - confWAB) ) |> dplyr::mutate(bubbleWP = bubbleWins / confGames) -> confWABtablereturn(confWABtable)}run_confWAB(confWAB ="ACC") -> result
Conf WAB table (ACC)
Code
acc_title <-gt_cbb_logo_title(title ='Conference-only WAB',subtitle ="Shows each team's actual record and its expected record using a conference specific bubble team.",type ='conference',value ='ACC',logo_height =40)result |> dplyr::mutate(sos_rank = dplyr::dense_rank(-possWAB),bubbleWins =round(bubbleWins, digits =1),bubbleLosses =round(bubbleLosses, digits =1),confWAB =round(confWAB, digits =1)) |> dplyr::select(team, confWins, confLosses, confWAB, bubbleWins, bubbleLosses, sos_rank) |> cbbplotR::gt_cbb_teams(team, team) |> gt::gt() |> gt::fmt_markdown(team) |> gt::cols_label(team ="",confWins ="W",confLosses ="L", confWAB ="WAB",bubbleWins ="W", bubbleLosses ="L",sos_rank ="SOS" ) |> gt::tab_spanner(columns =c(confWAB, bubbleWins, bubbleLosses, sos_rank), label ="Expected Record Bubble Team") |> cbbplotR::gt_theme_athletic() |> gt::cols_align(align ="left", columns ="team") |> gtExtras::gt_hulk_col_numeric(columns =c(sos_rank), reverse =TRUE) |> gtExtras::gt_hulk_col_numeric(columns =c(confWAB)) |> gt::tab_header(title = gt::html(acc_title)) |> gt::tab_source_note(source_note = gt::html("<hr><br>Wins-above-bubble (WAB) is a résumé metric that reflects how many more (or fewer) games a team has won against its schedule than a bubble-quality team would be expected to win against that same schedule.<br><br> SOS is strength of schedule based on potential WAB to earn.<br><br><br>Data through March 5 games. Miami and Boston College play tonight.<br><hr> <b>Fifth Factor + Bless your chart | data via cbbdata + cbbplotR</b>")) -> tbl_confWABgtExtras::gtsave_extra(tbl_confWAB,filename ="conf_wab_table.png",vheight =875,vwidth =650)tbl_confWAB
Conference-only WAB Shows each team's actual record and its expected record using a conference specific bubble team.
W
L
Expected Record Bubble Team
WAB
W
L
SOS
North Carolina
16
3
6.8
9.2
9.8
12
Duke
15
4
5.4
9.6
9.4
14
Virginia
12
7
2.8
9.2
9.8
11
Pittsburgh
11
8
2.4
8.6
10.4
4
Clemson
11
8
1.6
9.4
9.6
13
Syracuse
11
9
1.5
9.5
10.5
2
Wake Forest
10
9
1.1
8.9
10.1
7
Florida St.
9
10
0.4
8.6
10.4
3
North Carolina St.
9
10
0.2
8.8
10.2
6
Virginia Tech
9
10
-0.1
9.1
9.9
10
Georgia Tech
7
12
-1.7
8.7
10.3
5
Notre Dame
7
12
-2.0
9.0
10.0
8
Miami FL
6
12
-2.1
8.1
9.9
9
Boston College
6
12
-2.9
8.9
9.1
15
Louisville
3
16
-5.1
8.1
10.9
1
Wins-above-bubble (WAB) is a résumé metric that reflects how many more (or fewer) games a team has won against its schedule than a bubble-quality team would be expected to win against that same schedule.
SOS is strength of schedule based on potential WAB to earn.
Data through March 5 games. Miami and Boston College play tonight. Fifth Factor + Bless your chart | data via cbbdata + cbbplotR