# WOMEN'S SOCCER DATA ----
# Directory that holds the workbooks to be read.
file_path <- "~/byc/posts/byc_043/wsoc"

# Full paths of every file in that directory whose name ends in ".xlsx".
# The dot is escaped so it matches a literal "." and the pattern carries no
# stray whitespace; with extra spaces inside the regex, names such as
# "22-23.xlsx" would never match and `file_list` would come back empty.
file_list <- list.files(file_path, pattern = "\\.xlsx$", full.names = TRUE)
#' Read one workbook and tag its rows with the source file name
#'
#' Reads the sheet named "Sheet1" from the workbook at `file_path` and adds a
#' `file_name` column holding the file's base name (directory stripped), so
#' rows stay traceable after several workbooks are stacked together.
#'
#' @param file_path Path to an Excel workbook.
#' @return The sheet's data with an extra `file_name` column.
read_and_add_file_column <- function(file_path) {
  sheet_rows <- readxl::read_excel(path = file_path, sheet = "Sheet1")
  sheet_rows$file_name <- basename(file_path)
  sheet_rows
}
# Read every workbook in `file_list` (one data frame per file) ...
data_list <- file_list |>
  purrr::map(read_and_add_file_column)

# ... then stack them into one table, in the order the files were listed.
all_wsoc <- data_list |>
  dplyr::bind_rows()
# Teams to keep when filtering the `Team` column.
# NOTE(review): every other "State" school in this list is abbreviated
# "St."; confirm that "Washington State" matches the spelling used in the
# data, otherwise that team silently drops out of the `%in%` filter.
ff_t <- c(
  "UCLA",
  "Florida St.",
  "Southern California",
  "Penn St.",
  "Stanford",
  "Santa Clara",
  "Virginia",
  "Duke",
  "West Virginia",
  "North Carolina",
  "BYU",
  "Virginia Tech",
  "Texas A&M",
  "Rutgers",
  "Georgetown",
  "South Carolina",
  "Washington State",
  "Alabama"
)
# Cumulative win/loss/tie record per selected team, reported as of the
# "22-23.xlsx" workbook and sorted from best to worst winning percentage.
#
# Steps:
#   1. Strip any parenthesised suffix from `Team` and trim whitespace. The
#      parentheses are escaped so they match literally; unescaped (or with
#      stray spaces in the pattern) they act as a regex group and nothing is
#      removed.
#   2. Keep only the teams listed in `ff_t`.
#   3. Within each team, take running totals of Won / Lost / Tied. The totals
#      follow the row order of `all_wsoc`.
#      NOTE(review): this presumes the workbooks were stacked in
#      chronological order — confirm the file names sort that way.
#   4. Winning percentage counts a tie as half a win.
#   5. Keep only the rows that came from "22-23.xlsx", which carry the
#      running totals up to and including that file.
#
# The result is ungrouped so later verbs applied to `full_data` do not
# silently operate per team.
full_data <- all_wsoc |>
  dplyr::mutate(Team = trimws(gsub("\\([^)]*\\)", "", Team))) |>
  dplyr::filter(Team %in% ff_t) |>
  dplyr::group_by(Team) |>
  dplyr::mutate(
    W = cumsum(Won),
    L = cumsum(Lost),
    Tie = cumsum(Tied),
    win_pct = (W + (.5 * Tie)) / (W + L + Tie)
  ) |>
  dplyr::ungroup() |>
  dplyr::filter(file_name == "22-23.xlsx") |>
  dplyr::select(Team, W, L, Tie, win_pct) |>
  dplyr::arrange(dplyr::desc(win_pct))
# Load the titles table.
titles <- readr::read_csv(file = "fdata.csv")

# Attach each team's record to the titles table. Every row of `titles` is
# kept; teams without a match in `full_data` get NA in the record columns.
wsoc_data <- titles |>
  dplyr::left_join(full_data, by = "Team")

# Load the titles-by-season data set.
haha <- readr::read_csv(file = "haha.csv")
# NOTRE DAME DATA ----

#' Fetch Notre Dame game-level team stats for a range of seasons
#'
#' Calls `cfbfastR::cfbd_game_team_stats()` once per season with
#' `team = "Notre Dame"`, stamps each season's result with a `year` column,
#' and row-binds all seasons into one table.
#'
#' @param start_year First season to request.
#' @param end_year Last season to request (inclusive).
#' @return The per-season results combined with `dplyr::bind_rows()`.
get_notre_dame_stats <- function(start_year, end_year) {
  # Pull a single season and record which season the rows belong to.
  fetch_season <- function(season) {
    season_stats <- cfbfastR::cfbd_game_team_stats(season, team = "Notre Dame")
    season_stats$year <- season
    season_stats
  }

  seasons <- start_year:end_year
  per_season <- lapply(seasons, fetch_season)
  names(per_season) <- as.character(seasons)

  dplyr::bind_rows(per_season)
}
# Seasons to pull, both ends inclusive.
start_year <- 2014
end_year <- 2022

# Download each season in that range and combine them into one table.
nd_stats <- get_notre_dame_stats(
  start_year = start_year,
  end_year = end_year
)
# Build the per-game results table for Notre Dame.
#
# Eight games are appended by hand, one each for 2014, 2015 and 2017-2022
# (there is no hand-entered row for 2016).
# NOTE(review): these look like postseason games missing from the
# downloaded stats — confirm. Columns not supplied here (e.g. `home_away`)
# are NA on the added rows.
#
# The rows are then ordered by season, trimmed to the columns of interest,
# and given a scoring margin (`diff`) and a result flag. A margin of zero or
# less is labelled "L".
nd_results <- nd_stats |>
  dplyr::add_row(
    year = c(2014, 2015, 2017, 2018, 2019, 2020, 2021, 2022),
    school = "Notre Dame",
    opponent = c(
      "LSU", "Ohio State", "LSU", "Clemson",
      "Iowa State", "Alabama", "Oklahoma State", "South Carolina"
    ),
    opponent_conference = c(
      "SEC", "Big Ten", "SEC", "ACC",
      "Big 12", "SEC", "Big 12", "SEC"
    ),
    points = c(31, 28, 21, 3, 33, 14, 35, 45),
    points_allowed = c(28, 44, 17, 30, 9, 31, 37, 38)
  ) |>
  dplyr::arrange(year) |>
  dplyr::select(
    year, school, opponent, opponent_conference,
    home_away, points, points_allowed
  ) |>
  dplyr::mutate(
    diff = points - points_allowed,
    result = dplyr::if_else(diff > 0, "W", "L")
  )