…with help from tools that other already people made.
library(tidyverse)
library(glue)
library(gt)
library(hrbrthemes)
library(ggpomological)
library(highcharter)
Grab the the most recent advanced metrics from basketball reference using the {nbastatR} package by Alex Bresler. Note, running bref_players_stats()
will assign the output data frames, dataBREFPlayerTotals
and dataBREFPlayerAdvanced
, to the environment, so we don’t need to do anything else (I rename them for my own sanity).
library(nbastatR)
bref_players_stats(seasons = 2019, tables = c("advanced", "totals"),
widen = TRUE, assign_to_environment = TRUE)
bref_advanced <- dataBREFPlayerAdvanced
bref_totals <- dataBREFPlayerTotals
I always like to start out by skimming with the skimr package…
skimr::skim(bref_advanced)
#> Skim summary statistics
#> n obs: 501
#> n variables: 36
#>
#> ── Variable type:character ──────────────────────────────────────────────────────────────────
#> variable missing complete n min max empty n_unique
#> idPlayer 0 501 501 6 9 0 501
#> idPlayerSeason 0 501 501 11 14 0 501
#> idPosition 0 501 501 1 5 0 13
#> namePlayer 0 501 501 4 24 0 501
#> namePlayerBREF 0 501 501 7 24 0 501
#> slugSeason 0 501 501 7 7 0 1
#> slugTeamBREF 0 501 501 3 3 0 31
#> urlPlayerHeadshot 15 486 501 86 89 0 486
#> urlPlayerThumbnail 0 501 501 52 89 0 501
#>
#> ── Variable type:logical ────────────────────────────────────────────────────────────────────
#> variable missing complete n mean count
#> isHOFPlayer 0 501 501 0 FAL: 501, NA: 0
#>
#> ── Variable type:numeric ────────────────────────────────────────────────────────────────────
#> variable missing complete n mean sd p0
#> agePlayer 0 501 501 26.08 4.18 19
#> countGames 0 501 501 44.04 22.57 1
#> idPlayerNBA 0 501 501 880176.29 724825.17 1114
#> minutes 0 501 501 1005.99 745.5 1
#> pct3PRate 5 496 501 0.36 0.21 0
#> pctAST 0 501 501 0.13 0.094 0
#> pctBLK 0 501 501 0.17 0.28 0
#> pctDRB 0 501 501 0.15 0.084 0
#> pctFTRate 5 496 501 0.24 0.14 0
#> pctORB 0 501 501 0.073 0.14 0
#> pctSTL 0 501 501 0.12 0.26 0
#> pctTOV 4 497 501 0.12 0.055 0
#> pctTRB 0 501 501 0.1 0.059 0
#> pctTrueShooting 4 497 501 0.53 0.11 0
#> pctUSG 0 501 501 0.19 0.059 0
#> ratioBPM 0 501 501 -1.95 5.61 -52.7
#> ratioDBPM 0 501 501 -0.55 2.68 -20.5
#> ratioDWS 0 501 501 1.02 0.98 -0.4
#> ratioOBPM 0 501 501 -1.4 4.3 -36.1
#> ratioOWS 0 501 501 1.1 1.66 -2.7
#> ratioPER 0 501 501 12.99 7.62 -38
#> ratioVORP 0 501 501 0.51 1.18 -1.9
#> ratioWS 0 501 501 2.12 2.44 -1.6
#> ratioWSPer48 0 501 501 0.076 0.12 -0.95
#> yearSeason 0 501 501 2018 0 2018
#> yearSeasonFirst 15 486 501 2013.49 4.36 1989
#> p25 p50 p75 p100 hist
#> 23 25 29 42 ▃▇▇▆▃▁▁▁
#> 26 50 64 72 ▃▂▂▂▂▃▅▇
#> 2e+05 2e+05 1628383 1629541 ▇▁▁▁▁▁▁▇
#> 292 945 1672 2671 ▇▃▃▃▃▃▃▁
#> 0.23 0.38 0.52 0.9 ▇▃▇▇▇▅▂▁
#> 0.068 0.11 0.18 0.59 ▆▇▃▂▁▁▁▁
#> 0.012 0.024 0.2 0.9 ▇▁▁▁▁▁▁▁
#> 0.1 0.14 0.19 0.92 ▅▇▂▁▁▁▁▁
#> 0.15 0.23 0.32 0.86 ▃▇▇▃▂▁▁▁
#> 0.02 0.034 0.074 1 ▇▁▁▁▁▁▁▁
#> 0.013 0.016 0.024 0.9 ▇▁▁▁▁▁▁▁
#> 0.095 0.12 0.15 0.5 ▂▇▆▁▁▁▁▁
#> 0.062 0.087 0.13 0.52 ▅▇▃▁▁▁▁▁
#> 0.5 0.55 0.58 0.83 ▁▁▁▁▃▇▁▁
#> 0.15 0.18 0.22 0.47 ▁▁▇▆▂▁▁▁
#> -3.9 -1.4 0.6 27.2 ▁▁▁▁▃▇▁▁
#> -1.7 -0.5 0.9 8.6 ▁▁▁▁▂▇▂▁
#> 0.2 0.8 1.5 5 ▇▇▆▃▂▁▁▁
#> -2.8 -1.1 0.3 22.3 ▁▁▁▁▇▂▁▁
#> 0 0.5 1.7 9.1 ▁▇▆▂▂▁▁▁
#> 9.4 12.6 16.5 80.8 ▁▁▁▇▁▁▁▁
#> -0.1 0.1 0.8 8.1 ▁▇▂▁▁▁▁▁
#> 0.2 1.3 3.1 13 ▆▇▅▂▁▁▁▁
#> 0.039 0.081 0.13 1.26 ▁▁▁▇▂▁▁▁
#> 2018 2018 2018 2018 ▁▁▁▇▁▁▁▁
#> 2011 2015 2017 2018 ▁▁▁▁▁▂▃▇
Now we can filter and munge as needed:
adv_player_stats <- bref_advanced %>%
filter(minutes >= 500) %>%
mutate(bref_url = glue::glue("https://www.basketball-reference.com/players/{stringr::str_sub(idPlayer, 1, 1)}/{idPlayer}.html"),
bref_link = glue::glue('<a href="{bref_url}">{namePlayer}</a>'))
Collapse positions into front and backcourt:
unique_positions <- unique(bref_advanced$idPosition)
frontcourt <- c("PF", "SF", "C", "PF-SF", "C-PF", "SG-PF", "SF-PF")
backcourt <- c("PG", "SG", "PG-SG", "SG-PG", "SF-SG", "SG-SF")
bref_efg <- bref_totals %>%
select(one_of(c("idPlayer", "pctEFG")))
adv_player_stats <- adv_player_stats %>%
left_join(bref_efg, by = "idPlayer") %>%
mutate( "position" = case_when(
idPosition %in% frontcourt ~ "frontcourt",
idPosition %in% backcourt ~ "backcourt",
TRUE ~ "other"),
"position" = as.factor(position)
)
Let’s also get some info from the NBA Stats API using teams_players_states()
. By using assign_to_environment = TRUE
, we’ll automatically get a data frame dataGeneralPlayers
. For now I just want players’ offensive rating1, ortg
, and defensive rating2, drtg
.
nbastatR::teams_players_stats(seasons = 2019, types = c("player"),
tables = "general", measures = "Advanced",
assign_to_environment = TRUE)
player_rtgs <- dataGeneralPlayers %>%
select(one_of(c("idPlayer", "ortg", "drtg")))
adv_player_stats <- adv_player_stats %>%
left_join(player_rtgs, by = c("idPlayerNBA" = "idPlayer"))
adv_player_stats %>%
ggplot(aes(x = ratioPER)) +
geom_histogram()
Let’s get some help from glue and hrbrthemes…
adv_player_stats %>%
ggplot(aes(x = ratioPER)) +
geom_histogram(alpha = 0.7, fill = "#011627") +
labs(title = "PER for players with 500+ minutes",
subtitle = "NBA 2018-2019 season",
caption = glue::glue("data via nbastatR {yesterday}")) +
hrbrthemes::theme_ipsum_rc()
adv_player_stats %>%
ggplot(aes(x = ratioVORP)) +
geom_histogram(alpha = 0.7, fill = "#011627") +
labs(title = "Value Over Replacement Player (VORP)",
subtitle = "NBA 2018-2019 season, players with 500+ minutes",
caption = glue::glue("data via nbastatR {yesterday}")) +
hrbrthemes::theme_ipsum_rc()
adv_player_stats %>%
ggplot(aes(x = ratioWS)) +
geom_histogram(alpha = 0.7, fill = "#011627") +
labs(title = "Win Shares for players with 500+ minutes",
subtitle = "NBA 2018-2019 season",
caption = glue::glue("data via nbastatR {yesterday}")) +
hrbrthemes::theme_ipsum_rc()
Histograms are all well and good, but let’s look at something a little more interesting…
adv_player_stats %>%
ggplot(aes(x = ratioOBPM, y = ratioDBPM)) +
geom_point() +
geom_hline(yintercept = 0, alpha = 0.6, lty = "dashed") +
geom_vline(xintercept = 0, alpha = 0.6, lty = "dashed") +
labs(title = "Offensive vs. Defensive Box Plus-Minus",
subtitle = glue::glue("NBA 2018-2019 season through {yesterday}"),
caption = glue::glue("data via nbastatR"),
x = "OBPM",
y = "DBPM") +
hrbrthemes::theme_ipsum_rc()
Things are a pretty boring without annotation — and we’re not doing much in the way of storytelling. Luckily Hiroaki Yutani’s gghighlight package can help us out with that!
Because gghighlight uses a predicate function to determine what to highlight, I’ll make a little helper fun to get the top 10 players for some variable.
get_top10 <- function(df, column) {
require(rlang)
column <- enquo(column)
dplyr::top_n(df, n = 10, wt = !!column) %>%
pull(namePlayer)
}
Things are looking a little more complex, so let’s look at the pieces of code in this next section.
# get top 10 for desired variable (in this case ratioBPM)
top10_BPM <- top_n(adv_player_stats, n = 10, wt = ratioBPM) %>%
pull(namePlayer)
adv_player_stats %>%
ggplot(aes(x = ratioOBPM, y = ratioDBPM)) +
geom_point(color = "#011627") +
gghighlight::gghighlight(namePlayer %in% top10_BPM, label_key = namePlayer,
label_params = list(fill = ggplot2::alpha("white", 0.8),
box.padding = 0,
family = "Roboto Condensed"),
unhighlighted_colour = "#007190") +
geom_hline(yintercept = 0, alpha = 0.6, lty = "dashed") +
geom_vline(xintercept = 0, alpha = 0.6, lty = "dashed") +
labs(title = "Offensive vs. Defensive Box Plus-Minus: Top 10 Box Plus/Minus",
subtitle = glue::glue("NBA 2018-2019 season through {yesterday}"),
caption = glue::glue("data via nbastatR"),
x = "OBPM",
y = "DBPM") +
hrbrthemes::theme_ipsum_rc()
Predicate functions won’t always hit everything you want to see, which is why interactive visualizations can be a great tool for exploration. There are also some widgets and add-ins in RStudio that can help out with this.3
Since we’re using distill for R Markdown, we have some nice options in terms of figure layout. Below, I’ll use layout="l-body-outset"
as a chunk parameter.
top10_WS <- get_top10(adv_player_stats, ratioWSPer48)
adv_player_stats %>%
ggplot(aes(x = ratioOWS, y = ratioDWS)) +
geom_point(color = "#011627") +
gghighlight::gghighlight(namePlayer %in% top10_WS, label_key = namePlayer,
label_params = list(fill = ggplot2::alpha("white", 0.8),
box.padding = 0,
family = "Roboto Condensed"),
unhighlighted_colour = "#007190") +
geom_hline(yintercept = 0, alpha = 0.6, lty = "dashed") +
geom_vline(xintercept = 0, alpha = 0.6, lty = "dashed") +
labs(title = "Offensive vs. Defensive Win Shares: Top 10 WS Per 48",
subtitle = glue::glue("NBA 2018-2019 season through {yesterday}"),
caption = glue::glue("data via nbastatR"),
x = "OWS",
y = "DWS") +
hrbrthemes::theme_ipsum_rc()
We can go even wider by using layout="l-page"
.
top10_EFG <- get_top10(adv_player_stats, pctEFG)
adv_player_stats %>%
ggplot(aes(x = (pctTrueShooting - mean(pctTrueShooting)), y = (ratioPER - mean(ratioPER)))) +
geom_point(color = "#011627") +
gghighlight::gghighlight(namePlayer %in% top10_EFG, label_key = namePlayer,
label_params = list(fill = ggplot2::alpha("white", 0.8),
box.padding = 0,
family = "Roboto Condensed"),
unhighlighted_colour = "#007190") +
geom_hline(yintercept = 0, alpha = 0.6, lty = "dashed") +
geom_vline(xintercept = 0, alpha = 0.6, lty = "dashed") +
labs(title = "TS% above avg vs. PER above avg: Top 10 EFG%",
subtitle = glue::glue("NBA 2018-2019 season"),
caption = glue::glue("data via nbastatR, {yesterday}"),
x = "true shooting %",
y = "player efficiency rating") +
hrbrthemes::theme_ipsum_rc()
One of my latest favorite packages to play with is Rich Iannone’s {gt}:
adv_player_stats %>%
select(namePlayer, ratioBPM, ratioOBPM, ratioDBPM, bref_url, urlPlayerThumbnail) %>%
arrange(desc(ratioBPM)) %>%
top_n(n = 10, wt = ratioBPM) %>%
gt::gt(rowname_col = "namePlayer") %>%
tab_header(
title = md("**Top 10 Box Plus/Minus**")
) %>%
cols_label(
ratioBPM = md("**BPM**"),
ratioOBPM = md("**OBPM**"),
ratioDBPM = md("**DBPM**"),
bref_url = md("**Link**"),
urlPlayerThumbnail = md("")
) %>%
text_transform(
locations = cells_data(vars(bref_url)),
fn = function(x) {
sprintf("<a href=%s>profile</a>", x)
}
) %>%
text_transform(
locations = cells_data(vars(urlPlayerThumbnail)),
fn = function(x) {
web_image(url = x)
}
) %>%
tab_source_note(
md("source: [basketball-reference.com](https://www.basketball-reference.com) via [nbastatR](http://asbcllc.com/nbastatR/index.html)")
) %>%
tab_footnote(
footnote = ("Players with 500+ minutes."),
locations = cells_title("title")
) %>%
tab_footnote(
footnote = ("Box Plus/Minus: a box score estimate of the points per 100 possessions that a player contributed above a league-average player, translated to an average team."),
locations = cells_column_labels(
columns = vars(ratioBPM)
)
) %>%
tab_footnote(
footnote = ("Offensive Box Plus/Minus."),
locations = cells_column_labels(
columns = vars(ratioOBPM)
)
) %>%
tab_footnote(
footnote = ("Defensive Box Plus/Minus."),
locations = cells_column_labels(
columns = vars(ratioDBPM)
)
) %>%
tab_options(footnote.glyph = c("*, †, ‡, §, ¶, ‖"),
table.width = px(640))
Top 10 Box Plus/Minus* | ||||||
---|---|---|---|---|---|---|
BPM † | OBPM ‡ | DBPM § | Link | |||
James Harden | 11.0 | 10.1 | 0.8 | profile | ||
Giannis Antetokounmpo | 10.8 | 5.7 | 5.1 | profile | ||
Nikola Jokic | 9.8 | 6.2 | 3.6 | profile | ||
Anthony Davis | 8.7 | 4.8 | 3.9 | profile | ||
LeBron James | 7.9 | 6.2 | 1.7 | profile | ||
Karl-Anthony Towns | 7.3 | 5.4 | 1.9 | profile | ||
Kyrie Irving | 6.8 | 6.4 | 0.5 | profile | ||
Nikola Vucevic | 6.8 | 3.2 | 3.6 | profile | ||
Rudy Gobert | 6.8 | 1.7 | 5.1 | profile | ||
Mitchell Robinson | 6.2 | 0.4 | 5.8 | profile | ||
source: basketball-reference.com via nbastatR | ||||||
* Players with 500+ minutes. † Box Plus/Minus: a box score estimate of the points per 100 possessions that a player contributed above a league-average player, translated to an average team. ‡ Offensive Box Plus/Minus. § Defensive Box Plus/Minus. |
adv_player_stats %>%
select(urlPlayerHeadshot, namePlayer, ratioBPM, ratioOBPM, ratioDBPM) %>%
arrange(desc(ratioOBPM)) %>%
top_n(n = 10, wt = ratioOBPM) %>%
gt::gt() %>%
tab_header(
title = md("**Top 10 Offensive Box Plus/Minus**")
) %>%
cols_label(
namePlayer = md("**Player**"),
urlPlayerHeadshot = md(""),
ratioBPM = md("**BPM**"),
ratioOBPM = md("**OBPM**"),
ratioDBPM = md("**DBPM**")
) %>%
text_transform(
locations = cells_data(vars(urlPlayerHeadshot)),
fn = function(x) {
web_image(url = x)
}
) %>%
tab_source_note(
md("source: [basketball-reference.com](https://www.basketball-reference.com) via [nbastatR](http://asbcllc.com/nbastatR/index.html)")
) %>%
tab_footnote(
footnote = ("Players with 500+ minutes."),
locations = cells_title("title")
) %>%
tab_footnote(
footnote = ("Box Plus/Minus; a box score estimate of the points per 100 possessions that a player contributed above a league-average player, translated to an average team."),
locations = cells_column_labels(
columns = vars(ratioBPM)
)
) %>%
tab_footnote(
footnote = ("Offensive Box Plus/Minus."),
locations = cells_column_labels(
columns = vars(ratioOBPM)
)
) %>%
tab_footnote(
footnote = ("Defensive Box Plus/Minus."),
locations = cells_column_labels(
columns = vars(ratioDBPM)
)
) %>%
tab_options(footnote.glyph = c("*, †, ‡, §, ¶, ‖"),
table.width = px(640))
Top 10 Offensive Box Plus/Minus* | |||||
---|---|---|---|---|---|
Player | BPM † | OBPM ‡ | DBPM § | ||
James Harden | 11.0 | 10.1 | 0.8 | ||
Stephen Curry | 6.0 | 7.7 | -1.6 | ||
Damian Lillard | 5.5 | 6.6 | -1.1 | ||
Kyrie Irving | 6.8 | 6.4 | 0.5 | ||
LeBron James | 7.9 | 6.2 | 1.7 | ||
Nikola Jokic | 9.8 | 6.2 | 3.6 | ||
Giannis Antetokounmpo | 10.8 | 5.7 | 5.1 | ||
Karl-Anthony Towns | 7.3 | 5.4 | 1.9 | ||
Paul George | 5.8 | 5.0 | 0.9 | ||
Anthony Davis | 8.7 | 4.8 | 3.9 | ||
source: basketball-reference.com via nbastatR | |||||
* Players with 500+ minutes. † Box Plus/Minus; a box score estimate of the points per 100 possessions that a player contributed above a league-average player, translated to an average team. ‡ Offensive Box Plus/Minus. § Defensive Box Plus/Minus. |
adv_player_stats %>%
select(namePlayer, ratioBPM, ratioOBPM, ratioDBPM) %>%
arrange(desc(ratioDBPM)) %>%
top_n(n = 10, wt = ratioDBPM) %>%
gt::gt() %>%
tab_header(
title = md("**Top 10 Defensive Box Plus/Minus**")
) %>%
cols_label(
namePlayer = md("**Player**"),
ratioBPM = md("**BPM**"),
ratioOBPM = md("**OBPM**"),
ratioDBPM = md("**DBPM**")
) %>%
tab_source_note(
md("source: [basketball-reference.com](https://www.basketball-reference.com) via [nbastatR](http://asbcllc.com/nbastatR/index.html)")
) %>%
tab_footnote(
footnote = ("Players with 500+ minutes."),
locations = cells_title("title")
) %>%
tab_footnote(
footnote = ("Box Plus/Minus; a box score estimate of the points per 100 possessions that a player contributed above a league-average player, translated to an average team."),
locations = cells_column_labels(
columns = vars(ratioBPM)
)
) %>%
tab_footnote(
footnote = ("Offensive Box Plus/Minus."),
locations = cells_column_labels(
columns = vars(ratioOBPM)
)
) %>%
tab_footnote(
footnote = ("Defensive Box Plus/Minus."),
locations = cells_column_labels(
columns = vars(ratioDBPM)
)
) %>%
tab_options(footnote.glyph = c("*, †, ‡, §, ¶, ‖"),
table.width = px(640))
Top 10 Defensive Box Plus/Minus* | ||||
---|---|---|---|---|
Player | BPM † | OBPM ‡ | DBPM § | |
Mitchell Robinson | 6.2 | 0.4 | 5.8 | |
Nerlens Noel | 3.8 | -1.8 | 5.5 | |
Giannis Antetokounmpo | 10.8 | 5.7 | 5.1 | |
Rudy Gobert | 6.8 | 1.7 | 5.1 | |
Myles Turner | 3.2 | -1.6 | 4.8 | |
Russell Westbrook | 6.1 | 2.1 | 4.1 | |
Anthony Davis | 8.7 | 4.8 | 3.9 | |
Joakim Noah | 1.9 | -1.8 | 3.7 | |
Mason Plumlee | 4.0 | 0.3 | 3.7 | |
Jusuf Nurkic | 5.2 | 1.6 | 3.6 | |
Nikola Jokic | 9.8 | 6.2 | 3.6 | |
Nikola Vucevic | 6.8 | 3.2 | 3.6 | |
source: basketball-reference.com via nbastatR | ||||
* Players with 500+ minutes. † Box Plus/Minus; a box score estimate of the points per 100 possessions that a player contributed above a league-average player, translated to an average team. ‡ Offensive Box Plus/Minus. § Defensive Box Plus/Minus. |
Messing around with highcharts courtesy of Joshua Kunst’s {highcharter} package.
library(highcharter)
hchart(adv_player_stats, "scatter", hcaes(x = "ratioOBPM", y = "ratioDBPM", group = "position", name = "namePlayer", OBPM = "ratioOBPM", DBPM = "ratioDBPM", position = "position")) %>%
hc_tooltip(pointFormat = "<b>{point.name}</b><br />OBPM: {point.OBPM}<br />DBPM: {point.DBPM}") %>%
hc_title(text = "Offensive vs. Defensive Box Plus/Minus") %>%
hc_subtitle(text = "NBA 2018-2019 Season") %>%
hc_credits(enabled = TRUE,
text = "data via nbastatR",
style = list(
fontSize = "10px"
)
) %>%
hc_add_theme(hc_theme_538())
hchart(adv_player_stats, "scatter", hcaes(x = "ratioOWS", y = "ratioDWS", group = "position", name = "namePlayer", OWS = "ratioOWS", DWS = "ratioDWS", position = "position")) %>%
hc_tooltip(pointFormat = "<b>{point.name}</b><br />OWS: {point.OWS}<br />DWS: {point.DWS}") %>%
hc_title(text = "Offensive vs. Defensive Win Shares") %>%
hc_subtitle(text = "NBA 2018-2019 Season") %>%
hc_credits(enabled = TRUE,
text = "data via nbastatR",
style = list(
fontSize = "10px"
)
) %>%
hc_add_theme(hc_theme_economist())
hchart(adv_player_stats, "scatter",
hcaes(x = "pctTrueShooting", y = "ratioPER",
name = "namePlayer", TS = "pctTrueShooting",
PER = "ratioPER", position = "position")) %>%
hc_tooltip(pointFormat = "<b>{point.name}</b><br />TS%: {point.TS}<br />PER: {point.PER}<br />Position: {point.position}") %>%
hc_title(text = "True Shooting % vs Player Efficiency Rating") %>%
hc_subtitle(text = "NBA 2018-2019 Season") %>%
hc_credits(enabled = TRUE,
text = "data via nbastatR",
style = list(
fontSize = "14px"
)
) %>%
hc_add_theme(hc_theme_chalk(
plotOptions = list(
scatter = list(
marker = list(radius = 4,
fillOpacity = 0.3) # actually this does nothing
)
)
)
)