Skip to article content

Étude des relations entre l'entraîneur sportif et la performance du club

Head Coaches

Imports

from pathlib import Path

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import polars as pl
import seaborn as sns

# Global plotting defaults: seaborn "paper" theme, plus automatic layout and
# 300 dpi for every matplotlib figure created afterwards.
sns.set_theme(context="paper", style="ticks", palette="deep", color_codes=True)
plt.rcParams.update({"figure.autolayout": True, "figure.dpi": 300})

Loading data

# Read the head coach records and cast both date columns to polars dates.
date_columns = {"Appointed": pl.Date, "EndDate": pl.Date}
head_coach = pl.read_csv(Path("./data/head_coach.csv")).cast(date_columns)

# Appointments that have an end date, i.e. completed spells.
head_coach_dismissed = head_coach.filter(pl.col("EndDate").is_not_null())
# Number of appointments whose "Tenure" value exceeds 3000.
long_tenure = len(head_coach.filter(pl.col("Tenure") > 3000))

Il y a 1 entraîneur avec plus de 3000 jours en poste. Il s’agit d’Arsène Wenger, qui a été responsable d’Arsenal pendant 7046 jours.

Basic plots

# Abbreviated month names, used as x tick labels on the monthly plots
months = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()

# Histogram of the month in which head coaches were appointed
plt.figure()
appointments_ax = plt.gca()
appointments_ax.grid(axis="y", linestyle="--", alpha=0.8)
appointment_month = head_coach["Appointed"].dt.month()
sns.histplot(appointment_month, stat="density", discrete=True, alpha=1)
# Show the y-axis values as percentages
appointments_ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
# One labelled tick per month
appointments_ax.set_xticks(range(1, 13))
appointments_ax.set_xticklabels(months)
appointments_ax.set_title("Monthly Distribution of Head Coach Appointments")
appointments_ax.set_xlabel("Month")
appointments_ax.set_ylabel("Proportion")
plt.show()
<Figure size 1920x1440 with 1 Axes>
# Histogram of the month in which head coach appointments ended
plt.figure()
dismissals_ax = plt.gca()
dismissals_ax.grid(axis="y", linestyle="--", alpha=0.8)
dismissal_month = head_coach["EndDate"].dt.month()
sns.histplot(dismissal_month, stat="density", color="r", discrete=True, alpha=1)
# Show the y-axis values as percentages
dismissals_ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
# One labelled tick per month, reusing the month abbreviations defined earlier
dismissals_ax.set_xticks(range(1, 13))
dismissals_ax.set_xticklabels(months)
dismissals_ax.set_title("Monthly Distribution of Head Coach Dismissals")
dismissals_ax.set_xlabel("Month")
dismissals_ax.set_ylabel("Proportion")
plt.show()
<Figure size 1920x1440 with 1 Axes>
# Proportion of in-season vs off-season head coach dismissals per league

# An appointment ending in May, June or July is labelled "Off Season";
# every other month counts as "During Season".
head_coach_dismissed = head_coach_dismissed.with_columns(
    pl.when(pl.col("EndDate").dt.month().is_in([5, 6, 7]))
    .then(pl.lit("Off Season"))
    .otherwise(pl.lit("During Season"))
    .alias("Dismissal Period")
)

# Keep the shares numeric while pivoting, filling and sorting. Formatting
# them as text first would leave missing league/period cells null
# (fill_null(0) only touches numeric columns) and would sort the
# percentages lexicographically ("10.0 %" before "9.0 %").
season_proportions = (
    head_coach_dismissed.group_by(["League", "Dismissal Period"])
    .len()
    .with_columns(proportion=pl.col("len") / pl.col("len").sum().over("League"))
    .pivot(index="League", on="Dismissal Period", values="proportion")
    .fill_null(0)
    .sort("Off Season")
)

# Only now turn each period column into a display string such as "42.5 %".
period_columns = [name for name in season_proportions.columns if name != "League"]
season_break = season_proportions.with_columns(
    [pl.format("{} %", (pl.col(name) * 100).round(1)) for name in period_columns]
)

season_break
Loading...
# Day of the year (1-366) of each appointment and of each end date
head_coach = head_coach.with_columns(
    appointed_day_of_year=pl.col("Appointed").dt.ordinal_day(),
    dismissal_day_of_year=pl.col("EndDate").dt.ordinal_day(),
)

# KDE plot of head coach appointment/dismissal days of the year, one row per league.
# maintain_order=True keeps the leagues in order of first appearance so the row
# layout is reproducible; a plain unique() gives no ordering guarantee.
leagues = head_coach.get_column("League").unique(maintain_order=True)
fig, ax = plt.subplots(
    len(leagues),
    2,
    figsize=(8, 1.8 * len(leagues)),
    sharex=True,
    sharey=True,
    squeeze=False,  # keep `ax` two-dimensional even when there is a single league
)

# (column to plot, colour, legend label) for the left and right subplot columns
panels = [
    ("appointed_day_of_year", "b", "Appointments"),
    ("dismissal_day_of_year", "r", "Dismissals"),
]

for i, league in enumerate(leagues):
    # Filter once per league and reuse the result for both panels
    league_coaches = head_coach.filter(pl.col("League") == league)

    for j, (column, color, label) in enumerate(panels):
        axis = ax[i, j]
        sns.kdeplot(
            data=league_coaches,
            x=column,
            ax=axis,
            fill=True,
            color=color,
            alpha=0.5,
            bw_adjust=0.25,
            clip=(0, 365),
            label=label,
        )
        axis.set_xlim(0, 365)
        # The date locator/formatter reads the day numbers as matplotlib dates,
        # which puts one tick at each month start, labelled with the month name.
        axis.xaxis.set_major_locator(mdates.MonthLocator())
        axis.xaxis.set_major_formatter(mdates.DateFormatter("%b"))
        # Hide the y tick labels; the density scale itself is not shown
        axis.set_yticklabels([])
        axis.grid(axis="y", linestyle="--", alpha=0.8)
        # Remove x-axis label
        axis.set_xlabel("")

        if i == 0:
            # Only the first row carries a legend, centred above its axes
            axis.legend(loc="upper center", bbox_to_anchor=(0.5, 1.3), ncol=2)
        else:
            existing_legend = axis.get_legend()
            if existing_legend is not None:
                existing_legend.remove()

    # The league name is the row label, written horizontally on the left panel
    ax[i, 0].set_ylabel(league, rotation=0, labelpad=40)
<Figure size 2400x2700 with 10 Axes>
# Empirical cumulative distribution of tenure for completed appointments
plt.figure(figsize=(8, 4))

ecdf_ax = sns.ecdfplot(
    data=head_coach_dismissed,
    x="Tenure",
    stat="percent",
    alpha=1,
    linewidth=2,
)
ecdf_ax.set_ylabel("Percentage of Head Coaches")
# The ECDF is already on a 0-100 scale, so only the "%" formatting is added
ecdf_ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=100))
# Solid grid lines on both axes
ecdf_ax.grid(axis="both", linestyle="-", alpha=0.8)
# One tick every 365 days; the x-axis stops at the 99th percentile of tenure
ecdf_ax.set_xticks(range(0, 3650, 365))
ecdf_ax.set_xlim(0, head_coach_dismissed["Tenure"].quantile(0.99))
ecdf_ax.set_title("Cumulative distribution of Head Coaches tenure (2015 - 2023)")
ecdf_ax.set_xlabel("Head Coaches tenure (days)")
plt.show()
<Figure size 2400x1200 with 1 Axes>

En moyenne, les entraîneurs sportifs sont restés en poste 535 jours.

# Average tenure per league

# Mean tenure of completed appointments for each league, shortest first
avg_days_in_post = (
    head_coach_dismissed.group_by("League")
    .agg(pl.mean("Tenure").alias("Average Tenure"))
    .sort("Average Tenure")
)

# Horizontal bar chart: one bar per league
league_tenure_ax = sns.barplot(
    y=avg_days_in_post["League"],
    x=avg_days_in_post["Average Tenure"],
    orient="h",
)
league_tenure_ax.set_title(
    "Average Head Coach Tenure for Completed Appointments per League (2015 - 2023)"
)
league_tenure_ax.set_xlabel("Average Head Coach Tenure (days)")
# The league names label the bars, so drop the y tick marks and the axis label
league_tenure_ax.tick_params(axis="y", which="both", length=0)
league_tenure_ax.set_ylabel("")
league_tenure_ax.grid(axis="x", linestyle="--", alpha=0.8)
<Figure size 1920x1440 with 1 Axes>
# Number of club appointments per head coach

# One row per head coach; "count" is the number of rows (appointments) for that coach
club_per_coach = head_coach.group_by("HeadCoach").len(name="count")
most_appointments = club_per_coach.get_column("count").max()

coach_hist_ax = sns.histplot(
    data=club_per_coach,
    x="count",
    discrete=True,
    stat="probability",
    alpha=1,
)
# One tick per possible count, with the y-axis shown as percentages
coach_hist_ax.set_xticks(range(1, most_appointments + 1))
coach_hist_ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
coach_hist_ax.set_title(
    "Proportion of Head Coaches by Number of Club Appointments (2015 - 2023)"
)
coach_hist_ax.set_xlabel("Number of clubs")
coach_hist_ax.set_ylabel("Proportion of Head Coaches")
coach_hist_ax.grid(axis="y", linestyle="--", alpha=0.8)
<Figure size 1920x1440 with 1 Axes>
# Number of head coaches per club

# One row per team; "count" is the number of head coach rows for that team
coach_per_club = head_coach.group_by("Team").len(name="count")
most_coaches = coach_per_club.get_column("count").max()

club_hist_ax = sns.histplot(
    data=coach_per_club,
    x="count",
    discrete=True,
    stat="probability",
    alpha=1,
)
# One tick per possible count, with the y-axis shown as percentages
club_hist_ax.set_xticks(range(1, most_coaches + 1))
club_hist_ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
club_hist_ax.set_title(
    "Proportion of Clubs by Number of Head Coaches Appointed (2015 - 2023)"
)
club_hist_ax.set_xlabel("Number of head coach")
club_hist_ax.set_ylabel("Proportion of clubs")
club_hist_ax.grid(axis="y", linestyle="--", alpha=0.8)
<Figure size 1920x1440 with 1 Axes>
# Average number of head coaches per club, by league

# Count head coach rows per (league, team), then average those counts per league
coach_per_team = head_coach.group_by(["League", "Team"]).len()
avg_number_of_coach_per_club_per_league = (
    coach_per_team.group_by("League")
    .agg(avg_coach_per_club=pl.col("len").mean().round(1))
    .sort("avg_coach_per_club")
)

# Horizontal bar chart: one bar per league, lowest average first
league_coach_ax = sns.barplot(
    data=avg_number_of_coach_per_club_per_league,
    x="avg_coach_per_club",
    y="League",
    orient="h",
)
league_coach_ax.set_title(
    "Average Number of Head Coaches Appointed per Club versus League (2015 - 2023)"
)
league_coach_ax.set_ylabel("")
league_coach_ax.set_xlabel("Average number of coach per club")
league_coach_ax.grid(axis="x", linestyle="--", alpha=0.8)
<Figure size 1920x1440 with 1 Axes>
Étude des relations entre l'entraîneur sportif et la performance du club
Exploratory Data Analysis
Étude des relations entre l'entraîneur sportif et la performance du club
Statistical analysis