Skip to article content

Étude des relations entre l'entraîneur sportif et la performance du club

Head Coaches

Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(context = 'paper', style = 'ticks', palette = 'deep', color_codes = True)
plt.rcParams['figure.autolayout'] = True
plt.rcParams['figure.dpi'] = 300

Loading data

head_coach = pd.read_csv('data/head_coach.csv', parse_dates=['Appointed', 'EndDate'])
head_coach_dismissed = head_coach[head_coach['EndDate'].notnull()]

long_tenure = head_coach[head_coach['Tenure'] > 3000].shape[0]

Il y a 1 entraîneurs avec plus de 3000 jours en poste. Cela concerne Arsene Wenger qui a été responsable d’Arsenal pendant 7046 jours.

Basic plots

# Useful to add xtick months to dayofyear plot
months = pd.date_range('2022-01-01', '2022-12-31', freq='ME').strftime('%b').tolist()
plt.figure()
plt.grid(axis='y', linestyle='--', alpha=0.8)
sns.histplot(head_coach['Appointed'].dt.month, bins=11, color = 'b', kde=False, stat='density', alpha = 1)
plt.gca().yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
plt.xticks(range(1, 13), months)
plt.title('Monthly Distribution of Head Coach Appointments')
plt.xlabel('Month')
plt.ylabel('Proportion');
<Figure size 1920x1440 with 1 Axes>
# Plot for Head Coach dismissal distribution
plt.figure()
plt.grid(axis='y', linestyle='--', alpha=0.8)
sns.histplot(head_coach['EndDate'].dt.month, bins=11, color = 'r', kde=False, stat='density', alpha = 1)
plt.xticks(range(1, 13), months)
plt.gca().yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
plt.title('Monthly Distribution of Head Coach Dismissals')
plt.xlabel('Month')
plt.ylabel('Proportion');
<Figure size 1920x1440 with 1 Axes>
# Proportion of in-season vs off-season head coach dismissal per league
head_coach_dismissed['OffSeason'] = head_coach_dismissed['EndDate'].dt.month.isin([5, 6, 7])
season_break = head_coach_dismissed.groupby(['League'])['OffSeason'].value_counts(normalize=True).unstack().fillna(0)
season_break.columns = ['In-Season', 'Off-Season']
season_break = season_break.sort_values(by='Off-Season', ascending=False)
# plot season_break for each league
season_break.plot(kind='barh', stacked=True, color=['r', 'b'])
plt.legend(labels=['In-Season (Aug-Apr)', 'Off-Season (May-Jul)'], loc='lower center', bbox_to_anchor=(0.5, 1.0), ncol=2)
plt.gca().xaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
plt.tick_params(axis='y', which='both', length=0)
plt.title('Proportion of In-Season vs Off-Season Head Coach Dismissals', y = 1.10)
plt.xlabel('Proportion')
plt.ylabel('')
plt.grid(axis='x', linestyle='--', alpha=0.8);
<Figure size 1920x1440 with 1 Axes>
head_coach['AppointedDay'] = head_coach['Appointed'].dt.dayofyear
head_coach_dismissed['DismissedDay'] = head_coach_dismissed['EndDate'].dt.dayofyear

# KDE Plot of head coach appointment/dismissed days of the year versus league
leagues = head_coach['League'].unique()
fig, ax = plt.subplots(len(leagues), 2, figsize=(8, 1.8*len(leagues),), sharex=True, sharey=True)

for i, league in enumerate(leagues):
    sns.kdeplot(data=head_coach[head_coach['League'] == league], x='AppointedDay', ax=ax[i, 0], fill=True, color='b', alpha=0.5, bw_adjust=0.25, clip=(0, 365), label = 'Appointments')
    sns.kdeplot(data=head_coach_dismissed[head_coach_dismissed['League'] == league], x='DismissedDay', ax=ax[i, 1], fill=True, color='r', alpha=0.5, bw_adjust=0.25, clip=(0, 365), label = 'Dismissals')
    ax[i, 0].set_xlim(0, 365)
    ax[i, 1].set_xlim(0, 365)
    # Major formatter for x-axis
    ax[i, 0].xaxis.set_major_locator(mdates.MonthLocator())
    ax[i, 0].xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    ax[i, 1].xaxis.set_major_locator(mdates.MonthLocator())
    ax[i, 1].xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    ax[i, 0].set_ylabel(league, rotation=0, labelpad=40)
    # Hide y-axis label and ticks
    ax[i, 0].set_yticklabels([])
    ax[i, 1].set_yticklabels([])
    # ax[i, 0].set_yticks([])
    # ax[i, 1].set_yticks([])
    ax[i, 0].grid(axis='y', linestyle='--', alpha=0.8)
    ax[i, 1].grid(axis='y', linestyle='--', alpha=0.8)
    # Remove x-axis label
    ax[i, 0].set_xlabel('')
    ax[i, 1].set_xlabel('')

    if i > 0:
        ax[i, 0].legend().remove()
        ax[i, 1].legend().remove()
    else:
        ax[i, 0].legend()
        ax[i, 1].legend()
        # Place each legend centered on top of their respective axes
        ax[i, 0].legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=2)
        ax[i, 1].legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=2)
<Figure size 2400x2700 with 10 Axes>
# Plot ECDF of head_coach tenure 
plt.figure(figsize=(8, 4))

sns.ecdfplot(data=head_coach_dismissed, x='Tenure', stat = 'percent', alpha = 1, linewidth = 2)
plt.ylabel('Percentage of Head Coaches')
# Format percentage
plt.gca().yaxis.set_major_formatter(mticker.PercentFormatter(xmax=100))
# Grid
plt.grid(axis='y', linestyle='-', alpha=0.8)
plt.grid(axis='x', linestyle='-', alpha=0.8)
plt.xticks(range(0, max(head_coach_dismissed['Tenure']) + 365, 365))
plt.xlim(0, head_coach_dismissed['Tenure'].quantile(0.99))
plt.title('Cumulative distribution of Head Coaches tenure (2015 - 2023)')
plt.xlabel('Head Coaches tenure (days)');
<Figure size 2400x1200 with 1 Axes>

En moyenne, les entraîneurs sportifs sont restés en poste 535 jours.

# Average days in post per league 

# Calculate average days in post per league
avg_days_in_post = head_coach_dismissed.groupby('League')['Tenure'].mean()
avg_days_in_post = avg_days_in_post.sort_values()

# Plot average days in post per league
plt.figure()
sns.barplot(y=avg_days_in_post.index, x=avg_days_in_post.values, orient='h')
plt.title('Average Head Coach Tenure for Completed Appointments per League (2015 - 2023)')
plt.xlabel('Head Coach tenure (days)')
plt.tick_params(axis='y', which='both', length=0)
# Disable ylabel
plt.ylabel('')
plt.grid(axis='x', linestyle='--', alpha=0.8)
<Figure size 1920x1440 with 1 Axes>
# Number of clubs per Head Coach

# Group by coach_name and count the number of clubs
club_per_coach = head_coach.groupby('HeadCoach').size().reset_index(name='count')

plt.figure()
sns.histplot(x='count', data = club_per_coach, discrete = True, stat="probability", alpha = 1)
plt.xticks(range(1, club_per_coach['count'].max() + 1))
plt.gca().yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
plt.title('Proportion of Head Coaches by Number of Club Appointments (2015 - 2023)')
plt.xlabel('Number of clubs')
plt.ylabel('Proportion of Head Coaches')
plt.grid(axis='y', linestyle='--', alpha=0.8)
<Figure size 1920x1440 with 1 Axes>
# Number of Head Coachs per club

# Group by team and count the number of head coach

coach_per_club = head_coach.groupby('Team').size().reset_index(name='count')

plt.figure()
sns.histplot(x='count', data = coach_per_club, discrete=True, stat="probability", alpha = 1)
plt.xticks(range(1, coach_per_club['count'].max() + 1))
plt.gca().yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
plt.title(f'Proportion of Clubs by Number of Head Coaches Appointed (2015 - 2023)')
plt.xlabel('Number of head coach')
plt.ylabel('Proportion of clubs');
<Figure size 1920x1440 with 1 Axes>
# Average number of coach per club per league 

# Calculate average number of coach per club per league
coach_per_team = head_coach.groupby(['League', 'Team']).size()
avg_number_of_coach_per_club_per_league = coach_per_team.groupby('League').mean().sort_values()

# Plot average number of coach per club per league
plt.figure()
sns.barplot(x=avg_number_of_coach_per_club_per_league.values, y=avg_number_of_coach_per_club_per_league.index, orient='h')
plt.title('Average Number of Head Coaches Appointed per Club versus League (2015 - 2023)')
plt.ylabel('')
plt.xlabel('Average number of coach per club');
<Figure size 1920x1440 with 1 Axes>
Étude des relations entre l'entraîneur sportif et la performance du club
Exploratory Data Analysis
Étude des relations entre l'entraîneur sportif et la performance du club
Statistical analysis