Skip to article content

Étude des relations entre l'entraîneur sportif et la performance du club

Data extraction

Nous collectons les données de FBref et de Transfermarkt à l'aide du package R worldfootballR.

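Nous collectons des données de 2015 à 2023 pour les principales ligues européennes de première division : Angleterre, Espagne, Italie, Allemagne, France, Portugal, Écosse, Pologne, Grèce, Turquie, Suisse, Pays-Bas, Belgique, Autriche. Le code ci-dessous est toutefois restreint aux cinq championnats anglais, espagnol, italien, allemand et français, pour les saisons se terminant de 2015 à 2025.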

# Package setup ----
# For each dependency: check availability with requireNamespace() (which does
# not attach anything), install it when missing, then attach it exactly once
# with library(). This avoids using require() as a loading mechanism.

# devtools supplies install_github(), used below to install worldfootballR.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)

if (!requireNamespace("worldfootballR", quietly = TRUE)) {
  devtools::install_github("JaseZiv/worldfootballR")
}
library(worldfootballR)

if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)
Le chargement a nécessité le package : worldfootballR

Le chargement a nécessité le package : readr

Collecting match results

# Leagues and seasons to extract; edit `country` and `year` to study other
# competitions or seasons.
# Wider selection kept for reference:
# country <- c("ENG", "ESP", "ITA", "GER", "FRA", "POR", "SCO", "POL", "GRE", "SUI", "NED", "BEL", "AUT")
# year <- c(2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)

country <- c("ENG", "ESP", "ITA", "GER", "FRA")
# Season end years 2015 through 2025, as doubles.
year <- seq(2015, 2025, by = 1)
match_result <- load_match_results(
  country = country,
  gender = "M",
  season_end_year = year,
  tier = "1st"
)
→ Data last updated 2025-02-04 17:32:08.29795408248901 UTC

# Source column (vector name) -> short snake_case label (vector value).
match_columns <- c(
  Competition_Name = "league",
  Country = "country",
  Season_End_Year = "season",
  Date = "date",
  Home = "home",
  HomeGoals = "home_goals",
  Away = "away",
  AwayGoals = "away_goals"
)
columns_to_keep <- names(match_columns)
# Keep only the mapped columns, in this order, then relabel them.
match_result <- match_result[, columns_to_keep]
colnames(match_result) <- unname(match_columns)
head(match_result)
Loading...
# List the distinct competition names present in the data.
unique(match_result[["league"]])
# Shorten "Fußball-Bundesliga" to "Bundesliga" wherever it occurs.
league_names <- match_result[["league"]]
match_result[["league"]] <- gsub("Fußball-Bundesliga", "Bundesliga", league_names)
Loading...
# Print a per-column summary of match_result (ranges, quartiles, NA counts).
summary(match_result)
league country season date Length:19969 Length:19969 Min. :2015 Min. :2014-08-08 Class :character Class :character 1st Qu.:2017 1st Qu.:2017-03-18 Mode :character Mode :character Median :2020 Median :2019-12-21 Mean :2020 Mean :2019-12-28 3rd Qu.:2023 3rd Qu.:2022-09-30 Max. :2025 Max. :2025-05-25 home home_goals away away_goals Length:19969 Min. : 0.000 Length:19969 Min. :0.000 Class :character 1st Qu.: 1.000 Class :character 1st Qu.:0.000 Mode :character Median : 1.000 Mode :character Median :1.000 Mean : 1.542 Mean :1.228 3rd Qu.: 2.000 3rd Qu.:2.000 Max. :10.000 Max. :9.000 NA's :806 NA's :806
# Saving the data
# Create the output directory first: write_csv() does not create missing
# parent directories, so the write would otherwise fail on a fresh checkout.
output_dir <- "../data"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}
write_csv(match_result, file.path(output_dir, "extracted_match_results.csv"))

Collecting head coach data

# Country names for which team URLs are collected; each one is passed to
# get_team_url().
countries <- c("England", "Spain", "Italy", "Germany", "France")

#' Fetch the team URLs of one country via `tm_league_team_urls()`
#'
#' Wrapper that degrades to an empty result, with a warning, instead of
#' aborting the whole collection when the lookup for one country fails.
#'
#' @param country Country name, forwarded as `country_name`.
#' @param start_year Season start year, forwarded as `start_year`. Defaults to
#'   2015, the value that was previously hard-coded.
#' @return The value returned by `tm_league_team_urls()` (presumably a
#'   character vector of team URLs -- confirm against the package docs), or
#'   `character(0)` when the call raises an error.
get_team_url <- function(country, start_year = 2015) {
  # NOTE(review): presumably only the clubs listed for `start_year` are
  # returned; confirm whether clubs that joined the league later are needed.
  tryCatch(
    tm_league_team_urls(country_name = country, start_year = start_year),
    error = function(e) {
      warning(
        "Failed to fetch URLs for ", country, ": ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )
}

# Query every country in turn, then flatten the per-country results into a
# single vector.
team_urls_by_country <- lapply(countries, get_team_url)
teams_url <- unlist(team_urls_by_country)
head(teams_url)
Loading...
#' Fetch the staff URLs of one team via `tm_team_staff_urls()`
#'
#' Wrapper that degrades to an empty result, with a warning, instead of
#' aborting the whole collection when the lookup for one team fails.
#'
#' @param team_url Team URL, forwarded as `team_urls`.
#' @param staff_role Staff role, forwarded as `staff_role`. Defaults to
#'   "Manager", the value that was previously hard-coded.
#' @return The value returned by `tm_team_staff_urls()` (presumably a
#'   character vector of URLs -- confirm against the package docs), or
#'   `character(0)` when the call raises an error. The empty fallback is a
#'   character vector, like the fallback of `get_team_url()`, and still
#'   vanishes when the results are flattened with `unlist()`.
get_team_staff_url <- function(team_url, staff_role = "Manager") {
  tryCatch(
    tm_team_staff_urls(team_urls = team_url, staff_role = staff_role),
    error = function(e) {
      warning(
        "Failed to fetch staff URLs for ", team_url, ": ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )
}

# Look up the staff URLs team by team, then flatten them into one vector.
staff_urls_by_team <- lapply(teams_url, get_team_staff_url)
teams_staff_url <- unlist(staff_urls_by_team)
head(teams_staff_url)
Loading...
# Download the manager history for every collected staff URL.
head_coach <- tm_team_staff_history(team_urls = teams_staff_url, staff_role = "Manager")
# Distinct leagues and clubs present in the result.
unique(head_coach$league)
# The club column is `team_name` (the name this file selects when it trims the
# columns); `$team` would only resolve through partial matching on a
# data.frame and yields NULL on a tibble, so the exact name is used.
unique(head_coach[["team_name"]])
Error: indice hors limites

Error: indice hors limites

Error: indice hors limites

Error: indice hors limites

Error: indice hors limites

Error: indice hors limites

Error: indice hors limites

There is some missing information about country and league in the data. We will add this information manually.

# Count the missing values in each column. vapply() pins the result to exactly
# one integer per column, whereas sapply() can silently change shape.
vapply(head_coach, function(x) sum(is.na(x)), integer(1))
# Show unique teams with missing league and/or country. The club column is
# addressed by its exact name `team_name` rather than the partial `$team`.
missing_origin <- is.na(head_coach$league) | is.na(head_coach$country)
unique(head_coach[["team_name"]][missing_origin])
Loading...
# Fix league and country for 'Chievo Verona' and 'GFC Ajaccio'.
# Rows are matched on the exact column name `team_name` (not the partial
# `$team`) and with %in%, which never yields NA in the row mask.
is_chievo <- head_coach[["team_name"]] %in% "Chievo Verona"
is_ajaccio <- head_coach[["team_name"]] %in% "GFC Ajaccio"

head_coach$league[is_chievo] <- "Serie A"
head_coach$country[is_chievo] <- "Italy"
# NOTE(review): rows labelled "Ligue 2" are removed by the first-division
# filter applied later in this file -- confirm that dropping this club is
# intended.
head_coach$league[is_ajaccio] <- "Ligue 2"
head_coach$country[is_ajaccio] <- "France"

Filter out leagues that are not first-division leagues

# Keep only coaches whose league is a first-division league.
# Broader league list kept for reference:
# first_division_teams <- c(
#     'Premier League', 'LaLiga', 'Serie A', 'Bundesliga', 'Ligue 1',
#     'Liga Portugal', 'Scottish Premiership', 'PKO BP Ekstraklasa', 'Super League 1',
#     'Super League', 'Eredivisie', 'Jupiler Pro League')

first_division_teams <- c("Premier League", "LaLiga", "Serie A", "Bundesliga", "Ligue 1")
# Report whether every listed league occurs at least once in head_coach
# (prints a single TRUE/FALSE; it does not stop the script).
league_present <- first_division_teams %in% head_coach$league
all(league_present)
# Keep the rows whose league belongs to the list.
in_first_division <- head_coach$league %in% first_division_teams
head_coach <- head_coach[in_first_division, ]
head(head_coach, n = 5)
Loading...
# Source column (vector name) -> output label (vector value).
coach_columns <- c(
  team_name = "Team",
  league = "League",
  country = "Country",
  staff_name = "HeadCoach",
  appointed = "Appointed",
  end_date = "EndDate",
  days_in_post = "Tenure",
  matches = "Matches",
  wins = "Wins",
  draws = "Draws",
  losses = "Losses"
)
columns_to_keep <- names(coach_columns)
# Keep only the mapped columns, in this order.
head_coach <- head_coach[, columns_to_keep]

# Relabel the retained columns.
colnames(head_coach) <- unname(coach_columns)

summary(head_coach)
Team League Country HeadCoach Length:3532 Length:3532 Length:3532 Length:3532 Class :character Class :character Class :character Class :character Mode :character Mode :character Mode :character Mode :character Appointed EndDate Tenure Matches Min. :1886-06-26 Min. :1893-08-01 Min. : -242.0 Min. : 0.00 1st Qu.:1961-11-02 1st Qu.:1963-06-30 1st Qu.: 186.0 1st Qu.: 10.00 Median :1987-07-01 Median :1988-03-06 Median : 364.0 Median : 29.00 Mean :1982-05-15 Mean :1983-04-16 Mean : 608.2 Mean : 51.59 3rd Qu.:2004-12-29 3rd Qu.:2005-06-30 3rd Qu.: 730.0 3rd Qu.: 67.00 Max. :2024-04-23 Max. :2024-06-30 Max. :14613.0 Max. :1490.00 NA's :64 Wins Draws Losses Min. : 0.00 Min. : 0.00 Min. : 0.00 1st Qu.: 2.00 1st Qu.: 2.00 1st Qu.: 4.00 Median : 10.00 Median : 7.00 Median : 10.00 Mean : 22.53 Mean : 13.01 Mean : 16.05 3rd Qu.: 28.00 3rd Qu.: 17.00 3rd Qu.: 21.00 Max. :895.00 Max. :323.00 Max. :272.00
# Saving the data
# Create the output directory first: write_csv() does not create missing
# parent directories, so the write would otherwise fail on a fresh checkout.
output_dir <- "../data"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}
write_csv(head_coach, file.path(output_dir, "extracted_head_coach.csv"))
Étude des relations entre l'entraîneur sportif et la performance du club
Étude des relations entre l'entraîneur sportif et la performance du club
Étude des relations entre l'entraîneur sportif et la performance du club
Preprocessing