Skip to article content

Étude des relations entre l'entraîneur sportif et la performance du club

Data extraction

Nous collectons les données de Fbref et Transfermarkt en utilisant la bibliothèque WorldFootballR.

Nous collectons des données de 2015 à 2023 auprès des principales ligues européennes de première division : Angleterre, Espagne, Italie, Allemagne, France, Portugal, Écosse, Pologne, Grèce, Turquie, Suisse, Pays-Bas, Belgique, Autriche.

# Attach the packages this notebook relies on, installing any that are missing.
# requireNamespace() only checks availability (without attaching); library()
# then attaches the package and stops with an error if it still cannot be
# loaded, instead of silently returning FALSE like require().
for (pkg in c("worldfootballR", "readr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
Le chargement a nécessité le package : worldfootballR

Le chargement a nécessité le package : readr

Collecting match results

# Leagues and seasons to extract; edit these two vectors to study other
# teams and seasons. A wider selection that can be used instead:
# country <- c("ENG", "ESP", "ITA", "GER", "FRA", "POR", "SCO", "POL", "GRE", "SUI", "NED", "BEL", "AUT")
# year <- c(2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)

country <- c("ENG", "ESP", "ITA", "GER", "FRA")
year <- c(2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)

match_result <- fb_match_results(
  country = country,
  gender = "M",
  season_end_year = year,
  tier = "1st"
)

# Mapping: column name in the extracted data -> column name used afterwards.
match_columns <- c(
  Competition_Name = "League",
  Country = "Country",
  Season_End_Year = "Season",
  Date = "Date",
  Home = "Home",
  HomeGoals = "HomeGoals",
  Away = "Away",
  AwayGoals = "AwayGoals"
)

# Keep only the mapped columns, then apply the new names in the same order.
columns_to_keep <- names(match_columns)
match_result <- match_result[, columns_to_keep]
colnames(match_result) <- unname(match_columns)
head(match_result)
Loading...
# List the league names present in the extracted data.
unique(match_result$League)
# Fix League name for Bundesliga.
# The sharp s is written as the Unicode escape \u00df so the pattern is a
# UTF-8 string in every locale (raw octal bytes are only read as that letter
# when the native encoding is UTF-8). fixed = TRUE matches the name literally
# rather than as a regular expression.
match_result$League <- gsub(
  "Fu\u00dfball-Bundesliga",
  "Bundesliga",
  match_result$League,
  fixed = TRUE
)
Loading...
# Per-column overview (ranges, quartiles, NA counts) to sanity-check the extraction
summary(match_result)
League Country Season Date Length:16463 Length:16463 Min. :2015 Min. :2014-08-08 Class :character Class :character 1st Qu.:2017 1st Qu.:2016-10-29 Mode :character Mode :character Median :2019 Median :2019-01-11 Mean :2019 Mean :2019-01-10 3rd Qu.:2021 3rd Qu.:2021-03-21 Max. :2023 Max. :2023-06-11 Home HomeGoals Away AwayGoals Length:16463 Min. : 0.000 Length:16463 Min. :0.000 Class :character 1st Qu.: 1.000 Class :character 1st Qu.:0.000 Mode :character Median : 1.000 Mode :character Median :1.000 Mean : 1.536 Mean :1.213 3rd Qu.: 2.000 3rd Qu.:2.000 Max. :10.000 Max. :9.000 NA's :101 NA's :101
# Saving the data.
# Create the output directory first so write_csv() does not fail when "data/"
# is missing; this is a no-op when the directory already exists.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(match_result, "data/extracted_match_results.csv")

Collecting head coach data

# Change country to study different teams
# country <- c("England", "Spain", "Italy", "Germany", "France", "Portugal", "Scotland", "Poland", "Greece", "Switzerland", "Netherlands", "Belgium")

country <- c("England", "Spain", "Italy", "Germany", "France")

# One tm_league_team_urls() call per country, flattened into a single vector
# of team URLs (avoids growing the vector with c() inside a loop).
# NOTE(review): start_year = 2015 presumably lists only the clubs of that one
# season, while the match results cover seasons 2015-2023 - confirm that clubs
# entering these leagues in other seasons are not needed.
teams_url <- unlist(lapply(country, function(country_name) {
  tm_league_team_urls(country_name = country_name, start_year = 2015)
}))

# Staff history restricted to the "Manager" role for every collected team URL.
head_coach <- tm_team_staff_history(team_urls = teams_url, staff_role = "Manager")
unique(head_coach$league)
# Use the full column name: `$team` only works through partial matching.
unique(head_coach$team_name)
Loading...

There is some missing information about country and league in the data. We will add this information manually.

# Number of missing values in each column. vapply() pins the result to one
# integer per column, whereas sapply() chooses its return type from the input.
vapply(head_coach, function(x) sum(is.na(x)), integer(1))
# Show unique teams with missing league and/or country.
# The column is addressed by its full name, team_name, instead of relying on
# partial matching of `$team`.
unique(head_coach$team_name[is.na(head_coach$league) | is.na(head_coach$country)])
Loading...
# Fix league and country for 'Chievo Verona' and 'GFC Ajaccio'.
# Rows are selected through the full column name, team_name, rather than
# through partial matching of `$team`; %in% keeps the masks free of NA.
is_chievo <- head_coach$team_name %in% "Chievo Verona"
head_coach$league[is_chievo] <- "Serie A"
head_coach$country[is_chievo] <- "Italy"

is_ajaccio <- head_coach$team_name %in% "GFC Ajaccio"
head_coach$league[is_ajaccio] <- "Ligue 2"
head_coach$country[is_ajaccio] <- "France"

Filter out leagues that are not first-division leagues

# Keep only the rows whose league is one of the first-division leagues.
# A wider list that can be used instead:
# first_division_teams <- c(
#     'Premier League', 'LaLiga', 'Serie A', 'Bundesliga', 'Ligue 1',
#     'Liga Portugal', 'Scottish Premiership', 'PKO BP Ekstraklasa', 'Super League 1',
#     'Super League', 'Eredivisie', 'Jupiler Pro League')

first_division_teams <- c(
  "Premier League",
  "LaLiga",
  "Serie A",
  "Bundesliga",
  "Ligue 1"
)

# Check (printed as TRUE/FALSE) that every listed league occurs in the data.
all(is.element(first_division_teams, head_coach$league))

# Row mask of first-division entries, then the filtered table and a preview.
in_first_division <- head_coach$league %in% first_division_teams
head_coach <- head_coach[in_first_division, ]
head(head_coach, n = 5)
Loading...
# Mapping: column name in the staff history -> column name used afterwards.
coach_columns <- c(
  team_name = "Team",
  league = "League",
  country = "Country",
  staff_name = "HeadCoach",
  appointed = "Appointed",
  end_date = "EndDate",
  days_in_post = "Tenure",
  matches = "Matches",
  wins = "Wins",
  draws = "Draws",
  losses = "Losses"
)

# Keep only the mapped columns, then apply the new names in the same order.
columns_to_keep <- names(coach_columns)
head_coach <- head_coach[, columns_to_keep]
colnames(head_coach) <- unname(coach_columns)

# Per-column overview of the cleaned table.
summary(head_coach)
Team League Country HeadCoach Length:3532 Length:3532 Length:3532 Length:3532 Class :character Class :character Class :character Class :character Mode :character Mode :character Mode :character Mode :character Appointed EndDate Tenure Matches Min. :1886-06-26 Min. :1893-08-01 Min. : -242.0 Min. : 0.00 1st Qu.:1961-11-02 1st Qu.:1963-06-30 1st Qu.: 186.0 1st Qu.: 10.00 Median :1987-07-01 Median :1988-03-06 Median : 364.0 Median : 29.00 Mean :1982-05-15 Mean :1983-04-16 Mean : 608.2 Mean : 51.59 3rd Qu.:2004-12-29 3rd Qu.:2005-06-30 3rd Qu.: 730.0 3rd Qu.: 67.00 Max. :2024-04-23 Max. :2024-06-30 Max. :14613.0 Max. :1490.00 NA's :64 Wins Draws Losses Min. : 0.00 Min. : 0.00 Min. : 0.00 1st Qu.: 2.00 1st Qu.: 2.00 1st Qu.: 4.00 Median : 10.00 Median : 7.00 Median : 10.00 Mean : 22.53 Mean : 13.01 Mean : 16.05 3rd Qu.: 28.00 3rd Qu.: 17.00 3rd Qu.: 21.00 Max. :895.00 Max. :323.00 Max. :272.00
# Saving the data.
# Create the output directory first so write_csv() does not fail when "data/"
# is missing; this is a no-op when the directory already exists.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(head_coach, "data/extracted_head_coach.csv")
Étude des relations entre l'entraîneur sportif et la performance du club
Étude des relations entre l'entraîneur sportif et la performance du club
Étude des relations entre l'entraîneur sportif et la performance du club
Preprocessing