Étude des relations entre l'entraîneur sportif et la performance du club
Data extraction
Nous collectons les données de FBref et Transfermarkt en utilisant le package R worldfootballR.
Nous collectons des données de 2015 à 2023 auprès des principales ligues européennes de première division : Angleterre, Espagne, Italie, Allemagne, France, Portugal, Écosse, Pologne, Grèce, Turquie, Suisse, Pays-Bas, Belgique, Autriche.
if (!require(worldfootballR)) {
install.packages("worldfootballR")
library(worldfootballR)
}
if (!require(readr)) {
install.packages("readr")
library(readr)
}
Le chargement a nécessité le package : worldfootballR
Le chargement a nécessité le package : readr
Collecting match results
# Scope of the extraction: edit `country` and `year` to study other leagues
# or seasons. A wider league set is kept below for reference.
# country <- c("ENG", "ESP", "ITA", "GER", "FRA", "POR", "SCO", "POL", "GRE", "SUI", "NED", "BEL", "AUT")
country <- c("ENG", "ESP", "ITA", "GER", "FRA")
year <- seq(2015, 2023, by = 1)

# Fetch the men's first-tier match results for every requested
# country / season-end-year combination.
match_result <- fb_match_results(
  country = country,
  gender = "M",
  season_end_year = year,
  tier = "1st"
)

# Keep only the columns used later and give them shorter names
# (Competition_Name becomes League, Season_End_Year becomes Season).
columns_to_keep <- c(
  "Competition_Name", "Country", "Season_End_Year", "Date",
  "Home", "HomeGoals", "Away", "AwayGoals"
)
match_result <- setNames(
  match_result[, columns_to_keep],
  c("League", "Country", "Season", "Date", "Home", "HomeGoals", "Away", "AwayGoals")
)
head(match_result)
Loading...
# League names as they come from the source, before any clean-up.
unique(match_result$League)
# Shorten the German league name to "Bundesliga".
# The sharp s is written as the Unicode escape \u00df so the literal does not
# depend on the session's native encoding (raw byte escapes only spell that
# character when the native encoding is UTF-8), and fixed = TRUE matches the
# name as plain text instead of interpreting it as a regular expression.
match_result$League <- gsub(
  "Fu\u00dfball-Bundesliga",
  "Bundesliga",
  match_result$League,
  fixed = TRUE
)
Loading...
# Per-column overview of the extracted matches (types, ranges, NA counts).
# NOTE(review): the printed summary reports NA's in HomeGoals/AwayGoals;
# presumably fixtures without a recorded score - confirm before modelling.
summary(match_result)
League Country Season Date
Length:16463 Length:16463 Min. :2015 Min. :2014-08-08
Class :character Class :character 1st Qu.:2017 1st Qu.:2016-10-29
Mode :character Mode :character Median :2019 Median :2019-01-11
Mean :2019 Mean :2019-01-10
3rd Qu.:2021 3rd Qu.:2021-03-21
Max. :2023 Max. :2023-06-11
Home HomeGoals Away AwayGoals
Length:16463 Min. : 0.000 Length:16463 Min. :0.000
Class :character 1st Qu.: 1.000 Class :character 1st Qu.:0.000
Mode :character Median : 1.000 Mode :character Median :1.000
Mean : 1.536 Mean :1.213
3rd Qu.: 2.000 3rd Qu.:2.000
Max. :10.000 Max. :9.000
NA's :101 NA's :101
# Save the cleaned match results as CSV.
# The output folder is created first (no-op when it already exists) so the
# write does not fail on a fresh checkout where "data/" is missing.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(match_result, "data/extracted_match_results.csv")
Collecting head coach data
# Countries whose league is studied; edit to study different teams.
# A wider set is kept below for reference.
# country <- c("England", "Spain", "Italy", "Germany", "France", "Portugal", "Scotland", "Poland", "Greece", "Switzerland", "Netherlands", "Belgium")
country <- c("England", "Spain", "Italy", "Germany", "France")

# Collect the team URLs of each country's league, one call per country, and
# flatten them into a single vector (built in one pass instead of growing a
# vector inside a loop).
# NOTE(review): start_year = 2015 requests a single season, while the match
# results above span season-end years 2015-2023; clubs that joined the league
# later are presumably not listed here - confirm this is intended.
teams_url <- unlist(lapply(country, function(country_name) {
  tm_league_team_urls(country_name = country_name, start_year = 2015)
}))

# Staff history restricted to the "Manager" role for every collected team.
head_coach <- tm_team_staff_history(team_urls = teams_url, staff_role = "Manager")

# Inspect the leagues and teams returned. The team column is spelled out as
# `team_name` (the column selected further down in this file) rather than
# relying on `$` partial matching of `team`.
unique(head_coach$league)
unique(head_coach$team_name)
Loading...
There is some missing information about country and league in the data. We will add this information manually.
# Number of missing values in each column. vapply() pins the result to one
# integer per column, so the output type cannot change with the input.
vapply(head_coach, function(column) sum(is.na(column)), integer(1))
# Teams that lack a league and/or a country. The full column name
# `team_name` is used instead of the partially matched `team`.
unique(head_coach$team_name[is.na(head_coach$league) | is.na(head_coach$country)])
Loading...
# Fill in the league and country that are missing for 'Chievo Verona' and
# 'GFC Ajaccio'. Rows are selected through the full column name `team_name`
# (not the partially matched `team`) and with %in%, which never yields NA in
# the row mask.
head_coach$league[head_coach$team_name %in% "Chievo Verona"] <- "Serie A"
head_coach$country[head_coach$team_name %in% "Chievo Verona"] <- "Italy"
# NOTE(review): 'Ligue 2' is not part of the first-division list used by the
# filter further down in this file, so the GFC Ajaccio rows are dropped
# there - confirm this is intended.
head_coach$league[head_coach$team_name %in% "GFC Ajaccio"] <- "Ligue 2"
head_coach$country[head_coach$team_name %in% "GFC Ajaccio"] <- "France"
Filter out leagues that are not first-division leagues
# League names (as they appear in head_coach$league) that count as first
# division. A wider set is kept below for reference.
# first_division_teams <- c(
# 'Premier League', 'LaLiga', 'Serie A', 'Bundesliga', 'Ligue 1',
# 'Liga Portugal', 'Scottish Premiership', 'PKO BP Ekstraklasa', 'Super League 1',
# 'Super League', 'Eredivisie', 'Jupiler Pro League')
first_division_teams <- c("Premier League", "LaLiga", "Serie A", "Bundesliga", "Ligue 1")

# Enforce that every expected league is present in the data. Stopping here
# (instead of only printing TRUE/FALSE) prevents a renamed or missing league
# from silently removing all of its rows in the filter below.
missing_leagues <- setdiff(first_division_teams, head_coach$league)
if (length(missing_leagues) > 0) {
  stop(
    "Leagues not found in head_coach$league: ",
    paste(missing_leagues, collapse = ", "),
    call. = FALSE
  )
}

# Keep only the rows whose league is in the first-division list
# (%in% is FALSE for NA leagues, so those rows are dropped as well).
head_coach <- head_coach[head_coach$league %in% first_division_teams, ]
head(head_coach, 5)
Loading...
# Source columns retained for the analysis.
columns_to_keep <- c(
  "team_name", "league", "country", "staff_name", "appointed", "end_date",
  "days_in_post", "matches", "wins", "draws", "losses"
)
# Select those columns and rename them in a single step
# (team_name becomes Team, staff_name becomes HeadCoach, days_in_post becomes Tenure).
head_coach <- setNames(
  head_coach[, columns_to_keep],
  c(
    "Team", "League", "Country", "HeadCoach", "Appointed", "EndDate",
    "Tenure", "Matches", "Wins", "Draws", "Losses"
  )
)
# Per-column overview of the cleaned head coach table.
summary(head_coach)
Team League Country HeadCoach
Length:3532 Length:3532 Length:3532 Length:3532
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
Appointed EndDate Tenure Matches
Min. :1886-06-26 Min. :1893-08-01 Min. : -242.0 Min. : 0.00
1st Qu.:1961-11-02 1st Qu.:1963-06-30 1st Qu.: 186.0 1st Qu.: 10.00
Median :1987-07-01 Median :1988-03-06 Median : 364.0 Median : 29.00
Mean :1982-05-15 Mean :1983-04-16 Mean : 608.2 Mean : 51.59
3rd Qu.:2004-12-29 3rd Qu.:2005-06-30 3rd Qu.: 730.0 3rd Qu.: 67.00
Max. :2024-04-23 Max. :2024-06-30 Max. :14613.0 Max. :1490.00
NA's :64
Wins Draws Losses
Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 2.00 1st Qu.: 2.00 1st Qu.: 4.00
Median : 10.00 Median : 7.00 Median : 10.00
Mean : 22.53 Mean : 13.01 Mean : 16.05
3rd Qu.: 28.00 3rd Qu.: 17.00 3rd Qu.: 21.00
Max. :895.00 Max. :323.00 Max. :272.00
# Save the cleaned head coach table as CSV.
# The output folder is created first (no-op when it already exists) so the
# write does not fail on a fresh checkout where "data/" is missing.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write_csv(head_coach, "data/extracted_head_coach.csv")