[Pema-Norbu] - Fab Futures - Data Science
Home About

< Home

Week 1: Introduction to data science¶

Dataset¶

Jupyterlab¶

In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load the player dataset that the rest of the notebook works on
players_csv = 'datasets/players_15.csv'
df = pd.read_csv(players_csv)

# Plot appearance: matplotlib style sheet first, then the seaborn colour palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
In [2]:
# Preview the first rows. Leaving the frame as the cell's last expression lets
# Jupyter render it as a rich table instead of the wrapped plain-text repr
# that print() produces.
df.head()
   sofifa_id                                         player_url  \
0     158023  https://sofifa.com/player/158023/lionel-messi/...   
1      20801  https://sofifa.com/player/20801/c-ronaldo-dos-...   
2       9014  https://sofifa.com/player/9014/arjen-robben/15...   
3      41236  https://sofifa.com/player/41236/zlatan-ibrahim...   
4     167495  https://sofifa.com/player/167495/manuel-neuer/...   

          short_name                            long_name  age         dob  \
0           L. Messi       Lionel Andrés Messi Cuccittini   27  1987-06-24   
1  Cristiano Ronaldo  Cristiano Ronaldo dos Santos Aveiro   29  1985-02-05   
2          A. Robben                         Arjen Robben   30  1984-01-23   
3     Z. Ibrahimović                   Zlatan Ibrahimović   32  1981-10-03   
4           M. Neuer                         Manuel Neuer   28  1986-03-27   

   height_cm  weight_kg  nationality            club_name  ...   lwb   ldm  \
0        169         67    Argentina         FC Barcelona  ...  62+3  62+3   
1        185         80     Portugal          Real Madrid  ...  63+3  63+3   
2        180         80  Netherlands    FC Bayern München  ...  64+3  64+3   
3        195         95       Sweden  Paris Saint-Germain  ...  61+3  65+3   
4        193         92      Germany    FC Bayern München  ...  36+3  40+3   

    cdm   rdm   rwb    lb   lcb    cb   rcb    rb  
0  62+3  62+3  62+3  54+3  45+3  45+3  45+3  54+3  
1  63+3  63+3  63+3  57+3  52+3  52+3  52+3  57+3  
2  64+3  64+3  64+3  55+3  46+3  46+3  46+3  55+3  
3  65+3  65+3  61+3  56+3  55+3  55+3  55+3  56+3  
4  40+3  40+3  36+3  36+3  38+3  38+3  38+3  36+3  

[5 rows x 106 columns]
In [3]:
# Top nationalities in the dataset.
# value_counts() orders by frequency (highest first), so head(15) keeps the
# 15 most common nationalities.
top_nations = df['nationality'].value_counts().head(15)

plt.figure(figsize=(12, 8))
top_nations.plot(kind='bar')
# The data is read from players_15.csv, i.e. the FIFA 15 edition, so the
# title names that edition (it previously said "FIFA 21").
plt.title('Top 15 Nationalities in FIFA 15')
plt.xlabel('Nationality')
plt.ylabel('Number of Players')
plt.xticks(rotation=45)  # tilt the country names so they do not overlap
plt.tight_layout()
plt.show()
No description has been provided for this image
In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# `df` is loaded and the plotting style/palette are applied once in the
# notebook's first cell, so the CSV is not re-read and the style is not
# re-applied here.

# 1. Distribution of Player Overall Ratings
plt.figure(figsize=(12, 6))
plt.hist(df['overall'], bins=30, edgecolor='black', alpha=0.7)
# The data is read from players_15.csv, i.e. the FIFA 15 edition, so the
# title names that edition (it previously said "FIFA 21").
plt.title('Distribution of Player Overall Ratings in FIFA 15')
plt.xlabel('Overall Rating')
plt.ylabel('Number of Players')
plt.grid(True, alpha=0.3)  # faint grid to make bar heights easier to read
plt.show()
No description has been provided for this image
In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# `df` is loaded and the plotting style/palette are applied once in the
# notebook's first cell, so the CSV is not re-read and the style is not
# re-applied here.

# First, let's see what columns are available
print("First few rows:")
# display() renders the frame as a rich table; print() would fall back to the
# wrapped plain-text repr.
display(df.head())
print("\nColumn names:")
print(df.columns.tolist())

# Let's check the actual column names for club/team information
# Common column names for clubs in FIFA datasets:
# 'club', 'club_name', 'team', 'club_team', 'team_name'

# Look for club-related columns (case-insensitive substring match)
club_columns = [col for col in df.columns if 'club' in col.lower() or 'team' in col.lower()]
print("\nPotential club/team columns:", club_columns)

# Show the 10 most frequent values for up to the first 3 matching columns.
# Slicing an empty list yields an empty list, so no emptiness guard is needed.
for col in club_columns[:3]:
    print(f"\nUnique values in '{col}':")
    print(df[col].value_counts().head(10))
First few rows:
   sofifa_id                                         player_url  \
0     158023  https://sofifa.com/player/158023/lionel-messi/...   
1      20801  https://sofifa.com/player/20801/c-ronaldo-dos-...   
2       9014  https://sofifa.com/player/9014/arjen-robben/15...   
3      41236  https://sofifa.com/player/41236/zlatan-ibrahim...   
4     167495  https://sofifa.com/player/167495/manuel-neuer/...   

          short_name                            long_name  age         dob  \
0           L. Messi       Lionel Andrés Messi Cuccittini   27  1987-06-24   
1  Cristiano Ronaldo  Cristiano Ronaldo dos Santos Aveiro   29  1985-02-05   
2          A. Robben                         Arjen Robben   30  1984-01-23   
3     Z. Ibrahimović                   Zlatan Ibrahimović   32  1981-10-03   
4           M. Neuer                         Manuel Neuer   28  1986-03-27   

   height_cm  weight_kg  nationality            club_name  ...   lwb   ldm  \
0        169         67    Argentina         FC Barcelona  ...  62+3  62+3   
1        185         80     Portugal          Real Madrid  ...  63+3  63+3   
2        180         80  Netherlands    FC Bayern München  ...  64+3  64+3   
3        195         95       Sweden  Paris Saint-Germain  ...  61+3  65+3   
4        193         92      Germany    FC Bayern München  ...  36+3  40+3   

    cdm   rdm   rwb    lb   lcb    cb   rcb    rb  
0  62+3  62+3  62+3  54+3  45+3  45+3  45+3  54+3  
1  63+3  63+3  63+3  57+3  52+3  52+3  52+3  57+3  
2  64+3  64+3  64+3  55+3  46+3  46+3  46+3  55+3  
3  65+3  65+3  61+3  56+3  55+3  55+3  55+3  56+3  
4  40+3  40+3  36+3  36+3  38+3  38+3  38+3  36+3  

[5 rows x 106 columns]

Column names:
['sofifa_id', 'player_url', 'short_name', 'long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club_name', 'league_name', 'league_rank', 'overall', 'potential', 'value_eur', 'wage_eur', 'player_positions', 'preferred_foot', 'international_reputation', 'weak_foot', 'skill_moves', 'work_rate', 'body_type', 'real_face', 'release_clause_eur', 'player_tags', 'team_position', 'team_jersey_number', 'loaned_from', 'joined', 'contract_valid_until', 'nation_position', 'nation_jersey_number', 'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic', 'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed', 'gk_positioning', 'player_traits', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions', 'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking', 'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes', 'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb']

Potential club/team columns: ['club_name', 'team_position', 'team_jersey_number']

Unique values in 'club_name':
club_name
Sevilla FC                 33
Newcastle United           33
Hull City                  33
Torino                     33
OGC Nice                   33
Burnley                    33
Stoke City                 33
Queens Park Rangers        33
Sporting Club de Bastia    33
Everton                    33
Name: count, dtype: int64

Unique values in 'team_position':
team_position
SUB    6906
RES    2663
LCB     577
RCB     577
GK      577
LB      534
RB      534
LM      403
RM      403
ST      366
Name: count, dtype: int64

Unique values in 'team_jersey_number':
team_jersey_number
7.0     542
8.0     536
10.0    533
5.0     526
11.0    521
6.0     520
1.0     511
9.0     501
4.0     497
17.0    489
Name: count, dtype: int64
In [ ]: