# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the data
df = pd.read_csv('datasets/players_15.csv')

# Display basic information: dimensions first, then a labelled dump of the
# leading rows, the column names and the per-column dtypes.
print("Dataset Shape:", df.shape)
overview_sections = (
    ("\nFirst few rows:", df.head()),
    ("\nColumn names:", df.columns.tolist()),
    ("\nData types:", df.dtypes),
)
for heading, content in overview_sections:
    print(heading)
    print(content)

# The 20 columns with the most missing entries, worst first
missing_counts = df.isnull().sum()
print("\nMissing values:")
print(missing_counts.sort_values(ascending=False).head(20))

Dataset Shape: (16155, 106)

First few rows:
   sofifa_id                                         player_url  \
0     158023  https://sofifa.com/player/158023/lionel-messi/...   
1      20801  https://sofifa.com/player/20801/c-ronaldo-dos-...   
2       9014  https://sofifa.com/player/9014/arjen-robben/15...   
3      41236  https://sofifa.com/player/41236/zlatan-ibrahim...   
4     167495  https://sofifa.com/player/167495/manuel-neuer/...   

          short_name                            long_name  age         dob  \
0           L. Messi       Lionel Andrés Messi Cuccittini   27  1987-06-24   
1  Cristiano Ronaldo  Cristiano Ronaldo dos Santos Aveiro   29  1985-02-05   
2          A. Robben                         Arjen Robben   30  1984-01-23   
3     Z. Ibrahimović                   Zlatan Ibrahimović   32  1981-10-03   
4           M. Neuer                         Manuel Neuer   28  1986-03-27   

   height_cm  weight_kg  nationality            club_name  ...   lwb   ldm  \
0        169         67    Argentina         FC Barcelona  ...  62+3  62+3   
1        185         80     Portugal          Real Madrid  ...  63+3  63+3   
2        180         80  Netherlands    FC Bayern München  ...  64+3  64+3   
3        195         95       Sweden  Paris Saint-Germain  ...  61+3  65+3   
4        193         92      Germany    FC Bayern München  ...  36+3  40+3   

    cdm   rdm   rwb    lb   lcb    cb   rcb    rb  
0  62+3  62+3  62+3  54+3  45+3  45+3  45+3  54+3  
1  63+3  63+3  63+3  57+3  52+3  52+3  52+3  57+3  
2  64+3  64+3  64+3  55+3  46+3  46+3  46+3  55+3  
3  65+3  65+3  61+3  56+3  55+3  55+3  55+3  56+3  
4  40+3  40+3  36+3  36+3  38+3  38+3  38+3  36+3  

[5 rows x 106 columns]

Column names:
['sofifa_id', 'player_url', 'short_name', 'long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club_name', 'league_name', 'league_rank', 'overall', 'potential', 'value_eur', 'wage_eur', 'player_positions', 'preferred_foot', 'international_reputation', 'weak_foot', 'skill_moves', 'work_rate', 'body_type', 'real_face', 'release_clause_eur', 'player_tags', 'team_position', 'team_jersey_number', 'loaned_from', 'joined', 'contract_valid_until', 'nation_position', 'nation_jersey_number', 'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic', 'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed', 'gk_positioning', 'player_traits', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions', 'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking', 'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes', 'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb']

Data types:
sofifa_id      int64
player_url    object
short_name    object
long_name     object
age            int64
               ...  
lb            object
lcb           object
cb            object
rcb           object
rb            object
Length: 106, dtype: object

Missing values:
mentality_composure     16155
release_clause_eur      16155
loaned_from             15243
nation_jersey_number    15074
nation_position         15074
player_tags             14919
gk_speed                14380
gk_kicking              14380
gk_handling             14380
gk_diving               14380
gk_positioning          14380
gk_reflexes             14380
player_traits            9556
pace                     1775
physic                   1775
defending                1775
dribbling                1775
passing                  1775
shooting                 1775
joined                   1151
dtype: int64

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the data
df = pd.read_csv('datasets/players_15.csv')

# Display basic information: shape on one line, then each remaining view of
# the frame printed under its own heading.
print("Dataset Shape:", df.shape)
for heading, content in (
    ("\nFirst few rows:", df.head()),
    ("\nColumn names:", df.columns.tolist()),
    ("\nData types:", df.dtypes),
):
    print(heading)
    print(content)

# ========== DATA CLEANING ==========
print("\n" + "=" * 50 + "\nCLEANING THE DATA\n" + "=" * 50)

# Strip surrounding whitespace from the header names so that the by-name
# lookups below match.
df.columns = df.columns.str.strip()

# All cleaning happens on a copy; the raw frame `df` is left as loaded.
df_clean = df.copy()

# Per-column count of missing cells; only columns that have gaps are printed.
print("\nMissing values summary:")
missing_summary = df_clean.isnull().sum()
print(missing_summary.loc[missing_summary > 0])

# Step 1: Drop columns in which more than half of the rows are missing
threshold = len(df_clean) * 0.5
columns_to_drop = [
    col for col, n_missing in missing_summary.items() if n_missing > threshold
]
print(f"\nDropping columns with more than 50% missing values: {columns_to_drop}")
df_clean = df_clean.drop(columns=columns_to_drop)

# Step 2: Drop columns that are not useful for prediction.
# Some of these may already be gone after Step 1, so absent names are ignored.
columns_to_drop = [
    'player_url', 'short_name', 'long_name', 'dob',
    'player_tags', 'team_position', 'player_traits',
    'loaned_from', 'joined', 'real_face', 'contract_valid_until',
    'nation_position', 'team_jersey_number', 'nation_jersey_number',
]
df_clean = df_clean.drop(columns=columns_to_drop, errors='ignore')

# Step 3: Keep every numeric column plus a short list of categorical columns
# that get encoded further down.
numeric_cols = df_clean.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols_to_keep = ['preferred_foot', 'body_type', 'work_rate']

all_cols_to_keep = list(numeric_cols)
all_cols_to_keep.extend(
    col for col in categorical_cols_to_keep if col in df_clean.columns
)
df_clean = df_clean[all_cols_to_keep]

print(f"\nAfter initial cleaning - shape: {df_clean.shape}")

# Step 4: Fill gaps in the numeric columns with each column's median
# NOTE(review): the imputer is fitted on the full frame, i.e. before the
# train/test split performed later in this script, so the medians also see
# rows that end up in the test set -- confirm this is acceptable.
print("\nHandling missing values...")
imputer = SimpleImputer(strategy='median')
numeric_data = df_clean.select_dtypes(include=[np.number])
imputed_values = imputer.fit_transform(numeric_data)
df_clean[numeric_data.columns] = imputed_values

# Step 5: Turn the retained categorical columns into numbers
print("\nEncoding categorical variables...")
le = LabelEncoder()

for col in ('preferred_foot', 'body_type'):
    if col not in df_clean.columns:
        continue
    # Missing labels take the column's most frequent value before encoding
    most_common = df_clean[col].mode()[0]
    df_clean[col] = df_clean[col].fillna(most_common)
    df_clean[col] = le.fit_transform(df_clean[col])

# work_rate is handled on its own because it packs two values into one
# string such as "High/Low".
if 'work_rate' in df_clean.columns:
    work_rate_mapping = {'Low': 0, 'Medium': 1, 'High': 2}

    # First part -> att_work_rate, second part -> def_work_rate
    split_rates = df_clean['work_rate'].str.split('/', expand=True)
    df_clean[['att_work_rate', 'def_work_rate']] = split_rates

    # Map the labels to 0/1/2; anything outside the mapping becomes 1
    for rate_col in ('att_work_rate', 'def_work_rate'):
        df_clean[rate_col] = df_clean[rate_col].map(work_rate_mapping).fillna(1)

    # The combined text column is no longer needed
    df_clean = df_clean.drop(columns=['work_rate'])

# Step 6: Report whatever NaN values survived imputation and encoding
nan_mask = df_clean.isnull()
print("\nFinal check for NaN values:")
print(f"Total NaN values remaining: {nan_mask.sum().sum()}")
print(f"Columns with NaN: {df_clean.columns[nan_mask.any()].tolist()}")

# Remove any row that still contains a NaN and report how many rows that cost
initial_rows = len(df_clean)
df_clean = df_clean.dropna()
final_rows = len(df_clean)
print(f"\nRows before dropping NaN: {initial_rows}")
print(f"Rows after dropping NaN: {final_rows}")
print(f"Rows dropped: {initial_rows - final_rows}")

# Step 7: Confirm the target column is still present after cleaning
if 'overall' in df_clean.columns:
    print("\n✅ Data cleaning complete!")
    print(f"Final shape: {df_clean.shape}")
    # Every column except the target is counted as a feature here
    print(f"Number of features: {len(df_clean.columns) - 1}")

    # Summary statistics for the target and three headline columns
    print("\nBasic statistics of cleaned data:")
    summary_cols = ['overall', 'age', 'value_eur', 'wage_eur']
    print(df_clean[summary_cols].describe())
else:
    print("\nERROR: 'overall' column not found in cleaned data!")
    print(f"Available columns: {df_clean.columns.tolist()}")

# ========== ANALYSIS 1: PREDICTING PLAYER'S OVERALL RATING ==========
print("\n" + "=" * 50 + "\nANALYSIS 1: PREDICTING OVERALL RATING\n" + "=" * 50)

# Predict the overall rating from the remaining attributes.
# Any column that is still object-typed is announced and then removed.
leftover_object_cols = [
    col for col in df_clean.columns if df_clean[col].dtype == 'object'
]
for col in leftover_object_cols:
    print(f"Warning: Column '{col}' is still object type. Dropping it.")
df_clean = df_clean.drop(columns=leftover_object_cols)

# Restrict the frame to numeric dtypes
df_clean = df_clean.select_dtypes(include=[np.number])

# Target is 'overall'; the features are everything else except the
# 'sofifa_id' column.
y = df_clean['overall']
X = df_clean.drop(columns=['overall', 'sofifa_id'])

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Count infinite entries in the features and in the target
print("\nChecking for infinite values:")
print(f"Infinite values in X: {np.isinf(X.values).sum()}")
print(f"Infinite values in y: {np.isinf(y.values).sum()}")

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"\nTraining set: {X_train.shape}")
print(f"Testing set: {X_test.shape}")

# Standardise the features: the scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("\nData scaled successfully!")

# Train different models
# Every candidate is fitted on the same scaled training matrix and scored on
# the same scaled test matrix, so the printed metrics are directly comparable.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42, max_depth=5),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42, max_depth=10)
}

# Maps model name -> {'RMSE': ..., 'R²': ...}; a model only appears here if it
# was fitted and scored without raising.
results = {}
print("\nModel Performance:")
for name, model in models.items():
    print(f"\nTraining {name}...")
    try:
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)

        results[name] = {'RMSE': rmse, 'R²': r2}

        print(f"  RMSE: {rmse:.4f}")
        print(f"  R² Score: {r2:.4f}")

    except Exception as e:
        # Best-effort comparison: report the failure and continue with the
        # remaining models instead of aborting the whole run.
        print(f"  Error training {name}: {e}")

print("-" * 50)

# Feature importance from Random Forest
if 'Random Forest' in results:
    print("\nFeature Importance Analysis:")
    # Reuse the forest that was fitted and scored in the loop above, so the
    # importances describe the same model whose RMSE/R² were just reported.
    # (Previously a second forest with different hyperparameters -- no
    # max_depth -- was trained here, which was slower and was never evaluated.)
    rf_model = models['Random Forest']

    # Pair each feature name with its importance, most important first
    feature_importances = pd.DataFrame({
        'feature': X.columns,
        'importance': rf_model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\nTop 10 Most Important Features for Overall Rating:")
    print(feature_importances.head(10))

    # Horizontal bar chart of the 15 most important features
    plt.figure(figsize=(12, 8))
    top_features = feature_importances.head(15)
    plt.barh(top_features['feature'], top_features['importance'])
    plt.xlabel('Importance')
    plt.title('Top 15 Features for Predicting Overall Rating')
    # barh draws the first row at the bottom; flip so the top feature is on top
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()

# ========== ADDITIONAL SIMPLE VISUALIZATIONS ==========
print("\n" + "=" * 50 + "\nADDITIONAL VISUALIZATIONS\n" + "=" * 50)

# 1. Histogram of the target values (overall rating)
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(y, bins=30, edgecolor='black', alpha=0.7, color='skyblue')
ax.set_xlabel('Overall Rating')
ax.set_ylabel('Number of Players')
ax.set_title('Distribution of Player Overall Ratings')
ax.grid(True, alpha=0.3)
plt.show()

# 2. The 20 highest-rated players, taken from the raw frame because the
#    name columns were removed from the cleaned one
if 'short_name' not in df.columns:
    print("\nNote: 'short_name' column not available for displaying top players")
else:
    player_view = df[['short_name', 'overall', 'age', 'club_name']]
    top_players = player_view.sort_values('overall', ascending=False).head(20)
    print("\nTop 20 Players by Overall Rating:")
    print(top_players)

# 3. Scatter of age against overall rating
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['age'], df['overall'], alpha=0.5, s=20, color='green')
ax.set_xlabel('Age')
ax.set_ylabel('Overall Rating')
ax.set_title('Age vs Overall Rating')
ax.grid(True, alpha=0.3)
plt.show()

# 4. Scatter of market value against overall rating (skipped when the
#    value_eur column is absent)
if 'value_eur' in df.columns:
    # log1p compresses the long right tail of the values and keeps 0 finite
    log_value = np.log1p(df['value_eur'])
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(log_value, df['overall'], alpha=0.5, s=20, color='purple')
    ax.set_xlabel('Log(Value + 1) in EUR')
    ax.set_ylabel('Overall Rating')
    ax.set_title('Player Value vs Overall Rating (Log Scale)')
    ax.grid(True, alpha=0.3)
    plt.show()

# ========== SIMPLE CORRELATION ANALYSIS ==========
print("\n" + "=" * 50 + "\nCORRELATION ANALYSIS\n" + "=" * 50)

# Candidate columns for the correlation matrix, narrowed to those that are
# actually present in the raw frame
key_columns = [
    col
    for col in ('overall', 'potential', 'value_eur', 'wage_eur', 'age', 'height_cm', 'weight_kg')
    if col in df.columns
]

# A correlation matrix needs at least two columns
if len(key_columns) > 1:
    corr_matrix = df[key_columns].corr()

    # Annotated heatmap with the colour scale centred on zero
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        corr_matrix,
        annot=True,
        cmap='coolwarm',
        center=0,
        fmt='.2f',
        square=True,
        ax=ax,
    )
    ax.set_title('Correlation Matrix of Key Player Attributes')
    fig.tight_layout()
    plt.show()

    # Correlation of each selected column with the overall rating, strongest first
    if 'overall' in corr_matrix.columns:
        overall_corr = corr_matrix['overall'].sort_values(ascending=False)
        print("\nTop Correlations with Overall Rating:")
        print(overall_corr.head(10))

print("\n" + "=" * 50 + "\nANALYSIS COMPLETE!\n" + "=" * 50)

Dataset Shape: (16155, 106)

First few rows:
   sofifa_id                                         player_url  \
0     158023  https://sofifa.com/player/158023/lionel-messi/...   
1      20801  https://sofifa.com/player/20801/c-ronaldo-dos-...   
2       9014  https://sofifa.com/player/9014/arjen-robben/15...   
3      41236  https://sofifa.com/player/41236/zlatan-ibrahim...   
4     167495  https://sofifa.com/player/167495/manuel-neuer/...   

          short_name                            long_name  age         dob  \
0           L. Messi       Lionel Andrés Messi Cuccittini   27  1987-06-24   
1  Cristiano Ronaldo  Cristiano Ronaldo dos Santos Aveiro   29  1985-02-05   
2          A. Robben                         Arjen Robben   30  1984-01-23   
3     Z. Ibrahimović                   Zlatan Ibrahimović   32  1981-10-03   
4           M. Neuer                         Manuel Neuer   28  1986-03-27   

   height_cm  weight_kg  nationality            club_name  ...   lwb   ldm  \
0        169         67    Argentina         FC Barcelona  ...  62+3  62+3   
1        185         80     Portugal          Real Madrid  ...  63+3  63+3   
2        180         80  Netherlands    FC Bayern München  ...  64+3  64+3   
3        195         95       Sweden  Paris Saint-Germain  ...  61+3  65+3   
4        193         92      Germany    FC Bayern München  ...  36+3  40+3   

    cdm   rdm   rwb    lb   lcb    cb   rcb    rb  
0  62+3  62+3  62+3  54+3  45+3  45+3  45+3  54+3  
1  63+3  63+3  63+3  57+3  52+3  52+3  52+3  57+3  
2  64+3  64+3  64+3  55+3  46+3  46+3  46+3  55+3  
3  65+3  65+3  61+3  56+3  55+3  55+3  55+3  56+3  
4  40+3  40+3  36+3  36+3  38+3  38+3  38+3  36+3  

[5 rows x 106 columns]

Column names:
['sofifa_id', 'player_url', 'short_name', 'long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club_name', 'league_name', 'league_rank', 'overall', 'potential', 'value_eur', 'wage_eur', 'player_positions', 'preferred_foot', 'international_reputation', 'weak_foot', 'skill_moves', 'work_rate', 'body_type', 'real_face', 'release_clause_eur', 'player_tags', 'team_position', 'team_jersey_number', 'loaned_from', 'joined', 'contract_valid_until', 'nation_position', 'nation_jersey_number', 'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic', 'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed', 'gk_positioning', 'player_traits', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions', 'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking', 'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes', 'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb']

Data types:
sofifa_id      int64
player_url    object
short_name    object
long_name     object
age            int64
               ...  
lb            object
lcb           object
cb            object
rcb           object
rb            object
Length: 106, dtype: object

==================================================
CLEANING THE DATA
==================================================

Missing values summary:
club_name                 239
league_name               239
league_rank               239
release_clause_eur      16155
player_tags             14919
team_position             239
team_jersey_number        239
loaned_from             15243
joined                   1151
contract_valid_until      239
nation_position         15074
nation_jersey_number    15074
pace                     1775
shooting                 1775
passing                  1775
dribbling                1775
defending                1775
physic                   1775
gk_diving               14380
gk_handling             14380
gk_kicking              14380
gk_reflexes             14380
gk_speed                14380
gk_positioning          14380
player_traits            9556
mentality_composure     16155
dtype: int64

Dropping columns with more than 50% missing values: ['release_clause_eur', 'player_tags', 'loaned_from', 'nation_position', 'nation_jersey_number', 'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed', 'gk_positioning', 'player_traits', 'mentality_composure']

After initial cleaning - shape: (16155, 54)

Handling missing values...

Encoding categorical variables...

Final check for NaN values:
Total NaN values remaining: 0
Columns with NaN: []

Rows before dropping NaN: 16155
Rows after dropping NaN: 16155
Rows dropped: 0

✅ Data cleaning complete!
Final shape: (16155, 55)
Number of features: 54

Basic statistics of cleaned data:
            overall           age     value_eur       wage_eur
count  16155.000000  16155.000000  1.615500e+04   16155.000000
mean      63.830393     24.776230  1.060882e+06   13056.453110
std        7.169896      4.625321  2.819128e+06   23488.182571
min       40.000000     16.000000  0.000000e+00       0.000000
25%       59.000000     21.000000  1.200000e+05    2000.000000
50%       64.000000     24.000000  3.500000e+05    5000.000000
75%       68.000000     28.000000  8.250000e+05   10000.000000
max       93.000000     44.000000  1.005000e+08  550000.000000

==================================================
ANALYSIS 1: PREDICTING OVERALL RATING
==================================================
Features shape: (16155, 53)
Target shape: (16155,)

Checking for infinite values:
Infinite values in X: 0
Infinite values in y: 0

Training set: (12924, 53)
Testing set: (3231, 53)

Data scaled successfully!

Model Performance:

Training Linear Regression...
  RMSE: 1.8073
  R² Score: 0.9365

Training Decision Tree...
  RMSE: 1.5459
  R² Score: 0.9536

Training Random Forest...
  RMSE: 0.7002
  R² Score: 0.9905
--------------------------------------------------

Feature Importance Analysis:

Top 10 Most Important Features for Overall Rating:
                      feature  importance
5                   value_eur    0.803973
6                    wage_eur    0.137903
0                         age    0.023435
4                   potential    0.022868
14                  defending    0.002350
29         movement_reactions    0.000599
42  defending_standing_tackle    0.000450
34             power_strength    0.000350
37    mentality_interceptions    0.000334
41          defending_marking    0.000320

==================================================
ADDITIONAL VISUALIZATIONS
==================================================

Top 20 Players by Overall Rating:
           short_name  overall  age            club_name
0            L. Messi       93   27         FC Barcelona
1   Cristiano Ronaldo       92   29          Real Madrid
2           A. Robben       90   30    FC Bayern München
3      Z. Ibrahimović       90   32  Paris Saint-Germain
4            M. Neuer       90   28    FC Bayern München
5           L. Suárez       89   27         FC Barcelona
6             Iniesta       89   30         FC Barcelona
7           E. Hazard       88   23              Chelsea
8       R. van Persie       88   30    Manchester United
9   B. Schweinsteiger       88   29    FC Bayern München
10          F. Ribéry       88   31    FC Bayern München
11             Falcao       88   28    Manchester United
16       Thiago Silva       87   29  Paris Saint-Germain
18          L. Modrić       87   28          Real Madrid
17        David Silva       87   28      Manchester City
12            G. Bale       87   24          Real Madrid
15       Sergio Ramos       87   28          Real Madrid
14            P. Lahm       87   30    FC Bayern München
13     R. Lewandowski       87   25    FC Bayern München
26          S. Agüero       86   26      Manchester City

==================================================
CORRELATION ANALYSIS
==================================================

Top Correlations with Overall Rating:
overall      1.000000
potential    0.803952
wage_eur     0.705422
value_eur    0.568540
age          0.436976
weight_kg    0.123992
height_cm    0.050819
Name: overall, dtype: float64

==================================================
ANALYSIS COMPLETE!
==================================================

Analysis 1: Predicting Player's Overall Rating