# Read the student habits/performance CSV into the working DataFrame `df`.
df = pd.read_csv(
    filepath_or_buffer="datasets/enhanced_student_habits_performance_dataset.csv"
)

import matplotlib.pyplot as plt
import seaborn as sns

# Collect every column that pandas classifies as numeric.
numeric_cols = df.select_dtypes(include='number').columns

print("Numeric Columns Being Analyzed:")
print(numeric_cols)

# `student_id` is numeric by dtype but, judging by its name, is a per-row
# identifier rather than a measured habit or score, so a histogram/KDE of it
# says nothing about the students. It stays in `numeric_cols` (so anything
# else that relies on that Index is unaffected) but is skipped when plotting.
# NOTE(review): confirm `student_id` really is just an identifier.
ID_COLUMNS = {"student_id"}

# One figure per remaining numeric column: 30-bin histogram with a KDE overlay.
for col in numeric_cols:
    if col in ID_COLUMNS:
        continue
    plt.figure(figsize=(10, 5))
    sns.histplot(df[col], kde=True, bins=30)
    plt.title(f"Probability Distribution of {col}")
    plt.xlabel(col)
    plt.ylabel("Frequency")
    plt.show()

Numeric Columns Being Analyzed:
Index(['student_id', 'age', 'study_hours_per_day', 'social_media_hours',
       'netflix_hours', 'attendance_percentage', 'sleep_hours',
       'exercise_frequency', 'mental_health_rating', 'previous_gpa',
       'semester', 'stress_level', 'social_activity', 'screen_time',
       'parental_support_level', 'motivation_level', 'exam_anxiety_score',
       'time_management_score', 'exam_score'],
      dtype='object')

import matplotlib.pyplot as plt
import seaborn as sns

# Factor to inspect (example: 'study_hours_per_day')
column = 'study_hours_per_day'

# Open a 10x5 inch figure, draw a 30-bin histogram with a KDE overlay on it,
# then label the axes through the Axes object that seaborn hands back.
plt.figure(figsize=(10, 5))
ax = sns.histplot(df[column], kde=True, bins=30)
ax.set_title(f"Probability Distribution of {column}")
ax.set_xlabel(column)
ax.set_ylabel("Frequency")
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Factor to inspect
column = 'exam_score'   # <-- change this to any numeric column

# Figure 1: filled kernel density estimate of the chosen column.
plt.figure(figsize=(10, 5))
kde_ax = sns.kdeplot(df[column], fill=True, linewidth=2)
kde_ax.set_title(f"Density Estimation (KDE) for {column}")
kde_ax.set_xlabel(column)
kde_ax.set_ylabel("Density")
plt.show()

# Figure 2: 30-bin histogram of the same column with a KDE curve on top.
plt.figure(figsize=(10, 5))
hist_ax = sns.histplot(df[column], kde=True, bins=30)
hist_ax.set_title(f"Histogram + KDE for {column}")
hist_ax.set_xlabel(column)
hist_ax.set_ylabel("Frequency / Density")
plt.show()

import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import numpy as np

# Step 1 -- load the dataset (required before anything below can run).
df = pd.read_csv(
    filepath_or_buffer="datasets/enhanced_student_habits_performance_dataset.csv"
)

# Step 2 -- pick the column to estimate; any numeric column works.
# Missing values are dropped before the estimator sees the data.
column = 'study_hours_per_day'
data = df[column].dropna()

# Step 3 -- fit a Gaussian KDE and evaluate it on 500 evenly spaced points
# spanning the observed minimum and maximum of the column.
kde = gaussian_kde(data)
x_range = np.linspace(data.min(), data.max(), num=500)
density = kde.evaluate(x_range)

# Step 4 -- draw the density curve with a pastel fill underneath it.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x_range, density, linewidth=3, color='lightcoral')
ax.fill_between(x_range, density, alpha=0.3, color='mistyrose')

ax.set_title(f"KDE Density Estimation: {column}", fontsize=14)
ax.set_xlabel(column)
ax.set_ylabel("Density")
ax.grid(alpha=0.2)

plt.show()

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset.
df = pd.read_csv(
    filepath_or_buffer="datasets/enhanced_student_habits_performance_dataset.csv"
)

# Column under study, with missing values removed before plotting.
column = "study_hours_per_day"
data = df[column].dropna()

# 30-bin histogram (black bar edges, 60% opacity) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data, kde=True, bins=30, edgecolor="black", alpha=0.6, ax=ax)

ax.set_title(f"Histogram + KDE for {column}")
ax.set_xlabel(column)
ax.set_ylabel("Frequency / Density")
fig.tight_layout()
plt.show()

Fitting a Probability Distribution to My Data

Density Estimation