Data Science: Probability¶
Goal¶
- quantify uncertainty
Probability¶
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#
# load your own data
#
data = pd.read_csv("~/work/rinchen-khandu/datasets/student_depression_dataset.csv")
# Column to analyze. The x-axis label further down is derived from this name,
# so the figure always describes the data that is actually plotted.
column = "Age"
x = data[column].dropna().values
npts = len(x)
if npts == 0:
    # Fail early with a clear message instead of NaN statistics and a
    # confusing histogram error further down.
    raise ValueError(f"column {column!r} has no non-missing values to plot")
#
# estimate Gaussian parameters from your data
#
mean = np.mean(x)
stddev = np.std(x)  # numpy default ddof=0, i.e. the population standard deviation
#
# plot histogram and data points
#
# Aim for about 50 points per bin, but never fewer than one bin:
# npts // 50 is 0 for fewer than 50 samples, and an integer bin count
# must be positive.
nbins = max(1, npts // 50)
plt.hist(x, bins=nbins, density=True, alpha=0.6)
# Rug plot: one tick per observation along the baseline.
plt.plot(x, np.zeros_like(x), '|', ms=10)
#
# plot fitted Gaussian curve
#
# Evaluate the normal density on mean +/- 3 standard deviations.
xi = np.linspace(mean - 3 * stddev, mean + 3 * stddev, 200)
yi = np.exp(-(xi - mean) ** 2 / (2 * stddev ** 2)) / np.sqrt(2 * np.pi * stddev ** 2)
plt.plot(xi, yi, 'r', linewidth=2)
# Label the axis with the analyzed column; the previous hard-coded
# "Depression Score" did not match the "Age" data being plotted.
plt.xlabel(column)
plt.ylabel("Probability Density")
plt.title("Gaussian Fit to Student Depression Data")
plt.show()
In [ ]: