[Rinchen Khandu] - Fab Futures - Data Science
Home About

Data Science: Probability¶

Goal¶

  • quantify uncertainty

Probability¶

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#
# load your own data
#
data = pd.read_csv("~/work/rinchen-khandu/datasets/student_depression_dataset.csv")

# Column to analyze. The x-axis label below is derived from this name so the
# plot cannot drift out of sync with the data actually being shown.
column = "Age"
x = data[column].dropna().values

npts = len(x)
if npts == 0:
    # Fail early with a clear message instead of NaN statistics and an
    # obscure histogram error further down.
    raise ValueError(f"column {column!r} has no non-missing values to fit")

#
# estimate Gaussian parameters from your data
#
mean = np.mean(x)
stddev = np.std(x)  # NumPy default ddof=0: population (maximum-likelihood) estimate

#
# plot histogram and data points
#
# Aim for roughly 50 points per bin, but:
#   * never fewer than one bin (npts < 50 would give bins=0, which hist rejects);
#   * never more bins than distinct values -- the extra bins would only be
#     empty and, with density=True, would inflate the occupied bins relative
#     to the fitted curve (relevant for integer-valued columns).
nbins = max(1, min(npts // 50, np.unique(x).size))

fig, ax = plt.subplots()
ax.hist(x, bins=nbins, density=True, alpha=0.6)
ax.plot(x, np.zeros_like(x), '|', ms=10)  # rug marks: one tick per observation

#
# plot fitted Gaussian curve
#
# Evaluate the normal density over +/- 3 standard deviations around the mean.
xi = np.linspace(mean - 3 * stddev, mean + 3 * stddev, 200)
yi = np.exp(-(xi - mean) ** 2 / (2 * stddev ** 2)) / np.sqrt(2 * np.pi * stddev ** 2)

ax.plot(xi, yi, 'r', linewidth=2)

ax.set_xlabel(column)
ax.set_ylabel("Probability Density")
ax.set_title("Gaussian Fit to Student Depression Data")
plt.show()
No description has been provided for this image
In [ ]: