[Tenzin Namgyal] - Fab Futures - Data Science
Home About

<Home

Probability 02/12/2025

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------------
# Load CSV data
# -----------------------------
df = pd.read_csv("datasets/youtube_video.csv")

# Use view_count as data. Missing values are dropped first: a single NaN
# would turn the mean and standard deviation into NaN and break the histogram.
x = df["view_count"].dropna().values

# -----------------------------
# Compute statistics
# -----------------------------
npts = len(x)
if npts == 0:
    # Fail with a clear message instead of an obscure histogram error.
    raise ValueError("view_count has no non-missing values to plot")

mean = np.mean(x)
stddev = np.std(x)  # np.std defaults to ddof=0 (population standard deviation)

# -----------------------------
# Plot histogram and points
# -----------------------------
# Aim for about 50 samples per bin, but never fewer than one bin:
# with under 50 rows, npts // 50 is 0 and the histogram call raises.
plt.hist(x, bins=max(1, npts // 50), density=True)
# Rug marks on the baseline, one tick per observation.
plt.plot(x, np.zeros_like(x), '|', ms=5)

# -----------------------------
# Plot Gaussian curve
# -----------------------------
# The density is only defined for a positive spread; when every value is
# identical (stddev == 0) the formula is 0/0, so the curve is skipped.
if stddev > 0:
    xi = np.linspace(mean - 3 * stddev, mean + 3 * stddev, 200)
    yi = np.exp(-(xi - mean)**2 / (2 * stddev**2)) / np.sqrt(2 * np.pi * stddev**2)

    plt.plot(xi, yi)

# -----------------------------
# Labels
# -----------------------------
plt.xlabel("View Count")
plt.ylabel("Probability Density")
plt.title("Histogram of YouTube View Count with Gaussian Fit")

plt.show()
No description has been provided for this image
In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Read the YouTube video dataset from the notebook's datasets folder
df = pd.read_csv("datasets/youtube_video.csv")

# Work on the view_count column with missing entries removed
data = df["view_count"].dropna()

# Parameters of the normal curve: mean and standard deviation of the data
mean = np.mean(data)
std = np.std(data)

# Evaluate the normal density on 300 evenly spaced points spanning the data range
x = np.linspace(data.min(), data.max(), 300)
pdf = norm.pdf(x, loc=mean, scale=std)

# One 10x6 figure that holds all three layers
fig, ax = plt.subplots(figsize=(10, 6))

# Layer 1: density-normalised histogram with 30 bins
ax.hist(data, bins=30, density=True)

# Layer 2: normal curve computed above
ax.plot(x, pdf)

# Layer 3: rug marks along the baseline, one tick per observation
ax.plot(data, np.zeros_like(data), '|')

# Axis labels and title
ax.set_xlabel("Values")
ax.set_ylabel("Density")
ax.set_title("Histogram with Normal Distribution Curve")

plt.show()
No description has been provided for this image