import pandas as pd
import numpy as np
import re
from scipy.signal import spectrogram
import matplotlib.pyplot as plt

# ------------------------------------------
# 1. READ THE SOURCE CSV INTO A DATAFRAME
# ------------------------------------------
# The path is relative, so it resolves against the current working directory.
df = pd.read_csv(
    filepath_or_buffer="datasets/ALD_Data_Big.csv",
)

# ------------------------------------------
# 2. EXTRACT NUMERIC VALUE FROM STRING
# ------------------------------------------
def extract_numeric(val):
    """Return the first number found in *val* as a float.

    Strings are scanned for the first integer or decimal literal, keeping
    an optional leading sign (``"-5 deaths"`` -> ``-5.0``).  A string that
    contains no digits yields ``np.nan``.  Non-string inputs are returned
    unchanged.
    """
    if not isinstance(val, str):
        return val
    # The alternation is grouped so the optional sign applies to integers as
    # well as decimals; ungrouped, the sign bound only to the decimal branch
    # and "-5" was read as 5.0.
    match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", val)
    return float(match.group()) if match else np.nan

# Run every "Value" entry through extract_numeric and store the result as "Num".
df["Num"] = df["Value"].map(extract_numeric)

# ------------------------------------------
# 3. CLEAN YEAR COLUMN (HANDLE "2012 → 2016")
# ------------------------------------------
def extract_year(y):
    """Return the year encoded in *y* as an int, or ``np.nan`` if unparseable.

    Range strings such as ``"2012 → 2016"`` resolve to the last number in
    the string (the ending year).  Any other input is passed to ``int()``.
    """
    if isinstance(y, str) and "→" in y:
        nums = re.findall(r"\d+", y)
        # An arrow with no digits would otherwise raise IndexError on nums[-1].
        return int(nums[-1]) if nums else np.nan
    try:
        return int(y)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, infinities and non-numeric text are treated as missing.
        # Only conversion errors are caught, so KeyboardInterrupt and
        # SystemExit still propagate.
        return np.nan

# Convert each "Year" entry with extract_year; failures come back as NaN.
df["YearClean"] = df["Year"].map(extract_year)

# Keep only the rows where both the cleaned year and the numeric value exist
df = df.loc[df[["YearClean", "Num"]].notna().all(axis=1)]

# ------------------------------------------
# 4. COLLAPSE REPEATED YEARS TO THEIR MEAN
# ------------------------------------------
# as_index=False keeps "YearClean" as an ordinary column rather than the index.
df = df.groupby("YearClean", as_index=False)["Num"].mean()

# ------------------------------------------
# 5. INTERPOLATE MISSING YEARS
# ------------------------------------------
# The bounds are cast to int because "YearClean" can be a float column:
# extract_year returns NaN for unparseable years, which forces float64, and
# dropping those rows does not change the dtype. range() raises TypeError on
# float arguments.
year_range = pd.DataFrame({
    "YearClean": range(
        int(df["YearClean"].min()),
        int(df["YearClean"].max()) + 1,
    )
})

# Left-join onto the full span so years without data appear as NaN rows,
# then fill those gaps (interpolate() is linear by default).
df = year_range.merge(df, on="YearClean", how="left")
df["Num"] = df["Num"].interpolate()

# ------------------------------------------
# 6. NORMALIZE VALUES (IMPORTANT)
# ------------------------------------------
# Standardize to zero mean and unit variance.
signal = df["Num"].values
scale = signal.std()
signal = signal - signal.mean()
# A constant series has zero standard deviation; dividing by it would turn
# the whole signal into NaN, so in that case it is only centred.
if scale > 0:
    signal = signal / scale

# ------------------------------------------
# 7. COMPUTE THE SPECTROGRAM
# ------------------------------------------
# Segment length: one third of the series, but never fewer than 4 samples
nperseg = max(signal.size // 3, 4)

# fs=1 means one sample per unit of time
f, t, Sxx = spectrogram(signal, nperseg=nperseg, fs=1)

# ------------------------------------------
# 8. DRAW THE SPECTROGRAM WITH A COLORBAR
# ------------------------------------------
fig, ax = plt.subplots(figsize=(9, 4))
mesh = ax.pcolormesh(t, f, Sxx, shading="gouraud")

ax.set_title("Spectrogram of ALD Trend Dataset")
ax.set_xlabel("Time Window (Years)")
ax.set_ylabel("Frequency")

# The colorbar serves as the legend for the plotted power values
cbar = fig.colorbar(mesh, ax=ax)
cbar.set_label("Intensity (Power)", rotation=90)

fig.tight_layout()
plt.show()

print("Spectrogram shape:", Sxx.shape)

Spectrogram shape: (3, 2)

Week 7: Transform Datasets (09 December 2025)

Assignments: We transform the datasets

Introduction to the Dataset

Transform Dataset

Explanation

Week 7: Transform Datasets (09 December 2025)

Assignments: We transform the datasets

Compiled Dataset: Alcohol-Related Deaths / Burden in Bhutan

Introduction to the Dataset

Transform Dataset

Explanation