Class 6: Density estimation
The EM algorithm is a standard way to find the optimal parameters of a model when there are hidden variables (ones we cannot observe), or when we don't know which group each data point belongs to.
For example, in the recorded temperatures of my home town, León, we can see that there are two groups of data: those belonging to the winter season and those belonging to the summer season.
There is also some data in between, without extreme ups and downs, covering the rest of the year. So I'm expecting two extreme groups of data: winter and summer.
A Gaussian Mixture Model (GMM) assumes that the data come from:
$$P(x) = \sum_{k=1}^{K} \pi_k \, \mathcal{N}(x \mid \mu_k, \sigma_k)$$
where $K$ is the number of components, $\pi_k$ is the weight of each component, and $\mu_k$, $\sigma_k$ are the mean and standard deviation of component $k$.
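To make the formula concrete, evaluating this density at a point is just a weighted sum of the component densities. Here is a minimal sketch with two components; the parameter values are illustrative only, not fitted to anything:

import numpy as np
from scipy.stats import norm

# Illustrative parameters for a two-component mixture (made-up numbers)
pi = [0.4, 0.6]        # component weights, must sum to 1
mu = [8.0, 16.5]       # component means
sigma = [4.6, 4.9]     # component standard deviations

x0 = 5.0  # evaluate P(x) at 5 °C
p_x0 = sum(pi[k] * norm.pdf(x0, mu[k], sigma[k]) for k in range(2))
print(f"P({x0}) = {p_x0:.4f}")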
EM works in two phases:
- Expectation step (E): compute the probability that each data point belongs to each Gaussian component. For example, a temperature of 5 °C is more likely to belong to the winter season than to the summer one.
- Maximization step (M): use these probabilities to recompute the mean, the weight, and the standard deviation of each component.
This process is repeated until the parameters converge (a minimal hand-written sketch of the loop follows).
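Here is that loop for a 1-D two-component mixture, written straight from the update equations. It is a sketch only, with no convergence check or numerical safeguards, and the name em_gmm_1d is mine; later we use scikit-learn's GaussianMixture, which does the same thing more robustly:

import numpy as np

def em_gmm_1d(x, n_iter=50):
    # Initialize the two components from the data quartiles
    mu = np.percentile(x, [25, 75]).astype(float)
    sigma = np.array([x.std(), x.std()])
    pi = np.array([0.5, 0.5])
    for _ in range(n_iter):
        # E-step: responsibility of each component for each point
        dens = np.array([
            pi[k] / (sigma[k] * np.sqrt(2 * np.pi))
            * np.exp(-0.5 * ((x - mu[k]) / sigma[k]) ** 2)
            for k in range(2)
        ])                                # shape (2, n)
        resp = dens / dens.sum(axis=0)    # normalize over components
        # M-step: re-estimate weights, means, and standard deviations
        nk = resp.sum(axis=1)
        pi = nk / len(x)
        mu = (resp * x).sum(axis=1) / nk
        sigma = np.sqrt((resp * (x - mu[:, None]) ** 2).sum(axis=1) / nk)
    return pi, mu, sigma

# Quick usage on synthetic data from two "seasons"
rng = np.random.default_rng(0)
x = np.concatenate([rng.normal(5, 3, 200), rng.normal(20, 4, 200)])
print(em_gmm_1d(x))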
Working with temperature data
First we are going to connect to the API of the Spanish meteorological agency (AEMET) and store the temperature data (five months only):
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# (Optional) try to import scipy in order to fit a normal distribution later
try:
    from scipy.stats import norm
    HAS_SCIPY = True
except ImportError:
    HAS_SCIPY = False
    print("⚠️ SciPy is not installed. The normal-distribution fit will be skipped.")

# ==============================
# 1. CONFIGURATION
# ==============================
API_KEY = "YOUR_AEMET_API_KEY"  # 🔴 CHANGE THIS to your AEMET API key (placeholder shown here)

# León - Virgen del Camino station (commonly used as the reference for León)
station_id = "2661"

# Date range to analyze
start_date = "2024-08-01T00:00:00UTC"
end_date = "2024-12-31T23:59:59UTC"

# ==============================
# 2. DOWNLOAD DATA FROM AEMET
# ==============================
base_url = (
    "https://opendata.aemet.es/opendata/api/"
    "valores/climatologicos/diarios/datos/"
    f"fechaini/{start_date}/fechafin/{end_date}/estacion/{station_id}"
)
headers = {"api_key": API_KEY}

print("Calling the AEMET API...")
resp_meta = requests.get(base_url, headers=headers)
if resp_meta.status_code != 200:
    raise RuntimeError(f"AEMET request error (meta): {resp_meta.status_code} - {resp_meta.text}")

meta_json = resp_meta.json()
print("Meta response:", meta_json.get("descripcion", "OK"))

data_url = meta_json.get("datos")
if not data_url:
    raise RuntimeError("No data URL found in the AEMET response.")

print("Downloading the actual data from:", data_url)
resp_data = requests.get(data_url)
if resp_data.status_code != 200:
    raise RuntimeError(f"Error downloading the data: {resp_data.status_code} - {resp_data.text}")

# AEMET returns JSON (a list of dicts)
data_json = resp_data.json()

# ==============================
# 3. BUILD A DATAFRAME AND CLEAN IT
# ==============================
df = pd.DataFrame(data_json)

# Convert numeric columns to float (tmed, tmax, tmin usually arrive as strings with decimal commas)
for col in ["tmed", "tmax", "tmin"]:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col].str.replace(",", "."), errors="coerce")

# Convert the date column to datetime
df["fecha"] = pd.to_datetime(df["fecha"], errors="coerce")

print("\nFirst rows of the data:")
print(df.head())
print("\nDataFrame info:")
print(df.info())
Calling the AEMET API...
Meta response: exito
Downloading the actual data from: https://opendata.aemet.es/opendata/sh/a944e043
First rows of the data:
fecha indicativo nombre provincia altitud tmed prec \
0 2024-08-01 2661 LEÓN, VIRGEN DEL CAMINO LEON 916 24.2 0,0
1 2024-08-02 2661 LEÓN, VIRGEN DEL CAMINO LEON 916 22.2 0,0
2 2024-08-03 2661 LEÓN, VIRGEN DEL CAMINO LEON 916 21.5 0,0
3 2024-08-04 2661 LEÓN, VIRGEN DEL CAMINO LEON 916 22.7 0,0
4 2024-08-05 2661 LEÓN, VIRGEN DEL CAMINO LEON 916 23.7 0,0
tmin horatmin tmax ... sol presMax horaPresMax presMin horaPresMin \
0 16.5 05:30 31.8 ... 12,4 914,0 00 911,0 16
1 14.9 03:33 29.6 ... 13,2 915,6 Varias 911,6 04
2 11.9 03:55 31.1 ... 13,9 915,6 00 912,3 18
3 13.4 05:42 32.0 ... 13,7 915,1 Varias 911,6 18
4 14.9 04:42 32.5 ... 13,7 913,0 00 909,3 16
hrMedia hrMax horaHrMax hrMin horaHrMin
0 44 78 05:54 22 15:44
1 52 85 Varias 28 16:51
2 38 93 Varias 20 16:32
3 31 74 Varias 19 17:30
4 34 76 Varias 16 12:15
[5 rows x 25 columns]
DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153 entries, 0 to 152
Data columns (total 25 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 fecha 153 non-null datetime64[ns]
1 indicativo 153 non-null object
2 nombre 153 non-null object
3 provincia 153 non-null object
4 altitud 153 non-null object
5 tmed 153 non-null float64
6 prec 153 non-null object
7 tmin 153 non-null float64
8 horatmin 152 non-null object
9 tmax 153 non-null float64
10 horatmax 152 non-null object
11 dir 151 non-null object
12 velmedia 152 non-null object
13 racha 151 non-null object
14 horaracha 151 non-null object
15 sol 152 non-null object
16 presMax 152 non-null object
17 horaPresMax 152 non-null object
18 presMin 152 non-null object
19 horaPresMin 152 non-null object
20 hrMedia 152 non-null object
21 hrMax 152 non-null object
22 horaHrMax 152 non-null object
23 hrMin 152 non-null object
24 horaHrMin 152 non-null object
dtypes: datetime64[ns](1), float64(3), object(21)
memory usage: 30.0+ KB
None
Now that the data are in the df, let's follow the scikit-learn example and apply EM to this data:
from sklearn.mixture import GaussianMixture
import numpy as np

t = df["tmed"].dropna().values.reshape(-1, 1)

# Fit two Gaussians (winter and summer)
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(t)

print("Means:", gmm.means_.flatten())
# The square root of the covariances gives the standard deviations
print("Std devs:", np.sqrt(gmm.covariances_).flatten())
print("Weights:", gmm.weights_)
Means: [ 8.00986681 16.45978074]
Std devs: [4.61955284 4.90995054]
Weights: [0.42483819 0.57516181]
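As a quick sanity check of the E-step intuition (reusing the gmm fitted above), we can ask for each component's responsibility for a 5 °C day; the colder component should dominate:

# Posterior probability (responsibility) of each component for one temperature
print(gmm.predict_proba(np.array([[5.0]])))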
Another try: weight of newborns
Let's try density estimation again with an example dataset I found on the Kaggle website: https://www.kaggle.com/competitions/prediction-interval-competition-i-birth-weight/data
In this competition there is a file called test that you can download; inside it there is a column named "DBWT" that holds the birth weights of the newborns.
The file I downloaded is called birth_data.csv and it is stored in my datasets folder.
Now let's do some density estimation on it:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
from sklearn.mixture import GaussianMixture

# ================================
# 1. LOAD THE DATASET
# ================================
df = pd.read_csv("datasets/birth_data.csv")

# Make sure DBWT is numeric (in case there are odd values)
df["DBWT"] = pd.to_numeric(df["DBWT"], errors="coerce")

# Drop NaNs
bw = df["DBWT"].dropna().values.reshape(-1, 1)

print(f"Number of valid weights: {len(bw)}")
print("First values:", bw[:10].flatten())

# ================================
# 2. KDE - non-parametric estimation
# ================================
plt.figure(figsize=(10,5))
sns.kdeplot(bw.flatten(), fill=True, label="KDE (non-parametric estimate)", linewidth=2)
plt.title("Density of birth weight (DBWT)")
plt.xlabel("Weight (g)")
plt.grid(True)
plt.legend()
plt.show()

# ================================
# 3. NORMAL FIT
# ================================
mu, sigma = norm.fit(bw.flatten())
x = np.linspace(bw.min(), bw.max(), 300)
pdf_normal = norm.pdf(x, mu, sigma)

plt.figure(figsize=(10,5))
plt.hist(bw, bins=30, density=True, alpha=0.5, label="Data (histogram)")
plt.plot(x, pdf_normal, "r-", lw=2, label=f"Fitted normal μ={mu:.1f}, σ={sigma:.1f}")
plt.title("Normal distribution fit to DBWT weight")
plt.xlabel("Weight (g)")
plt.grid(True)
plt.legend()
plt.show()

print(f"Fitted normal → mu={mu:.1f}, sigma={sigma:.1f}")

# ================================
# 4. EM - Gaussian mixture (GMM)
# ================================
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(bw)

means = gmm.means_.flatten()
stds = np.sqrt(gmm.covariances_.flatten())
weights = gmm.weights_.flatten()

print("\n=== GMM (EM) ===")
for i in range(2):
    print(f"Component {i+1}: μ={means[i]:.1f} g, σ={stds[i]:.1f} g, weight={weights[i]:.2f}")

# Total density estimated by the GMM
logprob = gmm.score_samples(x.reshape(-1,1))
pdf_gmm = np.exp(logprob)

plt.figure(figsize=(10,5))
plt.hist(bw, bins=30, density=True, alpha=0.5, label="Data (histogram)")
plt.plot(x, pdf_gmm, "k-", lw=2, label="GMM (full mixture)")

# Individual components
def normal_pdf(x, mean, std):
    return 1/(std*np.sqrt(2*np.pi)) * np.exp(-0.5 * ((x-mean)/std)**2)

for i in range(2):
    plt.plot(
        x,
        weights[i] * normal_pdf(x, means[i], stds[i]),
        "--",
        lw=2,
        label=f"Component {i+1}"
    )

plt.title("EM (GMM) fit to DBWT weight")
plt.xlabel("Weight (g)")
plt.grid(True)
plt.legend()
plt.show()
Number of valid weights: 108082
First values: [2800 1900 2960 3657 3742 3317 3550 515 2438 4015]
Fitted normal → mu=3260.1, sigma=589.5

=== GMM (EM) ===
Component 1: μ=3369.9 g, σ=400.3 g, weight=0.69
Component 2: μ=3018.8 g, σ=821.4 g, weight=0.31
Wow, what a beautiful Gaussian mixture! As we can see, the model identifies two components:
- Component 1: weights around 3.3 kg with a deviation of 400 g. These are the typical newborn weights, and they account for about 70% of the data.
- Component 2: the data that don't fit the first component. Its mean (about 3.0 kg) is still very close to component 1's; I think that's because a lot of the remaining data sit near the typical weight but are much more spread out. In the plot, this component is shifted slightly to the left, which says there are more babies below the average weight (a quick model-selection check follows this list).
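Since the two components overlap so much, it is fair to ask whether two components are really better than one. This check is not part of the original analysis, but a common habit is to compare the BIC of mixtures with different numbers of components (reusing bw from above; lower BIC is better):

from sklearn.mixture import GaussianMixture

# Compare mixtures with 1 to 3 components by BIC (lower is better)
for k in range(1, 4):
    g = GaussianMixture(n_components=k, random_state=42).fit(bw)
    print(f"K={k}: BIC={g.bic(bw):.1f}")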
Now I want to try this again, but adding another variable: the sex of the baby. The dataset has a column called SEX. Let's see what estimate we get for each sex:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
from sklearn.mixture import GaussianMixture

# ============================
# 1. LOAD DATA
# ============================
df = pd.read_csv("datasets/birth_data.csv")

# Ensure numeric types
df["DBWT"] = pd.to_numeric(df["DBWT"], errors="coerce")

# Don't touch SEX yet; it may be numeric or text
df = df.dropna(subset=["DBWT", "SEX"])

print("Unique SEX values in the file:", df["SEX"].unique())

# ============================
# 2. FUNCTION TO ANALYZE ONE GROUP
# ============================
def analyze_group(data, label):
    data = data.dropna().values.reshape(-1, 1)
    if len(data) < 10:
        print(f"\n[WARNING] Group '{label}' has very few data points ({len(data)}). Skipping it.")
        return

    print(f"\n===== Analysis for group: {label} (n={len(data)}) =====")

    # KDE
    plt.figure(figsize=(10,5))
    sns.kdeplot(data.flatten(), fill=True, linewidth=2)
    plt.title(f"KDE – Birth weight ({label})")
    plt.xlabel("Weight (g)")
    plt.grid(True)
    plt.show()

    # Normal fit
    mu, sigma = norm.fit(data.flatten())
    x = np.linspace(data.min(), data.max(), 300)
    pdf_normal = norm.pdf(x, mu, sigma)

    plt.figure(figsize=(10,5))
    plt.hist(data, bins=30, density=True, alpha=0.4, label="Data")
    plt.plot(x, pdf_normal, "r-", lw=2, label=f"Normal μ={mu:.1f}, σ={sigma:.1f}")
    plt.title(f"Normal fit – Weight ({label})")
    plt.xlabel("Weight (g)")
    plt.grid(True)
    plt.legend()
    plt.show()

    print(f"Normal ({label}): μ={mu:.1f}, σ={sigma:.1f}")

    # GMM with 2 components
    gmm = GaussianMixture(n_components=2, random_state=42)
    gmm.fit(data)

    means = gmm.means_.flatten()
    stds = np.sqrt(gmm.covariances_.flatten())
    weights = gmm.weights_.flatten()

    print(f"\n=== GMM (EM) – {label} ===")
    for i in range(2):
        print(f"Component {i+1}: μ={means[i]:.1f} g, σ={stds[i]:.1f} g, weight={weights[i]:.2f}")

    # Total GMM density
    logprob = gmm.score_samples(x.reshape(-1,1))
    pdf_gmm = np.exp(logprob)

    plt.figure(figsize=(10,5))
    plt.hist(data, bins=30, density=True, alpha=0.4, label="Data")
    plt.plot(x, pdf_gmm, "k-", lw=2, label="GMM – full mixture")

    # Individual components
    def normal_pdf(x, mean, std):
        return 1/(std*np.sqrt(2*np.pi)) * np.exp(-0.5 * ((x-mean)/std)**2)

    for i in range(2):
        plt.plot(
            x,
            weights[i] * normal_pdf(x, means[i], stds[i]),
            "--",
            lw=2,
            label=f"Component {i+1}"
        )

    plt.title(f"GMM (EM) – Birth weight ({label})")
    plt.xlabel("Weight (g)")
    plt.grid(True)
    plt.legend()
    plt.show()

# ============================
# 3. GROUP AUTOMATICALLY BY SEX
# ============================
for sex_value, group in df.groupby("SEX"):
    label = f"SEX = {sex_value}"
    analyze_group(group["DBWT"], label)
Unique SEX values in the file: ['F' 'M']

===== Analysis for group: SEX = F (n=52756) =====
Normal (SEX = F): μ=3201.8, σ=574.9
=== GMM (EM) – SEX = F ===
Component 1: μ=3305.3 g, σ=391.0 g, weight=0.70
Component 2: μ=2962.9 g, σ=811.7 g, weight=0.30

===== Analysis for group: SEX = M (n=55326) =====
Normal (SEX = M): μ=3315.7, σ=597.9
=== GMM (EM) – SEX = M ===
Component 1: μ=3052.9 g, σ=838.7 g, weight=0.30
Component 2: μ=3430.0 g, σ=404.9 g, weight=0.70
As we see in the results, the standard deviation for males (598 g) is a little higher than for females (575 g). Maybe this is because male weights are more dispersed: there is a wider range of weights than among the females.
The results also say that males weigh about 114 g more than females on average.
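That +114 g is just the difference between the two fitted normal means (3315.7 − 3201.8 ≈ 113.9 g). We can confirm it directly from the raw data with a one-line check (reusing df from above):

# Average birth weight per SEX group
print(df.groupby("SEX")["DBWT"].mean())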
Now, trying to understand the code better, I cleaned it up and fit the distribution to a single normal:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
df = pd.read_csv("datasets/birth_data.csv")
df["DBWT"] = pd.to_numeric(df["DBWT"], errors="coerce")
bw = df["DBWT"].dropna()
mu, sigma = norm.fit(bw)
print(f"Normal ajustada: μ={mu:.2f}, σ={sigma:.2f}")
xmin, xmax = bw.min(), bw.max()
x = np.linspace(xmin, xmax, 300)
pdf = norm.pdf(x, mu, sigma)
plt.hist(bw, bins=30, density=True, alpha=0.5, label="Data")
plt.plot(x, pdf, "r-", lw=2, label=f"Normal fitted μ={mu:.1f}, σ={sigma:.1f}")
plt.title("Normal distribution fitted to weight")
plt.xlabel("Weight (g)")
plt.legend()
plt.grid(True)
plt.show()
Fitted normal: μ=3260.11, σ=589.55
There is a little skew on the left (we know it's because there are more data at lower weights than at higher ones).
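We can put a number on that left tail with the sample skewness (a quick check reusing bw from above; a negative value indicates a heavier left tail):

from scipy.stats import skew

print(f"Skewness of DBWT: {skew(bw):.3f}")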
So let's try to fit a gamma distribution instead:
from scipy.stats import gamma

# Fit a three-parameter gamma (shape, loc, scale) by maximum likelihood
shape, loc, scale = gamma.fit(bw)
print("Gamma parameters:", shape, loc, scale)

pdf = gamma.pdf(x, shape, loc=loc, scale=scale)
plt.hist(bw, bins=30, density=True, alpha=0.5)
plt.plot(x, pdf, "m-", label="Gamma fitted")
plt.legend()
plt.grid(True)
plt.show()
Gamma parameters: 411.3949066569892 -9083.014801092791 29.981664624790938
There is still a little skew to the left.
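To compare the normal and gamma fits with a number instead of by eye, one option is the total log-likelihood of the data under each model (a minimal sketch reusing bw, mu, sigma, shape, loc, and scale from above; higher is better):

# Total log-likelihood of the data under each fitted model
ll_normal = norm.logpdf(bw, mu, sigma).sum()
ll_gamma = gamma.logpdf(bw, shape, loc=loc, scale=scale).sum()
print(f"Normal log-likelihood: {ll_normal:.1f}")
print(f"Gamma  log-likelihood: {ll_gamma:.1f}")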