import json
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# 1. Load the data
# The file is iterated as a sequence of records, each indexed by the "births" key.
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead
# of relying on the platform default (which is not UTF-8 everywhere).
with open("datasets/nacimientos_espana_1975_2024.json", encoding="utf-8") as f:
    data = json.load(f)

births = np.array([d["births"] for d in data])

# 2. Histogram of the births, normalised so the bar areas sum to 1 (density=True)
plt.figure(figsize=(8, 4))
ax = plt.gca()
ax.hist(births, bins=10, density=True, alpha=0.6, color="blue")
ax.set_title("Histograma de nacimientos")
ax.set_xlabel("Nacimientos")
ax.set_ylabel("Densidad")
ax.grid(True)
plt.show()

# Fit a normal distribution to the births; norm.fit returns (loc, scale)
mu, sigma = norm.fit(births)

print("Media (mu):", mu)
print("Desviación estándar (sigma):", sigma)

# Evaluate the fitted density on 100 evenly spaced points spanning the data range
x = np.linspace(births.min(), births.max(), 100)
pdf = norm.pdf(x, loc=mu, scale=sigma)

# Draw the histogram again with the fitted normal curve on top
plt.figure(figsize=(8, 4))
ax = plt.gca()
ax.hist(births, bins=10, density=True, alpha=0.6, color="blue")
ax.plot(x, pdf, "r-", linewidth=2, label=f"N(mu={mu:.0f}, sigma={sigma:.0f})")
ax.set_title("Ajuste de distribución normal a los nacimientos")
ax.set_xlabel("Nacimientos")
ax.set_ylabel("Densidad")
ax.legend()
ax.grid(True)
plt.show()

Media (mu): 448754.3
Desviación estándar (sigma): 90609.6985266478

import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# (Optional) SciPy is only needed for the normal fit; record whether it can be imported
try:
    from scipy.stats import norm
except ImportError:
    HAS_SCIPY = False
    print("⚠️ SciPy no está instalado. No se hará el ajuste a distribución normal.")
else:
    HAS_SCIPY = True


# ==============================
# 1. CONFIGURATION
# ==============================

import os

# The AEMET OpenData API key is a personal credential and must not live in the
# source file. It is read from the AEMET_API_KEY environment variable instead.
# NOTE(review): a real key used to be hard-coded on this line; it should be
# treated as leaked and revoked/regenerated in the AEMET OpenData portal.
API_KEY = os.environ.get("AEMET_API_KEY", "")
if not API_KEY:
    print("⚠️ Falta la API key de AEMET. Define la variable de entorno AEMET_API_KEY.")

# León - Virgen del Camino station (widely used as the reference for León)
station_id = "2661"

# Date range to analyse
start_date = "2024-08-01T00:00:00UTC"
end_date   = "2024-12-31T23:59:59UTC"


# ==============================
# 2. DOWNLOAD DATA FROM AEMET
# ==============================

# Seconds to wait on each HTTP call; requests has no default timeout, so
# without this a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Fail early with a clear message instead of sending a request with no key.
if not API_KEY:
    raise RuntimeError("Falta la API key de AEMET (API_KEY está vacía).")

base_url = (
    "https://opendata.aemet.es/opendata/api/"
    "valores/climatologicos/diarios/datos/"
    f"fechaini/{start_date}/fechafin/{end_date}/estacion/{station_id}"
)

headers = {"api_key": API_KEY}

# Step 1: the endpoint answers with a small JSON document whose "datos" field
# holds the URL where the actual records can be downloaded.
print("Llamando a la API de AEMET...")
resp_meta = requests.get(base_url, headers=headers, timeout=REQUEST_TIMEOUT)

if resp_meta.status_code != 200:
    raise RuntimeError(f"Error en la petición AEMET (meta): {resp_meta.status_code} - {resp_meta.text}")

meta_json = resp_meta.json()
print("Respuesta meta:", meta_json.get("descripcion", "OK"))

data_url = meta_json.get("datos")
if not data_url:
    raise RuntimeError("No se encontró la URL de datos en la respuesta de AEMET.")

# Step 2: fetch the records themselves from the URL returned above.
print("Descargando datos reales desde:", data_url)
resp_data = requests.get(data_url, timeout=REQUEST_TIMEOUT)

if resp_data.status_code != 200:
    raise RuntimeError(f"Error al descargar los datos: {resp_data.status_code} - {resp_data.text}")

# The payload is parsed as JSON (a list of dicts, one per day, per the original note)
data_json = resp_data.json()

# ==============================
# 3. CONVERT TO DATAFRAME AND CLEAN
# ==============================

df = pd.DataFrame(data_json)

# Convert the temperature columns to float. They usually arrive as strings
# with a decimal comma, so commas are normalised to dots before parsing.
# Columns that are already numeric are passed straight to to_numeric, because
# the .str accessor raises on non-string data.
for col in ["tmed", "tmax", "tmin"]:
    if col in df.columns:
        values = df[col]
        if not pd.api.types.is_numeric_dtype(values):
            values = values.astype(str).str.replace(",", ".", regex=False)
        # Anything that still cannot be parsed becomes NaN instead of raising
        df[col] = pd.to_numeric(values, errors="coerce")

# Convert the date column to datetime (unparseable values become NaT)
df["fecha"] = pd.to_datetime(df["fecha"], errors="coerce")

print("\nPrimeras filas de los datos:")
print(df.head())

print("\nInformación del DataFrame:")
# df.info() writes its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df.info()

# ==============================
# 4. DISTRIBUTION ANALYSIS
# ==============================

# The analysis is based on the daily mean temperature column
if not ("tmed" in df.columns):
    raise RuntimeError("La columna 'tmed' no está en los datos. Revisa el endpoint o las columnas disponibles.")

# Discard days without a usable value
tmed = df["tmed"].dropna()

print("\nEstadísticas básicas de la temperatura media (tmed):")
print(tmed.describe())

# Histogram of the distribution, normalised to a density
plt.figure(figsize=(8, 4))
ax = plt.gca()
ax.hist(tmed, bins=30, density=True, alpha=0.7)
ax.set_title("Distribución de la temperatura media diaria - León")
ax.set_xlabel("Temperatura media (°C)")
ax.set_ylabel("Densidad")
ax.grid(True)
plt.show()

# ==============================
# 5. NORMAL DISTRIBUTION FIT (ONLY WHEN SCIPY IS AVAILABLE)
# ==============================

if not HAS_SCIPY:
    print("\nNo se ha hecho ajuste a normal porque SciPy no está instalado.")
    print("Si quieres incluirlo, instala SciPy con:  pip install scipy")
else:
    # norm.fit returns the fitted (loc, scale) pair
    mu, sigma = norm.fit(tmed)

    print("\nAjuste a distribución normal:")
    print(f"  media (mu)  ≈ {mu:.2f} °C")
    print(f"  sigma (σ)   ≈ {sigma:.2f} °C")

    # Fitted normal density evaluated on 200 points across the observed range
    x = np.linspace(tmed.min(), tmed.max(), 200)
    pdf = norm.pdf(x, loc=mu, scale=sigma)

    plt.figure(figsize=(8, 4))
    ax = plt.gca()
    ax.hist(tmed, bins=30, density=True, alpha=0.7, label="Datos (histograma)")
    ax.plot(x, pdf, "r-", linewidth=2, label=f"N(μ={mu:.2f}, σ={sigma:.2f})")
    ax.set_title("Temperatura media diaria - histograma + normal ajustada")
    ax.set_xlabel("Temperatura media (°C)")
    ax.set_ylabel("Densidad")
    ax.legend()
    ax.grid(True)
    plt.show()

Llamando a la API de AEMET...
Respuesta meta: exito
Descargando datos reales desde: https://opendata.aemet.es/opendata/sh/a944e043

Primeras filas de los datos:
       fecha indicativo                   nombre provincia altitud  tmed prec  \
0 2024-08-01       2661  LEÓN, VIRGEN DEL CAMINO      LEON     916  24.2  0,0   
1 2024-08-02       2661  LEÓN, VIRGEN DEL CAMINO      LEON     916  22.2  0,0   
2 2024-08-03       2661  LEÓN, VIRGEN DEL CAMINO      LEON     916  21.5  0,0   
3 2024-08-04       2661  LEÓN, VIRGEN DEL CAMINO      LEON     916  22.7  0,0   
4 2024-08-05       2661  LEÓN, VIRGEN DEL CAMINO      LEON     916  23.7  0,0   

   tmin horatmin  tmax  ...   sol presMax horaPresMax presMin horaPresMin  \
0  16.5    05:30  31.8  ...  12,4   914,0          00   911,0          16   
1  14.9    03:33  29.6  ...  13,2   915,6      Varias   911,6          04   
2  11.9    03:55  31.1  ...  13,9   915,6          00   912,3          18   
3  13.4    05:42  32.0  ...  13,7   915,1      Varias   911,6          18   
4  14.9    04:42  32.5  ...  13,7   913,0          00   909,3          16   

  hrMedia hrMax horaHrMax hrMin horaHrMin  
0      44    78     05:54    22     15:44  
1      52    85    Varias    28     16:51  
2      38    93    Varias    20     16:32  
3      31    74    Varias    19     17:30  
4      34    76    Varias    16     12:15  

[5 rows x 25 columns]

Información del DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153 entries, 0 to 152
Data columns (total 25 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   fecha        153 non-null    datetime64[ns]
 1   indicativo   153 non-null    object        
 2   nombre       153 non-null    object        
 3   provincia    153 non-null    object        
 4   altitud      153 non-null    object        
 5   tmed         153 non-null    float64       
 6   prec         153 non-null    object        
 7   tmin         153 non-null    float64       
 8   horatmin     152 non-null    object        
 9   tmax         153 non-null    float64       
 10  horatmax     152 non-null    object        
 11  dir          151 non-null    object        
 12  velmedia     152 non-null    object        
 13  racha        151 non-null    object        
 14  horaracha    151 non-null    object        
 15  sol          152 non-null    object        
 16  presMax      152 non-null    object        
 17  horaPresMax  152 non-null    object        
 18  presMin      152 non-null    object        
 19  horaPresMin  152 non-null    object        
 20  hrMedia      152 non-null    object        
 21  hrMax        152 non-null    object        
 22  horaHrMax    152 non-null    object        
 23  hrMin        152 non-null    object        
 24  horaHrMin    152 non-null    object        
dtypes: datetime64[ns](1), float64(3), object(21)
memory usage: 30.0+ KB
None

Estadísticas básicas de la temperatura media (tmed):
count    153.000000
mean      12.869935
std        6.375303
min       -3.600000
25%        8.500000
50%       13.000000
75%       17.200000
max       27.800000
Name: tmed, dtype: float64

Ajuste a distribución normal:
  media (mu)  ≈ 12.87 °C
  sigma (σ)   ≈ 6.35 °C

import pandas as pd

# Load the León temperature CSV (latin-1 encoded) and show which columns it has
df = pd.read_csv("datasets/temp-leon.csv", encoding="latin-1")

column_names = df.columns.tolist()
print(column_names)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture

# ============================
# 1. LOAD DATA
# ============================
# The CSV is read from datasets/temp-leon2.csv, relative to the working directory.
# latin-1 is required, otherwise characters such as "ñ" are not decoded.
df = pd.read_csv("datasets/temp-leon2.csv", encoding="latin-1")

# Convert TM to numeric. When the column was read as text, decimal commas are
# normalised to dots first; otherwise a value such as "7,1" would be coerced
# to NaN and silently dropped from the sample below.
tm = df["TM"]
if not pd.api.types.is_numeric_dtype(tm):
    tm = tm.astype(str).str.replace(",", ".", regex=False)
df["TM"] = pd.to_numeric(tm, errors="coerce")

# Keep only valid values, as a column vector (n_samples, 1) for scikit-learn
temps = df["TM"].dropna().values.reshape(-1, 1)

print(f"Número de muestras usadas: {temps.shape[0]}")
print("Primeras temperaturas:", temps[:10].flatten())

# ============================
# 2. FIT THE GMM (EM)
# ============================
# Number of mixture components (can be changed, e.g. to 3)
n_components = 2

gmm = GaussianMixture(n_components=n_components, random_state=42)
gmm.fit(temps)

print("\n=== Parámetros del GMM (EM) ===")
# The layout of covariances_ depends on the covariance type ('full' by
# default): a 3-D array is indexed down to its single variance entry,
# anything else is indexed by component only.
covariances = gmm.covariances_
for k, (weight, mean) in enumerate(zip(gmm.weights_, gmm.means_[:, 0])):
    var = covariances[k, 0, 0] if covariances.ndim == 3 else covariances[k]
    std = np.sqrt(var)
    print(f"Componente {k+1}:")
    print(f"  peso (π)      = {weight:.3f}")
    print(f"  media (μ)     = {mean:.3f} °C")
    print(f"  desviación σ  = {std:.3f} °C")

# ============================
# 3. DRAW HISTOGRAM + DENSITIES
# ============================
# Temperature grid for the curves: one degree beyond the data on each side,
# shaped as a column vector because score_samples is fed 2-D input here
grid_start = temps.min() - 1
grid_stop = temps.max() + 1
x = np.linspace(grid_start, grid_stop, 500).reshape(-1, 1)

# Total mixture density: score_samples yields log-densities, so exponentiate
logprob = gmm.score_samples(x)
pdf_total = np.exp(logprob)

plt.figure(figsize=(8, 5))
ax = plt.gca()

# Histogram of the data
ax.hist(temps, bins=12, density=True, alpha=0.4, label="Datos (histograma)")

# Curve of the full mixture
ax.plot(x, pdf_total, label="GMM (mezcla total)", linewidth=2)

# Helper used to draw each mixture component separately
def normal_pdf(x, mean, std):
    """Return the normal density with the given mean and std evaluated at x.

    x may be a scalar or a NumPy array; the arithmetic is element-wise, so
    the result has the same shape as x. std is used as a divisor.
    """
    z = (x - mean) / std
    scale = 1.0 / (np.sqrt(2 * np.pi) * std)
    return scale * np.exp(-0.5 * z ** 2)

# One dashed curve per component: its normal density scaled by its mixture weight
covariances = gmm.covariances_
for k, (weight, mean) in enumerate(zip(gmm.weights_, gmm.means_[:, 0])):
    # A 3-D covariances_ array is indexed down to its single variance entry;
    # any other layout is indexed by component only
    var = covariances[k, 0, 0] if covariances.ndim == 3 else covariances[k]
    std = np.sqrt(var)

    pdf_comp = weight * normal_pdf(x, mean, std)
    plt.plot(x, pdf_comp, "--", linewidth=2, label=f"Componente {k+1}")

# Labels, legend and layout for the figure, then display it
ax = plt.gca()
ax.set_title("GMM con EM sobre temperatura media mensual (TM) - León")
ax.set_xlabel("Temperatura media (°C)")
ax.set_ylabel("Densidad")
ax.legend()
ax.grid(True)
plt.tight_layout()
plt.show()

Número de muestras usadas: 13
Primeras temperaturas: [ 7.1  9.5 13.3 14.8 18.6 24.  27.4 26.9 22.9 16.7]

=== Parámetros del GMM (EM) ===
Componente 1:
  peso (π)      = 0.545
  media (μ)     = 21.448 °C
  desviación σ  = 4.834 °C
Componente 2:
  peso (π)      = 0.455
  media (μ)     = 11.019 °C
  desviación σ  = 3.258 °C

Class 5: Probability

Using the temperature data from my city, León

Update: the API now only allows checking 6 months of data

Downloading 2025 temperature data from the website