# ============================
# 1. IMPORTAR LIBRERÍAS
# ============================
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# ============================
# 2. LOAD THE MNIST DATASET
# ============================

# The first run downloads MNIST from the internet, which takes a while.
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)

# X: one row per image, each a flat vector of 784 pixel values.
# y: the digit labels (0, 1, ..., 9), delivered as text.
X, y = mnist.data, mnist.target

print("Forma de X:", X.shape)   # (70000, 784)
print("Forma de y:", y.shape)   # (70000,)

# The labels arrive as strings; cast them to integers.
y = y.astype(int)

# ============================
# 3. NORMALIZE THE DATA
# ============================
# Pixel intensities span 0-255; rescale them to the 0-1 range.
X = X / 255.0

# Optional speed-up: keep only a stratified 10,000-sample subset.
# The following section reads X_small / y_small, so to train on the whole
# dataset replace this call with `X_small, y_small = X, y` rather than
# simply commenting it out.
X_small, _, y_small, _ = train_test_split(
    X, y,
    train_size=10000,
    random_state=42,
    stratify=y,
)

print("Usando muestras:", X_small.shape[0])

# ============================
# 4. SPLIT INTO TRAIN / TEST
# ============================
# Hold out 20% of the subset for testing. Stratifying on the labels keeps the
# digit proportions the same in both parts; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_small,
    y_small,
    random_state=42,
    stratify=y_small,
    test_size=0.2,
)

print("Train:", X_train.shape, " Test:", X_test.shape)

# ============================
# 5. DEFINE AND FIT THE MODEL
# ============================
# Multiclass logistic regression (a classic baseline for classification).
#
# `multi_class` is deliberately NOT passed: scikit-learn deprecated it in 1.5
# and will remove it in 1.8, and passing it emits a FutureWarning on every
# run. With the 'lbfgs' solver a multiclass target is already fitted with the
# multinomial formulation, so dropping the argument does not change the model.
model = LogisticRegression(
    solver='lbfgs',
    max_iter=1000   # raised above the library default to give lbfgs room to converge
)

print("Entrenando el modelo...")
model.fit(X_train, y_train)
print("Modelo entrenado ✅")

# ============================
# 6. EVALUATE THE MODEL
# ============================
# Predict the held-out images and measure the fraction labelled correctly.
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Precisión en test: {acc:.4f}")

# Confusion matrix: shows which digits get mistaken for which.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=range(10))
disp.plot(cmap='Blues', values_format='d')   # 'd' -> integer counts in each cell
plt.title("Matriz de confusión - MNIST")
plt.show()

# ============================
# 7. VISUALIZAR ALGUNAS PREDICCIONES
# ============================
def plot_digit(image_flat, label_true=None, label_pred=None):
    """Draw a flat 784-value vector as a 28x28 grayscale image.

    The image is drawn on the current matplotlib axes with the axis hidden.
    The title is assembled from whichever labels are supplied: the true
    label (if not None) followed by the predicted label (if not None).
    """
    plt.imshow(image_flat.reshape(28, 28), cmap='gray')

    # Collect the title fragments in display order, then glue them together.
    title_parts = []
    if label_true is not None:
        title_parts.append(f"Real: {label_true}  ")
    if label_pred is not None:
        title_parts.append(f"Predicho: {label_pred}")

    plt.title("".join(title_parts))
    plt.axis('off')

# Show the first `num_samples` test digits side by side, each titled with its
# true label and the label the model predicted.
num_samples = 5  # number of digits shown in the preview row
plt.figure(figsize=(12, 3))
# Slicing + zip (instead of indexing by range) stops cleanly if the test set
# holds fewer than `num_samples` rows rather than raising IndexError.
preview = zip(X_test[:num_samples], y_test[:num_samples], y_pred[:num_samples])
for i, (pixels, actual, predicted) in enumerate(preview):
    plt.subplot(1, num_samples, i + 1)   # subplot positions are 1-based
    plot_digit(pixels, label_true=actual, label_pred=predicted)
plt.suptitle("Ejemplos de predicción de dígitos")
plt.show()

Forma de X: (70000, 784)
Forma de y: (70000,)
Usando muestras: 10000
Train: (8000, 784)  Test: (2000, 784)
Entrenando el modelo...

/opt/conda/lib/python3.13/site-packages/sklearn/linear_model/_logistic.py:1272: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.8. From then on, it will always use 'multinomial'. Leave it to its default value to avoid this warning.
  warnings.warn(

Modelo entrenado ✅
Precisión en test: 0.8975

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

# ============================
# 1. LOAD THE IRIS DATASET
# ============================
iris = load_iris()

# X: (150, 4) -> four measurements per flower
# y: (150,)   -> class index 0, 1 or 2 (the species)
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

print("Forma de X:", X.shape)
print("Clases:", target_names)

# Split the feature matrix into named columns for readability.
sepal_length = X[:, 0]
sepal_width  = X[:, 1]
petal_length = X[:, 2]
petal_width  = X[:, 3]

# One colour per species, indexed by the class value (0, 1, 2).
colors = ['red', 'green', 'blue']

# Each entry describes one figure: (x data, y data, x label, y label, title).
#   1) SEPAL: length vs width
#   2) PETAL: length vs width
scatter_views = [
    (sepal_length, sepal_width, feature_names[0], feature_names[1],
     "Iris - Sépalo (largo vs ancho)"),
    (petal_length, petal_width, feature_names[2], feature_names[3],
     "Iris - Pétalo (largo vs ancho)"),
]

class_values = np.unique(y)   # computed once; identical for both figures

for x_values, y_values, x_label, y_label, view_title in scatter_views:
    plt.figure(figsize=(6, 5))

    for class_value in class_values:
        in_class = y == class_value   # boolean mask selecting this species
        plt.scatter(
            x_values[in_class],
            y_values[in_class],
            label=target_names[class_value],
            color=colors[class_value],   # actually apply the per-species colour
            alpha=0.7
        )

    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(view_title)
    plt.legend()
    plt.grid(True)
    plt.show()

# ============================
# 2. SPLIT INTO TRAIN / TEST
# ============================
# Hold out 20% of the flowers for testing, stratified on the species so each
# class keeps the same share in both parts; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    stratify=y,
    test_size=0.2,
)

print("Train:", X_train.shape, " Test:", X_test.shape)

# ============================
# 3. DEFINE AND FIT THE MODEL
# ============================
# A random forest classifier built from 100 trees; the seed makes the
# training run reproducible.
model = RandomForestClassifier(random_state=42, n_estimators=100)

print("Entrenando el modelo...")
model.fit(X_train, y_train)
print("Modelo entrenado ✅")

# ============================
# 4. EVALUATE THE MODEL
# ============================
# Predict the held-out flowers and measure the fraction labelled correctly.
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"\nPrecisión en test: {acc:.4f}\n")

# Per-class precision / recall / F1, labelled with the species names.
print("Reporte de clasificación:")
print(classification_report(y_test, y_pred, target_names=target_names))

# Confusion matrix with the species names on both axes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=target_names)
disp.plot(cmap='Blues', values_format='d')   # 'd' -> integer counts in each cell
plt.title("Matriz de confusión - Iris")
plt.show()

# ============================
# 5. LOOK AT A FEW PREDICTIONS
# ============================
# Print up to five test flowers with their measurements, true species and
# predicted species.
print("\nAlgunos ejemplos de test con predicción:\n")
for i in range(min(5, len(X_test))):   # never index past the end of the test set
    x = X_test[i]
    true_label = target_names[y_test[i]]
    pred_label = target_names[y_pred[i]]
    print(f"Ejemplo {i+1}:")
    # .tolist() converts the NumPy scalars to plain Python floats, so the dict
    # prints as {'sepal length (cm)': 4.4, ...} instead of being cluttered
    # with np.float64(...) wrappers.
    print("  Características:", dict(zip(feature_names, x.tolist())))
    print(f"  Real: {true_label}  |  Predicho: {pred_label}\n")

Forma de X: (150, 4)
Clases: ['setosa' 'versicolor' 'virginica']

Train: (120, 4)  Test: (30, 4)
Entrenando el modelo...
Modelo entrenado ✅

Precisión en test: 0.9000

Reporte de clasificación:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.82      0.90      0.86        10
   virginica       0.89      0.80      0.84        10

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30

Algunos ejemplos de test con predicción:

Ejemplo 1:
  Características: {'sepal length (cm)': np.float64(4.4), 'sepal width (cm)': np.float64(3.0), 'petal length (cm)': np.float64(1.3), 'petal width (cm)': np.float64(0.2)}
  Real: setosa  |  Predicho: setosa

Ejemplo 2:
  Características: {'sepal length (cm)': np.float64(6.1), 'sepal width (cm)': np.float64(3.0), 'petal length (cm)': np.float64(4.9), 'petal width (cm)': np.float64(1.8)}
  Real: virginica  |  Predicho: virginica

Ejemplo 3:
  Características: {'sepal length (cm)': np.float64(4.9), 'sepal width (cm)': np.float64(2.4), 'petal length (cm)': np.float64(3.3), 'petal width (cm)': np.float64(1.0)}
  Real: versicolor  |  Predicho: versicolor

Ejemplo 4:
  Características: {'sepal length (cm)': np.float64(5.0), 'sepal width (cm)': np.float64(2.3), 'petal length (cm)': np.float64(3.3), 'petal width (cm)': np.float64(1.0)}
  Real: versicolor  |  Predicho: versicolor

Ejemplo 5:
  Características: {'sepal length (cm)': np.float64(4.4), 'sepal width (cm)': np.float64(3.2), 'petal length (cm)': np.float64(1.3), 'petal width (cm)': np.float64(0.2)}
  Real: setosa  |  Predicho: setosa

CLASS 4: Machine Learning

Other example