import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.kernel_ridge import KernelRidge

# ---- Load the dataset ----
df = pd.read_csv('datasets/enhanced_student_habits_performance_dataset.csv')

# ---- Sample 100 ----
# A fixed seed keeps the 100-row subsample identical from run to run.
df_sample = df.sample(random_state=1, n=100)

# Predictor (previous_gpa) and target (exam_score) as float arrays.
x, y = (
    df_sample[column].values.astype(float)
    for column in ('previous_gpa', 'exam_score')
)

# ---- Sort for stable plotting ----
# One permutation applied to both arrays keeps each (x, y) pair aligned.
order = np.argsort(x)
x_sorted, y_sorted = x[order], y[order]

# 400 evenly spaced values spanning the sampled previous_gpa range; the
# fitted curves are evaluated on this grid.
x_grid = np.linspace(x_sorted.min(), x_sorted.max(), num=400)

# ---- Polynomial fit function ----
def fit_poly(x, y, degree):
    """Fit a least-squares polynomial of the given degree to 1-D data.

    Args:
        x: Array of predictor values; it is reshaped to a single column,
            so it must provide ``reshape`` (e.g. a NumPy array).
        y: Target values, passed unchanged to ``LinearRegression.fit``.
        degree: Degree of the polynomial feature expansion.

    Returns:
        A ``(model, poly)`` tuple: the fitted ``LinearRegression`` and the
        fitted ``PolynomialFeatures`` transformer. New inputs must go
        through ``poly.transform`` before ``model.predict``.
    """
    column = x.reshape(-1, 1)

    # No constant column here: LinearRegression estimates the intercept
    # itself by default.
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    poly.fit(column)

    model = LinearRegression()
    model.fit(poly.transform(column), y)
    return model, poly

# ---- RBF Kernel Ridge Regression ----
def fit_rbf(x, y, gamma=1.0):
    """Fit a kernel ridge regression with an RBF kernel to 1-D data.

    Args:
        x: Array of predictor values; it is reshaped to a single column,
            so it must provide ``reshape`` (e.g. a NumPy array).
        y: Target values, passed unchanged to ``KernelRidge.fit``.
        gamma: RBF kernel coefficient forwarded to ``KernelRidge``.

    Returns:
        The fitted ``KernelRidge`` estimator.
    """
    features = x.reshape(-1, 1)
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return KernelRidge(kernel='rbf', gamma=gamma).fit(features, y)

# ---- Models ----
degrees = [1, 2, 3]  # linear, quadratic, cubic
rbf_model = fit_rbf(x_sorted, y_sorted, gamma=1.0)

# Column-vector form of the evaluation grid, shared by every predict call.
grid_column = x_grid.reshape(-1, 1)

# ---- Plot ----
# Raw sample first, then one curve per model (plot order fixes the colours).
plt.scatter(x, y, s=20, label="data")

# Polynomial curves
for d in degrees:
    model, poly = fit_poly(x_sorted, y_sorted, d)
    y_grid_poly = model.predict(poly.transform(grid_column))
    plt.plot(x_grid, y_grid_poly, linewidth=1.5, label=f"poly deg {d}")

# RBF curve
y_grid_rbf = rbf_model.predict(grid_column)
plt.plot(x_grid, y_grid_rbf, linewidth=2, label="RBF")

# Axis labels and title applied in one call on the current axes.
plt.gca().set(
    xlabel="previous_gpa",
    ylabel="exam_score",
    title="Linear, Polynomial, and RBF Fits (n = 100)",
)
plt.legend()
plt.tight_layout()
plt.show()

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.kernel_approximation import RBFSampler

# ---- Load dataset ----
df = pd.read_csv("datasets/enhanced_student_habits_performance_dataset.csv")

# Every row is used here (no subsampling): predictor and target as floats.
x, y = (
    df[column].values.astype(float)
    for column in ('previous_gpa', 'exam_score')
)

# ---- Sort data for smooth plotting ----
# The same permutation is applied to both arrays so pairs stay aligned.
order = np.argsort(x)
x_sorted, y_sorted = x[order], y[order]

# 400 evenly spaced previous_gpa values as a (400, 1) column, the shape the
# transformers and estimators below expect.
x_grid = np.linspace(x_sorted.min(), x_sorted.max(), num=400)[:, np.newaxis]

# ---- Linear and Polynomial Fits ----
def fit_poly(x, y, degree):
    """Fit a least-squares polynomial of the given degree to 1-D data.

    NOTE: this repeats the ``fit_poly`` definition that appears earlier in
    the file; it is kept so this section can run on its own.

    Args:
        x: Array of predictor values; it is reshaped to a single column,
            so it must provide ``reshape`` (e.g. a NumPy array).
        y: Target values, passed unchanged to ``LinearRegression.fit``.
        degree: Degree of the polynomial feature expansion.

    Returns:
        A ``(model, poly)`` tuple: the fitted ``LinearRegression`` and the
        fitted ``PolynomialFeatures`` transformer.
    """
    as_column = x.reshape(-1, 1)

    # The bias column is omitted; LinearRegression fits an intercept itself.
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    expanded = poly.fit(as_column).transform(as_column)

    model = LinearRegression()
    model.fit(expanded, y)
    return model, poly

# Fit the degree-1, -2 and -3 polynomials once and keep each fitted
# (regressor, feature expander) pair under the key "poly_<degree>".
models = {}
for d in (1, 2, 3):
    model, poly = fit_poly(x_sorted, y_sorted, d)
    models[f"poly_{d}"] = (model, poly)

# ---- Approximate RBF Fit using RBFSampler ----
# RBFSampler maps the single previous_gpa column to 500 random features
# approximating an RBF kernel; a ridge regression is fitted on top of them.
rbf_feature = RBFSampler(gamma=1.0, n_components=500, random_state=42)
X_features = rbf_feature.fit_transform(x_sorted.reshape(-1, 1))
rbf_model = Ridge(alpha=1.0).fit(X_features, y_sorted)

X_grid_features = rbf_feature.transform(x_grid)
pred_rbf = rbf_model.predict(X_grid_features)

# ---- Predict polynomial fits ----
# Each stored pair is applied to the grid: expand features, then predict.
pred_poly1, pred_poly2, pred_poly3 = (
    regressor.predict(expander.transform(x_grid))
    for regressor, expander in (models[f"poly_{k}"] for k in (1, 2, 3))
)

# ---- Plot ----
plt.figure(figsize=(10, 6))
plt.scatter(x_sorted, y_sorted, s=8, label="Data", alpha=0.6)
for curve_label, curve_values in (
    ("Poly deg 1", pred_poly1),
    ("Poly deg 2", pred_poly2),
    ("Poly deg 3", pred_poly3),
    ("RBF approx", pred_rbf),
):
    plt.plot(x_grid, curve_values, label=curve_label, linewidth=2)

plt.legend()
plt.xlabel("previous_gpa")
plt.ylabel("exam_score")
plt.title("Linear, Polynomial, and Approximate RBF Fits")
plt.tight_layout()
plt.show()

# ---- Prepare grid for prediction ----
# This section evaluates a fitted model on a 2-D grid of two features and
# draws the result as a 3-D surface over the sample points.
#
# NOTE(review): `X_grid`, `Y_grid`, `feat_x`, `feat_y`, `X_sample`, `y_sample`
# and `scaler` are not defined anywhere in the visible part of this file.
# The `rbf_feature` and `model` bound earlier in the file were fitted on a
# single previous_gpa column, so this section presumably relies on versions
# from code that is not shown here -- TODO confirm before running.

# Flatten the two grid arrays into one row per grid point.
grid_points = pd.DataFrame({
    feat_x: X_grid.ravel(),
    feat_y: Y_grid.ravel()
})

# Fill other features with mean
# (every remaining X_sample column is held constant at its sample mean, so
# only feat_x and feat_y vary across the grid).
for col in X_sample.columns:
    if col not in [feat_x, feat_y]:
        grid_points[col] = X_sample[col].mean()

# Reorder columns to match X_sample exactly
grid_points = grid_points[X_sample.columns]

# ---- Scale and RBF transform ----
# The grid goes through `scaler` and then `rbf_feature`, in that order.
# Presumably this mirrors how the training data was prepared -- verify.
grid_scaled = scaler.transform(grid_points)
grid_rbf = rbf_feature.transform(grid_scaled)

# ---- Predict ----
# Predictions come back flat; reshape them onto the grid for plot_surface.
Z_pred = model.predict(grid_rbf)
Z_grid = Z_pred.reshape(X_grid.shape)

# ---- Plot ----
import matplotlib.pyplot as plt
# Axes3D is not referenced by name below; presumably imported so that older
# Matplotlib versions register the '3d' projection -- TODO confirm.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(10,7))
ax = fig.add_subplot(111, projection='3d')
# Sample observations as red points, predicted surface in translucent blue.
ax.scatter(X_sample[feat_x], X_sample[feat_y], y_sample, color='red', alpha=0.5, label='Sample Data')
ax.plot_surface(X_grid, Y_grid, Z_grid, color='blue', alpha=0.4)
ax.set_xlabel(feat_x)
ax.set_ylabel(feat_y)
ax.set_zlabel('exam_score')
# NOTE(review): the title hard-codes "study_hours & motivation" while the
# axis labels come from feat_x / feat_y -- check that they agree.
ax.set_title('RBF-Fitted Function: exam_score vs study_hours & motivation')
# Only the scatter call passes a label, so the legend has a single entry.
plt.legend()
plt.show()