Fitting a Function to the Data¶
Polynomial Fitting (Poly-Fitting)¶
In [ ]:
### Polynomial: P_n(x) = C_0 + C_1 x + C_2 x^2 + C_3 x^3 + ... + C_n x^n (_n: subscript, ^n: superscript)
Importing Libraries¶
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Used ChatGPT to Write and Explain the Code¶
In [1]:
### "I would like to fit a function in my data using a polynomial fit and use least square. In this project i am uploading data. Can you write a Python code"
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# -------------------------------
# 1. Load the dataset
# -------------------------------
data = pd.read_csv("~/work/kelzang-wangdi/datasets/StudentsPerformance.csv")
# Name the two columns once and reuse the names for the axis labels below,
# so the plot can never be labelled with a column that was not plotted
# (the x-axis previously read "writing score" although x is the math score).
x_column = 'math score'
y_column = 'reading score'
x = data[x_column].values
y = data[y_column].values
# -------------------------------
# 2. Polynomial degree
# -------------------------------
degree = 2 # change to 1, 2, 3, etc.
# -------------------------------
# 3. Fit a polynomial using least squares
# -------------------------------
# np.polyfit returns the coefficients ordered from the highest power down.
coeffs = np.polyfit(x, y, degree)
# Wrap the coefficients in a callable polynomial
poly_func = np.poly1d(coeffs)
print("Polynomial Coefficients:")
print(coeffs)
# -------------------------------
# 4. Predict values
# -------------------------------
# Dense, evenly spaced grid over the observed x range for a smooth curve.
x_fit = np.linspace(x.min(), x.max(), 1000)
y_fit = poly_func(x_fit)
# -------------------------------
# 5. Plot results
# -------------------------------
plt.scatter(x, y, label='Data Points', color = "green")
plt.plot(x_fit, y_fit, label=f'Polynomial Fit (degree={degree})')
plt.xlabel(x_column)
plt.ylabel(y_column)
plt.title("Polynomial Least Squares Fit")
plt.legend()
plt.grid(True)
plt.show()
# Columns under study: math score on the horizontal axis,
# reading score on the vertical axis.
x = data['math score'].values
y = data['reading score'].values

# Order of the fitted polynomial (1 = line, 2 = parabola, 3 = cubic, ...).
degree = 3

# Least-squares fit; coefficients come back ordered from the highest power down.
coeffs = np.polyfit(x, y, degree)
poly_func = np.poly1d(coeffs)  # callable form of the fitted polynomial

print("Polynomial Coefficients:", coeffs, sep="\n")

# Evaluate the fit on a dense, evenly spaced grid spanning the observed
# x range so the curve is drawn smoothly.
x_fit = np.linspace(x.min(), x.max(), 1000)
y_fit = poly_func(x_fit)

# Raw observations as a scatter, fitted curve drawn on the same axes.
fig, ax = plt.subplots()
ax.scatter(x, y, color="green", label='Data Points')
ax.plot(x_fit, y_fit, label=f'Polynomial Fit (degree={degree})')
ax.set_xlabel("math score")
ax.set_ylabel("reading score")
ax.set_title("Polynomial Least Squares Fit")
ax.legend()
ax.grid(True)
plt.show()
Polynomial Coefficients: [-3.61993422e-04 8.33548811e-01 1.57448375e+01]
Polynomial Coefficients: [-1.73116970e-05 2.73796875e-03 6.62231807e-01 1.85838930e+01]
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# -----------------------------
# 1. Load the data
# -----------------------------
df = pd.read_csv("~/work/kelzang-wangdi/datasets/StudentsPerformance.csv")
x_column = 'reading score' # feature column (horizontal axis)
y_column = 'math score' # target column (vertical axis)
# Drop incomplete rows jointly so that x[i] and y[i] always belong to the
# same row. Calling .dropna() on each column separately would shift the
# pairing whenever only one of the two values is missing, or leave arrays
# of different lengths, which np.polyfit rejects.
paired = df[[x_column, y_column]].dropna()
x = paired[x_column].values
y = paired[y_column].values
# -----------------------------
# 2. Polynomial fitting
# -----------------------------
# NOTE: this changes NumPy's print precision for the whole kernel session,
# not just for this cell.
np.set_printoptions(precision=3)
# Fit first-order (linear) polynomial
coeff1 = np.polyfit(x, y, 1)
pfit1 = np.poly1d(coeff1)
# Fit second-order (quadratic) polynomial
coeff2 = np.polyfit(x, y, 2)
pfit2 = np.poly1d(coeff2)
# Evaluate both fits on an evenly spaced grid across the observed x range
xfit = np.linspace(np.min(x), np.max(x), 100)
yfit1 = pfit1(xfit)
yfit2 = pfit2(xfit)
# -----------------------------
# 3. Print coefficients
# -----------------------------
# Coefficients are ordered from the highest power down.
print(f"first-order fit coefficients: {coeff1}")
print(f"second-order fit coefficients: {coeff2}")
# -----------------------------
# 4. Plot
# -----------------------------
plt.figure(figsize=(8,6))
plt.plot(x, y, 'o', alpha=0.6, label='data')
plt.plot(xfit, yfit1, 'g-', label='linear fit')
plt.plot(xfit, yfit2, 'r-', label='quadratic fit')
plt.xlabel(x_column)
plt.ylabel(y_column)
plt.title(f"Polynomial Fit: {y_column} vs {x_column}")
plt.legend()
plt.show()
first-order fit coefficients: [0.849 7.358] second-order fit coefficients: [-5.681e-04 9.255e-01 4.909e+00]