Fitting Functions 25/11/25¶
A fitting function in Jupyter (or in Python generally) is a mathematical function or model that is fitted to a set of data points, usually to describe the relationship between variables or to make predictions. Essentially, “fitting” means finding the parameter values for which the function most closely matches your data.
Purpose of a fitting function
To model data: understand trends or patterns in your dataset. To predict values: once fitted, the function can estimate unknown points. To analyze relationships: determine how one variable affects another.
Linear Regression¶
✅ What is Linear Regression?
Linear regression is a machine learning/statistical method used to find the relationship between variables. It creates a best-fit straight line that predicts an output (Y) from an input (X).
Example formula:
$Y = mX + b$, where $m$ is the slope, $b$ is the intercept, $X$ is the input, and $Y$ is the predicted output.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Toy data: five (x, y) observations. scikit-learn expects the inputs as a
# 2-D column, hence the reshape to (n_samples, 1).
X = np.arange(1, 6).reshape(-1, 1)
y = np.array([2, 4, 5, 4, 5])

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

# Fitted values at the same inputs the model was trained on.
y_pred = model.predict(X)

# Draw the observations and the fitted straight line on one set of axes.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(X, y_pred)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_title("Linear Regression")
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Step 1: Create the dataset (20 students, four assessment marks and a total).
# In this data every "Total Marks" value is the sum of the four marks in its
# row, so a linear model can reproduce it essentially exactly.
data = {
    "Student ID": [
        "S001", "S002", "S003", "S004", "S005", "S006", "S007", "S008", "S009", "S010",
        "S011", "S012", "S013", "S014", "S015", "S016", "S017", "S018", "S019", "S020",
    ],
    "Assignment 1": [70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
    "Assignment 2": [65, 66, 67, 68, 69, 70, 71, 72, 65, 66, 67, 68, 69, 70, 71, 72, 65, 66, 67, 68],
    "Midterm": [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 60, 61, 62, 63, 64, 65, 66, 67],
    "Final Exam": [75, 76, 77, 78, 79, 80, 81, 75, 76, 77, 78, 79, 80, 81, 75, 76, 77, 78, 79, 80],
    "Total Marks": [
        270, 274, 278, 282, 286, 290, 294, 291, 287, 291,
        285, 289, 281, 285, 282, 286, 282, 286, 290, 294,
    ],
}
df = pd.DataFrame(data)

# Step 2: Features and target
X = df[["Assignment 1", "Assignment 2", "Midterm", "Final Exam"]]
y = df["Total Marks"]

# Step 3: Train Linear Regression on 80% of the rows; random_state pins the
# shuffle so the same rows are held out on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = LinearRegression()
model.fit(X_train, y_train)

# Score the model on the held-out rows. Without this the split above serves
# no purpose: the test set would never be looked at.
test_r2 = model.score(X_test, y_test)
print(f"R^2 on held-out test set: {test_r2:.3f}")

# Step 4: Predict for every student (training and held-out rows alike) so the
# plot below covers the whole class.
y_pred = model.predict(X)

# Step 5: Plot actual vs predicted Total Marks
plt.figure(figsize=(10, 6))
plt.scatter(range(len(y)), y, color='blue', label='Actual Total Marks')
plt.plot(range(len(y)), y_pred, color='red', label='Predicted Total Marks', linewidth=2)
plt.xlabel('Student Index')
plt.ylabel('Total Marks')
plt.title('Linear Regression: Actual vs Predicted Total Marks')
plt.legend()
plt.grid(True)
plt.show()
import pandas as pd
# With the default header=0 this sheet loads with only "Unnamed: N" columns:
# its first row is empty and a title row sits above the real column names.
# The names ("Class", "Section", ...) are on the third sheet row, i.e. index 2.
# NOTE(review): row positions are inferred from the preview of the default
# load - confirm against the workbook if its layout changes.
datasets = pd.read_excel(
    "datasets/Cl IVABC ICT result Analysis Term 1 2025.xlsx",
    header=2,
)
# The leading spreadsheet column is blank; drop all-empty columns so only the
# named ones remain.
datasets = datasets.dropna(axis="columns", how="all")
datasets.head()
| Unnamed: 0 | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | NaN | NaN | Class IVABC | Midterm 2025 | NaN | NaN | NaN | NaN |
| 1 | NaN | Class | Section | Total Students | Total Stds Passed | Total Stds Fail | Total Pass % | Total Fail % | Mean Mark | National Mean Mark |
| 2 | NaN | IV | A | 28 | 22 | 6 | 78.571429 | 21.428571 | 75.7 | 67.09 |
| 3 | NaN | IV | B | 28 | 24 | 5 | 85.714286 | 17.857143 | 71.7 | 75 |
| 4 | NaN | IV | C | 28 | 13 | 15 | 46.428571 | 53.571429 | 57.9 | 67.09 |
import pandas as pd
# Create the dataset
# The per-section summary is written row by row (one tuple per section) and
# then transposed into the column-oriented dict that pandas consumes.
# NOTE(review): for section B, passed + failed (24 + 5) exceeds Total Students
# (28); the figures are kept exactly as given - confirm against the source.
columns = [
    "Class",
    "Section",
    "Total Students",
    "Total Stds Passed",
    "Total Stds Fail",
    "Total Pass %",
    "Total Fail %",
    "Mean Mark",
    "National Mean Mark",
]
rows = [
    ("IV", "A", 28, 22, 6, 78.571429, 21.428571, 75.7, 67.09),
    ("IV", "B", 28, 24, 5, 85.714286, 17.857143, 71.7, 75),
    ("IV", "C", 28, 13, 15, 46.428571, 53.571429, 57.9, 67.09),
]
# zip(*rows) turns the three row tuples into nine per-column tuples.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}
df = pd.DataFrame(data)
df
| Class | Section | Total Students | Total Stds Passed | Total Stds Fail | Total Pass % | Total Fail % | Mean Mark | National Mean Mark | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | IV | A | 28 | 22 | 6 | 78.571429 | 21.428571 | 75.7 | 67.09 |
| 1 | IV | B | 28 | 24 | 5 | 85.714286 | 17.857143 | 71.7 | 75.00 |
| 2 | IV | C | 28 | 13 | 15 | 46.428571 | 53.571429 | 57.9 | 67.09 |
import matplotlib.pyplot as plt
import seaborn as sns
# Set seaborn style (set_theme is the function that sns.set aliases)
sns.set_theme(style="whitegrid")
plt.figure(figsize=(8, 5))
# seaborn >= 0.13 deprecates passing `palette` without `hue`. Mapping the x
# variable to `hue` and hiding the redundant legend keeps one colour per
# section without triggering the FutureWarning.
sns.barplot(
    x="Section",
    y="Total Pass %",
    hue="Section",
    data=df,
    palette="viridis",
    legend=False,
)
plt.title("Total Pass % by Section")
plt.ylabel("Pass Percentage")
plt.ylim(0, 100)  # show the full percentage scale
plt.show()
/tmp/ipykernel_205/3315276860.py:2: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. sns.barplot(x="Section", y="Total Pass %", data=df, palette="viridis")
# Grouped bar chart: for each section, the class mean mark and the national
# mean mark are drawn as two adjacent bars of equal width.
bar_width = 0.35
index = range(len(df))

# Left bar of each pair sits on the integer position, the right bar one bar
# width further along, and the tick label is centred between the two.
class_positions = list(index)
national_positions = [pos + bar_width for pos in index]
tick_positions = [pos + bar_width / 2 for pos in index]

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    class_positions,
    df["Mean Mark"],
    bar_width,
    label="Class Mean Mark",
    color="skyblue",
)
ax.bar(
    national_positions,
    df["National Mean Mark"],
    bar_width,
    label="National Mean Mark",
    color="salmon",
)
ax.set_xticks(tick_positions)
ax.set_xticklabels(df["Section"])
ax.set_ylabel("Marks")
ax.set_title("Class Mean Marks vs National Mean Marks")
ax.legend()
plt.show()
# Stacked bar chart: per section, the failed count is stacked on top of the
# passed count, so each bar's height is passed + failed.
sections = df["Section"]
passed = df["Total Stds Passed"]
failed = df["Total Stds Fail"]

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(sections, passed, label="Passed", color="green")
# bottom=passed starts each red segment where the green one ends.
ax.bar(sections, failed, bottom=passed, label="Failed", color="red")
ax.set_ylabel("Number of Students")
ax.set_title("Students Passed vs Failed by Section")
ax.legend()
plt.show()
Linear¶
import pandas as pd
# Dataset
# Reduced per-section table used for the fits: one tuple per section,
# transposed into the column-oriented dict that pandas consumes.
columns = ["Section", "Total Pass %", "Mean Mark", "National Mean Mark"]
rows = [
    ("A", 78.57, 75.7, 67.09),
    ("B", 85.71, 71.7, 75),
    ("C", 46.43, 57.9, 67.09),
]
data = {name: list(values) for name, values in zip(columns, zip(*rows))}
df = pd.DataFrame(data)
df
| Section | Total Pass % | Mean Mark | National Mean Mark | |
|---|---|---|---|---|
| 0 | A | 78.57 | 75.7 | 67.09 |
| 1 | B | 85.71 | 71.7 | 75.00 |
| 2 | C | 46.43 | 57.9 | 67.09 |
import numpy as np
import matplotlib.pyplot as plt
# Sections are labels, so give each one a numeric position for the fit:
# A=0, B=1, C=2
x = np.array([0, 1, 2])
y = df["Total Pass %"].values

# Degree-1 least-squares fit y = mx + c; polyfit returns the coefficients
# highest power first, i.e. [slope, intercept].
coeffs = np.polyfit(x, y, 1)
m, c = coeffs
print(f"Fitted line: y = {m:.2f}x + {c:.2f}")

# Evaluate the fitted line at each section position.
y_fit = np.polyval(coeffs, x)

# Actual pass percentages as points, the fitted line dashed on top.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x, y, color="blue", label="Actual Pass %")
ax.plot(x, y_fit, color="red", linestyle="--", label="Fitted Line")
ax.set_xticks(x)
ax.set_xticklabels(df["Section"])
ax.set_ylabel("Total Pass %")
ax.set_title("Linear Fit for Pass Percentage")
ax.legend()
plt.show()
Fitted line: y = -16.07x + 86.31
# Straight-line fit of the class mean mark against the section position x
# (x is the 0/1/2 encoding of sections defined for the previous fit).
y_mean = df["Mean Mark"].values
coeffs_mean = np.polyfit(x, y_mean, 1)
y_mean_fit = np.polyval(coeffs_mean, x)

national_mean = df["National Mean Mark"]

# Class means as points with their fitted line; national means are overlaid
# as crosses for comparison and take no part in the fit.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x, y_mean, color="green", label="Actual Mean Mark")
ax.plot(x, y_mean_fit, color="orange", linestyle="--", label="Fitted Line")
ax.scatter(x, national_mean, color="red", marker="x", label="National Mean")
ax.set_xticks(x)
ax.set_xticklabels(df["Section"])
ax.set_ylabel("Marks")
ax.set_title("Linear Fit for Mean Marks")
ax.legend()
plt.show()
Activities¶