import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Toy dataset: five (x, y) points. The inputs are reshaped into a single
# column because the estimator is fitted on a 2-D feature array.
X = np.arange(1, 6).reshape(-1, 1)
y = np.array([2, 4, 5, 4, 5])

# Fit an ordinary least-squares line; fit() hands back the fitted estimator.
model = LinearRegression().fit(X, y)

# Fitted values at the same inputs the model was trained on
y_pred = model.predict(X)

# Draw the observed points and the fitted line on one set of axes
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(X, y_pred)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_title("Linear Regression")
plt.show()

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Step 1: Create the dataset
# Twenty student records. Each "Total Marks" entry equals the sum of that
# student's four component scores.
data = {
    "Student ID": ["S001","S002","S003","S004","S005","S006","S007","S008","S009","S010",
                   "S011","S012","S013","S014","S015","S016","S017","S018","S019","S020"],
    "Assignment 1": [70,71,72,73,74,75,76,77,78,79,70,71,72,73,74,75,76,77,78,79],
    "Assignment 2": [65,66,67,68,69,70,71,72,65,66,67,68,69,70,71,72,65,66,67,68],
    "Midterm": [60,61,62,63,64,65,66,67,68,69,70,71,60,61,62,63,64,65,66,67],
    "Final Exam": [75,76,77,78,79,80,81,75,76,77,78,79,80,81,75,76,77,78,79,80],
    "Total Marks": [270,274,278,282,286,290,294,291,287,291,285,289,281,285,282,286,282,286,290,294]
}

df = pd.DataFrame(data)

# Step 2: Features and target
feature_columns = ["Assignment 1", "Assignment 2", "Midterm", "Final Exam"]
X = df[feature_columns]
y = df["Total Marks"]

# Step 3: Train Linear Regression
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so
# reruns produce the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)

# Score the model on the held-out rows only. Without this the split above is
# never used and the model is judged solely on data it was trained on.
print(f"R^2 on held-out test set: {model.score(X_test, y_test):.3f}")

# Step 4: Predict
# Predictions cover every student (training and held-out rows alike) so the
# plot below can show the whole class.
y_pred = model.predict(X)

# Step 5: Plot actual vs predicted Total Marks
student_index = range(len(y))
plt.figure(figsize=(10,6))
plt.scatter(student_index, y, color='blue', label='Actual Total Marks')
plt.plot(student_index, y_pred, color='red', label='Predicted Total Marks', linewidth=2)
plt.xlabel('Student Index')
plt.ylabel('Total Marks')
plt.title('Linear Regression: Actual vs Predicted Total Marks')
plt.legend()
plt.grid(True)
plt.show()

import pandas as pd
# Load the results workbook with pandas' defaults (first sheet, first row
# used as the column header).
# NOTE(review): the preview of this frame captured elsewhere in this file shows
# only "Unnamed: N" column labels, with the real header sitting two rows down
# and an empty first column. The sheet probably needs explicit `header=` /
# `usecols=` arguments; confirm against the workbook.
results_path = "datasets/Cl IVABC ICT result Analysis Term 1 2025.xlsx"
datasets = pd.read_excel(results_path)
datasets.head()  # first rows; rendered as the cell output in a notebook

import pandas as pd

# Section-level result summary, written one row per section so it reads like
# the spreadsheet it mirrors.
# NOTE(review): Section B's pass and fail counts add up to 29 while
# "Total Students" is 28; the figures are kept exactly as given, so confirm
# against the original results.
summary_columns = [
    "Class",
    "Section",
    "Total Students",
    "Total Stds Passed",
    "Total Stds Fail",
    "Total Pass %",
    "Total Fail %",
    "Mean Mark",
    "National Mean Mark",
]
summary_rows = [
    ("IV", "A", 28, 22, 6, 78.571429, 21.428571, 75.7, 67.09),
    ("IV", "B", 28, 24, 5, 85.714286, 17.857143, 71.7, 75),
    ("IV", "C", 28, 13, 15, 46.428571, 53.571429, 57.9, 67.09),
]

# Transpose the rows into the column-name -> list-of-values mapping
data = {
    column: list(values)
    for column, values in zip(summary_columns, zip(*summary_rows))
}

df = pd.DataFrame(data)
df

import matplotlib.pyplot as plt
import seaborn as sns

# Set seaborn style
sns.set(style="whitegrid")

# Bar chart of each section's pass percentage.
plt.figure(figsize=(8,5))
# seaborn deprecated passing `palette` without `hue` (removal announced for
# v0.14.0). Mapping `hue` to the same variable as `x` and suppressing the
# redundant legend keeps the per-section colours without the FutureWarning.
sns.barplot(
    x="Section",
    y="Total Pass %",
    hue="Section",
    data=df,
    palette="viridis",
    legend=False,
)
plt.title("Total Pass % by Section")
plt.ylabel("Pass Percentage")
plt.ylim(0, 100)  # show the full percentage scale
plt.show()

/tmp/ipykernel_205/3315276860.py:2: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x="Section", y="Total Pass %", data=df, palette="viridis")

# Grouped bar chart: each section's mean mark next to the national mean mark.
bar_width = 0.35
index = range(len(df))

# Class bars sit on the integer positions; national bars are shifted right by
# one bar width, and the tick label is centred between the pair.
class_positions = list(index)
national_positions = [pos + bar_width for pos in index]
tick_positions = [pos + bar_width/2 for pos in index]

fig, ax = plt.subplots(figsize=(8,5))
ax.bar(class_positions, df["Mean Mark"], bar_width, label="Class Mean Mark", color='skyblue')
ax.bar(national_positions, df["National Mean Mark"], bar_width, label="National Mean Mark", color='salmon')

ax.set_xticks(tick_positions)
ax.set_xticklabels(df["Section"])
ax.set_ylabel("Marks")
ax.set_title("Class Mean Marks vs National Mean Marks")
ax.legend()
plt.show()

# Stacked bar chart: passed students at the base, failed students on top.
sections = df["Section"]
passed = df["Total Stds Passed"]
failed = df["Total Stds Fail"]

fig, ax = plt.subplots(figsize=(8,5))
ax.bar(sections, passed, label="Passed", color='green')
# `bottom=passed` starts each red segment where the green one ends
ax.bar(sections, failed, bottom=passed, label="Failed", color='red')
ax.set_ylabel("Number of Students")
ax.set_title("Students Passed vs Failed by Section")
ax.legend()
plt.show()

import pandas as pd

# Reduced per-section summary (pass rate and mean marks only), written one
# row per section.
fit_columns = ["Section", "Total Pass %", "Mean Mark", "National Mean Mark"]
fit_rows = [
    ("A", 78.57, 75.7, 67.09),
    ("B", 85.71, 71.7, 75),
    ("C", 46.43, 57.9, 67.09),
]

# Transpose the rows into the column-name -> list-of-values mapping
data = {
    column: list(values)
    for column, values in zip(fit_columns, zip(*fit_rows))
}

df = pd.DataFrame(data)
df

import numpy as np
import matplotlib.pyplot as plt

# Sections are categorical, so give them numeric positions for the fit
x = np.arange(3)  # A=0, B=1, C=2
y = df["Total Pass %"].to_numpy()

# Least-squares straight line y = mx + c (degree-1 polynomial);
# polyfit returns the coefficients highest power first.
coeffs = np.polyfit(x, y, deg=1)
m, c = coeffs
print(f"Fitted line: y = {m:.2f}x + {c:.2f}")

# Evaluate the fitted line at the section positions
y_fit = np.polyval(coeffs, x)

# Observed pass percentages against the fitted line, ticks labelled by section
fig, ax = plt.subplots(figsize=(8,5))
ax.scatter(x, y, color='blue', label="Actual Pass %")
ax.plot(x, y_fit, color='red', linestyle='--', label="Fitted Line")
ax.set_xticks(x)
ax.set_xticklabels(df["Section"])
ax.set_ylabel("Total Pass %")
ax.set_title("Linear Fit for Pass Percentage")
ax.legend()
plt.show()

Fitted line: y = -16.07x + 86.31

# Straight-line fit of the class mean mark over the section positions in `x`
y_mean = df["Mean Mark"].to_numpy()
coeffs_mean = np.polyfit(x, y_mean, deg=1)
slope, intercept = coeffs_mean  # coefficients come highest power first
y_mean_fit = slope * x + intercept

# Class means and their fitted line, with the national means overlaid as
# crosses for comparison
fig, ax = plt.subplots(figsize=(8,5))
ax.scatter(x, y_mean, color='green', label="Actual Mean Mark")
ax.plot(x, y_mean_fit, color='orange', linestyle='--', label="Fitted Line")
ax.scatter(x, df["National Mean Mark"], color='red', marker='x', label="National Mean")
ax.set_xticks(x)
ax.set_xticklabels(df["Section"])
ax.set_ylabel("Marks")
ax.set_title("Linear Fit for Mean Marks")
ax.legend()
plt.show()

Fitting Functions 25/11/25

Linear Regression

Linear

Activities

	Unnamed: 0	Unnamed: 1	Unnamed: 2	Unnamed: 3	Unnamed: 4	Unnamed: 5	Unnamed: 6	Unnamed: 7	Unnamed: 8	Unnamed: 9
0	NaN	NaN	NaN	NaN	Class IVABC	Midterm 2025	NaN	NaN	NaN	NaN
1	NaN	Class	Section	Total Students	Total Stds Passed	Total Stds Fail	Total Pass %	Total Fail %	Mean Mark	National Mean Mark
2	NaN	IV	A	28	22	6	78.571429	21.428571	75.7	67.09
3	NaN	IV	B	28	24	5	85.714286	17.857143	71.7	75
4	NaN	IV	C	28	13	15	46.428571	53.571429	57.9	67.09