Fitting Functions in a Dataset¶
Different Types of Fitting Functions¶
- Linear Regression Fitting
Fits a linear model by minimizing the residual sum of squares between observed and predicted targets.
Example: Ordinary Least Squares (OLS)
Library: sklearn.linear_model.LinearRegression
Fits data assuming a linear relationship:
$$y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \cdots + \varepsilon$$
- Polynomial Regression Fitting
Extends linear regression by adding polynomial features (such as $x^2$, $x^3$) to capture nonlinear relationships.
Example: Using PolynomialFeatures + LinearRegression in scikit-learn
Fits a curve rather than a line.
- Ridge and Lasso Regression (Regularized Fitting)
Add penalty terms to linear regression to avoid overfitting and improve generalization.
Ridge: Penalizes the sum of squares of coefficients (L2 norm)
Lasso: Penalizes the sum of absolute values of coefficients (L1 norm), which can shrink some coefficients exactly to zero, performing feature selection. A minimal sketch of both follows this list.
- Logistic Regression Fitting
Used for classification problems where the output is categorical (binary or multinomial).
Fits the probability that an input belongs to a certain class using a sigmoid or softmax function.
Uses maximum likelihood estimation rather than least squares.
- Decision Tree Fitting
Fits a model by recursively splitting the data into subsets based on feature values, creating a tree structure.
Uses criteria like Gini impurity or entropy to find best splits.
Suitable for both classification and regression (sketch after this list).
- Support Vector Machine (SVM) Fitting
Fits a hyperplane that maximizes the margin between classes in classification or fits a regression line with an epsilon-insensitive loss.
Can use kernels for nonlinear fitting (sketch after this list).
- Neural Network Fitting
Fits complex nonlinear models using layers of interconnected nodes with weights adjusted via backpropagation.
Can approximate almost any function given enough data and a suitable architecture (sketch after this list).
- K-Nearest Neighbors (KNN) Fitting
No explicit fitting phase; the training data is simply stored, and predictions average (regression) or vote (classification) over the closest training examples (sketch after this list).
- Bayesian Regression
Fits models while incorporating prior beliefs and uncertainty, often yielding a probabilistic interpretation of the parameters (sketch after this list).
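The worked examples below cover linear, polynomial, and logistic fitting in full; the remaining methods from the list get minimal sketches first, each reusing the same student-score data. First, Ridge and Lasso with scikit-learn; the penalty strength alpha=1.0 is an arbitrary illustrative choice, not a tuned value.
import numpy as np
from sklearn.linear_model import Ridge, Lasso
# Same student-score data used throughout this notebook
X = np.column_stack(([20, 35, 70, 40, 50, 67, 88, 46, 67, 46],   # Math
                     [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],   # Sci
                     [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],   # Eng
                     [93, 59, 76, 77, 59, 47, 29, 39, 71, 62]))  # Dzo
y = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])  # Total
# alpha sets the penalty strength (illustrative, not tuned)
ridge = Ridge(alpha=1.0).fit(X, y)
lasso = Lasso(alpha=1.0, max_iter=10000).fit(X, y)
print("Ridge coefficients:", ridge.coef_)  # all shrunk toward zero (L2 penalty)
print("Lasso coefficients:", lasso.coef_)  # some may be exactly zero (L1 penalty)
Comparing the two coefficient vectors shows the qualitative difference: ridge shrinks every coefficient a little, while lasso can zero some out entirely.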
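A decision-tree sketch on the same data, using DecisionTreeRegressor since the target is numeric; max_depth=3 and random_state=0 are illustrative choices.
import numpy as np
from sklearn.tree import DecisionTreeRegressor
X = np.column_stack(([20, 35, 70, 40, 50, 67, 88, 46, 67, 46],   # Math
                     [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],   # Sci
                     [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],   # Eng
                     [93, 59, 76, 77, 59, 47, 29, 39, 71, 62]))  # Dzo
y = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])  # Total
# max_depth caps the recursive splitting; for a regression tree the split
# criterion is squared error rather than Gini impurity or entropy
tree = DecisionTreeRegressor(max_depth=3, random_state=0).fit(X, y)
print("In-sample R^2:", tree.score(X, y))
print("Predictions:", tree.predict(X))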
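An SVM regression sketch with an RBF kernel. SVR is sensitive to feature scale, so it is wrapped in a pipeline with StandardScaler; C=100 and epsilon=5 are illustrative, untuned values.
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
X = np.column_stack(([20, 35, 70, 40, 50, 67, 88, 46, 67, 46],   # Math
                     [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],   # Sci
                     [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],   # Eng
                     [93, 59, 76, 77, 59, 47, 29, 39, 71, 62]))  # Dzo
y = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])  # Total
# epsilon defines the insensitive tube around the fit; C trades off
# flatness against errors outside the tube (both values illustrative)
svr = make_pipeline(StandardScaler(), SVR(kernel='rbf', C=100, epsilon=5))
svr.fit(X, y)
print("In-sample R^2:", svr.score(X, y))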
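A neural-network sketch with scikit-learn's MLPRegressor; the single hidden layer of 16 units, max_iter, and random_state are illustrative choices, and with only 10 samples this is a toy demonstration rather than a meaningful model.
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
X = np.column_stack(([20, 35, 70, 40, 50, 67, 88, 46, 67, 46],   # Math
                     [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],   # Sci
                     [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],   # Eng
                     [93, 59, 76, 77, 59, 47, 29, 39, 71, 62]))  # Dzo
y = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])  # Total
# One hidden layer of 16 units; weights are adjusted by backpropagation
mlp = make_pipeline(StandardScaler(),
                    MLPRegressor(hidden_layer_sizes=(16,), max_iter=5000, random_state=0))
mlp.fit(X, y)
print("In-sample R^2:", mlp.score(X, y))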
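A KNN sketch: here fit() only stores the training data, and each prediction averages the Totals of the closest students; n_neighbors=3 is an illustrative choice.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
X = np.column_stack(([20, 35, 70, 40, 50, 67, 88, 46, 67, 46],   # Math
                     [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],   # Sci
                     [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],   # Eng
                     [93, 59, 76, 77, 59, 47, 29, 39, 71, 62]))  # Dzo
y = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])  # Total
# "Fitting" only stores the data; each prediction averages the 3 nearest neighbors
knn = KNeighborsRegressor(n_neighbors=3).fit(X, y)
print("Predictions:", knn.predict(X))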
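A Bayesian regression sketch with BayesianRidge, which places Gaussian priors on the coefficients and can return a predictive standard deviation alongside each prediction.
import numpy as np
from sklearn.linear_model import BayesianRidge
X = np.column_stack(([20, 35, 70, 40, 50, 67, 88, 46, 67, 46],   # Math
                     [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],   # Sci
                     [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],   # Eng
                     [93, 59, 76, 77, 59, 47, 29, 39, 71, 62]))  # Dzo
y = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])  # Total
bayes = BayesianRidge().fit(X, y)
mean, std = bayes.predict(X, return_std=True)  # predictive mean and uncertainty
print("Coefficients:", bayes.coef_)
print("Predictive std (first 3):", std[:3])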
Linear Regression Fitting¶
import numpy as np
from sklearn.linear_model import LinearRegression
# Data
Math = np.array([20, 35, 70, 40, 50, 67, 88, 46, 67, 46])
Sci = np.array([54, 60, 54, 34, 36, 67, 89, 90, 57, 67])
Eng = np.array([67, 76, 55, 45, 34, 25, 78, 47, 67, 76])
Dzo = np.array([93, 59, 76, 77, 59, 47, 29, 39, 71, 62])
Total = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])
# Combine independent variables
X = np.column_stack((Math, Sci, Eng, Dzo))
y = Total
# Create linear regression model
model = LinearRegression()
model.fit(X, y)
# Get coefficients
coefficients = model.coef_
intercept = model.intercept_
print("Fitted function:")
print(f"Total = {intercept:.2f} + ({coefficients[0]:.2f}*Math) + ({coefficients[1]:.2f}*Sci) + ({coefficients[2]:.2f}*Eng) + ({coefficients[3]:.2f}*Dzo)")
# Predict Total using the model
Total_pred = model.predict(X)
print("\nPredicted Total:", Total_pred)
Fitted function:
Total = -0.00 + (1.00*Math) + (1.00*Sci) + (1.00*Eng) + (1.00*Dzo)

Predicted Total: [234. 230. 255. 196. 179. 206. 284. 222. 262. 251.]
The intercept is essentially zero and every coefficient is 1.00 because Total is exactly the sum of the four subject scores, so OLS recovers the identity relationship and reproduces the data perfectly.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Data
Math = np.array([20, 35, 70, 40, 50, 67, 88, 46, 67, 46])
Sci = np.array([54, 60, 54, 34, 36, 67, 89, 90, 57, 67])
Eng = np.array([67, 76, 55, 45, 34, 25, 78, 47, 67, 76])
Dzo = np.array([93, 59, 76, 77, 59, 47, 29, 39, 71, 62])
Total = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])
# Combine independent variables
X = np.column_stack((Math, Sci, Eng, Dzo))
y = Total
# Linear Regression
model = LinearRegression()
model.fit(X, y)
# Predict Total
Total_pred = model.predict(X)
# Plot Actual vs Predicted Total
plt.figure(figsize=(8,6))
plt.scatter(y, Total_pred, color='blue', label='Predicted vs Actual')
plt.plot([min(y), max(y)], [min(y), max(y)], color='red', linestyle='--', label='Perfect Fit')
plt.xlabel("Actual Total")
plt.ylabel("Predicted Total")
plt.title("Linear Regression: Predicted Total vs Actual Total")
plt.legend()
plt.grid(True)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Data (redefined here so this cell runs on its own)
students = ['Dorji','Tashi','Pema','Dawa','Nima','Karma','Dema','Dechen','Kelzang','Zam']
Math = np.array([20, 35, 70, 40, 50, 67, 88, 46, 67, 46])
Sci = np.array([54, 60, 54, 34, 36, 67, 89, 90, 57, 67])
Eng = np.array([67, 76, 55, 45, 34, 25, 78, 47, 67, 76])
Dzo = np.array([93, 59, 76, 77, 59, 47, 29, 39, 71, 62])
Total = np.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251])
Sum_scores = Math + Sci + Eng + Dzo  # recomputed sum of the four subjects
# Plotting
plt.figure(figsize=(10,6))
plt.bar(students, Sum_scores, color='skyblue', label='Sum of Subjects')
plt.plot(students, Total, color='red', marker='o', linestyle='--', label='Reported Total')
plt.xlabel("Students")
plt.ylabel("Scores")
plt.title("Sum of Subject Scores vs Reported Total")
plt.xticks(rotation=45)
plt.legend()
plt.grid(axis='y')
plt.show()
Polynomial Regression Fitting¶
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Data
data = {
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Features and target
X = df[['Math', 'Sci', 'Eng', 'Dzo']]
y = df['Total']
# Polynomial features of degree 2 (for example)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit polynomial regression model
model = LinearRegression()
model.fit(X_poly, y)
# Predict and evaluate
y_pred = model.predict(X_poly)
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)
print("Model Coefficients:", model.coef_)
print("Model Intercept:", model.intercept_)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)
# Optional: Show original vs predicted totals
results = pd.DataFrame({'Actual Total': y, 'Predicted Total': y_pred})
print(results)
Model Coefficients: [-5.67064566e-06  7.84493403e-05  1.04189542e-04  3.42683531e-05
  2.50221198e-03  4.34619316e-03 -5.77033961e-04  8.42804316e-03
  5.75226527e-03  8.83379888e-04  1.14092040e-03  7.40611371e-03
  3.57583785e-03  3.44895376e-03]
Model Intercept: 102.33153393786222
Mean Squared Error: 3.2311742677852644e-27
R^2 Score: 1.0
   Actual Total  Predicted Total
0           234            234.0
1           230            230.0
2           255            255.0
3           196            196.0
4           179            179.0
5           206            206.0
6           284            284.0
7           222            222.0
8           262            262.0
9           251            251.0
With 14 polynomial features and only 10 samples, the model has more parameters than data points, so it interpolates the training data exactly (MSE ≈ 0, R² = 1.0). A perfect in-sample fit like this is a warning sign of overfitting, not evidence of a good model.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# Data
data = {
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Features and target
X = df[['Math', 'Sci', 'Eng', 'Dzo']]
y = df['Total']
# Polynomial features of degree 2
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit polynomial regression model
model = LinearRegression()
model.fit(X_poly, y)
# Predict for plotting
y_pred = model.predict(X_poly)
# Sort for smooth plotting
sorted_idx = np.argsort(y_pred)
y_pred_sorted = y_pred[sorted_idx]
y_sorted = y.values[sorted_idx]
plt.figure(figsize=(10, 6))
plt.plot(y_pred_sorted, label='Predicted Total (Polynomial Fit)', color='b', marker='o')
plt.plot(y_sorted, label='Actual Total', color='r', marker='x')
plt.title("Polynomial Regression Fit to Total Scores")
plt.xlabel("Sample index (sorted by predicted total)")
plt.ylabel("Total Score")
plt.legend()
plt.grid(True)
plt.show()
Logistic Regression Fitting¶
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
# Data
data = {
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Create categories based on Total score
def categorize(total):
if total < 210:
return 'Low'
elif total < 250:
return 'Medium'
else:
return 'High'
df['Category'] = df['Total'].apply(categorize)
# Features and target for classification
X = df[['Math', 'Sci', 'Eng', 'Dzo']]
y = df['Category']
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Use OneVsRestClassifier with LogisticRegression (solver='lbfgs')
clf = OneVsRestClassifier(LogisticRegression(solver='lbfgs', max_iter=1000))
clf.fit(X_train_scaled, y_train)
# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))
Confusion Matrix:
[[1 0 0]
[0 1 0]
[1 0 0]]
Classification Report:
              precision    recall  f1-score   support

        High       0.50      1.00      0.67         1
         Low       1.00      1.00      1.00         1
      Medium       0.00      0.00      0.00         1

    accuracy                           0.67         3
   macro avg       0.50      0.67      0.56         3
weighted avg       0.50      0.67      0.56         3
With a test set of only three students, these metrics are illustrative at best; the single Medium sample was misclassified as High.
pip install pandas scikit-learn matplotlib
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.decomposition import PCA
# Data
data = {
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Categorize totals
def categorize(total):
if total < 210:
return 'Low'
elif total < 250:
return 'Medium'
else:
return 'High'
df['Category'] = df['Total'].apply(categorize)
# Features and target
X = df[['Math', 'Sci', 'Eng', 'Dzo']]
y = df['Category']
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Fit classifier
clf = OneVsRestClassifier(LogisticRegression(solver='lbfgs', max_iter=1000))
clf.fit(X_scaled, y)
y_pred = clf.predict(X_scaled)
# Reduce to 2D for plotting
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# Plot
plt.figure(figsize=(8,6))
for category in df['Category'].unique():
idx = df['Category'] == category
plt.scatter(X_pca[idx,0], X_pca[idx,1], label=category, s=100)
plt.title("Student Total Score Classification (PCA projection)")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend()
plt.grid(True)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
# Data
data = {
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Categorize totals
def categorize(total):
if total < 210:
return 'Low'
elif total < 250:
return 'Medium'
else:
return 'High'
df['Category'] = df['Total'].apply(categorize)
# Features and target
X = df[['Math', 'Sci', 'Eng']] # choose 3 features for 3D plot
y = df['Category']
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Fit classifier
clf = OneVsRestClassifier(LogisticRegression(solver='lbfgs', max_iter=1000))
clf.fit(X_scaled, y)
y_pred = clf.predict(X_scaled)
# 3D Plot
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111, projection='3d')
colors = {'Low':'red', 'Medium':'green', 'High':'blue'}
for category in df['Category'].unique():
idx = df['Category'] == category
ax.scatter(
X_scaled[idx,0],
X_scaled[idx,1],
X_scaled[idx,2],
c=colors[category],
label=category,
s=100
)
ax.set_xlabel('Math (scaled)')
ax.set_ylabel('Science (scaled)')
ax.set_zlabel('English (scaled)')
ax.set_title('3D Classification of Students by Total Score')
ax.legend()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Data
data = {
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Categorize totals
def categorize(total):
if total < 210:
return 'Low'
elif total < 250:
return 'Medium'
else:
return 'High'
df['Category'] = df['Total'].apply(categorize)
# Histogram of Total scores by category
plt.figure(figsize=(10,6))
colors = {'Low':'red', 'Medium':'green', 'High':'blue'}
for category in df['Category'].unique():
subset = df[df['Category'] == category]
plt.hist(subset['Total'], bins=5, alpha=0.6, label=category, color=colors[category])
plt.title("Histogram of Total Scores by Category")
plt.xlabel("Total Score")
plt.ylabel("Number of Students")
plt.legend()
plt.grid(axis='y')
plt.show()
# Data
total_scores = [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
def stem_and_leaf(data):
data_sorted = sorted(data)
stem_dict = {}
for num in data_sorted:
stem = num // 10 # tens place
leaf = num % 10 # ones place
if stem in stem_dict:
stem_dict[stem].append(leaf)
else:
stem_dict[stem] = [leaf]
print("Stem | Leaves")
print("------------")
for stem, leaves in stem_dict.items():
leaves_str = " ".join(str(leaf) for leaf in leaves)
print(f"{stem:>4} | {leaves_str}")
# Run the stem-and-leaf plot
stem_and_leaf(total_scores)
Stem | Leaves
------------
  17 | 9
  19 | 6
  20 | 6
  22 | 2
  23 | 0 4
  25 | 1 5
  26 | 2
  28 | 4
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Data
data = {
'Name': ['Dorji','Tashi','Pema','Dawa','Nima','Karma','Dema','Dechen','Kelzang','Zam'],
'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)
# Categorize totals
def categorize(total):
if total < 210:
return 'Low'
elif total < 250:
return 'Medium'
else:
return 'High'
df['Category'] = df['Total'].apply(categorize)
# Heatmap of scores
plt.figure(figsize=(10,6))
# Use the scores only (exclude Name)
score_data = df[['Math', 'Sci', 'Eng', 'Dzo', 'Total']]
sns.heatmap(score_data, annot=True, cmap="YlGnBu", linewidths=0.5)
plt.title("Heatmap of Student Scores and Total")
plt.show()