[Kelzang Wangdi] - Fab Futures - Data Science

Machine Learning¶

Activation Functions¶

 1. Sigmoid
 2. Tanh
 3. ReLU
 4. Leaky ReLU

These functions help neural networks learn complex patterns by introducing non-linearity.
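
A minimal NumPy sketch of all four, for reference (the 0.01 slope used for Leaky ReLU is a common default, chosen here just for illustration):

In [ ]:
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))           # squashes z into (0, 1)

def tanh(z):
    return np.tanh(z)                     # squashes z into (-1, 1)

def relu(z):
    return np.maximum(0, z)               # keeps positives, zeroes out negatives

def leaky_relu(z, alpha=0.01):
    return np.where(z > 0, z, alpha * z)  # small negative slope instead of a hard zero

z = np.array([-2.0, 0.0, 2.0])
print(sigmoid(z), tanh(z), relu(z), leaky_relu(z))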

Importing Libraries¶

In [ ]:
import numpy as np
import matplotlib.pyplot as plt

1. numpy → for numerical operations (arrays, math functions)¶

2. matplotlib.pyplot → for creating graphs¶

Machine Learning Using SIGMOID ACTIVATION¶

My prompt to ChatGPT to write code for me: Write a code to form a machine learning model using sigmoid, while I am going to upload my own CSV file.¶

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---------------------------------
# 1. Load your uploaded CSV file
# ---------------------------------
# NOTE: After uploading, replace 'yourfile.csv' with the actual filename
data = pd.read_csv('~/work/kelzang-wangdi/datasets/StudentsPerformance.csv')

print("Data loaded successfully!")
print(data.head())

# ---------------------------------
# 2. Select feature column(s) and target
# ---------------------------------
# Change these column names according to your file
X = data[['math score']].values      # input (must be numeric)
y = data['reading score'].values     # output -- WARNING: logistic regression expects 0/1 labels,
                                     # but this column holds raw 0-100 scores (see the note after the output)

# Reshape if needed
X = X.reshape(-1, 1)
y = y.reshape(-1, 1)

# ---------------------------------
# 3. Sigmoid activation function
# ---------------------------------
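# sigmoid(z) = 1 / (1 + e^(-z)): maps any real number into (0, 1),
# so the output can be read as a probability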
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# ---------------------------------
# 4. Logistic Regression training (Gradient Descent)
# ---------------------------------
# Initialize weights and bias
w = np.zeros((X.shape[1], 1))
b = 0

learning_rate = 0.01
epochs = 2000

m = len(X)  # number of samples

loss_history = []

for i in range(epochs):
    # Forward pass
    z = np.dot(X, w) + b
    y_hat = sigmoid(z)

    # Compute loss (binary cross entropy)
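    # (the 1e-15 inside the logs guards against log(0))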
    loss = -(1/m) * np.sum(
        y*np.log(y_hat + 1e-15) + (1-y)*np.log(1 - y_hat + 1e-15)
    )
    loss_history.append(loss)

    # Gradients
    dw = (1/m) * np.dot(X.T, (y_hat - y))
    db = (1/m) * np.sum(y_hat - y)

    # Update weights
    w -= learning_rate * dw
    b -= learning_rate * db

    # Print progress
    if i % 200 == 0:
        print(f"Epoch {i}, Loss: {loss:.4f}")

print("\nTraining completed!")
print("Final weight:", w)
print("Final bias:", b)

# ---------------------------------
# 5. Plot loss curve
# ---------------------------------
plt.plot(loss_history)
plt.title("Sigmoid Logistic Regression Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

# ---------------------------------
# 6. Make prediction function
# ---------------------------------
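# A predicted probability of at least 0.5 is classified as 1, otherwise 0.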
def predict(X_new):
    return (sigmoid(np.dot(X_new, w) + b) >= 0.5).astype(int)

# ---------------------------------
# 7. Test prediction example
# ---------------------------------
test_value = np.array([[1.0]])   # change this to a realistic math score (e.g. 0-100)
print("Prediction for test value:", predict(test_value))
Data loaded successfully!
   gender race/ethnicity parental level of education         lunch  \
0  female        group B           bachelor's degree      standard   
1  female        group C                some college      standard   
2  female        group B             master's degree      standard   
3    male        group A          associate's degree  free/reduced   
4    male        group C                some college      standard   

  test preparation course  math score  reading score  writing score  
0                    none          72             72             74  
1               completed          69             90             88  
2                    none          90             95             93  
3                    none          47             57             44  
4                    none          76             78             75  
Epoch 0, Loss: 0.6931
Epoch 200, Loss: -2354.4738
Epoch 400, Loss: -2354.4738
Epoch 600, Loss: -2354.4738
Epoch 800, Loss: -2354.4738
Epoch 1000, Loss: -2354.4738
Epoch 1200, Loss: -2354.4738
Epoch 1400, Loss: -2354.4738
Epoch 1600, Loss: -2354.4738
Epoch 1800, Loss: -2354.4738

Training completed!
Final weight: [[93721.110445]]
Final bias: 1363.3850077802035
[Figure: training loss curve for sigmoid logistic regression]
Prediction for test value: [[1]]
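
The output above shows the warning in action: the loss goes steeply negative and the final weight explodes to roughly 93,000, because binary cross-entropy assumes y is 0 or 1 while reading score holds raw 0-100 values. A minimal fix, assuming a pass/fail framing of the problem, is to binarize the target before training; the 70-point threshold below is an arbitrary illustration, not something from the dataset. Rescaling the math scores also helps gradient descent behave at this learning rate.

In [ ]:
# Hypothetical fix: turn the raw 0-100 reading score into a 0/1 "passed" label.
# The threshold of 70 is an arbitrary choice for illustration.
y = (data['reading score'].values >= 70).astype(int).reshape(-1, 1)

# Optional: rescale the input so gradient descent converges at learning_rate = 0.01.
X = data[['math score']].values / 100.0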

Using Scikit-learn¶

In [5]:
%matplotlib inline
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# -----------------------------
# 1. Load your data
# -----------------------------
df = pd.read_csv("~/work/kelzang-wangdi/datasets/StudentsPerformance.csv")  

# -----------------------------
# 2. Select features (X) and target (y)
# -----------------------------
# Example: predicting 'math score' using other numeric columns
target_col = "reading score"  # change as needed
feature_cols = [col for col in df.columns if col != target_col]

X = df[feature_cols].select_dtypes(include=np.number).fillna(0).values  
y = df[target_col].values

# -----------------------------
# 3. Split data into train and test sets
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -----------------------------
# 4. Standardize features
# -----------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -----------------------------
# 5. Train a machine learning model
# -----------------------------
model = LinearRegression()  # you can replace with RandomForestRegressor, etc.
model.fit(X_train, y_train)

# -----------------------------
# 6. Make predictions
# -----------------------------
y_pred = model.predict(X_test)

# -----------------------------
# 7. Evaluate model
# -----------------------------
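# MSE: average squared error (lower is better); R^2: fraction of variance
# explained, where 1.0 is a perfect fit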
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")

# -----------------------------
# 8. Optional: Plot predictions vs actual
# -----------------------------
plt.figure(figsize=(6,6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title("Actual vs Predicted")
plt.show()
Mean Squared Error: 19.86
R^2 Score: 0.91
[Figure: actual vs. predicted scatter with y = x reference line]
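
Step 5 above notes that LinearRegression can be swapped for another regressor. As a minimal sketch, run in the same session as the cell above (so X_train, y_train, and r2_score are already defined), a RandomForestRegressor drops in directly; tree models don't need the scaling, but reusing the scaled arrays keeps the comparison fair:

In [ ]:
from sklearn.ensemble import RandomForestRegressor

# Drop-in replacement for LinearRegression in step 5.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)
print(f"Random Forest R^2: {r2_score(y_test, rf_pred):.2f}")

Machine Learning Using TANH ACTIVATION¶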
In [13]:
# ---------------------------------------------
# Machine Learning with scikit-learn and Tanh
# ---------------------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# -----------------------------
# 1. Load your dataset
# -----------------------------
df = pd.read_csv("~/work/kelzang-wangdi/datasets/StudentsPerformance.csv")  # Upload your CSV in JupyterLab
feature_cols = ['math score', 'reading score']  # Feature columns
target_col = 'writing score'                    # Target column

# Extract features and target
X = df[feature_cols].values
y = df[target_col].values

# -----------------------------
# 2. Train/test split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -----------------------------
# 3. Feature scaling
# -----------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -----------------------------
# 4. Create MLP model with Tanh activation
# -----------------------------
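# Two hidden layers of 50 units each; tanh squashes every unit's output to (-1, 1)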
model = MLPRegressor(hidden_layer_sizes=(50,50),
                     activation='tanh',
                     solver='adam',
                     max_iter=1000,
                     random_state=42)

# -----------------------------
# 5. Train the model
# -----------------------------
model.fit(X_train, y_train)

# -----------------------------
# 6. Predict on test set
# -----------------------------
y_pred = model.predict(X_test)

# -----------------------------
# 7. Evaluate model
# -----------------------------
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.3f}")
print(f"R2 Score: {r2:.3f}")

# -----------------------------
# 8. Plot predictions vs true values
# -----------------------------
plt.figure(figsize=(8,6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.xlabel("True Writing Score")
plt.ylabel("Predicted Writing Score")
plt.title("MLP Regressor Predictions with Tanh Activation")
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)  # Reference line
plt.show()
Mean Squared Error: 26.222
R2 Score: 0.891
/opt/conda/lib/python3.13/site-packages/sklearn/neural_network/_multilayer_perceptron.py:781: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.
  warnings.warn(
[Figure: MLP (tanh) predictions vs. true writing scores]
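
The ConvergenceWarning above means the adam optimizer used all 1000 iterations before the loss stabilized. A simple remedy is to raise the cap and refit; the 5000 below is an arbitrary choice, and the right value is whatever makes the warning disappear without the test R2 degrading. Run in the same session as the cell above so the train/test arrays are still defined:

In [ ]:
# Refit with a higher iteration cap.
model = MLPRegressor(hidden_layer_sizes=(50, 50),
                     activation='tanh',
                     solver='adam',
                     max_iter=5000,   # raised from 1000; tune as needed
                     random_state=42)
model.fit(X_train, y_train)
print(f"R2 after refit: {r2_score(y_test, model.predict(X_test)):.3f}")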