Machine Learning
Activation Functions
1. Sigmoid
2. Tanh
3. ReLU
4. Leaky ReLU
These functions help neural networks learn complex patterns by introducing non-linearity.
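As a quick illustration, here is a minimal NumPy sketch of the four functions listed above (the leaky-ReLU slope of 0.01 is just a common default, not part of the original notebook):
In [ ]:
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))            # squashes any input into (0, 1)

def tanh(z):
    return np.tanh(z)                       # squashes any input into (-1, 1)

def relu(z):
    return np.maximum(0, z)                 # keeps positives, zeroes out negatives

def leaky_relu(z, alpha=0.01):
    return np.where(z > 0, z, alpha * z)    # small negative slope instead of a hard zero

z = np.linspace(-3, 3, 7)
for f in (sigmoid, tanh, relu, leaky_relu):
    print(f.__name__, f(z))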
Importing Libraries
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
Machine Learning Using Sigmoid Activation
My prompt to ChatGPT: "Write code to build a machine learning model that uses the sigmoid activation; I will upload my own CSV file."
In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# ---------------------------------
# 1. Load your uploaded CSV file
# ---------------------------------
# NOTE: adjust the path below to point at your own uploaded CSV file
data = pd.read_csv('~/work/kelzang-wangdi/datasets/StudentsPerformance.csv')
print("Data loaded successfully!")
print(data.head())
# ---------------------------------
# 2. Select feature column(s) and target
# ---------------------------------
# Change these column names according to your file
X = data[['math score']].values   # input feature (must be numeric)
# Target column. NOTE: logistic regression expects a binary 0/1 target, but 'reading score'
# is a continuous 0-100 value, which is why the loss below misbehaves (a corrected sketch follows the output).
y = data['reading score'].values
# Reshape if needed
X = X.reshape(-1, 1)
y = y.reshape(-1, 1)
# ---------------------------------
# 3. Sigmoid activation function
# ---------------------------------
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
# ---------------------------------
# 4. Logistic Regression training (Gradient Descent)
# ---------------------------------
# Initialize weights and bias
w = np.zeros((X.shape[1], 1))
b = 0
learning_rate = 0.01
epochs = 2000
m = len(X) # number of samples
loss_history = []
for i in range(epochs):
    # Forward pass
    z = np.dot(X, w) + b
    y_hat = sigmoid(z)
    # Compute loss (binary cross entropy)
    loss = -(1/m) * np.sum(
        y*np.log(y_hat + 1e-15) + (1-y)*np.log(1 - y_hat + 1e-15)
    )
    loss_history.append(loss)
    # Gradients
    dw = (1/m) * np.dot(X.T, (y_hat - y))
    db = (1/m) * np.sum(y_hat - y)
    # Update weights
    w -= learning_rate * dw
    b -= learning_rate * db
    # Print progress
    if i % 200 == 0:
        print(f"Epoch {i}, Loss: {loss:.4f}")
print("\nTraining completed!")
print("Final weight:", w)
print("Final bias:", b)
# ---------------------------------
# 5. Plot loss curve
# ---------------------------------
plt.plot(loss_history)
plt.title("Sigmoid Logistic Regression Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()
# ---------------------------------
# 6. Make prediction function
# ---------------------------------
def predict(X_new):
    return (sigmoid(np.dot(X_new, w) + b) >= 0.5).astype(int)
# ---------------------------------
# 7. Test prediction example
# ---------------------------------
test_value = np.array([[1.0]]) # change this value
print("Prediction for test value:", predict(test_value))
Data loaded successfully!
   gender race/ethnicity parental level of education         lunch  \
0  female        group B           bachelor's degree      standard
1  female        group C                some college      standard
2  female        group B             master's degree      standard
3    male        group A          associate's degree  free/reduced
4    male        group C                some college      standard

  test preparation course  math score  reading score  writing score
0                    none          72             72             74
1               completed          69             90             88
2                    none          90             95             93
3                    none          47             57             44
4                    none          76             78             75

Epoch 0, Loss: 0.6931
Epoch 200, Loss: -2354.4738
Epoch 400, Loss: -2354.4738
Epoch 600, Loss: -2354.4738
Epoch 800, Loss: -2354.4738
Epoch 1000, Loss: -2354.4738
Epoch 1200, Loss: -2354.4738
Epoch 1400, Loss: -2354.4738
Epoch 1600, Loss: -2354.4738
Epoch 1800, Loss: -2354.4738

Training completed!
Final weight: [[93721.110445]]
Final bias: 1363.3850077802035
Prediction for test value: [[1]]
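The loss above goes negative and the weights blow up because 'reading score' is a continuous 0-100 value rather than the binary 0/1 target that binary cross-entropy assumes. A minimal corrected sketch, reusing data and sigmoid from the cell above (the pass mark of 70 and the learning rate of 0.1 are arbitrary illustrative choices), might look like this:
In [ ]:
# Binarize the target: 1 if the reading score is at least 70, else 0 (threshold chosen for illustration)
y_bin = (data['reading score'].values >= 70).astype(int).reshape(-1, 1)

# Standardize the input so gradient descent behaves at this learning rate
X_raw = data[['math score']].values.astype(float)
X_std = (X_raw - X_raw.mean()) / X_raw.std()

w2 = np.zeros((1, 1))
b2 = 0.0
for _ in range(2000):
    y_hat = sigmoid(np.dot(X_std, w2) + b2)                       # forward pass
    w2 -= 0.1 * np.dot(X_std.T, (y_hat - y_bin)) / len(X_std)     # gradient step on binary cross-entropy
    b2 -= 0.1 * np.sum(y_hat - y_bin) / len(X_std)

print("Weight:", w2.ravel(), "Bias:", b2)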
Using Scikit-learn
In [5]:
%matplotlib inline
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# -----------------------------
# 1. Load your data
# -----------------------------
df = pd.read_csv("~/work/kelzang-wangdi/datasets/StudentsPerformance.csv")
# -----------------------------
# 2. Select features (X) and target (y)
# -----------------------------
# Example: predicting 'reading score' from the other numeric columns
target_col = "reading score" # change as needed
feature_cols = [col for col in df.columns if col != target_col]
X = df[feature_cols].select_dtypes(include=np.number).fillna(0).values
y = df[target_col].values
# -----------------------------
# 3. Split data into train and test sets
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# -----------------------------
# 4. Standardize features
# -----------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# -----------------------------
# 5. Train a machine learning model
# -----------------------------
model = LinearRegression() # you can replace with RandomForestRegressor, etc.
model.fit(X_train, y_train)
# -----------------------------
# 6. Make predictions
# -----------------------------
y_pred = model.predict(X_test)
# -----------------------------
# 7. Evaluate model
# -----------------------------
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")
# -----------------------------
# 8. Optional: Plot predictions vs actual
# -----------------------------
plt.figure(figsize=(6,6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title("Actual vs Predicted")
plt.show()
Mean Squared Error: 19.86
R^2 Score: 0.91
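The comment in step 5 suggests swapping in another regressor. A minimal sketch with RandomForestRegressor, reusing the train/test arrays from the cell above (n_estimators=100 is an illustrative default), could be:
In [ ]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)    # tree ensembles do not require scaled inputs, but the scaled arrays work fine
rf_pred = rf.predict(X_test)
print(f"Random forest R^2: {r2_score(y_test, rf_pred):.2f}")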
In [13]:
# ---------------------------------------------
# Machine Learning with scikit-learn and Tanh
# ---------------------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# -----------------------------
# 1. Load your dataset
# -----------------------------
df = pd.read_csv("~/work/kelzang-wangdi/datasets/StudentsPerformance.csv") # Upload your CSV in JupyterLab
feature_cols = ['math score', 'reading score'] # Feature columns
target_col = 'writing score' # Target column
# Extract features and target
X = df[feature_cols].values
y = df[target_col].values
# -----------------------------
# 2. Train/test split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# -----------------------------
# 3. Feature scaling
# -----------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# -----------------------------
# 4. Create MLP model with Tanh activation
# -----------------------------
model = MLPRegressor(hidden_layer_sizes=(50, 50),
                     activation='tanh',
                     solver='adam',
                     max_iter=1000,
                     random_state=42)
# -----------------------------
# 5. Train the model
# -----------------------------
model.fit(X_train, y_train)
# -----------------------------
# 6. Predict on test set
# -----------------------------
y_pred = model.predict(X_test)
# -----------------------------
# 7. Evaluate model
# -----------------------------
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.3f}")
print(f"R2 Score: {r2:.3f}")
# -----------------------------
# 8. Plot predictions vs true values
# -----------------------------
plt.figure(figsize=(8,6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.xlabel("True Writing Score")
plt.ylabel("Predicted Writing Score")
plt.title("MLP Regressor Predictions with Tanh Activation")
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2) # Reference line
plt.show()
Mean Squared Error: 26.222
R2 Score: 0.891
/opt/conda/lib/python3.13/site-packages/sklearn/neural_network/_multilayer_perceptron.py:781: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.
  warnings.warn(
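The warning above means the Adam optimizer hit the 1000-iteration cap before converging. A minimal follow-up sketch, reusing the scaled arrays from the cell above, raises max_iter, enables early_stopping, and swaps the activation to 'relu' (the third function in the list at the top); the specific numbers are illustrative:
In [ ]:
relu_model = MLPRegressor(hidden_layer_sizes=(50, 50),
                          activation='relu',      # ReLU hidden units instead of tanh
                          solver='adam',
                          max_iter=5000,          # more iterations to avoid the ConvergenceWarning
                          early_stopping=True,    # stop once the validation score stops improving
                          random_state=42)
relu_model.fit(X_train, y_train)
print(f"ReLU MLP R^2: {r2_score(y_test, relu_model.predict(X_test)):.3f}")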