In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
import jax
import jax.numpy as jnp
from jax import random, grad, jit
In [2]:
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
In [3]:
import pandas as pd
df = pd.read_csv('datasets/mental_health.csv')
In [4]:
print("Dataset Shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())
print("\nDataset Info:")
print(df.info())
print("\nBasic Statistics:")
print(df.describe())
Dataset Shape: (500, 10)
First 5 rows:
User_ID Age Gender Daily_Screen_Time(hrs) Sleep_Quality(1-10) \
0 U001 44 Male 3.1 7.0
1 U002 30 Other 5.1 7.0
2 U003 23 Other 7.4 6.0
3 U004 36 Female 5.7 7.0
4 U005 34 Female 7.0 4.0
Stress_Level(1-10) Days_Without_Social_Media Exercise_Frequency(week) \
0 6.0 2.0 5.0
1 8.0 5.0 3.0
2 7.0 1.0 3.0
3 8.0 1.0 1.0
4 7.0 5.0 1.0
Social_Media_Platform Happiness_Index(1-10)
0 Facebook 10.0
1 LinkedIn 10.0
2 YouTube 6.0
3 TikTok 8.0
4 X (Twitter) 8.0
Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 User_ID 500 non-null object
1 Age 500 non-null int64
2 Gender 500 non-null object
3 Daily_Screen_Time(hrs) 500 non-null float64
4 Sleep_Quality(1-10) 500 non-null float64
5 Stress_Level(1-10) 500 non-null float64
6 Days_Without_Social_Media 500 non-null float64
7 Exercise_Frequency(week) 500 non-null float64
8 Social_Media_Platform 500 non-null object
9 Happiness_Index(1-10) 500 non-null float64
dtypes: float64(6), int64(1), object(3)
memory usage: 39.2+ KB
None
Basic Statistics:
Age Daily_Screen_Time(hrs) Sleep_Quality(1-10) \
count 500.000000 500.000000 500.000000
mean 32.988000 5.530000 6.304000
std 9.960637 1.734877 1.529792
min 16.000000 1.000000 2.000000
25% 24.000000 4.300000 5.000000
50% 34.000000 5.600000 6.000000
75% 41.000000 6.700000 7.000000
max 49.000000 10.800000 10.000000
Stress_Level(1-10) Days_Without_Social_Media \
count 500.000000 500.000000
mean 6.618000 3.134000
std 1.542996 1.858751
min 2.000000 0.000000
25% 6.000000 2.000000
50% 7.000000 3.000000
75% 8.000000 5.000000
max 10.000000 9.000000
Exercise_Frequency(week) Happiness_Index(1-10)
count 500.000000 500.000000
mean 2.448000 8.376000
std 1.428067 1.524228
min 0.000000 4.000000
25% 1.000000 7.000000
50% 2.000000 9.000000
75% 3.000000 10.000000
max 7.000000 10.000000
In [5]:
import numpy as np
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
x = np.linspace(-3, 3, 100)
plt.figure(figsize=(10, 6))
plt.plot(x, 1/(1+np.exp(-x)), label='Sigmoid', linewidth=2)
plt.plot(x, np.tanh(x), label='Tanh', linewidth=2)
plt.plot(x, np.where(x < 0, 0, x), label='ReLU', linewidth=2)
plt.plot(x, np.where(x < 0, 0.1*x, x), '--', label='Leaky ReLU (α=0.1)', linewidth=2)
plt.xlabel('x', fontsize=12)
plt.ylabel('Activation', fontsize=12)
plt.title('Neural Network Activation Functions', fontsize=14, fontweight='bold')
plt.legend(fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
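The shapes above matter mostly through their gradients: sigmoid and tanh saturate for large |x|, while ReLU keeps a constant slope of 1 for positive inputs. As a companion to the plot, here is a minimal, self-contained sketch that plots the derivatives on the same grid, which is one way to see why deep ReLU networks suffer less from vanishing gradients.
In [ ]:
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-3, 3, 100)
sigmoid = 1 / (1 + np.exp(-x))

plt.figure(figsize=(10, 6))
plt.plot(x, sigmoid * (1 - sigmoid), label="Sigmoid'", linewidth=2)   # peaks at 0.25
plt.plot(x, 1 - np.tanh(x) ** 2, label="Tanh'", linewidth=2)          # peaks at 1.0
plt.plot(x, np.where(x < 0, 0.0, 1.0), label="ReLU'", linewidth=2)    # constant 1 for x > 0
plt.plot(x, np.where(x < 0, 0.1, 1.0), '--', label="Leaky ReLU' (α=0.1)", linewidth=2)
plt.xlabel('x', fontsize=12)
plt.ylabel('Derivative', fontsize=12)
plt.title('Activation Function Derivatives', fontsize=14, fontweight='bold')
plt.legend(fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()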
In [7]:
df_processed = df.copy()
# Encode categorical variables
label_encoders = {}
categorical_cols = ['Gender', 'Social_Media_Platform']
for col in categorical_cols:
    le = LabelEncoder()
    df_processed[col] = le.fit_transform(df_processed[col])
    label_encoders[col] = le
# Separate features and target
X = df_processed.drop(['User_ID', 'Happiness_Index(1-10)'], axis=1)
y = df_processed['Happiness_Index(1-10)']
# For classification: Create binned happiness categories
y_class = pd.cut(y, bins=[0, 4, 7, 10],
                 labels=['Low', 'Medium', 'High'])
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_scaled, y_class, test_size=0.2, random_state=42
)
print(f"Training samples: {X_train.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")
Training samples: 400
Test samples: 100
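Note that the classification split above is not stratified, so a rare class (such as 'Low' happiness) can end up almost absent from the test set. A minimal sketch of a stratified alternative, assuming the same X_scaled and y_class defined above:
In [ ]:
# Hypothetical variant: stratify the classification split so each happiness
# category keeps roughly the same proportion in train and test.
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_scaled, y_class, test_size=0.2, random_state=42,
    stratify=y_class  # preserve Low/Medium/High proportions in both splits
)
print(y_train_cls.value_counts(normalize=True))
print(y_test_cls.value_counts(normalize=True))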
In [10]:
# Cell 5: Custom Neural Network with JAX (similar to XOR example)
# Initialize random key
key = random.PRNGKey(42)
# Define network architecture
input_size = X_train.shape[1]
hidden_size1 = 16
hidden_size2 = 8
output_size = 1 # For regression
# Forward pass function
@jit
def forward(params, x):
    (w1, b1), (w2, b2), (w3, b3) = params
    layer1 = jnp.maximum(0, x @ w1 + b1)
    layer2 = jnp.tanh(layer1 @ w2 + b2)
    output = layer2 @ w3 + b3
    return output
# Loss function (Mean Squared Error)
@jit
def loss(params, x, y):
    predictions = forward(params, x)
    return jnp.mean((predictions - y) ** 2)
# Gradient update
@jit
def update(params, x, y, learning_rate=0.01):
    grads = grad(loss)(params, x, y)
    return [(w - learning_rate * dw, b - learning_rate * db)
            for (w, b), (dw, db) in zip(params, grads)]
# Initialize parameters
def init_params(key):
    keys = random.split(key, 6)
    # He initialization for ReLU
    w1 = random.normal(keys[0], (input_size, hidden_size1)) * np.sqrt(2/input_size)
    b1 = jnp.zeros(hidden_size1)
    w2 = random.normal(keys[1], (hidden_size1, hidden_size2)) * np.sqrt(2/hidden_size1)
    b2 = jnp.zeros(hidden_size2)
    w3 = random.normal(keys[2], (hidden_size2, output_size)) * np.sqrt(2/hidden_size2)
    b3 = jnp.zeros(output_size)
    return [(w1, b1), (w2, b2), (w3, b3)]
# Training loop
params = init_params(key)
losses = []
print("Training Custom JAX Neural Network...")
for epoch in range(501):
    current_loss = loss(params, jnp.array(X_train), jnp.array(y_train).reshape(-1, 1))
    losses.append(current_loss)
    params = update(params, jnp.array(X_train), jnp.array(y_train).reshape(-1, 1), 0.001)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss = {current_loss:.4f}")
# Plot training loss
plt.figure(figsize=(10, 5))
plt.plot(losses, linewidth=2)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss (MSE)', fontsize=12)
plt.title('Training Loss - Custom JAX Neural Network', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Training Custom JAX Neural Network...
Epoch 0: Loss = 53.8688
Epoch 100: Loss = 5.8610
Epoch 200: Loss = 2.1962
Epoch 300: Loss = 1.7813
Epoch 400: Loss = 1.5533
Epoch 500: Loss = 1.3730
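The loop above only tracks training loss. For a rough comparison with the scikit-learn regressor below, here is a minimal sketch (assuming the params, forward, X_test and y_test defined above) that evaluates the JAX model on the held-out set:
In [ ]:
# Evaluate the custom JAX network on the test split (sketch; not run above)
test_preds = forward(params, jnp.array(X_test))
jax_test_rmse = float(jnp.sqrt(jnp.mean((test_preds.flatten() - jnp.array(y_test.values)) ** 2)))
print(f"JAX network test RMSE: {jax_test_rmse:.4f}")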
In [11]:
# Cell 6: Scikit-learn MLP Regressor for Happiness Index prediction
print("\n" + "="*50)
print("Scikit-learn MLP Regressor")
print("="*50)
mlp_regressor = MLPRegressor(
    hidden_layer_sizes=(64, 32, 16),
    activation='relu',
    solver='adam',
    alpha=0.001,
    batch_size=32,
    learning_rate='adaptive',
    max_iter=1000,
    random_state=42,
    verbose=True,
    tol=1e-4
)
mlp_regressor.fit(X_train, y_train)
# Predictions
y_pred_train = mlp_regressor.predict(X_train)
y_pred_test = mlp_regressor.predict(X_test)
# Calculate metrics
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f"\nTraining RMSE: {train_rmse:.4f}")
print(f"Test RMSE: {test_rmse:.4f}")
# Plot actual vs predicted
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(y_train, y_pred_train, alpha=0.6)
plt.plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], 'r--', lw=2)
plt.xlabel('Actual Happiness Index', fontsize=12)
plt.ylabel('Predicted Happiness Index', fontsize=12)
plt.title('Training Set: Actual vs Predicted', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.subplot(1, 2, 2)
plt.scatter(y_test, y_pred_test, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('Actual Happiness Index', fontsize=12)
plt.ylabel('Predicted Happiness Index', fontsize=12)
plt.title('Test Set: Actual vs Predicted', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
==================================================
Scikit-learn MLP Regressor
==================================================
Iteration 1, loss = 34.56925259
Iteration 2, loss = 30.67669257
Iteration 3, loss = 24.56475741
Iteration 4, loss = 14.02682431
Iteration 5, loss = 4.86133443
Iteration 6, loss = 2.56293223
... (iterations 7-340 omitted; loss decreases steadily to ~0.013) ...
Iteration 341, loss = 0.01355808
Iteration 342, loss = 0.01401924
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Training RMSE: 0.1558
Test RMSE: 1.4371
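The gap between training RMSE (0.16) and test RMSE (1.44) points to overfitting on this small dataset. One common mitigation with MLPRegressor is its built-in early stopping, which holds out a validation fraction and stops when the validation score stalls. A minimal sketch under the same data and hyperparameters, with no claim about the scores it would reach:
In [ ]:
# Hypothetical variant with early stopping to curb overfitting
mlp_regressor_es = MLPRegressor(
    hidden_layer_sizes=(64, 32, 16),
    activation='relu',
    solver='adam',
    alpha=0.001,
    batch_size=32,
    max_iter=1000,
    random_state=42,
    early_stopping=True,       # hold out part of the training data for validation
    validation_fraction=0.15,  # size of that validation split
    n_iter_no_change=20,       # stop after 20 epochs without improvement
)
mlp_regressor_es.fit(X_train, y_train)
print("Test RMSE with early stopping:",
      np.sqrt(mean_squared_error(y_test, mlp_regressor_es.predict(X_test))))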
In [12]:
# Cell 7: Scikit-learn MLP Classifier for Happiness Categories
print("\n" + "="*50)
print("Scikit-learn MLP Classifier (Happiness Categories)")
print("="*50)
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(128, 64, 32),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    batch_size=32,
    learning_rate='adaptive',
    max_iter=1000,
    random_state=42,
    verbose=True,
    tol=1e-4
)
mlp_classifier.fit(X_train_cls, y_train_cls)
# Predictions
y_pred_train_cls = mlp_classifier.predict(X_train_cls)
y_pred_test_cls = mlp_classifier.predict(X_test_cls)
# Calculate metrics
train_accuracy = accuracy_score(y_train_cls, y_pred_train_cls)
test_accuracy = accuracy_score(y_test_cls, y_pred_test_cls)
print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print("\nClassification Report (Test Set):")
print(classification_report(y_test_cls, y_pred_test_cls))
# Plot confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns
# Pass labels explicitly so the matrix rows/columns match the tick labels below
# (without it, confusion_matrix orders classes alphabetically: High, Low, Medium)
cm = confusion_matrix(y_test_cls, y_pred_test_cls, labels=['Low', 'Medium', 'High'])
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Low', 'Medium', 'High'],
            yticklabels=['Low', 'Medium', 'High'])
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('Actual', fontsize=12)
plt.title('Confusion Matrix - Happiness Categories', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
==================================================
Scikit-learn MLP Classifier (Happiness Categories)
==================================================
Iteration 1, loss = 0.74980355
Iteration 2, loss = 0.52002558
Iteration 3, loss = 0.44332897
Iteration 4, loss = 0.40657690
Iteration 5, loss = 0.38596973
Iteration 6, loss = 0.37457952
Iteration 7, loss = 0.35703986
Iteration 8, loss = 0.34809644
Iteration 9, loss = 0.33486887
Iteration 10, loss = 0.32501302
Iteration 11, loss = 0.31560982
Iteration 12, loss = 0.30890269
Iteration 13, loss = 0.29880504
Iteration 14, loss = 0.29474142
Iteration 15, loss = 0.28345785
Iteration 16, loss = 0.27676673
Iteration 17, loss = 0.26925062
Iteration 18, loss = 0.26670483
Iteration 19, loss = 0.25708587
Iteration 20, loss = 0.25083919
Iteration 21, loss = 0.24155842
Iteration 22, loss = 0.23465111
Iteration 23, loss = 0.23317044
Iteration 24, loss = 0.22114184
Iteration 25, loss = 0.21580674
Iteration 26, loss = 0.21330378
Iteration 27, loss = 0.20467787
Iteration 28, loss = 0.20148578
Iteration 29, loss = 0.19340028
Iteration 30, loss = 0.18395941
Iteration 31, loss = 0.17642575
Iteration 32, loss = 0.17474499
Iteration 33, loss = 0.17201332
Iteration 34, loss = 0.15961138
Iteration 35, loss = 0.14894482
Iteration 36, loss = 0.14908439
Iteration 37, loss = 0.14065831
Iteration 38, loss = 0.13554344
Iteration 39, loss = 0.12670226
Iteration 40, loss = 0.12075479
Iteration 41, loss = 0.11291331
Iteration 42, loss = 0.10629412
Iteration 43, loss = 0.10459969
Iteration 44, loss = 0.10249260
Iteration 45, loss = 0.09136979
Iteration 46, loss = 0.09706808
Iteration 47, loss = 0.08588192
Iteration 48, loss = 0.07727510
Iteration 49, loss = 0.06986936
Iteration 50, loss = 0.06664615
Iteration 51, loss = 0.06238410
Iteration 52, loss = 0.06184626
Iteration 53, loss = 0.05913694
Iteration 54, loss = 0.05547889
Iteration 55, loss = 0.05490956
Iteration 56, loss = 0.04897148
Iteration 57, loss = 0.04525698
Iteration 58, loss = 0.04018920
Iteration 59, loss = 0.03703883
Iteration 60, loss = 0.03356822
Iteration 61, loss = 0.03136667
Iteration 62, loss = 0.03055484
Iteration 63, loss = 0.03052201
Iteration 64, loss = 0.02854216
Iteration 65, loss = 0.02555693
Iteration 66, loss = 0.02366172
Iteration 67, loss = 0.02114843
Iteration 68, loss = 0.02071232
Iteration 69, loss = 0.01924165
Iteration 70, loss = 0.01844432
Iteration 71, loss = 0.01697676
Iteration 72, loss = 0.01583818
Iteration 73, loss = 0.01540004
Iteration 74, loss = 0.01432337
Iteration 75, loss = 0.01393786
Iteration 76, loss = 0.01327088
Iteration 77, loss = 0.01363083
Iteration 78, loss = 0.01319888
Iteration 79, loss = 0.01499188
Iteration 80, loss = 0.01057581
Iteration 81, loss = 0.01058355
Iteration 82, loss = 0.01009974
Iteration 83, loss = 0.00870986
Iteration 84, loss = 0.00884298
Iteration 85, loss = 0.00850469
Iteration 86, loss = 0.00796846
Iteration 87, loss = 0.00817054
Iteration 88, loss = 0.00704775
Iteration 89, loss = 0.00712154
Iteration 90, loss = 0.00639447
Iteration 91, loss = 0.00616586
Iteration 92, loss = 0.00617450
Iteration 93, loss = 0.00562973
Iteration 94, loss = 0.00544640
Iteration 95, loss = 0.00537881
Iteration 96, loss = 0.00507395
Iteration 97, loss = 0.00508614
Iteration 98, loss = 0.00516844
Iteration 99, loss = 0.00479997
Iteration 100, loss = 0.00454176
Iteration 101, loss = 0.00455185
Iteration 102, loss = 0.00430039
Iteration 103, loss = 0.00404063
Iteration 104, loss = 0.00406051
Iteration 105, loss = 0.00383859
Iteration 106, loss = 0.00370933
Iteration 107, loss = 0.00356098
Iteration 108, loss = 0.00353318
Iteration 109, loss = 0.00342575
Iteration 110, loss = 0.00330611
Iteration 111, loss = 0.00323738
Iteration 112, loss = 0.00317871
Iteration 113, loss = 0.00303295
Iteration 114, loss = 0.00298431
Iteration 115, loss = 0.00301327
Iteration 116, loss = 0.00298190
Iteration 117, loss = 0.00283910
Iteration 118, loss = 0.00261796
Iteration 119, loss = 0.00257880
Iteration 120, loss = 0.00253321
Iteration 121, loss = 0.00246828
Iteration 122, loss = 0.00239748
Iteration 123, loss = 0.00231007
Iteration 124, loss = 0.00224259
Iteration 125, loss = 0.00219309
Iteration 126, loss = 0.00212166
Iteration 127, loss = 0.00216875
Iteration 128, loss = 0.00220457
Iteration 129, loss = 0.00207331
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Training Accuracy: 1.0000
Test Accuracy: 0.8100
Classification Report (Test Set):
precision recall f1-score support
High 0.82 0.93 0.87 70
Low 0.00 0.00 0.00 1
Medium 0.76 0.55 0.64 29
accuracy 0.81 100
macro avg 0.53 0.49 0.50 100
weighted avg 0.80 0.81 0.80 100
/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
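The UndefinedMetricWarning above appears because the 'Low' class has a single test sample and receives no predictions, so its precision is undefined. A minimal sketch that makes the report explicit about this, using the zero_division parameter the warning itself mentions:
In [ ]:
# Re-print the report with zero_division handled explicitly; labels fixes the row order
print(classification_report(
    y_test_cls, y_pred_test_cls,
    labels=['Low', 'Medium', 'High'],
    zero_division=0,
))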
In [13]:
# Cell 8: Feature importance analysis
# Get feature importances from the neural network
feature_names = X.columns.tolist()
# Calculate feature importance using permutation importance
from sklearn.inspection import permutation_importance
perm_importance = permutation_importance(
    mlp_regressor, X_test, y_test,
    n_repeats=10, random_state=42
)
# Sort features by importance
sorted_idx = perm_importance.importances_mean.argsort()
plt.figure(figsize=(10, 8))
plt.barh(range(len(sorted_idx)),
         perm_importance.importances_mean[sorted_idx],
         xerr=perm_importance.importances_std[sorted_idx])
plt.yticks(range(len(sorted_idx)),
           np.array(feature_names)[sorted_idx])
plt.xlabel('Permutation Importance', fontsize=12)
plt.title('Feature Importance for Happiness Index Prediction',
          fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()
In [17]:
# Cell 9: Correlation analysis and visualization
# Calculate correlation matrix
correlation_matrix = df_processed.corr()
# Plot heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm',
            center=0, fmt='.2f', linewidths=0.5)
plt.title('Feature Correlation Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
# Plot specific relationships
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Daily Screen Time vs Happiness
axes[0, 0].scatter(df['Daily_Screen_Time(hrs)'], df['Happiness_Index(1-10)'], alpha=0.6)
axes[0, 0].set_xlabel('Daily Screen Time (hrs)')
axes[0, 0].set_ylabel('Happiness Index(1-10)')
axes[0, 0].set_title('Screen Time vs Happiness')
axes[0, 0].grid(True, alpha=0.3)
# Sleep Quality vs Happiness
axes[0, 1].scatter(df['Sleep_Quality(1-10)'], df['Happiness_Index(1-10)'], alpha=0.6)
axes[0, 1].set_xlabel('Sleep Quality (1-10)')
axes[0, 1].set_ylabel('Happiness Index')
axes[0, 1].set_title('Sleep Quality vs Happiness')
axes[0, 1].grid(True, alpha=0.3)
# Stress Level vs Happiness
axes[1, 0].scatter(df['Stress_Level(1-10)'], df['Happiness_Index(1-10)'], alpha=0.6)
axes[1, 0].set_xlabel('Stress Level (1-10)')
axes[1, 0].set_ylabel('Happiness Index(1-10)')
axes[1, 0].set_title('Stress Level vs Happiness')
axes[1, 0].grid(True, alpha=0.3)
# Exercise Frequency vs Happiness
axes[1, 1].scatter(df['Exercise_Frequency(week)'], df['Happiness_Index(1-10)'], alpha=0.6)
axes[1, 1].set_xlabel('Exercise Frequency (per week)')
axes[1, 1].set_ylabel('Happiness Index(1-10)')
axes[1, 1].set_title('Exercise Frequency vs Happiness')
axes[1, 1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
In [22]:
# Cell 10: Model comparison and summary
print("\n" + "="*60)
print("MODEL PERFORMANCE SUMMARY")
print("="*60)
print("\n1. Custom JAX Neural Network:")
print(f" - Final Training Loss: {losses[-1]:.4f}")
print(f" - Architecture: {input_size} → {hidden_size1} → {hidden_size2} → 1")
print(f" - Activations: ReLU → Tanh → Linear")
print("\n2. Scikit-learn MLP Regressor:")
print(f" - Training RMSE: {train_rmse:.4f}")
print(f" - Test RMSE: {test_rmse:.4f}")
print(f" - Architecture: {X_train.shape[1]} → 64 → 32 → 16 → 1")
print(f" - Activation: ReLU")
print("\n3. Scikit-learn MLP Classifier:")
print(f" - Training Accuracy: {train_accuracy:.4f}")
print(f" - Test Accuracy: {test_accuracy:.4f}")
print(f" - Architecture: {X_train.shape[1]} → 128 → 64 → 32 → 3")
print(f" - Activation: ReLU")
print("\n" + "="*60)
print("KEY INSIGHTS:")
print("="*60)
# Calculate average happiness by platform
platform_happiness = df.groupby('Social_Media_Platform')['Happiness_Index(1-10)'].mean().sort_values()
print("\n1. Average Happiness by Social Media Platform:")
for platform, score in platform_happiness.items():
    print(f"   {platform}: {score:.2f}")
# Correlation with happiness
happiness_corr = df_processed.corr()['Happiness_Index(1-10)'].sort_values(ascending=False)
print("\n2. Top Features Correlated with Happiness:")
# FIX: use happiness_corr instead of Happiness_corr
for idx, (feature, corr) in enumerate(happiness_corr.items()):
    if feature != 'Happiness_Index(1-10)' and idx < 6:
        print(f"   {idx}. {feature}: {corr:.3f}")
print("\n3. Recommended Actions for Better Mental Health:")
print(" • Maintain screen time below 5 hours daily")
print(" • Aim for sleep quality score above 7")
print(" • Exercise at least 3 times per week")
print(" • Take regular breaks from social media")
============================================================
MODEL PERFORMANCE SUMMARY
============================================================

1. Custom JAX Neural Network:
   - Final Training Loss: 1.3730
   - Architecture: 8 → 16 → 8 → 1
   - Activations: ReLU → Tanh → Linear

2. Scikit-learn MLP Regressor:
   - Training RMSE: 0.1558
   - Test RMSE: 1.4371
   - Architecture: 8 → 64 → 32 → 16 → 1
   - Activation: ReLU

3. Scikit-learn MLP Classifier:
   - Training Accuracy: 1.0000
   - Test Accuracy: 0.8100
   - Architecture: 8 → 128 → 64 → 32 → 3
   - Activation: ReLU

============================================================
KEY INSIGHTS:
============================================================

1. Average Happiness by Social Media Platform:
   Instagram: 7.99
   YouTube: 8.31
   Facebook: 8.35
   TikTok: 8.38
   LinkedIn: 8.52
   X (Twitter): 8.65

2. Top Features Correlated with Happiness:
   1. Sleep_Quality(1-10): 0.679
   2. Days_Without_Social_Media: 0.064
   3. Social_Media_Platform: 0.052
   4. Exercise_Frequency(week): 0.041
   5. Gender: 0.026

3. Recommended Actions for Better Mental Health:
   • Maintain screen time below 5 hours daily
   • Aim for sleep quality score above 7
   • Exercise at least 3 times per week
   • Take regular breaks from social media
Using XOR
In [23]:
# Method 1: Create XOR-like problem from two continuous features
# Binarize two key features that might have XOR-like relationship with happiness
threshold_screen = df['Daily_Screen_Time(hrs)'].median()
threshold_sleep = df['Sleep_Quality(1-10)'].median()
# Create binary features
df['High_Screen_Time'] = (df['Daily_Screen_Time(hrs)'] > threshold_screen).astype(int)
df['High_Sleep_Quality'] = (df['Sleep_Quality(1-10)'] > threshold_sleep).astype(int)
# Create XOR target: 1 if features are different, 0 if same (XOR logic)
df['XOR_Target'] = (df['High_Screen_Time'] != df['High_Sleep_Quality']).astype(int)
print("XOR-like Data Distribution:")
print(df[['High_Screen_Time', 'High_Sleep_Quality', 'XOR_Target']].head(10))
print(f"\nXOR Target distribution:\n{df['XOR_Target'].value_counts()}")
XOR-like Data Distribution:
   High_Screen_Time  High_Sleep_Quality  XOR_Target
0                 0                   1           1
1                 0                   1           1
2                 1                   0           1
3                 1                   1           0
4                 1                   0           1
5                 1                   0           1
6                 1                   0           1
7                 1                   0           1
8                 0                   1           1
9                 1                   0           1

XOR Target distribution:
XOR_Target
1    389
0    111
Name: count, dtype: int64
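Since the target is built with `!=` on two 0/1 columns, it should coincide exactly with a bitwise XOR of those columns. A one-line sanity check, assuming the columns created above:
In [ ]:
# Sanity check: the constructed target equals the bitwise XOR of the two binary features
assert (df['XOR_Target'] == (df['High_Screen_Time'] ^ df['High_Sleep_Quality'])).all()
print("XOR_Target matches High_Screen_Time ^ High_Sleep_Quality")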
In [25]:
# Cell 2: Visualize the XOR-like relationship
plt.figure(figsize=(10, 8))
# Original features colored by XOR target
plt.subplot(2, 2, 1)
scatter = plt.scatter(df['Daily_Screen_Time(hrs)'],
                      df['Sleep_Quality(1-10)'],
                      c=df['XOR_Target'],
                      cmap='coolwarm',
                      alpha=0.6)
plt.xlabel('Daily Screen Time (hrs)')
plt.ylabel('Sleep Quality (1-10)')
plt.title('Original Features with XOR Target')
plt.colorbar(scatter, label='XOR Target (1=Different, 0=Same)')
plt.grid(True, alpha=0.3)
# Binarized features
plt.subplot(2, 2, 2)
# Create XOR truth table visualization
xor_table = np.array([[0, 1], [1, 0]])
plt.imshow(xor_table, cmap='binary', interpolation='nearest', extent=[-0.5, 1.5, -0.5, 1.5])
plt.xticks([0, 1], ['Low Screen', 'High Screen'])
plt.yticks([0, 1], ['Low Sleep', 'High Sleep'])
plt.title('XOR Truth Table Pattern')
plt.colorbar(label='XOR Output')
# Distribution of actual XOR pattern in data
plt.subplot(2, 2, 3)
xor_counts = df.groupby(['High_Screen_Time', 'High_Sleep_Quality'])['XOR_Target'].mean().unstack()
sns.heatmap(xor_counts, annot=True, fmt='.2f', cmap='YlOrRd',
            cbar_kws={'label': 'Proportion of XOR=1'})
plt.title('Actual XOR Pattern in Data')
plt.xlabel('High Sleep Quality (1=Yes)')
plt.ylabel('High Screen Time (1=Yes)')
# Relationship with original happiness
plt.subplot(2, 2, 4)
for screen_val in [0, 1]:
    for sleep_val in [0, 1]:
        mask = (df['High_Screen_Time'] == screen_val) & (df['High_Sleep_Quality'] == sleep_val)
        subset = df[mask]
        xor_val = screen_val ^ sleep_val  # XOR calculation
        plt.scatter([screen_val + sleep_val/10],
                    [subset['Happiness_Index(1-10)'].mean()],
                    s=subset.shape[0]*10,  # Size by count
                    c=['red' if xor_val == 1 else 'blue'],
                    alpha=0.7,
                    label=f'Screen={screen_val}, Sleep={sleep_val}, XOR={xor_val}')
plt.xlabel('Combined Feature Index')
plt.ylabel('Average Happiness Index')
plt.title('Happiness vs XOR Pattern')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
In [26]:
# Cell 3: Implement XOR Neural Network with JAX (similar to your example)
# Prepare XOR training data from our binarized features
X_xor = df[['High_Screen_Time', 'High_Sleep_Quality']].values
y_xor = df['XOR_Target'].values
print(f"XOR Data Shape: {X_xor.shape}")
print("Sample XOR data:")
for i in range(min(10, len(X_xor))):
    print(f"Input: {X_xor[i]}, Output: {y_xor[i]}")
# Initialize random key
key = random.PRNGKey(42)
# XOR forward pass with tanh and sigmoid
@jit
def forward_xor(params, layer_0):
    Weight1, bias1, Weight2, bias2 = params
    layer_1 = jnp.tanh(layer_0 @ Weight1 + bias1)
    layer_2 = jax.nn.sigmoid(layer_1 @ Weight2 + bias2)
    return layer_2
# Loss function for binary classification (binary cross-entropy)
@jit
def loss_xor(params):
    ypred = forward_xor(params, jnp.array(X_xor, dtype=jnp.float32))
    y_true = jnp.array(y_xor, dtype=jnp.float32).reshape(-1, 1)
    # Binary cross-entropy
    loss = -jnp.mean(y_true * jnp.log(ypred + 1e-8) +
                     (1 - y_true) * jnp.log(1 - ypred + 1e-8))
    return loss
# Gradient update step
@jit
def update_xor(params, rate=0.5):
    gradient = grad(loss_xor)(params)
    return jax.tree.map(lambda p, g: p - rate * g, params, gradient)
# Parameter initialization
def init_params_xor(key):
    key1, key2 = random.split(key)
    Weight1 = 0.5 * random.normal(key1, (2, 4))  # 2 inputs, 4 hidden neurons
    bias1 = jnp.zeros(4)
    Weight2 = 0.5 * random.normal(key2, (4, 1))  # 4 hidden, 1 output
    bias2 = jnp.zeros(1)
    return (Weight1, bias1, Weight2, bias2)
# Initialize parameters
params_xor = init_params_xor(key)
# Training loop
losses_xor = []
print("\nTraining XOR Neural Network with JAX...")
for step in range(2001):
    params_xor = update_xor(params_xor, rate=0.5)
    current_loss = loss_xor(params_xor)
    losses_xor.append(current_loss)
    if step % 400 == 0:
        print(f"Step {step:4d}, Loss: {current_loss:.4f}")
# Evaluate
pred_xor = forward_xor(params_xor, jnp.array(X_xor, dtype=jnp.float32))
pred_classes = (pred_xor > 0.5).astype(int)
print("\nPredictions (first 20):")
for i in range(min(20, len(X_xor))):
    print(f"Input: {X_xor[i]}, True: {y_xor[i]}, Pred: {pred_xor[i][0]:.3f}, Class: {pred_classes[i][0]}")
accuracy = jnp.mean(pred_classes.flatten() == y_xor)
print(f"\nXOR Accuracy: {accuracy:.4f}")
XOR Data Shape: (500, 2)
Sample XOR data:
Input: [0 1], Output: 1
Input: [0 1], Output: 1
Input: [1 0], Output: 1
Input: [1 1], Output: 0
Input: [1 0], Output: 1
Input: [1 0], Output: 1
Input: [1 0], Output: 1
Input: [1 0], Output: 1
Input: [0 1], Output: 1
Input: [1 0], Output: 1

Training XOR Neural Network with JAX...
Step    0, Loss: 0.6718
Step  400, Loss: 0.0666
Step  800, Loss: 0.0121
Step 1200, Loss: 0.0062
Step 1600, Loss: 0.0041
Step 2000, Loss: 0.0031

Predictions (first 20):
Input: [0 1], True: 1, Pred: 0.998, Class: 1
Input: [0 1], True: 1, Pred: 0.998, Class: 1
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [1 1], True: 0, Pred: 0.014, Class: 0
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [0 1], True: 1, Pred: 0.998, Class: 1
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [0 1], True: 1, Pred: 0.998, Class: 1
Input: [0 0], True: 0, Pred: 0.006, Class: 0
Input: [1 1], True: 0, Pred: 0.014, Class: 0
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [1 1], True: 0, Pred: 0.014, Class: 0
Input: [0 1], True: 1, Pred: 0.998, Class: 1
Input: [1 0], True: 1, Pred: 0.998, Class: 1
Input: [0 0], True: 0, Pred: 0.006, Class: 0
Input: [0 0], True: 0, Pred: 0.006, Class: 0
Input: [1 0], True: 1, Pred: 0.998, Class: 1

XOR Accuracy: 1.0000
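Each step above computes the gradient inside update_xor and then calls loss_xor again just to log it, so the loss is evaluated twice per step. jax.value_and_grad returns the loss and its gradient from a single evaluation; a minimal sketch of that variant, assuming the same params_xor pytree and loss_xor defined above:
In [ ]:
from jax import value_and_grad

@jit
def update_xor_with_loss(params, rate=0.5):
    # One evaluation gives both the current loss and the gradient
    current_loss, gradient = value_and_grad(loss_xor)(params)
    new_params = jax.tree.map(lambda p, g: p - rate * g, params, gradient)
    return new_params, current_loss

# Usage inside the training loop would look like:
# params_xor, current_loss = update_xor_with_loss(params_xor, rate=0.5)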
In [27]:
# Cell 4: Visualize XOR decision boundary
# Create mesh grid for visualization
x_min, x_max = -0.5, 1.5
y_min, y_max = -0.5, 1.5
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))
# Predict on mesh grid
grid_points = np.c_[xx.ravel(), yy.ravel()]
grid_preds = forward_xor(params_xor, jnp.array(grid_points, dtype=jnp.float32))
grid_preds = np.array(grid_preds).reshape(xx.shape)
plt.figure(figsize=(12, 5))
# Decision boundary
plt.subplot(1, 2, 1)
plt.contourf(xx, yy, grid_preds, levels=20, cmap='RdYlBu', alpha=0.8)
plt.colorbar(label='Prediction Probability')
plt.scatter(X_xor[:, 0], X_xor[:, 1], c=y_xor,
            cmap='coolwarm', edgecolors='black', s=100)
plt.xlabel('High Screen Time (0=Low, 1=High)')
plt.ylabel('High Sleep Quality (0=Low, 1=High)')
plt.title('XOR Decision Boundary')
plt.xticks([0, 1])
plt.yticks([0, 1])
plt.grid(True, alpha=0.3)
# Training loss
plt.subplot(1, 2, 2)
plt.plot(losses_xor, linewidth=2)
plt.xlabel('Training Step')
plt.ylabel('Loss (Binary Cross-Entropy)')
plt.title('XOR Training Loss')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
In [28]:
# Cell 5: Apply XOR concept to more complex relationships
# Create XOR-like relationships with multiple features
print("\nCreating more complex XOR-like patterns...")
# Feature 1: High Screen Time XOR High Stress
threshold_stress = df['Stress_Level(1-10)'].median()
df['High_Stress'] = (df['Stress_Level(1-10)'] > threshold_stress).astype(int)
df['Screen_Stress_XOR'] = (df['High_Screen_Time'] != df['High_Stress']).astype(int)
# Feature 2: Good Sleep XOR Regular Exercise
threshold_exercise = df['Exercise_Frequency(week)'].median()
df['High_Exercise'] = (df['Exercise_Frequency(week)'] > threshold_exercise).astype(int)
df['Sleep_Exercise_XOR'] = (df['High_Sleep_Quality'] != df['High_Exercise']).astype(int)
# Feature 3: Low Stress XOR Social Media Break
threshold_days = df['Days_Without_Social_Media'].median()
df['Long_Break'] = (df['Days_Without_Social_Media'] > threshold_days).astype(int)
df['Stress_Break_XOR'] = (df['High_Stress'] != df['Long_Break']).astype(int)
# Visualize multiple XOR relationships
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Plot 1: Screen Time vs Sleep Quality XOR
sc1 = axes[0, 0].scatter(df['Daily_Screen_Time(hrs)'],
                         df['Sleep_Quality(1-10)'],
                         c=df['XOR_Target'], cmap='coolwarm', alpha=0.6)
axes[0, 0].set_xlabel('Screen Time')
axes[0, 0].set_ylabel('Sleep Quality')
axes[0, 0].set_title('Screen Time XOR Sleep Quality')
plt.colorbar(sc1, ax=axes[0, 0])
# Plot 2: Screen Time vs Stress XOR
sc2 = axes[0, 1].scatter(df['Daily_Screen_Time(hrs)'],
                         df['Stress_Level(1-10)'],
                         c=df['Screen_Stress_XOR'], cmap='viridis', alpha=0.6)
axes[0, 1].set_xlabel('Screen Time')
axes[0, 1].set_ylabel('Stress Level')
axes[0, 1].set_title('Screen Time XOR Stress Level')
plt.colorbar(sc2, ax=axes[0, 1])
# Plot 3: Sleep vs Exercise XOR
sc3 = axes[1, 0].scatter(df['Sleep_Quality(1-10)'],
                         df['Exercise_Frequency(week)'],
                         c=df['Sleep_Exercise_XOR'], cmap='plasma', alpha=0.6)
axes[1, 0].set_xlabel('Sleep Quality')
axes[1, 0].set_ylabel('Exercise Frequency')
axes[1, 0].set_title('Sleep Quality XOR Exercise')
plt.colorbar(sc3, ax=axes[1, 0])
# Plot 4: Stress vs Social Media Break XOR
sc4 = axes[1, 1].scatter(df['Stress_Level(1-10)'],
                         df['Days_Without_Social_Media'],
                         c=df['Stress_Break_XOR'], cmap='summer', alpha=0.6)
axes[1, 1].set_xlabel('Stress Level')
axes[1, 1].set_ylabel('Days Without Social Media')
axes[1, 1].set_title('Stress XOR Social Media Break')
plt.colorbar(sc4, ax=axes[1, 1])
plt.tight_layout()
plt.show()
Creating more complex XOR-like patterns...
In [29]:
# Cell 6: Multi-layer XOR with more features
# Create a more complex XOR-like problem with 3 features
print("\nCreating 3-feature XOR-like problem...")
# Create ternary XOR: (A XOR B) XOR C
df['Triple_XOR'] = ((df['High_Screen_Time'] != df['High_Sleep_Quality']) !=
                    df['High_Stress']).astype(int)
# Prepare data for 3-input neural network
X_triple = df[['High_Screen_Time', 'High_Sleep_Quality', 'High_Stress']].values
y_triple = df['Triple_XOR'].values
print(f"Triple XOR distribution:\n{pd.Series(y_triple).value_counts()}")
# JAX implementation for 3-input XOR
@jit
def forward_triple(params, layer_0):
    Weight1, bias1, Weight2, bias2 = params
    layer_1 = jnp.tanh(layer_0 @ Weight1 + bias1)
    layer_2 = jax.nn.sigmoid(layer_1 @ Weight2 + bias2)
    return layer_2
def init_params_triple(key):
    key1, key2 = random.split(key)
    Weight1 = 0.5 * random.normal(key1, (3, 8))  # 3 inputs, 8 hidden neurons
    bias1 = jnp.zeros(8)
    Weight2 = 0.5 * random.normal(key2, (8, 1))  # 8 hidden, 1 output
    bias2 = jnp.zeros(1)
    return (Weight1, bias1, Weight2, bias2)
@jit
def loss_triple(params):
    ypred = forward_triple(params, jnp.array(X_triple, dtype=jnp.float32))
    y_true = jnp.array(y_triple, dtype=jnp.float32).reshape(-1, 1)
    loss = -jnp.mean(y_true * jnp.log(ypred + 1e-8) +
                     (1 - y_true) * jnp.log(1 - ypred + 1e-8))
    return loss
@jit
def update_triple(params, rate=0.5):
    gradient = grad(loss_triple)(params)
    return jax.tree.map(lambda p, g: p - rate * g, params, gradient)
# Train
params_triple = init_params_triple(key)
losses_triple = []
print("\nTraining 3-input XOR network...")
for step in range(3001):
    params_triple = update_triple(params_triple, rate=0.5)
    losses_triple.append(loss_triple(params_triple))
    if step % 600 == 0:
        print(f"Step {step:4d}, Loss: {losses_triple[-1]:.4f}")
# Evaluate
pred_triple = forward_triple(params_triple, jnp.array(X_triple, dtype=jnp.float32))
pred_triple_classes = (pred_triple > 0.5).astype(int)
accuracy_triple = jnp.mean(pred_triple_classes.flatten() == y_triple)
print(f"\nTriple XOR Accuracy: {accuracy_triple:.4f}")
Creating 3-feature XOR-like problem...
Triple XOR distribution:
1    274
0    226
Name: count, dtype: int64

Training 3-input XOR network...
Step    0, Loss: 0.7022
Step  600, Loss: 0.0134
Step 1200, Loss: 0.0038
Step 1800, Loss: 0.0021
Step 2400, Loss: 0.0014
Step 3000, Loss: 0.0010

Triple XOR Accuracy: 1.0000
In [30]:
# Cell 7: Scikit-learn MLP for XOR classification
print("\n" + "="*50)
print("Scikit-learn MLP for XOR Classification")
print("="*50)
# Use the original XOR problem (2 features)
from sklearn.neural_network import MLPClassifier
mlp_xor = MLPClassifier(
    hidden_layer_sizes=(4, 2),  # Similar to our JAX architecture
    activation='tanh',          # Using tanh like our JAX implementation
    solver='adam',
    alpha=0.001,
    learning_rate_init=0.5,
    max_iter=2000,
    random_state=42,
    verbose=True
)
mlp_xor.fit(X_xor, y_xor)
# Predictions
y_pred_mlp = mlp_xor.predict(X_xor)
accuracy_mlp = accuracy_score(y_xor, y_pred_mlp)
print(f"\nMLP XOR Accuracy: {accuracy_mlp:.4f}")
print("\nClassification Report:")
print(classification_report(y_xor, y_pred_mlp))
# Visualize MLP decision boundary
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# Create mesh grid
x_min, x_max = -0.5, 1.5
y_min, y_max = -0.5, 1.5
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))
# MLP predictions
Z = mlp_xor.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)
# MLP boundary
contour1 = axes[0].contourf(xx, yy, Z, levels=20, cmap='RdYlBu', alpha=0.8)
axes[0].scatter(X_xor[:, 0], X_xor[:, 1], c=y_xor,
                cmap='coolwarm', edgecolors='black', s=100)
axes[0].set_title(f'MLP Decision Boundary\n(Accuracy: {accuracy_mlp:.3f})')
axes[0].set_xlabel('High Screen Time')
axes[0].set_ylabel('High Sleep Quality')
# JAX boundary
contour2 = axes[1].contourf(xx, yy, grid_preds, levels=20, cmap='RdYlBu', alpha=0.8)
axes[1].scatter(X_xor[:, 0], X_xor[:, 1], c=y_xor,
                cmap='coolwarm', edgecolors='black', s=100)
axes[1].set_title(f'JAX Decision Boundary\n(Accuracy: {accuracy:.3f})')
axes[1].set_xlabel('High Screen Time')
axes[1].set_ylabel('High Sleep Quality')
# Comparison
axes[2].plot(losses_xor, label='JAX Loss', linewidth=2)
axes[2].plot(mlp_xor.loss_curve_, label='MLP Loss', linewidth=2)
axes[2].set_xlabel('Iteration')
axes[2].set_ylabel('Loss')
axes[2].set_title('Training Loss Comparison')
axes[2].legend()
axes[2].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
==================================================
Scikit-learn MLP for XOR Classification
==================================================
Iteration 1, loss = 0.62004039
Iteration 2, loss = 0.55497792
Iteration 3, loss = 0.53210253
Iteration 4, loss = 0.54456884
Iteration 5, loss = 0.53750454
Iteration 6, loss = 0.53823121
Iteration 7, loss = 0.53398140
Iteration 8, loss = 0.53079015
Iteration 9, loss = 0.52988101
Iteration 10, loss = 0.52979968
Iteration 11, loss = 0.53069518
Iteration 12, loss = 0.53061538
Iteration 13, loss = 0.53232633
Iteration 14, loss = 0.53072177
Iteration 15, loss = 0.53252101
Iteration 16, loss = 0.53441902
Iteration 17, loss = 0.53193421
Iteration 18, loss = 0.53428957
Iteration 19, loss = 0.53073038
Iteration 20, loss = 0.53088722
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
MLP XOR Accuracy: 0.7780
Classification Report:
precision recall f1-score support
0 0.00 0.00 0.00 111
1 0.78 1.00 0.88 389
accuracy 0.78 500
macro avg 0.39 0.50 0.44 500
weighted avg 0.61 0.78 0.68 500
/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
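The MLP above stalls at roughly the majority-class accuracy (77.8%), which is consistent with the loss curve flattening near 0.53: with adam, learning_rate_init=0.5 is an unusually large step size for such a small network. A minimal sketch of a retry with a more conventional learning rate (no claim about the resulting accuracy, since it is not run here):
In [ ]:
# Hypothetical retry: same XOR data, smaller adam step size and one wider hidden layer
mlp_xor_retry = MLPClassifier(
    hidden_layer_sizes=(8,),
    activation='tanh',
    solver='adam',
    alpha=0.001,
    learning_rate_init=0.01,  # much smaller step than 0.5
    max_iter=2000,
    random_state=42,
)
mlp_xor_retry.fit(X_xor, y_xor)
print("Retry accuracy:", accuracy_score(y_xor, mlp_xor_retry.predict(X_xor)))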
In [34]:
# Cell 8: XOR-inspired feature engineering for happiness prediction
print("\n" + "="*50)
print("XOR-Inspired Feature Engineering")
print("="*50)
# Create XOR-inspired interaction features
df['Screen_Sleep_XOR'] = df['High_Screen_Time'] != df['High_Sleep_Quality']
df['Screen_Stress_XOR'] = df['High_Screen_Time'] != df['High_Stress']
df['Sleep_Exercise_XOR'] = df['High_Sleep_Quality'] != df['High_Exercise']
# Convert to numeric
xor_features = ['Screen_Sleep_XOR', 'Screen_Stress_XOR', 'Sleep_Exercise_XOR']
for feat in xor_features:
    df[feat] = df[feat].astype(int)
# Analyze relationship with happiness
print("\nAverage Happiness by XOR patterns:")
for feat in xor_features:
    avg_happiness = df.groupby(feat)['Happiness_Index(1-10)'].mean()
    print(f"\n{feat}:")
    print(f"  XOR=0 (Same): {avg_happiness[0]:.2f}")
    print(f"  XOR=1 (Different): {avg_happiness[1]:.2f}")
    print(f"  Difference: {avg_happiness[1] - avg_happiness[0]:.2f}")
# Visualize XOR patterns vs happiness
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Screen-Sleep XOR
for xor_val in [0, 1]:
    subset = df[df['Screen_Sleep_XOR'] == xor_val]
    axes[0, 0].hist(subset['Happiness_Index(1-10)'], bins=20,
                    alpha=0.6, label=f'XOR={xor_val}', density=True)
axes[0, 0].set_xlabel('Happiness Index(1-10)')
axes[0, 0].set_ylabel('Density')
axes[0, 0].set_title('Screen Time XOR Sleep Quality\nvs Happiness')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)
# Screen-Stress XOR
for xor_val in [0, 1]:
    subset = df[df['Screen_Stress_XOR'] == xor_val]
    axes[0, 1].hist(subset['Happiness_Index(1-10)'], bins=20,
                    alpha=0.6, label=f'XOR={xor_val}', density=True)
axes[0, 1].set_xlabel('Happiness Index(1-10)')
axes[0, 1].set_title('Screen Time XOR Stress Level\nvs Happiness')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)
# Sleep-Exercise XOR
for xor_val in [0, 1]:
    subset = df[df['Sleep_Exercise_XOR'] == xor_val]
    axes[1, 0].hist(subset['Happiness_Index(1-10)'], bins=20,
                    alpha=0.6, label=f'XOR={xor_val}', density=True)
axes[1, 0].set_xlabel('Happiness Index(1-10)')
axes[1, 0].set_ylabel('Density')
axes[1, 0].set_title('Sleep Quality XOR Exercise\nvs Happiness')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)
# All XOR features correlation with happiness
xor_corr = df[xor_features + ['Happiness_Index(1-10)']].corr()['Happiness_Index(1-10)']
axes[1, 1].bar(range(len(xor_features)), xor_corr[xor_features])
axes[1, 1].set_xticks(range(len(xor_features)))
axes[1, 1].set_xticklabels([f.replace('_XOR', '') for f in xor_features], rotation=45)
axes[1, 1].set_ylabel('Correlation with Happiness')
axes[1, 1].set_title('XOR Feature Correlation\nwith Happiness')
axes[1, 1].grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()
==================================================
XOR-Inspired Feature Engineering
==================================================

Average Happiness by XOR patterns:

Screen_Sleep_XOR:
  XOR=0 (Same): 8.62
  XOR=1 (Different): 8.31
  Difference: -0.32

Screen_Stress_XOR:
  XOR=0 (Same): 8.45
  XOR=1 (Different): 8.17
  Difference: -0.28

Sleep_Exercise_XOR:
  XOR=0 (Same): 8.44
  XOR=1 (Different): 8.31
  Difference: -0.13
In [36]:
# Cell 9: Summary and insights
print("\n" + "="*60)
print("XOR ANALYSIS SUMMARY")
print("="*60)
print("\n1. XOR Patterns Found in Mental Health Data:")
print(f" • Screen Time XOR Sleep Quality: {df['XOR_Target'].mean():.2%} of cases")
print(f" • Accuracy of neural network prediction: {accuracy:.2%}")
print("\n2. Key XOR Relationships with Happiness:")
for feat in xor_features:
    avg_0 = df[df[feat] == 0]['Happiness_Index(1-10)'].mean()
    avg_1 = df[df[feat] == 1]['Happiness_Index(1-10)'].mean()
    diff = avg_1 - avg_0
    direction = "higher" if diff > 0 else "lower"
    print(f"   • {feat}: XOR=1 has {abs(diff):.2f} {direction} happiness")
print("\n3. Neural Network Performance:")
print(f" • JAX XOR Network Accuracy: {accuracy:.4f}")
print(f" • Scikit-learn MLP Accuracy: {accuracy_mlp:.4f}")
print(f" • Triple XOR Network Accuracy: {accuracy_triple:.4f}")
print("\n4. Practical Insights for Mental Health:")
print(" • When screen time and sleep quality are BOTH high or BOTH low,")
print(" happiness tends to be more predictable")
print(" • Mismatched patterns (high screen + low sleep OR low screen + high sleep)")
print(" show more varied happiness outcomes")
print(" • Neural networks can capture these non-linear XOR-like relationships")
print("\n5. Recommendations:")
print(" • Aim for consistency: Either reduce screen time AND improve sleep,")
print(" or accept that mismatches create unpredictable outcomes")
print(" • Monitor XOR patterns in your daily habits")
print(" • Use XOR-inspired feature engineering to understand complex interactions")
============================================================
XOR ANALYSIS SUMMARY
============================================================
1. XOR Patterns Found in Mental Health Data:
• Screen Time XOR Sleep Quality: 77.80% of cases
• Accuracy of neural network prediction: 100.00%
2. Key XOR Relationships with Happiness:
• Screen_Sleep_XOR: XOR=1 has 0.32 lower happiness
• Screen_Stress_XOR: XOR=1 has 0.28 lower happiness
• Sleep_Exercise_XOR: XOR=1 has 0.13 lower happiness
3. Neural Network Performance:
• JAX XOR Network Accuracy: 1.0000
• Scikit-learn MLP Accuracy: 0.7780
• Triple XOR Network Accuracy: 1.0000
4. Practical Insights for Mental Health:
• When screen time and sleep quality are BOTH high or BOTH low,
happiness tends to be more predictable
• Mismatched patterns (high screen + low sleep OR low screen + high sleep)
show more varied happiness outcomes
• Neural networks can capture these non-linear XOR-like relationships
5. Recommendations:
• Aim for consistency: Either reduce screen time AND improve sleep,
or accept that mismatches create unpredictable outcomes
• Monitor XOR patterns in your daily habits
• Use XOR-inspired feature engineering to understand complex interactions
In [ ]: