import sklearn.linear_model import LinearRegression
import numpy as np

  Cell In[3], line 1
    import sklearn.linear_model import LinearRegression
                                ^
SyntaxError: invalid syntax

from sklearn.linear_model import LinearRegression
import numpy as np

# Toy training set: four points that lie exactly on the line y = 2x.
# The feature array is reshaped to a single column (shape (4, 1)).
x = np.array([1, 2, 3, 4]).reshape(-1, 1)
y = np.array([2, 4, 6, 8])

# fit() returns the fitted estimator, so construction and training chain.
model = LinearRegression().fit(x, y)

# Predict the target for the unseen input x = 5.
print(model.predict([[5]]))  # prints a one-element array, approximately [10.]

[10.]

import jax
import jax.numpy as jnp
from jax import random
import pandas as pd
import altair as alt

# Data
# Per-section exam results for sections A, B and C; every list holds one value
# per section, in that order.
# NOTE(review): Section B lists 24 passed + 5 failed = 29, which exceeds its
# 28 total students -- confirm the source figures.
data = {
    "Section": ["A", "B", "C"],
    "Total Students": [28, 28, 28],
    "Total Stds Passed": [22, 24, 13],
    "Total Stds Fail": [6, 5, 15],
    "Total Pass %": [78.57, 85.71, 46.43],
    "Total Fail %": [21.43, 17.86, 53.57],
    "Mean Mark": [75.7, 71.7, 57.9],
    "National Mean Mark": [67.09, 75, 67.09]
}

# One row per section, one column per key of the dict above.
df = pd.DataFrame(data)
# Bare expression: a no-op as a plain statement; presumably here so a notebook
# cell renders the frame.
df

# Root JAX PRNG key created from the fixed seed 0.
key = random.PRNGKey(0)

# Function to simulate student pass/fail
def simulate_pass_fail(key, total_students, total_pass):
    """Return a randomly ordered 0/1 outcome vector for one section.

    Args:
        key: JAX PRNG key that drives the shuffle.
        total_students: Number of students in the section; this is the
            length of the returned vector.
        total_pass: Number of students who passed; this is the number of
            1 entries in the returned vector.

    Returns:
        A 1-D integer JAX array containing ``total_pass`` ones (pass) and
        ``total_students - total_pass`` zeros (fail) in shuffled order.

    Raises:
        ValueError: If ``total_pass`` is negative or larger than
            ``total_students``.
    """
    # Without this check a pass count above the class size would silently
    # produce a vector of the wrong length ([0] * negative is an empty list).
    if not 0 <= total_pass <= total_students:
        raise ValueError(
            "total_pass must satisfy 0 <= total_pass <= total_students, got "
            f"total_pass={total_pass}, total_students={total_students}"
        )

    # 1 = Pass, 0 = Fail. The explicit integer dtype keeps the result integral
    # even when the section is empty.
    outcomes = jnp.array(
        [1] * total_pass + [0] * (total_students - total_pass),
        dtype=int,
    )
    return random.permutation(key, outcomes)

# Estimate each section's pass rate as the mean of its simulated 0/1 outcomes.
# (Shuffling does not change a mean, so each value equals passed / total.)
pass_prob = []
for _, section in df.iterrows():
    # Split off a fresh subkey so every section gets its own random stream.
    key, subkey = random.split(key)
    outcomes = simulate_pass_fail(
        subkey,
        section["Total Students"],
        section["Total Stds Passed"],
    )
    pass_prob.append(jnp.mean(outcomes))

# Store the per-section fractions as a new column.
df["Simulated Pass Prob"] = jnp.array(pass_prob)
df

# Reshape to long format for Altair: one row per (Section, Category) pair,
# with the value of that category in the "Percentage" column.
df_viz = pd.melt(
    df,
    id_vars=["Section"],
    value_vars=["Total Pass %", "Total Fail %", "Simulated Pass Prob"],
    var_name="Category",
    value_name="Percentage",
)


def _as_percentage(record):
    """Scale the simulated pass value by 100; pass other categories through."""
    if record["Category"] == "Simulated Pass Prob":
        return record["Percentage"] * 100
    return record["Percentage"]


# Put every category on the same 0-100 scale before plotting.
df_viz["Percentage"] = df_viz.apply(_as_percentage, axis=1)

# Bar chart of every percentage category per section, coloured by category.
bar_encoding = {
    "x": alt.X('Section:N', title='Section'),
    "y": alt.Y('Percentage:Q', title='Percentage'),
    "color": 'Category:N',
    "tooltip": ['Section', 'Category', 'Percentage'],
}

chart = (
    alt.Chart(df_viz)
    .mark_bar()
    .encode(**bar_encoding)
    .properties(
        title="Pass % and Fail % by Section (with JAX Simulation)",
        width=500,
        height=300,
    )
)

# Bare expression so a notebook cell renders the chart.
chart

import pandas as pd

# Per-section exam results for sections A, B and C (one list entry each).
data = {
    "Section": ["A", "B", "C"],
    "Total Students": [28, 28, 28],
    "Total Stds Passed": [22, 24, 13],
    "Total Stds Fail": [6, 5, 15],
    "Total Pass %": [78.57, 85.71, 46.43],
    "Total Fail %": [21.43, 17.86, 53.57],
    "Mean Mark": [75.7, 71.7, 57.9],
    "National Mean Mark": [67.09, 75, 67.09]
}

df = pd.DataFrame(data)
df

import jax.numpy as jnp
from jax import grad, jit

# Convert data to JAX arrays
x = jnp.array(df["Total Pass %"])
y = jnp.array(df["Mean Mark"])

# Standardize the feature before optimizing. On the raw percentages (values
# of roughly 46-86) plain gradient descent overshoots at any practical step
# size and the parameters blow up to NaN; on a zero-mean, unit-variance
# feature the same loop converges quickly.
x_mean = jnp.mean(x)
x_std = jnp.std(x)
x_std = jnp.where(x_std > 0, x_std, 1.0)  # constant feature: avoid dividing by zero
x_scaled = (x - x_mean) / x_std

# Initialize parameters
m = jnp.array(0.0)
c = jnp.array(0.0)
learning_rate = 0.1  # safe only because the feature is standardized
epochs = 1000


# Define loss function (Mean Squared Error)
def loss(params, x, y):
    """Return the mean squared error of the line ``y = m * x + c``.

    Args:
        params: Length-2 array holding the slope ``m`` and intercept ``c``.
        x: Feature values.
        y: Target values, same shape as ``x``.

    Returns:
        Scalar mean of the squared residuals.
    """
    m, c = params
    y_pred = m * x + c
    return jnp.mean((y - y_pred)**2)


# Gradient descent
params = jnp.array([m, c])
grad_loss = grad(loss)


@jit
def update_step(params, x, y):
    """Apply one gradient-descent update and return the new parameters."""
    return params - learning_rate * grad_loss(params, x, y)


for _ in range(epochs):
    params = update_step(params, x_scaled, y)

# The loop fitted y = m_scaled * (x - x_mean) / x_std + c_scaled. Rewrite that
# line in terms of the original, unscaled x so m and c can be applied directly
# to "Total Pass %" values.
m_scaled, c_scaled = params
m = m_scaled / x_std
c = c_scaled - m_scaled * x_mean / x_std
params = jnp.array([m, c])
print(f"Fitted line: y = {m:.2f}x + {c:.2f}")

Fitted line: y = nanx + nan

import matplotlib.pyplot as plt

# Evaluate the fitted line at the observed pass percentages.
y_pred = c + m * x

# Observed marks as points, fitted line dashed, on a single pair of axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x, y, color='blue', label='Actual Mean Marks')
ax.plot(x, y_pred, color='red', linestyle='--', label='Regression Line')
ax.set_xlabel('Total Pass %')
ax.set_ylabel('Mean Mark')
ax.set_title('Regression: Mean Mark vs Total Pass %')
ax.legend()
plt.show()

import altair as alt

# Work on a copy so the fitted values are not written into df itself.
df_viz = df.copy()
df_viz["Predicted Mean Mark"] = m * df_viz["Total Pass %"] + c

# Both layers read from the same frame.
base = alt.Chart(df_viz)

# Observed mean marks as points.
chart = base.mark_circle(size=100).encode(
    x='Total Pass %',
    y='Mean Mark',
    tooltip=['Section', 'Mean Mark', 'Total Pass %'],
)

# Fitted values as a red line.
line = base.mark_line(color='red').encode(
    x='Total Pass %',
    y='Predicted Mean Mark',
)

# Overlay the two layers; bare expression so a notebook cell renders it.
alt.layer(chart, line)

import pandas as pd
import jax.numpy as jnp
from jax import grad, jit

# Dataset
data = {
    "Section": ["A", "B", "C"],
    "Total Students": [28, 28, 28],
    "Total Stds Passed": [22, 24, 13],
    "Total Stds Fail": [6, 5, 15],
    "Total Pass %": [78.57, 85.71, 46.43],
    "Total Fail %": [21.43, 17.86, 53.57],
    "Mean Mark": [75.7, 71.7, 57.9],
    "National Mean Mark": [67.09, 75, 67.09]
}

df = pd.DataFrame(data)
df

# Features and target
X = jnp.array(df[["Total Pass %", "Total Fail %", "National Mean Mark"]])
y = jnp.array(df["Mean Mark"])

# Standardize every feature column. On the raw columns (values of roughly
# 18-86) plain gradient descent overshoots and the weights blow up to NaN;
# with zero-mean, unit-variance columns the same loop is stable.
X_mean = jnp.mean(X, axis=0)
X_std = jnp.std(X, axis=0)
X_std = jnp.where(X_std > 0, X_std, 1.0)  # constant column: avoid dividing by zero
X_scaled = (X - X_mean) / X_std

# Initialize weights and bias
w = jnp.zeros(3)
b = 0.0
learning_rate = 0.1  # safe only because the features are standardized
epochs = 10000


# Define Mean Squared Error loss
def loss(params, X, y):
    """Return the mean squared error of the linear model ``X @ w + b``.

    Args:
        params: Length-4 array; the first three entries are the weights
            ``w`` and the last entry is the bias ``b``.
        X: Feature matrix with three columns.
        y: Target values, one per row of ``X``.

    Returns:
        Scalar mean of the squared residuals.
    """
    w, b = params[:3], params[3]
    y_pred = jnp.dot(X, w) + b
    return jnp.mean((y - y_pred)**2)


# Gradient function
grad_loss = grad(loss)

# Combine weights and bias for optimization
params = jnp.append(w, b)


@jit
def update_step(params, X, y):
    """Apply one gradient-descent update and return the new parameters."""
    return params - learning_rate * grad_loss(params, X, y)


# Gradient Descent
for _ in range(epochs):
    params = update_step(params, X_scaled, y)

# The loop fitted y = sum(w_scaled * (X - X_mean) / X_std) + b_scaled. Rewrite
# the model in terms of the original, unscaled columns so w_opt and b_opt can
# be applied directly to X.
# NOTE: with 3 rows and 4 parameters the model has more freedom than data, so
# it is expected to reproduce the training marks almost exactly; treat the
# individual weights as illustrative rather than meaningful.
w_scaled, b_scaled = params[:3], params[3]
w_opt = w_scaled / X_std
b_opt = b_scaled - jnp.dot(X_mean, w_opt)
params = jnp.append(w_opt, b_opt)
print(f"Optimized weights: {w_opt}")
print(f"Optimized bias: {b_opt}")

Optimized weights: [nan nan nan]
Optimized bias: nan

# Evaluate the fitted linear model on the training features and store the
# result next to the observed marks.
y_pred = b_opt + jnp.dot(X, w_opt)
df["Predicted Mean Mark"] = y_pred
df

import altair as alt

# Both layers read from the same frame.
base = alt.Chart(df)

# Observed mean mark per section as fixed-colour blue points.
chart = base.mark_circle(size=100).encode(
    x='Section',
    y='Mean Mark',
    color=alt.value('blue'),
    tooltip=['Section', 'Mean Mark', 'Predicted Mean Mark'],
)

# Model output per section as a red line.
predicted_line = base.mark_line(color='red').encode(
    x='Section',
    y='Predicted Mean Mark',
)

# Overlay the two layers; bare expression so a notebook cell renders it.
alt.layer(chart, predicted_line)

# ===============================
# Beginner-Friendly ML in Jupyter
# ===============================

# 1. Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix

# -------------------------------
# 2. Load Dataset
# -------------------------------
# Example dataset for regression: House Prices
# You can replace this with your own CSV file
# Ten hand-written rows. In this toy data every price equals 200 * size, so
# the regression target is an exact linear function of one feature.
data = pd.DataFrame({
    'size': [750, 800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200],
    'bedrooms': [2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
    'price': [150000, 160000, 170000, 180000, 190000, 200000, 210000, 220000, 230000, 240000],
    'sold': [1, 1, 0, 0, 1, 1, 0, 0, 1, 1]  # Example for classification
})

# Preview data
print("Dataset:")
# NOTE(review): display is not imported in this cell -- presumably the
# IPython/Jupyter builtin; confirm before running as a plain script.
display(data.head())

# -------------------------------
# 3. Data Visualization
# -------------------------------
# Scatter plots of price against each candidate feature.
sns.pairplot(
    data,
    x_vars=['size', 'bedrooms'],
    y_vars='price',
    height=4,
    kind='scatter',
)
plt.show()

# Number of rows for each value of the 'sold' flag.
count_axes = sns.countplot(data=data, x='sold')
count_axes.set_title("Sold Status Count")
plt.show()

# -------------------------------
# 4. Regression: Predict House Price
# -------------------------------
feature_columns = ['size', 'bedrooms']
X = data[feature_columns]  # Features
y = data['price']          # Target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the fitted estimator, so construction and training chain.
reg_model = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = reg_model.predict(X_test)

# Evaluate on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
print(f"Regression Mean Squared Error: {mse}")

# Actual (blue) and predicted (red) prices against size for the held-out rows.
for prices, colour, legend_label in (
    (y_test, 'blue', 'Actual'),
    (y_pred, 'red', 'Predicted'),
):
    plt.scatter(X_test['size'], prices, color=colour, label=legend_label)
plt.xlabel('Size')
plt.ylabel('Price')
plt.legend()
plt.show()

# -------------------------------
# 5. Classification: Predict Sold Status
# -------------------------------
X_class = data[['size', 'bedrooms']]
y_class = data['sold']

# Split
# Stratify on the label so that the small test set keeps the sold/unsold mix
# of the full data. An unstratified split can put a single class into the
# test rows, which makes the accuracy and confusion matrix uninformative.
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
)

# Train
clf_model = LogisticRegression()
clf_model.fit(X_train_c, y_train_c)

# Predict
y_pred_c = clf_model.predict(X_test_c)

# Evaluate
acc = accuracy_score(y_test_c, y_pred_c)
# Pass the known labels explicitly so the matrix is always 2x2 (rows = true
# class, columns = predicted class), even if a class is missing from the
# test rows or from the predictions.
cm = confusion_matrix(y_test_c, y_pred_c, labels=[0, 1])

print(f"Classification Accuracy: {acc}")
print("Confusion Matrix:")
print(cm)

Dataset:

Regression Mean Squared Error: 8.470329472543003e-22

Classification Accuracy: 1.0
Confusion Matrix:
[[2]]

/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.
  warnings.warn(

# XOR of two single-bit inputs: the result is 1 only when the bits differ.
a, b = 1, 0
result = a ^ b  # ^ is the bitwise XOR operator
print(result)  # prints 1

Machine Learning¶

Simple ML example in Jupyter¶

Imported dataset¶

Regression¶

JAX¶

Multiple Linear Regression using JAX¶

$y = w_1 x_1 + w_2 x_2 + w_3 x_3 + b$¶

Activities¶

XOR¶

	Section	Total Students	Total Stds Passed	Total Stds Fail	Total Pass %	Total Fail %	Mean Mark	National Mean Mark
0	A	28	22	6	78.57	21.43	75.7	67.09
1	B	28	24	5	85.71	17.86	71.7	75.00
2	C	28	13	15	46.43	53.57	57.9	67.09

	size	bedrooms	price	sold
0	750	2	150000	1
1	800	2	160000	1
2	850	3	170000	0
3	900	3	180000	0
4	950	3	190000	1

Input A	Input B	A XOR B
0	0	0
0	1	1
1	0	1
1	1	0

Machine Learning¶

Simple ML example in Jupyter¶

Imported dataset¶

Regression¶

JAX¶

Multiple Linear Regression using JAX¶

$y = w_1 x_1 + w_2 x_2 + w_3 x_3 + b$¶

Activities¶

XOR¶

$y = w_1 x_1 + w_2 x_2 + w_3 x_3 + b$¶