[Dorji Tshezom] - Fab Futures - Data Science

Machine Learning Model¶

What is a machine learning model?¶

A machine learning model is a mathematical or computational representation of a pattern in data. It “learns” from examples and can then make predictions or decisions on new, unseen data.

Think of it like this:

You teach it → Feed it examples of input–output pairs

It learns → Finds patterns and relationships in the data

It predicts → Given new inputs, it predicts the output

Example in Real Life¶

Suppose you want to predict a student’s exam score based on how many hours they studied:

Hours Studied | Score
------------- | -----
1             | 50
2             | 55
3             | 65
4             | 70

A machine learning model will look at these examples, figure out the pattern (more hours → higher score), and then can predict the score if the student studies 5 hours.
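To make this concrete, here is a minimal sketch of that prediction in scikit-learn (my addition; the answer of ≈ 77.5 simply comes from fitting a straight line to the four points above):

from sklearn.linear_model import LinearRegression
import numpy as np

hours = np.array([[1], [2], [3], [4]])   # feature: hours studied
score = np.array([50, 55, 65, 70])       # target: exam score

model = LinearRegression().fit(hours, score)
print(model.predict(np.array([[5]])))    # ≈ 77.5 for 5 hours of study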

Key Components¶

Data (Input): Features or independent variables (e.g., hours studied, rainfall, temperature)

Output (Target): What you want to predict (e.g., exam score, crop yield, disease presence)

Learning Algorithm: Method/model that finds patterns in the data (e.g., Linear Regression, Decision Tree, Neural Networks)

Predictions: The model’s output on new/unseen data

For beginners starting ML on tabular data, the most important libraries are:¶

NumPy, Pandas, Matplotlib/Seaborn, scikit-learn, and JAX

In [2]:
import numpy as np
import matplotlib.pyplot as plt

# Compare four common neural-network activation functions on [-3, 3]
x = np.linspace(-3, 3, 100)
plt.plot(x, 1/(1 + np.exp(-x)), label='sigmoid')                   # squashes to (0, 1)
plt.plot(x, np.tanh(x), label='tanh')                              # squashes to (-1, 1)
plt.plot(x, np.where(x < 0, 0, x), label='ReLU')                   # max(0, x)
plt.plot(x, np.where(x < 0, 0.1*x, x), '--', label='leaky ReLU')   # small slope for x < 0
plt.legend()
plt.show()
[Plot: sigmoid, tanh, ReLU, and leaky ReLU activation curves]
A sample crop-yield dataset (CSV), with features and a target like those described above:
Rainfall_mm,Temperature_C,Soil_pH,Fertilizer_kg,Pesticide_kg,Irrigation,Yield_ton_ha
820,22.5,6.4,120,3.2,1,4.5
650,24.1,5.8,90,2.1,0,3.1
900,21.3,6.8,140,3.8,1,5.2
720,23.0,6.1,100,2.5,1,4.0
580,25.4,5.6,80,1.9,0,2.8
870,22.1,6.7,130,3.5,1,5.0
760,23.7,6.2,110,2.9,1,4.2
610,24.9,5.7,85,2.0,0,3.0
940,20.8,7.0,150,4.1,1,5.6
700,23.5,6.0,105,2.6,1,4.1
680,24.0,5.9,95,2.3,0,3.3
910,21.5,6.9,145,3.9,1,5.4
790,22.8,6.3,115,3.0,1,4.4
560,26.1,5.5,75,1.8,0,2.6
880,21.9,6.6,135,3.6,1,5.1
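This CSV is never loaded elsewhere in the notebook, so as a minimal sketch (my addition; io.StringIO keeps the cell self-contained), here is how it could feed the same linear-regression workflow:

import io
import pandas as pd
from sklearn.linear_model import LinearRegression

# The CSV above, pasted as a string so this cell runs on its own.
csv_text = """Rainfall_mm,Temperature_C,Soil_pH,Fertilizer_kg,Pesticide_kg,Irrigation,Yield_ton_ha
820,22.5,6.4,120,3.2,1,4.5
650,24.1,5.8,90,2.1,0,3.1
900,21.3,6.8,140,3.8,1,5.2
720,23.0,6.1,100,2.5,1,4.0
580,25.4,5.6,80,1.9,0,2.8
870,22.1,6.7,130,3.5,1,5.0
760,23.7,6.2,110,2.9,1,4.2
610,24.9,5.7,85,2.0,0,3.0
940,20.8,7.0,150,4.1,1,5.6
700,23.5,6.0,105,2.6,1,4.1
680,24.0,5.9,95,2.3,0,3.3
910,21.5,6.9,145,3.9,1,5.4
790,22.8,6.3,115,3.0,1,4.4
560,26.1,5.5,75,1.8,0,2.6
880,21.9,6.6,135,3.6,1,5.1"""

crops = pd.read_csv(io.StringIO(csv_text))
X = crops.drop(columns="Yield_ton_ha")   # features: weather, soil, farm inputs
y = crops["Yield_ton_ha"]                # target: yield in tonnes per hectare

reg = LinearRegression().fit(X, y)
print("R^2 on training data:", reg.score(X, y))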
In [9]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
In [10]:
data = {
    'Name': ['Dorji', 'Tashi', 'Pema', 'Dawa', 'Nima', 'Karma', 'Dema', 'Dechen', 'Kelzang', 'Zam'],
    'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
    'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
    'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
    'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
    'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]  # Total = Math + Sci + Eng + Dzo
}

df = pd.DataFrame(data)
In [11]:
X = df[['Math', 'Sci', 'Eng', 'Dzo']]  # Features
y = df['Total']                         # Target variable
In [13]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

data = {
    'Name': ['Dorji', 'Tashi', 'Pema', 'Dawa', 'Nima', 'Karma', 'Dema', 'Dechen', 'Kelzang', 'Zam'],
    'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
    'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
    'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
    'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
    'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}

df = pd.DataFrame(data)

X = df[['Math', 'Sci', 'Eng', 'Dzo']]
y = df['Total']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

coefficients = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': model.coef_
})
print(coefficients) 
Mean Squared Error: 1.6155871338926322e-27
R-squared: 1.0
  Feature  Coefficient
0    Math          1.0
1     Sci          1.0
2     Eng          1.0
3     Dzo          1.0

The “perfect” fit is expected here, not a sign of a strong model: Total is by construction the exact sum of the four subject scores, so linear regression simply recovers a coefficient of 1 for each feature and an intercept of 0.
In [14]:
import jax.numpy as jnp
from jax import grad, jit
import numpy as np
import pandas as pd

# Your dataset
data = {
    'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
    'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
    'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
    'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
    'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}
df = pd.DataFrame(data)

# Convert to numpy arrays and add bias term (intercept)
X = df[['Math', 'Sci', 'Eng', 'Dzo']].values
y = df['Total'].values

# Add column of ones for bias (intercept)
X = np.hstack([np.ones((X.shape[0], 1)), X])  # Shape (10,5)

# Convert numpy arrays to jax arrays
X = jnp.array(X)
y = jnp.array(y)

# Initialize weights (all zeros; 5 values including the bias term)
weights = jnp.zeros(X.shape[1])

# Define prediction function
def predict(params, inputs):
    return jnp.dot(inputs, params)

# Define loss function (Mean Squared Error)
def loss(params, inputs, targets):
    preds = predict(params, inputs)
    return jnp.mean((preds - targets) ** 2)

# Gradient of loss function w.r.t params
grad_loss = jit(grad(loss))

# Training loop parameters
learning_rate = 0.0001
epochs = 5000

params = weights

for epoch in range(epochs):
    grads = grad_loss(params, X, y)
    params = params - learning_rate * grads
    
    if epoch % 500 == 0:
        current_loss = loss(params, X, y)
        print(f"Epoch {epoch}, Loss: {current_loss:.4f}")

print("Final parameters:", params)

# Make predictions
predictions = predict(params, X)
print("Predictions:", predictions)
print("Actual:", y)
Epoch 0, Loss: 166311.7656
Epoch 500, Loss: nan
Epoch 1000, Loss: nan
Epoch 1500, Loss: nan
Epoch 2000, Loss: nan
Epoch 2500, Loss: nan
Epoch 3000, Loss: nan
Epoch 3500, Loss: nan
Epoch 4000, Loss: nan
Epoch 4500, Loss: nan
Final parameters: [nan nan nan nan nan]
Predictions: [nan nan nan nan nan nan nan nan nan nan]
Actual: [234 230 255 196 179 206 284 222 262 251]
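The NaNs mean gradient descent diverged: with raw scores in the 20–90 range the loss surface is badly conditioned, and a learning rate of 0.0001 is just above the stability threshold, so the updates grow without bound until they overflow. A minimal sketch of one fix (my addition, not in the original run): standardize the features first, which permits a much larger, stable learning rate.

import numpy as np
import jax.numpy as jnp
from jax import grad, jit

# Same students as above: columns are Math, Sci, Eng, Dzo.
X_raw = np.array([
    [20, 54, 67, 93], [35, 60, 76, 59], [70, 54, 55, 76], [40, 34, 45, 77],
    [50, 36, 34, 59], [67, 67, 25, 47], [88, 89, 78, 29], [46, 90, 47, 39],
    [67, 57, 67, 71], [46, 67, 76, 62],
], dtype=float)
y = jnp.array([234, 230, 255, 196, 179, 206, 284, 222, 262, 251], dtype=float)

# Z-score each feature, then add the bias column of ones.
X_std = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)
X = jnp.array(np.hstack([np.ones((X_std.shape[0], 1)), X_std]))

def loss(params, inputs, targets):
    return jnp.mean((jnp.dot(inputs, params) - targets) ** 2)

grad_loss = jit(grad(loss))

params = jnp.zeros(X.shape[1])
for epoch in range(2000):
    params = params - 0.1 * grad_loss(params, X, y)   # lr = 0.1 is stable now

print("Final loss:", loss(params, X, y))   # should approach 0 instead of NaN
print("Predictions:", jnp.dot(X, params))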
In [15]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Dataset
data = {
    'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
    'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
    'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
    'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
    'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Features and target
X = df[['Math', 'Sci', 'Eng', 'Dzo']]
y = df['Total']

# Initialize and fit the model
model = LinearRegression()
model.fit(X, y)

# Predictions
y_pred = model.predict(X)

# Print coefficients and intercept
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# Evaluate model
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")

# Show actual vs predicted
results = pd.DataFrame({'Actual': y, 'Predicted': y_pred})
print(results)
Coefficients: [1. 1. 1. 1.]
Intercept: -1.4210854715202004e-13
Mean Squared Error: 0.00
R^2 Score: 1.00
   Actual  Predicted
0     234      234.0
1     230      230.0
2     255      255.0
3     196      196.0
4     179      179.0
5     206      206.0
6     284      284.0
7     222      222.0
8     262      262.0
9     251      251.0
In [18]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# -------------------------------
# 1️⃣ Load data
data = {
    'Name': ['Dorji','Tashi','Pema','Dawa','Nima','Karma','Dema','Dechen','Kelzang','Zam'],
    'Math': [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
    'Sci': [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
    'Eng': [67, 76, 55, 45, 34, 25, 78, 47, 67, 76],
    'Dzo': [93, 59, 76, 77, 59, 47, 29, 39, 71, 62],
    'Total': [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]
}

df = pd.DataFrame(data)

# -------------------------------
# 2️⃣ Categorize Total scores
def categorize(total):
    if total < 210:
        return 'Low'
    elif total < 250:
        return 'Medium'
    else:
        return 'High'

df['Category'] = df['Total'].apply(categorize)

# -------------------------------
# 3️⃣ Features and target
X = df[['Math','Sci','Eng','Dzo']]
y = df['Category']

# -------------------------------
# 4️⃣ Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# -------------------------------
# 5️⃣ Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# -------------------------------
# 6️⃣ Train Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# -------------------------------
# 7️⃣ Predict and evaluate
y_pred = clf.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# -------------------------------
# 8️⃣ Optional: Feature Importance
import matplotlib.pyplot as plt

feature_importance = clf.feature_importances_
features = X.columns

plt.figure(figsize=(8,5))
plt.bar(features, feature_importance, color='skyblue')
plt.title("Feature Importance in Random Forest")
plt.ylabel("Importance")
plt.show()
Confusion Matrix:
[[1 0 0]
 [1 0 0]
 [1 0 0]]

Classification Report:
              precision    recall  f1-score   support

        High       0.33      1.00      0.50         1
         Low       0.00      0.00      0.00         1
      Medium       0.00      0.00      0.00         1

    accuracy                           0.33         3
   macro avg       0.11      0.33      0.17         3
weighted avg       0.11      0.33      0.17         3

/opt/conda/lib/python3.13/site-packages/sklearn/metrics/_classification.py:1731: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
(warning repeated three times)
[Bar chart: Random Forest feature importances for Math, Sci, Eng, Dzo]
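With only ten students, a 70/30 split leaves three test samples (one per class), so the scores above are essentially noise, and the warnings flag classes the model never predicted. As a hedged alternative on such a tiny dataset (my suggestion, reusing clf, X_scaled, and y from the cell above), leave-one-out cross-validation squeezes a little more signal out of ten rows:

from sklearn.model_selection import cross_val_score, LeaveOneOut

# Train on 9 students, test on the 1 held out; repeat for all 10 students.
scores = cross_val_score(clf, X_scaled, y, cv=LeaveOneOut())
print("Leave-one-out accuracy:", scores.mean())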

What is Probability in Data Science?¶

Probability in data science is a way to measure uncertainty and quantify how likely an event is to happen using data and mathematics.

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Data
total_marks = [234, 230, 255, 196, 179, 206, 284, 222, 262, 251]

# Convert to DataFrame
df = pd.DataFrame({"Total": total_marks})

# Sort total marks
sorted_total = np.sort(df["Total"])

# Calculate cumulative probability
cum_prob = np.arange(1, len(sorted_total) + 1) / len(sorted_total)

# Plot CDF
plt.figure(figsize=(7,5))
plt.plot(sorted_total, cum_prob, marker='o', linestyle='-')
plt.xlabel("Total Marks")
plt.ylabel("Cumulative Probability")
plt.title("Cumulative Probability Distribution (CDF) of Total Marks")
plt.grid(True)
plt.show()
[Plot: empirical CDF (cumulative probability) of total marks]
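One thing the empirical CDF gives you directly is threshold probabilities. A quick sketch, reusing total_marks and np from the cell above:

# P(Total <= 230): the height of the CDF at 230.
print(np.mean(np.array(total_marks) <= 230))   # 0.5 — half the class scored 230 or below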

Probability Tools¶

1️⃣ Probability Mass Function (PMF)

Used for discrete data

Gives probability of exact values

P(X = x)

2️⃣ Probability Density Function (PDF)

Used for continuous data

Area under the curve = probability

Example: Distribution of total marks across students.

3️⃣ Histogram

Graphical approximation of probability distribution

Shows frequency of values

Used before PDF & CDF

4️⃣ Joint Probability

Probability of two events together

P(A ∩ B)

Example: Probability of scoring high in Math AND Science (see the numeric sketch after this list)

5️⃣ Conditional Probability

Probability of one event given another

P(A | B)

Example: Probability of high total marks given high Math marks

6️⃣ Bayes’ Theorem

Updates probability with new evidence

P(A | B) = P(B | A) · P(A) / P(B)

Used in: Naive Bayes, medical diagnosis, spam detection

7️⃣ Expectation (Expected Value)

Average outcome over many trials

E(X) = Σ x · P(x)
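A quick numeric sketch of two of these tools, joint probability and expectation, on the class dataset (my addition; conditional probability and Bayes' theorem are worked in the next cell):

import pandas as pd

df = pd.DataFrame({
    "Math":  [20, 35, 70, 40, 50, 67, 88, 46, 67, 46],
    "Sci":   [54, 60, 54, 34, 36, 67, 89, 90, 57, 67],
    "Total": [234, 230, 255, 196, 179, 206, 284, 222, 262, 251],
})

# Joint probability: P(Math >= 60 AND Sci >= 60)
print(((df["Math"] >= 60) & (df["Sci"] >= 60)).mean())   # 0.2

# Expectation: each student equally likely (P = 1/10), so E(Total) is the mean.
print((df["Total"] * (1 / len(df))).sum())               # 231.9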

In [3]:
import pandas as pd

# -----------------------
# 1. Dataset
# -----------------------
data = {
    "Name": ["Dorji","Tashi","Pema","Dawa","Nima","Karma","Dema","Dechen","Kelzang","Zam"],
    "Math": [20,35,70,40,50,67,88,46,67,46],
    "Sci": [54,60,54,34,36,67,89,90,57,67],
    "Eng": [67,76,55,45,34,25,78,47,67,76],
    "Dzo": [93,59,76,77,59,47,29,39,71,62],
    "Total": [234,230,255,196,179,206,284,222,262,251]
}
df = pd.DataFrame(data)

# -----------------------
# 2. Define Events
# -----------------------
# Event A: Total >= 250
df['A'] = df['Total'] >= 250

# Event B: Math >= 60
df['B'] = df['Math'] >= 60

# -----------------------
# 3. Compute Probabilities
# -----------------------
P_A = df['A'].mean()  # P(A)
P_B = df['B'].mean()  # P(B)
P_B_given_A = df[df['A']]['B'].mean()  # P(B|A)

# Bayes theorem
P_A_given_B = (P_B_given_A * P_A) / P_B

print(f"P(A) = {P_A:.2f}")
print(f"P(B) = {P_B:.2f}")
print(f"P(B|A) = {P_B_given_A:.2f}")
print(f"P(A|B) = {P_A_given_B:.2f}") 
P(A) = 0.40
P(B) = 0.40
P(B|A) = 0.75
P(A|B) = 0.75

Since P(A) = P(B) here, Bayes' theorem returns P(A|B) = P(B|A): knowing a student scored at least 60 in Math raises the probability of a total of 250 or more from 0.40 to 0.75.
In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# -----------------------
# 1. Dataset
# -----------------------
data = {
    "Math": [20,35,70,40,50,67,88,46,67,46],
    "Sci": [54,60,54,34,36,67,89,90,57,67],
    "Eng": [67,76,55,45,34,25,78,47,67,76],
    "Dzo": [93,59,76,77,59,47,29,39,71,62],
    "Total": [234,230,255,196,179,206,284,222,262,251]
}
df = pd.DataFrame(data)

# -----------------------
# 2. Histogram for Total Marks
# -----------------------
plt.figure(figsize=(7,5))
plt.hist(df['Total'], bins=5, color='skyblue', edgecolor='black', density=True)
plt.title("Histogram of Total Marks (Probability Density)")
plt.xlabel("Total Marks")
plt.ylabel("Probability Density")
plt.grid(axis='y')
plt.show()

# -----------------------
# 3. PMF for Total Marks
# -----------------------
counts = Counter(df['Total'])
total_students = len(df)
pmf = {k: v/total_students for k,v in counts.items()}

# Plot PMF
plt.figure(figsize=(7,5))
plt.bar(pmf.keys(), pmf.values(), color='lightgreen', edgecolor='black')
plt.title("PMF of Total Marks")
plt.xlabel("Total Marks")
plt.ylabel("Probability")
plt.grid(axis='y')
plt.show()

# Optional: print PMF values
print("PMF values for Total Marks:")
for mark, prob in pmf.items():
    print(f"{mark}: {prob:.2f}") 
[Histogram: probability density of total marks]
[Bar chart: PMF of total marks]
PMF values for Total Marks:
234: 0.10
230: 0.10
255: 0.10
196: 0.10
179: 0.10
206: 0.10
284: 0.10
222: 0.10
262: 0.10
251: 0.10

Every total in this class is unique, so the PMF is uniform: each value has probability 1/10. A PMF becomes more informative when values repeat.