In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import Voronoi, voronoi_plot_2d
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
# -------------------------------
# LOAD AND CLEAN CSV
# -------------------------------
df = pd.read_csv("Baselinedata_Grade VIII E..csv")
df = df.dropna(how="all")
rename_map = {
"Unnamed: 1": "name",
"Unnamed: 4": "exploration",
"Unnamed: 5": "communication",
"Unnamed: 6": "creativity"
}
df = df.rename(columns=rename_map)
df = df[["name", "exploration", "communication", "creativity"]]
df = df.dropna(subset=["name"])
# -------------------------------
# CONVERT RATINGS TO NUMBERS
# -------------------------------
rating_map = {
"Very Good": 3,
"Good": 2,
"Need improvement": 1
}
df["exploration_score"] = df["exploration"].map(rating_map)
df["communication_score"] = df["communication"].map(rating_map)
df["creativity_score"] = df["creativity"].map(rating_map)
# Remove rows with any NaN in scores
df = df.dropna(subset=["exploration_score", "communication_score", "creativity_score"])
# -------------------------------
# PREPARE DATA
# -------------------------------
X = df[["exploration_score", "communication_score", "creativity_score"]].values
k = 3
np.random.seed(0)
# Pick valid initial centroids (no NaNs)
indices = np.random.choice(len(X), k, replace=False)
centroids = X[indices].astype(float)
# -------------------------------
# K-MEANS WITH NaN PROTECTION
# -------------------------------
def assign_clusters(X, centroids):
d = np.sqrt(((X[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2))
return np.argmin(d, axis=1)
def recompute_centroids(X, labels, k):
new_centroids = np.zeros((k, X.shape[1]))
for i in range(k):
pts = X[labels == i]
if len(pts) == 0:
# Avoid NaN – pick a random point as centroid
new_centroids[i] = X[np.random.randint(0, len(X))]
else:
new_centroids[i] = np.nanmean(pts, axis=0)
return new_centroids
for _ in range(15):
labels = assign_clusters(X, centroids)
centroids = recompute_centroids(X, labels, k)
# -------------------------------
# VORONOI + CLUSTER PLOT (Cloud Style)
# -------------------------------
fig, ax = plt.subplots(figsize=(8,6))
# Add small jitter to avoid overlapping points
jitter = 0.08
X_jittered = X[:, :2] + np.random.uniform(-jitter, jitter, X[:, :2].shape)
# Scatter plot with jitter
scatter = ax.scatter(
X_jittered[:,0], X_jittered[:,1],
c=labels,
cmap="viridis",
s=100,
edgecolor="black",
alpha=0.7
)
# Ensure centroids have no NaNs
vor_centroids = centroids[:, :2]
if np.isnan(vor_centroids).any():
for i in range(len(vor_centroids)):
if np.isnan(vor_centroids[i]).any():
vor_centroids[i] = X[np.random.randint(0, len(X)), :2]
# Create Voronoi diagram
vor = Voronoi(vor_centroids)
# Plot colored Voronoi regions
patches = []
colors = plt.cm.viridis(np.linspace(0, 1, k))
for region_index, color in zip(vor.point_region, colors):
region = vor.regions[region_index]
if not -1 in region and len(region) > 0:
polygon = [vor.vertices[i] for i in region]
patches.append(Polygon(polygon, True, facecolor=color, alpha=0.2))
p = PatchCollection(patches, match_original=True)
ax.add_collection(p)
# Plot Voronoi edges
voronoi_plot_2d(vor, ax=ax, show_vertices=False, line_colors="black", line_width=1, point_size=0)
plt.xlabel("Exploration Score")
plt.ylabel("Communication Score")
plt.title("Student Clusters (K-means with 3 Skills)")
plt.grid(True)
plt.show()
# -------------------------------
# OUTPUT STUDENT CLUSTERS
# -------------------------------
df["cluster"] = labels
print(df[["name", "exploration", "communication", "creativity", "cluster"]])
name exploration communication \
2 Ashish Wakley Good Good
3 Dorji Tshewang Good Good
4 Jigme Namgyel Need improvement Need improvement
5 Kelden Drukda Need improvement Need improvement
6 Kinga Tobgay Good Good
7 Kinley Dendup Good Good
8 Kinley Tshering Good Need improvement
9 Lekden Thujee Drakpa Need improvement Need improvement
10 Pema Namgay Very Good Very Good
11 Rinzin Dorji Good Need improvement
12 Samten Nima Good Need improvement
13 Sherub Phuntsho Good Good
14 Sherub Wangchuk Good Good
15 Sonam Rabten Good Good
16 Sonam Tobden Good Very Good
17 Tashi Yoezer Good Good
18 Thukten Rigtsel Dorji Good Very Good
19 Yonten Yoezer Good Need improvement
20 Dechen Pem Good Very Good
21 Dechen Zangmo Good Need improvement
22 Dorji Choden Very Good Very Good
23 Jigme Metho Need improvement Need improvement
24 Kamala Sunar Need improvement Need improvement
25 Kinzang Lhazin Need improvement Good
26 Namgay Choden Good Very Good
27 Pema Dema Good Need improvement
28 Pema Wangmo Good Good
29 Pema Yangchen Good Good
30 Sangay Choden Good Good
32 Tenzin Chokey Good Need improvement
33 Thukten Ngawang Choden Very Good Very Good
34 Tshering Choden S Good Good
35 Tshering Choden Z Good Good
36 Tshering Yangzom Good Good
37 Ugyen Dema Good Good
38 Ugyen lhaden Good Need improvement
39 Ugyen Yangzom Good Very Good
40 Yeshi lham Need improvement Need improvement
creativity cluster
2 Very Good 1
3 Good 1
4 Need improvement 0
5 Very Good 1
6 Need improvement 0
7 Need improvement 0
8 Need improvement 0
9 Need improvement 0
10 Very Good 2
11 Need improvement 0
12 Need improvement 0
13 Need improvement 0
14 Need improvement 0
15 Need improvement 0
16 Need improvement 2
17 Good 1
18 Good 2
19 Need improvement 0
20 Need improvement 2
21 Good 1
22 Good 2
23 Good 1
24 Need improvement 0
25 Good 1
26 Need improvement 2
27 Need improvement 0
28 Need improvement 0
29 Need improvement 0
30 Need improvement 0
32 Good 1
33 Very Good 2
34 Good 1
35 Good 1
36 Good 1
37 Good 1
38 Need improvement 0
39 Good 2
40 Need improvement 0
In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# -------------------------------
# LOAD AND CLEAN CSV
# -------------------------------
# Skip the first row (file title) and use the second row as header
df = pd.read_csv("Baselinedata_Grade VIII E..csv", header=1)
# Strip spaces and rename columns
df.columns = df.columns.str.strip()
df = df.rename(columns={
"Name": "Name",
"Exploration": "Exploration",
"Communication": "Communication",
"Creativity": "Creativity"
})
# Keep only relevant columns
df = df[["Name", "Exploration", "Communication", "Creativity"]].dropna(subset=["Name"])
# Map ratings to numbers
rating_map = {"Very Good": 3, "Good": 2, "Need improvement": 1}
df["Exploration_score"] = df["Exploration"].map(rating_map)
df["Communication_score"] = df["Communication"].map(rating_map)
df["Creativity_score"] = df["Creativity"].map(rating_map)
# Drop rows with missing scores
df = df.dropna(subset=["Exploration_score", "Communication_score", "Creativity_score"])
# -------------------------------
# PREPARE DATA
# -------------------------------
# Use all three scores for 3D clustering
points = df[["Exploration_score", "Communication_score", "Creativity_score"]].T.values # 3 x npoints
npts = points.shape[1]
# Cluster parameters
nclusters = 5
np.random.seed(10)
# -------------------------------
# INITIALIZE CLUSTER PARAMETERS
# -------------------------------
indices = np.random.randint(0, npts, nclusters)
means = points[:, indices]
dmean = np.zeros_like(means)
variances = np.outer(np.var(points, axis=1), np.ones(nclusters))
pc = np.ones(nclusters) / nclusters # cluster probabilities
pxgc = np.zeros((nclusters, npts))
# -------------------------------
# FUNCTION TO PLOT CLUSTERS IN 3D
# -------------------------------
def plot_3d_clusters(points, means, title="3D Clusters"):
fig = plt.figure(figsize=(10,7))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[0, :], points[1, :], points[2, :], c='skyblue', edgecolor='black', s=100)
for cluster in range(means.shape[1]):
ax.scatter(means[0, cluster], means[1, cluster], means[2, cluster],
marker=f'${cluster}$', s=200, c='red')
ax.set_xlabel("Exploration Score")
ax.set_ylabel("Communication Score")
ax.set_zlabel("Creativity Score")
ax.set_title(title)
plt.show()
# -------------------------------
# PLOT INITIAL DATA
# -------------------------------
plot_3d_clusters(points, means, title="Clusters Before EM Iteration")
# -------------------------------
# EM ITERATION
# -------------------------------
niterations = 50
momentum = 0.9
rate = 0.1
min_var = 0.1
for i in range(niterations):
# E-step: probability of each point given cluster
for cluster in range(nclusters):
mean = np.outer(means[:, cluster], np.ones(npts))
variance = np.outer(variances[:, cluster], np.ones(npts))
pxgc[cluster, :] = np.prod(np.exp(-(points - mean) ** 2 / (2 * variance)) / np.sqrt(2 * np.pi * variance), 0)
pxc = pxgc * np.outer(pc, np.ones(npts))
pcgx = pxc / np.outer(np.ones(nclusters), np.sum(pxc, axis=0))
# M-step: update cluster weights and parameters
pc = np.sum(pcgx, axis=1) / npts
for cluster in range(nclusters):
newmean = momentum * dmean[:, cluster] + np.sum(points * np.outer(np.ones(points.shape[0]), pcgx[cluster, :]), axis=1) / np.sum(pcgx[cluster, :])
dmean[:, cluster] = newmean - means[:, cluster]
means[:, cluster] += rate * dmean[:, cluster]
m = np.outer(means[:, cluster], np.ones(npts))
variances[:, cluster] = np.sum((points - m) ** 2 * np.outer(np.ones(points.shape[0]), pcgx[cluster, :]), axis=1) / np.sum(pcgx[cluster, :]) + min_var
# -------------------------------
# PLOT FINAL CLUSTERS
# -------------------------------
plot_3d_clusters(points, means, title="Clusters After EM Iteration")
# -------------------------------
# ASSIGN CLUSTERS TO STUDENTS
# -------------------------------
student_clusters = np.argmax(pcgx, axis=0)
df["Cluster"] = student_clusters
print(df[["Name", "Exploration", "Communication", "Creativity", "Cluster"]])
Name Exploration Communication \
1 Ashish Wakley Good Good
2 Dorji Tshewang Good Good
3 Jigme Namgyel Need improvement Need improvement
4 Kelden Drukda Need improvement Need improvement
5 Kinga Tobgay Good Good
6 Kinley Dendup Good Good
7 Kinley Tshering Good Need improvement
8 Lekden Thujee Drakpa Need improvement Need improvement
9 Pema Namgay Very Good Very Good
10 Rinzin Dorji Good Need improvement
11 Samten Nima Good Need improvement
12 Sherub Phuntsho Good Good
13 Sherub Wangchuk Good Good
14 Sonam Rabten Good Good
15 Sonam Tobden Good Very Good
16 Tashi Yoezer Good Good
17 Thukten Rigtsel Dorji Good Very Good
18 Yonten Yoezer Good Need improvement
19 Dechen Pem Good Very Good
20 Dechen Zangmo Good Need improvement
21 Dorji Choden Very Good Very Good
22 Jigme Metho Need improvement Need improvement
23 Kamala Sunar Need improvement Need improvement
24 Kinzang Lhazin Need improvement Good
25 Namgay Choden Good Very Good
26 Pema Dema Good Need improvement
27 Pema Wangmo Good Good
28 Pema Yangchen Good Good
29 Sangay Choden Good Good
31 Tenzin Chokey Good Need improvement
32 Thukten Ngawang Choden Very Good Very Good
33 Tshering Choden S Good Good
34 Tshering Choden Z Good Good
35 Tshering Yangzom Good Good
36 Ugyen Dema Good Good
37 Ugyen lhaden Good Need improvement
38 Ugyen Yangzom Good Very Good
39 Yeshi lham Need improvement Need improvement
Creativity Cluster
1 Very Good 4
2 Good 4
3 Need improvement 0
4 Very Good 2
5 Need improvement 4
6 Need improvement 4
7 Need improvement 0
8 Need improvement 0
9 Very Good 1
10 Need improvement 0
11 Need improvement 0
12 Need improvement 4
13 Need improvement 4
14 Need improvement 4
15 Need improvement 4
16 Good 4
17 Good 4
18 Need improvement 0
19 Need improvement 4
20 Good 4
21 Good 1
22 Good 2
23 Need improvement 0
24 Good 2
25 Need improvement 4
26 Need improvement 0
27 Need improvement 4
28 Need improvement 4
29 Need improvement 4
31 Good 4
32 Very Good 1
33 Good 4
34 Good 4
35 Good 4
36 Good 4
37 Need improvement 0
38 Good 4
39 Need improvement 0
In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# -------------------------------
# LOAD CSV
# -------------------------------
# Skip the first row (file title) and use the second row as header
df = pd.read_csv("Baselinedata_Grade VIII E..csv", header=1)
# Strip spaces and rename columns
df.columns = df.columns.str.strip()
df = df.rename(columns={
"Name": "Name",
"Exploration": "Exploration",
"Communication": "Communication",
"Creativity": "Creativity"
})
# Keep only relevant columns
df = df[["Name", "Exploration", "Communication", "Creativity"]].dropna(subset=["Name"])
# Map ratings to numbers
rating_map = {"Very Good": 3, "Good": 2, "Need improvement": 1}
df["Exploration_score"] = df["Exploration"].map(rating_map)
df["Communication_score"] = df["Communication"].map(rating_map)
# Drop rows with missing scores
df = df.dropna(subset=["Exploration_score", "Communication_score"])
# -------------------------------
# PREPARE DATA (2D for clustering)
# -------------------------------
points = df[["Exploration_score", "Communication_score"]].T.values # 2 x npoints
npts = points.shape[1]
# -------------------------------
# CLUSTER PARAMETERS
# -------------------------------
dim = 2
nstates = 2
nclusters = 5
momentum = 0.9
rate = 0.1
min_var = 0.1
niterations = 100
np.random.seed(10)
# -------------------------------
# INITIALIZE CLUSTER ARRAYS
# -------------------------------
indices = np.random.randint(0, npts, nclusters)
means = points[:, indices]
dmean = np.zeros((dim, nclusters))
covariances = np.zeros((nclusters, dim, dim))
covariances[:,0,0] = np.var(points[0,:])
covariances[:,1,1] = np.var(points[1,:])
pc = np.ones(nclusters)/nclusters # cluster probabilities
pxgc = np.zeros((nclusters, npts)) # p(x|c)
psgxc = np.ones((nclusters, nstates))/nstates # p(s|x,c)
# -------------------------------
# PLOT FUNCTION
# -------------------------------
def plot_covar():
for cluster in range(means.shape[1]):
x = means[0, cluster]
y = means[1, cluster]
w, v = np.linalg.eig(covariances[cluster])
w = np.sqrt(w)
plt.plot([x-w[0]*v[0,0], x+w[0]*v[0,0]], [y-w[0]*v[1,0], y+w[0]*v[1,0]], 'k')
plt.plot([x-w[1]*v[0,1], x+w[1]*v[0,1]], [y-w[1]*v[1,1], y+w[1]*v[1,1]], 'k')
plt.plot([x],[y], 'k', marker=f'${cluster}$', markersize=15)
# -------------------------------
# INITIAL PLOT
# -------------------------------
plt.figure(figsize=(8,6))
plt.scatter(points[0,:], points[1,:], c='skyblue', edgecolor='black', s=100)
plot_covar()
plt.xlabel("Exploration Score")
plt.ylabel("Communication Score")
plt.title("Covariance Clusters Before EM Iteration")
plt.grid(True)
plt.show()
# -------------------------------
# EM ITERATION
# -------------------------------
for i in range(niterations):
for cluster in range(nclusters):
mean = np.outer(means[:,cluster], np.ones(npts))
cinv = np.linalg.pinv(covariances[cluster])
pxgc[cluster,:] = (np.sqrt(np.linalg.det(cinv))/(2*np.pi)**(dim/2)) * \
np.exp(-np.sum((points-mean)*(cinv@(points-mean)),0)/2)
pxc = pxgc * np.outer(pc, np.ones(npts))
pcgx = pxc / np.outer(np.ones(nclusters), np.sum(pxc,0))
psxc = psgxc[:, np.zeros(npts,dtype=int)] * pxc # simplified states
pcgsx = psxc / np.outer(np.ones(nclusters), np.sum(psxc,0))
pc = np.sum(pcgsx,1)/npts
for cluster in range(nclusters):
newmean = momentum*dmean[:,cluster] + np.sum(points*np.outer(np.ones(dim), pcgsx[cluster,:]),1)/np.sum(pcgsx[cluster,:])
dmean[:,cluster] = newmean - means[:,cluster]
means[:,cluster] += rate*dmean[:,cluster]
m = np.outer(means[:,cluster], np.ones(npts))
for d in range(dim):
covariances[cluster][:,d] = np.sum(np.outer(np.ones(dim), points[d,:]-m[d,:])*(points-m) * \
np.outer(np.ones(dim), pcgsx[cluster,:]),1)/np.sum(np.outer(np.ones(dim), pcgsx[cluster,:]),1) + min_var
# -------------------------------
# PLOT FINAL CLUSTERS
# -------------------------------
plt.figure(figsize=(8,6))
plt.scatter(points[0,:], points[1,:], c='skyblue', edgecolor='black', s=100)
plot_covar()
plt.xlabel("Exploration Score")
plt.ylabel("Communication Score")
plt.title("Covariance Clusters After EM Iteration")
plt.grid(True)
plt.show()
# -------------------------------
# ASSIGN CLUSTER TO STUDENTS
# -------------------------------
student_clusters = np.argmax(pxgc, axis=0)
df["Cluster"] = student_clusters
print(df[["Name", "Exploration", "Communication", "Cluster"]])
Name Exploration Communication Cluster 1 Ashish Wakley Good Good 2 2 Dorji Tshewang Good Good 2 3 Jigme Namgyel Need improvement Need improvement 2 4 Kelden Drukda Need improvement Need improvement 2 5 Kinga Tobgay Good Good 2 6 Kinley Dendup Good Good 2 7 Kinley Tshering Good Need improvement 0 8 Lekden Thujee Drakpa Need improvement Need improvement 2 9 Pema Namgay Very Good Very Good 2 10 Rinzin Dorji Good Need improvement 0 11 Samten Nima Good Need improvement 0 12 Sherub Phuntsho Good Good 2 13 Sherub Wangchuk Good Good 2 14 Sonam Rabten Good Good 2 15 Sonam Tobden Good Very Good 1 16 Tashi Yoezer Good Good 2 17 Thukten Rigtsel Dorji Good Very Good 1 18 Yonten Yoezer Good Need improvement 0 19 Dechen Pem Good Very Good 1 20 Dechen Zangmo Good Need improvement 0 21 Dorji Choden Very Good Very Good 2 22 Jigme Metho Need improvement Need improvement 2 23 Kamala Sunar Need improvement Need improvement 2 24 Kinzang Lhazin Need improvement Good 1 25 Namgay Choden Good Very Good 1 26 Pema Dema Good Need improvement 0 27 Pema Wangmo Good Good 2 28 Pema Yangchen Good Good 2 29 Sangay Choden Good Good 2 31 Tenzin Chokey Good Need improvement 0 32 Thukten Ngawang Choden Very Good Very Good 2 33 Tshering Choden S Good Good 2 34 Tshering Choden Z Good Good 2 35 Tshering Yangzom Good Good 2 36 Ugyen Dema Good Good 2 37 Ugyen lhaden Good Need improvement 0 38 Ugyen Yangzom Good Very Good 1 39 Yeshi lham Need improvement Need improvement 2
In [32]:
import matplotlib.pyplot as plt
import numpy as np
# -------------------------------
# function cluster-weighted modeling parameters
# -------------------------------
nclusters = 5
xmin = -5
xmax = 5
npts = 100
minvar = 0.01
niterations = 100
np.random.seed(10)
# -------------------------------
# generate synthetic data
# -------------------------------
x = np.linspace(xmin, xmax, npts)
y = np.tanh(x) + 0.25 * np.random.rand(npts)
xplot = np.copy(x)
# initialize cluster parameters
mux = xmin + (xmax - xmin) * np.random.rand(nclusters)
beta = 0.1 * np.random.rand(3, nclusters)
varx = np.ones(nclusters)
vary = np.ones(nclusters)
pc = np.ones(nclusters) / nclusters
# -------------------------------
# plot before iteration
# -------------------------------
pxgc = np.exp(-((np.outer(x, np.ones(nclusters)) - np.outer(np.ones(npts), mux))**2) /
(2 * np.outer(np.ones(npts), varx))) / np.sqrt(2 * np.pi * np.outer(np.ones(npts), varx))
pygxc = np.exp(-((np.outer(y, np.ones(nclusters)) -
(np.outer(np.ones(npts), beta[0, :]) +
np.outer(x, beta[1, :]) +
np.outer(x*x, beta[2, :])))**2) /
(2 * np.outer(np.ones(npts), vary))) / np.sqrt(2 * np.pi * np.outer(np.ones(npts), vary))
pxc = pxgc * np.outer(np.ones(npts), pc)
pxyc = pygxc * pxc
pcgxy = pxyc / np.outer(np.sum(pxyc, axis=1), np.ones(nclusters))
plt.figure(figsize=(8,5))
plt.plot(x, y, '.', label='Data')
yplot = np.sum((np.outer(np.ones(npts), beta[0, :]) +
np.outer(x, beta[1, :]) +
np.outer(x*x, beta[2, :])) * pxc, 1) / np.sum(pxc, 1)
plt.plot(xplot, yplot, label='Cluster estimate')
plt.title('Function clusters before iteration')
plt.legend()
plt.show()
# -------------------------------
# EM iteration
# -------------------------------
for step in range(niterations):
# E-step
pxgc = np.exp(-((np.outer(x, np.ones(nclusters)) - np.outer(np.ones(npts), mux))**2) /
(2 * np.outer(np.ones(npts), varx))) / np.sqrt(2 * np.pi * np.outer(np.ones(npts), varx))
pygxc = np.exp(-((np.outer(y, np.ones(nclusters)) -
(np.outer(np.ones(npts), beta[0, :]) +
np.outer(x, beta[1, :]) +
np.outer(x*x, beta[2, :])))**2) /
(2 * np.outer(np.ones(npts), vary))) / np.sqrt(2 * np.pi * np.outer(np.ones(npts), vary))
pxc = pxgc * np.outer(np.ones(npts), pc)
pxyc = pygxc * pxc
pcgxy = pxyc / np.outer(np.sum(pxyc, axis=1), np.ones(nclusters))
# M-step
pc = np.sum(pcgxy, 0) / npts
mux = np.sum(np.outer(x, np.ones(nclusters)) * pcgxy, 0) / (npts * pc)
varx = minvar + np.sum((np.outer(x, np.ones(nclusters)) - np.outer(np.ones(npts), mux))**2 * pcgxy, 0) / (npts * pc)
# update beta using least squares
a = np.zeros((3, nclusters))
for c in range(nclusters):
a[:, c] = np.array([
np.sum(y * pcgxy[:, c]),
np.sum(y * x * pcgxy[:, c]),
np.sum(y * x * x * pcgxy[:, c])
]) / (npts * pc[c])
B = np.zeros((3, 3, nclusters))
for c in range(nclusters):
B[:, :, c] = np.array([
[np.sum(pcgxy[:, c]), np.sum(x * pcgxy[:, c]), np.sum(x*x * pcgxy[:, c])],
[np.sum(x * pcgxy[:, c]), np.sum(x*x * pcgxy[:, c]), np.sum(x*x*x * pcgxy[:, c])],
[np.sum(x*x * pcgxy[:, c]), np.sum(x*x*x * pcgxy[:, c]), np.sum(x*x*x*x * pcgxy[:, c])]
])
# Solve least squares instead of matrix inversion
beta[:, c], _, _, _ = np.linalg.lstsq(B[:, :, c], a[:, c], rcond=None)
vary = minvar + np.sum((np.outer(y, np.ones(nclusters)) -
(np.outer(np.ones(npts), beta[0, :]) +
np.outer(x, beta[1, :]) +
np.outer(x*x, beta[2, :])))**2 * pcgxy, 0) / (npts * pc)
# -------------------------------
# plot after EM iteration
# -------------------------------
plt.figure(figsize=(8,5))
plt.plot(x, y, '.', label='Data')
yplot = np.sum((np.outer(np.ones(npts), beta[0, :]) +
np.outer(x, beta[1, :]) +
np.outer(x*x, beta[2, :])) * pxc, 1) / np.sum(pxc, 1)
plt.plot(xplot, yplot, label='Cluster estimate')
plt.title('Function clusters after iteration')
plt.legend()
plt.show()
In [ ]: