Kae Nagano - Fab Futures - Data Science

Class¶

- Density Estimation¶

Assignment¶

  • Fit a probability distribution to your data

Approach¶

1. K-means with Fab Academy Git data¶

As a first step, I referred to the class example and applied k-means clustering to explore the relationship between the total number of Git commits and the number of active weeks (weeks with at least one commit).

In [9]:
import matplotlib.pyplot as plt
from scipy.spatial import Voronoi,voronoi_plot_2d
import numpy as np
import pandas as pd

# --- load data ---
df = pd.read_csv("datasets/fabacademy_commit_weekly_summary.csv")
week_cols = [c for c in df.columns if "week_" in c]
# float dtype so the cluster means aren't truncated to integers below
x = df["commit_total"].to_numpy(dtype=float)
y = (df[week_cols] > 0).sum(axis=1).to_numpy(dtype=float)

#
# k-means parameters
#
nclusters = 3
nsteps = 10
np.random.seed(0)
#
# choose starting points
#
indices = np.random.choice(len(x), size=nclusters, replace=False)  # distinct starting points
mux = x[indices]
muy = y[indices]
#
# plot before iteration
#
fig,ax = plt.subplots()
plt.plot(x,y,'.')
vor = Voronoi(np.stack((mux,muy),axis=1))
voronoi_plot_2d(vor,ax=ax,show_points=True,show_vertices=False,point_size=20)
plt.autoscale()
plt.title('before k-means iterations')
plt.show()
#
# do k-means iteration
#
for i in range(nsteps):
    #
    # find closest points
    #
    xm = np.outer(x,np.ones(len(mux)))
    ym = np.outer(y,np.ones(len(muy)))
    muxm = np.outer(np.ones(len(x)),mux)
    muym = np.outer(np.ones(len(x)),muy)
    distances = np.sqrt((xm-muxm)**2+(ym-muym)**2)
    mins = np.argmin(distances,axis=1)
    #
    # update means
    #
    for k in range(len(mux)):
        index = np.where(mins == k)[0]
        if len(index) > 0:
            mux[k] = np.mean(x[index])
            muy[k] = np.mean(y[index])
#
# plot after iteration
#
fig,ax = plt.subplots()
plt.plot(x,y,'.')
vor = Voronoi(np.stack((mux,muy),axis=1))
voronoi_plot_2d(vor,ax=ax,show_points=True,show_vertices=False,point_size=20)
plt.autoscale()
plt.title('after k-means iteration')
plt.show()
[Figures: commit data with Voronoi cells, before and after the k-means iterations]

The clustering did not work as expected: commit totals (in the hundreds) dominate the Euclidean distance over active weeks (at most 30), so I asked ChatGPT and added standardization.
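Standardization (z-scoring) rescales each feature to zero mean and unit variance, so both features contribute comparably to the distance. A minimal sketch of what StandardScaler computes, using the first five commit_total values from the table printed later:

import numpy as np

# first five commit_total values from the dataset
x = np.array([93., 76., 60., 436., 178.])

# z-score: subtract the mean, divide by the (population) standard deviation
z = (x - x.mean()) / x.std()
print(z)                  # values now on a comparable unit scale
print(z.mean(), z.std())  # ~0.0 and 1.0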

In [10]:
import matplotlib.pyplot as plt
from scipy.spatial import Voronoi, voronoi_plot_2d
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# --- load data ---
df = pd.read_csv("datasets/fabacademy_commit_weekly_summary.csv")
week_cols = [c for c in df.columns if "week_" in c]

x_raw = df["commit_total"].to_numpy()
y_raw = (df[week_cols] > 0).sum(axis=1).to_numpy()

# --- standardize the features ---
scaler = StandardScaler()
XY = np.vstack([x_raw, y_raw]).T
XY_scaled = scaler.fit_transform(XY)
x = XY_scaled[:,0]
y = XY_scaled[:,1]

# --- k-means parameters ---
nclusters = 3
nsteps = 15
np.random.seed(0)

# --- choose starting points ---
indices = np.random.choice(len(x), size=nclusters, replace=False)
mux = x[indices]
muy = y[indices]

# --- plot before ---
fig, ax = plt.subplots()
plt.plot(x, y, '.', alpha=0.5)
vor = Voronoi(np.stack((mux, muy), axis=1))
voronoi_plot_2d(vor, ax=ax, show_points=True, show_vertices=False, point_size=20)
plt.title("before k-means iteration")
plt.show()

# --- k-means iteration ---
for _ in range(nsteps):

    xm = np.outer(x, np.ones(len(mux)))
    ym = np.outer(y, np.ones(len(muy)))
    muxm = np.outer(np.ones(len(x)), mux)
    muym = np.outer(np.ones(len(x)), muy)

    distances = np.sqrt((xm - muxm)**2 + (ym - muym)**2)
    mins = np.argmin(distances, axis=1)

    for k in range(len(mux)):
        idx = np.where(mins == k)[0]
        if len(idx) > 0:
            mux[k] = np.mean(x[idx])
            muy[k] = np.mean(y[idx])

# --- plot after ---
fig, ax = plt.subplots()
plt.plot(x, y, '.', alpha=0.5)
vor = Voronoi(np.stack((mux, muy), axis=1))
voronoi_plot_2d(vor, ax=ax, show_points=True, show_vertices=False, point_size=20)
plt.title("after k-means iteration")
plt.show()
[Figures: standardized data with Voronoi cells, before and after the k-means iterations]
Observation:

2. Gaussian Mixture Models with Fab Academy Git data¶

I also tried Gaussian Mixture Models using the total number of Git commits and the number of active weeks (weeks with at least one commit).
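For reference, these are the E-M updates the cell below implements for axis-aligned Gaussians over the N standardized points (this restates the code, not a different method).

E-step, responsibilities from the current model:

$$p(c \mid x_n, y_n) = \frac{p(x_n, y_n \mid c)\,p(c)}{\sum_{c'} p(x_n, y_n \mid c')\,p(c')}$$

M-step, model from the responsibilities (shown for the x coordinate; y is analogous):

$$p(c) = \frac{1}{N}\sum_n p(c \mid x_n, y_n), \qquad \mu_{x,c} = \frac{\sum_n x_n\,p(c \mid x_n, y_n)}{N\,p(c)}, \qquad \sigma_{x,c}^2 = \frac{\sum_n (x_n - \mu_{x,c})^2\,p(c \mid x_n, y_n)}{N\,p(c)}$$

The code adds 0.1 to each updated variance so that a cluster cannot collapse onto a single point.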

In [12]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
#
# Gaussian mixture model parameters
#
nclusters = 3
nsteps = 25
nplot = 100
np.random.seed(0)

# --- load data ---
df = pd.read_csv("datasets/fabacademy_commit_weekly_summary.csv")
week_cols = [c for c in df.columns if "week_" in c]

x_raw = df["commit_total"].to_numpy()
y_raw = (df[week_cols] > 0).sum(axis=1).to_numpy()

# --- standardize the features ---
scaler = StandardScaler()
XY = np.vstack([x_raw, y_raw]).T
XY_scaled = scaler.fit_transform(XY)
x = XY_scaled[:,0]
y = XY_scaled[:,1]
npts = len(x)



#
# choose starting points and initialize
#
indices = np.random.choice(npts, size=nclusters, replace=False)
mux = x[indices]
muy = y[indices]
varx = (np.max(x)-np.min(x))**2
vary = (np.max(y)-np.min(y))**2
pc = np.ones(nclusters)/nclusters
#
# plot before iteration
#
fig,ax = plt.subplots()
plt.plot(x,y,'.')
plt.errorbar(mux,muy,xerr=np.sqrt(varx),yerr=np.sqrt(vary),fmt='r.',markersize=20)
plt.autoscale()
plt.title('before iteration')
plt.show()
#
# do E-M iterations
#
for i in range(nsteps):
    #
    # construct matrices
    #
    xm = np.outer(x,np.ones(nclusters))
    ym = np.outer(y,np.ones(nclusters))
    muxm = np.outer(np.ones(npts),mux)
    muym = np.outer(np.ones(npts),muy)
    varxm = np.outer(np.ones(npts),varx)
    varym = np.outer(np.ones(npts),vary)
    pcm = np.outer(np.ones(npts),pc)
    #
    # use model to update probabilities
    #
    pvgc = (1/np.sqrt(2*np.pi*varxm))*\
       np.exp(-(xm-muxm)**2/(2*varxm))*\
       (1/np.sqrt(2*np.pi*varym))*\
       np.exp(-(ym-muym)**2/(2*varym))
    pvc = pvgc*np.outer(np.ones(npts),pc)
    pcgv = pvc/np.outer(np.sum(pvc,1),np.ones(nclusters))
    #
    # use probabilities to update model
    #
    pc = np.sum(pcgv,0)/npts
    mux = np.sum(xm*pcgv,0)/(npts*pc)
    muy = np.sum(ym*pcgv,0)/(npts*pc)
    varx = 0.1+np.sum((xm-muxm)**2*pcgv,0)/(npts*pc)
    vary = 0.1+np.sum((ym-muym)**2*pcgv,0)/(npts*pc)
#
# plot after iteration
#
fig,ax = plt.subplots()
plt.plot(x,y,'.')
plt.errorbar(mux,muy,xerr=np.sqrt(varx),yerr=np.sqrt(vary),fmt='r.',markersize=20)
plt.autoscale()
plt.title('after iteration')
plt.show()
#
# plot distribution
#
xplot = np.linspace(np.min(x),np.max(x),nplot)
yplot = np.linspace(np.min(y),np.max(y),nplot)
(X,Y) = np.meshgrid(xplot,yplot)
p = np.zeros((nplot,nplot))
for c in range(nclusters):
    p += np.exp(-(X-mux[c])**2/(2*varx[c]))/np.sqrt(2*np.pi*varx[c])\
       *np.exp(-(Y-muy[c])**2/(2*vary[c]))/np.sqrt(2*np.pi*vary[c])\
       *pc[c]
fig, ax = plt.subplots(subplot_kw={"projection":"3d"})
ax.plot_surface(X,Y,p)
plt.title('probability distribution')
plt.show()
[Figures: clusters before iteration, clusters after iteration, and the fitted probability distribution surface]

3. Cluster-Weighted Modeling with Fab Academy Data¶

Cluster-weighted modeling, also called mixture of experts and related to Bayesian networks, puts functional models inside Gaussian kernels; it can use full covariances in low dimensions and per-axis variances in high dimensions.

Using the total number of Git commits and the number of active weeks (weeks with at least one commit), I applied cluster-weighted modeling based on the class example.
For the labels, I used whether each student graduated (1) or did not graduate (0).
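The decision surface plotted at the end of the cell is the posterior probability of graduating given a point, evaluated as a kernel-weighted mixture over the clusters (this is what the line computing p over the plot grid does):

$$p(s \mid \vec{x}) = \frac{\sum_c p(s \mid c)\,p(\vec{x} \mid c)\,p(c)}{\sum_c p(\vec{x} \mid c)\,p(c)}$$

The 0.5 contour of p(s=1 | x) is drawn as the decision boundary.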

  • Data Preparation
    • datasets/fabacademy_with_projects_direct_2019_2025.json
    {
        "year": 2019,
        "labs": [
          {
            "lab_name_raw": "AKGEC",
            "lab_name": "AKGEC",
            "lab_url": "https://fabacademy.org/2019/labs/akgec.html",
            "students": [
              {
                "name": "Rajat Ratewal",
                "url": "https://fabacademy.org/2019/labs/akgec/students/rajat-ratewal",
                "graduate_year": 2019,
                "project_match": {
                  "count": 1,
                  "projects": [
                    {
                      "id": 1286,
                      "path": "academany/fabacademy/2019/labs/akgec/students/rajat-ratewal",
                      "url": "https://gitlab.fabcloud.org/academany/fabacademy/2019/labs/akgec/students/rajat-ratewal"
                    }
                  ]
                }
              },
    
In [2]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
import pandas as pd
import json

#
# Fab Academy data preparation  ( add graduated label )
#
df = pd.read_csv("datasets/fabacademy_commit_weekly_summary.csv")

with open("datasets/fabacademy_with_projects_direct_2019_2025.json","r") as f:
    data = json.load(f)

records = []
for year in data:
    for lab in year["labs"]:
        for s in lab["students"]:
            graduated = 1 if s.get("graduate_year") is not None else 0
            pm = s.get("project_match", {})
            projects = pm.get("projects", [])
            for p in projects:
                pid = p.get("id", None)
                if pid is not None:
                    records.append({
                        "project_id": pid,
                        "graduated": graduated,
                    })

grad_df = pd.DataFrame(records)

df = df.merge(grad_df, on="project_id", how="left")
df["graduated"] = df["graduated"].fillna(0).astype(int)
print(df.head())

week_cols = [c for c in df.columns if "week_" in c]
x_raw = df["commit_total"].to_numpy()
y_raw = (df[week_cols] > 0).sum(axis=1).to_numpy()
state_raw = df["graduated"].astype(int).to_numpy()

samples = np.column_stack((x_raw, y_raw))  # shape: (npts, 2)
scaler = StandardScaler()
samples_scaled = scaler.fit_transform(samples)  # shape: (npts, 2)
points = samples_scaled.T  # shape: (2, npts)

states = state_raw
npts = len(states)


#
# state cluster-weighted modeling parameters
#
dim = 2
nstates = 2
nclusters = 5
momentum = 0.9
rate = 0.1
min_var = 0.1
niterations = 100
np.random.seed(10)

#
# initialize arrays
#
indices = np.random.randint(0,npts,nclusters)
means = points[:,indices]
dmean = np.zeros((dim,nclusters))
variances = np.outer(np.var(points,axis=1),np.ones(nclusters))
pc = np.ones(nclusters)/nclusters
pxgc = np.zeros((nclusters,npts))
psgxc = np.ones((nclusters,nstates))/nstates

print("=== INITIAL STATE ===")
print("initial means:\n", means)
print("initial variances:\n", variances)

#
# plot data
#
plt.plot(points[0,states==0],points[1,states==0],ls='',marker='$0$',markeredgecolor='green')
plt.plot(points[0,states==1],points[1,states==1],ls='',marker='$1$',markeredgecolor='orange')
plt.xlim([np.min(points[0,:])-1,np.max(points[0,:])+1])
plt.ylim([np.min(points[1,:])-1,np.max(points[1,:])+1])

#
# plot clusters
#
def plot_var():
   for cluster in range(means.shape[1]):
      x = means[0,cluster]
      y = means[1,cluster]
      label = np.argmax(psgxc[cluster,:])   # most likely state for this cluster
      dx = np.sqrt(variances[0,cluster])
      dy = np.sqrt(variances[1,cluster])
      plt.plot([x-dx,x+dx],[y,y],'k')
      plt.plot([x,x],[y-dy,y+dy],'k')
      plt.plot([x],[y],'k',marker=f'${label}$',markersize=15)

plot_var()
plt.title('variance clusters before iteration')
plt.xlabel("Total Commits (standardized)")
plt.ylabel("Active Weeks (standardized)")
plt.show()

#
# do E-M iteration
#
for i in range(niterations):
   for cluster in range(nclusters):
      mean = np.outer(means[:,cluster],np.ones(npts))
      variance = np.outer(variances[:,cluster],np.ones(npts))
      pxgc[cluster,:] = np.prod(np.exp(-(points-mean)**2/(2*variance))\
         /np.sqrt(2*np.pi*variance),0)
   pxc = pxgc*np.outer(pc,np.ones(npts))
   pcgx = pxc/np.outer(np.ones(nclusters),np.sum(pxc,0))
   psxc = psgxc[:,states]*pxc
   pcgsx = psxc/np.outer(np.ones(nclusters),np.sum(psxc,0))
   pc = np.sum(pcgsx,1)/npts
   for cluster in range(nclusters):
      newmean = momentum*dmean[:,cluster]\
         +np.sum(points*np.outer(np.ones(dim),pcgsx[cluster,:]),1)\
         /np.sum(np.outer(np.ones(dim),pcgsx[cluster,:]),1)
      dmean[:,cluster] = newmean-means[:,cluster]
      means[:,cluster] += rate*dmean[:,cluster]
      m = np.outer(means[:,cluster],np.ones(npts))
      variances[:,cluster] = np.sum((points-m)**2\
          *np.outer(np.ones(dim),pcgsx[cluster,:]),1)\
         /np.sum(np.outer(np.ones(dim),pcgsx[cluster,:]),1)\
         +min_var
   for state in range(nstates):
      index = np.argwhere(states == state)
      psgxc[:,state] = np.sum(pcgsx[:,index],1)[:,0]/np.sum(pcgsx[:,:],1)
    
   if i % 20 == 0:
      print(f"\n=== iteration {i} ===")
      print("pcgsx (first 5 samples):\n", pcgsx[:, :5])
      print("means:\n", means)
      print("variances:\n", variances)
                                                         

#
# plot data
#
plt.plot(points[0,states==0],points[1,states==0],ls='',marker='$0$',markeredgecolor='green')
plt.plot(points[0,states==1],points[1,states==1],ls='',marker='$1$',markeredgecolor='orange')
plt.xlim([np.min(points[0,:])-1,np.max(points[0,:])+1])
plt.ylim([np.min(points[1,:])-1,np.max(points[1,:])+1])
plt.xlabel("Total Commits (standardized)")
plt.ylabel("Active Weeks (standardized)")
#
# plot clusters
#
plot_var()

#
# plot decision boundaries
#
ngrid = 100
xmin = np.min(points[0,:])
xmax = np.max(points[0,:])
ymin = np.min(points[1,:])
ymax = np.max(points[1,:])
x = np.linspace(xmin,xmax,ngrid)
y = np.linspace(ymin,ymax,ngrid)
mx,my = np.meshgrid(x,y)
x = np.reshape(mx,(ngrid*ngrid))
y = np.reshape(my,(ngrid*ngrid))
plotpoints = np.vstack((x,y))
pxgc = np.zeros((nclusters,ngrid*ngrid))
for cluster in range(nclusters):
   mean = np.outer(means[:,cluster],np.ones(ngrid*ngrid))
   variance = np.outer(variances[:,cluster],np.ones(ngrid*ngrid))
   pxgc[cluster,:] = np.prod(np.exp(-(plotpoints-mean)**2/(2*variance))\
      /np.sqrt(2*np.pi*variance),0)
pxc = pxgc*np.outer(pc,np.ones(ngrid*ngrid))
p = np.sum(np.outer(psgxc[:,1],np.ones(ngrid*ngrid))*pxc,0)/np.sum(pxc,0)
p = np.reshape(p,(ngrid,ngrid))
plt.contour(mx,my,p,[0.5])
plt.title('variance clusters and decision boundaries after iteration')

for k in range(nclusters):
    x, y = means[:, k]
    plt.text(x, y+0.2, f"{k}", color="red", fontsize=14, weight="bold")
plt.show()
    
#
# plot probability surface
#
fig, ax = plt.subplots(subplot_kw={"projection":"3d"})
ax.plot_wireframe(mx,my,p,rstride=10,cstride=10,color='gray')
plt.plot(points[0,states==0],points[1,states==0],zdir='z',ls='',marker='$0$',markeredgecolor='green')
plt.plot(points[0,states==1],points[1,states==1],zdir='z',ls='',marker='$1$',markeredgecolor='orange')
plt.title('probability of graduating (state 1)')
ax.set_xlabel("Git Commit Count (std.)")
ax.set_ylabel("Active Weeks (std.)")
ax.set_zlabel("Graduation Probability")
plt.show()

#
# Cluster ID
#
cluster_ids = np.argmax(pcgsx, axis=0)
df["cluster_id"] = cluster_ids

print("\n=== FINAL CLUSTER ASSIGNMENT ===")
print("cluster counts:", np.bincount(cluster_ids))
print("cluster_ids (first 20):", cluster_ids[:20])
df.to_csv("datasets/fabacademy_commit_weekly_summary_with_cluster.csv", index=False)
   project_id  year lab_name     student_name  \
0        1286  2019    AKGEC    Rajat Ratewal   
1        1287  2019    AKGEC  Narender Sharma   
2        1285  2019    AKGEC   Ashish Sawhney   
3        1290  2019    AKGEC     Jay Dhariwal   
4        1289  2019    AKGEC        Ahmad Ali   

                                         student_url  commit_total  \
0  https://fabacademy.org/2019/labs/akgec/student...            93   
1  https://fabacademy.org/2019/labs/akgec/student...            76   
2  https://fabacademy.org/2019/labs/akgec/student...            60   
3  https://fabacademy.org/2019/labs/akgec/student...           436   
4  https://fabacademy.org/2019/labs/akgec/student...           178   

                    first_commit                    last_commit  week_01  \
0  2019-01-22T10:18:25.000+05:30  2019-06-18T20:57:24.000+05:30        5   
1  2019-01-26T16:09:32.000+00:00  2019-07-16T12:08:12.000+05:30        0   
2  2019-01-25T18:06:56.000+05:30  2019-04-30T17:34:07.000+05:30        0   
3  2019-01-23T17:21:47.000+05:30  2019-07-02T22:00:56.000+05:30        0   
4  2019-01-26T14:36:19.000+00:00  2019-07-07T21:10:13.000+05:30        0   

   week_02  ...  week_22  week_23  week_24  week_25  week_26  week_27  \
0       10  ...        7        0        0        0        0        0   
1       18  ...        6        7        2        1        1        0   
2       23  ...        0        0        0        0        0        0   
3       77  ...       16       17       16        0        0        0   
4        1  ...        1       25       28        7        0        0   

   week_28  week_29  week_30  graduated  
0        0        0        0          1  
1        0        0        0          1  
2        0        0        0          0  
3        0        0        0          1  
4        0        0        0          1  

[5 rows x 39 columns]
=== INITIAL STATE ===
initial means:
 [[-0.44120982 -0.04209935  2.42299473 -0.81214779 -0.45999149]
 [-1.74266354  0.05971063  0.31719266 -1.35644051 -1.48518152]]
initial variances:
 [[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[Figure: variance clusters before iteration]
=== iteration 0 ===
pcgsx (first 5 samples):
 [[0.04079797 0.05033586 0.24308139 0.00974343 0.04299364]
 [0.77075836 0.73037864 0.16762755 0.4414894  0.75524682]
 [0.01648586 0.01241336 0.00153853 0.51725384 0.04179885]
 [0.09538906 0.11534593 0.30009103 0.01317395 0.08248496]
 [0.07656876 0.09152621 0.2876615  0.01833937 0.07747573]]
means:
 [[-0.44076397 -0.04372788  2.37553325 -0.77505202 -0.45382571]
 [-1.65299083  0.10504431  0.36207465 -1.28443505 -1.40389911]]
variances:
 [[0.29797491 0.35295058 3.05468787 0.37603603 0.30795111]
 [1.38973813 0.77391162 0.71317675 1.27313915 1.36613791]]

=== iteration 20 ===
pcgsx (first 5 samples):
 [[9.93622984e-03 1.56008412e-02 3.26790850e-01 2.65028431e-05
  6.35586568e-03]
 [9.48306415e-01 9.25147994e-01 1.94210323e-03 9.29562478e-01
  9.61370638e-01]
 [7.32297031e-03 7.82204451e-03 1.13548027e-03 7.00722186e-02
  8.14192736e-03]
 [9.57095789e-03 1.49896368e-02 3.88020755e-01 2.10020944e-04
  7.68682550e-03]
 [2.48634273e-02 3.64394832e-02 2.82110811e-01 1.28780016e-04
  1.64447437e-02]]
means:
 [[-0.47343591  0.10091222  2.69152185 -0.42643434 -0.43834146]
 [-0.92524246  0.74094345  0.42541603 -0.87939927 -0.79420413]]
variances:
 [[0.20839976 0.39297959 2.974327   0.269635   0.21846114]
 [0.57613224 0.31829148 0.78406621 0.56693994 0.64787531]]

=== iteration 40 ===
pcgsx (first 5 samples):
 [[1.36924412e-03 2.92518286e-03 6.53033301e-01 3.97475015e-07
  8.11111430e-04]
 [9.66148412e-01 9.55508263e-01 9.03518007e-03 8.59325320e-01
  9.70867916e-01]
 [1.22208644e-02 1.23998713e-02 7.74823572e-04 1.40258942e-01
  1.28075350e-02]
 [5.82731701e-03 8.86534828e-03 2.14020536e-01 6.54856747e-05
  4.26722879e-03]
 [1.44341622e-02 2.03013345e-02 1.23136159e-01 3.49854678e-04
  1.12462084e-02]]
means:
 [[-0.59823116  0.04594098  2.46659585 -0.42661089 -0.36232738]
 [-1.15017195  0.66144447  0.61651391 -0.79073272 -0.66707915]]
variances:
 [[0.17685197 0.36885601 2.97803582 0.24333229 0.25598487]
 [0.38970964 0.38541323 0.6929043  0.56258358 0.62264021]]

=== iteration 60 ===
pcgsx (first 5 samples):
 [[1.18144761e-02 2.09450921e-02 9.70466558e-01 2.05073654e-05
  7.44422501e-03]
 [9.72823776e-01 9.62998828e-01 5.14761563e-03 8.16609928e-01
  9.76056671e-01]
 [1.16413635e-02 1.16609251e-02 3.07069598e-04 1.71205019e-01
  1.21851016e-02]
 [1.32928623e-03 1.60584768e-03 1.46216204e-02 3.83290537e-03
  1.53591801e-03]
 [2.39109782e-03 2.78930665e-03 9.45713631e-03 8.33164034e-03
  2.77808420e-03]]
means:
 [[-0.53401951  0.01300487  2.55603818  0.34185177  0.41109641]
 [-0.97283871  0.70326558  0.78067199 -0.12469499 -0.01338167]]
variances:
 [[0.19769407 0.33683996 3.03742072 0.71776242 0.72833942]
 [0.48684077 0.35285985 0.58780268 0.621719   0.63509663]]

=== iteration 80 ===
pcgsx (first 5 samples):
 [[1.68352015e-02 2.84780765e-02 9.93627967e-01 5.84040601e-05
  1.09315562e-02]
 [9.73684881e-01 9.62274282e-01 5.05111647e-03 7.83472279e-01
  9.77491896e-01]
 [7.48174218e-03 7.30483535e-03 8.13552473e-05 1.31758439e-01
  7.71718433e-03]
 [6.97687826e-04 6.85669599e-04 8.13540288e-04 2.59020771e-02
  1.34308619e-03]
 [1.30048744e-03 1.25713635e-03 4.26021138e-04 5.88088012e-02
  2.51627732e-03]]
means:
 [[-0.51720361  0.00905448  2.92206922  1.25091988  1.41814073]
 [-0.93785175  0.70400624  0.87989508  0.2236297   0.29428099]]
variances:
 [[0.20834095 0.32483881 3.30456293 0.68112743 0.73871137]
 [0.51465941 0.34962538 0.53566329 0.6791331  0.72125249]]
[Figures: variance clusters and decision boundaries after iteration; 3-D probability-of-graduating surface]
=== FINAL CLUSTER ASSIGNMENT ===
cluster counts: [849 983  57  53  61]
cluster_ids (first 20): [1 1 0 1 1 0 1 1 1 0 0 0 1 1 2 2 4 4 1 0]
In [9]:
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(7,6))
fig.patch.set_facecolor("black")
ax.set_facecolor("black")

# --- wireframe probability surface ---
ax.plot_wireframe(mx, my, p,
                  rstride=12, cstride=12,
                  color="#DDDDDD",     
                  linewidth=1.0,      
                  alpha=0.9)           
ax.scatter(points[0, states==0], points[1, states==0], 0,
           color="#7FFFD4", s=8, alpha=0.8)
ax.scatter(points[0, states==1], points[1, states==1], 0,
           color="#FFD580", s=8, alpha=0.8)

ax.set_xlabel("Git Commit Count (std.)", color="white")
ax.set_ylabel("Active Weeks (std.)", color="white")
ax.set_zlabel("Graduation Probability", color="white")
ax.tick_params(colors="white")

ax.xaxis.pane.set_facecolor((0,0,0,0))
ax.yaxis.pane.set_facecolor((0,0,0,0))
ax.zaxis.pane.set_facecolor((0,0,0,0))
ax.grid(False)
ax.view_init(elev=30, azim=-45)

plt.show()
[Figure: styled 3-D graduation-probability surface over the commit data]

Observations:

  • The model reveals that consistent weekly activity is a stronger indicator of graduation than total commit volume.
  • Interestingly, students with moderate commits but high active weeks show the highest graduation probability, while those with high commits but irregular activity do not.
  • This suggests that persistence matters more than intensity.
  • Memo
    • During the EM iterations, the values in pcgsx gradually change as the model refines the cluster assignments. pcgsx[c, n] represents the probability that sample n belongs to cluster c, so in the printed blocks the rows are clusters and the columns are samples.

    • For example, at iteration 80 the first printed row (cluster 0, samples 0–4) is [1.68352015e-02 2.84780765e-02 9.93627967e-01 5.84040601e-05 1.09315562e-02], so it is sample 2 that belongs to cluster 0 with probability 0.9936. Reading down the first column instead gives sample 0's probabilities across clusters 0–4, roughly [0.017, 0.974, 0.007, 0.001, 0.001], so the model assigns sample 0 to cluster 1, consistent with the final cluster_ids output, which starts [1 1 0 ...]. The snippet below shows the same read in code.
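    • The same read in code, on the trained pcgsx array (column n holds sample n's cluster probabilities):

      print(pcgsx[:, 0])             # [0.0168, 0.9737, 0.0075, 0.0007, 0.0013]
      print(np.argmax(pcgsx[:, 0]))  # 1, matching cluster_ids[0]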

Commits Visualization¶

Building on the three.js globe I created in Week 1, I am now extending it to an interactive visualization that incorporates the clustering results.

Each commit is visualized as an arc drawn from the student’s lab to Boston (the assumed server location), using its timestamp. The color of the arc represents the cluster assigned to that student’s commit pattern; a sketch of the data export follows the cluster list below.

  • Cluster 0: Minimal Participants
  • Cluster 1: Low Volume, Steady Progressors
  • Cluster 2: High-Commit Burst Workers
  • Cluster 3: Moderate, Consistent Contributors
  • Cluster 4: High-Activity Multi-Taskers
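As a sketch of the export step for this visualization (assumptions, not part of the pipeline above: a hypothetical lab_locations.csv with lab_name, lat, lon columns, and an arbitrary five-color palette; one arc per student here, since per-commit arcs with timestamps would need the raw commit log):

import json
import pandas as pd

# clustering output saved earlier in this notebook
df = pd.read_csv("datasets/fabacademy_commit_weekly_summary_with_cluster.csv")

# hypothetical lookup table of lab coordinates (lab_name, lat, lon)
labs = pd.read_csv("datasets/lab_locations.csv")
df = df.merge(labs, on="lab_name", how="left")

BOSTON = {"lat": 42.3601, "lon": -71.0589}  # assumed server location
palette = ["#888888", "#7FFFD4", "#FF6B6B", "#FFD580", "#9B8CFF"]  # one color per cluster 0-4

# one arc record per student, from lab to Boston, colored by cluster
arcs = [
    {
        "startLat": row.lat, "startLng": row.lon,
        "endLat": BOSTON["lat"], "endLng": BOSTON["lon"],
        "color": palette[int(row.cluster_id)],
        "label": row.student_name,
    }
    for row in df.dropna(subset=["lat", "lon"]).itertuples()
]

with open("arcs.json", "w") as f:
    json.dump(arcs, f)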
[Image: three.js globe with commit arcs colored by cluster]