# Voronoi tessellation
import matplotlib.pyplot as plt
from scipy.spatial import Voronoi, voronoi_plot_2d
import numpy as np
import time

#
# Step 1: Set k-means parameters
npts = 1000        # number of points drawn around each generating center
nclusters = 3      # number of centroids to initialize
nsteps = 10        # number of k-means iterations
xs = [0, 5, 10]    # x-centers for generating data clusters
ys = [0, 10, 5]    # y-centers for generating data clusters
np.random.seed(0)  # fixed seed so every run draws the same numbers

#
# Step 2: Generate random data around the specified cluster centers
#         (unit-variance Gaussian blobs, npts points per center)
x_parts = []
y_parts = []
for center_x, center_y in zip(xs, ys):
    # x is drawn before y for each center so the seeded random stream is
    # consumed in a fixed, reproducible order.
    x_parts.append(np.random.normal(loc=center_x, scale=1, size=npts))
    y_parts.append(np.random.normal(loc=center_y, scale=1, size=npts))
# Join once at the end instead of re-allocating the arrays on every append.
x = np.concatenate(x_parts)
y = np.concatenate(y_parts)

#
# Step 3: Choose the initial cluster centers (centroids) randomly
# Sample *without replacement*: drawing the same data point twice would give
# two identical centroids, which breaks the Voronoi construction and leaves
# one cluster permanently empty.
indices = np.random.choice(len(x), size=nclusters, replace=False)
mux = x[indices]   # initial x-centroids (fancy indexing returns a copy)
muy = y[indices]   # initial y-centroids

#
# Step 4: Plot the data, the initial cluster centers and their Voronoi diagram
fig, ax = plt.subplots()
ax.plot(x, y, '.')
# One (x, y) row per centroid, as expected by Voronoi.
initial_centroids = np.stack((mux, muy), axis=1)
vor = Voronoi(initial_centroids)
voronoi_plot_2d(
    vor,
    ax=ax,
    show_points=True,
    show_vertices=False,
    point_size=20,
)
# Re-run autoscaling after the Voronoi overlay has been drawn.
ax.autoscale()
ax.set_title('before k-means iterations')
plt.show()

#
# Step 5: Perform k-means iterations (nsteps passes of assign + update).
# mux / muy are updated in place.
for step in range(nsteps):

    #
    # Step 5-1: Compute the distance from each data point to each centroid.
    # Broadcasting an (n, 1) column against the (k,) centroid vector gives an
    # (n, k) table directly, without building repeated copies via np.outer.
    dx = x[:, np.newaxis] - mux
    dy = y[:, np.newaxis] - muy
    distances = np.sqrt(dx**2 + dy**2)

    # Step 5-2: Assign each data point to the closest centroid
    # (mins[j] is the centroid index chosen by data point j).
    mins = np.argmin(distances, axis=1)

    #
    # Step 5-3: Update each centroid to the mean of its assigned points.
    # A distinct loop variable is used so the outer step counter is not
    # shadowed.
    for cluster in range(len(mux)):
        members = mins == cluster
        count = int(members.sum())
        if count == 0:
            # No point was assigned to this centroid: keep its previous
            # position instead of dividing by zero and turning it into NaN
            # (a NaN centroid would also break the Voronoi plot later).
            continue
        mux[cluster] = np.sum(x[members]) / count
        muy[cluster] = np.sum(y[members]) / count

#
# Step 6: Plot the data, the centroids after the k-means iterations and
#         their Voronoi diagram
fig, ax = plt.subplots()
ax.plot(x, y, '.')
# One (x, y) row per centroid, as expected by Voronoi.
final_centroids = np.stack((mux, muy), axis=1)
vor = Voronoi(final_centroids)
voronoi_plot_2d(
    vor,
    ax=ax,
    show_points=True,
    show_vertices=False,
    point_size=20,
)
# Re-run autoscaling after the Voronoi overlay has been drawn.
ax.autoscale()
ax.set_title('after k-means iteration')
plt.show()

# Step 1: Load the data set from CSV
import numpy as np

# Comma-separated file; the first row is skipped (skiprows=1).
# Alternative data set (currently disabled): "data/day6_dummy_data.csv"
data = np.loadtxt(
    "data/day6_realistic_dummy_data.csv",
    delimiter=",",
    skiprows=1,
)

# Column 0 is used as x, column 1 as y.
x, y = data[:, 0], data[:, 1]

# Step 2: Clean the data: keep only rows where neither coordinate is NaN.
# (No normalization is performed here.)
mask = ~(np.isnan(x) | np.isnan(y))
x, y = x[mask], y[mask]

# Steps 3-4: Fit k-means, with centroids initialized by k-means++
from sklearn.cluster import KMeans

k = 4
samples = np.stack((x, y), axis=1)   # one (x, y) row per data point
model = KMeans(n_clusters=k, init="k-means++")
model.fit(samples)

# Cluster index of every sample, and the fitted cluster centers.
labels, centroids = model.labels_, model.cluster_centers_

# Step 5: Build the Voronoi diagram of the fitted centroids
from scipy.spatial import Voronoi, voronoi_plot_2d
vor = Voronoi(centroids)

# Compute a common plotting range (the data extent) shared by the figures below
xmin, ymin = x.min(), y.min()
xmax, ymax = x.max(), y.max()

# Step 6-1: Plot the clustered data points only (no Voronoi overlay)
import matplotlib.pyplot as plt

fig1, ax1 = plt.subplots(figsize=(8, 8))
# Data points, colored by their cluster label.
ax1.scatter(x, y, c=labels, s=5)
# Cluster centers (large orange dots with a black outline).
ax1.scatter(*centroids.T, color="orange", s=100, edgecolor="black")

ax1.set_title("Step 6-1: Clustered Data Only")
ax1.set_xlim(xmin, xmax)
ax1.set_ylim(ymin, ymax)
# Same scale on both axes.
ax1.set_aspect("equal", adjustable="box")
plt.show()

# Step 6-2: Plot the clustered data together with the Voronoi diagram
fig2, ax2 = plt.subplots(figsize=(8, 8))

# Data points, colored by their cluster label.
ax2.scatter(x, y, c=labels, s=5)
# Cluster centers (large orange dots with a black outline).
ax2.scatter(*centroids.T, color="orange", s=100, edgecolor="black")

# Pass the axes explicitly so the diagram is drawn onto this figure.
voronoi_plot_2d(
    vor,
    ax=ax2,
    show_points=True,
    show_vertices=False,
)

# Limits are set after the overlay so the view stays on the data extent.
ax2.set_title("Step 6-2: k-means + Voronoi Diagram")
ax2.set_xlim(xmin, xmax)
ax2.set_ylim(ymin, ymax)
ax2.set_aspect("equal", adjustable="box")
plt.show()

# Step 1: Load the Tokyo data set from CSV
import numpy as np

# Comma-separated file; the first row is skipped (skiprows=1).
data = np.loadtxt(
    "data/day6_tokyo.csv",
    delimiter=",",
    skiprows=1,
)

# Column 1 (longitude, east-west) is the x axis;
# column 0 (latitude, north-south) is the y axis.
# NOTE(review): the values are used as plain coordinates from here on, so one
# degree of longitude is treated like one degree of latitude - confirm that
# this is acceptable for the analysis.
x, y = data[:, 1], data[:, 0]

# Step 2: Clean the data: keep only rows where neither coordinate is NaN.
# (No normalization is performed here.)
mask = ~(np.isnan(x) | np.isnan(y))
x, y = x[mask], y[mask]

# Steps 3-4: Fit k-means, with centroids initialized by k-means++
from sklearn.cluster import KMeans

k = 6
samples = np.stack((x, y), axis=1)   # one (x, y) row per data point
model = KMeans(n_clusters=k, init="k-means++")
model.fit(samples)

# Cluster index of every sample, and the fitted cluster centers.
labels, centroids = model.labels_, model.cluster_centers_

# Step 5: Build the Voronoi diagram of the fitted centroids
from scipy.spatial import Voronoi, voronoi_plot_2d
vor = Voronoi(centroids)

# Compute a common plotting range (the data extent) shared by the figures below
xmin, ymin = x.min(), y.min()
xmax, ymax = x.max(), y.max()

# Step 6-1: Plot the clustered data points only (no Voronoi overlay)
import matplotlib.pyplot as plt

fig1, ax1 = plt.subplots(figsize=(8, 8))
# Data points, colored by their cluster label.
ax1.scatter(x, y, c=labels, s=5)
# Cluster centers (large orange dots with a black outline).
ax1.scatter(*centroids.T, color="orange", s=100, edgecolor="black")

ax1.set_title("Step 6-1: Clustered Data Only")
ax1.set_xlim(xmin, xmax)
ax1.set_ylim(ymin, ymax)
# Same scale on both axes.
ax1.set_aspect("equal", adjustable="box")
plt.show()

# Step 6-2: Plot the clustered data together with the Voronoi diagram
fig2, ax2 = plt.subplots(figsize=(8, 8))

# Data points, colored by their cluster label.
ax2.scatter(x, y, c=labels, s=5)
# Cluster centers (large orange dots with a black outline).
ax2.scatter(*centroids.T, color="orange", s=100, edgecolor="black")

# Pass the axes explicitly so the diagram is drawn onto this figure.
voronoi_plot_2d(
    vor,
    ax=ax2,
    show_points=True,
    show_vertices=False,
)

# Limits are set after the overlay so the view stays on the data extent.
ax2.set_title("Step 6-2: k-means + Voronoi Diagram")
ax2.set_xlim(xmin, xmax)
ax2.set_ylim(ymin, ymax)
ax2.set_aspect("equal", adjustable="box")
plt.show()

# 6. Density Estimation

# Assignment

# Understanding the class sample code

# Fit a probability distribution to real data