### code
# Fixed seed: every run builds the same graph.  Set print_ans to True to
# also print the number of clusters that was drawn.
set_random_seed(0)
print_ans = False

# Draw the number of clusters first, then one size per cluster.
k = choice([2,3,4])
sizes = [choice([4,5,6]) for _ in range(k)]
n = sum(sizes)

# cuts collects the running totals 0, s_1, s_1 + s_2, ...;
# consecutive entries delimit the vertex range of one cluster.
psum = 0
cuts = [psum]
for s in sizes:
    psum = psum + s
    cuts.append(psum)

# Start from the path on n vertices and discard its stored layout.
g = graphs.PathGraph(n)
g.set_pos(None)
# Inside each range [a, b) connect every pair of vertices.
for t in range(k):
    a, b = cuts[t], cuts[t + 1]
    for j in range(a + 1, b):
        for i in range(a, j):
            g.add_edge(i, j)

g.show(figsize=(5,5), title="$G$")
# Laplacian spectrum in ascending order.
L = g.laplacian_matrix()
eigs = sorted(L.eigenvalues())
print("eigenvalues:", eigs)

if print_ans:
    print("I would guess G has %s clusters." % (k,))


### code
import numpy as np
from scipy.linalg import eigh

# Color the vertices of the path on 11 vertices by the sign of the
# eigenvector belonging to the second-smallest Laplacian eigenvalue.
g = graphs.PathGraph(11)
L = np.array(g.laplacian_matrix())
# subset_by_index=(1,1) asks eigh for the eigenpair with index 1 only
# (eigenvalues are in ascending order).  It replaces the `eigvals` keyword,
# which SciPy deprecated and recent releases no longer accept; the other
# cells in this file already use subset_by_index.
eigs,vecs = eigh(L, subset_by_index=(1,1))
fvec = vecs.T[0]
# Entries within eps of zero are treated as zero.
eps = 0.00001
Np = [i for i in g.vertices() if fvec[i] > eps]
Nm = [i for i in g.vertices() if fvec[i] < -eps]
# `<=` so that the three lists together cover every vertex, including an
# entry whose absolute value is exactly eps.
Nz = [i for i in g.vertices() if abs(fvec[i]) <= eps]

# red: positive entries, green: negative entries, white: (near) zero entries
g.show(vertex_colors={"red": Np, "green": Nm, "white": Nz})


### code
import numpy as np
from scipy.linalg import eigh
import matplotlib.pyplot as plt
%matplotlib inline

g = graphs.CycleGraph(10)
n = g.order()
L = np.array(g.laplacian_matrix())
vals,vecs = eigh(L, subset_by_index=(1,2))
x,y = vecs.T

plt.axis("equal")
### plot points
plt.scatter(x, y, s=50, zorder=3)
### add vertex labels
for i in range(n):
    plt.annotate(i, (x[i], y[i]), zorder=4)
### add lines
for i,j in g.edges(labels=False):
    plt.plot([x[i],x[j]], [y[i],y[j]], 'c')


### code
import numpy as np
from scipy.linalg import eigh
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
%matplotlib notebook

g = graphs.CubeGraph(3)
g.relabel()
n = g.order()
L = np.array(g.laplacian_matrix())
vals,vecs = eigh(L, subset_by_index=(1,3))
x,y,z = vecs.T

ax = plt.axes(projection='3d')
ax.set_xlim(-1,1)
ax.set_ylim(-1,1)
ax.set_zlim(-1,1)

### plot points
ax.scatter(x, y, z, s=50, zorder=3)
### add vertex labels
for i in range(n):
    ax.text(x[i], y[i], z[i], i, zorder=4)
### add lines
for i,j in g.edges(labels=False):
    ax.plot([x[i],x[j]], [y[i],y[j]], [z[i],z[j]], 'c')


### code
%matplotlib inline
X = make_blobs()
y_new, centers = k_means(X, 3)
plt.scatter(*X.T, c=y_new)


### original image
import numpy as np
from PIL import Image

# Shrink factor: both sides of the picture are divided by r (integer division).
r = 10
img = Image.open('incrediville-side.jpg')
# img.size is unpacked into x and y and each one is scaled down separately.
x, y = img.size
img = img.resize(size=(x // r, y // r))

# A bare expression on the last line makes the notebook display the image.
img


### image segmentation by the k-means clustering algorithm
import matplotlib.pyplot as plt
# Number of segments; the figure layout below follows this value.
k = 3

# One row per pixel: the array of shape (m, n, r) is flattened to (m*n, r)
# so that k_means clusters the pixels by their r channel values.
# NOTE(review): the division by 256 presumably rescales 8-bit channel values
# to [0, 1) -- confirm the image mode.
arr = np.array(img, dtype=float) / 256
m,n,r = arr.shape
N = m * n
arr = arr.reshape(N, r)
y_new, centers = k_means(arr, k)

# One panel per cluster.  The panel count and the figure width use k instead
# of a hard-coded 3, so changing k above keeps this cell working.
# squeeze=False always returns a 2-D array of axes (even for k == 1); taking
# row 0 gives the 1-D sequence of the k panels.
fig,axs = plt.subplots(1, k, figsize=np.array([k*n,m])/100, squeeze=False)
axs = axs[0]
for i in range(k):
    axs[i].axis('off')
    # Binary mask of the pixels labelled i, reshaped back to the image grid.
    axs[i].imshow((y_new == i).reshape(m,n), cmap='binary')


import numpy as np
import matplotlib.pyplot as plt


def make_blobs(N=150, k=3, d=2, seed=None):
    """Generate a toy dataset made of k shifted standard-normal blobs.

    Input:
        N: an integer, number of samples
        k: an integer, number of blobs
        d: an integer, dimension of the space
        seed: value handed to np.random.seed before sampling;
            note that this reseeds NumPy's global random state on every call
    Output:
        a dataset X of shape (N, d);
        rows are grouped blob by blob, each blob has N // k rows
        and the last blob additionally takes the remaining rows
    """
    np.random.seed(seed)
    # Standard-normal noise first, blob centers second: the order of the two
    # draws determines the output for a given seed.
    points = np.random.randn(N, d)
    per_blob = N // k
    centers = 3 * np.random.randn(k, d)

    # Row ranges [start, stop) of the blobs; the last one runs to the end.
    starts = [per_blob * i for i in range(k)]
    stops = starts[1:] + [N]
    for center, start, stop in zip(centers, starts, stops):
        points[start:stop] += center
    return points


def dist_mtx(X, Y=None):
    """Return the distance matrix between rows of X and rows of Y

    Input:
        X: an array of shape (N,d)
        Y: an array of shape (M,d)
            if None, Y = X

    Output:
        the matrix [d_ij] of shape (N,M) where d_ij is the Euclidean
        distance between the i-th row of X and the j-th row of Y

    Raises:
        TypeError: if Y is neither a NumPy array nor None
    """
    if Y is None:
        # X is only read below, so it can stand in for Y without a copy.
        Y = X
    elif not isinstance(Y, np.ndarray):
        # The identity test above (instead of `Y == None`) guarantees that
        # objects with an elementwise __eq__ end up here as well.
        raise TypeError("Y should be a NumPy array or None")

    # Broadcast (N,1,d) against (1,M,d) to get all pairwise differences.
    diff = X[:, np.newaxis, :] - Y[np.newaxis, :, :]
    return np.sqrt(np.sum(diff**2, axis=-1))


def k_means(X, k, init="random"):
    """k-means clustering algorithm

    Input:
        X: an array of shape (N,d)
            rows for samples and columns for features
        k: number of clusters
        init: "random" or an array of shape (k,d)
            if "random", k points are chosen randomly from X as the initial cluster centers
            if an array, the array is used as the initial cluster centers
            (the array itself is not modified)

    Output:
        (y_new, centers)
        y_new: an array of shape (N,)
            that records the labels in (0, ..., k-1) of each sample
        centers: an array of shape (k,d)
            that records the cluster centers;
            always of a floating dtype, even when X or init hold integers

    Raises:
        TypeError: if init is neither a NumPy array nor "random"

    Notes:
        A cluster that receives no sample keeps its previous center
        instead of being replaced by the (undefined) mean of an empty set.

    Example:
        mu = np.array([3,3])
        cov = np.eye(2)
        X = np.vstack([np.random.multivariate_normal(mu, cov, 100),
                       np.random.multivariate_normal(-mu, cov, 100)])
        y_new,centers = k_means(X, 2)
    """
    N,d = X.shape

    ### initialize the centers
    if isinstance(init, np.ndarray):
        centers = init.copy()
    elif isinstance(init, str) and init == "random":
        # k distinct rows of X; indexing with an index array returns a copy.
        inds = np.random.choice(np.arange(N), k, replace=False)
        centers = X[inds, :]
    else:
        raise TypeError("init can only be a NumPy array or 'random'")

    # Cluster means are fractional in general; storing them in an integer
    # array would silently truncate them.
    if not np.issubdtype(centers.dtype, np.inexact):
        centers = centers.astype(float)

    ### initial labels: index of the nearest center
    y_new = dist_mtx(X, centers).argmin(axis=1)

    while True:
        ### compute the new centers
        for i in range(k):
            mask = (y_new == i)
            # The mean of an empty cluster is NaN, and a NaN center would
            # then win every argmin below; keep the old center instead.
            if mask.any():
                centers[i] = X[mask].mean(axis=0)

        ### generate the new y_new
        y_last = y_new
        y_new = dist_mtx(X, centers).argmin(axis=1)

        # Stop once an update no longer changes any label.
        if np.array_equal(y_last, y_new):
            break

    return y_new, centers

拉普拉斯嵌入法與譜分群

Main idea

Fiedler's partition theorem (simple version)

Algorithm (spectral embedding)

Side stories

Experiments

Exercise 1

Exercise 1(a)

Exercise 1(b)

Exercises

Exercise 2

Exercise 3

Exercise 4

Exercise 5

Exercise 5(a)

Exercise 5(b)

Exercise 5(c)

Exercise 5(d)

Exercise 6

Exercise 7

本次練習所需的函數