PCA intuition

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec

np.random.seed(42)

# Generate data
num_points = 40
n_frames_per_angle = 2  # how many frames per angle. The lower - the faster.
# Standard-normal cloud, scaled by 1.6 along x and by 0.4 along y.
X = np.random.randn(num_points, 2)
X = X @ np.array([[1.6, 0.0], [0.0, 0.4]])

# Reference point through which the direction lines are drawn.
# Note: X itself is neither shifted nor rescaled here.
center_point = [0., 0.]

# PCA components
# X.T @ X is symmetric, so use eigh (real output, eigenvalues in ascending
# order) and select the eigenvector of the largest eigenvalue explicitly.
# np.linalg.eig gives no ordering guarantee, so its first eigenvector is not
# necessarily the dominant one.
eigvals, v = np.linalg.eigh(X.T @ X)
v_main = v[:, np.argmax(eigvals)]

# Layout: two stacked panels in one column; the upper panel gets five times
# the height of the lower one.
gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])

fig = plt.figure(figsize=(5, 6))  # overall figure size in inches

ax = plt.subplot(gs[0])   # upper panel: data, directions and projections
ax2 = plt.subplot(gs[1])  # lower panel: 1-D coordinates of the projections

scatter = ax.scatter(X[:, 0], X[:, 1], color='b', label="Data")

# Rotating direction; created empty and filled in by update().
direction_line = ax.plot([], [], 'k')[0]

# Fixed segment through center_point, spanning -3*v_main .. +3*v_main.
cx, cy = center_point
ax.plot(
    [-v_main[0]*3 + cx, v_main[0]*3 + cx],
    [-v_main[1]*3 + cy, v_main[1]*3 + cy],
    label="First singular vector of X",
)

# Projected points plus one connector line per data point, all empty for now.
projection_points = ax.plot([], [], 'ro', markersize=5, label="Projections")[0]
projection_lines = [ax.plot([], [], 'r')[0] for _ in range(num_points)]

# Lower panel: a fixed horizontal axis line and the (initially empty) markers.
direction_line2 = ax2.plot([-3.5, 3.5], [0, 0], 'k')[0]
projections = ax2.plot([], [], 'ro', markersize=7)[0]

def init():
    """Prepare both axes before the first animation frame.

    Styles the main axes (equal aspect, grid, centre marker, legend, title),
    fixes the limits of the lower axes and gives it a placeholder title, then
    tightens the figure layout.

    Returns:
        A flat tuple of artists: the data scatter, the rotating direction
        line, the projected points and every projection connector line.
    """
    ax.axis('equal')
    ax.grid(linestyle=":")
    ax.scatter(x=center_point[0], y=center_point[1], c='k')
    ax.legend(loc="upper right")
    ax.set_title("PCA")
    # ax.text(0.94, 0.945, "@fminxyz", transform=fig.transFigure,
    #         ha="right", va="top", fontsize=10, alpha=0.5)

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    ax2.grid(linestyle=":")
    # No direction is selected yet: with w = 0 the displayed value is 0.0.
    w = np.array([0, 0])
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(X@w)**2:.1f}")
    fig.tight_layout()
    # Unpack the list so the result is a flat iterable of artists, which is
    # what FuncAnimation expects from init_func when blitting is enabled.
    return (scatter, direction_line, projection_points, *projection_lines)


def update(frame):
    """Redraw the animation for one frame.

    The frame index is converted to an angle in degrees
    (``frame / n_frames_per_angle``). The unit vector ``w`` at that angle is
    the current direction: every row of ``X`` is projected onto it, and the
    direction line, projected points, connector lines, lower-panel markers
    and lower-panel title are updated accordingly.

    Args:
        frame: Frame value supplied by FuncAnimation.

    Returns:
        A flat tuple of the artists modified in this frame.
    """
    ax.set_xlim(-3.5+center_point[0], 3.5+center_point[0])
    ax.set_ylim(-3.5+center_point[1], 3.5+center_point[1])

    alpha = frame/n_frames_per_angle  # angle in degrees
    theta = np.radians(alpha)
    w = np.array([np.cos(theta), np.sin(theta)])

    # Signed coordinate of every point along w, computed once and reused.
    coords = X @ w
    # Projected points in the plane: coords[i] * w, offset by center_point.
    z = np.outer(coords, w) + center_point

    # One connector from each original point to its projection. The lines
    # were created red, so their colour does not need to be re-applied.
    for line, point, proj in zip(projection_lines, X, z):
        line.set_data([point[0], proj[0]], [point[1], proj[1]])

    projection_points.set_data(z[:, 0], z[:, 1])

    direction_line.set_data([-w[0]*3 + center_point[0],
                             w[0]*3 + center_point[0]],
                            [-w[1]*3 + center_point[1],
                             w[1]*3 + center_point[1]])

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    projections.set_data(coords, np.zeros_like(coords))
    # The number shown is the squared Euclidean norm of X @ w, i.e. the sum
    # of squared projection coordinates.
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(coords)**2:.1f}")

    # Flat iterable of artists (list unpacked), including the lower-panel
    # markers that are also changed above.
    return (direction_line, projection_points, projections, *projection_lines)

# Build the animation: update() is called once per frame value and init()
# prepares the axes beforehand.
ani = animation.FuncAnimation(
    fig,
    update,
    init_func=init,
    frames=np.arange(0, n_frames_per_angle*180),
    interval=1000/60,  # 60 fps
)

# Close the pyplot figure, then embed the rendered animation as HTML5 video.
# NOTE(review): presumably closed so the notebook shows only the video - confirm.
plt.close()
from IPython import display
html = display.HTML(ani.to_html5_video())
display.display(html)

# # Uncomment to save to the file
# ani.save("PCA_animation.mp4", writer='ffmpeg', fps=60, dpi=300)

Exercise: what’s wrong (1)?

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec

np.random.seed(42)

# Generate data
num_points = 40
n_frames_per_angle = 0.5  # how many frames per angle. The lower - the faster.
# Standard-normal cloud multiplied by the Cholesky factor of
# [[1, 0.6], [0.6, 0.6]], then translated by (-1, -1).
X = np.random.randn(num_points, 2)
X = X @ np.linalg.cholesky(np.array([[1, 0.6], [0.6, 0.6]]))
X = X - np.ones(2)

# Reference point through which the direction lines are drawn.
# Note: X itself is not modified by this assignment.
center_point = [0., 0.]

# PCA components
# X.T @ X is symmetric, so use eigh (real output, eigenvalues in ascending
# order) and select the eigenvector of the largest eigenvalue explicitly.
# np.linalg.eig gives no ordering guarantee, so its first eigenvector is not
# necessarily the dominant one.
eigvals, v = np.linalg.eigh(X.T @ X)
v_main = v[:, np.argmax(eigvals)]

# Layout: two stacked panels in one column; the upper panel gets five times
# the height of the lower one.
gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])

fig = plt.figure(figsize=(5, 6))  # overall figure size in inches

ax = plt.subplot(gs[0])   # upper panel: data, directions and projections
ax2 = plt.subplot(gs[1])  # lower panel: 1-D coordinates of the projections

scatter = ax.scatter(X[:, 0], X[:, 1], color='b', label="Data")

# Rotating direction; created empty and filled in by update().
direction_line = ax.plot([], [], 'k')[0]

# Fixed segment through center_point, spanning -3*v_main .. +3*v_main.
cx, cy = center_point
ax.plot(
    [-v_main[0]*3 + cx, v_main[0]*3 + cx],
    [-v_main[1]*3 + cy, v_main[1]*3 + cy],
    label="First singular vector of X",
)

# Projected points plus one connector line per data point, all empty for now.
projection_points = ax.plot([], [], 'ro', markersize=5, label="Projections")[0]
projection_lines = [ax.plot([], [], 'r')[0] for _ in range(num_points)]

# Lower panel: a fixed horizontal axis line and the (initially empty) markers.
direction_line2 = ax2.plot([-3.5, 3.5], [0, 0], 'k')[0]
projections = ax2.plot([], [], 'ro', markersize=7)[0]

def init():
    """Prepare both axes before the first animation frame.

    Styles the main axes (equal aspect, grid, centre marker, legend, title),
    fixes the limits of the lower axes and gives it a placeholder title, then
    tightens the figure layout.

    Returns:
        A flat tuple of artists: the data scatter, the rotating direction
        line, the projected points and every projection connector line.
    """
    ax.axis('equal')
    ax.grid(linestyle=":")
    ax.scatter(x=center_point[0], y=center_point[1], c='k')
    ax.legend(loc="upper right")
    ax.set_title("PCA")
    # ax.text(0.94, 0.945, "@fminxyz", transform=fig.transFigure,
    #         ha="right", va="top", fontsize=10, alpha=0.5)

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    ax2.grid(linestyle=":")
    # No direction is selected yet: with w = 0 the displayed value is 0.0.
    w = np.array([0, 0])
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(X@w)**2:.1f}")
    fig.tight_layout()
    # Unpack the list so the result is a flat iterable of artists, which is
    # what FuncAnimation expects from init_func when blitting is enabled.
    return (scatter, direction_line, projection_points, *projection_lines)


def update(frame):
    """Redraw the animation for one frame.

    The frame index is converted to an angle in degrees
    (``frame / n_frames_per_angle``). The unit vector ``w`` at that angle is
    the current direction: every row of ``X`` is projected onto it, and the
    direction line, projected points, connector lines, lower-panel markers
    and lower-panel title are updated accordingly.

    Args:
        frame: Frame value supplied by FuncAnimation.

    Returns:
        A flat tuple of the artists modified in this frame.
    """
    ax.set_xlim(-3.5+center_point[0], 3.5+center_point[0])
    ax.set_ylim(-3.5+center_point[1], 3.5+center_point[1])

    alpha = frame/n_frames_per_angle  # angle in degrees
    theta = np.radians(alpha)
    w = np.array([np.cos(theta), np.sin(theta)])

    # Signed coordinate of every point along w, computed once and reused.
    coords = X @ w
    # Projected points in the plane: coords[i] * w.
    z = np.outer(coords, w)

    # One connector from each original point to its projection. The lines
    # were created red, so their colour does not need to be re-applied.
    for line, point, proj in zip(projection_lines, X, z):
        line.set_data([point[0], proj[0]], [point[1], proj[1]])

    projection_points.set_data(z[:, 0], z[:, 1])

    direction_line.set_data([-w[0]*3 + center_point[0],
                             w[0]*3 + center_point[0]],
                            [-w[1]*3 + center_point[1],
                             w[1]*3 + center_point[1]])

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    projections.set_data(coords, np.zeros_like(coords))
    # The number shown is the squared Euclidean norm of X @ w, i.e. the sum
    # of squared projection coordinates.
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(coords)**2:.1f}")

    # Flat iterable of artists (list unpacked), including the lower-panel
    # markers that are also changed above.
    return (direction_line, projection_points, projections, *projection_lines)

# Build the animation: update() is called once per frame value and init()
# prepares the axes beforehand.
ani = animation.FuncAnimation(
    fig,
    update,
    init_func=init,
    frames=np.arange(0, n_frames_per_angle*180),
    interval=1000/60,  # 60 fps
)

# Close the pyplot figure, then embed the rendered animation as HTML5 video.
# NOTE(review): presumably closed so the notebook shows only the video - confirm.
plt.close()
from IPython import display
html = display.HTML(ani.to_html5_video())
display.display(html)

# # Uncomment to save to the file
# ani.save("PCA_animation.mp4", writer='ffmpeg', fps=60, dpi=300)