import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec
# Build the data set and every matplotlib artist used by the PCA animation.
np.random.seed(42)

# Generate data
num_points = 40
n_frames_per_angle = 2  # how many frames per angle. The lower - the faster.
X = np.random.randn(num_points, 2)
# Scale the two coordinates differently so that one direction dominates.
X = X @ np.array([[1.6, 0.0], [0.0, 0.4]])

# Point the direction line passes through; the data are used as generated.
center_point = [0., 0.]

# PCA components
_, v = np.linalg.eig(X.T @ X)
v_main = v.T[0]  # first column of the eigenvector matrix

# Set up the grid: two rows, one column, the first row 5 times the height of the second
gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])

fig = plt.figure(figsize=(5, 6))  # Adjust the total figure size as necessary

ax = plt.subplot(gs[0])  # The first subplot
ax2 = plt.subplot(gs[1])  # The second subplot

scatter = ax.scatter(X[:, 0], X[:, 1], color='b', label="Data")

# Rotating direction line; its data are filled in by update().
direction_line, = ax.plot([], [], 'k')
# Static reference line along v_main, drawn symmetrically around center_point.
ax.plot([-v_main[0]*3 + center_point[0],
         v_main[0]*3 + center_point[0]],
        [-v_main[1]*3 + center_point[1],
         v_main[1]*3 + center_point[1]], label="First singular vector of X")
projection_points, = ax.plot([], [], 'ro', markersize=5, label="Projections")
# One segment per data point, connecting the point with its projection.
projection_lines = [ax.plot([], [], 'r')[0] for _ in range(num_points)]

# Lower panel: a fixed horizontal axis plus the 1-D projected coordinates.
direction_line2, = ax2.plot([-3.5, 3.5], [0, 0], 'k')
projections, = ax2.plot([], [], 'ro', markersize=7)
def init():
    """Style both axes before the first frame and return the animated artists.

    Reads the module-level ``ax``, ``ax2``, ``fig``, ``X``, ``center_point``
    and the artists created by the set-up code.
    """
    ax.axis('equal')
    ax.grid(linestyle=":")
    # Mark the point the direction line passes through.
    ax.scatter(x=center_point[0], y=center_point[1], c='k')
    ax.legend(loc="upper right")
    ax.set_title("PCA")
    # ax.text(0.94, 0.945, "@fminxyz", transform=fig.transFigure,
    #         ha="right", va="top", fontsize=10, alpha=0.5)

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    # No direction is chosen yet, so the title value is computed for a zero vector.
    w = np.array([0, 0])
    ax2.grid(linestyle=":")
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(X@w)**2:.1f}")
    fig.tight_layout()
    return scatter, direction_line, projection_points, projection_lines
def update(frame):
    """Redraw the projection of ``X`` onto the direction selected by ``frame``.

    ``frame / n_frames_per_angle`` is interpreted as an angle in degrees; the
    unit vector at that angle is the direction the data are projected onto.
    Returns the artists that were modified.
    """
    ax.set_xlim(-3.5+center_point[0], 3.5+center_point[0])
    ax.set_ylim(-3.5+center_point[1], 3.5+center_point[1])
    alpha = frame/n_frames_per_angle
    w = np.array([np.cos(np.radians(alpha)), np.sin(np.radians(alpha))])
    coords = X @ w  # 1-D coordinate of every point along w, reused below
    # Projected points in the plane: (X w) w^T, shifted to center_point.
    z = X @ w.reshape(-1, 1) @ w.reshape(1, -1) + center_point

    # Connect every data point with its projection.
    for line, point, foot in zip(projection_lines, X, z):
        line.set_data([point[0], foot[0]], [point[1], foot[1]])
        line.set_color('r')

    projection_points.set_data(z[:, 0], z[:, 1])
    # distances = pdist(z)
    # max_distance = np.max(distances)
    # projection_points.set_label(f"Max Distance: {max_distance:.2f}")

    direction_line.set_data([-w[0]*3 + center_point[0],
                             w[0]*3 + center_point[0]],
                            [-w[1]*3 + center_point[1],
                             w[1]*3 + center_point[1]])

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    projections.set_data(coords, np.zeros(len(coords)))
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(coords)**2:.1f}")
    return direction_line, projection_points, projection_lines
# One frame per 1/n_frames_per_angle degrees, sweeping the direction over 180 degrees.
ani = animation.FuncAnimation(fig, update,
                              frames=np.arange(0, n_frames_per_angle*180),
                              interval=1000/60,  # 60 fps
                              init_func=init)

# Close the figure so only the rendered video is shown, not a static plot.
plt.close()

from IPython import display
html = display.HTML(ani.to_html5_video())
display.display(html)

# # Uncomment to save to the file
# ani.save("PCA_animation.mp4", writer='ffmpeg', fps=60, dpi=300)
PCA intuition
Exercise: what’s wrong (1)?
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec
# Build the data set and every matplotlib artist used by the animation.
# The data handling below is reproduced unchanged from the exercise cell.
np.random.seed(42)

# Generate data
num_points = 40
n_frames_per_angle = 0.5  # how many frames per angle. The lower - the faster.
X = np.random.randn(num_points, 2)
# Mix the coordinates with the Cholesky factor of a 2x2 matrix.
X = X @ np.linalg.cholesky(np.array([[1, 0.6], [0.6, 0.6]]))
# Shift every point by (-1, -1).
X = X - np.ones(2)

# Point the direction line passes through.
center_point = [0., 0.]

# PCA components
_, v = np.linalg.eig(X.T @ X)
v_main = v.T[0]  # first column of the eigenvector matrix

# Set up the grid: two rows, one column, the first row 5 times the height of the second
gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])

fig = plt.figure(figsize=(5, 6))  # Adjust the total figure size as necessary

ax = plt.subplot(gs[0])  # The first subplot
ax2 = plt.subplot(gs[1])  # The second subplot

scatter = ax.scatter(X[:, 0], X[:, 1], color='b', label="Data")

# Rotating direction line; its data are filled in by update().
direction_line, = ax.plot([], [], 'k')
# Static reference line along v_main, drawn symmetrically around center_point.
ax.plot([-v_main[0]*3 + center_point[0],
         v_main[0]*3 + center_point[0]],
        [-v_main[1]*3 + center_point[1],
         v_main[1]*3 + center_point[1]], label="First singular vector of X")
projection_points, = ax.plot([], [], 'ro', markersize=5, label="Projections")
# One segment per data point, connecting the point with its projection.
projection_lines = [ax.plot([], [], 'r')[0] for _ in range(num_points)]

# Lower panel: a fixed horizontal axis plus the 1-D projected coordinates.
direction_line2, = ax2.plot([-3.5, 3.5], [0, 0], 'k')
projections, = ax2.plot([], [], 'ro', markersize=7)
def init():
    """Style both axes before the first frame and return the animated artists.

    Reads the module-level ``ax``, ``ax2``, ``fig``, ``X``, ``center_point``
    and the artists created by the set-up code.
    """
    ax.axis('equal')
    ax.grid(linestyle=":")
    # Mark the point the direction line passes through.
    ax.scatter(x=center_point[0], y=center_point[1], c='k')
    ax.legend(loc="upper right")
    ax.set_title("PCA")
    # ax.text(0.94, 0.945, "@fminxyz", transform=fig.transFigure,
    #         ha="right", va="top", fontsize=10, alpha=0.5)

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    # No direction is chosen yet, so the title value is computed for a zero vector.
    w = np.array([0, 0])
    ax2.grid(linestyle=":")
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(X@w)**2:.1f}")
    fig.tight_layout()
    return scatter, direction_line, projection_points, projection_lines
def update(frame):
    """Redraw the projection of ``X`` onto the direction selected by ``frame``.

    ``frame / n_frames_per_angle`` is interpreted as an angle in degrees; the
    unit vector at that angle is the direction the data are projected onto.
    Returns the artists that were modified.
    """
    ax.set_xlim(-3.5+center_point[0], 3.5+center_point[0])
    ax.set_ylim(-3.5+center_point[1], 3.5+center_point[1])
    alpha = frame/n_frames_per_angle
    w = np.array([np.cos(np.radians(alpha)), np.sin(np.radians(alpha))])
    coords = X @ w  # 1-D coordinate of every point along w, reused below
    # Projected points in the plane: (X w) w^T (kept exactly as in the exercise).
    z = X @ w.reshape(-1, 1) @ w.reshape(1, -1)

    # Connect every data point with its projection.
    for line, point, foot in zip(projection_lines, X, z):
        line.set_data([point[0], foot[0]], [point[1], foot[1]])
        line.set_color('r')

    projection_points.set_data(z[:, 0], z[:, 1])
    # distances = pdist(z)
    # max_distance = np.max(distances)
    # projection_points.set_label(f"Max Distance: {max_distance:.2f}")

    direction_line.set_data([-w[0]*3 + center_point[0],
                             w[0]*3 + center_point[0]],
                            [-w[1]*3 + center_point[1],
                             w[1]*3 + center_point[1]])

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    projections.set_data(coords, np.zeros(len(coords)))
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(coords)**2:.1f}")
    return direction_line, projection_points, projection_lines
# One frame per 1/n_frames_per_angle degrees, sweeping the direction over 180 degrees.
ani = animation.FuncAnimation(fig, update,
                              frames=np.arange(0, n_frames_per_angle*180),
                              interval=1000/60,  # 60 fps
                              init_func=init)

# Close the figure so only the rendered video is shown, not a static plot.
plt.close()

from IPython import display
html = display.HTML(ani.to_html5_video())
display.display(html)

# # Uncomment to save to the file
# ani.save("PCA_animation.mp4", writer='ffmpeg', fps=60, dpi=300)
Exercise: what’s wrong (2)?
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec
# Build the data set and every matplotlib artist used by the animation.
# The data handling below is reproduced unchanged from the exercise cell.
np.random.seed(42)

# Generate data
num_points = 30
n_frames_per_angle = 1  # how many frames per angle. The lower - the faster.
X = np.random.randn(num_points, 2)

# Point the direction line passes through.
center_point = [0, 0]

# PCA components
eigs, v = np.linalg.eig(X.T @ X)
v_main = v.T[0]  # first column of the eigenvector matrix

# Set up the grid: two rows, one column, the first row 5 times the height of the second
gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])

fig = plt.figure(figsize=(5, 6))  # Adjust the total figure size as necessary

ax = plt.subplot(gs[0])  # The first subplot
ax2 = plt.subplot(gs[1])  # The second subplot

scatter = ax.scatter(X[:, 0], X[:, 1], color='b', label="Data")

# Rotating direction line; its data are filled in by update().
direction_line, = ax.plot([], [], 'k')

# Static reference line along v_main, drawn symmetrically around center_point.
ax.plot([-v_main[0]*3 + center_point[0],
         v_main[0]*3 + center_point[0]],
        [-v_main[1]*3 + center_point[1],
         v_main[1]*3 + center_point[1]], label="First singular vector of X")
projection_points, = ax.plot([], [], 'ro', markersize=5, label="Projections")
# One segment per data point, connecting the point with its projection.
projection_lines = [ax.plot([], [], 'r')[0] for _ in range(num_points)]

# Lower panel: a fixed horizontal axis plus the 1-D projected coordinates.
direction_line2, = ax2.plot([-3.5, 3.5], [0, 0], 'k')
projections, = ax2.plot([], [], 'ro', markersize=7)
def init():
    """Style both axes before the first frame and return the animated artists.

    Reads the module-level ``ax``, ``ax2``, ``fig``, ``X``, ``center_point``
    and the artists created by the set-up code.
    """
    ax.axis('equal')
    ax.grid(linestyle=":")
    # Mark the point the direction line passes through.
    ax.scatter(x=center_point[0], y=center_point[1], c='k')
    ax.legend(loc="upper right")
    ax.set_title("PCA")
    # ax.text(0.94, 0.945, "@fminxyz", transform=fig.transFigure,
    #         ha="right", va="top", fontsize=10, alpha=0.5)

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    # No direction is chosen yet, so the title value is computed for a zero vector.
    w = np.array([0, 0])
    ax2.grid(linestyle=":")
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(X@w)**2:.1f}")
    fig.tight_layout()
    return scatter, direction_line, projection_points, projection_lines
def update(frame):
    """Redraw the projection of ``X`` onto the direction selected by ``frame``.

    ``frame / n_frames_per_angle`` is interpreted as an angle in degrees; the
    unit vector at that angle is the direction the data are projected onto.
    Returns the artists that were modified.
    """
    ax.set_xlim(-3.5+center_point[0], 3.5+center_point[0])
    ax.set_ylim(-3.5+center_point[1], 3.5+center_point[1])
    alpha = frame/n_frames_per_angle
    w = np.array([np.cos(np.radians(alpha)), np.sin(np.radians(alpha))])
    coords = X @ w  # 1-D coordinate of every point along w, reused below
    # Projected points in the plane: (X w) w^T, shifted to center_point.
    z = X @ w.reshape(-1, 1) @ w.reshape(1, -1) + center_point

    # Connect every data point with its projection.
    for line, point, foot in zip(projection_lines, X, z):
        line.set_data([point[0], foot[0]], [point[1], foot[1]])
        line.set_color('r')

    projection_points.set_data(z[:, 0], z[:, 1])
    # distances = pdist(z)
    # max_distance = np.max(distances)
    # projection_points.set_label(f"Max Distance: {max_distance:.2f}")

    direction_line.set_data([-w[0]*3 + center_point[0],
                             w[0]*3 + center_point[0]],
                            [-w[1]*3 + center_point[1],
                             w[1]*3 + center_point[1]])

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    projections.set_data(coords, np.zeros(len(coords)))
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(coords)**2:.1f}")
    return direction_line, projection_points, projection_lines
# One frame per 1/n_frames_per_angle degrees, sweeping the direction over 180 degrees.
ani = animation.FuncAnimation(fig, update,
                              frames=np.arange(0, n_frames_per_angle*180),
                              interval=1000/60,  # 60 fps
                              init_func=init)

# Close the figure so only the rendered video is shown, not a static plot.
plt.close()

from IPython import display
html = display.HTML(ani.to_html5_video())
display.display(html)

# # Uncomment to save to the file
# ani.save("PCA_animation.mp4", writer='ffmpeg', fps=60, dpi=300)
Exercise: what’s “wrong” (3)?
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.gridspec as gridspec
# Build the data set and every matplotlib artist used by the animation.
# The data handling below is reproduced unchanged from the exercise cell.
np.random.seed(42)

# Generate data: two groups of num_points/2 points with the same scaling,
# the second one shifted by 2 along the y axis.
num_points = 60
n_frames_per_angle = 0.5  # how many frames per angle. The lower - the faster.
X_1 = np.random.randn(int(num_points/2), 2)
X_1 = X_1 @ np.array([[1.4, 0.0], [0.0, 0.2]])

X_2 = np.random.randn(int(num_points/2), 2)
X_2 = X_2 @ np.array([[1.4, 0.0], [0.0, 0.2]])
X_2 = X_2 + np.array([0, 2])

X = np.vstack([X_1, X_2])

# Normalize data: subtract the per-coordinate mean.
center_point = np.mean(X, axis=0)
X_std = X - center_point

# PCA components
_, v = np.linalg.eig(X_std.T @ X_std)
v_main = v.T[0]  # first column of the eigenvector matrix

# Set up the grid: two rows, one column, the first row 5 times the height of the second
gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])

fig = plt.figure(figsize=(5, 6))  # Adjust the total figure size as necessary

ax = plt.subplot(gs[0])  # The first subplot
ax2 = plt.subplot(gs[1])  # The second subplot

scatter = ax.scatter(X[:, 0], X[:, 1], color='b', label="Data")

# Rotating direction line; its data are filled in by update().
direction_line, = ax.plot([], [], 'k')
# Static reference line along v_main, drawn symmetrically around center_point.
ax.plot([-v_main[0]*3 + center_point[0],
         v_main[0]*3 + center_point[0]],
        [-v_main[1]*3 + center_point[1],
         v_main[1]*3 + center_point[1]], label="First eigenvector of X")
projection_points, = ax.plot([], [], 'ro', markersize=5, label="Projections")
# One segment per data point, connecting the point with its projection.
projection_lines = [ax.plot([], [], 'r')[0] for _ in range(num_points)]

# Lower panel: a fixed horizontal axis plus the 1-D projected coordinates.
direction_line2, = ax2.plot([-3.5, 3.5], [0, 0], 'k')
projections, = ax2.plot([], [], 'ro', markersize=7)
def init():
    """Style both axes before the first frame and return the animated artists.

    Reads the module-level ``ax``, ``ax2``, ``fig``, ``X_std``,
    ``center_point`` and the artists created by the set-up code.
    """
    ax.axis('equal')
    ax.grid(linestyle=":")
    # Mark the point the direction line passes through.
    ax.scatter(x=center_point[0], y=center_point[1], c='k')
    ax.legend(loc="upper right")
    ax.set_title("PCA")
    # ax.text(0.94, 0.945, "@fminxyz", transform=fig.transFigure,
    #         ha="right", va="top", fontsize=10, alpha=0.5)

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    # No direction is chosen yet, so the title value is computed for a zero vector.
    w = np.array([0, 0])
    ax2.grid(linestyle=":")
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(X_std@w)**2:.1f}")
    fig.tight_layout()
    return scatter, direction_line, projection_points, projection_lines
def update(frame):
    """Redraw the projection of the centered data onto the current direction.

    ``frame / n_frames_per_angle`` is interpreted as an angle in degrees; the
    unit vector at that angle is the direction ``X_std`` is projected onto.
    Returns the artists that were modified.
    """
    ax.set_xlim(-3.5+center_point[0], 3.5+center_point[0])
    ax.set_ylim(-3.5+center_point[1], 3.5+center_point[1])
    alpha = frame/n_frames_per_angle
    w = np.array([np.cos(np.radians(alpha)), np.sin(np.radians(alpha))])
    coords = X_std @ w  # 1-D coordinate of every centered point along w
    # Projected points in the plane, shifted back to the original coordinates.
    z = X_std @ w.reshape(-1, 1) @ w.reshape(1, -1) + center_point

    # Connect every original data point with its projection.
    for line, point, foot in zip(projection_lines, X, z):
        line.set_data([point[0], foot[0]], [point[1], foot[1]])
        line.set_color('r')

    projection_points.set_data(z[:, 0], z[:, 1])
    # distances = pdist(z)
    # max_distance = np.max(distances)
    # projection_points.set_label(f"Max Distance: {max_distance:.2f}")

    direction_line.set_data([-w[0]*3 + center_point[0],
                             w[0]*3 + center_point[0]],
                            [-w[1]*3 + center_point[1],
                             w[1]*3 + center_point[1]])

    ax2.set_xlim(-3.5, 3.5)
    ax2.set_ylim(-1, 1)
    projections.set_data(coords, np.zeros(len(coords)))
    ax2.set_title("Projections on the First Principal Component\n"
                  f"Variance of the projections: {np.linalg.norm(coords)**2:.1f}")
    return direction_line, projection_points, projection_lines
# One frame per 1/n_frames_per_angle degrees, sweeping the direction over 180 degrees.
ani = animation.FuncAnimation(fig, update,
                              frames=np.arange(0, n_frames_per_angle*180),
                              interval=1000/60,  # 60 fps
                              init_func=init)

# Close the figure so only the rendered video is shown, not a static plot.
plt.close()

from IPython import display
html = display.HTML(ani.to_html5_video())
display.display(html)

# # Uncomment to save to the file
# ani.save("PCA_animation.mp4", writer='ffmpeg', fps=60, dpi=300)
PCA with Iris
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
# Load the Iris data set and build a per-sample array of class names.
np.set_printoptions(suppress=True, precision=4)

dataset = load_iris()
A = dataset['data']
labels = dataset['target']
classes = dataset['target_names']
label_names = np.array([classes[label] for label in labels])

# NOTE(review): the emoji was restored from mis-encoded bytes; presumably U+1F914 - confirm.
print('🤔: Dataset contains {} points in {}-dimensional space'.format(*A.shape))
🤔: Dataset contains 150 points in 4-dimensional space
# NOTE(review): the leading character of both messages is a mis-encoded emoji
# whose original code point cannot be recovered from this text - restore it.
print('π Mean value over each dimension before the normalization',np.mean(A, axis = 0))

# Data normalization with zero mean and unit variance
A_std = StandardScaler().fit_transform(A)

print('π Mean value over each dimension after the normalization',np.mean(A_std, axis = 0))
π Mean value over each dimension before the normalization [5.8433 3.0573 3.758 1.1993]
π Mean value over each dimension after the normalization [-0. -0. -0. -0.]
# Main part: singular value decomposition of the standardized data.
u,s,wh = np.linalg.svd(A_std)

# NOTE(review): the emoji was restored from mis-encoded bytes; presumably U+1F914 - confirm.
print('🤔Shapes: \n A_std, {}\n u {}\n s {}. Singular values in descending order {} \n wh {}'.format(A_std.shape, u.shape, s.shape,s, wh.shape))
🤔Shapes:
A_std, (150, 4)
u (150, 150)
s (4,). Singular values in descending order [20.9231 11.7092 4.6919 1.7627]
wh (4, 4)
# Share of the total variance carried by each principal component.
# The variance along a principal axis is proportional to the SQUARED singular
# value, so the percentages are computed from s**2 rather than from s itself.
squared_singular_values = np.sort(s)[::-1] ** 2
total_variance = squared_singular_values.sum()
variance_explained = squared_singular_values / total_variance * 100
cumulative_variance_explained = np.cumsum(variance_explained)

# Bars are centered on half-integers so each one sits between two ticks.
xs = [0.5 + i for i in range(A_std.shape[1])]
plt.bar(xs, variance_explained, alpha=0.5, align='center',
        label='Individual explained variance')
plt.step(xs, cumulative_variance_explained, where='mid',
         label='Cumulative explained variance')
plt.ylabel('Explained variance')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.xticks(np.arange(A_std.shape[1]+1))
plt.show()
# Building projection matrix
rank = 2
w = wh.T

# Coordinates of every sample in the first `rank` principal directions: U_r diag(s_r).
projections = u[:,:rank] @ np.diag(s[:rank])

# One scatter series per class so that the legend lists the class names.
for label in classes:
    plt.scatter(projections[label_names == label, 0],
                projections[label_names == label, 1],
                label = label)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='best')
plt.grid(linestyle=":")
plt.show()
# plt.savefig('pca_pr_iris.svg')
# Built-in approach: the same 2-D projection computed by scikit-learn's PCA.
from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=2)
projections_sklearn = sklearn_pca.fit_transform(A_std)

# One scatter series per class so that the legend lists the class names.
for label in classes:
    plt.scatter(projections_sklearn[label_names == label, 0],
                projections_sklearn[label_names == label, 1],
                label = label)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='best')
plt.grid(linestyle=":")
plt.show()
# Time comparison
def svd_projections(A_std, rank=2):
    """Project the rows of ``A_std`` onto its leading principal directions.

    Args:
        A_std: 2-D array of samples (rows) by features (columns); the name
            suggests it is already standardized, which is not checked here.
        rank: number of leading components to keep (default 2).

    Returns:
        Array of shape ``(A_std.shape[0], rank)`` equal to ``U_r @ diag(s_r)``.
    """
    # The reduced SVD is enough: only the leading columns of U are used, so the
    # full square U does not have to be computed.
    u, s, _ = np.linalg.svd(A_std, full_matrices=False)
    # Scaling the columns of U by s is the same as multiplying by diag(s).
    return u[:, :rank] * s[:rank]
print('π SVD PCA running time')
%timeit svd_projections
print('π sklearn PCA running time')
%timeit sklearn_pca.fit_transform
SVD PCA running time
11.4 ns ± 0.227 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)
sklearn PCA running time
42.3 ns ± 0.484 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
PCA with wine
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from sklearn.datasets import load_wine #lol
from sklearn.preprocessing import StandardScaler
# PCA of the wine data set via SVD, with a 3-D scatter plot of the projections.
dataset = load_wine()
A = dataset['data']
labels = dataset['target']
classes = dataset['target_names']
label_names = np.array([classes[label] for label in labels])

# NOTE(review): the emoji was restored from mis-encoded bytes; presumably U+1F914 - confirm.
print('🤔: Dataset contains {} points in {}-dimensional space'.format(*A.shape))

# Data normalization with zero mean and unit variance
A_std = StandardScaler().fit_transform(A)

u,s,wh = np.linalg.svd(A_std)

# Share of the total variance carried by each principal component.
# The variance along a principal axis is proportional to the SQUARED singular
# value, so the percentages are computed from s**2 rather than from s itself.
squared_singular_values = np.sort(s)[::-1] ** 2
total_variance = squared_singular_values.sum()
variance_explained = squared_singular_values / total_variance * 100
cumulative_variance_explained = np.cumsum(variance_explained)

# Bars are centered on half-integers so each one sits between two ticks.
xs = [0.5 + i for i in range(A_std.shape[1])]
plt.bar(xs, variance_explained, alpha=0.5, align='center',
        label='Individual explained variance')
plt.step(xs, cumulative_variance_explained, where='mid',
         label='Cumulative explained variance')
plt.ylabel('Explained variance')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.xticks(np.arange(A_std.shape[1]+1))
plt.grid(linestyle=":")
plt.show()

plt.figure()
rank = 3
# Coordinates of every sample in the first `rank` principal directions.
projections = u[:,:rank] @ np.diag(s[:rank])

ax = plt.axes(projection="3d")

# One scatter series per class so that the legend lists the class names.
for label in classes:
    ax.scatter3D(projections[label_names == label, 0],
                 projections[label_names == label, 1],
                 projections[label_names == label, 2],
                 label = label)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
# pyplot has no zlabel(); the third axis is labelled on the 3-D axes object.
ax.set_zlabel('PC 3')
plt.legend(loc='best')
plt.show()
🤔: Dataset contains 178 points in 13-dimensional space
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import plotly.express as px
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
# PCA of the wine data set via SVD, with an interactive 3-D plotly scatter.
dataset = load_wine()
A = dataset['data']
labels = dataset['target']
classes = dataset['target_names']
label_names = np.array([classes[label] for label in labels])

# NOTE(review): the emoji was restored from mis-encoded bytes; presumably U+1F914 - confirm.
print('🤔: Dataset contains {} points in {}-dimensional space'.format(*A.shape))

# Data normalization with zero mean and unit variance
A_std = StandardScaler().fit_transform(A)

u, s, wh = np.linalg.svd(A_std)

# Share of the total variance carried by each principal component.
# The variance along a principal axis is proportional to the SQUARED singular
# value, so the percentages are computed from s**2 rather than from s itself.
squared_singular_values = np.sort(s)[::-1] ** 2
total_variance = squared_singular_values.sum()
variance_explained = squared_singular_values / total_variance * 100
cumulative_variance_explained = np.cumsum(variance_explained)

# Bars are centered on half-integers so each one sits between two ticks.
xs = [0.5 + i for i in range(A_std.shape[1])]
plt.bar(xs, variance_explained, alpha=0.5, align='center',
        label='Individual explained variance')
plt.step(xs, cumulative_variance_explained, where='mid',
         label='Cumulative explained variance')
plt.ylabel('Explained variance')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.xticks(np.arange(A_std.shape[1]+1))
plt.grid(linestyle=":")
plt.show()

rank = 3
# Coordinates of every sample in the first `rank` principal directions.
projections = u[:,:rank] @ np.diag(s[:rank])

# Long-form table for plotly: one row per sample, its class name in 'label'.
df = pd.DataFrame(projections, columns=['PC1', 'PC2', 'PC3'])
df['label'] = label_names

fig = px.scatter_3d(df, x='PC1', y='PC2', z='PC3', color='label')
fig.update_traces(marker=dict(size=6),
                  selector=dict(mode='markers'))
fig.show()
🤔: Dataset contains 178 points in 13-dimensional space
Unable to display output for mime type(s): application/vnd.plotly.v1+json
MNIST PCA
import numpy as np
import plotly.express as px
import pandas as pd
import tensorflow as tf
# Load dataset (only the training split is used)
(train_images, train_labels), _ = tf.keras.datasets.mnist.load_data()

# Flatten images: one row per image
train_images = train_images.reshape((train_images.shape[0], -1))

# Select 1000 random images per class (sampling without replacement)
samples_per_class = 1000
selected_indices = []
for i in range(10):
    indices = np.where(train_labels == i)[0]
    selected_indices.extend(np.random.choice(indices, samples_per_class, replace=False))

selected_images = train_images[selected_indices]
selected_labels = train_labels[selected_indices]

# Normalize the data
selected_images = selected_images / 255.0

# Apply PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
principal_components = pca.fit_transform(selected_images)

# Prepare data for plotting; labels become strings so plotly colors them as categories
pc_df = pd.DataFrame(data = principal_components, columns = ['PC1', 'PC2'])
pc_df['Label'] = selected_labels
pc_df['Label'] = pc_df['Label'].astype(str)

# Plot
# NOTE(review): verify that the Set1 palette has at least 10 colors; otherwise
# two digit classes share a color.
fig = px.scatter(pc_df, x='PC1', y='PC2', color='Label',
                 color_discrete_sequence=px.colors.qualitative.Set1,
                 title='2D PCA of MNIST')
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json
MNIST tSNE
import numpy as np
import plotly.express as px
import pandas as pd
import tensorflow as tf
from sklearn.manifold import TSNE
# Load dataset (only the training split is used)
(train_images, train_labels), _ = tf.keras.datasets.mnist.load_data()

# Flatten images: one row per image
train_images = train_images.reshape((train_images.shape[0], -1))

# Select 1000 random images per class (sampling without replacement)
samples_per_class = 1000
selected_indices = []
for i in range(10):
    indices = np.where(train_labels == i)[0]
    selected_indices.extend(np.random.choice(indices, samples_per_class, replace=False))

selected_images = train_images[selected_indices]
selected_labels = train_labels[selected_indices]

# Normalize the data
selected_images = selected_images / 255.0

# Apply t-SNE
tsne = TSNE(n_components=2, random_state=0)
tsne_results = tsne.fit_transform(selected_images)

# Prepare data for plotting; labels become strings so plotly colors them as categories
tsne_df = pd.DataFrame(data = tsne_results, columns = ['Dim1', 'Dim2'])
tsne_df['Label'] = selected_labels
tsne_df['Label'] = tsne_df['Label'].astype(str)

# Plot
# NOTE(review): verify that the Set1 palette has at least 10 colors; otherwise
# two digit classes share a color.
fig = px.scatter(tsne_df, x='Dim1', y='Dim2', color='Label',
                 color_discrete_sequence=px.colors.qualitative.Set1,
                 title='2D t-SNE of MNIST')
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json
MNIST UMAP
!pip install umap-learn
import numpy as np
import plotly.express as px
import pandas as pd
import tensorflow as tf
import umap
# Load dataset (only the training split is used)
(train_images, train_labels), _ = tf.keras.datasets.mnist.load_data()

# Flatten images: one row per image
train_images = train_images.reshape((train_images.shape[0], -1))

# Select 1000 random images per class (sampling without replacement)
samples_per_class = 1000
selected_indices = []
for i in range(10):
    indices = np.where(train_labels == i)[0]
    selected_indices.extend(np.random.choice(indices, samples_per_class, replace=False))

selected_images = train_images[selected_indices]
selected_labels = train_labels[selected_indices]

# Normalize the data
selected_images = selected_images / 255.0

# Apply UMAP
reducer = umap.UMAP(random_state=42)
embedding = reducer.fit_transform(selected_images)

# Prepare data for plotting; labels become strings so plotly colors them as categories
umap_df = pd.DataFrame(data = embedding, columns = ['Dim1', 'Dim2'])
umap_df['Label'] = selected_labels
umap_df['Label'] = umap_df['Label'].astype(str)

# Plot
# NOTE(review): verify that the Set1 palette has at least 10 colors; otherwise
# two digit classes share a color.
fig = px.scatter(umap_df, x='Dim1', y='Dim2', color='Label',
                 color_discrete_sequence=px.colors.qualitative.Set1,
                 title='2D UMAP of MNIST')
fig.show()
/Users/bratishka/.pyenv/versions/3.9.17/envs/benchmarx/lib/python3.9/site-packages/umap/umap_.py:1943: UserWarning:
n_jobs value -1 overridden to 1 by setting random_state. Use no seed for parallelism.
Unable to display output for mime type(s): application/vnd.plotly.v1+json