# Code
library(palmerpenguins)
library(tidyverse)
# PCA of standardized bill length vs. body mass ----
# Scales the two measurements, fits a PCA, computes every point's orthogonal
# projection onto the first principal component, and builds the base scatter
# plot of the standardized data.

# 1. Prepare the data ----
# drop_na() runs before select(), so a row is removed when *any* column of the
# raw data is NA, not only the two columns kept here.
# NOTE(review): this assignment shadows the `penguins` dataset attached by
# library(palmerpenguins); the raw data stays reachable via
# palmerpenguins::penguins.
penguins <- drop_na(palmerpenguins::penguins) |>
  select(bill_length_mm, body_mass_g)

# scale() centers each column to mean 0 and rescales it to sd 1; the result is
# a numeric matrix, not a data frame.
penguins_scaled <- scale(penguins)

# Tibble copy of the scaled matrix with shorter column names (renamed by
# position) for plotting.
penguins_scaled_df <- as_tibble(penguins_scaled, .name_repair = "minimal") |>
  rename(bill_length = 1, body_mass = 2)

# 2. PCA on scaled data ----
pca <- prcomp(penguins_scaled)

# 3. PC1 unit vector (direction) ----
# First column of the rotation matrix: the loadings that define PC1.
pc1_vec <- pca$rotation[, 1]

# 4. Project points onto PC1 ----
# n x 1 matrix of signed positions along PC1. penguins_scaled is already a
# matrix, so no as.matrix() conversion is needed.
proj_lengths <- penguins_scaled %*% pc1_vec
# n x 2 matrix: each position multiplied back by the PC1 direction gives the
# projected point's coordinates in the standardized (bill, mass) plane.
projections <- proj_lengths %*% t(pc1_vec)

# 5. Add projection coordinates to data frame ----
penguins_scaled_df <- penguins_scaled_df |>
  mutate(
    x_proj = projections[, 1],
    y_proj = projections[, 2]
  )

# 6. Scatter plot in standardized space ----
# Only the points are drawn here; the PC1 axis and the x_proj / y_proj
# coordinates computed above are not added as layers in this block.
PC1_pl <- ggplot(penguins_scaled_df, aes(x = bill_length, y = body_mass)) +
  geom_point(color = "indianred") +
  labs(
    x = "Standardized Bill Length",
    y = "Standardized Body Mass"
  ) +
  coord_fixed() + # equal axis units, so angles are not distorted
  theme_minimal()