I am surprised nobody mentioned the irlba
package yet:
It is even a bit faster than svd
's propack.svd
,
provides with irlba::prcomp_irlba(X, n=2)
a stats::prcomp
-like interface for convenience and
did not require parameter adjustments in the following benchmark for rectangular matrices (2:1) of varying size. For matrices of size 6000x3000 it is 50 times faster than stats::prcomp
. For matrices smaller than 100x50 stats::svd
is still faster though.
library(microbenchmark)
library(tidyverse)
#install.packages("svd","corpcor","irlba","rsvd")
exprs <- rlang::exprs(
svd(M, 2, 2)$v,
prcomp(M)$rotation[,1:2],
irlba::prcomp_irlba(M, n=2)$rotation,
irlba::svdr(M, k=2)$v,
rsvd::rsvd(M, 2)$v,
svd::propack.svd(M, neig=2, opts=list(maxiter=100))$v,
corpcor::fast.svd(M)$v[,1:2]
)
set.seed(42)
tibble(N=c(10,30,100,300,1000,3000)) %>%
group_by(N) %>%
do({
M <- scale(matrix(rnorm(.$N*.$N*2), .$N*2, .$N))
microbenchmark(!!!exprs,
times=min(100, ceiling(3000/.$N)))%>%
as_tibble
}) %>%
ggplot(aes(x=N, y=time/1E9,color=expr)) +
geom_jitter(width=0.05) +
scale_x_log10("matrix size (2N x N)") +
scale_y_log10("time [s]") +
stat_summary(fun.y = median, geom="smooth") +
scale_color_discrete(labels = partial(str_wrap, width=30))
The randomized svd provided by rsvd
is even faster, but unfortunately, quite off:
set.seed(42)
N <- 1000
M <- scale(matrix(rnorm(N^2*2), N*2, N))
cor(set_colnames(sapply(exprs, function(x) eval(x)[,1]), sapply(exprs, deparse)))
svd(M, 2, 2)$v prcomp(M)$rotation[, 1:2] irlba::prcomp_irlba(M, n = 2)$rotation irlba::svdr(M, k = 2)$v rsvd::rsvd(M, 2)$v svd::propack.svd(M, neig = 2, opts = list(maxiter = 100))$v corpcor::fast.svd(M)$v[, 1:2]
svd(M, 2, 2)$v 1.0000000 1.0000000 -1.0000000 0.9998748 0.286184 1.0000000 1.0000000
prcomp(M)$rotation[, 1:2] 1.0000000 1.0000000 -1.0000000 0.9998748 0.286184 1.0000000 1.0000000
irlba::prcomp_irlba(M, n = 2)$rotation -1.0000000 -1.0000000 1.0000000 -0.9998748 -0.286184 -1.0000000 -1.0000000
irlba::svdr(M, k = 2)$v 0.9998748 0.9998748 -0.9998748 1.0000000 0.290397 0.9998748 0.9998748
rsvd::rsvd(M, 2)$v 0.2861840 0.2861840 -0.2861840 0.2903970 1.000000 0.2861840 0.2861840
svd::propack.svd(M, neig = 2, opts = list(maxiter = 100))$v 1.0000000 1.0000000 -1.0000000 0.9998748 0.286184 1.0000000 1.0000000
corpcor::fast.svd(M)$v[, 1:2] 1.0000000 1.0000000 -1.0000000 0.9998748 0.286184 1.0000000 1.0000000
This might be better when the data actually has a structure though.