`data.table::transpose(a)` can be a useful tool here if your list elements have unequal sizes, or if you actually want a data.frame instead.
It efficiently turns a length-n list of length-up-to-p vectors into a length-p list of length-n vectors, padding the missing elements with a value of your choice.
# For a list of vectors of unequal size, if you want to pad instead of recycle.
# Element i is c(i, 1, ..., i), so the lengths run from 2 to 7.
# lapply() always returns a list; sapply() only did so here because the
# unequal lengths prevented it from simplifying the result to a matrix.
a <- lapply(1:6, function(i) c(i, seq_len(i)))
a
#> [[1]]
#> [1] 1 1
#>
#> [[2]]
#> [1] 2 1 2
#>
#> [[3]]
#> [1] 3 1 2 3
#>
#> [[4]]
#> [1] 4 1 2 3 4
#>
#> [[5]]
#> [1] 5 1 2 3 4 5
#>
#> [[6]]
#> [1] 6 1 2 3 4 5 6
# transpose() returns one vector per output column, padded with NA where an
# element of `a` is too short. unlist() concatenates those columns and matrix()
# fills column-wise, so nrow = length(a) puts each element of `a` back on a row.
matrix(unlist(data.table::transpose(a)), nrow=length(a))
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] 1 1 NA NA NA NA NA
#> [2,] 2 1 2 NA NA NA NA
#> [3,] 3 1 2 3 NA NA NA
#> [4,] 4 1 2 3 4 NA NA
#> [5,] 5 1 2 3 4 5 NA
#> [6,] 6 1 2 3 4 5 6
#
## Neat if you want a data.frame instead: each transposed vector becomes one
## column (V1, V2, ...). The trailing `[]` is there so the result prints,
## since setDF() returns its (by-reference converted) input invisibly.
data.table::setDF(data.table::as.data.table(data.table::transpose(a)))[]
#> V1 V2 V3 V4 V5 V6 V7
#> 1 1 1 NA NA NA NA NA
#> 2 2 1 2 NA NA NA NA
#> 3 3 1 2 3 NA NA NA
#> 4 4 1 2 3 4 NA NA
#> 5 5 1 2 3 4 5 NA
#> 6 6 1 2 3 4 5 6
It is almost as fast as the `matrix(unlist(...), byrow=TRUE)` solution and much faster than the `t(sapply(...))` approach, which also works for unequal lengths.
# Ragged input for the benchmark: element i is c(i, 1, ..., i).
# lapply() is used because it always returns a list, whereas sapply() only
# returns one here because the unequal lengths block simplification.
a <- lapply(1:6, function(i) c(i, seq_len(i)))
a
# Compare the two approaches that can pad unequal-length elements with NA.
# bench::mark() checks by default that the expressions return equal results.
bench::mark(
# transpose-based padding, then a column-major fill
matrix(unlist(data.table::transpose(a)), nrow=length(a)),
# base R alternative: indexing past the end of a vector with `[` yields NA,
# so every element is stretched to the longest length before transposing
t(sapply(a, '[', 1:max(sapply(a, length))))
)
#> # A tibble: 2 × 6
#> expression min median
#> <bch:expr> <bch:tm> <bch:tm>
#> 1 matrix(unlist(data.table::transpose(a)), nrow = length(a)) 6.87µs 8.68µs
#> 2 t(sapply(a, "[", 1:max(sapply(a, length)))) 33.29µs 42.14µs
#> # ℹ 3 more variables: `itr/sec` <dbl>, mem_alloc <bch:byt>, `gc/sec` <dbl>
# small list, equal sizes
# lapply() keeps the result a list directly; sapply() needed simplify = FALSE
# here to avoid collapsing the equal-length vectors into a matrix.
a <- lapply(1:6, function(i) c(i, seq_len(5)))
a
#> [[1]]
#> [1] 1 1 2 3 4 5
#>
#> [[2]]
#> [1] 2 1 2 3 4 5
#>
#> [[3]]
#> [1] 3 1 2 3 4 5
#>
#> [[4]]
#> [1] 4 1 2 3 4 5
#>
#> [[5]]
#> [1] 5 1 2 3 4 5
#>
#> [[6]]
#> [1] 6 1 2 3 4 5
# With equal-length elements, the two padding approaches can be compared
# against the plain base R solutions that assume equal lengths.
bench::mark(
# transpose-based padding, then a column-major fill
matrix(unlist(data.table::transpose(a)), nrow=length(a)),
# pad by indexing each element up to the longest length, then transpose
t(sapply(a, '[', 1:max(sapply(a, length)))),
# bind the elements together as rows
do.call(rbind, a),
# flatten and fill row by row, one row per list element
matrix(unlist(a), byrow=TRUE, nrow=length(a) )
)
#> # A tibble: 4 × 6
#> expression min median
#> <bch:expr> <bch:tm> <bch:tm>
#> 1 matrix(unlist(data.table::transpose(a)), nrow = length(a)) 7.03µs 9.06µs
#> 2 t(sapply(a, "[", 1:max(sapply(a, length)))) 32.99µs 36.18µs
#> 3 do.call(rbind, a) 2.92µs 3.47µs
#> 4 matrix(unlist(a), byrow = TRUE, nrow = length(a)) 2.77µs 3.07µs
#> # ℹ 3 more variables: `itr/sec` <dbl>, mem_alloc <bch:byt>, `gc/sec` <dbl>
# large list, equal sizes
# lapply() returns the list directly, so no simplify = FALSE is needed.
a <- lapply(seq_len(100000), function(i) c(i, seq_len(5)))
# Same four candidates, now on a 100000-element list to show how they scale.
bench::mark(
# transpose-based padding, then a column-major fill
matrix(unlist(data.table::transpose(a)), nrow=length(a)),
# pad by indexing each element up to the longest length, then transpose
t(sapply(a, '[', 1:max(sapply(a, length)))),
# bind the elements together as rows
do.call(rbind, a),
# flatten and fill row by row, one row per list element
matrix(unlist(a), byrow=TRUE, nrow=length(a) )
)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> # A tibble: 4 × 6
#> expression min median
#> <bch:expr> <bch:tm> <bch:tm>
#> 1 matrix(unlist(data.table::transpose(a)), nrow = length(a)) 11.62ms 12.54ms
#> 2 t(sapply(a, "[", 1:max(sapply(a, length)))) 94.56ms 101.09ms
#> 3 do.call(rbind, a) 59.02ms 70.49ms
#> 4 matrix(unlist(a), byrow = TRUE, nrow = length(a)) 7.02ms 7.82ms
#> # ℹ 3 more variables: `itr/sec` <dbl>, mem_alloc <bch:byt>, `gc/sec` <dbl>