Is there an efficient way to extract a slice of a 3d array?
Asked Answered
B

4

12

Consider this:

# Build a 3 x 4 x 5 array; values 1:60 fill column-major (first index fastest).
a = array(1:60, c(3,4,5))

# Extract (*,2,2)^th elements
a[cbind(1:3,2,2)]

# ^^ returns a vector of length 3, c(16, 17, 18)
# Note this is asymptotically inefficient -- cbind(1:k,2,2) is a kx3 matrix
# So it's not making use of slicing

# Extract (1, y_i, z_i)^th elements
# y and z are random index vectors into the 2nd and 3rd dimensions.
N = 100000
y = sample(1:4, N, replace = TRUE)
z = sample(1:5, N, replace = TRUE)
a[cbind(1, y, z)]

# ^^ returns a vector of length N

How do I efficiently extract the (*, y_i, z_i) elements, with the result as a Nx3 matrix (i.e. 2d array)?

Note this question is similar, but the only answer proceeds elementwise and so will be slow.

Browse answered 19/8, 2024 at 9:29 Comment(1)
I once had (approximately) this same need in VBA - perhaps it would be interesting for future readers of your question. (after reviewing my question, I was looking for a generalized solution to take an N dimensional array, specify an index in one of the dimensions, and get the resulting N-1 dimensional slice)Kriskrischer
S
8

A way might be to calculate the linear index of the array using its dim.

# Linear (vector) indexing: the 0-based offset of element (., y_i, z_i) in
# as.vector(a) is (y-1)*dim(a)[1] + (z-1)*dim(a)[1]*dim(a)[2]; outer() adds
# each row number 1..nrow(a), giving an N x nrow(a) matrix of positions.
outer((y-1)*dim(a)[1] + (z-1)*prod(dim(a)[1:2]), seq_len(nrow(a)), \(x, y) a[x+y])
# Same offsets, built one column at a time (lapply over the row index, with the
# offset vector passed as the extra argument), then cbind into an N x 3 matrix.
do.call(cbind, lapply(seq_len(nrow(a)), \(x, y) a[x+y], (y-1)*dim(a)[1] + (z-1)*prod(dim(a)[1:2])))

Or reducing the dim and selecting whole columns or whole rows.

# Flatten a to a dim(a)[1] x prod(dim(a)[-1]) matrix via `dim<-` (no data
# reordering -- the first dimension stays as rows), select the column for each
# (y_i, z_i) pair (column index y + (z-1)*dim(a)[2]), then transpose to N x 3.
t(`dim<-`(a, c(dim(a)[1], prod(dim(a)[-1])))[,y + (z - 1)*dim(a)[2]])

# Same idea, but transpose the flattened matrix first and select rows.
t(`dim<-`(a, c(dim(a)[1], prod(dim(a)[-1]))))[y + (z - 1)*dim(a)[2],]


Benchmark:

# Candidate implementations, stored unevaluated with alist() so the same
# expressions can be benchmarked again with different inputs further below.
m <- alist(
  # rep() the row index, one big matrix-index lookup, reshape to N x nrow(a)
  matrix_indexing = `dim<-`(a[cbind(rep(seq_len(nrow(a)), each = N), y, z)], c(N, nrow(a))),
  # one matrix-index lookup per row of the 1st dimension
  sapply_indexing = sapply(seq_len(nrow(a)), \(i) a[cbind(i, y, z)]),
  # slice a[i,,] to a 2d matrix first, then matrix-index it with (y, z) pairs
  sapply_indexing2 = {yz <- cbind(y, z); sapply(1:dim(a)[1], \(i) a[i,,][yz])},
  # linear indexing: precomputed (y, z) offsets plus the row number
  linearIndex = outer((y-1)*dim(a)[1] + (z-1)*prod(dim(a)[1:2]), seq_len(nrow(a)), \(x, y) a[x+y]),
  linearIndex2 = do.call(cbind, lapply(seq_len(nrow(a)), \(x, y) a[x+y], (y-1)*dim(a)[1] + (z-1)*prod(dim(a)[1:2]))),
  # split into a list of 1st-dimension slices, pick by (y, z), rbind
  asplit = do.call(rbind, asplit(a, -1)[cbind(y, z)]),
  # permute so dimension 1 comes last, flatten to 2d, then plain row selection
  aperm = `dim<-`(aperm(a, c(2, 3, 1)), c(prod(dim(a)[-1]), dim(a)[1]))[y + (z - 1)*dim(a)[2],],
  # flatten to 2d with `dim<-`, column-select + transpose (and vice versa)
  dimT = t(`dim<-`(a, c(dim(a)[1], prod(dim(a)[-1])))[,y + (z - 1)*dim(a)[2]]),
  tDim = t(`dim<-`(a, c(dim(a)[1], prod(dim(a)[-1]))))[y + (z - 1)*dim(a)[2],]
)
# Scenario 1: small array (3 x 4 x 5), many samples (N = 100000).
a = array(1:60, c(3,4,5))
N = 100000
y = sample(1:4, N, replace = TRUE)
z = sample(1:5, N, replace = TRUE)
bench::mark(exprs = m)
#  expression     min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#  <bch:expr> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
#1 matrix_in…  9.46ms  9.59ms     102.     6.87MB     26.2    39    10    382.4ms
#2 sapply_in…  3.76ms  3.81ms     261.     8.02MB     92.2    85    30    325.4ms
#3 sapply_in…  2.89ms  2.96ms     334.     5.38MB     72.5   115    25    344.7ms
#4 linearInd…   4.7ms  4.76ms     208.     9.57MB    132.     55    35    264.3ms
#5 linearInd…   3.1ms  3.14ms     316.     7.25MB    115.     96    35    303.3ms
#6 asplit      40.8ms  40.8ms      24.5    3.08MB    221.      1     9     40.8ms
#7 aperm        1.3ms  1.31ms     748.     2.29MB     48.5   324    21    433.3ms
#8 dimT        2.03ms  2.06ms     478.     3.44MB     55.8   197    23    412.5ms
#9 tDim         1.3ms  1.31ms     748.     2.29MB     48.2   326    21    435.9ms
# Scenario 2: larger array (30 x 40 x 50), fewer samples (N = 1000).
a = array(1:60000, c(30, 40, 50))
N = 1000
y = sample(1:40, N, replace = TRUE)
z = sample(1:50, N, replace = TRUE)
bench::mark(exprs = m)
#  expression            min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc
#  <bch:expr>       <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>
#1 matrix_indexing   952.7µs 960.17µs     1030.     703KB    14.7    492     7
#2 sapply_indexing   530.8µs 540.62µs     1813.     825KB    28.3    832    13
#3 sapply_indexing2  896.2µs 919.12µs     1077.     729KB    14.9    506     7
#4 linearIndex       426.7µs 435.92µs     2263.     836KB    39.5   1032    18
#5 linearIndex2      326.9µs 334.29µs     2945.     606KB    33.1   1244    14
#6 asplit              6.6ms   6.79ms      145.     637KB     8.53    68     4
#7 aperm             318.1µs 323.11µs     2900.     363KB    19.3   1355     9
#8 dimT              155.9µs 163.63µs     6061.     481KB    55.8   2714    25
#9 tDim              198.1µs 208.65µs     4721.     598KB    53.7   2108    24
Spancel answered 19/8, 2024 at 12:27 Comment(0)
P
4

Adding a couple more options to @GKi's benchmark:

# Build the (y, z) index matrix once, then matrix-index each 2d slice a[i,,].
yz <- cbind(y, z)
sapply(1:dim(a)[1], \(i) a[i,,][yz])

and

# Move dimension 1 to the end with aperm(), collapse dims 2:3 into rows, then a
# single row selection (row index y + (z-1)*dim(a)[2]) gives the N x 3 result.
`dim<-`(aperm(a, c(2, 3, 1)), c(prod(dim(a)[-1]), dim(a)[1]))[y + (z - 1)*dim(a)[2],]

Benchmark

# @GKi's benchmark set extended with sapply_indexing2 and aperm; expressions
# are held unevaluated by alist() so they can be re-run on different inputs.
m <- alist(
  matrix_indexing = `dim<-`(a[cbind(rep(seq_len(nrow(a)), each = N), y, z)], c(N, nrow(a))),
  sapply_indexing = sapply(seq_len(nrow(a)), \(i) a[cbind(i, y, z)]),
  # new: slice a[i,,] first, then matrix-index with the (y, z) pairs
  sapply_indexing2 = {yz <- cbind(y, z); sapply(1:dim(a)[1], \(i) a[i,,][yz])},
  linearIndex = outer((y-1)*dim(a)[1] + (z-1)*prod(dim(a)[1:2]), seq_len(nrow(a)), \(x, y) a[x+y]),
  linearIndex2 = do.call(cbind, lapply(seq_len(nrow(a)), \(x, y) a[x+y], (y-1)*dim(a)[1] + (z-1)*prod(dim(a)[1:2]))),
  asplit = do.call(rbind, asplit(a, -1)[cbind(y, z)]),
  # new: permute dim 1 to the end, flatten to 2d, then plain row selection
  aperm = `dim<-`(aperm(a, c(2, 3, 1)), c(prod(dim(a)[-1]), dim(a)[1]))[y + (z - 1)*dim(a)[2],]
)

# Small array, many samples.
a = array(1:60, c(3,4,5))
N = 100000
y = sample(1:4, N, replace = TRUE)
z = sample(1:5, N, replace = TRUE)
bench::mark(exprs = m)
#> # A tibble: 7 × 6
#>   expression            min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>       <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 matrix_indexing    3.83ms    4.7ms     210.     6.87MB     71.1
#> 2 sapply_indexing    2.32ms   3.73ms     271.    11.96MB     91.5
#> 3 sapply_indexing2    1.9ms    2.8ms     360.     5.41MB     78.6
#> 4 linearIndex        4.24ms   5.21ms     192.     9.57MB    123. 
#> 5 linearIndex2       1.89ms   3.16ms     322.     7.28MB    103. 
#> 6 asplit            26.15ms  34.32ms      31.3    3.08MB    135. 
#> 7 aperm             699.7µs   1.07ms     976.     2.29MB     62.1

Larger array, fewer samples:

# Larger array, fewer samples.
a = array(1:60000, c(30, 40, 50))
N = 1000
y = sample(1:40, N, replace = TRUE)
z = sample(1:50, N, replace = TRUE)
bench::mark(exprs = m)
#> # A tibble: 7 × 6
#>   expression            min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>       <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 matrix_indexing   312.3µs  443.2µs     2223.     703KB     44.3
#> 2 sapply_indexing     305µs 495.25µs     2048.     825KB     42.6
#> 3 sapply_indexing2  493.5µs  651.8µs     1623.     729KB     30.4
#> 4 linearIndex       325.1µs  466.3µs     2246.     836KB     50.6
#> 5 linearIndex2        194µs  341.5µs     3060.     606KB     48.8
#> 6 asplit             4.57ms   4.73ms      204.     637KB     12.6
#> 7 aperm             199.7µs  277.9µs     3642.     363KB     24.8
Papen answered 19/8, 2024 at 13:16 Comment(2)
For the general case, shouldn't your aperm version take a 3-element vector as an input to the aperm() function itself? Not that I'd expect any difference in speedWehrmacht
Yes, I believe aperm() would be the way to go in the general case of (*, y_i, z_i), (y_i, *, z_i), or (y_i, z_i, *). @GKi's dimT or tDim seem to be best for the OP's specific case of (*, y_i, z_i) (depending on N and the dimensions of a).Papen
D
3

You can repeat each element of the 1st indexing vector N times, and bind it with the 2nd and 3rd indexing vectors. Then reshape the extracted slice into the proper dimensions.

# rep(1:3, each = N) pairs every 1st-dimension index with every (y_i, z_i); the
# extracted 3N-vector is reshaped to N x 3 (one column per 1st-dimension slice).
`dim<-`(a[cbind(rep(1:3, each = N), y, z)], c(N, 3))

#           [,1] [,2] [,3]
#      [1,]   40   41   42
#      [2,]   49   50   51
#      [3,]   34   35   36
#      [4,]    7    8    9
#      [5,]   46   47   48
#      [6,]   16   17   18
#      [7,]   22   23   24
#      [8,]   25   26   27
#      [9,]   49   50   51
#     [10,]   16   17   18
#  [ reached getOption("max.print") -- omitted 99990 rows ]

You can also use sapply

# One matrix-index extraction per 1st-dimension index; sapply column-binds the
# three length-N vectors into an N x 3 matrix.
sapply(1:3, \(i) a[cbind(i, y, z)])

Benchmark

I increased the dimensions of a to 30x40x50, and set N=1000. It's surprising to me that sapply is faster than matrix indexing.

a = array(1:60000, c(30, 40, 50))
N = 1000
y = sample(1:40, N, replace = TRUE)
z = sample(1:50, N, replace = TRUE)

# check = "identical" verifies both expressions produce the same matrix;
# unit = "relative" reports each timing as a multiple of the fastest one.
microbenchmark::microbenchmark(
  matrix_indexing = `dim<-`(a[cbind(rep(seq_len(nrow(a)), each = N), y, z)], c(N, nrow(a))),
  sapply_indexing = sapply(seq_len(nrow(a)), \(i) a[cbind(i, y, z)]),
  times = 100L,
  check = "identical",
  unit = "relative"
)

# Unit: relative
#             expr      min       lq     mean   median       uq       max neval cld
#  matrix_indexing 1.937193 1.930684 1.621237 1.908087 1.927041 0.1649859   100  a 
#  sapply_indexing 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000   100   b

If I reduce N to 100, the result reverses.

N <- 100

# Unit: relative
#             expr      min       lq    mean   median       uq      max neval cld
#  matrix_indexing 1.000000 1.000000 1.00000 1.000000 1.000000 1.000000   100  a 
#  sapply_indexing 1.515337 1.519891 1.54679 1.516215 1.517535 1.756281   100   b
Duax answered 19/8, 2024 at 10:41 Comment(4)
due to the very small i?Lamina
surprising that sapply performs so excellently here, the star of today!Cameliacamella
it seems the speed depends on the distribution of dimensions, interesting!Cameliacamella
The values in ‘data’ are taken to be those in the array with the leftmost subscript moving fastest <?array> may be at play here.Gravois
C
3

For handy coding, you will probably be interested in asplit

do.call(rbind, asplit(a, -1)[cbind(y, z)])

but it is definitely slower than the `dim<-` approach by @Darren Tsai

Benchmark

# check = "identical" guards that both approaches return the same matrix;
# unit = "relative" reports timings as multiples of the fastest expression.
microbenchmark(
  darren = `dim<-`(a[cbind(rep(1:3, each = N), y, z)], c(N, 3)),
  tic = do.call(rbind, asplit(a, -1)[cbind(y, z)]),
  check = "identical",
  unit = "relative"
)

shows

Unit: relative
   expr      min       lq     mean   median       uq      max neval
 darren 1.000000  1.00000  1.00000  1.00000  1.00000  1.00000   100
    tic 9.000844 11.42021 16.63028 13.42301 19.16755 25.68256   100
Cameliacamella answered 19/8, 2024 at 10:58 Comment(0)

© 2022 - 2025 — McMap. All rights reserved.