Find all combinations of n1 elements from vector1 and n2 elements from vector2 in R?
Asked Answered
O

5

5

I have two vectors and I am trying to find all unique combinations of 3 elements from vector1 and 2 elements from vector2. I have tried the following code.

# All 3-element subsets of 1:5, one subset per column (10 columns in total).
V1 <- combn(x = 1:5, m = 3)
# All 2-element subsets of 6:11, one subset per column (15 columns in total).
V2 <- combn(x = 6:11, m = 2)

How can I combine V1 and V2 so that I get all 10 * 15 = 150 combinations, each made of one column of V1 and one column of V2? Thanks.

Ofay answered 2/12, 2022 at 21:14 Comment(0)
I
3

You can use expand.grid():

# Every pairing of a V1 column index (Var1) with a V2 column index (Var2).
g <- expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2)))
# Stack the selected V1 columns on top of the selected V2 columns.
# drop = FALSE keeps each selection a matrix even when only one column is
# picked; otherwise rbind() would receive plain vectors and recycle them
# instead of stacking them.
V3 <- rbind(V1[, g$Var1, drop = FALSE], V2[, g$Var2, drop = FALSE])

The result is in a similar format as V1 and V2, i.e. a 5 × 150 matrix (here printed transposed):

head(t(V3))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    2    3    6    7
# [2,]    1    2    4    6    7
# [3,]    1    2    5    6    7
# [4,]    1    3    4    6    7
# [5,]    1    3    5    6    7
# [6,]    1    4    5    6    7

dim(unique(t(V3)))
# [1] 150   5

And a generalized approach that can handle more than two initial matrices of combinations, stored in a list V:

# Generalised version: V holds any number of combination matrices, each with
# one combination per column.
V <- list(V1, V2)
# One column of indices per matrix in V; each row of g picks one column from
# every matrix.
g <- do.call(expand.grid, lapply(V, \(x) seq_len(ncol(x))))
# Map() never simplifies its result, so this also works when all matrices have
# the same number of rows (mapply() with its default SIMPLIFY = TRUE would
# collapse the pieces into a single matrix and do.call() would then fail).
# drop = FALSE keeps single-column selections as matrices so rbind() stacks
# them rather than recycling vectors.
V.comb <- do.call(rbind, Map(\(m, j) m[, j, drop = FALSE], V, g))

identical(V.comb, V3)
# [1] TRUE
Insignificance answered 2/12, 2022 at 21:28 Comment(0)
F
6

The function comboGrid from RcppAlgos (I am the author) does just the trick:

library(RcppAlgos)

# The input is a list of five source vectors: three copies of 1:5 followed by
# two copies of 6:11. comboGrid() is called with repetition = FALSE; judging by
# the output shown below, every row then holds five distinct values.
grid <- comboGrid(c(rep(list(1:5), 3), rep(list(6:11), 2)),
                  repetition = FALSE)

head(grid)
#>      Var1 Var2 Var3 Var4 Var5
#> [1,]    1    2    3    6    7
#> [2,]    1    2    3    6    8
#> [3,]    1    2    3    6    9
#> [4,]    1    2    3    6   10
#> [5,]    1    2    3    6   11
#> [6,]    1    2    3    7    8

tail(grid)
#>        Var1 Var2 Var3 Var4 Var5
#> [145,]    3    4    5    8    9
#> [146,]    3    4    5    8   10
#> [147,]    3    4    5    8   11
#> [148,]    3    4    5    9   10
#> [149,]    3    4    5    9   11
#> [150,]    3    4    5   10   11

It is quite efficient as well. It is written in C++ and pulls together many ideas from the excellent question: Picking unordered combinations from pools with overlap. The underlying algorithm avoids generating duplicates that would need to be filtered out.

Consider the following example, where the full Cartesian product would contain more than 10 billion rows (20^5 * 15^3):

# Time the call with system.time(); `huge` is assigned inside the timed
# expression, so the result is still available afterwards.
# Input: five copies of 1:20 followed by three copies of 21:35.
system.time(huge <- comboGrid(c(rep(list(1:20), 5), rep(list(21:35), 3)),
                              repetition = FALSE))
#>    user  system elapsed 
#>   0.990   0.087   1.077

dim(huge)
#> [1] 7054320       8
Finable answered 2/12, 2022 at 21:47 Comment(0)
C
5

You can try expand.grid along with asplit, e.g.,

# asplit(x, 2) splits a matrix into a list of its columns; expand.grid() then
# pairs every column of V1 with every column of V2. The result has one row per
# pairing and two list-columns (Var1, Var2), each cell holding a whole vector.
expand.grid(asplit(V1,2), asplit(V2,2))

or

# Pair every column of V1 with every column of V2 (two list-columns, Var1 and
# Var2), then concatenate each pair into a single vector. mapply() returns one
# column per pairing, so transpose to get one row per pairing.
pairings <- expand.grid(asplit(V1, 2), asplit(V2, 2))
t(mapply(c, pairings$Var1, pairings$Var2))
Cassycast answered 2/12, 2022 at 22:13 Comment(2)
Hi Thomasls, this solution is very efficient, however, the final product is a dataframe that includes 150 rows and 2 columns. Is it possible to split the 2 columns into 5 columns as there are 5 elements in each row? Thank you.Ofay
@YangYang Try the second option.Cassycast
I
3

You can use expand.grid():

# Every pairing of a V1 column index (Var1) with a V2 column index (Var2).
g <- expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2)))
# Stack the selected V1 columns on top of the selected V2 columns.
# drop = FALSE keeps each selection a matrix even when only one column is
# picked; otherwise rbind() would receive plain vectors and recycle them
# instead of stacking them.
V3 <- rbind(V1[, g$Var1, drop = FALSE], V2[, g$Var2, drop = FALSE])

The result is in a similar format as V1 and V2, i.e. a 5 × 150 matrix (here printed transposed):

head(t(V3))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    2    3    6    7
# [2,]    1    2    4    6    7
# [3,]    1    2    5    6    7
# [4,]    1    3    4    6    7
# [5,]    1    3    5    6    7
# [6,]    1    4    5    6    7

dim(unique(t(V3)))
# [1] 150   5

And a generalized approach that can handle more than two initial matrices of combinations, stored in a list V:

# Generalised version: V holds any number of combination matrices, each with
# one combination per column.
V <- list(V1, V2)
# One column of indices per matrix in V; each row of g picks one column from
# every matrix.
g <- do.call(expand.grid, lapply(V, \(x) seq_len(ncol(x))))
# Map() never simplifies its result, so this also works when all matrices have
# the same number of rows (mapply() with its default SIMPLIFY = TRUE would
# collapse the pieces into a single matrix and do.call() would then fail).
# drop = FALSE keeps single-column selections as matrices so rbind() stacks
# them rather than recycling vectors.
V.comb <- do.call(rbind, Map(\(m, j) m[, j, drop = FALSE], V, g))

identical(V.comb, V3)
# [1] TRUE
Insignificance answered 2/12, 2022 at 21:28 Comment(0)
K
3

After some helpful refactoring guidance from @onyambu, here is a shorter solution based on base::merge():

# One combination per row: 10 triples from 1:5 and 15 pairs from 6:11.
triples <- t(combn(1:5, 3))
pairs <- t(combn(6:11, 2))
# With no key columns (by = NULL), merge() returns the Cartesian product:
# every row of `triples` paired with every row of `pairs`.
merge(triples, pairs, by = NULL)

...and the first 20 rows of output:

> merge(t(combn(1:5, 3)),t(combn(6:11, 2)),by.x=NULL,by.y = NULL)
    V1.x V2.x V3 V1.y V2.y
1      1    2  3    6    7
2      1    2  4    6    7
3      1    2  5    6    7
4      1    3  4    6    7
5      1    3  5    6    7
6      1    4  5    6    7
7      2    3  4    6    7
8      2    3  5    6    7
9      2    4  5    6    7
10     3    4  5    6    7
11     1    2  3    6    8
12     1    2  4    6    8
13     1    2  5    6    8
14     1    3  4    6    8
15     1    3  5    6    8
16     1    4  5    6    8
17     2    3  4    6    8
18     2    3  5    6    8
19     2    4  5    6    8
20     3    4  5    6    8

original solution

A base R solution to create a Cartesian product with merge() looks like this:

# One combination per row: 10 triples from 1:5 (columns X1..X3) and
# 15 pairs from 6:11.
df1 <- data.frame(t(combn(1:5, 3)))
df2 <- data.frame(t(combn(6:11, 2)))
# Rename df2's columns to y1, y2 so they do not clash with df1's X1, X2.
# (The original line had an unbalanced closing parenthesis and did not parse.)
colnames(df2) <- paste0("y", 1:2)

# With no key columns (by = NULL), merge() returns the Cartesian product:
# every row of df1 paired with every row of df2 (10 * 15 = 150 rows).
merge(df1, df2, by = NULL)

...and the first 25 rows of output:

> merge(df1,df2,by.x=NULL,by.y = NULL)
    X1 X2 X3 y1 y2
1    1  2  3  6  7
2    1  2  4  6  7
3    1  2  5  6  7
4    1  3  4  6  7
5    1  3  5  6  7
6    1  4  5  6  7
7    2  3  4  6  7
8    2  3  5  6  7
9    2  4  5  6  7
10   3  4  5  6  7
11   1  2  3  6  8
12   1  2  4  6  8
13   1  2  5  6  8
14   1  3  4  6  8
15   1  3  5  6  8
16   1  4  5  6  8
17   2  3  4  6  8
18   2  3  5  6  8
19   2  4  5  6  8
20   3  4  5  6  8
21   1  2  3  6  9
22   1  2  4  6  9
23   1  2  5  6  9
24   1  3  4  6  9
25   1  3  5  6  9
Kalman answered 2/12, 2022 at 21:35 Comment(1)
this is a good solution, make it better: No need of the df1 df2. Just do merge(t(V1), t(V2), by=NULL)Shavonda
A
2

Similar idea, using apply

# Build every (V1 column index, V2 column index) pair. seq_len() is used
# instead of seq() so that a matrix with zero columns yields no indices rather
# than c(1, 0). The grid is converted to a matrix explicitly, since apply()
# would otherwise coerce the data frame implicitly.
# Each row of index pairs becomes one column of the result: the chosen V1
# column followed by the chosen V2 column.
apply(
  as.matrix(expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2)))),
  1,
  function(i) {
    c(V1[, i[1]], V2[, i[2]])
  }
)
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,]    1    1    1    1    1    1    2    2    2     3     1     1     1     1
#> [2,]    2    2    2    3    3    4    3    3    4     4     2     2     2     3
#> [3,]    3    4    5    4    5    5    4    5    5     5     3     4     5     4
#> [4,]    6    6    6    6    6    6    6    6    6     6     6     6     6     6
#> [5,]    7    7    7    7    7    7    7    7    7     7     8     8     8     8
#>      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     8     8     8     8     8     8     9     9     9     9     9     9
#>      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     9     9     9     9    10    10    10    10    10    10    10    10
#>      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
#> [1,]     2     3     1     1     1     1     1     1     2     2     2     3
#> [2,]     4     4     2     2     2     3     3     4     3     3     4     4
#> [3,]     5     5     3     4     5     4     5     5     4     5     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]    10    10    11    11    11    11    11    11    11    11    11    11
#>      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
#> [1,]     1     1     1     1     1     1     2     2     2     3     1     1
#> [2,]     2     2     2     3     3     4     3     3     4     4     2     2
#> [3,]     3     4     5     4     5     5     4     5     5     5     3     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     8     8     8     8     8     8     8     8     8     8     9     9
#>      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
#> [1,]     1     1     1     1     2     2     2     3     1     1     1     1
#> [2,]     2     3     3     4     3     3     4     4     2     2     2     3
#> [3,]     5     4     5     5     4     5     5     5     3     4     5     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     9     9     9     9     9     9     9     9    10    10    10    10
#>      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]    10    10    10    10    10    10    11    11    11    11    11    11
#>      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     7     7     7     7     8     8     8     8     8     8     8     8
#> [5,]    11    11    11    11     9     9     9     9     9     9     9     9
#>      [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
#> [1,]     2      3      1      1      1      1      1      1      2      2
#> [2,]     4      4      2      2      2      3      3      4      3      3
#> [3,]     5      5      3      4      5      4      5      5      4      5
#> [4,]     8      8      8      8      8      8      8      8      8      8
#> [5,]     9      9     10     10     10     10     10     10     10     10
#>      [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      8      8      8      8      8      8      8      8
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      9      9      9      9      9      9      9      9
#> [5,]     11     11     10     10     10     10     10     10     10     10
#>      [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9      9      9      9      9      9      9      9      9
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9     10     10     10     10     10     10     10     10
#> [5,]     11     11     11     11     11     11     11     11     11     11
#>      [,149] [,150]
#> [1,]      2      3
#> [2,]      4      4
#> [3,]      5      5
#> [4,]     10     10
#> [5,]     11     11

Created on 2022-12-02 with reprex v2.0.2

Asparagine answered 2/12, 2022 at 21:33 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.