I have to admit that your problem inspired me and made me wonder. I decided to face it by creating my own optimization function. And even though you got the answer that uses the gbp
package (I didn't know it before), let them share my own function. Here it is findSumm
.
findSumm = function(x, sfind, nmax=1, tmax=1){
if(sum(x)<sfind) stop("Impossible solution! sum(x)<sfind!")
fTimeSec = function() as.numeric(Sys.time()-l$tstart, units="secs")
#The current selection of vector element
sel = c(TRUE, rep(FALSE, length(x)-1))
#List of intermediate states of the vector sel
lsel = list()
#List with a collection of parameters and results
l = list(
x = sort(x, TRUE),
tstart = Sys.time(),
chosen = list(),
xfind = list(),
time = c(),
stop = FALSE,
reason = "")
while(TRUE) {
#Maximum Runtime Test
if(fTimeSec()>tmax) {
l$reason = "Calculation time is greater than tmax.\n"
l$stop = TRUE
break
}
#Record the solution and test the number of solutions
if(sum(l$x[sel])==sfind){
#Save solution
l$chosen[[length(l$chosen)+1]] = sel
l$xfind[[length(l$xfind)+1]] = l$x[sel]
l$time = c(l$time, fTimeSec())
#Test the number of solutions
if(length(l$chosen)==nmax){
l$reason = "Already found nmax solutions.\n"
l$stop = TRUE
break
}
}
idx = which(sel)
if(idx[length(idx)]==length(sel)) {
if(length(lsel)==0) break
sel=lsel[[length(lsel)]]
idx = which(sel)
lsel[length(lsel)]=NULL
sel[idx[length(idx)]]=FALSE
sel[idx[length(idx)]+1]=TRUE
next
}
if(sum(l$x[sel])>=sfind){
sel[idx[length(idx)]]=FALSE
sel[idx[length(idx)]+1]=TRUE
next
} else {
lsel[[length(lsel)+1]] = sel #Save the current state of sel vector
sel[idx[length(idx)]+1]=TRUE
next
}
}
if(length(l$chosen)==0 & !l$stop) stop("No solutions!")
l$reason = paste(l$reason, "Found", length(l$chosen),
"solutions in time", signif(fTimeSec(), 3), "seconds.\n")
cat(l$reason)
return(l)
}
Let's check how it works
findSumm(1:5, 20)$xfind
#Error in findSumm(1:5, 20) : Impossible solution! sum(x)<sfind!
findSumm(c(1,2,7), 5)$xfind
#Error in findSumm(c(1, 2, 7), 5) : No solutions!
findSumm(1:5, 14, 10, 10)$xfind
# Found 1 solutions in time 0.007 seconds.
# [[1]]
# [1] 5 4 3 2
findSumm(1:5, 5, 10, 10)$xfind
# Found 3 solutions in time 0.001 seconds.
# [[1]]
# [1] 5
#
# [[2]]
# [1] 4 1
#
# [[3]]
# [1] 3 2
findSumm(1:5, 7, 10, 10)$xfind
# Found 3 solutions in time 0.004 seconds.
# [[1]]
# [1] 5 2
#
# [[2]]
# [1] 4 3
#
# [[3]]
# [1] 4 2 1
As you can see it was doing great. Now it's time to check that on your vector x
.
x= c(236L, 407L, 51L, 308L, 72L, 9787L, 458L, 5486L, 42L, 4290L,
31L, 3533L, 1102L, 24L, 100L, 669L, 9352L, 4091L, 2751L, 3324L,
3193L, 245L, 86L, 98932L, 77L, 13L, 9789L, 91L, 999L, 25L, 25379L,
9626L, 9092L, 622L, 97L, 57L, 2911L, 6L, 405L, 894L, 1760L, 9634L,
96L, 9765L, 223L, 765L, 743L, 5960L, 14L, 50L, 89L, 348L, 5875L,
5L, 58602L, 397L, 1181L, 94L, 954L, 7901L, 836L, 8810L, 52L,
15L, 48L, 26L, 4L, 66L, 5265L, 80L, 282L, 231L, 76L, 661L, 7604L,
7406L, 58L, 10L, 903L, 49446L, 80921L, 1L, 876L, 334L, 63L, 796L,
88L, 413L, 1214L, 2983L, 9518L, 595L, 708L, 53L, 321L, 12L, 634L,
4910L, 8984L, 465L)
findSumm(x, 23745, 1, 10)$xfind[[1]]
# Already found nmax solutions.
# Found 1 solutions in time 0.008 seconds.
# [1] 9789 9787 4091 77 1
A few comments about my function. My function searches for all possible and valid combinations unless it reaches the number of valid results specified by nmax
or the computation takes tmax
seconds. In the case of your vector x
and the sum you are looking for 23745
, the number of correct solutions is enormous. I turned it on for 1 min and got the 37827
results! And the function would still find valid results at a rate of 626 solutions per second perhaps for the next 100 years!
Below is a visualization of this process.
l = findSumm(x, 23745, +Inf, 60)
library(tidyverse)
library(ggpmisc)
df = tibble(
n = 1:length(l$chosen),
t = l$time
)
df %>% ggplot(aes(t,n))+
geom_line(size=0.1)+
geom_smooth(method = lm, formula = y~x)+
stat_poly_eq(formula = y~x,
aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = TRUE)
Finally, I decided to check the performance of my function. I did not expect a revelation because it was not written in pure C. However, I must admit that the graph below surprised me very pleasantly!
library(microbenchmark)
ggplot2::autoplot(microbenchmark(findSumm(x, 23745),
gbp1d_solver_dpp(p = x, w = x, c = 23745L),
times=100))
As it turned out, my function is almost 4 times faster than gbp1d_solver_dpp
. I'm proud!
Since I am very stubborn and curiously inquisitive, I decided to perform one more test on a vector with a length of 1000.
x=c(234L, 1891L, 3187L, 38417L, 2155L, 6857L, 71692L, 463575L,
800L, 2195L, 820L, 9735L, 913L, 62685L, 920597L, 864L, 903L,
478L, 2828L, 99371L, 3109L, 379L, 8544L, 444L, 772L, 571L, 226L,
94L, 378L, 60253L, 10920L, 47626L, 671L, 45163L, 27767L, 62498L,
87706L, 4966L, 4615L, 14897L, 261L, 684L, 3780L, 97L, 705L, 7313L,
3629L, 436L, 5076L, 3198L, 731L, 56634L, 67411L, 249L, 403L,
82728L, 9986L, 643662L, 11045L, 934L, 8154L, 289L, 4452L, 624L,
4876L, 86859L, 933L, 2372L, 6493L, 773566L, 6599L, 459L, 2024L,
80425L, 591L, 6262L, 35033L, 89607L, 6435L, 14917L, 9559L, 67983L,
82365L, 88127L, 466L, 758L, 11605L, 828L, 410L, 557L, 2991L,
808L, 8512L, 273605L, 294L, 4666L, 27L, 26337L, 7340L, 682L,
46480L, 19903L, 699L, 700L, 58L, 136L, 852L, 909L, 64316L, 9109L,
876L, 6382L, 803L, 295L, 9539L, 26271L, 1906L, 23639L, 9022L,
9513L, 169L, 65427L, 861864L, 743L, 91L, 9039L, 247L, 58749L,
5674L, 65959L, 99126L, 7765L, 5934L, 13881L, 77696L, 66894L,
977L, 6279L, 46273L, 919L, 6307L, 316L, 420113L, 61336L, 70L,
6148L, 257L, 17804L, 14L, 989L, 16907L, 36L, 25L, 333L, 224L,
119L, 4000L, 9438L, 5439L, 748L, 16532L, 4847L, 939L, 9504L,
2782L, 424L, 64034L, 5306L, 30247L, 6636L, 3976L, 60588L, 180L,
78118L, 1L, 61866L, 9501L, 15834L, 66712L, 77219L, 448L, 612L,
5339L, 58413L, 4785L, 2191L, 35711L, 84383L, 6261L, 896L, 24353L,
54868L, 288L, 8059L, 867L, 687L, 94667L, 1713L, 1507L, 71048L,
882L, 4155L, 97230L, 49492L, 47839L, 793L, 263L, 63160L, 9062L,
3518L, 55956L, 6626L, 14619L, 636L, 1127L, 970L, 5512L, 118117L,
2370L, 802L, 98333L, 6089L, 1076L, 80L, 305L, 3995L, 437L, 49L,
9207L, 2021L, 7554L, 9486L, 33501L, 55745L, 967L, 24857L, 692L,
4148L, 464957L, 2381L, 3876L, 3246L, 1478L, 308L, 98068L, 532L,
4670L, 7965L, 940L, 467L, 777L, 68749L, 2739L, 23951L, 831L,
60763L, 12047L, 75620L, 650L, 69584L, 294122L, 41149L, 9657L,
780L, 153054L, 37990L, 16L, 894L, 15500L, 31873L, 3800L, 472L,
50989L, 8767L, 8209L, 2929L, 4751L, 38L, 47403L, 64941L, 28042L,
49020L, 81785L, 299L, 936L, 63136L, 3L, 42033L, 1750L, 1147L,
273L, 62668L, 41L, 5829L, 686L, 511L, 65019L, 842L, 88716L, 96217L,
9442L, 6324L, 197L, 55422L, 630L, 665L, 3921L, 726L, 766916L,
43944L, 9035L, 573L, 77942L, 29689L, 749L, 95240L, 281L, 1933L,
78265L, 812L, 854L, 17445L, 8855L, 2940L, 6057L, 46689L, 999L,
381L, 347L, 50199L, 161L, 534L, 804L, 99043L, 13183L, 679L, 432L,
38887L, 575L, 781L, 2023L, 187077L, 89498L, 85L, 16780L, 3731L,
45904L, 13861L, 3971L, 301L, 4175L, 9427L, 126913L, 845L, 175L,
1684L, 9064L, 56647L, 116L, 479672L, 6754L, 441L, 412L, 97091L,
4062L, 598L, 146L, 423L, 2715L, 198939L, 80577L, 76385L, 2088L,
139L, 647L, 246L, 85002L, 898L, 50939L, 135L, 46388L, 623L, 17928L,
63072L, 346L, 78582L, 16691L, 838L, 44L, 5181L, 7918L, 3650L,
35L, 8825L, 9758L, 22677L, 9838L, 2239L, 9001L, 96689L, 570L,
47373L, 507L, 6378L, 40839L, 11677L, 937874L, 2485L, 22188L,
20413L, 13L, 877L, 5578L, 428L, 61L, 3200L, 5444L, 85540L, 640L,
94460L, 310L, 6043L, 3771L, 6167L, 476L, 9365L, 1956L, 143L,
7841L, 4957L, 3309L, 9317L, 41434L, 97881L, 51853L, 474L, 3098L,
7109L, 93976L, 545L, 28475L, 2066L, 4959L, 7410L, 293L, 8246L,
43L, 721L, 2260L, 72854L, 100L, 61382L, 107L, 5637L, 891L, 256L,
442L, 84440L, 55792L, 195L, 24074L, 19L, 57376L, 59159L, 805253L,
193329L, 3636L, 98954L, 968L, 380L, 5203L, 90157L, 71907L, 35497L,
41769L, 1683L, 1984L, 5765L, 832L, 411L, 4888L, 9801L, 710L,
2325L, 40L, 32927L, 435L, 66L, 66301L, 94776L, 48234L, 28977L,
122312L, 48L, 359L, 572L, 753L, 945L, 32241L, 328L, 55976L, 128L,
815794L, 57894L, 576L, 60131L, 342448L, 8913L, 33506L, 20448L,
58750L, 637L, 82086L, 635710L, 96772L, 272L, 938L, 4863L, 737L,
949L, 4804L, 3446L, 92319L, 28883L, 6032L, 53970L, 9394L, 5630L,
71583L, 136862L, 23161L, 8545L, 54249L, 213666L, 668L, 893L,
881126L, 8252L, 584L, 83L, 13754L, 244156L, 530L, 64574L, 22009L,
89204L, 34992L, 85992L, 82697L, 50L, 95845L, 3096L, 42L, 554949L,
325L, 2092L, 28L, 3830L, 893583L, 625L, 3740L, 4513L, 9938L,
910L, 8868L, 9614L, 41281L, 27915L, 25839L, 4417L, 5730L, 2825L,
683L, 550L, 88838L, 9248L, 961L, 2748L, 7259L, 53220L, 2179L,
4036L, 46014L, 83725L, 8211L, 6957L, 6886L, 4653L, 6300L, 80437L,
135885L, 23745L, 9536L, 78L, 652590L, 1037L, 5293L, 492L, 7467L,
71685L, 890L, 5023L, 96524L, 17465L, 53665L, 21508L, 463L, 159L,
311L, 764L, 27534L, 71L, 2504L, 270L, 6449L, 13449L, 302L, 88L,
3893L, 22007L, 9208L, 680618L, 878L, 14721L, 20L, 322374L, 644L,
944669L, 57334L, 233L, 982L, 870L, 950L, 121L, 254L, 4226L, 45L,
61823L, 9626L, 58590L, 6552L, 3920L, 68L, 3644L, 35775L, 4591L,
636207L, 78314L, 408L, 371L, 984L, 7089L, 4679L, 2233L, 756L,
20527L, 178L, 80573L, 589923L, 120L, 7938L, 894842L, 6563L, 569L,
91110L, 620L, 786288L, 46022L, 396L, 762533L, 145964L, 7732L,
60L, 274L, 87869L, 227L, 6706L, 707L, 955L, 48246L, 771L, 29001L,
14224L, 5173L, 20215L, 7566L, 1564L, 733L, 3568L, 3570L, 39256L,
925L, 41577L, 348L, 68267L, 151L, 98572L, 1389L, 5421L, 69043L,
42434L, 27597L, 53320L, 46051L, 1686L, 59L, 361L, 747579L, 5044L,
73873L, 28894L, 8146L, 353L, 2622L, 664L, 349L, 90764L, 8920L,
716L, 14903L, 96055L, 89L, 94239L, 416L, 7896L, 232L, 5543L,
61664L, 6709L, 2L, 14275L, 2954L, 917416L, 3567L, 42086L, 99956L,
86112L, 206L, 64L, 25956L, 57112L, 425L, 6507L, 28034L, 991L,
8444L, 140L, 1461L, 68783L, 347633L, 87696L, 593L, 164L, 837L,
8793L, 965L, 8811L, 97412L, 351L, 23L, 66808L, 8308L, 14245L,
12519L, 3019L, 1920L, 813L, 485L, 979L, 929L, 2970L, 32447L,
8962L, 867973L, 40534L, 551L, 20941L, 49413L, 188L, 948L, 9018L,
187252L, 3919L, 45963L, 358L, 7211L, 959L, 47L, 4220L, 36086L,
1645L, 33056L, 300L, 29682L, 9152L, 431L, 364L, 2211L, 3779L,
4633L, 22500L, 33980L, 794L, 84558L, 488L, 732L, 6686L, 15042L,
906L, 13553L, 6115L, 153L, 866L, 3624L, 329L, 6875L, 86L, 6298L,
57424L, 17582L, 955879L, 40945L, 4858L, 694L, 755L, 499L, 406L,
564L, 874L, 1695L, 43961L, 578L, 9063L, 505L, 5856L, 4484L, 76708L,
712L, 23348L, 986L, 275L, 996L, 8966L, 220L, 7008L, 849L, 953460L,
3062L, 278L, 26L, 8547L, 16895L, 98289L, 815L, 25135L, 956L,
370L, 8221L, 72674L, 31711L, 73L, 41667L, 2915L, 797L, 41309L,
4257L, 8148L, 5723L, 2124L, 8306L, 53388L, 33520L, 680L, 893759L,
40133L, 94791L, 988L, 162L, 79366L, 37625L, 7125L, 50947L, 171L,
99558L, 166L, 90717L, 5807L, 606L, 98592L, 59207L, 966L, 61299L,
7553L, 9678L, 62322L, 156L, 267L, 8478L, 59554L, 2264L, 28338L,
899L, 9719L, 98L, 51403L, 6302L, 265L, 79929L, 101L, 5227L, 972L,
145L, 48018L, 90140L, 698L, 8L, 5751L, 26083L, 1295L, 78124L,
383L, 2776L, 80204L, 210L, 3422L, 36064L, 46L, 4953L, 20271L,
3916L, 767L, 601372L, 56575L, 5237L, 5621L, 6705L, 1191L, 63768L,
1016L, 313L, 2285L, 12489L, 2755L, 338L, 7518L, 2630L, 421L,
6554L, 306L, 113L, 57197L, 885L, 9445L, 37364L, 86630L, 2460L,
715L, 10829L, 9914L, 6635L, 229L, 525L, 839L, 3278L, 969L, 182L,
187L, 7022L, 554L, 6489L, 15791L, 4157L, 47048L, 9447L, 152L,
1419L, 22618L, 5194L, 609L, 923L, 768L, 6248L, 714L, 1159L, 825893L,
53492L, 19731L, 65167L, 96325L, 336L, 4443L, 843L, 62960L, 9788L,
35032L, 284L, 4647L, 360L, 11297L, 1515L)
findSumm(x, 9568447L)$xfind[[1]]
# Already found nmax solutions.
# Found 1 solutions in time 0.065 seconds.
# [1] 955879 953460 944669 937874 920597 917416 894842 893759 893583 881126 347633 27597 8 3 1
As you can see my findSumm
function works great. It took only 0.065 seconds to extract a subset with a sum equal to 9568447L
!
Unfortunately, trying to run gbp1d_solver_dpp
on such a long vector resulted in the error "size is too large". So I was not able to compare the performance of these two solutions with such a large vector.