Another way using rev
and match
.
rev
reverses the vector, so that match
, which returns the first hit, can be used to find the last 1 sequence.
f <- \(x) {
. <- rev(x)
i <- match(1, .)
if(is.na(i)) return(NA)
j <- match(0, tail(., -i))
if(is.na(j)) 1
else length(.) - i - j + 2 }
f(test1)
#[1] 36
f(test2)
#[1] 29
f(c(1,1))
#[1] 1
f(c(0,1))
#[1] 2
f(c(1,0))
#[1] 1
f(c(0,0))
#[1] NA
Or write a function using Rcpp
doing the same but can iterate starting from the end.
Rcpp::cppFunction("int f2(NumericVector x) {
auto i = x.end();
while(i != x.begin() && *(--i) != 1.) ;
while(i != x.begin() && *(--i) == 1.) ;
if(*i != 1.) ++i;
return i == x.end() || *i != 1. ? 0 : i - x.begin() + 1;
}")
f2(test1)
#[1] 36
f2(test2)
#[1] 29
f2(c(1,1))
#[1] 1
f2(c(0,1))
#[1] 2
f2(c(1,0))
#[1] 1
f2(c(0,0))
#[1] 0
Or using rev
, diff
and match
.
f3 <- \(x) {
i <- match(-1, diff(rev(x)))
if(is.finite(i)) length(x) - i + 1
else if(x[1] == 1) 1
else NA
}
f3(test1)
#[1] 36
f3(test2)
#[1] 29
f3(c(1,1))
#[1] 1
f3(c(0,1))
#[1] 2
f3(c(1,0))
#[1] 1
f3(c(0,0))
#[1] NA
Benchmark
uniqlist <- function(x) { #M.Viking
y <- data.table:::uniqlist(list(x))
ifelse(x[y[length(y)]] == 1, y[length(y)], y[length(y) - 1]) }
which_diff <- function(x) { #Maël
idx <- which(x == 1)
idx[tail(which(c(0, diff(idx)) != 1), 1)] }
# Dataset from question
x <- rep(c(0,1,0,1,0,1), c(14736,413,830,961,274,12787))
bench::mark(max_iterations = 1e5, f(x), f3(x), which_diff(x),
uniqlist(x), f2(x) )
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 f(x) 199.07µs 251.5µs 3412. 1.21MB 76.3 1341 30
#2 f3(x) 218.05µs 319.61µs 3144. 1.76MB 117. 1079 40
#3 which_diff(x) 155.01µs 177.53µs 5518. 954.17KB 103. 2296 43
#4 uniqlist(x) 17.04µs 17.72µs 55386. 1.36MB 4.04 27442 2
#5 f2(x) 5.61µs 6.13µs 161213. 2.49KB 6.16 78462 3
# Data with many changes between 0 and 1 and hit at end
x <- rep(c(0,1), 1e6)
bench::mark(max_iterations = 1e5, f(x), f3(x), which_diff(x),
uniqlist(x), f2(x) )
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 f(x) 17.97ms 19.86ms 44.6 76.29MB 50.5 23 26
#2 f3(x) 28.77ms 32.78ms 25.6 114.44MB 52.9 14 29
#3 which_diff(x) 14.47ms 16.91ms 52.3 68.67MB 67.8 27 35
#4 uniqlist(x) 2.66ms 3ms 294. 7.63MB 27.8 148 14
#5 f2(x) 1.08µs 1.28µs 701103. 2.49KB 21.0 100000 3
# Data where hit is at beginning
x <- c(0,1,rep(0, 1e6))
bench::mark(max_iterations = 1e5, f(x), f3(x), which_diff(x),
uniqlist(x), f2(x) )
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 f(x) 4.34ms 6.6ms 131. 19.11MB 84.6 71 46
#2 f3(x) 15.1ms 18.73ms 35.9 57.24MB 75.7 18 38
#3 which_diff(x) 1.37ms 1.44ms 529. 7.63MB 93.9 265 47
#4 uniqlist(x) 470.91µs 491.54µs 1994. 1.36MB 0 997 0
#5 f2(x) 364.46µs 375.08µs 2649. 2.49KB 0 1325 0
# Data where hit is at end
x <- c(rep(0, 1e6),1,0)
bench::mark(max_iterations = 1e5, f(x), f3(x), which_diff(x),
uniqlist(x), f2(x) )
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 f(x) 10.53ms 11.33ms 69.8 38.18MB 91.8 35 46
#2 f3(x) 14.19ms 17.18ms 37.6 57.24MB 69.3 19 35
#3 which_diff(x) 1.38ms 1.49ms 512. 7.63MB 77.9 256 39
#4 uniqlist(x) 479.76µs 491.61µs 1997. 1.36MB 0 999 0
#5 f2(x) 1.08µs 1.28µs 683440. 2.49KB 27.3 100000 4
The Rcpp function is the fastest and allocates the lowest amount of memory. Its performance depends where the match could be found.
1
sequence. Try it withtest1 <- c(0,1,1,0)
where I would expect2
as a result but here I getinteger(0)
. – Neurology