An option would be to use `arrayInd`.
# Example input: a 2 x 2 x 2 integer array filled with 1..8.
A <- array(1:8, dim = c(2, 2, 2))
# Turn every linear position of A into its array subscripts (one column per
# dimension) and append the cell values as the last column.
seq_along(A) |>
  arrayInd(.dim = dim(A)) |>
  data.frame(value = as.vector(A))
# X1 X2 X3 value
#1 1 1 1 1
#2 2 1 1 2
#3 1 2 1 3
#4 2 2 1 4
#5 1 1 2 5
#6 2 1 2 6
#7 1 2 2 7
#8 2 2 2 8
Or, quite similar to @ThomasIsCoding, using `which`.
# which() on an all-TRUE mask shaped like A lists the subscripts of every
# cell; the cell values are added as the last column.
array(TRUE, dim = dim(A)) |>
  which(arr.ind = TRUE) |>
  data.frame(value = as.vector(A))
# dim1 dim2 dim3 value
#1 1 1 1 1
#2 2 1 1 2
#3 1 2 1 3
#4 2 2 1 4
#5 1 1 2 5
#6 2 1 2 6
#7 1 2 2 7
#8 2 2 2 8
If the array has dimension names.
# Same example array, now with named and labelled dimensions.
A <- array(
  1:8,
  dim = c(2, 2, 2),
  dimnames = list(X = c("a", "b"), Y = c("c", "d"), Z = c("e", "f"))
)
# Subscript matrix with one column per dimension, named after the dimnames.
i <- arrayInd(
  seq_along(A),
  .dim = dim(A),
  .dimnames = dimnames(A),
  useNames = TRUE
)
# Look up, dimension by dimension, the label belonging to each subscript.
cell_labels <- mapply(`[`, dimnames(A), asplit(i, 2))
data.frame(cell_labels, value = as.vector(A))
# X Y Z value
#1 a c e 1
#2 b c e 2
#3 a d e 3
#4 b d e 4
#5 a c f 5
#6 b c f 6
#7 a d f 7
#8 b d f 8
But this can be achieved, as shown in the comments, with `as.data.frame(ftable(A))` (@Jon Spring) or `as.data.frame.table(A)` (@Onyambu).
If you look at the source of `as.data.frame.table`, you see that it is using `expand.grid`.
# Base R already provides the long format: one row per cell, the index
# columns showing the dimnames labels and the cell values in `Freq`.
as.data.frame.table(A) # suggested by @Onyambu
# Alternative suggested by @Jon Spring:
#as.data.frame(ftable(A))
# X Y Z Freq
#1 a c e 1
#2 b c e 2
#3 a d e 3
#4 b d e 4
#5 a c f 5
#6 b c f 6
#7 a d f 7
#8 b d f 8
But if numeric indices are wanted this can be used.
# data.matrix() converts the data frame to a numeric matrix and replaces each
# factor column by its integer codes, i.e. the numeric index along that
# dimension. Unlike sapply(), the type of its result does not depend on the
# shape of the input.
data.matrix(as.data.frame.table(A))
# X Y Z Freq
#[1,] 1 1 1 1
#[2,] 2 1 1 2
#[3,] 1 2 1 3
#[4,] 2 2 1 4
#[5,] 1 1 2 5
#[6,] 2 1 2 6
#[7,] 1 2 2 7
#[8,] 2 2 2 8
Or, more robust and returning a `data.frame`:
# Convert to long format, then replace every column except the last one (the
# values) by its unclassed form; the values column is left untouched.
tt <- as.data.frame.table(A)
tt[-ncol(tt)] <- lapply(tt[-ncol(tt)], unclass)
tt
# X Y Z Freq
#1 1 1 1 1
#2 2 1 1 2
#3 1 2 1 3
#4 2 2 1 4
#5 1 1 2 5
#6 2 1 2 6
#7 1 2 2 7
#8 2 2 2 8
# Or, as a single expression: unclass every column and rebuild the data frame.
as.data.frame.table(A) |>
  lapply(unclass) |>
  list2DF()
Or a variant - Thanks to @Onyambu for the hint!
# Drop the dimnames so that as.data.frame.table() takes the level labels from
# `base` ("1", "2", ...), then let type.convert() turn those labels back into
# numbers.
unname(A) |>
  as.data.frame.table(
    base = list(as.character(seq_len(max(dim(A)))))
  ) |>
  type.convert(as.is = TRUE)
# Var1 Var2 Var3 Freq
#1 1 1 1 1
#2 2 1 1 2
#3 1 2 1 3
#4 2 2 1 4
#5 1 1 2 5
#6 2 1 2 6
#7 1 2 2 7
#8 2 2 2 8
Another option is to calculate it "by hand" with `%%` and `%/%`.
# Zero-based linear offset of every cell.
offsets <- 0:(length(A) - 1)
extents <- dim(A)
# Dividing the offsets successively by the leading extents yields, for each
# dimension, the offset counted in units of that dimension's stride.
quotients <- Reduce(
  `%/%`,
  extents[-length(extents)],
  offsets,
  accumulate = TRUE
)
# Taking each quotient modulo its own extent and adding 1 gives the one-based
# subscript along that dimension.
cbind(1 + mapply(`%%`, quotients, extents), Value = as.vector(A))
# Value
#[1,] 1 1 1 1
#[2,] 2 1 1 2
#[3,] 1 2 1 3
#[4,] 2 2 1 4
#[5,] 1 1 2 5
#[6,] 2 1 2 6
#[7,] 1 2 2 7
#[8,] 2 2 2 8
#Alternative
# Alternative: the same arithmetic, vectorised with outer().
# Zero-based linear offset of every cell.
offsets <- 0:(length(A) - 1)
# Column 1 holds the offsets themselves, the remaining columns the offsets
# integer-divided by the cumulative products of the leading extents.
# deparse.level = 0 keeps the variable name out of the column names.
quotients <- cbind(
  offsets,
  outer(offsets, cumprod(dim(A)[-length(dim(A))]), `%/%`),
  deparse.level = 0
)
# Reduce every column modulo its own extent and shift to one-based subscripts;
# the values are flattened explicitly, as in the other variants.
cbind(1 + sweep(quotients, 2, dim(A), `%%`), Value = as.vector(A))
Or using `rep`.
# Stride of each dimension: the number of cells after which its subscript
# advances (1 for the first dimension).
strides <- c(1, cumprod(dim(A)[-length(dim(A))]))
# One index column per dimension: 1..extent with every value repeated
# `stride` times, cycled until it covers all cells of A.
index_cols <- Map(
  function(extent, stride, total) {
    rep_len(rep(seq_len(extent), each = stride), total)
  },
  dim(A),
  strides,
  length(A)
)
list2DF(c(index_cols, Value = list(as.vector(A))))
# Value
#1 1 1 1 1
#2 2 1 1 2
#3 1 2 1 3
#4 2 2 1 4
#5 1 1 2 5
#6 2 1 2 6
#7 1 2 2 7
#8 2 2 2 8
Or basically the same, but keeping the names and making use of automatic repetition (recycling).
# Extents of A, labelled with the names of its dimnames so that the index
# columns inherit them.
d <- setNames(dim(A), names(dimnames(A)))
# Each index column holds a single cycle only (1..extent, every value repeated
# `stride` times); data.frame() repeats it to the length of `Value`.
index_cols <- Map(
  function(extent, stride) rep(1:extent, each = stride),
  d,
  c(1, cumprod(d[-length(d)]))
)
do.call(
  data.frame,
  c(index_cols, Value = list(as.vector(A)), fix.empty.names = FALSE)
)
# X Y Z Value
#1 1 1 1 1
#2 2 1 1 2
#3 1 2 1 3
#4 2 2 1 4
#5 1 1 2 5
#6 2 1 2 6
#7 1 2 2 7
#8 2 2 2 8
Benchmark
# Benchmark input: a 1e5 x 12 x 30 array of zeros whose dimensions are named
# T, Month and Year but carry no labels.
A <- array(0, c(1e5, 12, 30), list(T=NULL, Month=NULL, Year=NULL))
# Time four ways of turning A into long format. check=FALSE is passed to
# bench::mark(), presumably because the candidates return results that differ
# in column names and types -- confirm against the bench documentation.
bench::mark(check=FALSE,
# reshape2's melt() as the reference from outside base R.
reshape2 = reshape2::melt(A),
# expand.grid() over the index sequences plus the flattened values.
expand.grid = {data.frame( # approach by @Roland
expand.grid(lapply(dim(A), seq_len)),
value = as.vector(A)) },
# as.data.frame.table() followed by unclassing every column but the last.
data.frame.table = {tt <- as.data.frame.table(A)
tt[-length(tt)] <- lapply(tt[-length(tt)], unclass)
tt},
# rep() per dimension, combined into a data.frame together with the values.
rep = {d <- setNames(dim(A), names(dimnames(A)))
do.call(data.frame, c(
Map(\(i,j) rep(1:i, each=j), d, c(1, cumprod(d[-length(d)]))),
Value=list(as.vector(A) ), fix.empty.names = FALSE) )} )
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
#1 reshape2 812ms 812ms 1.23 1.21GB 1.23 1 1
#2 expand.grid 733ms 733ms 1.36 1.21GB 2.73 1 2
#3 data.frame.table 605ms 605ms 1.65 1.23GB 3.31 1 2
#4 rep 293ms 331ms 3.02 691.99MB 1.51 2 1
In this case the variant using `rep` is the fastest and allocates the lowest amount of memory.
`as.data.frame(ftable(g_as_array))` or `data.table::as.data.table(g_as_array)` – Caldwell
`as.data.frame(ftable(g_as_array)) %>% dplyr::mutate(dplyr::across(dplyr::starts_with("Var"), as.numeric))` – Flyfish
`pivot_longer` and `pivot_wider` from tidyr seem to be the main alternatives these days. – Took
`pivot_longer` and `pivot_wider` only work for two-dimensional table-like data, not multi-dimensional arrays, unless I'm missing some functionality there? – Flyfish
`as.data.frame.table(g_as_array)` will give the results but using `LETTERS` instead of `numbers` – Adrianople