Multiple groups tests via permutation

Asked 4/1, 2018 at 21:26 Answered 4/1, 2018 at 22:55

I have a df containing two sets of values (value_1 and value_2) relative to two experiments.

One experiment contains two groups (0 and 1), the other contains three groups (0,1,2).

test    group   Value_1    Value_2
AA      0           15.1    11.2
AA      0           12.4    8.6
AA      1           9.6     22.5
AA      1           10.2    22
BB      0           12.11   11
BB      0           14      1.2
BB      1           11      13.2
BB      1           12.3    9
BB      2           14.2    12
BB      2           15      13

# Example data: experiment AA has groups 0/1, experiment BB has groups 0/1/2,
# two observations per group, two measured variables.
df <- data.frame(
  test = factor(rep(c("AA", "BB"), times = c(4L, 6L)), levels = c("AA", "BB")),
  group = rep(c(0L, 1L, 0L, 1L, 2L), each = 2L),
  Value_1 = c(15.1, 12.4, 9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15),
  Value_2 = c(11.2, 8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)
)

I want to apply a permutation test - by test, by group - for both value_1 and value_2 - covering:

the two groups case (0 vs 1) and
more-than-2 case (0 vs 1, 0 vs 2, 1 vs 2).

What I do so far - broken into pieces is:

When the groups are only two, I simply apply the oneway.test():

# Keep only the rows of experiment AA (it has just the two groups 0 and 1).
df_test_aa <- filter(df, test %in% "AA")

# oneway.test() of Value_1 between the two groups; print its p-value.
df_test_aa_value_1 <- oneway.test(
  df_test_aa$Value_1 ~ df_test_aa$group
)
df_test_aa_value_1[["p.value"]]
[1] 0.2011234


# Same comparison for Value_2.
df_test_aa_value_2 <- oneway.test(
  df_test_aa$Value_2 ~ df_test_aa$group
)
df_test_aa_value_2[["p.value"]]
[1] 0.05854026

Whenever there are more than 2 groups, I test all the possible pairwise combinations of groups:

First 0 vs 1:

# Rows of experiment BB restricted to groups 0 and 1.
df_test_bb_01 <- filter(df, test %in% "BB", group %in% c(0, 1))

# oneway.test() of Value_1 between groups 0 and 1; print its p-value.
df_test_bb_01_value_1 <- oneway.test(
  df_test_bb_01$Value_1 ~ df_test_bb_01$group
)
df_test_bb_01_value_1[["p.value"]]
[1] 0.3585415

# Same comparison for Value_2.
df_test_bb_01_value_2 <- oneway.test(
  df_test_bb_01$Value_2 ~ df_test_bb_01$group
)
df_test_bb_01_value_2[["p.value"]]
[1] 0.4848446

Then 0 vs 2:

 # Rows of experiment BB restricted to groups 0 and 2.
 df_test_bb_02 <- filter(df, test %in% "BB", group %in% c(0, 2))

 # oneway.test() of Value_1 between groups 0 and 2; print its p-value.
 df_test_bb_02_value_1 <- oneway.test(
   df_test_bb_02$Value_1 ~ df_test_bb_02$group
 )
 df_test_bb_02_value_1[["p.value"]]
 [1] 0.3246012

 # Same comparison for Value_2.
 df_test_bb_02_value_2 <- oneway.test(
   df_test_bb_02$Value_2 ~ df_test_bb_02$group
 )
 df_test_bb_02_value_2[["p.value"]]
 [1] 0.4142838

Then 1 vs 2:

 # Rows of experiment BB restricted to groups 1 and 2.
 df_test_bb_12 <- filter(df, test %in% "BB", group %in% c(1, 2))

 # oneway.test() of Value_1 between groups 1 and 2; print its p-value.
 df_test_bb_12_value_1 <- oneway.test(
   df_test_bb_12$Value_1 ~ df_test_bb_12$group
 )
 df_test_bb_12_value_1[["p.value"]]
  [1] 0.08105404


 # Same comparison for Value_2.
 df_test_bb_12_value_2 <- oneway.test(
   df_test_bb_12$Value_2 ~ df_test_bb_12$group
 )
 df_test_bb_12_value_2[["p.value"]]
  [1] 0.6245713

As a result, I wish to obtain a df that looks like:

test value  p_value_2sided  hypothesis
AA  Value_1   0.201         0,1
AA  Value_2   0.059         0,1
BB  Value_1   0.359         0,1
BB  Value_1   0.325         0,2
BB  Value_1   0.081         1,2
BB  Value_2   0.485         0,1
BB  Value_2   0.414         0,2
BB  Value_2   0.625         1,2

Thanks for your hints!

Clangor answered 4/1, 2018 at 21:26 Comment(0)

Well, this is not pretty but ...

# Example data: experiment AA has groups 0/1, experiment BB has groups 0/1/2,
# two observations per group, two measured variables.
df <- data.frame(
  test = factor(rep(c("AA", "BB"), times = c(4L, 6L)), levels = c("AA", "BB")),
  group = rep(c(0L, 1L, 0L, 1L, 2L), each = 2L),
  Value_1 = c(15.1, 12.4, 9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15),
  Value_2 = c(11.2, 8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)
)

# Pairwise group comparisons: within every level of `test`, run
# oneway.test() on each pair of groups for every numeric measurement column.
# Evaluates to a data frame with one row per (test, pair of groups,
# measurement) and columns test, value, p_value (numeric) and
# hypothesis ("g1,g2"). Rows are ordered by test, then pair, then measurement.
local({
  # Measurement columns are discovered from the data instead of being
  # hard-coded, so adding e.g. Value_3 needs no further changes.
  is_num <- vapply(df, is.numeric, logical(1L))
  value_cols <- setdiff(names(df)[is_num], "group")
  stopifnot(length(value_cols) > 0L)

  per_test <- lapply(split(df, factor(df$test)), function(x) {
    groups <- unique(x$group)
    # combn() would read a single number n as seq_len(n), so a test with
    # fewer than two groups is skipped instead of producing bogus pairs.
    if (length(groups) < 2L) {
      return(NULL)
    }

    per_pair <- lapply(combn(groups, 2L, simplify = FALSE), function(pair) {
      pair_rows <- x[x$group %in% pair, , drop = FALSE]
      data.frame(
        test = as.character(x$test[1L]),
        value = value_cols,
        # Building a data frame (not cbind-ing a matrix) keeps p_value numeric.
        p_value = vapply(
          value_cols,
          function(col) oneway.test(pair_rows[[col]] ~ pair_rows$group)$p.value,
          numeric(1L),
          USE.NAMES = FALSE
        ),
        hypothesis = paste(pair, collapse = ","),
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, per_pair)
  })

  result <- do.call(rbind, per_test)
  if (!is.null(result)) {
    # Drop the list-name-derived row names so rows are numbered 1..n.
    rownames(result) <- NULL
  }
  result
})


  test   value            p_value hypothesis
1   AA Value_1  0.201123366107666        0,1
2   AA Value_2 0.0585402590546805        0,1
3   BB Value_1  0.358541470571387        0,1
4   BB Value_2  0.484844587956832        0,1
5   BB Value_1  0.324601180998953        0,2
6   BB Value_2  0.414283756097153        0,2
7   BB Value_1 0.0810540380817137        1,2
8   BB Value_2  0.624571310834221        1,2

Flopeared answered 4/1, 2018 at 22:49 Comment(2)

It works and I understand the code. My question is: if I add an extra variable (let's say Value_3), the code doesn't work anymore because it doesn't add the right combinations. Let me edit my question – Clangor 9/1, 2018 at 11:21

Are you sure? It works fine for me, just make sure to also set h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2) to each = 3 ! – Flopeared 9/1, 2018 at 19:38

# Example data: experiment AA has groups 0/1, experiment BB has groups 0/1/2,
# two observations per group, two measured variables.
df <- data.frame(
  test = factor(rep(c("AA", "BB"), times = c(4L, 6L)), levels = c("AA", "BB")),
  group = rep(c(0L, 1L, 0L, 1L, 2L), each = 2L),
  Value_1 = c(15.1, 12.4, 9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15),
  Value_2 = c(11.2, 8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)
)

library(tidyverse)

# Long format: every column except `test` and `group` is stacked, with the
# original column name stored in `value` and the measurement in `v`.
df2 <- gather(df, key = value, value = v, -test, -group)

# p-value of oneway.test(v ~ group) for a single combination:
#   t      - level of `test` to keep
#   val    - measurement name to keep (entry of the `value` column)
#   x1, x2 - the two groups being compared
# Reads the long-format `df2` from the enclosing environment.
f <- function(t, val, x1, x2) {
  pair_rows <- filter(df2, test == t & value == val & group %in% c(x1, x2))
  oneway.test(v ~ group, data = pair_rows)$p.value
}
# Vectorised wrapper so `f` can be applied to whole columns at once.
f <- Vectorize(f)

# Build one row per (test, value, pair of groups) and attach the p-value
# computed by the vectorised `f`.
# NOTE(review): newer tidyr releases expect `unnest(cols = c(d, hypothesis))`
# and may warn on the multi-column call below - confirm against the
# installed tidyr version.
df2 %>% 
  distinct(test, group, value) %>%       # unique (test, group, value) combinations
  group_by(test, value) %>%              # one group per test and measurement name
  nest() %>%                             # remaining column (group) goes into list-column `data`
  # d: every pair of groups as a two-column data frame (columns X1, X2)
  mutate(d = map(data, ~data.frame(t(combn(.$group, 2)))),
         hypothesis = map(d, ~paste0(.$X1, ",", .$X2))) %>%  # "X1,X2" label for each pair
  unnest(d, hypothesis) %>%              # expand both list-columns back into rows
  mutate(pval = f(test, value, X1, X2))  # one p-value per row via the vectorised f

# # A tibble: 8 x 6
#   test   value   hypothesis    X1    X2   pval
#   <fctr> <chr>   <chr>      <int> <int>  <dbl>
# 1 AA     Value_1 0,1            0     1 0.201 
# 2 BB     Value_1 0,1            0     1 0.359 
# 3 BB     Value_1 0,2            0     2 0.325 
# 4 BB     Value_1 1,2            1     2 0.0811
# 5 AA     Value_2 0,1            0     1 0.0585
# 6 BB     Value_2 0,1            0     1 0.485 
# 7 BB     Value_2 0,2            0     2 0.414 
# 8 BB     Value_2 1,2            1     2 0.625

You can delete X1 and X2 if you really don't need them. However, in this way, you (also) have them as separate numeric variables in case you want to use them in another process (eg. filter on a specific group) at a later stage in your analysis.

Loadstar answered 4/1, 2018 at 22:55 Comment(0)

Well, this is not pretty but ...

# Example data: experiment AA has groups 0/1, experiment BB has groups 0/1/2,
# two observations per group, two measured variables.
df <- data.frame(
  test = factor(rep(c("AA", "BB"), times = c(4L, 6L)), levels = c("AA", "BB")),
  group = rep(c(0L, 1L, 0L, 1L, 2L), each = 2L),
  Value_1 = c(15.1, 12.4, 9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15),
  Value_2 = c(11.2, 8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)
)

# Pairwise group comparisons: within every level of `test`, run
# oneway.test() on each pair of groups for every numeric measurement column.
# Evaluates to a data frame with one row per (test, pair of groups,
# measurement) and columns test, value, p_value (numeric) and
# hypothesis ("g1,g2"). Rows are ordered by test, then pair, then measurement.
local({
  # Measurement columns are discovered from the data instead of being
  # hard-coded, so adding e.g. Value_3 needs no further changes.
  is_num <- vapply(df, is.numeric, logical(1L))
  value_cols <- setdiff(names(df)[is_num], "group")
  stopifnot(length(value_cols) > 0L)

  per_test <- lapply(split(df, factor(df$test)), function(x) {
    groups <- unique(x$group)
    # combn() would read a single number n as seq_len(n), so a test with
    # fewer than two groups is skipped instead of producing bogus pairs.
    if (length(groups) < 2L) {
      return(NULL)
    }

    per_pair <- lapply(combn(groups, 2L, simplify = FALSE), function(pair) {
      pair_rows <- x[x$group %in% pair, , drop = FALSE]
      data.frame(
        test = as.character(x$test[1L]),
        value = value_cols,
        # Building a data frame (not cbind-ing a matrix) keeps p_value numeric.
        p_value = vapply(
          value_cols,
          function(col) oneway.test(pair_rows[[col]] ~ pair_rows$group)$p.value,
          numeric(1L),
          USE.NAMES = FALSE
        ),
        hypothesis = paste(pair, collapse = ","),
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, per_pair)
  })

  result <- do.call(rbind, per_test)
  if (!is.null(result)) {
    # Drop the list-name-derived row names so rows are numbered 1..n.
    rownames(result) <- NULL
  }
  result
})


  test   value            p_value hypothesis
1   AA Value_1  0.201123366107666        0,1
2   AA Value_2 0.0585402590546805        0,1
3   BB Value_1  0.358541470571387        0,1
4   BB Value_2  0.484844587956832        0,1
5   BB Value_1  0.324601180998953        0,2
6   BB Value_2  0.414283756097153        0,2
7   BB Value_1 0.0810540380817137        1,2
8   BB Value_2  0.624571310834221        1,2

Flopeared answered 4/1, 2018 at 22:49 Comment(2)

Are you sure? It works fine for me, just make sure to also set h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2) to each = 3 ! – Flopeared 9/1, 2018 at 19:38

Recommended topics

Hot tags