Multiple groups tests via permutation
Asked Answered
C

2

6

I have a data frame (df) containing two sets of values (Value_1 and Value_2) from two experiments.

One experiment contains two groups (0 and 1), the other contains three groups (0,1,2).

test    group   Value_1    Value_2
AA      0           15.1    11.2
AA      0           12.4    8.6
AA      1           9.6     22.5
AA      1           10.2    22
BB      0           12.11   11
BB      0           14      1.2
BB      1           11      13.2
BB      1           12.3    9
BB      2           14.2    12
BB      2           15      13

# Example data: experiment AA has groups 0 and 1 (two rows each),
# experiment BB has groups 0, 1 and 2 (two rows each).
df <- data.frame(
  test = factor(rep(c("AA", "BB"), times = c(4L, 6L))),
  group = c(0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L),
  Value_1 = c(15.1, 12.4, 9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15),
  Value_2 = c(11.2, 8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)
)

I want to apply a test to every pairwise combination of groups - within each test - for both Value_1 and Value_2, covering:

  • the two groups case (0 vs 1) and
  • more-than-2 case (0 vs 1, 0 vs 2, 1 vs 2).

What I do so far - broken into pieces is:

  1. When the groups are only two, I simply apply the oneway.test():

    library(dplyr)  # provides %>% and filter()

    # Test AA has only two groups (0 and 1), so a single oneway.test() per
    # value column is enough. Console output is kept as `#>` comments so the
    # snippet can be pasted and run as-is.
    df_test_aa <- df %>%
      filter(test %in% 'AA')

    # Value_1: group 0 vs group 1
    df_test_aa_value_1 <- oneway.test(Value_1 ~ group, data = df_test_aa)
    df_test_aa_value_1$p.value
    #> [1] 0.2011234

    # Value_2: group 0 vs group 1
    df_test_aa_value_2 <- oneway.test(Value_2 ~ group, data = df_test_aa)
    df_test_aa_value_2$p.value
    #> [1] 0.05854026
    
  2. Whenever the groups are more than 2, I test every possible pair of groups:

    • First 0 vs 1:

      # Test BB, groups 0 vs 1. Console output is kept as `#>` comments so the
      # snippet can be pasted and run as-is.
      df_test_bb_01 <- df %>%
        filter(test %in% 'BB' & group %in% c(0, 1))

      # Value_1: group 0 vs group 1
      df_test_bb_01_value_1 <- oneway.test(Value_1 ~ group, data = df_test_bb_01)
      df_test_bb_01_value_1$p.value
      #> [1] 0.3585415

      # Value_2: group 0 vs group 1
      df_test_bb_01_value_2 <- oneway.test(Value_2 ~ group, data = df_test_bb_01)
      df_test_bb_01_value_2$p.value
      #> [1] 0.4848446
      
    • Then 0 vs 2:

       # Test BB, groups 0 vs 2. Console output is kept as `#>` comments so the
       # snippet can be pasted and run as-is.
       df_test_bb_02 <- df %>%
         filter(test %in% 'BB' & group %in% c(0, 2))

       # Value_1: group 0 vs group 2
       df_test_bb_02_value_1 <- oneway.test(Value_1 ~ group, data = df_test_bb_02)
       df_test_bb_02_value_1$p.value
       #> [1] 0.3246012

       # Value_2: group 0 vs group 2
       df_test_bb_02_value_2 <- oneway.test(Value_2 ~ group, data = df_test_bb_02)
       df_test_bb_02_value_2$p.value
       #> [1] 0.4142838
      
    • Then 1 vs 2:

       # Test BB, groups 1 vs 2. Console output is kept as `#>` comments so the
       # snippet can be pasted and run as-is.
       df_test_bb_12 <- df %>%
         filter(test %in% 'BB' & group %in% c(1, 2))

       # Value_1: group 1 vs group 2
       df_test_bb_12_value_1 <- oneway.test(Value_1 ~ group, data = df_test_bb_12)
       df_test_bb_12_value_1$p.value
       #> [1] 0.08105404

       # Value_2: group 1 vs group 2
       df_test_bb_12_value_2 <- oneway.test(Value_2 ~ group, data = df_test_bb_12)
       df_test_bb_12_value_2$p.value
       #> [1] 0.6245713
      

As a result, I wish to obtain a df that looks like:

test value  p_value_2sided  hypothesis
AA  Value_1   0.201         0,1
AA  Value_2   0.059         0,1
BB  Value_1   0.359         0,1
BB  Value_1   0.325         0,2
BB  Value_1   0.081         1,2
BB  Value_2   0.485         0,1
BB  Value_2   0.414         0,2
BB  Value_2   0.625         1,2

Thanks for your hints!

Clangor answered 4/1, 2018 at 21:26 Comment(0)
F
2

Well, this is not pretty but ...

df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L, 
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4, 
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2, 
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test", 
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, 
-10L))

#' Pairwise oneway.test() p-values for every test and value column
#'
#' For each level of `data$test`, every pair of distinct `data$group` values
#' is compared with `oneway.test()` once per value column.
#'
#' @param data A data frame with columns `test`, `group` and one or more
#'   numeric value columns.
#' @param value_cols Character vector naming the value columns. Defaults to
#'   every column other than `test` and `group`, so adding e.g. `Value_3`
#'   needs no code change.
#' @return A data frame with columns `test`, `value`, `p_value` and
#'   `hypothesis` (the compared groups as "a,b"). Rows are ordered by test,
#'   then by group pair, then by value column. All columns come from a
#'   character matrix, so `p_value` is text, not numeric.
pairwise_oneway <- function(data,
                            value_cols = setdiff(names(data), c("test", "group"))) {
  per_test <- by(data, factor(data$test), function(x) {
    groups <- unique(x$group)
    # A single group leaves nothing to compare. It would also trip combn(),
    # which expands a lone positive number n into seq_len(n).
    if (length(groups) < 2L) {
      return(NULL)
    }

    # One row per pair of groups.
    h <- t(combn(groups, 2))

    # One column per pair; within a column, one p-value per value column.
    p <- apply(h, 1, function(y) {
      pair_rows <- x[x$group %in% y, ]
      vapply(value_cols, function(col) {
        oneway.test(pair_rows[[col]] ~ pair_rows$group)$p.value
      }, numeric(1))
    })

    # as.vector(p) reads column by column (pair by pair), so each pair label
    # is repeated once per value column to stay aligned.
    cbind(
      test = as.character(x$test[1]),
      value = value_cols,
      p_value = as.vector(p),
      hypothesis = rep(paste(h[, 1], h[, 2], sep = ","),
                       each = length(value_cols))
    )
  })

  # rbind() drops the NULL entries of tests that were skipped above.
  as.data.frame(do.call(rbind, per_test))
}

pairwise_oneway(df)


  test   value            p_value hypothesis
1   AA Value_1  0.201123366107666        0,1
2   AA Value_2 0.0585402590546805        0,1
3   BB Value_1  0.358541470571387        0,1
4   BB Value_2  0.484844587956832        0,1
5   BB Value_1  0.324601180998953        0,2
6   BB Value_2  0.414283756097153        0,2
7   BB Value_1 0.0810540380817137        1,2
8   BB Value_2  0.624571310834221        1,2
Flopeared answered 4/1, 2018 at 22:49 Comment(2)
It works and I understand the code. My question is: if I add an extra variable (let's say Value_3), the code doesn't work anymore because it doesn't add the right combinations... let me edit my question – Clangor
Are you sure? It works fine for me; just make sure to also change each = 2 to each = 3 in h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2)! – Flopeared
L
3
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L, 
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4, 
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2, 
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test", 
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, -10L))

library(tidyverse)

# Reshape to long format: one row per observation and value column.
# `value` holds the original column name, `v` the measurement.
# pivot_longer() replaces the superseded gather().
df2 <- df %>%
  pivot_longer(cols = -c(test, group), names_to = "value", values_to = "v")

# p-value of oneway.test() comparing groups `x1` and `x2` within test `t`
# for value column `val`. Reads the long-format data from the global `df2`.
# Vectorize() lets it be called with whole columns inside mutate().
f <- function(t, val, x1, x2) {
  pair_data <- filter(df2, test == t, value == val, group %in% c(x1, x2))
  oneway.test(v ~ group, data = pair_data)$p.value
}
f <- Vectorize(f)

df2 %>%
  distinct(test, group, value) %>%       # unique test/group/value combinations
  group_by(test, value) %>%              # for each test and value
  nest() %>%                             # `data` now holds that cell's groups
  mutate(d = map(data, ~ data.frame(t(combn(.x$group, 2))))) %>%  # pairs X1, X2
  select(-data) %>%                      # drop the nested groups (kept by tidyr >= 1.0)
  unnest(d) %>%                          # one row per pair of groups
  ungroup() %>%                          # do not leak the grouping downstream
  mutate(hypothesis = paste0(X1, ",", X2),
         pval = f(test, value, X1, X2)) %>%  # vectorised p value per row
  select(test, value, hypothesis, X1, X2, pval) %>%
  arrange(value, test, X1, X2)           # deterministic row order

# # A tibble: 8 x 6
#   test   value   hypothesis    X1    X2   pval
#   <fctr> <chr>   <chr>      <int> <int>  <dbl>
# 1 AA     Value_1 0,1            0     1 0.201 
# 2 BB     Value_1 0,1            0     1 0.359 
# 3 BB     Value_1 0,2            0     2 0.325 
# 4 BB     Value_1 1,2            1     2 0.0811
# 5 AA     Value_2 0,1            0     1 0.0585
# 6 BB     Value_2 0,1            0     1 0.485 
# 7 BB     Value_2 0,2            0     2 0.414 
# 8 BB     Value_2 1,2            1     2 0.625

You can delete X1 and X2 if you really don't need them. However, in this way, you (also) have them as separate numeric variables in case you want to use them in another process (eg. filter on a specific group) at a later stage in your analysis.

Loadstar answered 4/1, 2018 at 22:55 Comment(0)
F
2

Well, this is not pretty but ...

df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L, 
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4, 
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2, 
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test", 
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, 
-10L))

#' Pairwise oneway.test() p-values for every test and value column
#'
#' For each level of `data$test`, every pair of distinct `data$group` values
#' is compared with `oneway.test()` once per value column.
#'
#' @param data A data frame with columns `test`, `group` and one or more
#'   numeric value columns.
#' @param value_cols Character vector naming the value columns. Defaults to
#'   every column other than `test` and `group`, so adding e.g. `Value_3`
#'   needs no code change.
#' @return A data frame with columns `test`, `value`, `p_value` and
#'   `hypothesis` (the compared groups as "a,b"). Rows are ordered by test,
#'   then by group pair, then by value column. All columns come from a
#'   character matrix, so `p_value` is text, not numeric.
pairwise_oneway <- function(data,
                            value_cols = setdiff(names(data), c("test", "group"))) {
  per_test <- by(data, factor(data$test), function(x) {
    groups <- unique(x$group)
    # A single group leaves nothing to compare. It would also trip combn(),
    # which expands a lone positive number n into seq_len(n).
    if (length(groups) < 2L) {
      return(NULL)
    }

    # One row per pair of groups.
    h <- t(combn(groups, 2))

    # One column per pair; within a column, one p-value per value column.
    p <- apply(h, 1, function(y) {
      pair_rows <- x[x$group %in% y, ]
      vapply(value_cols, function(col) {
        oneway.test(pair_rows[[col]] ~ pair_rows$group)$p.value
      }, numeric(1))
    })

    # as.vector(p) reads column by column (pair by pair), so each pair label
    # is repeated once per value column to stay aligned.
    cbind(
      test = as.character(x$test[1]),
      value = value_cols,
      p_value = as.vector(p),
      hypothesis = rep(paste(h[, 1], h[, 2], sep = ","),
                       each = length(value_cols))
    )
  })

  # rbind() drops the NULL entries of tests that were skipped above.
  as.data.frame(do.call(rbind, per_test))
}

pairwise_oneway(df)


  test   value            p_value hypothesis
1   AA Value_1  0.201123366107666        0,1
2   AA Value_2 0.0585402590546805        0,1
3   BB Value_1  0.358541470571387        0,1
4   BB Value_2  0.484844587956832        0,1
5   BB Value_1  0.324601180998953        0,2
6   BB Value_2  0.414283756097153        0,2
7   BB Value_1 0.0810540380817137        1,2
8   BB Value_2  0.624571310834221        1,2
Flopeared answered 4/1, 2018 at 22:49 Comment(2)
It works and I understand the code. My question is: if I add an extra variable (let's say Value_3), the code doesn't work anymore because it doesn't add the right combinations... let me edit my question – Clangor
Are you sure? It works fine for me; just make sure to also change each = 2 to each = 3 in h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2)! – Flopeared

© 2022 - 2024 — McMap. All rights reserved.