How to output tree-like human-readable object structure in R
Asked Answered
G

4

7

I often teach R to my peers and getting to explain the structure of nested data such as nested lists can be somewhat an arduous task and I find that creating a visual aid can go a long way.

However the output of a function such as str() has a lot of information and is not the most human readable format so I have attempted to format this output to then use RegEx to a more readable output. I have experienced some caveats as well as not being very proficient in string manipulation I was hoping that I could get some help.

Given the following object:

object <- list(
    a = 1:5,
    b = matrix(c(1, 3, "a", "i"), byrow = TRUE),
    l1 = list(
        data = data.frame(
            x = letters,
            y = LETTERS
        ),
        vec = "The river",
        l2 = list(
            abc = seq(1, 9, by = 2),
            col = "#445f43"
        )
    ),
    data2 = data.frame(
        x = c("a","h"),
        y = runif(2, 9, 90)
    ),
    rand = runif(12, 99, 120),
    form = y~x^4
)

And the expected output would be a tree renderization:

object                   
├── a 'int'              
├── b 'chr'              
├── l1 'list'            
│   ├── data 'data.frame'
│   │   ├── x 'factor'   
│   │   └── y 'factor'   
│   ├── vec 'chr'        
│   └── l2 'list'        
│       ├── abc 'chr'    
│       └── col 'chr'    
├── data2 'data.frame'   
│   ├── x 'factor'       
│   └── y 'num'          
├── rand 'num'                      
└── form 'formula'          

I would like to write a function that gives this output as well as adding some arguments to also return the length of the elements of the list and other information and perhaps color-coded classes.

EDIT: Just found other questions similar to mine here: and here:

Gibberish answered 18/7, 2020 at 16:25 Comment(0)
P
4

I have considered implementing something similar in the past but never got round to it. Prompted by your question, I have written a function, str2, that is a naive implementation of what you requested. I'm sure it could be substantially improved, but it's a start. It works like this:

> str2(object)
object
│     
├──── a 'integer'  
├──── b 'matrix'  
├──── l1 'list'  
│      ├──── data 'data.frame' 
│      │      ├──── x 'character'
│      │      └──── y 'character'
│      ├──── vec 'character' 
│      └──── l2 'list' 
│             ├──── abc 'numeric'
│             └──── col 'character'
├──── data2 'data.frame'  
│      ├──── x 'character' 
│      └──── y 'numeric' 
├──── rand 'numeric'  
└──── form 'formula'   

It handles unnamed list elements too:

> str2(list(1:5, list(1, 2)))
list(1:5, list(1, 2))
│     
├──── unnamed 'integer' 
└──── unnamed 'list' 
       ├──── unnamed 'numeric'
       └──── unnamed 'numeric'

and works as expected with data frames:

> str2(mtcars)
mtcars
│     
├──── mpg 'numeric'
├──── cyl 'numeric'
├──── disp 'numeric'
├──── hp 'numeric'
├──── drat 'numeric'
├──── wt 'numeric'
├──── qsec 'numeric'
├──── vs 'numeric'
├──── am 'numeric'
├──── gear 'numeric'
└──── carb 'numeric'

The function contains 3 recursive sub-functions which could probably be combined, and some inefficient loops that could probably be vectorized with a bit of care:

str2 <- function(obj)
{
  branch      <- "\u251c\u2500\u2500\u2500\u2500"
  last_branch <- "\u2514\u2500\u2500\u2500\u2500"
  trunk       <- "\u2502     "
  blank       <- "      "
  
  name_list <- function(obj)
  {
    if(is.list(obj))
    {
      o_n <- names(obj)
      if(is.null(o_n)) o_n <- character(length(obj))
      names(obj) <- sapply(seq_along(obj),  
                           function(i) {
                             if(!nzchar(o_n[i])) 
                               paste0("unnamed '", class(obj[[i]])[1], "'")
                             else paste0(o_n[i], " '", class(obj[[i]])[1], "'")
                           })
      obj <- lapply(obj, name_list)
    }
    obj
  }
  
  depth <- function(obj, lev = 0){
    if(!is.list(obj)) lev else list(lev, lapply(obj, depth, lev = lev + 1))
  }
  
  name_strip <- function(obj) {
    o_n <- names(obj)
    lapply(seq_along(o_n), function(i) c(o_n[i], name_strip(obj[[i]])))
  }
  
  obj        <- name_list(obj)
  depths     <- unlist(depth(obj))[-1]
  diffdepths <- c(diff(depths), -1)
  name_els   <- unlist(name_strip(obj))
  
  col1 <- rep(trunk, length(depths))
  col1[depths == 1] <- branch
  col1[max(which(depths == 1))] <- last_branch
  if(max(which(depths == 1)) != length(col1))
    col1[(max(which(depths == 1)) + 1):length(name_els)] <- blank
  for(i in 1:max(depths))
  {
    next_col                          <- character(length(name_els))
    next_col[which(depths == i)]      <- name_els[which(depths == i)]
    next_col[which(depths > (i + 1))] <- trunk
    next_col[which(depths == i + 1)]  <- branch
    next_col[which(depths == i + 1 & 
                   diffdepths < 0)]   <- last_branch
    
    for(j in which(next_col == name_els))
    {
      k <- j - 1
      while(k > 0)
      {
        if(next_col[k] != trunk) {
          if(next_col[k] == branch) next_col[k] <- last_branch
          break}
        next_col[k] <- blank
        k <- k - 1
      }
    }
    col1 <- cbind(col1, next_col)
  }
  col1 <- apply(col1, 1, paste, collapse = " ")
  cat(as.character(as.list(match.call())[-1]), trunk, col1, sep = "\n")
}
Prerequisite answered 19/7, 2020 at 9:28 Comment(0)
J
4

it can help:

a = Hmisc::list.tree(object, fill = " | ", attr.print = F, size = F, maxlen = 1)     
  

object = list 6
 |  a = integer 5= 1 ...
 |  b = character 4= array 4 X 1= 1  ... 
 |  l1 = list 3
 |  |  data = list 2( data.frame )
 |  |  |  x = character 26= a  ... 
 |  |  |  y = character 26= A  ... 
 |  |  vec = character 1= T 
 |  |  l2 = list 2
 |  |  |  abc = double 5= 1 ...
 |  |  |  col = character 1= # 
 |  data2 = list 2( data.frame )
 |  |  x = character 2= a  ... 
 |  |  y = double 2= 11.16 ...
 |  rand = double 12= 110.91 ...
 |  form = language 3( formula )
Janson answered 18/7, 2020 at 17:15 Comment(0)
P
4

I have considered implementing something similar in the past but never got round to it. Prompted by your question, I have written a function, str2, that is a naive implementation of what you requested. I'm sure it could be substantially improved, but it's a start. It works like this:

> str2(object)
object
│     
├──── a 'integer'  
├──── b 'matrix'  
├──── l1 'list'  
│      ├──── data 'data.frame' 
│      │      ├──── x 'character'
│      │      └──── y 'character'
│      ├──── vec 'character' 
│      └──── l2 'list' 
│             ├──── abc 'numeric'
│             └──── col 'character'
├──── data2 'data.frame'  
│      ├──── x 'character' 
│      └──── y 'numeric' 
├──── rand 'numeric'  
└──── form 'formula'   

It handles unnamed list elements too:

> str2(list(1:5, list(1, 2)))
list(1:5, list(1, 2))
│     
├──── unnamed 'integer' 
└──── unnamed 'list' 
       ├──── unnamed 'numeric'
       └──── unnamed 'numeric'

and works as expected with data frames:

> str2(mtcars)
mtcars
│     
├──── mpg 'numeric'
├──── cyl 'numeric'
├──── disp 'numeric'
├──── hp 'numeric'
├──── drat 'numeric'
├──── wt 'numeric'
├──── qsec 'numeric'
├──── vs 'numeric'
├──── am 'numeric'
├──── gear 'numeric'
└──── carb 'numeric'

The function contains 3 recursive sub-functions which could probably be combined, and some inefficient loops that could probably be vectorized with a bit of care:

str2 <- function(obj)
{
  branch      <- "\u251c\u2500\u2500\u2500\u2500"
  last_branch <- "\u2514\u2500\u2500\u2500\u2500"
  trunk       <- "\u2502     "
  blank       <- "      "
  
  name_list <- function(obj)
  {
    if(is.list(obj))
    {
      o_n <- names(obj)
      if(is.null(o_n)) o_n <- character(length(obj))
      names(obj) <- sapply(seq_along(obj),  
                           function(i) {
                             if(!nzchar(o_n[i])) 
                               paste0("unnamed '", class(obj[[i]])[1], "'")
                             else paste0(o_n[i], " '", class(obj[[i]])[1], "'")
                           })
      obj <- lapply(obj, name_list)
    }
    obj
  }
  
  depth <- function(obj, lev = 0){
    if(!is.list(obj)) lev else list(lev, lapply(obj, depth, lev = lev + 1))
  }
  
  name_strip <- function(obj) {
    o_n <- names(obj)
    lapply(seq_along(o_n), function(i) c(o_n[i], name_strip(obj[[i]])))
  }
  
  obj        <- name_list(obj)
  depths     <- unlist(depth(obj))[-1]
  diffdepths <- c(diff(depths), -1)
  name_els   <- unlist(name_strip(obj))
  
  col1 <- rep(trunk, length(depths))
  col1[depths == 1] <- branch
  col1[max(which(depths == 1))] <- last_branch
  if(max(which(depths == 1)) != length(col1))
    col1[(max(which(depths == 1)) + 1):length(name_els)] <- blank
  for(i in 1:max(depths))
  {
    next_col                          <- character(length(name_els))
    next_col[which(depths == i)]      <- name_els[which(depths == i)]
    next_col[which(depths > (i + 1))] <- trunk
    next_col[which(depths == i + 1)]  <- branch
    next_col[which(depths == i + 1 & 
                   diffdepths < 0)]   <- last_branch
    
    for(j in which(next_col == name_els))
    {
      k <- j - 1
      while(k > 0)
      {
        if(next_col[k] != trunk) {
          if(next_col[k] == branch) next_col[k] <- last_branch
          break}
        next_col[k] <- blank
        k <- k - 1
      }
    }
    col1 <- cbind(col1, next_col)
  }
  col1 <- apply(col1, 1, paste, collapse = " ")
  cat(as.character(as.list(match.call())[-1]), trunk, col1, sep = "\n")
}
Prerequisite answered 19/7, 2020 at 9:28 Comment(0)
R
2

I am not aware of a ready-to-use function for your purpose. In order to produce list structure, you need to recurse over a list. Here is a quick solution (I did not intend to match your output, but rather to give you an idea):

list_structure <- function(x,level=1){
  cat(strrep("-",level), "level",level,"\n")
  cat(strrep("-",level), names(x),"==",class(x),"\n")
  for (i in seq_along(x)){
    if (is.list(x[[i]])) {
      list_structure(x[[i]],level+1)
    } else {
      cat(strrep("-",level), names(x[i])," ",class(x[[i]]),"\n")
    }
  }
}

which gives:

> list_structure(object)
- level 1 
- a b l1 data2 rand form == list 
- a   integer 
- b   matrix 
-- level 2 
-- data vec l2 == list 
--- level 3 
--- x y == data.frame 
--- x   factor 
--- y   factor 
-- vec   character 
--- level 3 
--- abc col == list 
--- abc   numeric 
--- col   character 
-- level 2 
-- x y == data.frame 
-- x   factor 
-- y   numeric 
- rand   numeric 
- form   formula
Rubidium answered 18/7, 2020 at 17:16 Comment(0)
M
2

You can also use the lobstr package.

library(lobstr)

> lobstr::tree(object)
<list>
├─a<int [5]>: 1, 2, 3, 4, 5
├─b<chr [4]>: "1", "3", "a", "i"
├─l1: <list>
│ ├─data: S3<data.frame>
│ │ ├─x<chr [26]>: "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", ...
│ │ └─y<chr [26]>: "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", ...
│ ├─vec: "The river"
│ └─l2: <list>
│   ├─abc<dbl [5]>: 1, 3, 5, 7, 9
│   └─col: "#445f43"
├─data2: S3<data.frame>
│ ├─x<chr [2]>: "a", "h"
│ └─y<dbl [2]>: 33.5670106820762, 47.2537167894188
├─rand<dbl [12]>: 113.150841896189, 118.319494332187, 107.936506421305, 117.579201066401, 118.104245000985, 116.481618712423, 115.649064659374, 116.050208359491, 110.187894097064, 119.073304581922, ...
└─form: S3<formula> y ~ x^4

To clean it up and omit printing of values, you can supply a custom function to the val_printer parameter.

> lobstr::tree(object, val_printer = function(x) {
      paste('')
  })
<list>
├─a<int [5]>: 
├─b<chr [4]>: 
├─l1: <list>
│ ├─data: S3<data.frame>
│ │ ├─x<chr [26]>: 
│ │ └─y<chr [26]>: 
│ ├─vec: 
│ └─l2: <list>
│   ├─abc<dbl [5]>: 
│   └─col: 
├─data2: S3<data.frame>
│ ├─x<chr [2]>: 
│ └─y<dbl [2]>: 
├─rand<dbl [12]>: 
└─form: S3<formula> y ~ x^4

There are several other parameters and features within this package that are worth checking out.

Marleenmarlen answered 17/6, 2023 at 23:21 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.