(First-time poster and rather new in programming, so be patient, please!)
I'm interested in both an efficient general algorithm for printing formatted binary trees (in a CLI environment) and a C implementation. Here is some code that I wrote myself for fun (this is a much simplified version of the original and part of a larger program supporting many BST operations, but it should compile just fine):
#include <stdbool.h> // C99, boolean type support
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define DATATYPE_IS_DOUBLE
#define NDEBUG // disable assertions
#include <assert.h>
#define WCHARBUF_LINES 20 // def: 20
#define WCHARBUF_COLMS 800 // def: 80 (using a huge number, like 500, is a good idea,
// in order to prevent a buffer overflow :)
#define RECOMMENDED_CONS_WIDTH 150
#define RECOMMENDED_CONS_WIDTHQ "150" // use the same value, quoted
/* Preprocessor directives depending on DATATYPE_IS_* : */
#if defined DATATYPE_IS_INT || defined DATATYPE_IS_LONG
#define DTYPE long int
#define DTYPE_STRING "INTEGER"
#define DTYPE_PRINTF "%*.*ld"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_FLOAT
#define DTYPE float
#define DTYPE_STRING "FLOAT"
#define DTYPE_PRINTF "%*.*f"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_DOUBLE
#define DTYPE double
#define DTYPE_STRING "DOUBLE"
#define DTYPE_PRINTF "%*.*lf"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_CHAR
#define DTYPE char
#define DTYPE_STRING "CHARACTER"
#define DTYPE_PRINTF "%*.*c" /* using the "precision" sub-specifier ( .* ) with a */
/* character will produce a harmless compiler warning */
#else
#error "DATATYPE_IS_* preprocessor directive undefined!"
#endif
typedef struct node_struct {
DTYPE data;
struct node_struct *left;
struct node_struct *right;
/* int height; // useful for AVL trees */
} node;
typedef struct {
node *root;
bool IsAVL; // useful for AVL trees
long size;
} tree;
static inline
DTYPE get_largest(node *n){
if (n == NULL)
return (DTYPE)0;
for(; n->right != NULL; n=n->right);
return n->data;
}
static
int subtreeheight(node *ST){
if (ST == NULL)
return -1;
int height_left = subtreeheight(ST->left);
int height_right = subtreeheight(ST->right);
return (height_left > height_right) ? (height_left + 1) : (height_right + 1);
}
void prettyprint_tree(tree *T){
if (T == NULL) // if T empty, abort
return;
#ifndef DATATYPE_IS_CHAR /* then DTYPE is a numeric type */
/* compute spaces, find width: */
int width, i, j;
DTYPE max = get_largest(T->root);
width = (max < 10) ? 1 :
(max < 100) ? 2 :
(max < 1000) ? 3 :
(max < 10000) ? 4 :
(max < 100000) ? 5 :
(max < 1000000) ? 6 :
(max < 10000000) ? 7 :
(max < 100000000) ? 8 :
(max < 1000000000) ? 9 : 10;
assert (max < 10000000000);
width += 2; // needed for prettier results
#if defined DATATYPE_IS_FLOAT || defined DATATYPE_IS_DOUBLE
width += 2; // because of the decimals! (1 decimal is printed by default...)
#endif // float or double
int spacesafter = width / 2;
int spacesbefore = spacesafter + 1;
//int spacesbefore = ceil(width / 2.0);
#else /* character input */
int i, j, width = 3, spacesbefore = 2, spacesafter = 1;
#endif // #ifndef DATATYPE_IS_CHAR
/* start wchar_t printing, using a 2D character array with swprintf() : */
struct columninfo{ // auxiliary structure
bool visited;
int col;
};
wchar_t wcharbuf[WCHARBUF_LINES][WCHARBUF_COLMS];
int line=0;
struct columninfo eachline[WCHARBUF_LINES];
for (i=0; i<WCHARBUF_LINES; ++i){ // initialization
for (j=0; j<WCHARBUF_COLMS; ++j)
wcharbuf[i][j] = (wchar_t)' ';
eachline[i].visited = false;
eachline[i].col = 0;
}
int height = subtreeheight(T->root);
void recur_swprintf(node *ST, int cur_line, const wchar_t *nullstr){ // nested function,
// GCC extension!
float offset = width * pow(2, height - cur_line);
++cur_line;
if (eachline[cur_line].visited == false) {
eachline[cur_line].col = (int) (offset / 2);
eachline[cur_line].visited = true;
}
else{
eachline[cur_line].col += (int) offset;
if (eachline[cur_line].col + width > WCHARBUF_COLMS)
swprintf(wcharbuf[cur_line], L" BUFFER OVERFLOW DETECTED! ");
}
if (ST == NULL){
swprintf(wcharbuf[cur_line] + eachline[cur_line].col, L"%*.*s", 0, width, nullstr);
if (cur_line <= height){
/* use spaces instead of the nullstr for all the "children" of a NULL node */
recur_swprintf(NULL, cur_line, L" ");
recur_swprintf(NULL, cur_line, L" ");
}
else
return;
}
else{
recur_swprintf(ST->left, cur_line, nullstr);
recur_swprintf(ST->right, cur_line, nullstr);
swprintf(wcharbuf[cur_line] + eachline[cur_line].col - 1, L"("DTYPE_PRINTF"",
spacesbefore, 1, ST->data);
//swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 1, L")");
swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 2, L")");
}
}
void call_recur(tree *tr){ // nested function, GCC extension! (wraps recur_swprintf())
recur_swprintf(tr->root, -1, L"NULL");
}
call_recur(T);
/* Omit empty columns: */
int omit_cols(void){ // nested function, GCC extension!
int col;
for (col=0; col<RECOMMENDED_CONS_WIDTH; ++col)
for (line=0; line <= height+1; ++line)
if (wcharbuf[line][col] != ' ' && wcharbuf[line][col] != '\0')
return col;
return 0;
}
/* Use fputwc to transfer the character array to the screen: */
j = omit_cols() - 2;
j = (j < 0) ? 0 : j;
for (line=0; line <= height+1; ++line){ // assumes RECOMMENDED_CONS_WIDTH console window!
fputwc('\n', stdout); // optional blanc line
for (i=j; i<j+RECOMMENDED_CONS_WIDTH && i<WCHARBUF_COLMS; ++i)
fputwc(wcharbuf[line][i], stdout);
fputwc('\n', stdout);
}
}
(also uploaded to a pastebin service, in order to preserve syntax highlighting)
It works quite well, although the automatic width setting could be better. The preprocessor magic is a bit silly (or even ugly) and not really related to the algorithm, but it allows using various data types in the tree nodes (I saw it as a chance to experiment a bit with the preprocessor - remember, I am a newbie!).
The main program is supposed to call
system("mode con:cols="RECOMMENDED_CONS_WIDTHQ" lines=2000");
before calling prettyprint_tree(), when running inside cmd.exe .
Sample output:
(106.0) (102.0) (109.0) (101.5) NULL (107.0) (115.0) NULL NULL (106.1) NULL (113.0) NULL NULL NULL NULL NULL
Ideally, the output would be like this (the reason I'm using the wprintf() family of functions is being able to print Unicode characters anyway):
(107.0) ┌─────┴─────┐ (106.1) NULL ┌───┴───┐ NULL NULL
So, my questions:
- What do you think about this code? (Coding style suggestions are also very welcome!)
- Can it be extended in an elegant way in order to include the line-drawing characters? (Unfortunately, I don't think so.)
- Any other algorithms in C or other imperative languages (or imperative pseudo-code)?
- Somewhat unrelated: What's your opinion about nested functions (non-portable GNU extension)? I think it's an elegant way to write recursive parts of a function without having to provide all the local variables as arguments (and also useful as an implementation-hiding technique), but it could be my Pascal past :-) I'm interested in the opinion of more experienced coders.
Thank you in advance for your responses!
PS. The question is not a duplicate of this one.
edit: Jonathan Leffler wrote an excellent answer that will most probably become the "accepted answer" after a few days (unless someone posts something equally awesome!). I decided to respond here instead of commenting because of the space constraints.
- The code above is actually part of a larger "homework" project (implementing BST operations in a shared library + a CLI app using that library). However, the "prettyprint" function was not part of the requirements; just something I decided to add myself.
- I also added a "convert to AVL without rotations" function, that used "arraystr" as an intermediate representation ;-) I forgot that it wasn't used here. I've edited the code to remove it. Also, the
bool IsAVL
struct member is anything but unused; just not used in this particular function. I had to copy/paste code from various files and make a lot of changes in order to present the code cited above. That's a problem that I don't know how to solve. I would gladly post the whole program, but it is too large and commented in my mother-tongue (not in English!). - The whole project was about 1600 LOC (including comments) with multiple build targets (debug/release/static-linking) and it compiled cleanly with
-Wall
and-Wextra
enabled. Assertions and debug messages were enabled/disabled automatically depending on the build target. Also I thought that function prototypes weren't needed for nested functions, after all nested functions do not implement any external interface by definition - GCC certainly didn't complain here. I don't know why there are so many warnings on OSX :( - I'm using GCC 4.4.1 on Windows 7.
- Despite writing and testing this program on Windows, I am actually a Linux user... Still, I can't stand
vim
and I usenano
(inside GNU screen) orgedit
instead (shoot me)! In any case, I prefer the K&R brace style :) - Portability doesn't really matter, for Linux users GCC is pretty much de facto... The fact that it also works well under Windows is a nice bonus.
- I'm not using a VCS, perhaps I should. I want to try, but all of them seem too complex for my needs and I don't know how to choose one :-)
- You are definitely right about checking for depth overflow, thankfully it is very easy to add.
- Thanks for the
L' '
advice! - I find your suggestion (encapsulating "the whole of the drawing code so that the screen image and related information is in a single structure") extremely interesting... but I don't really understand what you mean as "encapsulation". Could you, please, provide 3 or 4 lines of (pseudo)code showing a possible function declaration and/or a possible function call?
This is my first "large-ish" (and non-trivial) program and I'm really thankful for your advice.
edit #2:
Here is an implementation of the "quick and dirty" method mentioned here.
(edit #3: I decided to split it to a separate answer, since it is a valid answer to the OP.)
Many responses mentioned Graphviz. I already knew about it (many Linux apps are linked against it) but I thought it would be overkill for a 10KB CLI executable. However, I'll keep it in mind for the future. It seems great.
graphviz
is just begging to be used for this kind of thing. – Sweeny