If you need to attach data to each node (not just the parent/child keys) and, for whatever reason, you don't want to scan the whole DataFrame up front to find the roots, the following modification of @Suparshva's answer works. Prospective roots are kept in a set, so adding, checking, and removing them is fast. The attached data (`val` in this example) can of course be replaced by anything, including the whole DataFrame row. No assumption is made about the order of the input rows.
import pandas as pd
from anytree import Node, RenderTree
def print_tree(nodes: dict, roots: set) -> None:
    """Print every tree whose root key is listed in *roots*.

    Each tree is preceded by an empty line and rendered with
    ``RenderTree``; every node is shown as ``name (val)``.

    Args:
        nodes: Mapping from key to the node created for that key.
        roots: Keys of the nodes to render as tree roots; trees are
            printed in the iteration order of this collection.
    """
    for root_key in roots:
        # Blank separator line ahead of each tree.
        print()
        rendered = RenderTree(nodes[root_key])
        for pre, _fill, node in rendered:
            print(f'{pre}{node.name} ({node.val})')
def add_nodes(nodes: dict, roots: set, parent: str, child: str, val: int) -> None:
    """Register one parent -> child edge and attach *val* to the child.

    A parent seen for the first time is created with ``val=None`` and
    recorded as a prospective root. A child seen for the first time is
    created with *val*; an existing child has its ``val`` overwritten.
    The child is then attached to the parent and dropped from *roots*.

    Args:
        nodes: Mapping from key to node; updated in place.
        roots: Keys currently considered roots; updated in place.
        parent: Key of the parent node.
        child: Key of the child node.
        val: Data to store on the child node.
    """
    parent_node = nodes.get(parent)
    if parent_node is None:
        # Unknown parent: it has no parent of its own yet, so treat it as
        # a root until some later row shows up with it as a child.
        parent_node = nodes[parent] = Node(parent, val=None)
        roots.add(parent)

    child_node = nodes.get(child)
    if child_node is None:
        child_node = nodes[child] = Node(child, val=val)
    else:
        # The node already exists (created earlier as somebody's parent or
        # child); only its data needs updating.
        child_node.val = val

    child_node.parent = parent_node
    # A node that now has a parent can no longer be a root.
    roots.discard(child)
def create_tree(df: pd.DataFrame) -> None:
    """Build the trees described by the rows of *df* and print them.

    Each row supplies a parent key (``p_key``), a child key (``c_key``)
    and the data to attach to the child (``val``). Rows are processed in
    the order they appear; rows whose child key is missing are skipped.

    Args:
        df: Frame with ``p_key``, ``c_key`` and ``val`` columns.
    """
    nodes = {}
    roots = set()
    for row in df.itertuples(index=False, name='df_row'):
        # pandas usually represents a missing key as NaN/NA/NaT rather than
        # None (e.g. after read_csv or a merge); pd.notna rejects all of
        # them, whereas an `is not None` test would let NaN through and
        # create a bogus node for it.
        if pd.notna(row.c_key):
            add_nodes(nodes, roots, row.p_key, row.c_key, row.val)
    print_tree(nodes, roots)
# Sample DataFrame: one row per edge (parent key, child key, value stored
# on the child). The rows are deliberately not in top-down order: 'A' is
# used as a parent before the last row makes it a child of 'Y'.
data = {
    'p_key': ['R', 'A', 'A', 'B', 'B', 'C', 'G', 'Y'],
    'c_key': ['X', 'B', 'C', 'D', 'E', 'F', 'H', 'A'],
    'val': [0, 1, 2, 3, 4, 5, 6, 55],
}
df = pd.DataFrame(data)

# Build the trees from the sample rows and print them.
create_tree(df)
Produces (the order of the trees may differ between runs, because the roots are kept in a set):

R (None)
└── X (0)

Y (None)
└── A (55)
    ├── B (1)
    │   ├── D (3)
    │   └── E (4)
    └── C (2)
        └── F (5)

G (None)
└── H (6)