The following code builds on the earlier good answers. If you also need to locate the line numbers of leaf attributes, it may help:
from yaml.composer import Composer
from yaml.constructor import Constructor
from yaml.loader import Loader
from yaml.nodes import MappingNode, ScalarNode
from yaml.resolver import BaseResolver
class LineLoader(Loader):
    """YAML loader that exposes the source line of every mapping key.

    For each scalar key ``k`` of a mapping, the constructed dict receives an
    additional entry ``'__line__' + k`` at the same level whose value is the
    1-based line number recorded for that key.
    """

    def compose_node(self, parent, index):
        """Compose a node and remember the line it was read from.

        The loader's current ``line`` is sampled before delegating to
        ``Composer.compose_node`` and stored, converted to 1-based, on the
        returned node as ``__line__``.
        """
        # the line number where the previous token has ended (plus empty lines)
        line = self.line
        node = Composer.compose_node(self, parent, index)
        node.__line__ = line + 1
        return node

    def construct_mapping(self, node, deep=False):
        """Construct a mapping, adding a ``'__line__<key>'`` entry per key.

        Shadow key/value node pairs are appended to ``node.value`` (the node
        is modified in place) and the actual construction is delegated to
        ``Constructor.construct_mapping``.

        Keys that are not scalars, or that carry no recorded line, get no
        shadow entry instead of raising.
        """
        if not isinstance(node, MappingNode):
            # Not a mapping: defer to the base implementation so that it,
            # not the unpacking loop below, decides how to reject the node.
            return Constructor.construct_mapping(self, node, deep=deep)

        shadow_pairs = []
        for key_node, _value_node in node.value:
            line = getattr(key_node, '__line__', None)
            # Only scalar keys have a string value that can be prefixed, and
            # nodes that did not go through compose_node (e.g. shadow nodes
            # pulled in from another mapping) have no line to report.
            if line is None or not isinstance(key_node, ScalarNode):
                continue
            shadow_key_node = ScalarNode(
                tag=BaseResolver.DEFAULT_SCALAR_TAG,
                value='__line__' + key_node.value,
            )
            # The line is stored as an int so that it shows up as an int in
            # the constructed mapping.
            shadow_value_node = ScalarNode(
                tag=BaseResolver.DEFAULT_SCALAR_TAG,
                value=line,
            )
            shadow_pairs.append((shadow_key_node, shadow_value_node))

        node.value = node.value + shadow_pairs
        return Constructor.construct_mapping(self, node, deep=deep)
if __name__ == '__main__':
    # Demo: load a small document and print it together with the
    # '__line__<key>' entries added by LineLoader.
    from pprint import pprint

    # The nesting below is significant: key1_1..key1_3 are children of key1
    # and the two list items belong to key1_3.
    stream = """ # The first line
key1: # This is the second line
  key1_1: item1
  key1_2: item1_2
  key1_3:
    - item1_3_1
    - item1_3_2
key2: item 2
key3: another item 1
"""
    loader = LineLoader(stream)
    try:
        data = loader.get_single_data()
    finally:
        # Release the loader's internal state even if parsing fails.
        loader.dispose()
    pprint(data)
The output is as follows: for every key, an additional key with the prefix "__line__" (e.g. "__line__key") is added at the same level, holding that key's line number.
PS: Line numbers for list items are not reported yet.
{'__line__key1': 2,
'__line__key2': 8,
'__line__key3': 9,
'key1': {'__line__key1_1': 3,
'__line__key1_2': 4,
'__line__key1_3': 5,
'key1_1': 'item1',
'key1_2': 'item1_2',
'key1_3': ['item1_3_1', 'item1_3_2']},
'key2': 'item 2',
'key3': 'another item 1'}