Tree Crawler

from itertools import repeat
from collections import deque, namedtuple

try:  # Mapping/Sequence moved to collections.abc in Python 3.3
    from collections.abc import Mapping, Sequence
except ImportError:  # Python 2 fallback
    from collections import Mapping, Sequence

from treecrawler._compat import string_type, text_type, binary_type

DONT_ITER_TYPES = string_type, text_type, binary_type
# A Node pairs the key path from the root of the tree ('keys', a list of
# keys/indexes) with the value found at that path ('val').
Node = namedtuple('Node', ['keys', 'val'])


def get_children(node, element_char, visit_arrays=True,
                 dont_iter=DONT_ITER_TYPES):
    """Return the children of this Node as a list of Nodes."""

    items = []

    if isinstance(node.val, dont_iter):
        # ignore string types
        pass

    elif isinstance(node.val, Mapping):
        # node is like a dictionary
        items = node.val.items()

    elif isinstance(node.val, Sequence) and visit_arrays:
        # node is like a list
        if element_char:
            items = zip(repeat(element_char), node.val)
        else:
            items = enumerate(node.val)

    return [Node(node.keys + [k], v) for k, v in items]
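
# Example usage (illustrative): for a mapping, get_children extends the
# parent's key path with one child Node per key.
#
#     >>> parent = Node(keys=['root'], val={'a': 1})
#     >>> get_children(parent, element_char=None)
#     [Node(keys=['root', 'a'], val=1)]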


def node_visitor(d, process_node, visit_arrays=True, element_char=None):
    """Call process_node funct for every node in tree and yield results.

    d (obj):
        Data to traverse (the tree)

    process_node (funct):
        Accepts a parent Node and list of child Nodes as args. Example:
        lambda parent, children: '.'.join(map(str, parent.keys))

    visit_arrays (bool):
        Visit the elements in arrays?

    element_char (str):
        Replaces sequence index numbers with this character when set;
        if not visit_arrays then ignore this option.
    """
    first_node = Node(keys=[], val=d)
    to_crawl = deque([first_node])
    while to_crawl:
        node = to_crawl.popleft()
        children = get_children(node, element_char, visit_arrays)
        yield process_node(node, children)
        to_crawl.extend(children)
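

# Example usage (illustrative): flatten a nested structure into dotted
# key paths, optionally collapsing sequence indexes with element_char.
#
#     >>> data = {'a': {'b': 1}, 'c': [2, 3]}
#     >>> dotted = lambda parent, children: '.'.join(map(str, parent.keys))
#     >>> sorted(p for p in node_visitor(data, dotted) if p)
#     ['a', 'a.b', 'c', 'c.0', 'c.1']
#     >>> sorted(p for p in node_visitor(data, dotted, element_char='*') if p)
#     ['a', 'a.b', 'c', 'c.*', 'c.*']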