Source code for simvx.core.scene_io.source_tree

"""Lossless parse and re-emit of Python source via parso.

`parse_source` returns a `SourceTree` whose `dump()` is byte-identical to the
input when nothing has been edited. Edits are applied directly to the
underlying parso tree (see `simvx.core.scene_io.edits`); `dump()` serialises
the current state via parso's `get_code()`.
"""

from __future__ import annotations

from collections.abc import Iterator
from typing import TYPE_CHECKING

import parso
from parso.python.tree import Class, Module
from parso.tree import NodeOrLeaf

if TYPE_CHECKING:
    pass


def parse_source(text: str, *, error_recovery: bool = True) -> SourceTree:
    """Build a lossless `SourceTree` from Python source text.

    Args:
        text: The source code to parse. It is also stored on the returned
            tree as its original text.
        error_recovery: Forwarded to ``parso.parse``. When true (the
            default), broken sources still yield a tree and the parser's
            complaints are available through :attr:`SourceTree.errors`.
            When false, malformed input raises `parso.ParserSyntaxError`.

    Returns:
        A `SourceTree` wrapping the parsed module together with ``text``.
    """
    # version=None lets parso pick its default grammar version.
    return SourceTree(
        parso.parse(text, error_recovery=error_recovery, version=None),
        original_text=text,
    )
def parse_snippet(text: str) -> NodeOrLeaf:
    """Parse a source fragment intended for splicing into another tree.

    Whole statements, single expressions, and suites are accepted. The text
    is parsed as a module and its children, minus the ``endmarker``, are
    inspected:

    * exactly one child: that child is lifted out and returned;
    * several children: the wrapping module itself is returned (still
      holding its ``endmarker``) so the caller can splice the run as a whole.

    In both cases the returned node has ``parent`` set to ``None`` so it is
    detached. Prefix/indent fixup before insertion is the caller's job (see
    :mod:`simvx.core.scene_io.edits`).

    Raises:
        ValueError: If the parsed text has no child other than the
            ``endmarker``.
    """
    wrapper = parso.parse(text, version=None)
    payload = [child for child in wrapper.children if child.type != "endmarker"]
    if not payload:
        raise ValueError(f"parse_snippet: no parseable content in {text!r}")

    # A lone child is lifted; a run of several children travels as the module.
    snippet = payload[0] if len(payload) == 1 else wrapper
    # Detach from the throwaway wrapper to keep the snippet spliceable.
    snippet.parent = None
    return snippet
class SourceTree:
    """Parsed Python source that can be re-emitted as text.

    Wraps the parso :class:`Module` and keeps the text it was parsed from, so
    a caller can ask :meth:`is_unchanged` instead of comparing the dumped
    output against the input themselves.
    """

    __slots__ = ("_module", "_original_text", "_errors_cache")

    def __init__(self, module: Module, *, original_text: str) -> None:
        self._module = module
        self._original_text = original_text
        # Filled lazily by the `errors` property; None means "not computed".
        self._errors_cache: list[str] | None = None

    @property
    def module(self) -> Module:
        """The wrapped parso module; edits via `scene_io.edits` mutate it in place."""
        return self._module

    @property
    def original_text(self) -> str:
        """The source text this tree was created with (see :func:`parse_source`)."""
        return self._original_text

    def dump(self) -> str:
        """Serialise the tree's current state with parso's ``get_code()``.

        For an unedited tree the result is expected to equal the original
        input; :meth:`is_unchanged` performs that comparison.
        """
        return self._module.get_code()

    def is_unchanged(self) -> bool:
        """Return True exactly when :meth:`dump` equals the original text."""
        current = self.dump()
        return current == self._original_text

    @property
    def errors(self) -> list[str]:
        """Messages of the syntax issues parso reports; empty for valid source.

        The list is computed on first access and then cached. This class
        never resets the cache, so later accesses return the same list
        object.
        """
        cached = self._errors_cache
        if cached is not None:
            return cached
        grammar = parso.load_grammar()
        messages = [issue.message for issue in grammar.iter_errors(self._module)]
        self._errors_cache = messages
        return messages

    def find_class(self, name: str) -> Class | None:
        """Return the first top-level class called ``name``, or None if absent."""
        matches = (cls for cls in self.iter_classes() if cls.name.value == name)
        return next(matches, None)

    def iter_classes(self) -> Iterator[Class]:
        """Yield top-level class definitions, decorated or not, in source order."""
        for top in self._module.children:
            candidate = _unwrap_top_level(top)
            if candidate is None or candidate.type != "classdef":
                continue
            yield candidate  # type: ignore[misc]

    def iter_imports(self) -> Iterator[NodeOrLeaf]:
        """Yield top-level ``import_name`` / ``import_from`` nodes in source order."""
        import_kinds = ("import_name", "import_from")
        for stmt in self._module.children:
            # Only children of a top-level simple_stmt are inspected.
            if stmt.type == "simple_stmt":
                for part in stmt.children:
                    if part.type in import_kinds:
                        yield part

    def position_of(self, node: NodeOrLeaf) -> tuple[int, int]:
        """Return the ``(line, column)`` at which ``node``'s first leaf starts.

        The value is parso's ``start_pos`` passed through untouched: the line
        is 1-indexed and the column is 0-indexed, so the first character of a
        line has column 0.
        """
        if hasattr(node, "get_first_leaf"):
            return node.get_first_leaf().start_pos
        # Objects without get_first_leaf are treated as the leaf themselves.
        return node.start_pos
def _unwrap_top_level(child: NodeOrLeaf) -> NodeOrLeaf | None: """Return the meaningful classdef/funcdef inside a top-level child. parso wraps simple statements in ``simple_stmt`` and decorated definitions in ``decorated``. Returns ``None`` if the child is not a class/func-bearing construct (e.g. plain expressions, imports, the endmarker). """ if child.type in ("classdef", "funcdef"): return child if child.type == "decorated": # children: [decorator(s), classdef|funcdef] for sub in child.children: if sub.type in ("classdef", "funcdef"): return sub return None