# Source code for simvx.core.scene_io.source_tree
"""Lossless parse and re-emit of Python source via parso.
`parse_source` returns a `SourceTree` whose `dump()` is byte-identical to the
input when nothing has been edited. Edits are applied directly to the
underlying parso tree (see `simvx.core.scene_io.edits`); `dump()` serialises
the current state via parso's `get_code()`.
"""
from __future__ import annotations
from collections.abc import Iterator
from typing import TYPE_CHECKING
import parso
from parso.python.tree import Class, Module
from parso.tree import NodeOrLeaf
if TYPE_CHECKING:
pass
[docs]
def parse_source(text: str, *, error_recovery: bool = True) -> SourceTree:
    """Build a lossless `SourceTree` from Python source text.

    The text is handed to ``parso.parse`` and the resulting module is wrapped
    together with the original string, so an unedited tree can be compared
    against its input later.

    Args:
        text: Python source to parse.
        error_recovery: Forwarded unchanged to ``parso.parse``. When true
            (the default), broken sources still yield a tree and the parser
            issues are available through :attr:`SourceTree.errors`. When
            false, malformed input raises `parso.ParserSyntaxError`.

    Returns:
        A `SourceTree` wrapping the parsed module and ``text``.
    """
    # version=None leaves the grammar version choice to parso's default.
    return SourceTree(
        parso.parse(text, error_recovery=error_recovery, version=None),
        original_text=text,
    )
[docs]
def parse_snippet(text: str) -> NodeOrLeaf:
    """Parse a source fragment intended for splicing into another tree.

    The fragment is parsed as a module and its top-level children, minus the
    ``endmarker`` leaf, are inspected:

    * exactly one child: that child is returned (the *lifted* inner node);
    * several children: the module itself is returned, endmarker included,
      so the caller can splice the whole run.

    In both cases the returned node's ``parent`` is set to ``None``. Only the
    parent link is cleared; the node is not removed from the ``children``
    list of the module it was parsed into.

    The caller is responsible for prefix/indent fixup before insertion (see
    :mod:`simvx.core.scene_io.edits`).

    Args:
        text: Source fragment to parse.

    Returns:
        The single lifted child, or the whole module when the fragment has
        more than one top-level child.

    Raises:
        ValueError: If the parsed module has no child besides the endmarker.
    """
    module = parso.parse(text, version=None)
    payload = [child for child in module.children if child.type != "endmarker"]
    if not payload:
        raise ValueError(f"parse_snippet: no parseable content in {text!r}")

    # A lone child is lifted out; a run of several children stays wrapped in
    # its module so it can be handled as one unit.
    node = payload[0] if len(payload) == 1 else module
    # Drop the upward link so the snippet is detachable from its origin.
    node.parent = None
    return node
[docs]
class SourceTree:
    """A parsed Python source file that can be re-emitted as text.

    Wraps the parso :class:`Module` together with the text it was parsed
    from, so callers can check for a no-op round-trip via
    :meth:`is_unchanged` instead of diffing the output themselves.
    """

    __slots__ = ("_module", "_original_text", "_errors_cache")

    def __init__(self, module: Module, *, original_text: str) -> None:
        """Store the parsed ``module`` and the ``original_text`` it came from."""
        self._module = module
        self._original_text = original_text
        # Populated lazily by the first access to `errors`.
        self._errors_cache: list[str] | None = None

    @property
    def module(self) -> Module:
        """The wrapped parso module. Edit in place via `scene_io.edits`."""
        return self._module

    @property
    def original_text(self) -> str:
        """The source text this tree was constructed with."""
        return self._original_text

    def dump(self) -> str:
        """Serialise the current tree to source text.

        Delegates to the module's ``get_code()``. See :meth:`is_unchanged`
        for checking the result against the original input.
        """
        return self._module.get_code()

    def is_unchanged(self) -> bool:
        """Return True iff :meth:`dump` equals the original input text."""
        current = self.dump()
        return current == self._original_text

    @property
    def errors(self) -> list[str]:
        """Messages of the syntax issues parso reports for the module.

        The list is computed on first access and the same list object is
        returned on every later access.
        """
        # NOTE(review): nothing in this class resets the cache, so issues
        # introduced or fixed by later in-place edits would not show up here
        # -- confirm whether that is intended.
        cached = self._errors_cache
        if cached is None:
            issues = parso.load_grammar().iter_errors(self._module)
            cached = self._errors_cache = [issue.message for issue in issues]
        return cached

    def find_class(self, name: str) -> Class | None:
        """Return the first top-level class called ``name``, or None."""
        matches = (cls for cls in self.iter_classes() if cls.name.value == name)
        return next(matches, None)

    def iter_classes(self) -> Iterator[Class]:
        """Yield top-level class definitions in source order.

        Each module child is passed through ``_unwrap_top_level``, so a
        decorated class is yielded as its inner ``classdef`` node.
        """
        for top in self._module.children:
            node = _unwrap_top_level(top)
            if node is None or node.type != "classdef":
                continue
            yield node  # type: ignore[misc]

    def iter_imports(self) -> Iterator[NodeOrLeaf]:
        """Yield top-level ``import_name`` and ``import_from`` nodes in source order.

        Only direct children of top-level ``simple_stmt`` nodes are examined.
        """
        import_types = ("import_name", "import_from")
        for stmt in self._module.children:
            if stmt.type == "simple_stmt":
                yield from (part for part in stmt.children if part.type in import_types)

    def position_of(self, node: NodeOrLeaf) -> tuple[int, int]:
        """Return the ``(line, column)`` where ``node``'s first leaf starts.

        The value is the leaf's ``start_pos`` as parso reports it: the line
        is 1-indexed and the column is 0-indexed, so the first character on
        a line has column 0.
        """
        # Objects without `get_first_leaf` are treated as the leaf itself.
        if hasattr(node, "get_first_leaf"):
            first = node.get_first_leaf()
        else:
            first = node
        return first.start_pos
def _unwrap_top_level(child: NodeOrLeaf) -> NodeOrLeaf | None:
"""Return the meaningful classdef/funcdef inside a top-level child.
parso wraps simple statements in ``simple_stmt`` and decorated definitions
in ``decorated``. Returns ``None`` if the child is not a class/func-bearing
construct (e.g. plain expressions, imports, the endmarker).
"""
if child.type in ("classdef", "funcdef"):
return child
if child.type == "decorated":
# children: [decorator(s), classdef|funcdef]
for sub in child.children:
if sub.type in ("classdef", "funcdef"):
return sub
return None