486 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			486 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| from abc import abstractmethod
 | |
| from typing import (
 | |
|     AbstractSet,
 | |
|     Any,
 | |
|     Dict,
 | |
|     Iterable,
 | |
|     Iterator,
 | |
|     List,
 | |
|     Optional,
 | |
|     Set,
 | |
|     Tuple,
 | |
|     TYPE_CHECKING,
 | |
|     Union,
 | |
| )
 | |
| 
 | |
| 
 | |
| if TYPE_CHECKING:
 | |
|     from pegen.parser_generator import ParserGenerator
 | |
| 
 | |
| 
 | |
| class GrammarError(Exception):
 | |
|     pass
 | |
| 
 | |
| 
 | |
| class GrammarVisitor:
 | |
|     def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any:
 | |
|         """Visit a node."""
 | |
|         method = "visit_" + node.__class__.__name__
 | |
|         visitor = getattr(self, method, self.generic_visit)
 | |
|         return visitor(node, *args, **kwargs)
 | |
| 
 | |
|     def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
 | |
|         """Called if no explicit visitor function exists for a node."""
 | |
|         for value in node:
 | |
|             if isinstance(value, list):
 | |
|                 for item in value:
 | |
|                     self.visit(item, *args, **kwargs)
 | |
|             else:
 | |
|                 self.visit(value, *args, **kwargs)
 | |
| 
 | |
| 
 | |
| class Grammar:
 | |
|     def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
 | |
|         self.rules = {rule.name: rule for rule in rules}
 | |
|         self.metas = dict(metas)
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return "\n".join(str(rule) for name, rule in self.rules.items())
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         lines = ["Grammar("]
 | |
|         lines.append("  [")
 | |
|         for rule in self.rules.values():
 | |
|             lines.append(f"    {repr(rule)},")
 | |
|         lines.append("  ],")
 | |
|         lines.append("  {repr(list(self.metas.items()))}")
 | |
|         lines.append(")")
 | |
|         return "\n".join(lines)
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Rule]:
 | |
|         yield from self.rules.values()
 | |
| 
 | |
| 
 | |
| # Global flag whether we want actions in __str__() -- default off.
 | |
| SIMPLE_STR = True
 | |
| 
 | |
| 
 | |
| class Rule:
 | |
|     def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
 | |
|         self.name = name
 | |
|         self.type = type
 | |
|         self.rhs = rhs
 | |
|         self.memo = bool(memo)
 | |
|         self.visited = False
 | |
|         self.nullable = False
 | |
|         self.left_recursive = False
 | |
|         self.leader = False
 | |
| 
 | |
|     def is_loop(self) -> bool:
 | |
|         return self.name.startswith("_loop")
 | |
| 
 | |
|     def is_gather(self) -> bool:
 | |
|         return self.name.startswith("_gather")
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         if SIMPLE_STR or self.type is None:
 | |
|             res = f"{self.name}: {self.rhs}"
 | |
|         else:
 | |
|             res = f"{self.name}[{self.type}]: {self.rhs}"
 | |
|         if len(res) < 88:
 | |
|             return res
 | |
|         lines = [res.split(":")[0] + ":"]
 | |
|         lines += [f"    | {alt}" for alt in self.rhs.alts]
 | |
|         return "\n".join(lines)
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Rhs]:
 | |
|         yield self.rhs
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         if self.visited:
 | |
|             # A left-recursive rule is considered non-nullable.
 | |
|             return False
 | |
|         self.visited = True
 | |
|         self.nullable = self.rhs.nullable_visit(rules)
 | |
|         return self.nullable
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return self.rhs.initial_names()
 | |
| 
 | |
|     def flatten(self) -> Rhs:
 | |
|         # If it's a single parenthesized group, flatten it.
 | |
|         rhs = self.rhs
 | |
|         if (
 | |
|             not self.is_loop()
 | |
|             and len(rhs.alts) == 1
 | |
|             and len(rhs.alts[0].items) == 1
 | |
|             and isinstance(rhs.alts[0].items[0].item, Group)
 | |
|         ):
 | |
|             rhs = rhs.alts[0].items[0].item.rhs
 | |
|         return rhs
 | |
| 
 | |
|     def collect_todo(self, gen: ParserGenerator) -> None:
 | |
|         rhs = self.flatten()
 | |
|         rhs.collect_todo(gen)
 | |
| 
 | |
| 
 | |
| class Leaf:
 | |
|     def __init__(self, value: str):
 | |
|         self.value = value
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return self.value
 | |
| 
 | |
|     def __iter__(self) -> Iterable[str]:
 | |
|         if False:
 | |
|             yield
 | |
| 
 | |
|     @abstractmethod
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         raise NotImplementedError
 | |
| 
 | |
|     @abstractmethod
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         raise NotImplementedError
 | |
| 
 | |
| 
 | |
| class NameLeaf(Leaf):
 | |
|     """The value is the name."""
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         if self.value == "ENDMARKER":
 | |
|             return "$"
 | |
|         return super().__str__()
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"NameLeaf({self.value!r})"
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         if self.value in rules:
 | |
|             return rules[self.value].nullable_visit(rules)
 | |
|         # Token or unknown; never empty.
 | |
|         return False
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return {self.value}
 | |
| 
 | |
| 
 | |
| class StringLeaf(Leaf):
 | |
|     """The value is a string literal, including quotes."""
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"StringLeaf({self.value!r})"
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         # The string token '' is considered empty.
 | |
|         return not self.value
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return set()
 | |
| 
 | |
| 
 | |
| class Rhs:
 | |
|     def __init__(self, alts: List[Alt]):
 | |
|         self.alts = alts
 | |
|         self.memo: Optional[Tuple[Optional[str], str]] = None
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return " | ".join(str(alt) for alt in self.alts)
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Rhs({self.alts!r})"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[List[Alt]]:
 | |
|         yield self.alts
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         for alt in self.alts:
 | |
|             if alt.nullable_visit(rules):
 | |
|                 return True
 | |
|         return False
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         names: Set[str] = set()
 | |
|         for alt in self.alts:
 | |
|             names |= alt.initial_names()
 | |
|         return names
 | |
| 
 | |
|     def collect_todo(self, gen: ParserGenerator) -> None:
 | |
|         for alt in self.alts:
 | |
|             alt.collect_todo(gen)
 | |
| 
 | |
| 
 | |
| class Alt:
 | |
|     def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
 | |
|         self.items = items
 | |
|         self.icut = icut
 | |
|         self.action = action
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         core = " ".join(str(item) for item in self.items)
 | |
|         if not SIMPLE_STR and self.action:
 | |
|             return f"{core} {{ {self.action} }}"
 | |
|         else:
 | |
|             return core
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         args = [repr(self.items)]
 | |
|         if self.icut >= 0:
 | |
|             args.append(f"icut={self.icut}")
 | |
|         if self.action:
 | |
|             args.append(f"action={self.action!r}")
 | |
|         return f"Alt({', '.join(args)})"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[List[NamedItem]]:
 | |
|         yield self.items
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         for item in self.items:
 | |
|             if not item.nullable_visit(rules):
 | |
|                 return False
 | |
|         return True
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         names: Set[str] = set()
 | |
|         for item in self.items:
 | |
|             names |= item.initial_names()
 | |
|             if not item.nullable:
 | |
|                 break
 | |
|         return names
 | |
| 
 | |
|     def collect_todo(self, gen: ParserGenerator) -> None:
 | |
|         for item in self.items:
 | |
|             item.collect_todo(gen)
 | |
| 
 | |
| 
 | |
| class NamedItem:
 | |
|     def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
 | |
|         self.name = name
 | |
|         self.item = item
 | |
|         self.type = type
 | |
|         self.nullable = False
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         if not SIMPLE_STR and self.name:
 | |
|             return f"{self.name}={self.item}"
 | |
|         else:
 | |
|             return str(self.item)
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"NamedItem({self.name!r}, {self.item!r})"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Item]:
 | |
|         yield self.item
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         self.nullable = self.item.nullable_visit(rules)
 | |
|         return self.nullable
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return self.item.initial_names()
 | |
| 
 | |
|     def collect_todo(self, gen: ParserGenerator) -> None:
 | |
|         gen.callmakervisitor.visit(self.item)
 | |
| 
 | |
| 
 | |
| class Forced:
 | |
|     def __init__(self, node: Plain):
 | |
|         self.node = node
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return f"&&{self.node}"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Plain]:
 | |
|         yield self.node
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return True
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return set()
 | |
| 
 | |
| 
 | |
| class Lookahead:
 | |
|     def __init__(self, node: Plain, sign: str):
 | |
|         self.node = node
 | |
|         self.sign = sign
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return f"{self.sign}{self.node}"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Plain]:
 | |
|         yield self.node
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return True
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return set()
 | |
| 
 | |
| 
 | |
| class PositiveLookahead(Lookahead):
 | |
|     def __init__(self, node: Plain):
 | |
|         super().__init__(node, "&")
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"PositiveLookahead({self.node!r})"
 | |
| 
 | |
| 
 | |
| class NegativeLookahead(Lookahead):
 | |
|     def __init__(self, node: Plain):
 | |
|         super().__init__(node, "!")
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"NegativeLookahead({self.node!r})"
 | |
| 
 | |
| 
 | |
| class Opt:
 | |
|     def __init__(self, node: Item):
 | |
|         self.node = node
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         s = str(self.node)
 | |
|         # TODO: Decide whether to use [X] or X? based on type of X
 | |
|         if " " in s:
 | |
|             return f"[{s}]"
 | |
|         else:
 | |
|             return f"{s}?"
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Opt({self.node!r})"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Item]:
 | |
|         yield self.node
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return True
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return self.node.initial_names()
 | |
| 
 | |
| 
 | |
| class Repeat:
 | |
|     """Shared base class for x* and x+."""
 | |
| 
 | |
|     def __init__(self, node: Plain):
 | |
|         self.node = node
 | |
|         self.memo: Optional[Tuple[Optional[str], str]] = None
 | |
| 
 | |
|     @abstractmethod
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         raise NotImplementedError
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Plain]:
 | |
|         yield self.node
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return self.node.initial_names()
 | |
| 
 | |
| 
 | |
| class Repeat0(Repeat):
 | |
|     def __str__(self) -> str:
 | |
|         s = str(self.node)
 | |
|         # TODO: Decide whether to use (X)* or X* based on type of X
 | |
|         if " " in s:
 | |
|             return f"({s})*"
 | |
|         else:
 | |
|             return f"{s}*"
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Repeat0({self.node!r})"
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return True
 | |
| 
 | |
| 
 | |
| class Repeat1(Repeat):
 | |
|     def __str__(self) -> str:
 | |
|         s = str(self.node)
 | |
|         # TODO: Decide whether to use (X)+ or X+ based on type of X
 | |
|         if " " in s:
 | |
|             return f"({s})+"
 | |
|         else:
 | |
|             return f"{s}+"
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Repeat1({self.node!r})"
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return False
 | |
| 
 | |
| 
 | |
| class Gather(Repeat):
 | |
|     def __init__(self, separator: Plain, node: Plain):
 | |
|         self.separator = separator
 | |
|         self.node = node
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return f"{self.separator!s}.{self.node!s}+"
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Gather({self.separator!r}, {self.node!r})"
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return False
 | |
| 
 | |
| 
 | |
| class Group:
 | |
|     def __init__(self, rhs: Rhs):
 | |
|         self.rhs = rhs
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return f"({self.rhs})"
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Group({self.rhs!r})"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Rhs]:
 | |
|         yield self.rhs
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return self.rhs.nullable_visit(rules)
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return self.rhs.initial_names()
 | |
| 
 | |
| 
 | |
| class Cut:
 | |
|     def __init__(self) -> None:
 | |
|         pass
 | |
| 
 | |
|     def __repr__(self) -> str:
 | |
|         return f"Cut()"
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         return f"~"
 | |
| 
 | |
|     def __iter__(self) -> Iterator[Tuple[str, str]]:
 | |
|         if False:
 | |
|             yield
 | |
| 
 | |
|     def __eq__(self, other: object) -> bool:
 | |
|         if not isinstance(other, Cut):
 | |
|             return NotImplemented
 | |
|         return True
 | |
| 
 | |
|     def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
 | |
|         return True
 | |
| 
 | |
|     def initial_names(self) -> AbstractSet[str]:
 | |
|         return set()
 | |
| 
 | |
| 
 | |
| Plain = Union[Leaf, Group]
 | |
| Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
 | |
| RuleName = Tuple[str, str]
 | |
| MetaTuple = Tuple[str, Optional[str]]
 | |
| MetaList = List[MetaTuple]
 | |
| RuleList = List[Rule]
 | |
| NamedItemList = List[NamedItem]
 | |
| LookaheadOrCut = Union[Lookahead, Cut]
 |