2844 lines
80 KiB
Python
2844 lines
80 KiB
Python
""" @package antlr3.tree
|
|
@brief ANTLR3 runtime package, tree module
|
|
|
|
This module contains all support classes for AST construction and tree parsers.
|
|
|
|
"""
|
|
|
|
# begin[licence]
|
|
#
|
|
# [The "BSD licence"]
|
|
# Copyright (c) 2005-2008 Terence Parr
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# 3. The name of the author may not be used to endorse or promote products
|
|
# derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# end[licence]
|
|
|
|
# lot's of docstrings are missing, don't complain for now...
|
|
# pylint: disable-msg=C0111
|
|
|
|
import re
|
|
|
|
from antlr3.constants import UP, DOWN, EOF, INVALID_TOKEN_TYPE
|
|
from antlr3.recognizers import BaseRecognizer, RuleReturnScope
|
|
from antlr3.streams import IntStream
|
|
from antlr3.tokens import CommonToken, Token, INVALID_TOKEN
|
|
from antlr3.exceptions import MismatchedTreeNodeException, \
|
|
MissingTokenException, UnwantedTokenException, MismatchedTokenException, \
|
|
NoViableAltException
|
|
|
|
|
|
############################################################################
|
|
#
|
|
# tree related exceptions
|
|
#
|
|
############################################################################
|
|
|
|
|
|
class RewriteCardinalityException(RuntimeError):
|
|
"""
|
|
@brief Base class for all exceptions thrown during AST rewrite construction.
|
|
|
|
This signifies a case where the cardinality of two or more elements
|
|
in a subrule are different: (ID INT)+ where |ID|!=|INT|
|
|
"""
|
|
|
|
def __init__(self, elementDescription):
|
|
RuntimeError.__init__(self, elementDescription)
|
|
|
|
self.elementDescription = elementDescription
|
|
|
|
|
|
def getMessage(self):
|
|
return self.elementDescription
|
|
|
|
|
|
class RewriteEarlyExitException(RewriteCardinalityException):
|
|
"""@brief No elements within a (...)+ in a rewrite rule"""
|
|
|
|
def __init__(self, elementDescription=None):
|
|
RewriteCardinalityException.__init__(self, elementDescription)
|
|
|
|
|
|
class RewriteEmptyStreamException(RewriteCardinalityException):
|
|
"""
|
|
@brief Ref to ID or expr but no tokens in ID stream or subtrees in expr stream
|
|
"""
|
|
|
|
pass
|
|
|
|
|
|
############################################################################
|
|
#
|
|
# basic Tree and TreeAdaptor interfaces
|
|
#
|
|
############################################################################
|
|
|
|
class Tree(object):
|
|
"""
|
|
@brief Abstract baseclass for tree nodes.
|
|
|
|
What does a tree look like? ANTLR has a number of support classes
|
|
such as CommonTreeNodeStream that work on these kinds of trees. You
|
|
don't have to make your trees implement this interface, but if you do,
|
|
you'll be able to use more support code.
|
|
|
|
NOTE: When constructing trees, ANTLR can build any kind of tree; it can
|
|
even use Token objects as trees if you add a child list to your tokens.
|
|
|
|
This is a tree node without any payload; just navigation and factory stuff.
|
|
"""
|
|
|
|
|
|
def getChild(self, i):
|
|
raise NotImplementedError
|
|
|
|
|
|
def getChildCount(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
def getParent(self):
|
|
"""Tree tracks parent and child index now > 3.0"""
|
|
|
|
raise NotImplementedError
|
|
|
|
def setParent(self, t):
|
|
"""Tree tracks parent and child index now > 3.0"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def hasAncestor(self, ttype):
|
|
"""Walk upwards looking for ancestor with this token type."""
|
|
|
|
raise NotImplementedError
|
|
|
|
def getAncestor(self, ttype):
|
|
"""Walk upwards and get first ancestor with this token type."""
|
|
|
|
raise NotImplementedError
|
|
|
|
def getAncestors(self):
|
|
"""Return a list of all ancestors of this node.
|
|
|
|
The first node of list is the root and the last is the parent of
|
|
this node.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getChildIndex(self):
|
|
"""This node is what child index? 0..n-1"""
|
|
|
|
raise NotImplementedError
|
|
|
|
def setChildIndex(self, index):
|
|
"""This node is what child index? 0..n-1"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def freshenParentAndChildIndexes(self):
|
|
"""Set the parent and child index values for all children"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def addChild(self, t):
|
|
"""
|
|
Add t as a child to this node. If t is null, do nothing. If t
|
|
is nil, add all children of t to this' children.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setChild(self, i, t):
|
|
"""Set ith child (0..n-1) to t; t must be non-null and non-nil node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def deleteChild(self, i):
|
|
raise NotImplementedError
|
|
|
|
|
|
def replaceChildren(self, startChildIndex, stopChildIndex, t):
|
|
"""
|
|
Delete children from start to stop and replace with t even if t is
|
|
a list (nil-root tree). num of children can increase or decrease.
|
|
For huge child lists, inserting children can force walking rest of
|
|
children to set their childindex; could be slow.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def isNil(self):
|
|
"""
|
|
Indicates the node is a nil node but may still have children, meaning
|
|
the tree is a flat list.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTokenStartIndex(self):
|
|
"""
|
|
What is the smallest token index (indexing from 0) for this node
|
|
and its children?
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setTokenStartIndex(self, index):
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTokenStopIndex(self):
|
|
"""
|
|
What is the largest token index (indexing from 0) for this node
|
|
and its children?
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setTokenStopIndex(self, index):
|
|
raise NotImplementedError
|
|
|
|
|
|
def dupNode(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
def getType(self):
|
|
"""Return a token type; needed for tree parsing."""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getText(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
def getLine(self):
|
|
"""
|
|
In case we don't have a token payload, what is the line for errors?
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getCharPositionInLine(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
def toStringTree(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
def toString(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
class TreeAdaptor(object):
|
|
"""
|
|
@brief Abstract baseclass for tree adaptors.
|
|
|
|
How to create and navigate trees. Rather than have a separate factory
|
|
and adaptor, I've merged them. Makes sense to encapsulate.
|
|
|
|
This takes the place of the tree construction code generated in the
|
|
generated code in 2.x and the ASTFactory.
|
|
|
|
I do not need to know the type of a tree at all so they are all
|
|
generic Objects. This may increase the amount of typecasting needed. :(
|
|
"""
|
|
|
|
# C o n s t r u c t i o n
|
|
|
|
def createWithPayload(self, payload):
|
|
"""
|
|
Create a tree node from Token object; for CommonTree type trees,
|
|
then the token just becomes the payload. This is the most
|
|
common create call.
|
|
|
|
Override if you want another kind of node to be built.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def dupNode(self, treeNode):
|
|
"""Duplicate a single tree node.
|
|
|
|
Override if you want another kind of node to be built."""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def dupTree(self, tree):
|
|
"""Duplicate tree recursively, using dupNode() for each node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def nil(self):
|
|
"""
|
|
Return a nil node (an empty but non-null node) that can hold
|
|
a list of element as the children. If you want a flat tree (a list)
|
|
use "t=adaptor.nil(); t.addChild(x); t.addChild(y);"
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def errorNode(self, input, start, stop, exc):
|
|
"""
|
|
Return a tree node representing an error. This node records the
|
|
tokens consumed during error recovery. The start token indicates the
|
|
input symbol at which the error was detected. The stop token indicates
|
|
the last symbol consumed during recovery.
|
|
|
|
You must specify the input stream so that the erroneous text can
|
|
be packaged up in the error node. The exception could be useful
|
|
to some applications; default implementation stores ptr to it in
|
|
the CommonErrorNode.
|
|
|
|
This only makes sense during token parsing, not tree parsing.
|
|
Tree parsing should happen only when parsing and tree construction
|
|
succeed.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def isNil(self, tree):
|
|
"""Is tree considered a nil node used to make lists of child nodes?"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def addChild(self, t, child):
|
|
"""
|
|
Add a child to the tree t. If child is a flat tree (a list), make all
|
|
in list children of t. Warning: if t has no children, but child does
|
|
and child isNil then you can decide it is ok to move children to t via
|
|
t.children = child.children; i.e., without copying the array. Just
|
|
make sure that this is consistent with have the user will build
|
|
ASTs. Do nothing if t or child is null.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def becomeRoot(self, newRoot, oldRoot):
|
|
"""
|
|
If oldRoot is a nil root, just copy or move the children to newRoot.
|
|
If not a nil root, make oldRoot a child of newRoot.
|
|
|
|
old=^(nil a b c), new=r yields ^(r a b c)
|
|
old=^(a b c), new=r yields ^(r ^(a b c))
|
|
|
|
If newRoot is a nil-rooted single child tree, use the single
|
|
child as the new root node.
|
|
|
|
old=^(nil a b c), new=^(nil r) yields ^(r a b c)
|
|
old=^(a b c), new=^(nil r) yields ^(r ^(a b c))
|
|
|
|
If oldRoot was null, it's ok, just return newRoot (even if isNil).
|
|
|
|
old=null, new=r yields r
|
|
old=null, new=^(nil r) yields ^(nil r)
|
|
|
|
Return newRoot. Throw an exception if newRoot is not a
|
|
simple node or nil root with a single child node--it must be a root
|
|
node. If newRoot is ^(nil x) return x as newRoot.
|
|
|
|
Be advised that it's ok for newRoot to point at oldRoot's
|
|
children; i.e., you don't have to copy the list. We are
|
|
constructing these nodes so we should have this control for
|
|
efficiency.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def rulePostProcessing(self, root):
|
|
"""
|
|
Given the root of the subtree created for this rule, post process
|
|
it to do any simplifications or whatever you want. A required
|
|
behavior is to convert ^(nil singleSubtree) to singleSubtree
|
|
as the setting of start/stop indexes relies on a single non-nil root
|
|
for non-flat trees.
|
|
|
|
Flat trees such as for lists like "idlist : ID+ ;" are left alone
|
|
unless there is only one ID. For a list, the start/stop indexes
|
|
are set in the nil node.
|
|
|
|
This method is executed after all rule tree construction and right
|
|
before setTokenBoundaries().
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getUniqueID(self, node):
|
|
"""For identifying trees.
|
|
|
|
How to identify nodes so we can say "add node to a prior node"?
|
|
Even becomeRoot is an issue. Use System.identityHashCode(node)
|
|
usually.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
# R e w r i t e R u l e s
|
|
|
|
def createFromToken(self, tokenType, fromToken, text=None):
|
|
"""
|
|
Create a new node derived from a token, with a new token type and
|
|
(optionally) new text.
|
|
|
|
This is invoked from an imaginary node ref on right side of a
|
|
rewrite rule as IMAG[$tokenLabel] or IMAG[$tokenLabel "IMAG"].
|
|
|
|
This should invoke createToken(Token).
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def createFromType(self, tokenType, text):
|
|
"""Create a new node derived from a token, with a new token type.
|
|
|
|
This is invoked from an imaginary node ref on right side of a
|
|
rewrite rule as IMAG["IMAG"].
|
|
|
|
This should invoke createToken(int,String).
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
# C o n t e n t
|
|
|
|
def getType(self, t):
|
|
"""For tree parsing, I need to know the token type of a node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setType(self, t, type):
|
|
"""Node constructors can set the type of a node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getText(self, t):
|
|
raise NotImplementedError
|
|
|
|
def setText(self, t, text):
|
|
"""Node constructors can set the text of a node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getToken(self, t):
|
|
"""Return the token object from which this node was created.
|
|
|
|
Currently used only for printing an error message.
|
|
The error display routine in BaseRecognizer needs to
|
|
display where the input the error occurred. If your
|
|
tree of limitation does not store information that can
|
|
lead you to the token, you can create a token filled with
|
|
the appropriate information and pass that back. See
|
|
BaseRecognizer.getErrorMessage().
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setTokenBoundaries(self, t, startToken, stopToken):
|
|
"""
|
|
Where are the bounds in the input token stream for this node and
|
|
all children? Each rule that creates AST nodes will call this
|
|
method right before returning. Flat trees (i.e., lists) will
|
|
still usually have a nil root node just to hold the children list.
|
|
That node would contain the start/stop indexes then.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTokenStartIndex(self, t):
|
|
"""
|
|
Get the token start index for this subtree; return -1 if no such index
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTokenStopIndex(self, t):
|
|
"""
|
|
Get the token stop index for this subtree; return -1 if no such index
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
# N a v i g a t i o n / T r e e P a r s i n g
|
|
|
|
def getChild(self, t, i):
|
|
"""Get a child 0..n-1 node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setChild(self, t, i, child):
|
|
"""Set ith child (0..n-1) to t; t must be non-null and non-nil node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def deleteChild(self, t, i):
|
|
"""Remove ith child and shift children down from right."""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getChildCount(self, t):
|
|
"""How many children? If 0, then this is a leaf node"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getParent(self, t):
|
|
"""
|
|
Who is the parent node of this node; if null, implies node is root.
|
|
If your node type doesn't handle this, it's ok but the tree rewrites
|
|
in tree parsers need this functionality.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setParent(self, t, parent):
|
|
"""
|
|
Who is the parent node of this node; if null, implies node is root.
|
|
If your node type doesn't handle this, it's ok but the tree rewrites
|
|
in tree parsers need this functionality.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getChildIndex(self, t):
|
|
"""
|
|
What index is this node in the child list? Range: 0..n-1
|
|
If your node type doesn't handle this, it's ok but the tree rewrites
|
|
in tree parsers need this functionality.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setChildIndex(self, t, index):
|
|
"""
|
|
What index is this node in the child list? Range: 0..n-1
|
|
If your node type doesn't handle this, it's ok but the tree rewrites
|
|
in tree parsers need this functionality.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
|
|
"""
|
|
Replace from start to stop child index of parent with t, which might
|
|
be a list. Number of children may be different
|
|
after this call.
|
|
|
|
If parent is null, don't do anything; must be at root of overall tree.
|
|
Can't replace whatever points to the parent externally. Do nothing.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
# Misc
|
|
|
|
def create(self, *args):
|
|
"""
|
|
Deprecated, use createWithPayload, createFromToken or createFromType.
|
|
|
|
This method only exists to mimic the Java interface of TreeAdaptor.
|
|
|
|
"""
|
|
|
|
if len(args) == 1 and isinstance(args[0], Token):
|
|
# Object create(Token payload);
|
|
## warnings.warn(
|
|
## "Using create() is deprecated, use createWithPayload()",
|
|
## DeprecationWarning,
|
|
## stacklevel=2
|
|
## )
|
|
return self.createWithPayload(args[0])
|
|
|
|
if (len(args) == 2
|
|
and isinstance(args[0], (int, long))
|
|
and isinstance(args[1], Token)
|
|
):
|
|
# Object create(int tokenType, Token fromToken);
|
|
## warnings.warn(
|
|
## "Using create() is deprecated, use createFromToken()",
|
|
## DeprecationWarning,
|
|
## stacklevel=2
|
|
## )
|
|
return self.createFromToken(args[0], args[1])
|
|
|
|
if (len(args) == 3
|
|
and isinstance(args[0], (int, long))
|
|
and isinstance(args[1], Token)
|
|
and isinstance(args[2], basestring)
|
|
):
|
|
# Object create(int tokenType, Token fromToken, String text);
|
|
## warnings.warn(
|
|
## "Using create() is deprecated, use createFromToken()",
|
|
## DeprecationWarning,
|
|
## stacklevel=2
|
|
## )
|
|
return self.createFromToken(args[0], args[1], args[2])
|
|
|
|
if (len(args) == 2
|
|
and isinstance(args[0], (int, long))
|
|
and isinstance(args[1], basestring)
|
|
):
|
|
# Object create(int tokenType, String text);
|
|
## warnings.warn(
|
|
## "Using create() is deprecated, use createFromType()",
|
|
## DeprecationWarning,
|
|
## stacklevel=2
|
|
## )
|
|
return self.createFromType(args[0], args[1])
|
|
|
|
raise TypeError(
|
|
"No create method with this signature found: %s"
|
|
% (', '.join(type(v).__name__ for v in args))
|
|
)
|
|
|
|
|
|
############################################################################
|
|
#
|
|
# base implementation of Tree and TreeAdaptor
|
|
#
|
|
# Tree
|
|
# \- BaseTree
|
|
#
|
|
# TreeAdaptor
|
|
# \- BaseTreeAdaptor
|
|
#
|
|
############################################################################
|
|
|
|
|
|
class BaseTree(Tree):
|
|
"""
|
|
@brief A generic tree implementation with no payload.
|
|
|
|
You must subclass to
|
|
actually have any user data. ANTLR v3 uses a list of children approach
|
|
instead of the child-sibling approach in v2. A flat tree (a list) is
|
|
an empty node whose children represent the list. An empty, but
|
|
non-null node is called "nil".
|
|
"""
|
|
|
|
# BaseTree is abstract, no need to complain about not implemented abstract
|
|
# methods
|
|
# pylint: disable-msg=W0223
|
|
|
|
def __init__(self, node=None):
|
|
"""
|
|
Create a new node from an existing node does nothing for BaseTree
|
|
as there are no fields other than the children list, which cannot
|
|
be copied as the children are not considered part of this node.
|
|
"""
|
|
|
|
Tree.__init__(self)
|
|
self.children = []
|
|
self.parent = None
|
|
self.childIndex = 0
|
|
|
|
|
|
def getChild(self, i):
|
|
try:
|
|
return self.children[i]
|
|
except IndexError:
|
|
return None
|
|
|
|
|
|
def getChildren(self):
|
|
"""@brief Get the children internal List
|
|
|
|
Note that if you directly mess with
|
|
the list, do so at your own risk.
|
|
"""
|
|
|
|
# FIXME: mark as deprecated
|
|
return self.children
|
|
|
|
|
|
def getFirstChildWithType(self, treeType):
|
|
for child in self.children:
|
|
if child.getType() == treeType:
|
|
return child
|
|
|
|
return None
|
|
|
|
|
|
def getChildCount(self):
|
|
return len(self.children)
|
|
|
|
|
|
def addChild(self, childTree):
|
|
"""Add t as child of this node.
|
|
|
|
Warning: if t has no children, but child does
|
|
and child isNil then this routine moves children to t via
|
|
t.children = child.children; i.e., without copying the array.
|
|
"""
|
|
|
|
# this implementation is much simpler and probably less efficient
|
|
# than the mumbo-jumbo that Ter did for the Java runtime.
|
|
|
|
if childTree is None:
|
|
return
|
|
|
|
if childTree.isNil():
|
|
# t is an empty node possibly with children
|
|
|
|
if self.children is childTree.children:
|
|
raise ValueError("attempt to add child list to itself")
|
|
|
|
# fix parent pointer and childIndex for new children
|
|
for idx, child in enumerate(childTree.children):
|
|
child.parent = self
|
|
child.childIndex = len(self.children) + idx
|
|
|
|
self.children += childTree.children
|
|
|
|
else:
|
|
# child is not nil (don't care about children)
|
|
self.children.append(childTree)
|
|
childTree.parent = self
|
|
childTree.childIndex = len(self.children) - 1
|
|
|
|
|
|
def addChildren(self, children):
|
|
"""Add all elements of kids list as children of this node"""
|
|
|
|
self.children += children
|
|
|
|
|
|
def setChild(self, i, t):
|
|
if t is None:
|
|
return
|
|
|
|
if t.isNil():
|
|
raise ValueError("Can't set single child to a list")
|
|
|
|
self.children[i] = t
|
|
t.parent = self
|
|
t.childIndex = i
|
|
|
|
|
|
def deleteChild(self, i):
|
|
killed = self.children[i]
|
|
|
|
del self.children[i]
|
|
|
|
# walk rest and decrement their child indexes
|
|
for idx, child in enumerate(self.children[i:]):
|
|
child.childIndex = i + idx
|
|
|
|
return killed
|
|
|
|
|
|
def replaceChildren(self, startChildIndex, stopChildIndex, newTree):
|
|
"""
|
|
Delete children from start to stop and replace with t even if t is
|
|
a list (nil-root tree). num of children can increase or decrease.
|
|
For huge child lists, inserting children can force walking rest of
|
|
children to set their childindex; could be slow.
|
|
"""
|
|
|
|
if (startChildIndex >= len(self.children)
|
|
or stopChildIndex >= len(self.children)
|
|
):
|
|
raise IndexError("indexes invalid")
|
|
|
|
replacingHowMany = stopChildIndex - startChildIndex + 1
|
|
|
|
# normalize to a list of children to add: newChildren
|
|
if newTree.isNil():
|
|
newChildren = newTree.children
|
|
|
|
else:
|
|
newChildren = [newTree]
|
|
|
|
replacingWithHowMany = len(newChildren)
|
|
delta = replacingHowMany - replacingWithHowMany
|
|
|
|
|
|
if delta == 0:
|
|
# if same number of nodes, do direct replace
|
|
for idx, child in enumerate(newChildren):
|
|
self.children[idx + startChildIndex] = child
|
|
child.parent = self
|
|
child.childIndex = idx + startChildIndex
|
|
|
|
else:
|
|
# length of children changes...
|
|
|
|
# ...delete replaced segment...
|
|
del self.children[startChildIndex:stopChildIndex+1]
|
|
|
|
# ...insert new segment...
|
|
self.children[startChildIndex:startChildIndex] = newChildren
|
|
|
|
# ...and fix indeces
|
|
self.freshenParentAndChildIndexes(startChildIndex)
|
|
|
|
|
|
def isNil(self):
|
|
return False
|
|
|
|
|
|
def freshenParentAndChildIndexes(self, offset=0):
|
|
for idx, child in enumerate(self.children[offset:]):
|
|
child.childIndex = idx + offset
|
|
child.parent = self
|
|
|
|
|
|
def sanityCheckParentAndChildIndexes(self, parent=None, i=-1):
|
|
if parent != self.parent:
|
|
raise ValueError(
|
|
"parents don't match; expected %r found %r"
|
|
% (parent, self.parent)
|
|
)
|
|
|
|
if i != self.childIndex:
|
|
raise ValueError(
|
|
"child indexes don't match; expected %d found %d"
|
|
% (i, self.childIndex)
|
|
)
|
|
|
|
for idx, child in enumerate(self.children):
|
|
child.sanityCheckParentAndChildIndexes(self, idx)
|
|
|
|
|
|
def getChildIndex(self):
|
|
"""BaseTree doesn't track child indexes."""
|
|
|
|
return 0
|
|
|
|
|
|
def setChildIndex(self, index):
|
|
"""BaseTree doesn't track child indexes."""
|
|
|
|
pass
|
|
|
|
|
|
def getParent(self):
|
|
"""BaseTree doesn't track parent pointers."""
|
|
|
|
return None
|
|
|
|
def setParent(self, t):
|
|
"""BaseTree doesn't track parent pointers."""
|
|
|
|
pass
|
|
|
|
|
|
def hasAncestor(self, ttype):
|
|
"""Walk upwards looking for ancestor with this token type."""
|
|
return self.getAncestor(ttype) is not None
|
|
|
|
def getAncestor(self, ttype):
|
|
"""Walk upwards and get first ancestor with this token type."""
|
|
t = self.getParent()
|
|
while t is not None:
|
|
if t.getType() == ttype:
|
|
return t
|
|
t = t.getParent()
|
|
|
|
return None
|
|
|
|
def getAncestors(self):
|
|
"""Return a list of all ancestors of this node.
|
|
|
|
The first node of list is the root and the last is the parent of
|
|
this node.
|
|
"""
|
|
if selfgetParent() is None:
|
|
return None
|
|
|
|
ancestors = []
|
|
t = self.getParent()
|
|
while t is not None:
|
|
ancestors.insert(0, t) # insert at start
|
|
t = t.getParent()
|
|
|
|
return ancestors
|
|
|
|
|
|
def toStringTree(self):
|
|
"""Print out a whole tree not just a node"""
|
|
|
|
if len(self.children) == 0:
|
|
return self.toString()
|
|
|
|
buf = []
|
|
if not self.isNil():
|
|
buf.append('(')
|
|
buf.append(self.toString())
|
|
buf.append(' ')
|
|
|
|
for i, child in enumerate(self.children):
|
|
if i > 0:
|
|
buf.append(' ')
|
|
buf.append(child.toStringTree())
|
|
|
|
if not self.isNil():
|
|
buf.append(')')
|
|
|
|
return ''.join(buf)
|
|
|
|
|
|
def getLine(self):
|
|
return 0
|
|
|
|
|
|
def getCharPositionInLine(self):
|
|
return 0
|
|
|
|
|
|
def toString(self):
|
|
"""Override to say how a node (not a tree) should look as text"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
class BaseTreeAdaptor(TreeAdaptor):
|
|
"""
|
|
@brief A TreeAdaptor that works with any Tree implementation.
|
|
"""
|
|
|
|
# BaseTreeAdaptor is abstract, no need to complain about not implemented
|
|
# abstract methods
|
|
# pylint: disable-msg=W0223
|
|
|
|
def nil(self):
|
|
return self.createWithPayload(None)
|
|
|
|
|
|
def errorNode(self, input, start, stop, exc):
|
|
"""
|
|
create tree node that holds the start and stop tokens associated
|
|
with an error.
|
|
|
|
If you specify your own kind of tree nodes, you will likely have to
|
|
override this method. CommonTree returns Token.INVALID_TOKEN_TYPE
|
|
if no token payload but you might have to set token type for diff
|
|
node type.
|
|
|
|
You don't have to subclass CommonErrorNode; you will likely need to
|
|
subclass your own tree node class to avoid class cast exception.
|
|
"""
|
|
|
|
return CommonErrorNode(input, start, stop, exc)
|
|
|
|
|
|
def isNil(self, tree):
|
|
return tree.isNil()
|
|
|
|
|
|
def dupTree(self, t, parent=None):
|
|
"""
|
|
This is generic in the sense that it will work with any kind of
|
|
tree (not just Tree interface). It invokes the adaptor routines
|
|
not the tree node routines to do the construction.
|
|
"""
|
|
|
|
if t is None:
|
|
return None
|
|
|
|
newTree = self.dupNode(t)
|
|
|
|
# ensure new subtree root has parent/child index set
|
|
|
|
# same index in new tree
|
|
self.setChildIndex(newTree, self.getChildIndex(t))
|
|
|
|
self.setParent(newTree, parent)
|
|
|
|
for i in range(self.getChildCount(t)):
|
|
child = self.getChild(t, i)
|
|
newSubTree = self.dupTree(child, t)
|
|
self.addChild(newTree, newSubTree)
|
|
|
|
return newTree
|
|
|
|
|
|
def addChild(self, tree, child):
|
|
"""
|
|
Add a child to the tree t. If child is a flat tree (a list), make all
|
|
in list children of t. Warning: if t has no children, but child does
|
|
and child isNil then you can decide it is ok to move children to t via
|
|
t.children = child.children; i.e., without copying the array. Just
|
|
make sure that this is consistent with have the user will build
|
|
ASTs.
|
|
"""
|
|
|
|
#if isinstance(child, Token):
|
|
# child = self.createWithPayload(child)
|
|
|
|
if tree is not None and child is not None:
|
|
tree.addChild(child)
|
|
|
|
|
|
def becomeRoot(self, newRoot, oldRoot):
|
|
"""
|
|
If oldRoot is a nil root, just copy or move the children to newRoot.
|
|
If not a nil root, make oldRoot a child of newRoot.
|
|
|
|
old=^(nil a b c), new=r yields ^(r a b c)
|
|
old=^(a b c), new=r yields ^(r ^(a b c))
|
|
|
|
If newRoot is a nil-rooted single child tree, use the single
|
|
child as the new root node.
|
|
|
|
old=^(nil a b c), new=^(nil r) yields ^(r a b c)
|
|
old=^(a b c), new=^(nil r) yields ^(r ^(a b c))
|
|
|
|
If oldRoot was null, it's ok, just return newRoot (even if isNil).
|
|
|
|
old=null, new=r yields r
|
|
old=null, new=^(nil r) yields ^(nil r)
|
|
|
|
Return newRoot. Throw an exception if newRoot is not a
|
|
simple node or nil root with a single child node--it must be a root
|
|
node. If newRoot is ^(nil x) return x as newRoot.
|
|
|
|
Be advised that it's ok for newRoot to point at oldRoot's
|
|
children; i.e., you don't have to copy the list. We are
|
|
constructing these nodes so we should have this control for
|
|
efficiency.
|
|
"""
|
|
|
|
if isinstance(newRoot, Token):
|
|
newRoot = self.create(newRoot)
|
|
|
|
if oldRoot is None:
|
|
return newRoot
|
|
|
|
if not isinstance(newRoot, CommonTree):
|
|
newRoot = self.createWithPayload(newRoot)
|
|
|
|
# handle ^(nil real-node)
|
|
if newRoot.isNil():
|
|
nc = newRoot.getChildCount()
|
|
if nc == 1:
|
|
newRoot = newRoot.getChild(0)
|
|
|
|
elif nc > 1:
|
|
# TODO: make tree run time exceptions hierarchy
|
|
raise RuntimeError("more than one node as root")
|
|
|
|
# add oldRoot to newRoot; addChild takes care of case where oldRoot
|
|
# is a flat list (i.e., nil-rooted tree). All children of oldRoot
|
|
# are added to newRoot.
|
|
newRoot.addChild(oldRoot)
|
|
return newRoot
|
|
|
|
|
|
def rulePostProcessing(self, root):
|
|
"""Transform ^(nil x) to x and nil to null"""
|
|
|
|
if root is not None and root.isNil():
|
|
if root.getChildCount() == 0:
|
|
root = None
|
|
|
|
elif root.getChildCount() == 1:
|
|
root = root.getChild(0)
|
|
# whoever invokes rule will set parent and child index
|
|
root.setParent(None)
|
|
root.setChildIndex(-1)
|
|
|
|
return root
|
|
|
|
|
|
def createFromToken(self, tokenType, fromToken, text=None):
|
|
if fromToken is None:
|
|
return self.createFromType(tokenType, text)
|
|
|
|
assert isinstance(tokenType, (int, long)), type(tokenType).__name__
|
|
assert isinstance(fromToken, Token), type(fromToken).__name__
|
|
assert text is None or isinstance(text, basestring), type(text).__name__
|
|
|
|
fromToken = self.createToken(fromToken)
|
|
fromToken.type = tokenType
|
|
if text is not None:
|
|
fromToken.text = text
|
|
t = self.createWithPayload(fromToken)
|
|
return t
|
|
|
|
|
|
def createFromType(self, tokenType, text):
|
|
assert isinstance(tokenType, (int, long)), type(tokenType).__name__
|
|
assert isinstance(text, basestring) or text is None, type(text).__name__
|
|
|
|
fromToken = self.createToken(tokenType=tokenType, text=text)
|
|
t = self.createWithPayload(fromToken)
|
|
return t
|
|
|
|
|
|
def getType(self, t):
|
|
return t.getType()
|
|
|
|
|
|
def setType(self, t, type):
|
|
raise RuntimeError("don't know enough about Tree node")
|
|
|
|
|
|
def getText(self, t):
|
|
return t.getText()
|
|
|
|
|
|
def setText(self, t, text):
|
|
raise RuntimeError("don't know enough about Tree node")
|
|
|
|
|
|
def getChild(self, t, i):
|
|
return t.getChild(i)
|
|
|
|
|
|
def setChild(self, t, i, child):
|
|
t.setChild(i, child)
|
|
|
|
|
|
def deleteChild(self, t, i):
|
|
return t.deleteChild(i)
|
|
|
|
|
|
def getChildCount(self, t):
|
|
return t.getChildCount()
|
|
|
|
|
|
def getUniqueID(self, node):
|
|
return hash(node)
|
|
|
|
|
|
def createToken(self, fromToken=None, tokenType=None, text=None):
|
|
"""
|
|
Tell me how to create a token for use with imaginary token nodes.
|
|
For example, there is probably no input symbol associated with imaginary
|
|
token DECL, but you need to create it as a payload or whatever for
|
|
the DECL node as in ^(DECL type ID).
|
|
|
|
If you care what the token payload objects' type is, you should
|
|
override this method and any other createToken variant.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
############################################################################
|
|
#
|
|
# common tree implementation
|
|
#
|
|
# Tree
|
|
# \- BaseTree
|
|
# \- CommonTree
|
|
# \- CommonErrorNode
|
|
#
|
|
# TreeAdaptor
|
|
# \- BaseTreeAdaptor
|
|
# \- CommonTreeAdaptor
|
|
#
|
|
############################################################################
|
|
|
|
|
|
class CommonTree(BaseTree):
|
|
"""@brief A tree node that is wrapper for a Token object.
|
|
|
|
After 3.0 release
|
|
while building tree rewrite stuff, it became clear that computing
|
|
parent and child index is very difficult and cumbersome. Better to
|
|
spend the space in every tree node. If you don't want these extra
|
|
fields, it's easy to cut them out in your own BaseTree subclass.
|
|
|
|
"""
|
|
|
|
def __init__(self, payload):
|
|
BaseTree.__init__(self)
|
|
|
|
# What token indexes bracket all tokens associated with this node
|
|
# and below?
|
|
self.startIndex = -1
|
|
self.stopIndex = -1
|
|
|
|
# Who is the parent node of this node; if null, implies node is root
|
|
self.parent = None
|
|
|
|
# What index is this node in the child list? Range: 0..n-1
|
|
self.childIndex = -1
|
|
|
|
# A single token is the payload
|
|
if payload is None:
|
|
self.token = None
|
|
|
|
elif isinstance(payload, CommonTree):
|
|
self.token = payload.token
|
|
self.startIndex = payload.startIndex
|
|
self.stopIndex = payload.stopIndex
|
|
|
|
elif payload is None or isinstance(payload, Token):
|
|
self.token = payload
|
|
|
|
else:
|
|
raise TypeError(type(payload).__name__)
|
|
|
|
|
|
|
|
def getToken(self):
|
|
return self.token
|
|
|
|
|
|
def dupNode(self):
|
|
return CommonTree(self)
|
|
|
|
|
|
def isNil(self):
|
|
return self.token is None
|
|
|
|
|
|
def getType(self):
|
|
if self.token is None:
|
|
return INVALID_TOKEN_TYPE
|
|
|
|
return self.token.getType()
|
|
|
|
type = property(getType)
|
|
|
|
|
|
def getText(self):
|
|
if self.token is None:
|
|
return None
|
|
|
|
return self.token.text
|
|
|
|
text = property(getText)
|
|
|
|
|
|
def getLine(self):
|
|
if self.token is None or self.token.getLine() == 0:
|
|
if self.getChildCount():
|
|
return self.getChild(0).getLine()
|
|
else:
|
|
return 0
|
|
|
|
return self.token.getLine()
|
|
|
|
line = property(getLine)
|
|
|
|
|
|
def getCharPositionInLine(self):
|
|
if self.token is None or self.token.getCharPositionInLine() == -1:
|
|
if self.getChildCount():
|
|
return self.getChild(0).getCharPositionInLine()
|
|
else:
|
|
return 0
|
|
|
|
else:
|
|
return self.token.getCharPositionInLine()
|
|
|
|
charPositionInLine = property(getCharPositionInLine)
|
|
|
|
|
|
def getTokenStartIndex(self):
|
|
if self.startIndex == -1 and self.token is not None:
|
|
return self.token.getTokenIndex()
|
|
|
|
return self.startIndex
|
|
|
|
def setTokenStartIndex(self, index):
|
|
self.startIndex = index
|
|
|
|
tokenStartIndex = property(getTokenStartIndex, setTokenStartIndex)
|
|
|
|
|
|
def getTokenStopIndex(self):
|
|
if self.stopIndex == -1 and self.token is not None:
|
|
return self.token.getTokenIndex()
|
|
|
|
return self.stopIndex
|
|
|
|
def setTokenStopIndex(self, index):
|
|
self.stopIndex = index
|
|
|
|
tokenStopIndex = property(getTokenStopIndex, setTokenStopIndex)
|
|
|
|
|
|
def setUnknownTokenBoundaries(self):
|
|
"""For every node in this subtree, make sure it's start/stop token's
|
|
are set. Walk depth first, visit bottom up. Only updates nodes
|
|
with at least one token index < 0.
|
|
"""
|
|
|
|
if self.children is None:
|
|
if self.startIndex < 0 or self.stopIndex < 0:
|
|
self.startIndex = self.stopIndex = self.token.getTokenIndex()
|
|
|
|
return
|
|
|
|
for child in self.children:
|
|
child.setUnknownTokenBoundaries()
|
|
|
|
if self.startIndex >= 0 and self.stopIndex >= 0:
|
|
# already set
|
|
return
|
|
|
|
if self.children:
|
|
firstChild = self.children[0]
|
|
lastChild = self.children[-1]
|
|
self.startIndex = firstChild.getTokenStartIndex()
|
|
self.stopIndex = lastChild.getTokenStopIndex()
|
|
|
|
|
|
def getChildIndex(self):
|
|
#FIXME: mark as deprecated
|
|
return self.childIndex
|
|
|
|
|
|
def setChildIndex(self, idx):
|
|
#FIXME: mark as deprecated
|
|
self.childIndex = idx
|
|
|
|
|
|
def getParent(self):
|
|
#FIXME: mark as deprecated
|
|
return self.parent
|
|
|
|
|
|
def setParent(self, t):
|
|
#FIXME: mark as deprecated
|
|
self.parent = t
|
|
|
|
|
|
def toString(self):
|
|
if self.isNil():
|
|
return "nil"
|
|
|
|
if self.getType() == INVALID_TOKEN_TYPE:
|
|
return "<errornode>"
|
|
|
|
return self.token.text
|
|
|
|
__str__ = toString
|
|
|
|
|
|
|
|
def toStringTree(self):
|
|
if not self.children:
|
|
return self.toString()
|
|
|
|
ret = ''
|
|
if not self.isNil():
|
|
ret += '(%s ' % (self.toString())
|
|
|
|
ret += ' '.join([child.toStringTree() for child in self.children])
|
|
|
|
if not self.isNil():
|
|
ret += ')'
|
|
|
|
return ret
|
|
|
|
|
|
INVALID_NODE = CommonTree(INVALID_TOKEN)
|
|
|
|
|
|
class CommonErrorNode(CommonTree):
|
|
"""A node representing erroneous token range in token stream"""
|
|
|
|
def __init__(self, input, start, stop, exc):
|
|
CommonTree.__init__(self, None)
|
|
|
|
if (stop is None or
|
|
(stop.getTokenIndex() < start.getTokenIndex() and
|
|
stop.getType() != EOF
|
|
)
|
|
):
|
|
# sometimes resync does not consume a token (when LT(1) is
|
|
# in follow set. So, stop will be 1 to left to start. adjust.
|
|
# Also handle case where start is the first token and no token
|
|
# is consumed during recovery; LT(-1) will return null.
|
|
stop = start
|
|
|
|
self.input = input
|
|
self.start = start
|
|
self.stop = stop
|
|
self.trappedException = exc
|
|
|
|
|
|
def isNil(self):
|
|
return False
|
|
|
|
|
|
def getType(self):
|
|
return INVALID_TOKEN_TYPE
|
|
|
|
|
|
def getText(self):
|
|
if isinstance(self.start, Token):
|
|
i = self.start.getTokenIndex()
|
|
j = self.stop.getTokenIndex()
|
|
if self.stop.getType() == EOF:
|
|
j = self.input.size()
|
|
|
|
badText = self.input.toString(i, j)
|
|
|
|
elif isinstance(self.start, Tree):
|
|
badText = self.input.toString(self.start, self.stop)
|
|
|
|
else:
|
|
# people should subclass if they alter the tree type so this
|
|
# next one is for sure correct.
|
|
badText = "<unknown>"
|
|
|
|
return badText
|
|
|
|
|
|
def toString(self):
|
|
if isinstance(self.trappedException, MissingTokenException):
|
|
return ("<missing type: "
|
|
+ str(self.trappedException.getMissingType())
|
|
+ ">")
|
|
|
|
elif isinstance(self.trappedException, UnwantedTokenException):
|
|
return ("<extraneous: "
|
|
+ str(self.trappedException.getUnexpectedToken())
|
|
+ ", resync=" + self.getText() + ">")
|
|
|
|
elif isinstance(self.trappedException, MismatchedTokenException):
|
|
return ("<mismatched token: "
|
|
+ str(self.trappedException.token)
|
|
+ ", resync=" + self.getText() + ">")
|
|
|
|
elif isinstance(self.trappedException, NoViableAltException):
|
|
return ("<unexpected: "
|
|
+ str(self.trappedException.token)
|
|
+ ", resync=" + self.getText() + ">")
|
|
|
|
return "<error: "+self.getText()+">"
|
|
|
|
|
|
class CommonTreeAdaptor(BaseTreeAdaptor):
|
|
"""
|
|
@brief A TreeAdaptor that works with any Tree implementation.
|
|
|
|
It provides
|
|
really just factory methods; all the work is done by BaseTreeAdaptor.
|
|
If you would like to have different tokens created than ClassicToken
|
|
objects, you need to override this and then set the parser tree adaptor to
|
|
use your subclass.
|
|
|
|
To get your parser to build nodes of a different type, override
|
|
create(Token), errorNode(), and to be safe, YourTreeClass.dupNode().
|
|
dupNode is called to duplicate nodes during rewrite operations.
|
|
"""
|
|
|
|
def dupNode(self, treeNode):
|
|
"""
|
|
Duplicate a node. This is part of the factory;
|
|
override if you want another kind of node to be built.
|
|
|
|
I could use reflection to prevent having to override this
|
|
but reflection is slow.
|
|
"""
|
|
|
|
if treeNode is None:
|
|
return None
|
|
|
|
return treeNode.dupNode()
|
|
|
|
|
|
def createWithPayload(self, payload):
|
|
return CommonTree(payload)
|
|
|
|
|
|
def createToken(self, fromToken=None, tokenType=None, text=None):
|
|
"""
|
|
Tell me how to create a token for use with imaginary token nodes.
|
|
For example, there is probably no input symbol associated with imaginary
|
|
token DECL, but you need to create it as a payload or whatever for
|
|
the DECL node as in ^(DECL type ID).
|
|
|
|
If you care what the token payload objects' type is, you should
|
|
override this method and any other createToken variant.
|
|
"""
|
|
|
|
if fromToken is not None:
|
|
return CommonToken(oldToken=fromToken)
|
|
|
|
return CommonToken(type=tokenType, text=text)
|
|
|
|
|
|
def setTokenBoundaries(self, t, startToken, stopToken):
|
|
"""
|
|
Track start/stop token for subtree root created for a rule.
|
|
Only works with Tree nodes. For rules that match nothing,
|
|
seems like this will yield start=i and stop=i-1 in a nil node.
|
|
Might be useful info so I'll not force to be i..i.
|
|
"""
|
|
|
|
if t is None:
|
|
return
|
|
|
|
start = 0
|
|
stop = 0
|
|
|
|
if startToken is not None:
|
|
start = startToken.index
|
|
|
|
if stopToken is not None:
|
|
stop = stopToken.index
|
|
|
|
t.setTokenStartIndex(start)
|
|
t.setTokenStopIndex(stop)
|
|
|
|
|
|
def getTokenStartIndex(self, t):
|
|
if t is None:
|
|
return -1
|
|
return t.getTokenStartIndex()
|
|
|
|
|
|
def getTokenStopIndex(self, t):
|
|
if t is None:
|
|
return -1
|
|
return t.getTokenStopIndex()
|
|
|
|
|
|
def getText(self, t):
|
|
if t is None:
|
|
return None
|
|
return t.getText()
|
|
|
|
|
|
def getType(self, t):
|
|
if t is None:
|
|
return INVALID_TOKEN_TYPE
|
|
|
|
return t.getType()
|
|
|
|
|
|
def getToken(self, t):
|
|
"""
|
|
What is the Token associated with this node? If
|
|
you are not using CommonTree, then you must
|
|
override this in your own adaptor.
|
|
"""
|
|
|
|
if isinstance(t, CommonTree):
|
|
return t.getToken()
|
|
|
|
return None # no idea what to do
|
|
|
|
|
|
def getChild(self, t, i):
|
|
if t is None:
|
|
return None
|
|
return t.getChild(i)
|
|
|
|
|
|
def getChildCount(self, t):
|
|
if t is None:
|
|
return 0
|
|
return t.getChildCount()
|
|
|
|
|
|
def getParent(self, t):
|
|
return t.getParent()
|
|
|
|
|
|
def setParent(self, t, parent):
|
|
t.setParent(parent)
|
|
|
|
|
|
def getChildIndex(self, t):
|
|
if t is None:
|
|
return 0
|
|
return t.getChildIndex()
|
|
|
|
|
|
def setChildIndex(self, t, index):
|
|
t.setChildIndex(index)
|
|
|
|
|
|
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
|
|
if parent is not None:
|
|
parent.replaceChildren(startChildIndex, stopChildIndex, t)
|
|
|
|
|
|
############################################################################
|
|
#
|
|
# streams
|
|
#
|
|
# TreeNodeStream
|
|
# \- BaseTree
|
|
# \- CommonTree
|
|
#
|
|
# TreeAdaptor
|
|
# \- BaseTreeAdaptor
|
|
# \- CommonTreeAdaptor
|
|
#
|
|
############################################################################
|
|
|
|
|
|
|
|
class TreeNodeStream(IntStream):
|
|
"""@brief A stream of tree nodes
|
|
|
|
It accessing nodes from a tree of some kind.
|
|
"""
|
|
|
|
# TreeNodeStream is abstract, no need to complain about not implemented
|
|
# abstract methods
|
|
# pylint: disable-msg=W0223
|
|
|
|
def get(self, i):
|
|
"""Get a tree node at an absolute index i; 0..n-1.
|
|
If you don't want to buffer up nodes, then this method makes no
|
|
sense for you.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def LT(self, k):
|
|
"""
|
|
Get tree node at current input pointer + i ahead where i=1 is next node.
|
|
i<0 indicates nodes in the past. So LT(-1) is previous node, but
|
|
implementations are not required to provide results for k < -1.
|
|
LT(0) is undefined. For i>=n, return null.
|
|
Return null for LT(0) and any index that results in an absolute address
|
|
that is negative.
|
|
|
|
This is analogus to the LT() method of the TokenStream, but this
|
|
returns a tree node instead of a token. Makes code gen identical
|
|
for both parser and tree grammars. :)
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTreeSource(self):
|
|
"""
|
|
Where is this stream pulling nodes from? This is not the name, but
|
|
the object that provides node objects.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTokenStream(self):
|
|
"""
|
|
If the tree associated with this stream was created from a TokenStream,
|
|
you can specify it here. Used to do rule $text attribute in tree
|
|
parser. Optional unless you use tree parser rule text attribute
|
|
or output=template and rewrite=true options.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def getTreeAdaptor(self):
|
|
"""
|
|
What adaptor can tell me how to interpret/navigate nodes and
|
|
trees. E.g., get text of a node.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def setUniqueNavigationNodes(self, uniqueNavigationNodes):
|
|
"""
|
|
As we flatten the tree, we use UP, DOWN nodes to represent
|
|
the tree structure. When debugging we need unique nodes
|
|
so we have to instantiate new ones. When doing normal tree
|
|
parsing, it's slow and a waste of memory to create unique
|
|
navigation nodes. Default should be false;
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def reset(self):
|
|
"""
|
|
Reset the tree node stream in such a way that it acts like
|
|
a freshly constructed stream.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def toString(self, start, stop):
|
|
"""
|
|
Return the text of all nodes from start to stop, inclusive.
|
|
If the stream does not buffer all the nodes then it can still
|
|
walk recursively from start until stop. You can always return
|
|
null or "" too, but users should not access $ruleLabel.text in
|
|
an action of course in that case.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
# REWRITING TREES (used by tree parser)
|
|
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
|
|
"""
|
|
Replace from start to stop child index of parent with t, which might
|
|
be a list. Number of children may be different
|
|
after this call. The stream is notified because it is walking the
|
|
tree and might need to know you are monkeying with the underlying
|
|
tree. Also, it might be able to modify the node stream to avoid
|
|
restreaming for future phases.
|
|
|
|
If parent is null, don't do anything; must be at root of overall tree.
|
|
Can't replace whatever points to the parent externally. Do nothing.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
class CommonTreeNodeStream(TreeNodeStream):
|
|
"""@brief A buffered stream of tree nodes.
|
|
|
|
Nodes can be from a tree of ANY kind.
|
|
|
|
This node stream sucks all nodes out of the tree specified in
|
|
the constructor during construction and makes pointers into
|
|
the tree using an array of Object pointers. The stream necessarily
|
|
includes pointers to DOWN and UP and EOF nodes.
|
|
|
|
This stream knows how to mark/release for backtracking.
|
|
|
|
This stream is most suitable for tree interpreters that need to
|
|
jump around a lot or for tree parsers requiring speed (at cost of memory).
|
|
There is some duplicated functionality here with UnBufferedTreeNodeStream
|
|
but just in bookkeeping, not tree walking etc...
|
|
|
|
@see UnBufferedTreeNodeStream
|
|
"""
|
|
|
|
def __init__(self, *args):
|
|
TreeNodeStream.__init__(self)
|
|
|
|
if len(args) == 1:
|
|
adaptor = CommonTreeAdaptor()
|
|
tree = args[0]
|
|
|
|
nodes = None
|
|
down = None
|
|
up = None
|
|
eof = None
|
|
|
|
elif len(args) == 2:
|
|
adaptor = args[0]
|
|
tree = args[1]
|
|
|
|
nodes = None
|
|
down = None
|
|
up = None
|
|
eof = None
|
|
|
|
elif len(args) == 3:
|
|
parent = args[0]
|
|
start = args[1]
|
|
stop = args[2]
|
|
|
|
adaptor = parent.adaptor
|
|
tree = parent.root
|
|
|
|
nodes = parent.nodes[start:stop]
|
|
down = parent.down
|
|
up = parent.up
|
|
eof = parent.eof
|
|
|
|
else:
|
|
raise TypeError("Invalid arguments")
|
|
|
|
# all these navigation nodes are shared and hence they
|
|
# cannot contain any line/column info
|
|
if down is not None:
|
|
self.down = down
|
|
else:
|
|
self.down = adaptor.createFromType(DOWN, "DOWN")
|
|
|
|
if up is not None:
|
|
self.up = up
|
|
else:
|
|
self.up = adaptor.createFromType(UP, "UP")
|
|
|
|
if eof is not None:
|
|
self.eof = eof
|
|
else:
|
|
self.eof = adaptor.createFromType(EOF, "EOF")
|
|
|
|
# The complete mapping from stream index to tree node.
|
|
# This buffer includes pointers to DOWN, UP, and EOF nodes.
|
|
# It is built upon ctor invocation. The elements are type
|
|
# Object as we don't what the trees look like.
|
|
|
|
# Load upon first need of the buffer so we can set token types
|
|
# of interest for reverseIndexing. Slows us down a wee bit to
|
|
# do all of the if p==-1 testing everywhere though.
|
|
if nodes is not None:
|
|
self.nodes = nodes
|
|
else:
|
|
self.nodes = []
|
|
|
|
# Pull nodes from which tree?
|
|
self.root = tree
|
|
|
|
# IF this tree (root) was created from a token stream, track it.
|
|
self.tokens = None
|
|
|
|
# What tree adaptor was used to build these trees
|
|
self.adaptor = adaptor
|
|
|
|
# Reuse same DOWN, UP navigation nodes unless this is true
|
|
self.uniqueNavigationNodes = False
|
|
|
|
# The index into the nodes list of the current node (next node
|
|
# to consume). If -1, nodes array not filled yet.
|
|
self.p = -1
|
|
|
|
# Track the last mark() call result value for use in rewind().
|
|
self.lastMarker = None
|
|
|
|
# Stack of indexes used for push/pop calls
|
|
self.calls = []
|
|
|
|
|
|
def __iter__(self):
|
|
return TreeIterator(self.root, self.adaptor)
|
|
|
|
|
|
def fillBuffer(self):
|
|
"""Walk tree with depth-first-search and fill nodes buffer.
|
|
Don't do DOWN, UP nodes if its a list (t is isNil).
|
|
"""
|
|
|
|
self._fillBuffer(self.root)
|
|
self.p = 0 # buffer of nodes intialized now
|
|
|
|
|
|
def _fillBuffer(self, t):
|
|
nil = self.adaptor.isNil(t)
|
|
|
|
if not nil:
|
|
self.nodes.append(t) # add this node
|
|
|
|
# add DOWN node if t has children
|
|
n = self.adaptor.getChildCount(t)
|
|
if not nil and n > 0:
|
|
self.addNavigationNode(DOWN)
|
|
|
|
# and now add all its children
|
|
for c in range(n):
|
|
self._fillBuffer(self.adaptor.getChild(t, c))
|
|
|
|
# add UP node if t has children
|
|
if not nil and n > 0:
|
|
self.addNavigationNode(UP)
|
|
|
|
|
|
def getNodeIndex(self, node):
|
|
"""What is the stream index for node? 0..n-1
|
|
Return -1 if node not found.
|
|
"""
|
|
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
for i, t in enumerate(self.nodes):
|
|
if t == node:
|
|
return i
|
|
|
|
return -1
|
|
|
|
|
|
def addNavigationNode(self, ttype):
|
|
"""
|
|
As we flatten the tree, we use UP, DOWN nodes to represent
|
|
the tree structure. When debugging we need unique nodes
|
|
so instantiate new ones when uniqueNavigationNodes is true.
|
|
"""
|
|
|
|
navNode = None
|
|
|
|
if ttype == DOWN:
|
|
if self.hasUniqueNavigationNodes():
|
|
navNode = self.adaptor.createFromType(DOWN, "DOWN")
|
|
|
|
else:
|
|
navNode = self.down
|
|
|
|
else:
|
|
if self.hasUniqueNavigationNodes():
|
|
navNode = self.adaptor.createFromType(UP, "UP")
|
|
|
|
else:
|
|
navNode = self.up
|
|
|
|
self.nodes.append(navNode)
|
|
|
|
|
|
def get(self, i):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
return self.nodes[i]
|
|
|
|
|
|
def LT(self, k):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
if k == 0:
|
|
return None
|
|
|
|
if k < 0:
|
|
return self.LB(-k)
|
|
|
|
if self.p + k - 1 >= len(self.nodes):
|
|
return self.eof
|
|
|
|
return self.nodes[self.p + k - 1]
|
|
|
|
|
|
def getCurrentSymbol(self):
|
|
return self.LT(1)
|
|
|
|
|
|
def LB(self, k):
|
|
"""Look backwards k nodes"""
|
|
|
|
if k == 0:
|
|
return None
|
|
|
|
if self.p - k < 0:
|
|
return None
|
|
|
|
return self.nodes[self.p - k]
|
|
|
|
|
|
def isEOF(self, obj):
|
|
return self.adaptor.getType(obj) == EOF
|
|
|
|
|
|
def getTreeSource(self):
|
|
return self.root
|
|
|
|
|
|
def getSourceName(self):
|
|
return self.getTokenStream().getSourceName()
|
|
|
|
|
|
def getTokenStream(self):
|
|
return self.tokens
|
|
|
|
|
|
def setTokenStream(self, tokens):
|
|
self.tokens = tokens
|
|
|
|
|
|
def getTreeAdaptor(self):
|
|
return self.adaptor
|
|
|
|
|
|
def hasUniqueNavigationNodes(self):
|
|
return self.uniqueNavigationNodes
|
|
|
|
|
|
def setUniqueNavigationNodes(self, uniqueNavigationNodes):
|
|
self.uniqueNavigationNodes = uniqueNavigationNodes
|
|
|
|
|
|
def consume(self):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
self.p += 1
|
|
|
|
|
|
def LA(self, i):
|
|
return self.adaptor.getType(self.LT(i))
|
|
|
|
|
|
def mark(self):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
|
|
self.lastMarker = self.index()
|
|
return self.lastMarker
|
|
|
|
|
|
def release(self, marker=None):
|
|
# no resources to release
|
|
|
|
pass
|
|
|
|
|
|
def index(self):
|
|
return self.p
|
|
|
|
|
|
def rewind(self, marker=None):
|
|
if marker is None:
|
|
marker = self.lastMarker
|
|
|
|
self.seek(marker)
|
|
|
|
|
|
def seek(self, index):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
self.p = index
|
|
|
|
|
|
def push(self, index):
|
|
"""
|
|
Make stream jump to a new location, saving old location.
|
|
Switch back with pop().
|
|
"""
|
|
|
|
self.calls.append(self.p) # save current index
|
|
self.seek(index)
|
|
|
|
|
|
def pop(self):
|
|
"""
|
|
Seek back to previous index saved during last push() call.
|
|
Return top of stack (return index).
|
|
"""
|
|
|
|
ret = self.calls.pop(-1)
|
|
self.seek(ret)
|
|
return ret
|
|
|
|
|
|
def reset(self):
|
|
self.p = 0
|
|
self.lastMarker = 0
|
|
self.calls = []
|
|
|
|
|
|
def size(self):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
return len(self.nodes)
|
|
|
|
|
|
# TREE REWRITE INTERFACE
|
|
|
|
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
|
|
if parent is not None:
|
|
self.adaptor.replaceChildren(
|
|
parent, startChildIndex, stopChildIndex, t
|
|
)
|
|
|
|
|
|
def __str__(self):
|
|
"""Used for testing, just return the token type stream"""
|
|
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
return ' '.join([str(self.adaptor.getType(node))
|
|
for node in self.nodes
|
|
])
|
|
|
|
|
|
def toString(self, start, stop):
|
|
if start is None or stop is None:
|
|
return None
|
|
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
#System.out.println("stop: "+stop);
|
|
#if ( start instanceof CommonTree )
|
|
# System.out.print("toString: "+((CommonTree)start).getToken()+", ");
|
|
#else
|
|
# System.out.println(start);
|
|
#if ( stop instanceof CommonTree )
|
|
# System.out.println(((CommonTree)stop).getToken());
|
|
#else
|
|
# System.out.println(stop);
|
|
|
|
# if we have the token stream, use that to dump text in order
|
|
if self.tokens is not None:
|
|
beginTokenIndex = self.adaptor.getTokenStartIndex(start)
|
|
endTokenIndex = self.adaptor.getTokenStopIndex(stop)
|
|
|
|
# if it's a tree, use start/stop index from start node
|
|
# else use token range from start/stop nodes
|
|
if self.adaptor.getType(stop) == UP:
|
|
endTokenIndex = self.adaptor.getTokenStopIndex(start)
|
|
|
|
elif self.adaptor.getType(stop) == EOF:
|
|
endTokenIndex = self.size() -2 # don't use EOF
|
|
|
|
return self.tokens.toString(beginTokenIndex, endTokenIndex)
|
|
|
|
# walk nodes looking for start
|
|
i, t = 0, None
|
|
for i, t in enumerate(self.nodes):
|
|
if t == start:
|
|
break
|
|
|
|
# now walk until we see stop, filling string buffer with text
|
|
buf = []
|
|
t = self.nodes[i]
|
|
while t != stop:
|
|
text = self.adaptor.getText(t)
|
|
if text is None:
|
|
text = " " + self.adaptor.getType(t)
|
|
|
|
buf.append(text)
|
|
i += 1
|
|
t = self.nodes[i]
|
|
|
|
# include stop node too
|
|
text = self.adaptor.getText(stop)
|
|
if text is None:
|
|
text = " " +self.adaptor.getType(stop)
|
|
|
|
buf.append(text)
|
|
|
|
return ''.join(buf)
|
|
|
|
|
|
## iterator interface
|
|
def __iter__(self):
|
|
if self.p == -1:
|
|
self.fillBuffer()
|
|
|
|
for node in self.nodes:
|
|
yield node
|
|
|
|
|
|
#############################################################################
|
|
#
|
|
# tree parser
|
|
#
|
|
#############################################################################
|
|
|
|
class TreeParser(BaseRecognizer):
|
|
"""@brief Baseclass for generated tree parsers.
|
|
|
|
A parser for a stream of tree nodes. "tree grammars" result in a subclass
|
|
of this. All the error reporting and recovery is shared with Parser via
|
|
the BaseRecognizer superclass.
|
|
"""
|
|
|
|
def __init__(self, input, state=None):
|
|
BaseRecognizer.__init__(self, state)
|
|
|
|
self.input = None
|
|
self.setTreeNodeStream(input)
|
|
|
|
|
|
def reset(self):
|
|
BaseRecognizer.reset(self) # reset all recognizer state variables
|
|
if self.input is not None:
|
|
self.input.seek(0) # rewind the input
|
|
|
|
|
|
def setTreeNodeStream(self, input):
|
|
"""Set the input stream"""
|
|
|
|
self.input = input
|
|
|
|
|
|
def getTreeNodeStream(self):
|
|
return self.input
|
|
|
|
|
|
def getSourceName(self):
|
|
return self.input.getSourceName()
|
|
|
|
|
|
def getCurrentInputSymbol(self, input):
|
|
return input.LT(1)
|
|
|
|
|
|
def getMissingSymbol(self, input, e, expectedTokenType, follow):
|
|
tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"
|
|
adaptor = input.adaptor
|
|
return adaptor.createToken(
|
|
CommonToken(type=expectedTokenType, text=tokenText))
|
|
|
|
|
|
# precompiled regex used by inContext
|
|
dotdot = ".*[^.]\\.\\.[^.].*"
|
|
doubleEtc = ".*\\.\\.\\.\\s+\\.\\.\\..*"
|
|
dotdotPattern = re.compile(dotdot)
|
|
doubleEtcPattern = re.compile(doubleEtc)
|
|
|
|
def inContext(self, context, adaptor=None, tokenName=None, t=None):
|
|
"""Check if current node in input has a context.
|
|
|
|
Context means sequence of nodes towards root of tree. For example,
|
|
you might say context is "MULT" which means my parent must be MULT.
|
|
"CLASS VARDEF" says current node must be child of a VARDEF and whose
|
|
parent is a CLASS node. You can use "..." to mean zero-or-more nodes.
|
|
"METHOD ... VARDEF" means my parent is VARDEF and somewhere above
|
|
that is a METHOD node. The first node in the context is not
|
|
necessarily the root. The context matcher stops matching and returns
|
|
true when it runs out of context. There is no way to force the first
|
|
node to be the root.
|
|
"""
|
|
|
|
return _inContext(
|
|
self.input.getTreeAdaptor(), self.getTokenNames(),
|
|
self.input.LT(1), context)
|
|
|
|
@classmethod
|
|
def _inContext(cls, adaptor, tokenNames, t, context):
|
|
"""The worker for inContext.
|
|
|
|
It's static and full of parameters for testing purposes.
|
|
"""
|
|
|
|
if cls.dotdotPattern.match(context):
|
|
# don't allow "..", must be "..."
|
|
raise ValueError("invalid syntax: ..")
|
|
|
|
if cls.doubleEtcPattern.match(context):
|
|
# don't allow double "..."
|
|
raise ValueError("invalid syntax: ... ...")
|
|
|
|
# ensure spaces around ...
|
|
context = context.replace("...", " ... ")
|
|
context = context.strip()
|
|
nodes = context.split()
|
|
|
|
ni = len(nodes) - 1
|
|
t = adaptor.getParent(t)
|
|
while ni >= 0 and t is not None:
|
|
if nodes[ni] == "...":
|
|
# walk upwards until we see nodes[ni-1] then continue walking
|
|
if ni == 0:
|
|
# ... at start is no-op
|
|
return True
|
|
goal = nodes[ni-1]
|
|
ancestor = cls._getAncestor(adaptor, tokenNames, t, goal)
|
|
if ancestor is None:
|
|
return False
|
|
t = ancestor
|
|
ni -= 1
|
|
|
|
name = tokenNames[adaptor.getType(t)]
|
|
if name != nodes[ni]:
|
|
return False
|
|
|
|
# advance to parent and to previous element in context node list
|
|
ni -= 1
|
|
t = adaptor.getParent(t)
|
|
|
|
# at root but more nodes to match
|
|
if t is None and ni >= 0:
|
|
return False
|
|
|
|
return True
|
|
|
|
@staticmethod
|
|
def _getAncestor(adaptor, tokenNames, t, goal):
|
|
"""Helper for static inContext."""
|
|
while t is not None:
|
|
name = tokenNames[adaptor.getType(t)]
|
|
if name == goal:
|
|
return t
|
|
t = adaptor.getParent(t)
|
|
|
|
return None
|
|
|
|
|
|
def matchAny(self, ignore): # ignore stream, copy of this.input
|
|
"""
|
|
Match '.' in tree parser has special meaning. Skip node or
|
|
entire tree if node has children. If children, scan until
|
|
corresponding UP node.
|
|
"""
|
|
|
|
self._state.errorRecovery = False
|
|
|
|
look = self.input.LT(1)
|
|
if self.input.getTreeAdaptor().getChildCount(look) == 0:
|
|
self.input.consume() # not subtree, consume 1 node and return
|
|
return
|
|
|
|
# current node is a subtree, skip to corresponding UP.
|
|
# must count nesting level to get right UP
|
|
level = 0
|
|
tokenType = self.input.getTreeAdaptor().getType(look)
|
|
while tokenType != EOF and not (tokenType == UP and level==0):
|
|
self.input.consume()
|
|
look = self.input.LT(1)
|
|
tokenType = self.input.getTreeAdaptor().getType(look)
|
|
if tokenType == DOWN:
|
|
level += 1
|
|
|
|
elif tokenType == UP:
|
|
level -= 1
|
|
|
|
self.input.consume() # consume UP
|
|
|
|
|
|
def mismatch(self, input, ttype, follow):
|
|
"""
|
|
We have DOWN/UP nodes in the stream that have no line info; override.
|
|
plus we want to alter the exception type. Don't try to recover
|
|
from tree parser errors inline...
|
|
"""
|
|
|
|
raise MismatchedTreeNodeException(ttype, input)
|
|
|
|
|
|
def getErrorHeader(self, e):
|
|
"""
|
|
Prefix error message with the grammar name because message is
|
|
always intended for the programmer because the parser built
|
|
the input tree not the user.
|
|
"""
|
|
|
|
return (self.getGrammarFileName() +
|
|
": node from %sline %s:%s"
|
|
% (['', "after "][e.approximateLineInfo],
|
|
e.line,
|
|
e.charPositionInLine
|
|
)
|
|
)
|
|
|
|
def getErrorMessage(self, e, tokenNames):
|
|
"""
|
|
Tree parsers parse nodes they usually have a token object as
|
|
payload. Set the exception token and do the default behavior.
|
|
"""
|
|
|
|
if isinstance(self, TreeParser):
|
|
adaptor = e.input.getTreeAdaptor()
|
|
e.token = adaptor.getToken(e.node)
|
|
if e.token is not None: # could be an UP/DOWN node
|
|
e.token = CommonToken(
|
|
type=adaptor.getType(e.node),
|
|
text=adaptor.getText(e.node)
|
|
)
|
|
|
|
return BaseRecognizer.getErrorMessage(self, e, tokenNames)
|
|
|
|
|
|
def traceIn(self, ruleName, ruleIndex):
|
|
BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))
|
|
|
|
|
|
def traceOut(self, ruleName, ruleIndex):
|
|
BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
|
|
|
|
|
|
#############################################################################
|
|
#
|
|
# tree visitor
|
|
#
|
|
#############################################################################
|
|
|
|
class TreeVisitor(object):
|
|
"""Do a depth first walk of a tree, applying pre() and post() actions
|
|
we go.
|
|
"""
|
|
|
|
def __init__(self, adaptor=None):
|
|
if adaptor is not None:
|
|
self.adaptor = adaptor
|
|
else:
|
|
self.adaptor = CommonTreeAdaptor()
|
|
|
|
def visit(self, t, pre_action=None, post_action=None):
|
|
"""Visit every node in tree t and trigger an action for each node
|
|
before/after having visited all of its children. Bottom up walk.
|
|
Execute both actions even if t has no children. Ignore return
|
|
results from transforming children since they will have altered
|
|
the child list of this node (their parent). Return result of
|
|
applying post action to this node.
|
|
|
|
The Python version differs from the Java version by taking two
|
|
callables 'pre_action' and 'post_action' instead of a class instance
|
|
that wraps those methods. Those callables must accept a TreeNode as
|
|
their single argument and return the (potentially transformed or
|
|
replaced) TreeNode.
|
|
"""
|
|
|
|
isNil = self.adaptor.isNil(t)
|
|
if pre_action is not None and not isNil:
|
|
# if rewritten, walk children of new t
|
|
t = pre_action(t)
|
|
|
|
idx = 0
|
|
while idx < self.adaptor.getChildCount(t):
|
|
child = self.adaptor.getChild(t, idx)
|
|
self.visit(child, pre_action, post_action)
|
|
idx += 1
|
|
|
|
if post_action is not None and not isNil:
|
|
t = post_action(t)
|
|
|
|
return t
|
|
|
|
#############################################################################
|
|
#
|
|
# tree iterator
|
|
#
|
|
#############################################################################
|
|
|
|
class TreeIterator(object):
|
|
"""
|
|
Return a node stream from a doubly-linked tree whose nodes
|
|
know what child index they are.
|
|
|
|
Emit navigation nodes (DOWN, UP, and EOF) to let show tree structure.
|
|
"""
|
|
|
|
def __init__(self, tree, adaptor=None):
|
|
if adaptor is None:
|
|
adaptor = CommonTreeAdaptor()
|
|
|
|
self.root = tree
|
|
self.adaptor = adaptor
|
|
|
|
self.first_time = True
|
|
self.tree = tree
|
|
|
|
# If we emit UP/DOWN nodes, we need to spit out multiple nodes per
|
|
# next() call.
|
|
self.nodes = []
|
|
|
|
# navigation nodes to return during walk and at end
|
|
self.down = adaptor.createFromType(DOWN, "DOWN")
|
|
self.up = adaptor.createFromType(UP, "UP")
|
|
self.eof = adaptor.createFromType(EOF, "EOF")
|
|
|
|
|
|
def reset(self):
|
|
self.first_time = True
|
|
self.tree = self.root
|
|
self.nodes = []
|
|
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
|
|
def has_next(self):
|
|
if self.first_time:
|
|
return self.root is not None
|
|
|
|
if len(self.nodes) > 0:
|
|
return True
|
|
|
|
if self.tree is None:
|
|
return False
|
|
|
|
if self.adaptor.getChildCount(self.tree) > 0:
|
|
return True
|
|
|
|
# back at root?
|
|
return self.adaptor.getParent(self.tree) is not None
|
|
|
|
|
|
def next(self):
|
|
if not self.has_next():
|
|
raise StopIteration
|
|
|
|
if self.first_time:
|
|
# initial condition
|
|
self.first_time = False
|
|
if self.adaptor.getChildCount(self.tree) == 0:
|
|
# single node tree (special)
|
|
self.nodes.append(self.eof)
|
|
return self.tree
|
|
|
|
return self.tree
|
|
|
|
# if any queued up, use those first
|
|
if len(self.nodes) > 0:
|
|
return self.nodes.pop(0)
|
|
|
|
# no nodes left?
|
|
if self.tree is None:
|
|
return self.eof
|
|
|
|
# next node will be child 0 if any children
|
|
if self.adaptor.getChildCount(self.tree) > 0:
|
|
self.tree = self.adaptor.getChild(self.tree, 0)
|
|
# real node is next after DOWN
|
|
self.nodes.append(self.tree)
|
|
return self.down
|
|
|
|
# if no children, look for next sibling of tree or ancestor
|
|
parent = self.adaptor.getParent(self.tree)
|
|
# while we're out of siblings, keep popping back up towards root
|
|
while (parent is not None
|
|
and self.adaptor.getChildIndex(self.tree)+1 >= self.adaptor.getChildCount(parent)):
|
|
# we're moving back up
|
|
self.nodes.append(self.up)
|
|
self.tree = parent
|
|
parent = self.adaptor.getParent(self.tree)
|
|
|
|
# no nodes left?
|
|
if parent is None:
|
|
self.tree = None # back at root? nothing left then
|
|
self.nodes.append(self.eof) # add to queue, might have UP nodes in there
|
|
return self.nodes.pop(0)
|
|
|
|
# must have found a node with an unvisited sibling
|
|
# move to it and return it
|
|
nextSiblingIndex = self.adaptor.getChildIndex(self.tree) + 1
|
|
self.tree = self.adaptor.getChild(parent, nextSiblingIndex)
|
|
self.nodes.append(self.tree) # add to queue, might have UP nodes in there
|
|
return self.nodes.pop(0)
|
|
|
|
|
|
|
|
#############################################################################
|
|
#
|
|
# streams for rule rewriting
|
|
#
|
|
#############################################################################
|
|
|
|
class RewriteRuleElementStream(object):
|
|
"""@brief Internal helper class.
|
|
|
|
A generic list of elements tracked in an alternative to be used in
|
|
a -> rewrite rule. We need to subclass to fill in the next() method,
|
|
which returns either an AST node wrapped around a token payload or
|
|
an existing subtree.
|
|
|
|
Once you start next()ing, do not try to add more elements. It will
|
|
break the cursor tracking I believe.
|
|
|
|
@see org.antlr.runtime.tree.RewriteRuleSubtreeStream
|
|
@see org.antlr.runtime.tree.RewriteRuleTokenStream
|
|
|
|
TODO: add mechanism to detect/puke on modification after reading from
|
|
stream
|
|
"""
|
|
|
|
def __init__(self, adaptor, elementDescription, elements=None):
|
|
# Cursor 0..n-1. If singleElement!=null, cursor is 0 until you next(),
|
|
# which bumps it to 1 meaning no more elements.
|
|
self.cursor = 0
|
|
|
|
# Track single elements w/o creating a list. Upon 2nd add, alloc list
|
|
self.singleElement = None
|
|
|
|
# The list of tokens or subtrees we are tracking
|
|
self.elements = None
|
|
|
|
# Once a node / subtree has been used in a stream, it must be dup'd
|
|
# from then on. Streams are reset after subrules so that the streams
|
|
# can be reused in future subrules. So, reset must set a dirty bit.
|
|
# If dirty, then next() always returns a dup.
|
|
self.dirty = False
|
|
|
|
# The element or stream description; usually has name of the token or
|
|
# rule reference that this list tracks. Can include rulename too, but
|
|
# the exception would track that info.
|
|
self.elementDescription = elementDescription
|
|
|
|
self.adaptor = adaptor
|
|
|
|
if isinstance(elements, (list, tuple)):
|
|
# Create a stream, but feed off an existing list
|
|
self.singleElement = None
|
|
self.elements = elements
|
|
|
|
else:
|
|
# Create a stream with one element
|
|
self.add(elements)
|
|
|
|
|
|
def reset(self):
|
|
"""
|
|
Reset the condition of this stream so that it appears we have
|
|
not consumed any of its elements. Elements themselves are untouched.
|
|
Once we reset the stream, any future use will need duplicates. Set
|
|
the dirty bit.
|
|
"""
|
|
|
|
self.cursor = 0
|
|
self.dirty = True
|
|
|
|
|
|
def add(self, el):
|
|
if el is None:
|
|
return
|
|
|
|
if self.elements is not None: # if in list, just add
|
|
self.elements.append(el)
|
|
return
|
|
|
|
if self.singleElement is None: # no elements yet, track w/o list
|
|
self.singleElement = el
|
|
return
|
|
|
|
# adding 2nd element, move to list
|
|
self.elements = []
|
|
self.elements.append(self.singleElement)
|
|
self.singleElement = None
|
|
self.elements.append(el)
|
|
|
|
|
|
def nextTree(self):
|
|
"""
|
|
Return the next element in the stream. If out of elements, throw
|
|
an exception unless size()==1. If size is 1, then return elements[0].
|
|
|
|
Return a duplicate node/subtree if stream is out of elements and
|
|
size==1. If we've already used the element, dup (dirty bit set).
|
|
"""
|
|
|
|
if (self.dirty
|
|
or (self.cursor >= len(self) and len(self) == 1)
|
|
):
|
|
# if out of elements and size is 1, dup
|
|
el = self._next()
|
|
return self.dup(el)
|
|
|
|
# test size above then fetch
|
|
el = self._next()
|
|
return el
|
|
|
|
|
|
def _next(self):
|
|
"""
|
|
do the work of getting the next element, making sure that it's
|
|
a tree node or subtree. Deal with the optimization of single-
|
|
element list versus list of size > 1. Throw an exception
|
|
if the stream is empty or we're out of elements and size>1.
|
|
protected so you can override in a subclass if necessary.
|
|
"""
|
|
|
|
if len(self) == 0:
|
|
raise RewriteEmptyStreamException(self.elementDescription)
|
|
|
|
if self.cursor >= len(self): # out of elements?
|
|
if len(self) == 1: # if size is 1, it's ok; return and we'll dup
|
|
return self.toTree(self.singleElement)
|
|
|
|
# out of elements and size was not 1, so we can't dup
|
|
raise RewriteCardinalityException(self.elementDescription)
|
|
|
|
# we have elements
|
|
if self.singleElement is not None:
|
|
self.cursor += 1 # move cursor even for single element list
|
|
return self.toTree(self.singleElement)
|
|
|
|
# must have more than one in list, pull from elements
|
|
o = self.toTree(self.elements[self.cursor])
|
|
self.cursor += 1
|
|
return o
|
|
|
|
|
|
def dup(self, el):
|
|
"""
|
|
When constructing trees, sometimes we need to dup a token or AST
|
|
subtree. Dup'ing a token means just creating another AST node
|
|
around it. For trees, you must call the adaptor.dupTree() unless
|
|
the element is for a tree root; then it must be a node dup.
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
def toTree(self, el):
|
|
"""
|
|
Ensure stream emits trees; tokens must be converted to AST nodes.
|
|
AST nodes can be passed through unmolested.
|
|
"""
|
|
|
|
return el
|
|
|
|
|
|
def hasNext(self):
|
|
return ( (self.singleElement is not None and self.cursor < 1)
|
|
or (self.elements is not None
|
|
and self.cursor < len(self.elements)
|
|
)
|
|
)
|
|
|
|
|
|
def size(self):
|
|
if self.singleElement is not None:
|
|
return 1
|
|
|
|
if self.elements is not None:
|
|
return len(self.elements)
|
|
|
|
return 0
|
|
|
|
__len__ = size
|
|
|
|
|
|
def getDescription(self):
|
|
"""Deprecated. Directly access elementDescription attribute"""
|
|
|
|
return self.elementDescription
|
|
|
|
|
|
class RewriteRuleTokenStream(RewriteRuleElementStream):
|
|
"""@brief Internal helper class."""
|
|
|
|
def toTree(self, el):
|
|
# Don't convert to a tree unless they explicitly call nextTree.
|
|
# This way we can do hetero tree nodes in rewrite.
|
|
return el
|
|
|
|
|
|
def nextNode(self):
|
|
t = self._next()
|
|
return self.adaptor.createWithPayload(t)
|
|
|
|
|
|
def nextToken(self):
|
|
return self._next()
|
|
|
|
|
|
def dup(self, el):
|
|
raise TypeError("dup can't be called for a token stream.")
|
|
|
|
|
|
class RewriteRuleSubtreeStream(RewriteRuleElementStream):
|
|
"""@brief Internal helper class."""
|
|
|
|
def nextNode(self):
|
|
"""
|
|
Treat next element as a single node even if it's a subtree.
|
|
This is used instead of next() when the result has to be a
|
|
tree root node. Also prevents us from duplicating recently-added
|
|
children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration
|
|
must dup the type node, but ID has been added.
|
|
|
|
Referencing a rule result twice is ok; dup entire tree as
|
|
we can't be adding trees as root; e.g., expr expr.
|
|
|
|
Hideous code duplication here with super.next(). Can't think of
|
|
a proper way to refactor. This needs to always call dup node
|
|
and super.next() doesn't know which to call: dup node or dup tree.
|
|
"""
|
|
|
|
if (self.dirty
|
|
or (self.cursor >= len(self) and len(self) == 1)
|
|
):
|
|
# if out of elements and size is 1, dup (at most a single node
|
|
# since this is for making root nodes).
|
|
el = self._next()
|
|
return self.adaptor.dupNode(el)
|
|
|
|
# test size above then fetch
|
|
el = self._next()
|
|
while self.adaptor.isNil(el) and self.adaptor.getChildCount(el) == 1:
|
|
el = self.adaptor.getChild(el, 0)
|
|
|
|
# dup just the root (want node here)
|
|
return self.adaptor.dupNode(el)
|
|
|
|
|
|
def dup(self, el):
|
|
return self.adaptor.dupTree(el)
|
|
|
|
|
|
|
|
class RewriteRuleNodeStream(RewriteRuleElementStream):
|
|
"""
|
|
Queues up nodes matched on left side of -> in a tree parser. This is
|
|
the analog of RewriteRuleTokenStream for normal parsers.
|
|
"""
|
|
|
|
def nextNode(self):
|
|
return self._next()
|
|
|
|
|
|
def toTree(self, el):
|
|
return self.adaptor.dupNode(el)
|
|
|
|
|
|
def dup(self, el):
|
|
# we dup every node, so don't have to worry about calling dup; short-
|
|
#circuited next() so it doesn't call.
|
|
raise TypeError("dup can't be called for a node stream.")
|
|
|
|
|
|
class TreeRuleReturnScope(RuleReturnScope):
|
|
"""
|
|
This is identical to the ParserRuleReturnScope except that
|
|
the start property is a tree nodes not Token object
|
|
when you are parsing trees. To be generic the tree node types
|
|
have to be Object.
|
|
"""
|
|
|
|
def __init__(self):
|
|
self.start = None
|
|
self.tree = None
|
|
|
|
|
|
def getStart(self):
|
|
return self.start
|
|
|
|
|
|
def getTree(self):
|
|
return self.tree
|