311 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			311 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
| """ANTLR3 runtime package"""
 | |
| 
 | |
| # begin[licence]
 | |
| #
 | |
| # [The "BSD licence"]
 | |
| # Copyright (c) 2005-2012 Terence Parr
 | |
| # All rights reserved.
 | |
| #
 | |
| # Redistribution and use in source and binary forms, with or without
 | |
| # modification, are permitted provided that the following conditions
 | |
| # are met:
 | |
| # 1. Redistributions of source code must retain the above copyright
 | |
| #    notice, this list of conditions and the following disclaimer.
 | |
| # 2. Redistributions in binary form must reproduce the above copyright
 | |
| #    notice, this list of conditions and the following disclaimer in the
 | |
| #    documentation and/or other materials provided with the distribution.
 | |
| # 3. The name of the author may not be used to endorse or promote products
 | |
| #    derived from this software without specific prior written permission.
 | |
| #
 | |
| # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 | |
| # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | |
| # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | |
| # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
| # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | |
| # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| #
 | |
| # end[licence]
 | |
| 
 | |
| from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE
 | |
| 
 | |
| ############################################################################
 | |
| #
 | |
| # basic token interface
 | |
| #
 | |
| ############################################################################
 | |
| 
 | |
class Token(object):
    """@brief Abstract token baseclass.

    Stores the attributes common to every token implementation.  type,
    text, line, charPositionInLine, channel and index are exposed as
    properties backed by underscore-prefixed instance attributes; the
    originating input is kept in the plain attribute ``input``.

    getInputStream() and setInputStream() raise NotImplementedError here
    and are meant to be provided by subclasses.
    """

    # Class-wide mapping from token type to token name.  Stays None until
    # registerTokenNamesMap() is called.
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        The mapping is stored by reference (it is not copied) and an
        entry mapping EOF to "EOF" is written into it, so the object
        passed in by the caller is modified.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """Initialise the token attributes.

        @param type  Token type.
        @param channel  Channel the token is on.
        @param text  Text of the token, or None.
        @param index  Index of the token in the token stream; -1 marks an
            invalid index.
        @param line  Line number of the token.
        @param charPositionInLine  Column of the token; -1 marks an
            invalid position.
        @param input  Stream the token was created from, or None.
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Store the line that was actually passed in.  Subclass
        # constructors that copy another token hand its line number to
        # this parameter, so it must not be discarded.
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and setter.
    @property
    def text(self):
        """Text of the token as stored on the instance."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value

    @property
    def type(self):
        """Token type as stored on the instance."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type; method-style equivalent of ``type``."""
        return self._type

    @property
    def typeName(self):
        """Name of the token type.

        When a non-empty TOKEN_NAMES_MAP is registered, the name is looked
        up there and "INVALID_TOKEN_TYPE" is returned for a type missing
        from the map.  Otherwise the type itself is converted with str().
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value

    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos

    @property
    def channel(self):
        """Channel the token is on."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value

    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value

    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        The base class always raises NotImplementedError.
        """

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        The base class always raises NotImplementedError.
        """

        raise NotImplementedError
 | |
| 
 | |
| 
 | |
| ############################################################################
 | |
| #
 | |
| # token implementations
 | |
| #
 | |
| # Token
 | |
| # +- CommonToken
 | |
| # \- ClassicToken
 | |
| #
 | |
| ############################################################################
 | |
| 
 | |
class CommonToken(Token):
    """@brief Basic token implementation.

    Instead of copying its text out of the input stream when it is
    created, a CommonToken records the start/stop character positions and
    asks the stream for the substring only when the text is requested.
    Text assigned explicitly takes precedence over the stream contents.
    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """Create a token from explicit values or as a copy of oldToken.

        When oldToken is truthy, type, channel, text, index, line,
        charPositionInLine and input are all read from it and the
        corresponding arguments of this call are ignored.  start/stop are
        taken from oldToken if it is a CommonToken, otherwise from the
        start/stop arguments.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, input=input)

            # Explicit text override.  When this is not None the text
            # property returns it; start/stop are unaffected by it.
            self._text = text

            # Position in the input buffer of the token's first char.
            self.start = start

            # Position in the input buffer of the token's last char
            # (inclusive - *not* the position one past the end).
            self.stop = stop
            return

        super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                         oldToken.index, oldToken.line,
                         oldToken.charPositionInLine, oldToken.input)

        # Only another CommonToken carries start/stop; for any other kind
        # of token fall back to the values passed to this constructor.
        if isinstance(oldToken, CommonToken):
            start, stop = oldToken.start, oldToken.stop
        self.start = start
        self.stop = stop

    @property
    def text(self):
        """Text of the token.

        Returns the explicitly assigned text when there is one (an empty
        string counts), None when there is no input stream, the substring
        start..stop of the stream when both positions are below the
        stream size, and '<EOF>' otherwise.
        """
        override = self._text
        # "is not None" rather than truthiness: an empty string is a
        # legitimate text and must be returned as such.
        if override is not None:
            return override

        stream = self.input
        if not stream:
            return None

        if self.start < stream.size() and self.stop < stream.size():
            return stream.substring(self.start, self.stop)

        return '<EOF>'

    @text.setter
    def text(self, value):
        """
        Replace the text of this token.  From now on the text property
        yields this value instead of reading it from the input buffer.
        The start/stop indexes are left as they are; the token merely
        carries its own string in addition to them.
        """
        self._text = value

    def getInputStream(self):
        """Return the input stream stored on this token."""
        return self.input

    def setInputStream(self, input):
        """Store the input stream this token belongs to."""
        self.input = input

    def __str__(self):
        """Render the token for debugging.

        An EOF-typed token is rendered as "<EOF>".  Any other token is
        rendered as "[@index,start:stop=text,<typeName>,line:column]",
        where text is shown through repr() and ",channel=N" follows the
        type name when the channel is greater than 0.
        """
        if self.type == EOF:
            return "<EOF>"

        channelStr = ",channel=" + str(self.channel) if self.channel > 0 else ""

        txt = self.text
        if not txt:
            txt = "<no text>"
        else:
            # Newline, carriage return and tab are each replaced by two
            # backslashes followed by the letter n, r or t.
            for raw, escaped in (("\n", r"\\n"),
                                 ("\r", r"\\r"),
                                 ("\t", r"\\t")):
                txt = txt.replace(raw, escaped)

        return ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                "<{0.typeName}>{channelStr},"
                "{0.line}:{0.charPositionInLine}]"
                .format(self, txt=txt, channelStr=channelStr))
 | |
| 
 | |
| 
 | |
class ClassicToken(Token):
    """@brief Alternative token implementation.

    An ANTLR 2.x style token that owns an actual string.  Such tokens are
    needed for imaginary tree nodes that have payload objects: the tree
    node points at a token that must carry its own text.  CommonToken
    refers to its text through indexes into a char stream and therefore
    cannot be used to introduce new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """Create a token from explicit values or as a copy of oldToken.

        When oldToken is truthy, type, channel, text, line and
        charPositionInLine are read from it and the other arguments are
        ignored.  Otherwise type, channel and text are used, and index,
        line and charPositionInLine are passed to the base class as None.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, text=text,
                             index=None, line=None, charPositionInLine=None)
            return

        super().__init__(type=oldToken.type, channel=oldToken.channel,
                         text=oldToken.text, line=oldToken.line,
                         charPositionInLine=oldToken.charPositionInLine)

    def getInputStream(self):
        """Always return None."""
        return None

    def setInputStream(self, input):
        """Accept and ignore the given input stream."""
        pass

    def toString(self):
        """Render the token as "[@index,text,<type>,line:column]".

        Every field is shown through repr().  ",channel=N" follows the
        type when the channel is greater than 0, and a falsy text is
        replaced by "<no text>".
        """
        channelStr = ",channel=" + str(self.channel) if self.channel > 0 else ""

        txt = self.text or "<no text>"

        return ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                "{0.line!r}:{0.charPositionInLine!r}]"
                .format(self, txt=txt, channelStr=channelStr))

    # str() and repr() both use the same rendering.
    __str__ = toString
    __repr__ = toString
 | |
| 
 | |
| 
 | |
# Module-level token instance whose type is INVALID_TOKEN_TYPE.
INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# A lexer rule action may set the token to SKIP_TOKEN; ANTLR then does not
# create a token for the matched symbol and tries to fetch another one.
# It also has type INVALID_TOKEN_TYPE but is a separate object from
# INVALID_TOKEN.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
 |