196 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			196 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			Python
		
	
	
	
| """ANTLR3 runtime package"""
 | |
| 
 | |
| # begin[licence]
 | |
| #
 | |
| # [The "BSD licence"]
 | |
| # Copyright (c) 2005-2012 Terence Parr
 | |
| # All rights reserved.
 | |
| #
 | |
| # Redistribution and use in source and binary forms, with or without
 | |
| # modification, are permitted provided that the following conditions
 | |
| # are met:
 | |
| # 1. Redistributions of source code must retain the above copyright
 | |
| #    notice, this list of conditions and the following disclaimer.
 | |
| # 2. Redistributions in binary form must reproduce the above copyright
 | |
| #    notice, this list of conditions and the following disclaimer in the
 | |
| #    documentation and/or other materials provided with the distribution.
 | |
| # 3. The name of the author may not be used to endorse or promote products
 | |
| #    derived from this software without specific prior written permission.
 | |
| #
 | |
| # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 | |
| # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | |
| # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | |
| # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
| # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | |
| # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| #
 | |
| # end[licence]
 | |
| 
 | |
| from .constants import EOF
 | |
| from .exceptions import NoViableAltException, BacktrackingFailed
 | |
| 
 | |
| 
 | |
| class DFA(object):
 | |
|     """@brief A DFA implemented as a set of transition tables.
 | |
| 
 | |
|     Any state that has a semantic predicate edge is special; those states
 | |
|     are generated with if-then-else structures in a specialStateTransition()
 | |
|     which is generated by cyclicDFA template.
 | |
|     
 | |
|     """
 | |
|     
 | |
|     def __init__(
 | |
|         self,
 | |
|         recognizer, decisionNumber,
 | |
|         eot, eof, min, max, accept, special, transition
 | |
|         ):
 | |
|         ## Which recognizer encloses this DFA?  Needed to check backtracking
 | |
|         self.recognizer = recognizer
 | |
| 
 | |
|         self.decisionNumber = decisionNumber
 | |
|         self.eot = eot
 | |
|         self.eof = eof
 | |
|         self.min = min
 | |
|         self.max = max
 | |
|         self.accept = accept
 | |
|         self.special = special
 | |
|         self.transition = transition
 | |
| 
 | |
| 
 | |
|     def predict(self, input):
 | |
|         """
 | |
|         From the input stream, predict what alternative will succeed
 | |
|         using this DFA (representing the covering regular approximation
 | |
|         to the underlying CFL).  Return an alternative number 1..n.  Throw
 | |
|         an exception upon error.
 | |
|         """
 | |
|         mark = input.mark()
 | |
|         s = 0 # we always start at s0
 | |
|         try:
 | |
|             for _ in range(50000):
 | |
|                 specialState = self.special[s]
 | |
|                 if specialState >= 0:
 | |
|                     s = self.specialStateTransition(specialState, input)
 | |
|                     if s == -1:
 | |
|                         self.noViableAlt(s, input)
 | |
|                         return 0
 | |
|                     input.consume()
 | |
|                     continue
 | |
| 
 | |
|                 if self.accept[s] >= 1:
 | |
|                     return self.accept[s]
 | |
| 
 | |
|                 # look for a normal char transition
 | |
|                 c = input.LA(1)
 | |
| 
 | |
|                 if c >= self.min[s] and c <= self.max[s]:
 | |
|                     # move to next state
 | |
|                     snext = self.transition[s][c-self.min[s]]
 | |
|                     
 | |
|                     if snext < 0:
 | |
|                         # was in range but not a normal transition
 | |
|                         # must check EOT, which is like the else clause.
 | |
|                         # eot[s]>=0 indicates that an EOT edge goes to another
 | |
|                         # state.
 | |
|                         if self.eot[s] >= 0: # EOT Transition to accept state?
 | |
|                             s = self.eot[s]
 | |
|                             input.consume()
 | |
|                             # TODO: I had this as return accept[eot[s]]
 | |
|                             # which assumed here that the EOT edge always
 | |
|                             # went to an accept...faster to do this, but
 | |
|                             # what about predicated edges coming from EOT
 | |
|                             # target?
 | |
|                             continue
 | |
| 
 | |
|                         self.noViableAlt(s, input)
 | |
|                         return 0
 | |
| 
 | |
|                     s = snext
 | |
|                     input.consume()
 | |
|                     continue
 | |
| 
 | |
|                 if self.eot[s] >= 0:
 | |
|                     s = self.eot[s]
 | |
|                     input.consume()
 | |
|                     continue
 | |
| 
 | |
|                 # EOF Transition to accept state?
 | |
|                 if c == EOF and self.eof[s] >= 0:
 | |
|                     return self.accept[self.eof[s]]
 | |
| 
 | |
|                 # not in range and not EOF/EOT, must be invalid symbol
 | |
|                 self.noViableAlt(s, input)
 | |
|                 return 0
 | |
| 
 | |
|             else:
 | |
|                 raise RuntimeError("DFA bang!")
 | |
|             
 | |
|         finally:
 | |
|             input.rewind(mark)
 | |
| 
 | |
| 
 | |
|     def noViableAlt(self, s, input):
 | |
|         if self.recognizer._state.backtracking > 0:
 | |
|             raise BacktrackingFailed
 | |
| 
 | |
|         nvae = NoViableAltException(
 | |
|             self.getDescription(),
 | |
|             self.decisionNumber,
 | |
|             s,
 | |
|             input
 | |
|             )
 | |
| 
 | |
|         self.error(nvae)
 | |
|         raise nvae
 | |
| 
 | |
| 
 | |
|     def error(self, nvae):
 | |
|         """A hook for debugging interface"""
 | |
|         pass
 | |
| 
 | |
| 
 | |
|     def specialStateTransition(self, s, input):
 | |
|         return -1
 | |
| 
 | |
| 
 | |
|     def getDescription(self):
 | |
|         return "n/a"
 | |
| 
 | |
| 
 | |
| ##     def specialTransition(self, state, symbol):
 | |
| ##         return 0
 | |
| 
 | |
| 
 | |
|     @classmethod
 | |
|     def unpack(cls, string):
 | |
|         """@brief Unpack the runlength encoded table data.
 | |
| 
 | |
|         Terence implemented packed table initializers, because Java has a
 | |
|         size restriction on .class files and the lookup tables can grow
 | |
|         pretty large. The generated JavaLexer.java of the Java.g example
 | |
|         would be about 15MB with uncompressed array initializers.
 | |
| 
 | |
|         Python does not have any size restrictions, but the compilation of
 | |
|         such large source files seems to be pretty memory hungry. The memory
 | |
|         consumption of the python process grew to >1.5GB when importing a
 | |
|         15MB lexer, eating all my swap space and I was to impacient to see,
 | |
|         if it could finish at all. With packed initializers that are unpacked
 | |
|         at import time of the lexer module, everything works like a charm.
 | |
|         
 | |
|         """
 | |
|         
 | |
|         ret = []
 | |
|         for i in range(0, len(string) - 1, 2):
 | |
|             (n, v) = ord(string[i]), ord(string[i + 1])
 | |
| 
 | |
|             if v == 0xFFFF:
 | |
|                 v = -1
 | |
| 
 | |
|             ret += [v] * n
 | |
| 
 | |
|         return ret
 |