321 lines
10 KiB
Ruby
321 lines
10 KiB
Ruby
#!/usr/bin/ruby
|
|
# encoding: utf-8
|
|
|
|
=begin LICENSE
|
|
|
|
[The "BSD licence"]
|
|
Copyright (c) 2009-2010 Kyle Yetter
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
3. The name of the author may not be used to endorse or promote products
|
|
derived from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
=end
|
|
|
|
module ANTLR3
|
|
|
|
=begin rdoc ANTLR3::DFA
|
|
|
|
DFA is a class that implements a finite state machine that chooses between
|
|
alternatives in a rule based upon lookahead symbols from an input stream.
|
|
|
|
Deterministic Finite Automata (DFA) are finite state machines that are capable
|
|
of recognizing <i>regular languages</i>. For more background on the subject,
|
|
check out http://en.wikipedia.org/wiki/Deterministic_finite-state_machine or
|
|
check out general ANTLR documentation at http://www.antlr.org
|
|
|
|
ANTLR implements most of its decision logic directly using code branching
|
|
structures in methods. However, for certain types of decisions, ANTLR will
|
|
generate a special DFA class definition to implement a decision.
|
|
|
|
Conceptually, these state machines are defined by a number of states, each state
|
|
represented by an integer indexed upward from zero. State number +0+ is the
|
|
<i>start state</i> of the machine; every prediction will begin in state +0+. At
|
|
each step, the machine examines the next symbol on the input stream, checks the
|
|
value against the transition parameters associated with the current state, and
|
|
either moves to a new state number to repeat the process or decides that the
|
|
machine cannot transition any further. If the machine cannot transition any
|
|
further and the current state is defined as an <i>accept state</i>, an
|
|
alternative has been chosen successfully and the prediction procedure ends. If
|
|
the current state is not an <i>accept state</i>, the prediction has failed and
|
|
there is <i>no viable alternative</i>.
|
|
|
|
In generated code, ANTLR defines DFA states using seven parameters, each defined
|
|
as a member of seven seperate array constants -- +MIN+, +MAX+, +EOT+, +EOF+,
|
|
+SPECIAL+, +ACCEPT+, and +TRANSITION+. The parameters that characterize state
|
|
+s+ are defined by the value of these lists at index +s+.
|
|
|
|
MIN[s]::
|
|
The smallest value of the next input symbol that has
|
|
a transition for state +s+
|
|
MAX[s]::
|
|
The largest value of the next input symbol that has
|
|
a transition for state +s+
|
|
TRANSITION[s]::
|
|
A list that defines the next state number based upon
|
|
the current input symbol.
|
|
EOT[s]::
|
|
If positive, it specifies a state transition in
|
|
situations where a non-matching input symbol does
|
|
not indicate failure.
|
|
SPECIAL[s]::
|
|
If positive, it indicates that the prediction
|
|
algorithm must defer to a special code block
|
|
to determine the next state. The value is used
|
|
by the special state code to determine the next
|
|
state.
|
|
ACCEPT[s]::
|
|
If positive and there are no possible state
|
|
transitions, this is the alternative number
|
|
that has been predicted
|
|
EOF[s]::
|
|
If positive and the input stream has been exhausted,
|
|
this is the alternative number that has been predicted.
|
|
|
|
For more information on the prediction algorithm, check out the #predict method
|
|
below.
|
|
|
|
=end
|
|
|
|
class DFA
|
|
include Constants
|
|
include Error
|
|
|
|
attr_reader :recognizer, :decision_number, :eot, :eof, :min, :max,
|
|
:accept, :special, :transition, :special_block
|
|
|
|
class << self
|
|
attr_reader :decision, :eot, :eof, :min, :max,
|
|
:accept, :special, :transition
|
|
|
|
def unpack( *data )
|
|
data.empty? and return [].freeze
|
|
|
|
n = data.length / 2
|
|
size = 0
|
|
n.times { |i| size += data[ 2*i ] }
|
|
if size > 1024
|
|
values = Hash.new( 0 )
|
|
data.each_slice( 2 ) do |count, value|
|
|
values[ value ] += count
|
|
end
|
|
default = values.keys.max_by { |v| values[ v ] }
|
|
|
|
unpacked = Hash.new( default )
|
|
position = 0
|
|
data.each_slice( 2 ) do |count, value|
|
|
unless value == default
|
|
count.times { |i| unpacked[ position + i ] = value }
|
|
end
|
|
position += count
|
|
end
|
|
else
|
|
unpacked = []
|
|
data.each_slice( 2 ) do |count, value|
|
|
unpacked.fill( value, unpacked.length, count )
|
|
end
|
|
end
|
|
|
|
return unpacked
|
|
end
|
|
|
|
end
|
|
|
|
def initialize( recognizer, decision_number = nil,
|
|
eot = nil, eof = nil, min = nil, max = nil,
|
|
accept = nil, special = nil,
|
|
transition = nil, &special_block )
|
|
@recognizer = recognizer
|
|
@decision_number = decision_number || self.class.decision
|
|
@eot = eot || self.class::EOT #.eot
|
|
@eof = eof || self.class::EOF #.eof
|
|
@min = min || self.class::MIN #.min
|
|
@max = max || self.class::MAX #.max
|
|
@accept = accept || self.class::ACCEPT #.accept
|
|
@special = special || self.class::SPECIAL #.special
|
|
@transition = transition || self.class::TRANSITION #.transition
|
|
@special_block = special_block
|
|
rescue NameError => e
|
|
raise unless e.message =~ /uninitialized constant/
|
|
constant = e.name
|
|
message = Util.tidy( <<-END )
|
|
| No #{ constant } information provided.
|
|
| DFA cannot be instantiated without providing state array information.
|
|
| When DFAs are generated by ANTLR, this information should already be
|
|
| provided in the DFA subclass constants.
|
|
END
|
|
end
|
|
|
|
if RUBY_VERSION =~ /^1\.9/
|
|
|
|
def predict( input )
|
|
mark = input.mark
|
|
state = 0
|
|
|
|
50000.times do
|
|
special_state = @special[ state ]
|
|
if special_state >= 0
|
|
state = @special_block.call( special_state )
|
|
if state == -1
|
|
no_viable_alternative( state, input )
|
|
return 0
|
|
end
|
|
input.consume
|
|
next
|
|
end
|
|
@accept[ state ] >= 1 and return @accept[ state ]
|
|
|
|
# look for a normal char transition
|
|
|
|
c = input.peek.ord
|
|
# the @min and @max arrays contain the bounds of the character (or token type)
|
|
# ranges for the transition decisions
|
|
if c.between?( @min[ state ], @max[ state ] )
|
|
# c - @min[state] is the position of the character within the range
|
|
# so for a range like ?a..?z, a match of ?a would be 0,
|
|
# ?c would be 2, and ?z would be 25
|
|
next_state = @transition[ state ][ c - @min[ state ] ]
|
|
if next_state < 0
|
|
if @eot[ state ] >= 0
|
|
state = @eot[ state ]
|
|
input.consume
|
|
next
|
|
end
|
|
no_viable_alternative( state, input )
|
|
return 0
|
|
end
|
|
|
|
state = next_state
|
|
input.consume
|
|
next
|
|
end
|
|
|
|
if @eot[ state ] >= 0
|
|
state = @eot[ state ]
|
|
input.consume()
|
|
next
|
|
end
|
|
|
|
( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ]
|
|
no_viable_alternative( state, input )
|
|
return 0
|
|
end
|
|
|
|
ANTLR3.bug!( Util.tidy( <<-END ) )
|
|
| DFA BANG!
|
|
| The prediction loop has exceeded a maximum limit of 50000 iterations
|
|
| ----
|
|
| decision: #@decision_number
|
|
| description: #{ description }
|
|
END
|
|
ensure
|
|
input.rewind( mark )
|
|
end
|
|
|
|
else
|
|
|
|
def predict( input )
|
|
mark = input.mark
|
|
state = 0
|
|
|
|
50000.times do
|
|
special_state = @special[ state ]
|
|
if special_state >= 0
|
|
state = @special_block.call( special_state )
|
|
if state == -1
|
|
no_viable_alternative( state, input )
|
|
return 0
|
|
end
|
|
input.consume
|
|
next
|
|
end
|
|
@accept[ state ] >= 1 and return @accept[ state ]
|
|
|
|
# look for a normal char transition
|
|
|
|
c = input.peek
|
|
# the @min and @max arrays contain the bounds of the character (or token type)
|
|
# ranges for the transition decisions
|
|
if c.between?( @min[ state ], @max[ state ] )
|
|
# c - @min[state] is the position of the character within the range
|
|
# so for a range like ?a..?z, a match of ?a would be 0,
|
|
# ?c would be 2, and ?z would be 25
|
|
next_state = @transition[ state ][ c - @min[ state ] ]
|
|
if next_state < 0
|
|
if @eot[ state ] >= 0
|
|
state = @eot[ state ]
|
|
input.consume
|
|
next
|
|
end
|
|
no_viable_alternative( state, input )
|
|
return 0
|
|
end
|
|
|
|
state = next_state
|
|
input.consume()
|
|
next
|
|
end
|
|
if @eot[ state ] >= 0
|
|
state = @eot[ state ]
|
|
input.consume()
|
|
next
|
|
end
|
|
( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ]
|
|
no_viable_alternative( state, input )
|
|
return 0
|
|
end
|
|
|
|
ANTLR3.bug!( Util.tidy( <<-END ) )
|
|
| DFA BANG!
|
|
| The prediction loop has exceeded a maximum limit of 50000 iterations
|
|
| ----
|
|
| decision: #@decision_number
|
|
| description: #{ description }
|
|
END
|
|
ensure
|
|
input.rewind( mark )
|
|
end
|
|
|
|
end
|
|
|
|
def no_viable_alternative( state, input )
|
|
raise( BacktrackingFailed ) if @recognizer.state.backtracking > 0
|
|
except = NoViableAlternative.new( description, @decision_number, state, input )
|
|
error( except )
|
|
raise( except )
|
|
end
|
|
|
|
def error( except )
|
|
# overridable debugging hook
|
|
end
|
|
|
|
def special_state_transition( state, input )
|
|
return -1
|
|
end
|
|
|
|
def description
|
|
return "n/a"
|
|
end
|
|
end
|
|
end
|