446 lines
15 KiB
C#
446 lines
15 KiB
C#
/*
|
|
* [The "BSD license"]
|
|
* Copyright (c) 2011 Terence Parr
|
|
* All rights reserved.
|
|
*
|
|
* Conversion to C#:
|
|
* Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. The name of the author may not be used to endorse or promote products
|
|
* derived from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
namespace Antlr.Runtime
|
|
{
|
|
public abstract class SlimLexer
|
|
: BaseRecognizer
|
|
, ITokenSource<SlimToken>
|
|
{
|
|
/** <summary>Character stream this lexer reads from; assigned by the constructors and by the CharStream setter.</summary> */
protected SlimStringStream input;

/** <summary>Token built for the rule currently being matched; this is what NextToken() returns.</summary> */
private SlimToken _token;

/** <summary>Cleared by Reset() and at the start of every NextToken() pass; NextToken() calls Emit() itself while this is false.</summary> */
private bool _emitted;

/** <summary>Raised by Skip(); makes NextToken() drop the current match and look for another token.</summary> */
private bool _skip;
|
|
|
|
/** <summary>Creates a lexer with no character stream attached; <c>input</c> stays null until CharStream is assigned.</summary> */
public SlimLexer()
    : base()
{
}
|
|
|
|
/** <summary>Creates a lexer that reads from the given character stream.</summary>
 * <param name="input">Character source; it is cast to SlimStringStream, so any other implementation fails the cast.</param>
 */
public SlimLexer( ICharStream input )
    : base()
{
    SlimStringStream stream = (SlimStringStream)input;
    this.input = stream;
}
|
|
|
|
/** <summary>Creates a lexer that reads from the given character stream and uses the supplied recognizer state.</summary>
 * <param name="input">Character source; it is cast to SlimStringStream, so any other implementation fails the cast.</param>
 * <param name="state">Recognizer state handed to the base class constructor.</param>
 */
public SlimLexer( ICharStream input, RecognizerSharedState state )
    : base( state )
{
    SlimStringStream stream = (SlimStringStream)input;
    this.input = stream;
}
|
|
|
|
#region Properties
|
|
/** <summary>
 * Text of the current token. Reading returns the override stored in <c>state.text</c> when one
 * is set; otherwise it returns the input characters from the token's start index up to the
 * current character index. Writing replaces any previous override.
 * </summary>
 */
public string Text
{
    get
    {
        string overrideText = state.text;
        if ( overrideText == null )
        {
            // No override: slice the matched characters straight out of the input.
            int start = state.tokenStartCharIndex;
            return input.Substring( start, CharIndex - start );
        }

        return overrideText;
    }

    set
    {
        state.text = value;
    }
}
|
|
/** <summary>Line value of the underlying input stream; reads and writes go directly to <c>input.Line</c>.</summary> */
public int Line
{
    get { return input.Line; }
    set { input.Line = value; }
}
|
|
/** <summary>Character position within the line as tracked by the input stream; reads and writes go directly to <c>input.CharPositionInLine</c>.</summary> */
public int CharPositionInLine
{
    get { return input.CharPositionInLine; }
    set { input.CharPositionInLine = value; }
}
|
|
#endregion
|
|
|
|
/** <summary>
 * Returns the lexer to its initial condition: resets the base recognizer, rewinds the input
 * stream to index 0 when one is attached, and clears all per-token bookkeeping.
 * </summary>
 */
public override void Reset()
{
    // Base recognizer state goes first.
    base.Reset();

    // Rewind the character stream, if there is one.
    if ( input != null )
    {
        input.Seek( 0 );
    }

    // Without a shared state object there is nothing lexer-specific to clear.
    if ( state != null )
    {
        _token = default( SlimToken );
        _skip = false;
        _emitted = false;

        state.text = null;
        state.type = TokenTypes.Invalid;
        state.channel = TokenChannels.Default;
        state.tokenStartCharIndex = -1;
#if TRACK_POSITION
        state.tokenStartLine = -1;
        state.tokenStartCharPositionInLine = -1;
#endif
    }
}
|
|
|
|
/** <summary>
 * Matches and returns the next token from the character stream.
 * </summary>
 *
 * <remarks>
 * Each pass clears the per-token bookkeeping and records where the token starts. When the next
 * character is end-of-file an EndOfFile token is returned immediately. Otherwise mTokens() is
 * run; a skipped match starts another pass, and any other match is emitted (if the pass has not
 * emitted yet) and returned. Recognition errors are reported and the loop tries again.
 * </remarks>
 */
public virtual SlimToken NextToken()
{
    while ( true )
    {
        // Start every pass from a clean slate.
        _token = default( SlimToken );
        _skip = false;
        _emitted = false;

        state.text = null;
        state.channel = TokenChannels.Default;
        state.tokenStartCharIndex = input.Index;
#if TRACK_POSITION
        state.tokenStartCharPositionInLine = input.CharPositionInLine;
        state.tokenStartLine = input.Line;
#endif

        bool atEndOfInput = input.LA( 1 ) == CharStreamConstants.EndOfFile;
        if ( atEndOfInput )
        {
            return new SlimToken( TokenTypes.EndOfFile );
        }

        try
        {
            mTokens();

            if ( !_skip )
            {
                if ( !_emitted )
                {
                    Emit();
                }

                return _token;
            }

            // The rule asked to be skipped: fall through and run another pass.
        }
        catch ( NoViableAltException noViableAlt )
        {
            ReportError( noViableAlt );
            // Drop the offending character and try again.
            Recover( noViableAlt );
        }
        catch ( RecognitionException recognitionError )
        {
            // The Match* routines call Recover() before throwing, so only report here.
            ReportError( recognitionError );
        }
    }
}
|
|
/** <summary>Non-generic ITokenSource entry point; forwards to the typed NextToken() and hands its result back as an IToken.</summary> */
IToken ITokenSource.NextToken()
{
    IToken next = NextToken();
    return next;
}
|
|
|
|
/** <summary>
 * Tells the lexer not to produce a token for the rule currently being matched.
 * NextToken() checks the flag set here once the rule finishes and, when it is raised,
 * discards the match and starts looking for another token.
 * </summary>
 */
public virtual void Skip()
{
    // The flag is cleared again at the start of the next NextToken() pass.
    _skip = true;
}
|
|
|
|
/** <summary>
 * Lexer entry point supplied by the concrete lexer. NextToken() invokes it once per pass,
 * after recording the token start position, and then emits or skips based on what it did.
 * </summary>
 */
public abstract void mTokens();
|
|
|
|
/** <summary>
 * The character stream this lexer reads from. Assigning a new stream resets the lexer first
 * and then attaches the stream, which is cast to SlimStringStream.
 * </summary>
 */
public ICharStream CharStream
{
    get { return input; }

    set
    {
        // Detach the current stream before resetting so Reset() has nothing to rewind.
        input = null;
        Reset();

        // Attach the replacement only after the reset has completed.
        input = (SlimStringStream)value;
    }
}
|
|
|
|
/** <summary>Name of the input source, as reported by the attached character stream.</summary> */
public override string SourceName
{
    get { return input.SourceName; }
}
|
|
|
|
///** <summary>
|
|
// * Currently does not support multiple emits per nextToken invocation
|
|
// * for efficiency reasons. Subclass and override this method and
|
|
// * nextToken (to push tokens into a list and pull from that list rather
|
|
// * than a single variable as this implementation does).
|
|
// * </summary>
|
|
// */
|
|
//public void Emit( T token )
|
|
//{
|
|
// _token = token;
|
|
//}
|
|
|
|
/** <summary>
 * Builds the token for the rule that just matched and stores it as the result of the current
 * NextToken() pass. The token carries only the type and channel currently held in the
 * recognizer state; no position or text information is copied into it.
 * </summary>
 *
 * <remarks>
 * NextToken() calls this automatically when a rule finishes without having emitted a token.
 * The emitted flag is raised here so that a token emitted explicitly from a rule action is
 * kept; previously the flag was never set, so NextToken() always emitted a second time and
 * overwrote the explicitly emitted token.
 * </remarks>
 */
public void Emit()
{
    _token = new SlimToken()
    {
        Type = state.type,
        Channel = state.channel,
    };

    // Tell NextToken() that this pass already has its token.
    _emitted = true;
}
|
|
|
|
/** <summary>
 * Matches the characters of <paramref name="s"/> against the input one by one, consuming
 * each character that agrees.
 * </summary>
 * <param name="s">Literal text expected at the current input position.</param>
 * <remarks>
 * On the first disagreement: while backtracking, <c>state.failed</c> is set and the method
 * returns; otherwise Recover() is called and a MismatchedTokenException is thrown.
 * </remarks>
 */
public void Match( string s )
{
    foreach ( char expected in s )
    {
        if ( input.LA( 1 ) == expected )
        {
            input.Consume();
            state.failed = false;
            continue;
        }

        // Mismatch while speculating: flag the failure quietly.
        if ( state.backtracking > 0 )
        {
            state.failed = true;
            return;
        }

        MismatchedTokenException mismatch = new MismatchedTokenException( expected, input, TokenNames );
        Recover( mismatch );
        throw mismatch;
    }
}
|
|
|
|
/** <summary>Accepts whatever character is next by consuming it from the input without any check.</summary> */
public void MatchAny()
{
    // Wildcard match: nothing to compare, just advance.
    input.Consume();
}
|
|
|
|
/** <summary>Matches a single expected character and consumes it.</summary>
 * <param name="c">Character value expected at the current input position.</param>
 * <remarks>
 * On a mismatch: while backtracking, <c>state.failed</c> is set and the method returns;
 * otherwise Recover() is called and a MismatchedTokenException is thrown.
 * </remarks>
 */
public void Match( int c )
{
    if ( input.LA( 1 ) == c )
    {
        input.Consume();
        state.failed = false;
        return;
    }

    // Mismatch while speculating: flag the failure quietly.
    if ( state.backtracking > 0 )
    {
        state.failed = true;
        return;
    }

    MismatchedTokenException mismatch = new MismatchedTokenException( c, input, TokenNames );
    // In the lexer "recovering" only means consuming the offending character.
    Recover( mismatch );
    throw mismatch;
}
|
|
|
|
/** <summary>Matches one character whose value lies in the inclusive range a..b and consumes it.</summary>
 * <param name="a">Lowest accepted character value.</param>
 * <param name="b">Highest accepted character value.</param>
 * <remarks>
 * On a mismatch: while backtracking, <c>state.failed</c> is set and the method returns;
 * otherwise Recover() is called and a MismatchedRangeException is thrown.
 * </remarks>
 */
public void MatchRange( int a, int b )
{
    int lookahead = input.LA( 1 );
    bool inRange = lookahead >= a && lookahead <= b;

    if ( inRange )
    {
        input.Consume();
        state.failed = false;
        return;
    }

    // Mismatch while speculating: flag the failure quietly.
    if ( state.backtracking > 0 )
    {
        state.failed = true;
        return;
    }

    MismatchedRangeException outOfRange = new MismatchedRangeException( a, b, input );
    Recover( outOfRange );
    throw outOfRange;
}
|
|
|
|
/** <summary>Current index of the input stream, i.e. the position of the next lookahead character.</summary> */
public int CharIndex
{
    get { return input.Index; }
}
|
|
|
|
/** <summary>Reports a recognition error by passing it, together with this lexer's token names, to DisplayRecognitionError.</summary>
 * <param name="e">The error to report.</param>
 */
public override void ReportError( RecognitionException e )
{
    // TODO: lexer-side error recovery has not been thought through yet. The idea of
    // suppressing further reports (via an errorRecovery flag) until a token has been
    // matched successfully is not implemented, so every error is displayed.
    DisplayRecognitionError( TokenNames, e );
}
|
|
|
|
/** <summary>
 * Produces the message text for a lexer recognition error. Character-level wording is used
 * for the exception types handled here; every other exception is delegated to the base class.
 * </summary>
 * <param name="e">The error to describe.</param>
 * <param name="tokenNames">Token names, forwarded to the base implementation for unhandled exception types.</param>
 * <returns>The message describing <paramref name="e"/>.</returns>
 */
public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
{
    MismatchedTokenException tokenMismatch = e as MismatchedTokenException;
    if ( tokenMismatch != null )
    {
        return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( tokenMismatch.Expecting );
    }

    if ( e is NoViableAltException )
    {
        // For development, the decision description, decision number and state number
        // of the exception can be appended here.
        return "no viable alternative at character " + GetCharErrorDisplay( e.Character );
    }

    if ( e is EarlyExitException )
    {
        // For development, the decision number of the exception can be appended here.
        return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
    }

    // NOTE(review): the not-set test is kept ahead of the set test, as in the original chain;
    // presumably MismatchedNotSetException derives from MismatchedSetException - confirm
    // before reordering.
    MismatchedNotSetException notSetMismatch = e as MismatchedNotSetException;
    if ( notSetMismatch != null )
    {
        return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + notSetMismatch.Expecting;
    }

    MismatchedSetException setMismatch = e as MismatchedSetException;
    if ( setMismatch != null )
    {
        return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + setMismatch.Expecting;
    }

    MismatchedRangeException rangeMismatch = e as MismatchedRangeException;
    if ( rangeMismatch != null )
    {
        return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
            GetCharErrorDisplay( rangeMismatch.A ) + ".." + GetCharErrorDisplay( rangeMismatch.B );
    }

    return base.GetErrorMessage( e, tokenNames );
}
|
|
|
|
/** <summary>
 * Formats a character value for use in an error message: end-of-file becomes &lt;EOF&gt;,
 * newline, tab and carriage return are shown as escape sequences, and anything else is shown
 * as the character itself. The result is wrapped in single quotes.
 * </summary>
 * <param name="c">Character value to display.</param>
 * <returns>The quoted display form of <paramref name="c"/>.</returns>
 */
public virtual string GetCharErrorDisplay( int c )
{
    // Default rendering; replaced below for the values that need special treatment.
    string shown = ( (char)c ).ToString();

    if ( c == TokenTypes.EndOfFile )
    {
        shown = "<EOF>";
    }
    else if ( c == '\n' )
    {
        shown = "\\n";
    }
    else if ( c == '\t' )
    {
        shown = "\\t";
    }
    else if ( c == '\r' )
    {
        shown = "\\r";
    }

    return "'" + shown + "'";
}
|
|
|
|
/** <summary>
 * Recovers from a lexical error in the simplest possible way: one character is consumed
 * from the input and lexing carries on from there.
 * </summary>
 * <param name="re">The error being recovered from; this implementation does not inspect it.</param>
 * <remarks>
 * The method is virtual, so a lexer that needs smarter recovery (for instance inside a
 * fragment rule) can override it.
 * </remarks>
 */
public virtual void Recover( RecognitionException re )
{
    // Throw away the current character.
    input.Consume();
}
|
|
|
|
/** <summary>Traces entry into a rule, describing the input by the next character plus the current line and column.</summary>
 * <param name="ruleName">Name of the rule being entered.</param>
 * <param name="ruleIndex">Index of the rule being entered.</param>
 */
public virtual void TraceIn( string ruleName, int ruleIndex )
{
    char lookahead = (char)input.LT( 1 );
    string inputSymbol = lookahead + " line=" + Line + ":" + CharPositionInLine;

    base.TraceIn( ruleName, ruleIndex, inputSymbol );
}
|
|
|
|
/** <summary>Traces exit from a rule, describing the input by the next character plus the current line and column.</summary>
 * <param name="ruleName">Name of the rule being left.</param>
 * <param name="ruleIndex">Index of the rule being left.</param>
 */
public virtual void TraceOut( string ruleName, int ruleIndex )
{
    char lookahead = (char)input.LT( 1 );
    string inputSymbol = lookahead + " line=" + Line + ":" + CharPositionInLine;

    base.TraceOut( ruleName, ruleIndex, inputSymbol );
}
|
|
}
|
|
}
|