6564 lines
201 KiB
ObjectPascal
6564 lines
201 KiB
ObjectPascal
unit Antlr.Runtime;
|
|
(*
|
|
[The "BSD licence"]
|
|
Copyright (c) 2008 Erik van Bilsen
|
|
Copyright (c) 2005-2007 Kunle Odutola
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
1. Redistributions of source code MUST RETAIN the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
2. Redistributions in binary form MUST REPRODUCE the above copyright
|
|
notice, this list of conditions and the following disclaimer in
|
|
the documentation and/or other materials provided with the
|
|
distribution.
|
|
3. The name of the author may not be used to endorse or promote products
|
|
derived from this software without specific prior WRITTEN permission.
|
|
4. Unless explicitly state otherwise, any contribution intentionally
|
|
submitted for inclusion in this work to the copyright owner or licensor
|
|
shall be under the terms and conditions of this license, without any
|
|
additional terms or conditions.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*)
|
|
|
|
interface
|
|
|
|
{$IF CompilerVersion < 20}
|
|
{$MESSAGE ERROR 'You need Delphi 2009 or higher to use the Antlr runtime'}
|
|
{$IFEND}
|
|
|
|
uses
|
|
SysUtils,
|
|
Classes,
|
|
Generics.Defaults,
|
|
Generics.Collections,
|
|
Antlr.Runtime.Tools,
|
|
Antlr.Runtime.Collections;
|
|
|
|
type
|
|
TCharStreamConstants = (cscEOF = -1);
|
|
|
|
type
|
|
ERecognitionException = class;
|
|
ENoViableAltException = class;
|
|
|
|
/// <summary>
|
|
/// A simple stream of integers. This is useful when all we care about is the char
|
|
/// or token type sequence (such as for interpretation).
|
|
/// </summary>
|
|
IIntStream = interface(IANTLRInterface)
|
|
['{6B851BDB-DD9C-422B-AD1E-567E52D2654F}']
|
|
{ Property accessors }
|
|
function GetSourceName: String;
|
|
|
|
{ Methods }
|
|
/// <summary>
|
|
/// Advances the read position of the stream. Updates line and column state
|
|
/// </summary>
|
|
procedure Consume;
|
|
|
|
/// <summary>
|
|
/// Get int at current input pointer + I ahead (where I=1 is next int)
|
|
/// Negative indexes are allowed. LA(-1) is previous token (token just matched).
|
|
/// LA(-i) where i is before first token should yield -1, invalid char or EOF.
|
|
/// </summary>
|
|
function LA(I: Integer): Integer;
|
|
function LAChar(I: Integer): Char;
|
|
|
|
/// <summary>Tell the stream to start buffering if it hasn't already.</summary>
|
|
/// <remarks>
|
|
/// Executing Rewind(Mark()) on a stream should not affect the input position.
|
|
/// The Lexer tracks line/col info as well as input index so its markers are
|
|
/// not pure input indexes. Same for tree node streams. */
|
|
/// </remarks>
|
|
/// <returns>Return a marker that can be passed to
|
|
/// <see cref="IIntStream.Rewind(Integer)"/> to return to the current position.
|
|
/// This could be the current input position, a value return from
|
|
/// <see cref="IIntStream.Index"/>, or some other marker.</returns>
|
|
function Mark: Integer;
|
|
|
|
/// <summary>
|
|
/// Return the current input symbol index 0..N where N indicates the
|
|
/// last symbol has been read. The index is the symbol about to be
|
|
/// read not the most recently read symbol.
|
|
/// </summary>
|
|
function Index: Integer;
|
|
|
|
/// <summary>
|
|
/// Resets the stream so that the next call to
|
|
/// <see cref="IIntStream.Index"/> would return marker.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// The marker will usually be <see cref="IIntStream.Index"/> but
|
|
/// it doesn't have to be. It's just a marker to indicate what
|
|
/// state the stream was in. This is essentially calling
|
|
/// <see cref="IIntStream.Release"/> and <see cref="IIntStream.Seek"/>.
|
|
/// If there are other markers created after the specified marker,
|
|
/// this routine must unroll them like a stack. Assumes the state the
|
|
/// stream was in when this marker was created.
|
|
/// </remarks>
|
|
procedure Rewind(const Marker: Integer); overload;
|
|
|
|
/// <summary>
|
|
/// Rewind to the input position of the last marker.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// Used currently only after a cyclic DFA and just before starting
|
|
/// a sem/syn predicate to get the input position back to the start
|
|
/// of the decision. Do not "pop" the marker off the state. Mark(I)
|
|
/// and Rewind(I) should balance still. It is like invoking
|
|
/// Rewind(last marker) but it should not "pop" the marker off.
|
|
/// It's like Seek(last marker's input position).
|
|
/// </remarks>
|
|
procedure Rewind; overload;
|
|
|
|
/// <summary>
|
|
/// You may want to commit to a backtrack but don't want to force the
|
|
/// stream to keep bookkeeping objects around for a marker that is
|
|
/// no longer necessary. This will have the same behavior as
|
|
/// <see cref="IIntStream.Rewind(Integer)"/> except it releases resources without
|
|
/// the backward seek.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// This must throw away resources for all markers back to the marker
|
|
/// argument. So if you're nested 5 levels of Mark(), and then Release(2)
|
|
/// you have to release resources for depths 2..5.
|
|
/// </remarks>
|
|
procedure Release(const Marker: Integer);
|
|
|
|
/// <summary>
|
|
/// Set the input cursor to the position indicated by index. This is
|
|
/// normally used to seek ahead in the input stream.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// No buffering is required to do this unless you know your stream
|
|
/// will use seek to move backwards such as when backtracking.
|
|
///
|
|
/// This is different from rewind in its multi-directional requirement
|
|
/// and in that its argument is strictly an input cursor (index).
|
|
///
|
|
/// For char streams, seeking forward must update the stream state such
|
|
/// as line number. For seeking backwards, you will be presumably
|
|
/// backtracking using the
|
|
/// <see cref="IIntStream.Mark"/>/<see cref="IIntStream.Rewind(Integer)"/>
|
|
/// mechanism that restores state and so this method does not need to
|
|
/// update state when seeking backwards.
|
|
///
|
|
/// Currently, this method is only used for efficient backtracking using
|
|
/// memoization, but in the future it may be used for incremental parsing.
|
|
///
|
|
/// The index is 0..N-1. A seek to position i means that LA(1) will return
|
|
/// the ith symbol. So, seeking to 0 means LA(1) will return the first
|
|
/// element in the stream.
|
|
/// </remarks>
|
|
procedure Seek(const Index: Integer);
|
|
|
|
/// <summary>Returns the size of the entire stream.</summary>
|
|
/// <remarks>
|
|
/// Only makes sense for streams that buffer everything up probably,
|
|
/// but might be useful to display the entire stream or for testing.
|
|
/// This value includes a single EOF.
|
|
/// </remarks>
|
|
function Size: Integer;
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>
|
|
/// Where are you getting symbols from? Normally, implementations will
|
|
/// pass the buck all the way to the lexer who can ask its input stream
|
|
/// for the file name or whatever.
|
|
/// </summary>
|
|
property SourceName: String read GetSourceName;
|
|
end;
|
|
|
|
/// <summary>A source of characters for an ANTLR lexer </summary>
|
|
ICharStream = interface(IIntStream)
|
|
['{C30EF0DB-F4BD-4CBC-8C8F-828DABB6FF36}']
|
|
{ Property accessors }
|
|
function GetLine: Integer;
|
|
procedure SetLine(const Value: Integer);
|
|
function GetCharPositionInLine: Integer;
|
|
procedure SetCharPositionInLine(const Value: Integer);
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// Get the ith character of lookahead. This is usually the same as
|
|
/// LA(I). This will be used for labels in the generated lexer code.
|
|
/// I'd prefer to return a char here type-wise, but it's probably
|
|
/// better to be 32-bit clean and be consistent with LA.
|
|
/// </summary>
|
|
function LT(const I: Integer): Integer;
|
|
|
|
/// <summary>
|
|
/// This primarily a useful interface for action code (just make sure
|
|
/// actions don't use this on streams that don't support it).
|
|
/// For infinite streams, you don't need this.
|
|
/// </summary>
|
|
function Substring(const Start, Stop: Integer): String;
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>
|
|
/// The current line in the character stream (ANTLR tracks the
|
|
/// line information automatically. To support rewinding character
|
|
/// streams, we are able to [re-]set the line.
|
|
/// </summary>
|
|
property Line: Integer read GetLine write SetLine;
|
|
|
|
/// <summary>
|
|
/// The index of the character relative to the beginning of the
|
|
/// line (0..N-1). To support rewinding character streams, we are
|
|
/// able to [re-]set the character position.
|
|
/// </summary>
|
|
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
|
|
end;
|
|
|
|
IToken = interface(IANTLRInterface)
|
|
['{73BF129C-2F45-4C68-838E-BF5D3536AC6D}']
|
|
{ Property accessors }
|
|
function GetTokenType: Integer;
|
|
procedure SetTokenType(const Value: Integer);
|
|
function GetLine: Integer;
|
|
procedure SetLine(const Value: Integer);
|
|
function GetCharPositionInLine: Integer;
|
|
procedure SetCharPositionInLine(const Value: Integer);
|
|
function GetChannel: Integer;
|
|
procedure SetChannel(const Value: Integer);
|
|
function GetTokenIndex: Integer;
|
|
procedure SetTokenIndex(const Value: Integer);
|
|
function GetText: String;
|
|
procedure SetText(const Value: String);
|
|
|
|
{ Properties }
|
|
property TokenType: Integer read GetTokenType write SetTokenType;
|
|
|
|
/// <summary>The line number on which this token was matched; line=1..N</summary>
|
|
property Line: Integer read GetLine write SetLine;
|
|
|
|
/// <summary>
|
|
/// The index of the first character relative to the beginning of the line 0..N-1
|
|
/// </summary>
|
|
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
|
|
|
|
/// <summary>The line number on which this token was matched; line=1..N</summary>
|
|
property Channel: Integer read GetChannel write SetChannel;
|
|
|
|
/// <summary>
|
|
/// An index from 0..N-1 of the token object in the input stream
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// This must be valid in order to use the ANTLRWorks debugger.
|
|
/// </remarks>
|
|
property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
|
|
|
|
/// <summary>The text of the token</summary>
|
|
/// <remarks>
|
|
/// When setting the text, it might be a NOP such as for the CommonToken,
|
|
/// which doesn't have string pointers, just indexes into a char buffer.
|
|
/// </remarks>
|
|
property Text: String read GetText write SetText;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A source of tokens must provide a sequence of tokens via NextToken()
|
|
/// and also must reveal it's source of characters; CommonToken's text is
|
|
/// computed from a CharStream; it only store indices into the char stream.
|
|
///
|
|
/// Errors from the lexer are never passed to the parser. Either you want
|
|
/// to keep going or you do not upon token recognition error. If you do not
|
|
/// want to continue lexing then you do not want to continue parsing. Just
|
|
/// throw an exception not under RecognitionException and Delphi will naturally
|
|
/// toss you all the way out of the recognizers. If you want to continue
|
|
/// lexing then you should not throw an exception to the parser--it has already
|
|
/// requested a token. Keep lexing until you get a valid one. Just report
|
|
/// errors and keep going, looking for a valid token.
|
|
/// </summary>
|
|
ITokenSource = interface(IANTLRInterface)
|
|
['{2C71FAD0-AEEE-417D-B576-4059F7C4CEB4}']
|
|
{ Property accessors }
|
|
function GetSourceName: String;
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// Returns a Token object from the input stream (usually a CharStream).
|
|
/// Does not fail/return upon lexing error; just keeps chewing on the
|
|
/// characters until it gets a good one; errors are not passed through
|
|
/// to the parser.
|
|
/// </summary>
|
|
function NextToken: IToken;
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>
|
|
/// Where are you getting tokens from? normally the implication will simply
|
|
/// ask lexers input stream.
|
|
/// </summary>
|
|
property SourceName: String read GetSourceName;
|
|
end;
|
|
|
|
/// <summary>A stream of tokens accessing tokens from a TokenSource </summary>
|
|
ITokenStream = interface(IIntStream)
|
|
['{59E5B39D-31A6-496D-9FA9-AC75CC584B68}']
|
|
{ Property accessors }
|
|
function GetTokenSource: ITokenSource;
|
|
procedure SetTokenSource(const Value: ITokenSource);
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// Get Token at current input pointer + I ahead (where I=1 is next
|
|
/// Token).
|
|
/// I < 0 indicates tokens in the past. So -1 is previous token and -2 is
|
|
/// two tokens ago. LT(0) is undefined. For I>=N, return Token.EOFToken.
|
|
/// Return null for LT(0) and any index that results in an absolute address
|
|
/// that is negative.
|
|
/// </summary>
|
|
function LT(const K: Integer): IToken;
|
|
|
|
/// <summary>
|
|
/// Get a token at an absolute index I; 0..N-1. This is really only
|
|
/// needed for profiling and debugging and token stream rewriting.
|
|
/// If you don't want to buffer up tokens, then this method makes no
|
|
/// sense for you. Naturally you can't use the rewrite stream feature.
|
|
/// I believe DebugTokenStream can easily be altered to not use
|
|
/// this method, removing the dependency.
|
|
/// </summary>
|
|
function Get(const I: Integer): IToken;
|
|
|
|
/// <summary>Return the text of all tokens from start to stop, inclusive.
|
|
/// If the stream does not buffer all the tokens then it can just
|
|
/// return ''; Users should not access $ruleLabel.text in
|
|
/// an action of course in that case.
|
|
/// </summary>
|
|
function ToString(const Start, Stop: Integer): String; overload;
|
|
|
|
/// <summary>Because the user is not required to use a token with an index stored
|
|
/// in it, we must provide a means for two token objects themselves to
|
|
/// indicate the start/end location. Most often this will just delegate
|
|
/// to the other ToString(Integer,Integer). This is also parallel with
|
|
/// the TreeNodeStream.ToString(Object,Object).
|
|
/// </summary>
|
|
function ToString(const Start, Stop: IToken): String; overload;
|
|
|
|
{ Properties }
|
|
property TokenSource: ITokenSource read GetTokenSource write SetTokenSource;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// This is the complete state of a stream.
|
|
///
|
|
/// When walking ahead with cyclic DFA for syntactic predicates, we
|
|
/// need to record the state of the input stream (char index, line,
|
|
/// etc...) so that we can rewind the state after scanning ahead.
|
|
/// </summary>
|
|
ICharStreamState = interface(IANTLRInterface)
|
|
['{62D2A1CD-ED3A-4C95-A366-AB8F2E54060B}']
|
|
{ Property accessors }
|
|
function GetP: Integer;
|
|
procedure SetP(const Value: Integer);
|
|
function GetLine: Integer;
|
|
procedure SetLine(const Value: Integer);
|
|
function GetCharPositionInLine: Integer;
|
|
procedure SetCharPositionInLine(const Value: Integer);
|
|
|
|
{ Properties }
|
|
/// <summary>Index into the char stream of next lookahead char </summary>
|
|
property P: Integer read GetP write SetP;
|
|
|
|
/// <summary>What line number is the scanner at before processing buffer[P]? </summary>
|
|
property Line: Integer read GetLine write SetLine;
|
|
|
|
/// <summary>What char position 0..N-1 in line is scanner before processing buffer[P]? </summary>
|
|
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A pretty quick <see cref="ICharStream"/> that uses a character array
|
|
/// directly as it's underlying source.
|
|
/// </summary>
|
|
IANTLRStringStream = interface(ICharStream)
|
|
['{2FA24299-FF97-4AB6-8CA6-5D3DA13C4AB2}']
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// Resets the stream so that it is in the same state it was
|
|
/// when the object was created *except* the data array is not
|
|
/// touched.
|
|
/// </summary>
|
|
procedure Reset;
|
|
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A character stream - an <see cref="ICharStream"/> - that loads
|
|
/// and caches the contents of it's underlying file fully during
|
|
/// object construction
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// This looks very much like an ANTLReaderStream or an ANTLRInputStream
|
|
/// but, it is a special case. Since we know the exact size of the file to
|
|
/// load, we can avoid lots of data copying and buffer resizing.
|
|
/// </remarks>
|
|
IANTLRFileStream = interface(IANTLRStringStream)
|
|
['{2B0145DB-2DAA-48A0-8316-B47A69EDDD1A}']
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// Loads and buffers the specified file to be used as this
|
|
/// ANTLRFileStream's source
|
|
/// </summary>
|
|
/// <param name="FileName">File to load</param>
|
|
/// <param name="Encoding">Encoding to apply to file</param>
|
|
procedure Load(const FileName: String; const Encoding: TEncoding);
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A stripped-down version of org.antlr.misc.BitSet that is just
|
|
/// good enough to handle runtime requirements such as FOLLOW sets
|
|
/// for automatic error recovery.
|
|
/// </summary>
|
|
IBitSet = interface(IANTLRInterface)
|
|
['{F2045045-FC46-4779-A65D-56C65D257A8E}']
|
|
{ Property accessors }
|
|
function GetIsNil: Boolean;
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>return "this or a" in a new set </summary>
|
|
function BitSetOr(const A: IBitSet): IBitSet;
|
|
|
|
/// <summary>Or this element into this set (grow as necessary to accommodate)</summary>
|
|
procedure Add(const El: Integer);
|
|
|
|
/// <summary> Grows the set to a larger number of bits.</summary>
|
|
/// <param name="bit">element that must fit in set
|
|
/// </param>
|
|
procedure GrowToInclude(const Bit: Integer);
|
|
|
|
procedure OrInPlace(const A: IBitSet);
|
|
function Size: Integer;
|
|
function Member(const El: Integer): Boolean;
|
|
|
|
// remove this element from this set
|
|
procedure Remove(const El: Integer);
|
|
|
|
function NumBits: Integer;
|
|
|
|
/// <summary>return how much space is being used by the bits array not
|
|
/// how many actually have member bits on.
|
|
/// </summary>
|
|
function LengthInLongWords: Integer;
|
|
|
|
function ToArray: TIntegerArray;
|
|
function ToPackedArray: TUInt64Array;
|
|
|
|
function ToString: String; overload;
|
|
function ToString(const TokenNames: TStringArray): String; overload;
|
|
function Equals(Obj: TObject): Boolean;
|
|
|
|
{ Properties }
|
|
property IsNil: Boolean read GetIsNil;
|
|
end;
|
|
TBitSetArray = array of IBitSet;
|
|
|
|
/// <summary>
|
|
/// The set of fields needed by an abstract recognizer to recognize input
|
|
/// and recover from errors
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// As a separate state object, it can be shared among multiple grammars;
|
|
/// e.g., when one grammar imports another.
|
|
/// These fields are publicly visible but the actual state pointer per
|
|
/// parser is protected.
|
|
/// </remarks>
|
|
IRecognizerSharedState = interface(IANTLRInterface)
|
|
['{6CB6E17A-0B01-4AA7-8D49-5742A3CB8901}']
|
|
{ Property accessors }
|
|
function GetFollowing: TBitSetArray;
|
|
procedure SetFollowing(const Value: TBitSetArray);
|
|
function GetFollowingStackPointer: Integer;
|
|
procedure SetFollowingStackPointer(const Value: Integer);
|
|
function GetErrorRecovery: Boolean;
|
|
procedure SetErrorRecovery(const Value: Boolean);
|
|
function GetLastErrorIndex: Integer;
|
|
procedure SetLastErrorIndex(const Value: Integer);
|
|
function GetFailed: Boolean;
|
|
procedure SetFailed(const Value: Boolean);
|
|
function GetSyntaxErrors: Integer;
|
|
procedure SetSyntaxErrors(const Value: Integer);
|
|
function GetBacktracking: Integer;
|
|
procedure SetBacktracking(const Value: Integer);
|
|
function GetRuleMemo: TDictionaryArray<Integer, Integer>;
|
|
function GetRuleMemoCount: Integer;
|
|
procedure SetRuleMemoCount(const Value: Integer);
|
|
function GetToken: IToken;
|
|
procedure SetToken(const Value: IToken);
|
|
function GetTokenStartCharIndex: Integer;
|
|
procedure SetTokenStartCharIndex(const Value: Integer);
|
|
function GetTokenStartLine: Integer;
|
|
procedure SetTokenStartLine(const Value: Integer);
|
|
function GetTokenStartCharPositionInLine: Integer;
|
|
procedure SetTokenStartCharPositionInLine(const Value: Integer);
|
|
function GetChannel: Integer;
|
|
procedure SetChannel(const Value: Integer);
|
|
function GetTokenType: Integer;
|
|
procedure SetTokenType(const Value: Integer);
|
|
function GetText: String;
|
|
procedure SetText(const Value: String);
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>
|
|
/// Tracks the set of token types that can follow any rule invocation.
|
|
/// Stack grows upwards. When it hits the max, it grows 2x in size
|
|
/// and keeps going.
|
|
/// </summary>
|
|
property Following: TBitSetArray read GetFollowing write SetFollowing;
|
|
property FollowingStackPointer: Integer read GetFollowingStackPointer write SetFollowingStackPointer;
|
|
|
|
/// <summary>
|
|
/// This is true when we see an error and before having successfully
|
|
/// matched a token. Prevents generation of more than one error message
|
|
/// per error.
|
|
/// </summary>
|
|
property ErrorRecovery: Boolean read GetErrorRecovery write SetErrorRecovery;
|
|
|
|
/// <summary>
|
|
/// The index into the input stream where the last error occurred.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// This is used to prevent infinite loops where an error is found
|
|
/// but no token is consumed during recovery...another error is found,
|
|
/// ad naseum. This is a failsafe mechanism to guarantee that at least
|
|
/// one token/tree node is consumed for two errors.
|
|
/// </remarks>
|
|
property LastErrorIndex: Integer read GetLastErrorIndex write SetLastErrorIndex;
|
|
|
|
/// <summary>
|
|
/// In lieu of a return value, this indicates that a rule or token
|
|
/// has failed to match. Reset to false upon valid token match.
|
|
/// </summary>
|
|
property Failed: Boolean read GetFailed write SetFailed;
|
|
|
|
/// <summary>
|
|
/// Did the recognizer encounter a syntax error? Track how many.
|
|
/// </summary>
|
|
property SyntaxErrors: Integer read GetSyntaxErrors write SetSyntaxErrors;
|
|
|
|
/// <summary>
|
|
/// If 0, no backtracking is going on. Safe to exec actions etc...
|
|
/// If >0 then it's the level of backtracking.
|
|
/// </summary>
|
|
property Backtracking: Integer read GetBacktracking write SetBacktracking;
|
|
|
|
/// <summary>
|
|
/// An array[size num rules] of Map<Integer,Integer> that tracks
|
|
/// the stop token index for each rule.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// RuleMemo[RuleIndex] is the memoization table for RuleIndex.
|
|
/// For key RuleStartIndex, you get back the stop token for
|
|
/// associated rule or MEMO_RULE_FAILED.
|
|
///
|
|
/// This is only used if rule memoization is on (which it is by default).
|
|
/// </remarks>
|
|
property RuleMemo: TDictionaryArray<Integer, Integer> read GetRuleMemo;
|
|
property RuleMemoCount: Integer read GetRuleMemoCount write SetRuleMemoCount;
|
|
|
|
// Lexer Specific Members
|
|
// LEXER FIELDS (must be in same state object to avoid casting
|
|
// constantly in generated code and Lexer object) :(
|
|
|
|
/// <summary>
|
|
/// Token object normally returned by NextToken() after matching lexer rules.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// The goal of all lexer rules/methods is to create a token object.
|
|
/// This is an instance variable as multiple rules may collaborate to
|
|
/// create a single token. NextToken will return this object after
|
|
/// matching lexer rule(s). If you subclass to allow multiple token
|
|
/// emissions, then set this to the last token to be matched or
|
|
/// something nonnull so that the auto token emit mechanism will not
|
|
/// emit another token.
|
|
/// </remarks>
|
|
property Token: IToken read GetToken write SetToken;
|
|
|
|
/// <summary>
|
|
/// What character index in the stream did the current token start at?
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// Needed, for example, to get the text for current token. Set at
|
|
/// the start of nextToken.
|
|
/// </remarks>
|
|
property TokenStartCharIndex: Integer read GetTokenStartCharIndex write SetTokenStartCharIndex;
|
|
|
|
/// <summary>
|
|
/// The line on which the first character of the token resides
|
|
/// </summary>
|
|
property TokenStartLine: Integer read GetTokenStartLine write SetTokenStartLine;
|
|
|
|
/// <summary>The character position of first character within the line</summary>
|
|
property TokenStartCharPositionInLine: Integer read GetTokenStartCharPositionInLine write SetTokenStartCharPositionInLine;
|
|
|
|
/// <summary>The channel number for the current token</summary>
|
|
property Channel: Integer read GetChannel write SetChannel;
|
|
|
|
/// <summary>The token type for the current token</summary>
|
|
property TokenType: Integer read GetTokenType write SetTokenType;
|
|
|
|
/// <summary>
|
|
/// You can set the text for the current token to override what is in
|
|
/// the input char buffer. Use setText() or can set this instance var.
|
|
/// </summary>
|
|
property Text: String read GetText write SetText;
|
|
end;
|
|
|
|
ICommonToken = interface(IToken)
|
|
['{06B1B0C3-2A0D-477A-AE30-414F51ACE8A0}']
|
|
{ Property accessors }
|
|
function GetStartIndex: Integer;
|
|
procedure SetStartIndex(const Value: Integer);
|
|
function GetStopIndex: Integer;
|
|
procedure SetStopIndex(const Value: Integer);
|
|
function GetInputStream: ICharStream;
|
|
procedure SetInputStream(const Value: ICharStream);
|
|
|
|
{ Methods }
|
|
function ToString: String;
|
|
|
|
{ Properties }
|
|
property StartIndex: Integer read GetStartIndex write SetStartIndex;
|
|
property StopIndex: Integer read GetStopIndex write SetStopIndex;
|
|
property InputStream: ICharStream read GetInputStream write SetInputStream;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A Token object like we'd use in ANTLR 2.x; has an actual string created
|
|
/// and associated with this object. These objects are needed for imaginary
|
|
/// tree nodes that have payload objects. We need to create a Token object
|
|
/// that has a string; the tree node will point at this token. CommonToken
|
|
/// has indexes into a char stream and hence cannot be used to introduce
|
|
/// new strings.
|
|
/// </summary>
|
|
IClassicToken = interface(IToken)
|
|
{ Property accessors }
|
|
function GetTokenType: Integer;
|
|
procedure SetTokenType(const Value: Integer);
|
|
function GetLine: Integer;
|
|
procedure SetLine(const Value: Integer);
|
|
function GetCharPositionInLine: Integer;
|
|
procedure SetCharPositionInLine(const Value: Integer);
|
|
function GetChannel: Integer;
|
|
procedure SetChannel(const Value: Integer);
|
|
function GetTokenIndex: Integer;
|
|
procedure SetTokenIndex(const Value: Integer);
|
|
function GetText: String;
|
|
procedure SetText(const Value: String);
|
|
function GetInputStream: ICharStream;
|
|
procedure SetInputStream(const Value: ICharStream);
|
|
|
|
{ Properties }
|
|
property TokenType: Integer read GetTokenType write SetTokenType;
|
|
property Line: Integer read GetLine write SetLine;
|
|
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
|
|
property Channel: Integer read GetChannel write SetChannel;
|
|
property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
|
|
property Text: String read GetText write SetText;
|
|
property InputStream: ICharStream read GetInputStream write SetInputStream;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A generic recognizer that can handle recognizers generated from
|
|
/// lexer, parser, and tree grammars. This is all the parsing
|
|
/// support code essentially; most of it is error recovery stuff and
|
|
/// backtracking.
|
|
/// </summary>
|
|
IBaseRecognizer = interface(IANTLRObject)
|
|
['{90813CE2-614B-4773-A26E-936E7DE7E9E9}']
|
|
{ Property accessors }
|
|
function GetInput: IIntStream;
|
|
function GetBacktrackingLevel: Integer;
|
|
function GetState: IRecognizerSharedState;
|
|
function GetNumberOfSyntaxErrors: Integer;
|
|
function GetGrammarFileName: String;
|
|
function GetSourceName: String;
|
|
function GetTokenNames: TStringArray;
|
|
|
|
{ Methods }
|
|
procedure BeginBacktrack(const Level: Integer);
|
|
procedure EndBacktrack(const Level: Integer; const Successful: Boolean);
|
|
|
|
/// <summary>Reset the parser's state. Subclasses must rewind the input stream.</summary>
|
|
procedure Reset;
|
|
|
|
/// <summary>
|
|
/// Match current input symbol against ttype. Attempt
|
|
/// single token insertion or deletion error recovery. If
|
|
/// that fails, throw EMismatchedTokenException.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// To turn off single token insertion or deletion error
|
|
/// recovery, override MismatchRecover() and have it call
|
|
/// plain Mismatch(), which does not recover. Then any error
|
|
/// in a rule will cause an exception and immediate exit from
|
|
/// rule. Rule would recover by resynchronizing to the set of
|
|
/// symbols that can follow rule ref.
|
|
/// </remarks>
|
|
function Match(const Input: IIntStream; const TokenType: Integer;
|
|
const Follow: IBitSet): IANTLRInterface;
|
|
|
|
function MismatchIsUnwantedToken(const Input: IIntStream;
|
|
const TokenType: Integer): Boolean;
|
|
|
|
function MismatchIsMissingToken(const Input: IIntStream;
|
|
const Follow: IBitSet): Boolean;
|
|
|
|
/// <summary>A hook to listen in on the token consumption during error recovery.
|
|
/// The DebugParser subclasses this to fire events to the listenter.
|
|
/// </summary>
|
|
procedure BeginResync;
|
|
procedure EndResync;
|
|
|
|
/// <summary>
|
|
/// Report a recognition problem.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// This method sets errorRecovery to indicate the parser is recovering
|
|
/// not parsing. Once in recovery mode, no errors are generated.
|
|
/// To get out of recovery mode, the parser must successfully Match
|
|
/// a token (after a resync). So it will go:
|
|
///
|
|
/// 1. error occurs
|
|
/// 2. enter recovery mode, report error
|
|
/// 3. consume until token found in resynch set
|
|
/// 4. try to resume parsing
|
|
/// 5. next Match() will reset errorRecovery mode
|
|
///
|
|
/// If you override, make sure to update syntaxErrors if you care about that.
|
|
/// </remarks>
|
|
procedure ReportError(const E: ERecognitionException);
|
|
|
|
/// <summary> Match the wildcard: in a symbol</summary>
|
|
procedure MatchAny(const Input: IIntStream);
|
|
|
|
procedure DisplayRecognitionError(const TokenNames: TStringArray;
|
|
const E: ERecognitionException);
|
|
|
|
/// <summary>
|
|
/// What error message should be generated for the various exception types?
|
|
///
|
|
/// Not very object-oriented code, but I like having all error message generation
|
|
/// within one method rather than spread among all of the exception classes. This
|
|
/// also makes it much easier for the exception handling because the exception
|
|
/// classes do not have to have pointers back to this object to access utility
|
|
/// routines and so on. Also, changing the message for an exception type would be
|
|
/// difficult because you would have to subclassing exception, but then somehow get
|
|
/// ANTLR to make those kinds of exception objects instead of the default.
|
|
///
|
|
/// This looks weird, but trust me--it makes the most sense in terms of flexibility.
|
|
///
|
|
/// For grammar debugging, you will want to override this to add more information
|
|
/// such as the stack frame with GetRuleInvocationStack(e, this.GetType().Fullname)
|
|
/// and, for no viable alts, the decision description and state etc...
|
|
///
|
|
/// Override this to change the message generated for one or more exception types.
|
|
/// </summary>
|
|
function GetErrorMessage(const E: ERecognitionException;
|
|
const TokenNames: TStringArray): String;
|
|
|
|
/// <summary>
|
|
/// What is the error header, normally line/character position information?
|
|
/// </summary>
|
|
function GetErrorHeader(const E: ERecognitionException): String;
|
|
|
|
/// <summary>
|
|
/// How should a token be displayed in an error message? The default
|
|
/// is to display just the text, but during development you might
|
|
/// want to have a lot of information spit out. Override in that case
|
|
/// to use t.ToString() (which, for CommonToken, dumps everything about
|
|
/// the token). This is better than forcing you to override a method in
|
|
/// your token objects because you don't have to go modify your lexer
|
|
/// so that it creates a new type.
|
|
/// </summary>
|
|
function GetTokenErrorDisplay(const T: IToken): String;
|
|
|
|
/// <summary>
|
|
/// Override this method to change where error messages go
|
|
/// </summary>
|
|
procedure EmitErrorMessage(const Msg: String);
|
|
|
|
/// <summary>
|
|
/// Recover from an error found on the input stream. This is
|
|
/// for NoViableAlt and mismatched symbol exceptions. If you enable
|
|
/// single token insertion and deletion, this will usually not
|
|
/// handle mismatched symbol exceptions but there could be a mismatched
|
|
/// token that the Match() routine could not recover from.
|
|
/// </summary>
|
|
procedure Recover(const Input: IIntStream; const RE: ERecognitionException);
|
|
|
|
// Not currently used
|
|
function RecoverFromMismatchedSet(const Input: IIntStream;
|
|
const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;
|
|
|
|
procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload;
|
|
|
|
/// <summary>Consume tokens until one matches the given token set </summary>
|
|
procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload;
|
|
|
|
/// <summary>
|
|
/// Returns List <String> of the rules in your parser instance
|
|
/// leading up to a call to this method. You could override if
|
|
/// you want more details such as the file/line info of where
|
|
/// in the parser source code a rule is invoked.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// NOT IMPLEMENTED IN THE DELPHI VERSION YET
|
|
/// This is very useful for error messages and for context-sensitive
|
|
/// error recovery.
|
|
/// </remarks>
|
|
//function GetRuleInvocationStack: IList<IANTLRInterface>; overload;
|
|
|
|
/// <summary>
|
|
/// A more general version of GetRuleInvocationStack where you can
|
|
/// pass in, for example, a RecognitionException to get it's rule
|
|
/// stack trace. This routine is shared with all recognizers, hence,
|
|
/// static.
|
|
///
|
|
/// TODO: move to a utility class or something; weird having lexer call this
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// NOT IMPLEMENTED IN THE DELPHI VERSION YET
|
|
/// </remarks>
|
|
//function GetRuleInvocationStack(const E: Exception;
|
|
// const RecognizerClassName: String): IList<IANTLRInterface>; overload;
|
|
|
|
/// <summary>A convenience method for use most often with template rewrites.
|
|
/// Convert a List<Token> to List<String>
|
|
/// </summary>
|
|
function ToStrings(const Tokens: IList<IToken>): IList<String>;
|
|
|
|
/// <summary>
|
|
/// Given a rule number and a start token index number, return
|
|
/// MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
|
|
/// start index. If this rule has parsed input starting from the
|
|
/// start index before, then return where the rule stopped parsing.
|
|
/// It returns the index of the last token matched by the rule.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// For now we use a hashtable and just the slow Object-based one.
|
|
/// Later, we can make a special one for ints and also one that
|
|
/// tosses out data after we commit past input position i.
|
|
/// </remarks>
|
|
function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer;
|
|
|
|
/// <summary>
|
|
/// Has this rule already parsed input at the current index in the
|
|
/// input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
|
|
/// If we attempted but failed to parse properly before, return
|
|
/// MEMO_RULE_FAILED.
|
|
///
|
|
/// This method has a side-effect: if we have seen this input for
|
|
/// this rule and successfully parsed before, then seek ahead to
|
|
/// 1 past the stop token matched for this rule last time.
|
|
/// </summary>
|
|
function AlreadyParsedRule(const Input: IIntStream;
|
|
const RuleIndex: Integer): Boolean;
|
|
|
|
/// <summary>
|
|
/// Record whether or not this rule parsed the input at this position
|
|
/// successfully. Use a standard hashtable for now.
|
|
/// </summary>
|
|
procedure Memoize(const Input: IIntStream; const RuleIndex,
|
|
RuleStartIndex: Integer);
|
|
|
|
/// <summary>
|
|
/// Return how many rule/input-index pairs there are in total.
|
|
/// TODO: this includes synpreds. :(
|
|
/// </summary>
|
|
/// <returns></returns>
|
|
function GetRuleMemoizationChaceSize: Integer;
|
|
|
|
procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
|
|
const InputSymbol: String);
|
|
procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
|
|
const InputSymbol: String);
|
|
|
|
{ Properties }
|
|
property Input: IIntStream read GetInput;
|
|
property BacktrackingLevel: Integer read GetBacktrackingLevel;
|
|
property State: IRecognizerSharedState read GetState;
|
|
|
|
/// <summary>
|
|
/// Get number of recognition errors (lexer, parser, tree parser). Each
|
|
/// recognizer tracks its own number. So parser and lexer each have
|
|
/// separate count. Does not count the spurious errors found between
|
|
/// an error and next valid token match
|
|
///
|
|
/// See also ReportError()
|
|
/// </summary>
|
|
property NumberOfSyntaxErrors: Integer read GetNumberOfSyntaxErrors;
|
|
|
|
/// <summary>
|
|
/// For debugging and other purposes, might want the grammar name.
|
|
/// Have ANTLR generate an implementation for this property.
|
|
/// </summary>
|
|
/// <returns></returns>
|
|
property GrammarFileName: String read GetGrammarFileName;
|
|
|
|
/// <summary>
|
|
/// For debugging and other purposes, might want the source name.
|
|
/// Have ANTLR provide a hook for this property.
|
|
/// </summary>
|
|
/// <returns>The source name</returns>
|
|
property SourceName: String read GetSourceName;
|
|
|
|
/// <summary>
|
|
/// Used to print out token names like ID during debugging and
|
|
/// error reporting. The generated parsers implement a method
|
|
/// that overrides this to point to their string[] tokenNames.
|
|
/// </summary>
|
|
property TokenNames: TStringArray read GetTokenNames;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// The most common stream of tokens is one where every token is buffered up
|
|
/// and tokens are prefiltered for a certain channel (the parser will only
|
|
/// see these tokens and cannot change the filter channel number during the
|
|
/// parse).
|
|
///
|
|
/// TODO: how to access the full token stream? How to track all tokens matched per rule?
|
|
/// </summary>
|
|
ICommonTokenStream = interface(ITokenStream)
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// A simple filter mechanism whereby you can tell this token stream
|
|
/// to force all tokens of type TType to be on Channel.
|
|
/// </summary>
|
|
///
|
|
/// <remarks>
|
|
/// For example,
|
|
/// when interpreting, we cannot exec actions so we need to tell
|
|
/// the stream to force all WS and NEWLINE to be a different, ignored
|
|
/// channel.
|
|
/// </remarks>
|
|
procedure SetTokenTypeChannel(const TType, Channel: Integer);
|
|
|
|
procedure DiscardTokenType(const TType: Integer);
|
|
|
|
procedure DiscardOffChannelTokens(const Discard: Boolean);
|
|
|
|
function GetTokens: IList<IToken>; overload;
|
|
function GetTokens(const Start, Stop: Integer): IList<IToken>; overload;
|
|
|
|
/// <summary>Given a start and stop index, return a List of all tokens in
|
|
/// the token type BitSet. Return null if no tokens were found. This
|
|
/// method looks at both on and off channel tokens.
|
|
/// </summary>
|
|
function GetTokens(const Start, Stop: Integer;
|
|
const Types: IBitSet): IList<IToken>; overload;
|
|
|
|
function GetTokens(const Start, Stop: Integer;
|
|
const Types: IList<Integer>): IList<IToken>; overload;
|
|
|
|
function GetTokens(const Start, Stop,
|
|
TokenType: Integer): IList<IToken>; overload;
|
|
|
|
procedure Reset;
|
|
end;
|
|
|
|
IDFA = interface;
|
|
|
|
TSpecialStateTransitionHandler = function(const DFA: IDFA; S: Integer;
|
|
const Input: IIntStream): Integer of Object;
|
|
|
|
/// <summary>
|
|
/// A DFA implemented as a set of transition tables.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// <para>
|
|
/// Any state that has a semantic predicate edge is special; those states are
|
|
/// generated with if-then-else structures in a SpecialStateTransition()
|
|
/// which is generated by cyclicDFA template.
|
|
/// </para>
|
|
/// <para>
|
|
/// There are at most 32767 states (16-bit signed short). Could get away with byte
|
|
/// sometimes but would have to generate different types and the simulation code too.
|
|
/// </para>
|
|
/// <para>
|
|
/// As a point of reference, the Tokens rule DFA for the lexer in the Java grammar
|
|
/// sample has approximately 326 states.
|
|
/// </para>
|
|
/// </remarks>
|
|
IDFA = interface(IANTLRInterface)
|
|
['{36312B59-B718-48EF-A0EC-4529DE70F4C2}']
|
|
{ Property accessors }
|
|
function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
|
|
procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// From the input stream, predict what alternative will succeed using this
|
|
/// DFA (representing the covering regular approximation to the underlying CFL).
|
|
/// </summary>
|
|
/// <param name="Input">Input stream</param>
|
|
/// <returns>Return an alternative number 1..N. Throw an exception upon error.</returns>
|
|
function Predict(const Input: IIntStream): Integer;
|
|
|
|
/// <summary>
|
|
/// A hook for debugging interface
|
|
/// </summary>
|
|
/// <param name="NVAE"></param>
|
|
procedure Error(const NVAE: ENoViableAltException);
|
|
|
|
function SpecialStateTransition(const S: Integer; const Input: IIntStream): Integer;
|
|
|
|
function Description: String;
|
|
|
|
function SpecialTransition(const State, Symbol: Integer): Integer;
|
|
|
|
{ Properties }
|
|
property SpecialStateTransitionHandler: TSpecialStateTransitionHandler read GetSpecialStateTransitionHandler write SetSpecialStateTransitionHandler;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A lexer is recognizer that draws input symbols from a character stream.
|
|
/// lexer grammars result in a subclass of this object. A Lexer object
|
|
/// uses simplified Match() and error recovery mechanisms in the interest
|
|
/// of speed.
|
|
/// </summary>
|
|
ILexer = interface(IBaseRecognizer)
|
|
['{331AAB49-E7CD-40E7-AEF5-427F7D6577AD}']
|
|
{ Property accessors }
|
|
function GetCharStream: ICharStream;
|
|
procedure SetCharStream(const Value: ICharStream);
|
|
function GetLine: Integer;
|
|
function GetCharPositionInLine: Integer;
|
|
function GetCharIndex: Integer;
|
|
function GetText: String;
|
|
procedure SetText(const Value: String);
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>
|
|
/// Return a token from this source; i.e., Match a token on the char stream.
|
|
/// </summary>
|
|
function NextToken: IToken;
|
|
|
|
/// <summary>
|
|
/// Instruct the lexer to skip creating a token for current lexer rule and
|
|
/// look for another token. NextToken() knows to keep looking when a lexer
|
|
/// rule finishes with token set to SKIP_TOKEN. Recall that if token==null
|
|
/// at end of any token rule, it creates one for you and emits it.
|
|
/// </summary>
|
|
procedure Skip;
|
|
|
|
/// <summary>This is the lexer entry point that sets instance var 'token' </summary>
|
|
procedure DoTokens;
|
|
|
|
/// <summary>
|
|
/// Currently does not support multiple emits per nextToken invocation
|
|
/// for efficiency reasons. Subclass and override this method and
|
|
/// NextToken (to push tokens into a list and pull from that list rather
|
|
/// than a single variable as this implementation does).
|
|
/// </summary>
|
|
procedure Emit(const Token: IToken); overload;
|
|
|
|
/// <summary>
|
|
/// The standard method called to automatically emit a token at the
|
|
/// outermost lexical rule. The token object should point into the
|
|
/// char buffer start..stop. If there is a text override in 'text',
|
|
/// use that to set the token's text.
|
|
/// </summary>
|
|
/// <remarks><para>Override this method to emit custom Token objects.</para>
|
|
/// <para>If you are building trees, then you should also override
|
|
/// Parser or TreeParser.GetMissingSymbol().</para>
|
|
///</remarks>
|
|
function Emit: IToken; overload;
|
|
|
|
procedure Match(const S: String); overload;
|
|
procedure Match(const C: Integer); overload;
|
|
procedure MatchAny;
|
|
procedure MatchRange(const A, B: Integer);
|
|
|
|
/// <summary>
|
|
/// Lexers can normally Match any char in it's vocabulary after matching
|
|
/// a token, so do the easy thing and just kill a character and hope
|
|
/// it all works out. You can instead use the rule invocation stack
|
|
/// to do sophisticated error recovery if you are in a Fragment rule.
|
|
/// </summary>
|
|
procedure Recover(const RE: ERecognitionException);
|
|
|
|
function GetCharErrorDisplay(const C: Integer): String;
|
|
|
|
procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
|
|
procedure TraceOut(const RuleName: String; const RuleIndex: Integer);
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>Set the char stream and reset the lexer </summary>
|
|
property CharStream: ICharStream read GetCharStream write SetCharStream;
|
|
property Line: Integer read GetLine;
|
|
property CharPositionInLine: Integer read GetCharPositionInLine;
|
|
|
|
/// <summary>What is the index of the current character of lookahead? </summary>
|
|
property CharIndex: Integer read GetCharIndex;
|
|
|
|
/// <summary>
|
|
/// Gets or sets the 'lexeme' for the current token.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// <para>
|
|
/// The getter returns the text matched so far for the current token or any
|
|
/// text override.
|
|
/// </para>
|
|
/// <para>
|
|
/// The setter sets the complete text of this token. It overrides/wipes any
|
|
/// previous changes to the text.
|
|
/// </para>
|
|
/// </remarks>
|
|
property Text: String read GetText write SetText;
|
|
end;
|
|
|
|
/// <summary>A parser for TokenStreams. Parser grammars result in a subclass
|
|
/// of this.
|
|
/// </summary>
|
|
IParser = interface(IBaseRecognizer)
|
|
['{7420879A-5D1F-43CA-BD49-2264D7514501}']
|
|
{ Property accessors }
|
|
function GetTokenStream: ITokenStream;
|
|
procedure SetTokenStream(const Value: ITokenStream);
|
|
|
|
{ Methods }
|
|
procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
|
|
procedure TraceOut(const RuleName: String; const RuleIndex: Integer);
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>Set the token stream and reset the parser </summary>
|
|
property TokenStream: ITokenStream read GetTokenStream write SetTokenStream;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// Rules can return start/stop info as well as possible trees and templates
|
|
/// </summary>
|
|
IRuleReturnScope = interface(IANTLRInterface)
|
|
['{E9870056-BF6D-4CB2-B71C-10B80797C0B4}']
|
|
{ Property accessors }
|
|
function GetStart: IANTLRInterface;
|
|
procedure SetStart(const Value: IANTLRInterface);
|
|
function GetStop: IANTLRInterface;
|
|
procedure SetStop(const Value: IANTLRInterface);
|
|
function GetTree: IANTLRInterface;
|
|
procedure SetTree(const Value: IANTLRInterface);
|
|
function GetTemplate: IANTLRInterface;
|
|
|
|
{ Properties }
|
|
|
|
/// <summary>Return the start token or tree </summary>
|
|
property Start: IANTLRInterface read GetStart write SetStart;
|
|
|
|
/// <summary>Return the stop token or tree </summary>
|
|
property Stop: IANTLRInterface read GetStop write SetStop;
|
|
|
|
/// <summary>Has a value potentially if output=AST; </summary>
|
|
property Tree: IANTLRInterface read GetTree write SetTree;
|
|
|
|
/// <summary>
|
|
/// Has a value potentially if output=template;
|
|
/// Don't use StringTemplate type to avoid dependency on ST assembly
|
|
/// </summary>
|
|
property Template: IANTLRInterface read GetTemplate;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// Rules that return more than a single value must return an object
|
|
/// containing all the values. Besides the properties defined in
|
|
/// RuleLabelScope.PredefinedRulePropertiesScope there may be user-defined
|
|
/// return values. This class simply defines the minimum properties that
|
|
/// are always defined and methods to access the others that might be
|
|
/// available depending on output option such as template and tree.
|
|
///
|
|
/// Note text is not an actual property of the return value, it is computed
|
|
/// from start and stop using the input stream's ToString() method. I
|
|
/// could add a ctor to this so that we can pass in and store the input
|
|
/// stream, but I'm not sure we want to do that. It would seem to be undefined
|
|
/// to get the .text property anyway if the rule matches tokens from multiple
|
|
/// input streams.
|
|
///
|
|
/// I do not use getters for fields of objects that are used simply to
|
|
/// group values such as this aggregate.
|
|
/// </summary>
|
|
IParserRuleReturnScope = interface(IRuleReturnScope)
|
|
['{9FB62050-E23B-4FE4-87D5-2C1EE67AEC3E}']
|
|
end;
|
|
|
|
/// <summary>Useful for dumping out the input stream after doing some
|
|
/// augmentation or other manipulations.
|
|
/// </summary>
|
|
///
|
|
/// <remarks>
|
|
/// You can insert stuff, Replace, and delete chunks. Note that the
|
|
/// operations are done lazily--only if you convert the buffer to a
|
|
/// String. This is very efficient because you are not moving data around
|
|
/// all the time. As the buffer of tokens is converted to strings, the
|
|
/// ToString() method(s) check to see if there is an operation at the
|
|
/// current index. If so, the operation is done and then normal String
|
|
/// rendering continues on the buffer. This is like having multiple Turing
|
|
/// machine instruction streams (programs) operating on a single input tape. :)
|
|
///
|
|
/// Since the operations are done lazily at ToString-time, operations do not
|
|
/// screw up the token index values. That is, an insert operation at token
|
|
/// index I does not change the index values for tokens I+1..N-1.
|
|
///
|
|
/// Because operations never actually alter the buffer, you may always get
|
|
/// the original token stream back without undoing anything. Since
|
|
/// the instructions are queued up, you can easily simulate transactions and
|
|
/// roll back any changes if there is an error just by removing instructions.
|
|
/// For example,
|
|
///
|
|
/// var
|
|
/// Input: ICharStream;
|
|
/// Lex: ILexer;
|
|
/// Tokens: ITokenRewriteStream;
|
|
/// Parser: IParser;
|
|
/// Input := TANTLRFileStream.Create('input');
|
|
/// Lex := TLexer.Create(Input);
|
|
/// Tokens := TTokenRewriteStream.Create(Lex);
|
|
/// Parser := TParser.Create(tokens);
|
|
/// Parser.startRule();
|
|
///
|
|
/// Then in the rules, you can execute
|
|
/// var
|
|
/// t,u: IToken;
|
|
/// ...
|
|
/// Input.InsertAfter(t, 'text to put after t');
|
|
/// Input.InsertAfter(u, 'text after u');
|
|
/// WriteLn(Tokens.ToString());
|
|
///
|
|
/// Actually, you have to cast the 'input' to a TokenRewriteStream. :(
|
|
///
|
|
/// You can also have multiple "instruction streams" and get multiple
|
|
/// rewrites from a single pass over the input. Just name the instruction
|
|
/// streams and use that name again when printing the buffer. This could be
|
|
/// useful for generating a C file and also its header file--all from the
|
|
/// same buffer:
|
|
///
|
|
/// Tokens.InsertAfter('pass1', t, 'text to put after t');
|
|
/// Tokens.InsertAfter('pass2', u, 'text after u');
|
|
/// WriteLn(Tokens.ToString('pass1'));
|
|
/// WriteLn(Tokens.ToString('pass2'));
|
|
///
|
|
/// If you don't use named rewrite streams, a "default" stream is used as
|
|
/// the first example shows.
|
|
/// </remarks>
|
|
ITokenRewriteStream = interface(ICommonTokenStream)
|
|
['{7B49CBB6-9395-4781-B616-F201889EEA13}']
|
|
{ Methods }
|
|
procedure Rollback(const InstructionIndex: Integer); overload;
|
|
|
|
/// <summary>Rollback the instruction stream for a program so that
|
|
/// the indicated instruction (via instructionIndex) is no
|
|
/// longer in the stream. UNTESTED!
|
|
/// </summary>
|
|
procedure Rollback(const ProgramName: String;
|
|
const InstructionIndex: Integer); overload;
|
|
|
|
procedure DeleteProgram; overload;
|
|
|
|
/// <summary>Reset the program so that no instructions exist </summary>
|
|
procedure DeleteProgram(const ProgramName: String); overload;
|
|
|
|
procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload;
|
|
procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload;
|
|
procedure InsertAfter(const ProgramName: String; const T: IToken;
|
|
const Text: IANTLRInterface); overload;
|
|
procedure InsertAfter(const ProgramName: String; const Index: Integer;
|
|
const Text: IANTLRInterface); overload;
|
|
procedure InsertAfter(const T: IToken; const Text: String); overload;
|
|
procedure InsertAfter(const Index: Integer; const Text: String); overload;
|
|
procedure InsertAfter(const ProgramName: String; const T: IToken;
|
|
const Text: String); overload;
|
|
procedure InsertAfter(const ProgramName: String; const Index: Integer;
|
|
const Text: String); overload;
|
|
|
|
procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload;
|
|
procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload;
|
|
procedure InsertBefore(const ProgramName: String; const T: IToken;
|
|
const Text: IANTLRInterface); overload;
|
|
procedure InsertBefore(const ProgramName: String; const Index: Integer;
|
|
const Text: IANTLRInterface); overload;
|
|
procedure InsertBefore(const T: IToken; const Text: String); overload;
|
|
procedure InsertBefore(const Index: Integer; const Text: String); overload;
|
|
procedure InsertBefore(const ProgramName: String; const T: IToken;
|
|
const Text: String); overload;
|
|
procedure InsertBefore(const ProgramName: String; const Index: Integer;
|
|
const Text: String); overload;
|
|
|
|
procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload;
|
|
procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload;
|
|
procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload;
|
|
procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
|
|
const Text: IANTLRInterface); overload;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
|
|
const Text: IANTLRInterface); overload;
|
|
procedure Replace(const Index: Integer; const Text: String); overload;
|
|
procedure Replace(const Start, Stop: Integer; const Text: String); overload;
|
|
procedure Replace(const IndexT: IToken; const Text: String); overload;
|
|
procedure Replace(const Start, Stop: IToken; const Text: String); overload;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
|
|
const Text: String); overload;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
|
|
const Text: String); overload;
|
|
|
|
procedure Delete(const Index: Integer); overload;
|
|
procedure Delete(const Start, Stop: Integer); overload;
|
|
procedure Delete(const IndexT: IToken); overload;
|
|
procedure Delete(const Start, Stop: IToken); overload;
|
|
procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload;
|
|
procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload;
|
|
|
|
function GetLastRewriteTokenIndex: Integer;
|
|
|
|
function ToOriginalString: String; overload;
|
|
function ToOriginalString(const Start, Stop: Integer): String; overload;
|
|
|
|
function ToString(const ProgramName: String): String; overload;
|
|
function ToString(const ProgramName: String;
|
|
const Start, Stop: Integer): String; overload;
|
|
|
|
function ToDebugString: String; overload;
|
|
function ToDebugString(const Start, Stop: Integer): String; overload;
|
|
end;
|
|
|
|
/// <summary>The root of the ANTLR exception hierarchy.</summary>
|
|
/// <remarks>
|
|
/// To avoid English-only error messages and to generally make things
|
|
/// as flexible as possible, these exceptions are not created with strings,
|
|
/// but rather the information necessary to generate an error. Then
|
|
/// the various reporting methods in Parser and Lexer can be overridden
|
|
/// to generate a localized error message. For example, MismatchedToken
|
|
/// exceptions are built with the expected token type.
|
|
/// So, don't expect getMessage() to return anything.
|
|
///
|
|
/// You can access the stack trace, which means that you can compute the
|
|
/// complete trace of rules from the start symbol. This gives you considerable
|
|
/// context information with which to generate useful error messages.
|
|
///
|
|
/// ANTLR generates code that throws exceptions upon recognition error and
|
|
/// also generates code to catch these exceptions in each rule. If you
|
|
/// want to quit upon first error, you can turn off the automatic error
|
|
/// handling mechanism using rulecatch action, but you still need to
|
|
/// override methods mismatch and recoverFromMismatchSet.
|
|
///
|
|
/// In general, the recognition exceptions can track where in a grammar a
|
|
/// problem occurred and/or what was the expected input. While the parser
|
|
/// knows its state (such as current input symbol and line info) that
|
|
/// state can change before the exception is reported so current token index
|
|
/// is computed and stored at exception time. From this info, you can
|
|
/// perhaps print an entire line of input not just a single token, for example.
|
|
/// Better to just say the recognizer had a problem and then let the parser
|
|
/// figure out a fancy report.
|
|
/// </remarks>
|
|
ERecognitionException = class(Exception)
|
|
strict private
|
|
FApproximateLineInfo: Boolean;
|
|
strict protected
|
|
/// <summary>What input stream did the error occur in? </summary>
|
|
FInput: IIntStream;
|
|
|
|
/// <summary>
|
|
/// What is index of token/char were we looking at when the error occurred?
|
|
/// </summary>
|
|
FIndex: Integer;
|
|
|
|
/// <summary>
|
|
/// The current Token when an error occurred. Since not all streams
|
|
/// can retrieve the ith Token, we have to track the Token object.
|
|
/// </summary>
|
|
FToken: IToken;
|
|
|
|
/// <summary>[Tree parser] Node with the problem.</summary>
|
|
FNode: IANTLRInterface;
|
|
|
|
/// <summary>The current char when an error occurred. For lexers. </summary>
|
|
FC: Integer;
|
|
|
|
/// <summary>Track the line at which the error occurred in case this is
|
|
/// generated from a lexer. We need to track this since the
|
|
/// unexpected char doesn't carry the line info.
|
|
/// </summary>
|
|
FLine: Integer;
|
|
FCharPositionInLine: Integer;
|
|
strict protected
|
|
procedure ExtractInformationFromTreeNodeStream(const Input: IIntStream);
|
|
function GetUnexpectedType: Integer; virtual;
|
|
public
|
|
/// <summary>Used for remote debugger deserialization </summary>
|
|
constructor Create; overload;
|
|
constructor Create(const AMessage: String); overload;
|
|
constructor Create(const AInput: IIntStream); overload;
|
|
constructor Create(const AMessage: String; const AInput: IIntStream); overload;
|
|
|
|
/// <summary>
|
|
/// If you are parsing a tree node stream, you will encounter some
|
|
/// imaginary nodes w/o line/col info. We now search backwards looking
|
|
/// for most recent token with line/col info, but notify getErrorHeader()
|
|
/// that info is approximate.
|
|
/// </summary>
|
|
property ApproximateLineInfo: Boolean read FApproximateLineInfo write FApproximateLineInfo;
|
|
|
|
/// <summary>
|
|
/// Returns the current Token when the error occurred (for parsers
|
|
/// although a tree parser might also set the token)
|
|
/// </summary>
|
|
property Token: IToken read FToken write FToken;
|
|
|
|
/// <summary>
|
|
/// Returns the [tree parser] node where the error occured (for tree parsers).
|
|
/// </summary>
|
|
property Node: IANTLRInterface read FNode write FNode;
|
|
|
|
/// <summary>
|
|
/// Returns the line at which the error occurred (for lexers)
|
|
/// </summary>
|
|
property Line: Integer read FLine write FLine;
|
|
|
|
/// <summary>
|
|
/// Returns the character position in the line when the error
|
|
/// occurred (for lexers)
|
|
/// </summary>
|
|
property CharPositionInLine: Integer read FCharPositionInLine write FCharPositionInLine;
|
|
|
|
/// <summary>Returns the input stream in which the error occurred</summary>
|
|
property Input: IIntStream read FInput write FInput;
|
|
|
|
/// <summary>
|
|
/// Returns the token type or char of the unexpected input element
|
|
/// </summary>
|
|
property UnexpectedType: Integer read GetUnexpectedType;
|
|
|
|
/// <summary>
|
|
/// Returns the current char when the error occurred (for lexers)
|
|
/// </summary>
|
|
property Character: Integer read FC write FC;
|
|
|
|
/// <summary>
|
|
/// Returns the token/char index in the stream when the error occurred
|
|
/// </summary>
|
|
property Index: Integer read FIndex write FIndex;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// A mismatched char or Token or tree node.
|
|
/// </summary>
|
|
EMismatchedTokenException = class(ERecognitionException)
|
|
strict private
|
|
FExpecting: Integer;
|
|
public
|
|
constructor Create(const AExpecting: Integer; const AInput: IIntStream);
|
|
|
|
function ToString: String; override;
|
|
|
|
property Expecting: Integer read FExpecting write FExpecting;
|
|
end;
|
|
|
|
EUnwantedTokenException = class(EMismatchedTokenException)
|
|
strict private
|
|
function GetUnexpectedToken: IToken;
|
|
public
|
|
property UnexpectedToken: IToken read GetUnexpectedToken;
|
|
|
|
function ToString: String; override;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// We were expecting a token but it's not found. The current token
|
|
/// is actually what we wanted next. Used for tree node errors too.
|
|
/// </summary>
|
|
EMissingTokenException = class(EMismatchedTokenException)
|
|
strict private
|
|
FInserted: IANTLRInterface;
|
|
function GetMissingType: Integer;
|
|
public
|
|
constructor Create(const AExpecting: Integer; const AInput: IIntStream;
|
|
const AInserted: IANTLRInterface);
|
|
|
|
function ToString: String; override;
|
|
|
|
property MissingType: Integer read GetMissingType;
|
|
property Inserted: IANTLRInterface read FInserted write FInserted;
|
|
end;
|
|
|
|
EMismatchedTreeNodeException = class(ERecognitionException)
|
|
strict private
|
|
FExpecting: Integer;
|
|
public
|
|
constructor Create(const AExpecting: Integer; const AInput: IIntStream);
|
|
|
|
function ToString: String; override;
|
|
|
|
property Expecting: Integer read FExpecting write FExpecting;
|
|
end;
|
|
|
|
ENoViableAltException = class(ERecognitionException)
|
|
strict private
|
|
FGrammarDecisionDescription: String;
|
|
FDecisionNumber: Integer;
|
|
FStateNumber: Integer;
|
|
public
|
|
constructor Create(const AGrammarDecisionDescription: String;
|
|
const ADecisionNumber, AStateNumber: Integer; const AInput: IIntStream);
|
|
|
|
function ToString: String; override;
|
|
|
|
property GrammarDecisionDescription: String read FGrammarDecisionDescription;
|
|
property DecisionNumber: Integer read FDecisionNumber;
|
|
property StateNumber: Integer read FStateNumber;
|
|
end;
|
|
|
|
EEarlyExitException = class(ERecognitionException)
|
|
strict private
|
|
FDecisionNumber: Integer;
|
|
public
|
|
constructor Create(const ADecisionNumber: Integer; const AInput: IIntStream);
|
|
|
|
property DecisionNumber: Integer read FDecisionNumber;
|
|
end;
|
|
|
|
EMismatchedSetException = class(ERecognitionException)
|
|
strict private
|
|
FExpecting: IBitSet;
|
|
public
|
|
constructor Create(const AExpecting: IBitSet; const AInput: IIntStream);
|
|
|
|
function ToString: String; override;
|
|
|
|
property Expecting: IBitSet read FExpecting write FExpecting;
|
|
end;
|
|
|
|
EMismatchedNotSetException = class(EMismatchedSetException)
|
|
|
|
public
|
|
function ToString: String; override;
|
|
end;
|
|
|
|
EFailedPredicateException = class(ERecognitionException)
|
|
strict private
|
|
FRuleName: String;
|
|
FPredicateText: String;
|
|
public
|
|
constructor Create(const AInput: IIntStream; const ARuleName,
|
|
APredicateText: String);
|
|
|
|
function ToString: String; override;
|
|
|
|
property RuleName: String read FRuleName write FRuleName;
|
|
property PredicateText: String read FPredicateText write FPredicateText;
|
|
end;
|
|
|
|
EMismatchedRangeException = class(ERecognitionException)
|
|
strict private
|
|
FA: Integer;
|
|
FB: Integer;
|
|
public
|
|
constructor Create(const AA, AB: Integer; const AInput: IIntStream);
|
|
|
|
function ToString: String; override;
|
|
|
|
property A: Integer read FA write FA;
|
|
property B: Integer read FB write FB;
|
|
end;
|
|
|
|
type
|
|
TCharStreamState = class(TANTLRObject, ICharStreamState)
|
|
strict private
|
|
FP: Integer;
|
|
FLine: Integer;
|
|
FCharPositionInLine: Integer;
|
|
protected
|
|
{ ICharStreamState }
|
|
function GetP: Integer;
|
|
procedure SetP(const Value: Integer);
|
|
function GetLine: Integer;
|
|
procedure SetLine(const Value: Integer);
|
|
function GetCharPositionInLine: Integer;
|
|
procedure SetCharPositionInLine(const Value: Integer);
|
|
end;
|
|
|
|
type
|
|
TANTLRStringStream = class(TANTLRObject, IANTLRStringStream, ICharStream)
|
|
private
|
|
FData: PChar;
|
|
FOwnsData: Boolean;
|
|
|
|
/// <summary>How many characters are actually in the buffer?</summary>
|
|
FN: Integer;
|
|
|
|
/// <summary>Current line number within the input (1..n )</summary>
|
|
FLine: Integer;
|
|
|
|
/// <summary>Index in our array for the next char (0..n-1)</summary>
|
|
FP: Integer;
|
|
|
|
/// <summary>
|
|
/// The index of the character relative to the beginning of the
|
|
/// line (0..n-1)
|
|
/// </summary>
|
|
FCharPositionInLine: Integer;
|
|
|
|
/// <summary>
|
|
/// Tracks the depth of nested <see cref="IIntStream.Mark"/> calls
|
|
/// </summary>
|
|
FMarkDepth: Integer;
|
|
|
|
/// <summary>
|
|
/// A list of CharStreamState objects that tracks the stream state
|
|
/// (i.e. line, charPositionInLine, and p) that can change as you
|
|
/// move through the input stream. Indexed from 1..markDepth.
|
|
/// A null is kept @ index 0. Create upon first call to Mark().
|
|
/// </summary>
|
|
FMarkers: IList<ICharStreamState>;
|
|
|
|
/// <summary>
|
|
/// Track the last Mark() call result value for use in Rewind().
|
|
/// </summary>
|
|
FLastMarker: Integer;
|
|
/// <summary>
|
|
/// What is name or source of this char stream?
|
|
/// </summary>
|
|
FName: String;
|
|
protected
|
|
{ IIntStream }
|
|
function GetSourceName: String; virtual;
|
|
|
|
procedure Consume; virtual;
|
|
function LA(I: Integer): Integer; virtual;
|
|
function LAChar(I: Integer): Char;
|
|
function Index: Integer;
|
|
function Size: Integer;
|
|
function Mark: Integer; virtual;
|
|
procedure Rewind(const Marker: Integer); overload; virtual;
|
|
procedure Rewind; overload; virtual;
|
|
procedure Release(const Marker: Integer); virtual;
|
|
procedure Seek(const Index: Integer); virtual;
|
|
|
|
property SourceName: String read GetSourceName write FName;
|
|
protected
|
|
{ ICharStream }
|
|
function GetLine: Integer; virtual;
|
|
procedure SetLine(const Value: Integer); virtual;
|
|
function GetCharPositionInLine: Integer; virtual;
|
|
procedure SetCharPositionInLine(const Value: Integer); virtual;
|
|
function LT(const I: Integer): Integer; virtual;
|
|
function Substring(const Start, Stop: Integer): String; virtual;
|
|
protected
|
|
{ IANTLRStringStream }
|
|
procedure Reset; virtual;
|
|
public
|
|
constructor Create; overload;
|
|
|
|
/// <summary>
|
|
/// Initializes a new instance of the ANTLRStringStream class for the
|
|
/// specified string. This copies data from the string to a local
|
|
/// character array
|
|
/// </summary>
|
|
constructor Create(const AInput: String); overload;
|
|
|
|
/// <summary>
|
|
/// Initializes a new instance of the ANTLRStringStream class for the
|
|
/// specified character array. This is the preferred constructor as
|
|
/// no data is copied
|
|
/// </summary>
|
|
constructor Create(const AData: PChar;
|
|
const ANumberOfActualCharsInArray: Integer); overload;
|
|
|
|
destructor Destroy; override;
|
|
end;
|
|
|
|
TANTLRFileStream = class(TANTLRStringStream, IANTLRFileStream)
|
|
strict private
|
|
/// <summary>Fully qualified name of the stream's underlying file</summary>
|
|
FFileName: String;
|
|
protected
|
|
{ IIntStream }
|
|
function GetSourceName: String; override;
|
|
protected
|
|
{ IANTLRFileStream }
|
|
|
|
procedure Load(const FileName: String; const Encoding: TEncoding); virtual;
|
|
public
|
|
/// <summary>
|
|
/// Initializes a new instance of the ANTLRFileStream class for the
|
|
/// specified file name
|
|
/// </summary>
|
|
constructor Create(const AFileName: String); overload;
|
|
|
|
/// <summary>
|
|
/// Initializes a new instance of the ANTLRFileStream class for the
|
|
/// specified file name and encoding
|
|
/// </summary>
|
|
constructor Create(const AFileName: String; const AEncoding: TEncoding); overload;
|
|
end;
|
|
|
|
TBitSet = class(TANTLRObject, IBitSet, ICloneable)
|
|
strict private
|
|
const
|
|
BITS = 64; // number of bits / ulong
|
|
LOG_BITS = 6; // 2 shl 6 = 64
|
|
|
|
///<summary> We will often need to do a mod operator (i mod nbits).
|
|
/// Its turns out that, for powers of two, this mod operation is
|
|
/// same as <![CDATA[(I and (nbits-1))]]>. Since mod is slow, we use a precomputed
|
|
/// mod mask to do the mod instead.
|
|
/// </summary>
|
|
MOD_MASK = BITS - 1;
|
|
strict private
|
|
/// <summary>The actual data bits </summary>
|
|
FBits: TUInt64Array;
|
|
strict private
|
|
class function WordNumber(const Bit: Integer): Integer; static;
|
|
class function BitMask(const BitNumber: Integer): UInt64; static;
|
|
class function NumWordsToHold(const El: Integer): Integer; static;
|
|
protected
|
|
{ ICloneable }
|
|
function Clone: IANTLRInterface; virtual;
|
|
protected
|
|
{ IBitSet }
|
|
function GetIsNil: Boolean; virtual;
|
|
function BitSetOr(const A: IBitSet): IBitSet; virtual;
|
|
procedure Add(const El: Integer); virtual;
|
|
procedure GrowToInclude(const Bit: Integer); virtual;
|
|
procedure OrInPlace(const A: IBitSet); virtual;
|
|
function Size: Integer; virtual;
|
|
function Member(const El: Integer): Boolean; virtual;
|
|
procedure Remove(const El: Integer); virtual;
|
|
function NumBits: Integer; virtual;
|
|
function LengthInLongWords: Integer; virtual;
|
|
function ToArray: TIntegerArray; virtual;
|
|
function ToPackedArray: TUInt64Array; virtual;
|
|
function ToString(const TokenNames: TStringArray): String; reintroduce; overload; virtual;
|
|
public
|
|
/// <summary>Construct a bitset of size one word (64 bits) </summary>
|
|
constructor Create; overload;
|
|
|
|
/// <summary>Construction from a static array of ulongs </summary>
|
|
constructor Create(const ABits: array of UInt64); overload;
|
|
|
|
/// <summary>Construction from a list of integers </summary>
|
|
constructor Create(const AItems: IList<Integer>); overload;
|
|
|
|
/// <summary>Construct a bitset given the size</summary>
|
|
/// <param name="nbits">The size of the bitset in bits</param>
|
|
constructor Create(const ANBits: Integer); overload;
|
|
|
|
class function BitSetOf(const El: Integer): IBitSet; overload; static;
|
|
class function BitSetOf(const A, B: Integer): IBitSet; overload; static;
|
|
class function BitSetOf(const A, B, C: Integer): IBitSet; overload; static;
|
|
class function BitSetOf(const A, B, C, D: Integer): IBitSet; overload; static;
|
|
|
|
function ToString: String; overload; override;
|
|
function Equals(Obj: TObject): Boolean; override;
|
|
end;
|
|
|
|
TRecognizerSharedState = class(TANTLRObject, IRecognizerSharedState)
|
|
strict private
|
|
FFollowing: TBitSetArray;
|
|
FFollowingStackPointer: Integer;
|
|
FErrorRecovery: Boolean;
|
|
FLastErrorIndex: Integer;
|
|
FFailed: Boolean;
|
|
FSyntaxErrors: Integer;
|
|
FBacktracking: Integer;
|
|
FRuleMemo: TDictionaryArray<Integer, Integer>;
|
|
FToken: IToken;
|
|
FTokenStartCharIndex: Integer;
|
|
FTokenStartLine: Integer;
|
|
FTokenStartCharPositionInLine: Integer;
|
|
FChannel: Integer;
|
|
FTokenType: Integer;
|
|
FText: String;
|
|
protected
|
|
{ IRecognizerSharedState }
|
|
function GetFollowing: TBitSetArray;
|
|
procedure SetFollowing(const Value: TBitSetArray);
|
|
function GetFollowingStackPointer: Integer;
|
|
procedure SetFollowingStackPointer(const Value: Integer);
|
|
function GetErrorRecovery: Boolean;
|
|
procedure SetErrorRecovery(const Value: Boolean);
|
|
function GetLastErrorIndex: Integer;
|
|
procedure SetLastErrorIndex(const Value: Integer);
|
|
function GetFailed: Boolean;
|
|
procedure SetFailed(const Value: Boolean);
|
|
function GetSyntaxErrors: Integer;
|
|
procedure SetSyntaxErrors(const Value: Integer);
|
|
function GetBacktracking: Integer;
|
|
procedure SetBacktracking(const Value: Integer);
|
|
function GetRuleMemo: TDictionaryArray<Integer, Integer>;
|
|
function GetRuleMemoCount: Integer;
|
|
procedure SetRuleMemoCount(const Value: Integer);
|
|
function GetToken: IToken;
|
|
procedure SetToken(const Value: IToken);
|
|
function GetTokenStartCharIndex: Integer;
|
|
procedure SetTokenStartCharIndex(const Value: Integer);
|
|
function GetTokenStartLine: Integer;
|
|
procedure SetTokenStartLine(const Value: Integer);
|
|
function GetTokenStartCharPositionInLine: Integer;
|
|
procedure SetTokenStartCharPositionInLine(const Value: Integer);
|
|
function GetChannel: Integer;
|
|
procedure SetChannel(const Value: Integer);
|
|
function GetTokenType: Integer;
|
|
procedure SetTokenType(const Value: Integer);
|
|
function GetText: String;
|
|
procedure SetText(const Value: String);
|
|
public
|
|
constructor Create;
|
|
end;
|
|
|
|
TCommonToken = class(TANTLRObject, ICommonToken, IToken)
|
|
strict protected
|
|
FTokenType: Integer;
|
|
FLine: Integer;
|
|
FCharPositionInLine: Integer;
|
|
FChannel: Integer;
|
|
FInput: ICharStream;
|
|
|
|
/// <summary>We need to be able to change the text once in a while. If
|
|
/// this is non-null, then getText should return this. Note that
|
|
/// start/stop are not affected by changing this.
|
|
/// </summary>
|
|
FText: String;
|
|
|
|
/// <summary>What token number is this from 0..n-1 tokens; < 0 implies invalid index </summary>
|
|
FIndex: Integer;
|
|
|
|
/// <summary>The char position into the input buffer where this token starts </summary>
|
|
FStart: Integer;
|
|
|
|
/// <summary>The char position into the input buffer where this token stops </summary>
|
|
FStop: Integer;
|
|
protected
|
|
{ IToken }
|
|
function GetTokenType: Integer; virtual;
|
|
procedure SetTokenType(const Value: Integer); virtual;
|
|
function GetLine: Integer; virtual;
|
|
procedure SetLine(const Value: Integer); virtual;
|
|
function GetCharPositionInLine: Integer; virtual;
|
|
procedure SetCharPositionInLine(const Value: Integer); virtual;
|
|
function GetChannel: Integer; virtual;
|
|
procedure SetChannel(const Value: Integer); virtual;
|
|
function GetTokenIndex: Integer; virtual;
|
|
procedure SetTokenIndex(const Value: Integer); virtual;
|
|
function GetText: String; virtual;
|
|
procedure SetText(const Value: String); virtual;
|
|
protected
|
|
{ ICommonToken }
|
|
function GetStartIndex: Integer;
|
|
procedure SetStartIndex(const Value: Integer);
|
|
function GetStopIndex: Integer;
|
|
procedure SetStopIndex(const Value: Integer);
|
|
function GetInputStream: ICharStream;
|
|
procedure SetInputStream(const Value: ICharStream);
|
|
protected
|
|
constructor Create; overload;
|
|
public
|
|
constructor Create(const ATokenType: Integer); overload;
|
|
constructor Create(const AInput: ICharStream; const ATokenType, AChannel,
|
|
AStart, AStop: Integer); overload;
|
|
constructor Create(const ATokenType: Integer; const AText: String); overload;
|
|
constructor Create(const AOldToken: IToken); overload;
|
|
|
|
function ToString: String; override;
|
|
end;
|
|
|
|
TClassicToken = class(TANTLRObject, IClassicToken, IToken)
|
|
strict private
|
|
FText: String;
|
|
FTokenType: Integer;
|
|
FLine: Integer;
|
|
FCharPositionInLine: Integer;
|
|
FChannel: Integer;
|
|
|
|
/// <summary>What token number is this from 0..n-1 tokens </summary>
|
|
FIndex: Integer;
|
|
protected
|
|
{ IClassicToken }
|
|
function GetTokenType: Integer; virtual;
|
|
procedure SetTokenType(const Value: Integer); virtual;
|
|
function GetLine: Integer; virtual;
|
|
procedure SetLine(const Value: Integer); virtual;
|
|
function GetCharPositionInLine: Integer; virtual;
|
|
procedure SetCharPositionInLine(const Value: Integer); virtual;
|
|
function GetChannel: Integer; virtual;
|
|
procedure SetChannel(const Value: Integer); virtual;
|
|
function GetTokenIndex: Integer; virtual;
|
|
procedure SetTokenIndex(const Value: Integer); virtual;
|
|
function GetText: String; virtual;
|
|
procedure SetText(const Value: String); virtual;
|
|
function GetInputStream: ICharStream; virtual;
|
|
procedure SetInputStream(const Value: ICharStream); virtual;
|
|
public
|
|
constructor Create(const ATokenType: Integer); overload;
|
|
constructor Create(const AOldToken: IToken); overload;
|
|
constructor Create(const ATokenType: Integer; const AText: String); overload;
|
|
constructor Create(const ATokenType: Integer; const AText: String;
|
|
const AChannel: Integer); overload;
|
|
|
|
function ToString: String; override;
|
|
end;
|
|
|
|
TToken = class sealed
|
|
public
|
|
const
|
|
EOR_TOKEN_TYPE = 1;
|
|
|
|
/// <summary>imaginary tree navigation type; traverse "get child" link </summary>
|
|
DOWN = 2;
|
|
|
|
/// <summary>imaginary tree navigation type; finish with a child list </summary>
|
|
UP = 3;
|
|
|
|
MIN_TOKEN_TYPE = UP + 1;
|
|
EOF = Integer(cscEOF);
|
|
INVALID_TOKEN_TYPE = 0;
|
|
|
|
/// <summary>
|
|
/// All tokens go to the parser (unless skip() is called in that rule)
|
|
/// on a particular "channel". The parser tunes to a particular channel
|
|
/// so that whitespace etc... can go to the parser on a "hidden" channel.
|
|
/// </summary>
|
|
DEFAULT_CHANNEL = 0;
|
|
|
|
/// <summary>
|
|
/// Anything on different channel than DEFAULT_CHANNEL is not parsed by parser.
|
|
/// </summary>
|
|
HIDDEN_CHANNEL = 99;
|
|
public
|
|
class var
|
|
EOF_TOKEN: IToken;
|
|
INVALID_TOKEN: IToken;
|
|
/// <summary>
|
|
/// In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
|
|
/// will avoid creating a token for this symbol and try to fetch another.
|
|
/// </summary>
|
|
SKIP_TOKEN: IToken;
|
|
private
|
|
class procedure Initialize; static;
|
|
end;
|
|
|
|
/// <summary>
|
|
/// Global constants
|
|
/// </summary>
|
|
TConstants = class sealed
|
|
public
|
|
const
|
|
VERSION = '3.1b1';
|
|
|
|
// Moved to version 2 for v3.1: added grammar name to enter/exit Rule
|
|
DEBUG_PROTOCOL_VERSION = '2';
|
|
|
|
ANTLRWORKS_DIR = 'antlrworks';
|
|
end;
|
|
|
|
TBaseRecognizer = class abstract(TANTLRObject, IBaseRecognizer)
|
|
public
|
|
const
|
|
MEMO_RULE_FAILED = -2;
|
|
MEMO_RULE_UNKNOWN = -1;
|
|
INITIAL_FOLLOW_STACK_SIZE = 100;
|
|
NEXT_TOKEN_RULE_NAME = 'nextToken';
|
|
// copies from Token object for convenience in actions
|
|
DEFAULT_TOKEN_CHANNEL = TToken.DEFAULT_CHANNEL;
|
|
HIDDEN = TToken.HIDDEN_CHANNEL;
|
|
strict protected
|
|
/// <summary>
|
|
/// An externalized representation of the - shareable - internal state of
|
|
/// this lexer, parser or tree parser.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// The state of a lexer, parser, or tree parser are collected into
|
|
/// external state objects so that the state can be shared. This sharing
|
|
/// is needed to have one grammar import others and share same error
|
|
/// variables and other state variables. It's a kind of explicit multiple
|
|
/// inheritance via delegation of methods and shared state.
|
|
/// </remarks>
|
|
FState: IRecognizerSharedState;
|
|
|
|
property State: IRecognizerSharedState read FState;
|
|
strict protected
|
|
/// <summary>
|
|
/// Match needs to return the current input symbol, which gets put
|
|
/// into the label for the associated token ref; e.g., x=ID. Token
|
|
/// and tree parsers need to return different objects. Rather than test
|
|
/// for input stream type or change the IntStream interface, I use
|
|
/// a simple method to ask the recognizer to tell me what the current
|
|
/// input symbol is.
|
|
/// </summary>
|
|
/// <remarks>This is ignored for lexers.</remarks>
|
|
function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; virtual;
|
|
|
|
/// <summary>
|
|
/// Factor out what to do upon token mismatch so tree parsers can behave
|
|
/// differently. Override and call MismatchRecover(input, ttype, follow)
|
|
/// to get single token insertion and deletion. Use this to turn off
|
|
/// single token insertion and deletion. Override mismatchRecover
|
|
/// to call this instead.
|
|
/// </summary>
|
|
procedure Mismatch(const Input: IIntStream; const TokenType: Integer;
|
|
const Follow: IBitSet); virtual;
|
|
|
|
/// <summary>
|
|
/// Attempt to Recover from a single missing or extra token.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// EXTRA TOKEN
|
|
///
|
|
/// LA(1) is not what we are looking for. If LA(2) has the right token,
|
|
/// however, then assume LA(1) is some extra spurious token. Delete it
|
|
/// and LA(2) as if we were doing a normal Match(), which advances the
|
|
/// input.
|
|
///
|
|
/// MISSING TOKEN
|
|
///
|
|
/// If current token is consistent with what could come after
|
|
/// ttype then it is ok to "insert" the missing token, else throw
|
|
/// exception For example, Input "i=(3;" is clearly missing the
|
|
/// ')'. When the parser returns from the nested call to expr, it
|
|
/// will have call chain:
|
|
///
|
|
/// stat -> expr -> atom
|
|
///
|
|
/// and it will be trying to Match the ')' at this point in the
|
|
/// derivation:
|
|
///
|
|
/// => ID '=' '(' INT ')' ('+' atom)* ';'
|
|
/// ^
|
|
/// Match() will see that ';' doesn't Match ')' and report a
|
|
/// mismatched token error. To Recover, it sees that LA(1)==';'
|
|
/// is in the set of tokens that can follow the ')' token
|
|
/// reference in rule atom. It can assume that you forgot the ')'.
|
|
/// </remarks>
|
|
function RecoverFromMismatchedToken(const Input: IIntStream;
|
|
const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; virtual;
|
|
|
|
/// <summary>
|
|
/// Conjure up a missing token during error recovery.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// The recognizer attempts to recover from single missing
|
|
/// symbols. But, actions might refer to that missing symbol.
|
|
/// For example, x=ID {f($x);}. The action clearly assumes
|
|
/// that there has been an identifier matched previously and that
|
|
/// $x points at that token. If that token is missing, but
|
|
/// the next token in the stream is what we want we assume that
|
|
/// this token is missing and we keep going. Because we
|
|
/// have to return some token to replace the missing token,
|
|
/// we have to conjure one up. This method gives the user control
|
|
/// over the tokens returned for missing tokens. Mostly,
|
|
/// you will want to create something special for identifier
|
|
/// tokens. For literals such as '{' and ',', the default
|
|
/// action in the parser or tree parser works. It simply creates
|
|
/// a CommonToken of the appropriate type. The text will be the token.
|
|
/// If you change what tokens must be created by the lexer,
|
|
/// override this method to create the appropriate tokens.
|
|
/// </remarks>
|
|
function GetMissingSymbol(const Input: IIntStream;
|
|
const E: ERecognitionException; const ExpectedTokenType: Integer;
|
|
const Follow: IBitSet): IANTLRInterface; virtual;
|
|
|
|
/// <summary>
|
|
/// Push a rule's follow set using our own hardcoded stack
|
|
/// </summary>
|
|
/// <param name="fset"></param>
|
|
procedure PushFollow(const FSet: IBitSet);
|
|
|
|
/// <summary>Compute the context-sensitive FOLLOW set for current rule.
|
|
/// This is set of token types that can follow a specific rule
|
|
/// reference given a specific call chain. You get the set of
|
|
/// viable tokens that can possibly come next (lookahead depth 1)
|
|
/// given the current call chain. Contrast this with the
|
|
/// definition of plain FOLLOW for rule r:
|
|
///
|
|
/// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
|
|
///
|
|
/// where x in T* and alpha, beta in V*; T is set of terminals and
|
|
/// V is the set of terminals and nonterminals. In other words,
|
|
/// FOLLOW(r) is the set of all tokens that can possibly follow
|
|
/// references to r in *any* sentential form (context). At
|
|
/// runtime, however, we know precisely which context applies as
|
|
/// we have the call chain. We may compute the exact (rather
|
|
/// than covering superset) set of following tokens.
|
|
///
|
|
/// For example, consider grammar:
|
|
///
|
|
/// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
|
|
/// | "return" expr '.'
|
|
/// ;
|
|
/// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
|
|
/// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
|
|
/// | '(' expr ')'
|
|
/// ;
|
|
///
|
|
/// The FOLLOW sets are all inclusive whereas context-sensitive
|
|
/// FOLLOW sets are precisely what could follow a rule reference.
|
|
/// For input input "i=(3);", here is the derivation:
|
|
///
|
|
/// stat => ID '=' expr ';'
|
|
/// => ID '=' atom ('+' atom)* ';'
|
|
/// => ID '=' '(' expr ')' ('+' atom)* ';'
|
|
/// => ID '=' '(' atom ')' ('+' atom)* ';'
|
|
/// => ID '=' '(' INT ')' ('+' atom)* ';'
|
|
/// => ID '=' '(' INT ')' ';'
|
|
///
|
|
/// At the "3" token, you'd have a call chain of
|
|
///
|
|
/// stat -> expr -> atom -> expr -> atom
|
|
///
|
|
/// What can follow that specific nested ref to atom? Exactly ')'
|
|
/// as you can see by looking at the derivation of this specific
|
|
/// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
|
|
///
|
|
/// You want the exact viable token set when recovering from a
|
|
/// token mismatch. Upon token mismatch, if LA(1) is member of
|
|
/// the viable next token set, then you know there is most likely
|
|
/// a missing token in the input stream. "Insert" one by just not
|
|
/// throwing an exception.
|
|
/// </summary>
|
|
function ComputeContextSensitiveRuleFOLLOW: IBitSet; virtual;
|
|
|
|
(* Compute the error recovery set for the current rule. During
|
|
* rule invocation, the parser pushes the set of tokens that can
|
|
* follow that rule reference on the stack; this amounts to
|
|
* computing FIRST of what follows the rule reference in the
|
|
* enclosing rule. This local follow set only includes tokens
|
|
* from within the rule; i.e., the FIRST computation done by
|
|
* ANTLR stops at the end of a rule.
|
|
*
|
|
* EXAMPLE
|
|
*
|
|
* When you find a "no viable alt exception", the input is not
|
|
* consistent with any of the alternatives for rule r. The best
|
|
* thing to do is to consume tokens until you see something that
|
|
* can legally follow a call to r *or* any rule that called r.
|
|
* You don't want the exact set of viable next tokens because the
|
|
* input might just be missing a token--you might consume the
|
|
* rest of the input looking for one of the missing tokens.
|
|
*
|
|
* Consider grammar:
|
|
*
|
|
* a : '[' b ']'
|
|
* | '(' b ')'
|
|
* ;
|
|
* b : c '^' INT ;
|
|
* c : ID
|
|
* | INT
|
|
* ;
|
|
*
|
|
* At each rule invocation, the set of tokens that could follow
|
|
* that rule is pushed on a stack. Here are the various "local"
|
|
* follow sets:
|
|
*
|
|
* FOLLOW(b1_in_a) = FIRST(']') = ']'
|
|
* FOLLOW(b2_in_a) = FIRST(')') = ')'
|
|
* FOLLOW(c_in_b) = FIRST('^') = '^'
|
|
*
|
|
* Upon erroneous input "[]", the call chain is
|
|
*
|
|
* a -> b -> c
|
|
*
|
|
* and, hence, the follow context stack is:
|
|
*
|
|
* depth local follow set after call to rule
|
|
* 0 <EOF> a (from main())
|
|
* 1 ']' b
|
|
* 3 '^' c
|
|
*
|
|
* Notice that ')' is not included, because b would have to have
|
|
* been called from a different context in rule a for ')' to be
|
|
* included.
|
|
*
|
|
* For error recovery, we cannot consider FOLLOW(c)
|
|
* (context-sensitive or otherwise). We need the combined set of
|
|
* all context-sensitive FOLLOW sets--the set of all tokens that
|
|
* could follow any reference in the call chain. We need to
|
|
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
|
|
* we resync'd to that token, we'd consume until EOF. We need to
|
|
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
|
|
* In this case, for input "[]", LA(1) is in this set so we would
|
|
* not consume anything and after printing an error rule c would
|
|
* return normally. It would not find the required '^' though.
|
|
* At this point, it gets a mismatched token error and throws an
|
|
* exception (since LA(1) is not in the viable following token
|
|
* set). The rule exception handler tries to Recover, but finds
|
|
* the same recovery set and doesn't consume anything. Rule b
|
|
* exits normally returning to rule a. Now it finds the ']' (and
|
|
* with the successful Match exits errorRecovery mode).
|
|
*
|
|
* So, you cna see that the parser walks up call chain looking
|
|
* for the token that was a member of the recovery set.
|
|
*
|
|
* Errors are not generated in errorRecovery mode.
|
|
*
|
|
* ANTLR's error recovery mechanism is based upon original ideas:
|
|
*
|
|
* "Algorithms + Data Structures = Programs" by Niklaus Wirth
|
|
*
|
|
* and
|
|
*
|
|
* "A note on error recovery in recursive descent parsers":
|
|
* http://portal.acm.org/citation.cfm?id=947902.947905
|
|
*
|
|
* Later, Josef Grosch had some good ideas:
|
|
*
|
|
* "Efficient and Comfortable Error Recovery in Recursive Descent
|
|
* Parsers":
|
|
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
|
|
*
|
|
* Like Grosch I implemented local FOLLOW sets that are combined
|
|
* at run-time upon error to avoid overhead during parsing.
|
|
*)
|
|
function ComputeErrorRecoverySet: IBitSet; virtual;
|
|
|
|
function CombineFollows(const Exact: Boolean): IBitSet;
|
|
protected
|
|
{ IBaseRecognizer }
|
|
function GetInput: IIntStream; virtual; abstract;
|
|
function GetBacktrackingLevel: Integer;
|
|
function GetState: IRecognizerSharedState;
|
|
function GetNumberOfSyntaxErrors: Integer;
|
|
function GetGrammarFileName: String; virtual;
|
|
function GetSourceName: String; virtual; abstract;
|
|
function GetTokenNames: TStringArray; virtual;
|
|
|
|
procedure BeginBacktrack(const Level: Integer); virtual;
|
|
procedure EndBacktrack(const Level: Integer; const Successful: Boolean); virtual;
|
|
procedure Reset; virtual;
|
|
function Match(const Input: IIntStream; const TokenType: Integer;
|
|
const Follow: IBitSet): IANTLRInterface; virtual;
|
|
function MismatchIsUnwantedToken(const Input: IIntStream;
|
|
const TokenType: Integer): Boolean;
|
|
function MismatchIsMissingToken(const Input: IIntStream;
|
|
const Follow: IBitSet): Boolean;
|
|
procedure BeginResync; virtual;
|
|
procedure EndResync; virtual;
|
|
procedure ReportError(const E: ERecognitionException); virtual;
|
|
procedure MatchAny(const Input: IIntStream); virtual;
|
|
procedure DisplayRecognitionError(const TokenNames: TStringArray;
|
|
const E: ERecognitionException); virtual;
|
|
function GetErrorMessage(const E: ERecognitionException;
|
|
const TokenNames: TStringArray): String; virtual;
|
|
function GetErrorHeader(const E: ERecognitionException): String; virtual;
|
|
function GetTokenErrorDisplay(const T: IToken): String; virtual;
|
|
procedure EmitErrorMessage(const Msg: String); virtual;
|
|
procedure Recover(const Input: IIntStream; const RE: ERecognitionException); virtual;
|
|
function RecoverFromMismatchedSet(const Input: IIntStream;
|
|
const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; virtual;
|
|
procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload; virtual;
|
|
procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload; virtual;
|
|
//function GetRuleInvocationStack: IList<IANTLRInterface>; overload; virtual;
|
|
//function GetRuleInvocationStack(const E: Exception;
|
|
// const RecognizerClassName: String): IList<IANTLRInterface>; overload;
|
|
function ToStrings(const Tokens: IList<IToken>): IList<String>; virtual;
|
|
function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer; virtual;
|
|
function AlreadyParsedRule(const Input: IIntStream;
|
|
const RuleIndex: Integer): Boolean; virtual;
|
|
procedure Memoize(const Input: IIntStream; const RuleIndex,
|
|
RuleStartIndex: Integer); virtual;
|
|
function GetRuleMemoizationChaceSize: Integer;
|
|
|
|
procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
|
|
const InputSymbol: String); virtual;
|
|
procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
|
|
const InputSymbol: String); virtual;
|
|
|
|
property Input: IIntStream read GetInput;
|
|
public
|
|
constructor Create; overload;
|
|
constructor Create(const AState: IRecognizerSharedState); overload;
|
|
end;
|
|
|
|
TCommonTokenStream = class(TANTLRObject, ICommonTokenStream, ITokenStream)
|
|
strict private
|
|
FTokenSource: ITokenSource;
|
|
|
|
/// <summary>Record every single token pulled from the source so we can reproduce
|
|
/// chunks of it later.
|
|
/// </summary>
|
|
FTokens: IList<IToken>;
|
|
|
|
/// <summary><![CDATA[Map<tokentype, channel>]]> to override some Tokens' channel numbers </summary>
|
|
FChannelOverrideMap: IDictionary<Integer, Integer>;
|
|
|
|
/// <summary><![CDATA[Set<tokentype>;]]> discard any tokens with this type </summary>
|
|
FDiscardSet: IHashList<Integer, Integer>;
|
|
|
|
/// <summary>Skip tokens on any channel but this one; this is how we skip whitespace... </summary>
|
|
FChannel: Integer;
|
|
|
|
/// <summary>By default, track all incoming tokens </summary>
|
|
FDiscardOffChannelTokens: Boolean;
|
|
|
|
/// <summary>Track the last Mark() call result value for use in Rewind().</summary>
|
|
FLastMarker: Integer;
|
|
|
|
/// <summary>
|
|
/// The index into the tokens list of the current token (next token
|
|
/// to consume). p==-1 indicates that the tokens list is empty
|
|
/// </summary>
|
|
FP: Integer;
|
|
strict protected
|
|
/// <summary>Load all tokens from the token source and put in tokens.
|
|
/// This is done upon first LT request because you might want to
|
|
/// set some token type / channel overrides before filling buffer.
|
|
/// </summary>
|
|
procedure FillBuffer; virtual;
|
|
|
|
/// <summary>Look backwards k tokens on-channel tokens </summary>
|
|
function LB(const K: Integer): IToken; virtual;
|
|
|
|
/// <summary>Given a starting index, return the index of the first on-channel
|
|
/// token.
|
|
/// </summary>
|
|
function SkipOffTokenChannels(const I: Integer): Integer; virtual;
|
|
function SkipOffTokenChannelsReverse(const I: Integer): Integer; virtual;
|
|
protected
|
|
{ IIntStream }
|
|
function GetSourceName: String; virtual;
|
|
|
|
procedure Consume; virtual;
|
|
function LA(I: Integer): Integer; virtual;
|
|
function LAChar(I: Integer): Char;
|
|
function Mark: Integer; virtual;
|
|
function Index: Integer; virtual;
|
|
procedure Rewind(const Marker: Integer); overload; virtual;
|
|
procedure Rewind; overload; virtual;
|
|
procedure Release(const Marker: Integer); virtual;
|
|
procedure Seek(const Index: Integer); virtual;
|
|
function Size: Integer; virtual;
|
|
protected
|
|
{ ITokenStream }
|
|
function GetTokenSource: ITokenSource; virtual;
|
|
procedure SetTokenSource(const Value: ITokenSource); virtual;
|
|
|
|
function LT(const K: Integer): IToken; virtual;
|
|
function Get(const I: Integer): IToken; virtual;
|
|
function ToString(const Start, Stop: Integer): String; reintroduce; overload; virtual;
|
|
function ToString(const Start, Stop: IToken): String; reintroduce; overload; virtual;
|
|
protected
|
|
{ ICommonTokenStream }
|
|
procedure SetTokenTypeChannel(const TType, Channel: Integer);
|
|
procedure DiscardTokenType(const TType: Integer);
|
|
procedure DiscardOffChannelTokens(const Discard: Boolean);
|
|
function GetTokens: IList<IToken>; overload;
|
|
function GetTokens(const Start, Stop: Integer): IList<IToken>; overload;
|
|
function GetTokens(const Start, Stop: Integer;
|
|
const Types: IBitSet): IList<IToken>; overload;
|
|
function GetTokens(const Start, Stop: Integer;
|
|
const Types: IList<Integer>): IList<IToken>; overload;
|
|
function GetTokens(const Start, Stop,
|
|
TokenType: Integer): IList<IToken>; overload;
|
|
procedure Reset; virtual;
|
|
public
|
|
constructor Create; overload;
|
|
constructor Create(const ATokenSource: ITokenSource); overload;
|
|
constructor Create(const ATokenSource: ITokenSource;
|
|
const AChannel: Integer); overload;
|
|
constructor Create(const ALexer: ILexer); overload;
|
|
constructor Create(const ALexer: ILexer;
|
|
const AChannel: Integer); overload;
|
|
|
|
function ToString: String; overload; override;
|
|
end;
|
|
|
|
TDFA = class abstract(TANTLRObject, IDFA)
|
|
strict private
|
|
FSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
|
|
FEOT: TSmallintArray;
|
|
FEOF: TSmallintArray;
|
|
FMin: TCharArray;
|
|
FMax: TCharArray;
|
|
FAccept: TSmallintArray;
|
|
FSpecial: TSmallintArray;
|
|
FTransition: TSmallintMatrix;
|
|
FDecisionNumber: Integer;
|
|
FRecognizer: Pointer; { IBaseRecognizer }
|
|
function GetRecognizer: IBaseRecognizer;
|
|
procedure SetRecognizer(const Value: IBaseRecognizer);
|
|
strict protected
|
|
procedure NoViableAlt(const S: Integer; const Input: IIntStream);
|
|
|
|
property Recognizer: IBaseRecognizer read GetRecognizer write SetRecognizer;
|
|
property DecisionNumber: Integer read FDecisionNumber write FDecisionNumber;
|
|
property EOT: TSmallintArray read FEOT write FEOT;
|
|
property EOF: TSmallintArray read FEOF write FEOF;
|
|
property Min: TCharArray read FMin write FMin;
|
|
property Max: TCharArray read FMax write FMax;
|
|
property Accept: TSmallintArray read FAccept write FAccept;
|
|
property Special: TSmallintArray read FSpecial write FSpecial;
|
|
property Transition: TSmallintMatrix read FTransition write FTransition;
|
|
protected
|
|
{ IDFA }
|
|
function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
|
|
procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);
|
|
|
|
function Predict(const Input: IIntStream): Integer;
|
|
procedure Error(const NVAE: ENoViableAltException); virtual;
|
|
function SpecialStateTransition(const S: Integer;
|
|
const Input: IIntStream): Integer; virtual;
|
|
function Description: String; virtual;
|
|
function SpecialTransition(const State, Symbol: Integer): Integer;
|
|
public
|
|
class function UnpackEncodedString(const EncodedString: String): TSmallintArray; static;
|
|
class function UnpackEncodedStringArray(const EncodedStrings: TStringArray): TSmallintMatrix; overload; static;
|
|
class function UnpackEncodedStringArray(const EncodedStrings: array of String): TSmallintMatrix; overload; static;
|
|
class function UnpackEncodedStringToUnsignedChars(const EncodedString: String): TCharArray; static;
|
|
end;
|
|
|
|
TLexer = class abstract(TBaseRecognizer, ILexer, ITokenSource)
|
|
strict private
|
|
const
|
|
TOKEN_dot_EOF = Ord(cscEOF);
|
|
strict private
|
|
/// <summary>Where is the lexer drawing characters from? </summary>
|
|
FInput: ICharStream;
|
|
protected
|
|
{ IBaseRecognizer }
|
|
function GetSourceName: String; override;
|
|
function GetInput: IIntStream; override;
|
|
procedure Reset; override;
|
|
procedure ReportError(const E: ERecognitionException); override;
|
|
function GetErrorMessage(const E: ERecognitionException;
|
|
const TokenNames: TStringArray): String; override;
|
|
protected
|
|
{ ILexer }
|
|
function GetCharStream: ICharStream; virtual;
|
|
procedure SetCharStream(const Value: ICharStream); virtual;
|
|
function GetLine: Integer; virtual;
|
|
function GetCharPositionInLine: Integer; virtual;
|
|
function GetCharIndex: Integer; virtual;
|
|
function GetText: String; virtual;
|
|
procedure SetText(const Value: String); virtual;
|
|
|
|
function NextToken: IToken; virtual;
|
|
procedure Skip;
|
|
procedure DoTokens; virtual; abstract;
|
|
procedure Emit(const Token: IToken); overload; virtual;
|
|
function Emit: IToken; overload; virtual;
|
|
procedure Match(const S: String); reintroduce; overload; virtual;
|
|
procedure Match(const C: Integer); reintroduce; overload; virtual;
|
|
procedure MatchAny; reintroduce; overload; virtual;
|
|
procedure MatchRange(const A, B: Integer); virtual;
|
|
procedure Recover(const RE: ERecognitionException); reintroduce; overload; virtual;
|
|
function GetCharErrorDisplay(const C: Integer): String;
|
|
procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual;
|
|
procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual;
|
|
strict protected
|
|
property Input: ICharStream read FInput;
|
|
property CharIndex: Integer read GetCharIndex;
|
|
property Text: String read GetText write SetText;
|
|
public
|
|
constructor Create; overload;
|
|
constructor Create(const AInput: ICharStream); overload;
|
|
constructor Create(const AInput: ICharStream;
|
|
const AState: IRecognizerSharedState); overload;
|
|
end;
|
|
|
|
TParser = class(TBaseRecognizer, IParser)
|
|
strict private
|
|
FInput: ITokenStream;
|
|
protected
|
|
property Input: ITokenStream read FInput;
|
|
protected
|
|
{ IBaseRecognizer }
|
|
procedure Reset; override;
|
|
function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; override;
|
|
function GetMissingSymbol(const Input: IIntStream;
|
|
const E: ERecognitionException; const ExpectedTokenType: Integer;
|
|
const Follow: IBitSet): IANTLRInterface; override;
|
|
function GetSourceName: String; override;
|
|
function GetInput: IIntStream; override;
|
|
protected
|
|
{ IParser }
|
|
function GetTokenStream: ITokenStream; virtual;
|
|
procedure SetTokenStream(const Value: ITokenStream); virtual;
|
|
|
|
procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload;
|
|
procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload;
|
|
public
|
|
constructor Create(const AInput: ITokenStream); overload;
|
|
constructor Create(const AInput: ITokenStream;
|
|
const AState: IRecognizerSharedState); overload;
|
|
end;
|
|
|
|
TRuleReturnScope = class(TANTLRObject, IRuleReturnScope)
|
|
protected
|
|
{ IRuleReturnScope }
|
|
function GetStart: IANTLRInterface; virtual;
|
|
procedure SetStart(const Value: IANTLRInterface); virtual;
|
|
function GetStop: IANTLRInterface; virtual;
|
|
procedure SetStop(const Value: IANTLRInterface); virtual;
|
|
function GetTree: IANTLRInterface; virtual;
|
|
procedure SetTree(const Value: IANTLRInterface); virtual;
|
|
function GetTemplate: IANTLRInterface; virtual;
|
|
end;
|
|
|
|
TParserRuleReturnScope = class(TRuleReturnScope, IParserRuleReturnScope)
|
|
strict private
|
|
FStart: IToken;
|
|
FStop: IToken;
|
|
protected
|
|
{ IRuleReturnScope }
|
|
function GetStart: IANTLRInterface; override;
|
|
procedure SetStart(const Value: IANTLRInterface); override;
|
|
function GetStop: IANTLRInterface; override;
|
|
procedure SetStop(const Value: IANTLRInterface); override;
|
|
end;
|
|
|
|
TTokenRewriteStream = class(TCommonTokenStream, ITokenRewriteStream)
|
|
public
|
|
const
|
|
DEFAULT_PROGRAM_NAME = 'default';
|
|
PROGRAM_INIT_SIZE = 100;
|
|
MIN_TOKEN_INDEX = 0;
|
|
strict protected
|
|
// Define the rewrite operation hierarchy
|
|
type
|
|
IRewriteOperation = interface(IANTLRInterface)
|
|
['{285A54ED-58FF-44B1-A268-2686476D4419}']
|
|
{ Property accessors }
|
|
function GetInstructionIndex: Integer;
|
|
procedure SetInstructionIndex(const Value: Integer);
|
|
function GetIndex: Integer;
|
|
procedure SetIndex(const Value: Integer);
|
|
function GetText: IANTLRInterface;
|
|
procedure SetText(const Value: IANTLRInterface);
|
|
function GetParent: ITokenRewriteStream;
|
|
procedure SetParent(const Value: ITokenRewriteStream);
|
|
|
|
{ Methods }
|
|
|
|
/// <summary>Execute the rewrite operation by possibly adding to the buffer.
|
|
/// Return the index of the next token to operate on.
|
|
/// </summary>
|
|
function Execute(const Buf: TStringBuilder): Integer;
|
|
|
|
{ Properties }
|
|
property InstructionIndex: Integer read GetInstructionIndex write SetInstructionIndex;
|
|
property Index: Integer read GetIndex write SetIndex;
|
|
property Text: IANTLRInterface read GetText write SetText;
|
|
property Parent: ITokenRewriteStream read GetParent write SetParent;
|
|
end;
|
|
|
|
TRewriteOperation = class(TANTLRObject, IRewriteOperation)
|
|
strict private
|
|
// What index into rewrites List are we?
|
|
FInstructionIndex: Integer;
|
|
// Token buffer index
|
|
FIndex: Integer;
|
|
FText: IANTLRInterface;
|
|
FParent: Pointer; {ITokenRewriteStream;}
|
|
protected
|
|
{ IRewriteOperation }
|
|
function GetInstructionIndex: Integer;
|
|
procedure SetInstructionIndex(const Value: Integer);
|
|
function GetIndex: Integer;
|
|
procedure SetIndex(const Value: Integer);
|
|
function GetText: IANTLRInterface;
|
|
procedure SetText(const Value: IANTLRInterface);
|
|
function GetParent: ITokenRewriteStream;
|
|
procedure SetParent(const Value: ITokenRewriteStream);
|
|
|
|
function Execute(const Buf: TStringBuilder): Integer; virtual;
|
|
protected
|
|
constructor Create(const AIndex: Integer; const AText: IANTLRInterface;
|
|
const AParent: ITokenRewriteStream);
|
|
|
|
property Index: Integer read FIndex write FIndex;
|
|
property Text: IANTLRInterface read FText write FText;
|
|
property Parent: ITokenRewriteStream read GetParent write SetParent;
|
|
public
|
|
function ToString: String; override;
|
|
end;
|
|
|
|
IInsertBeforeOp = interface(IRewriteOperation)
|
|
['{BFB732E2-BE6A-4691-AE3B-5C8013DE924E}']
|
|
end;
|
|
|
|
TInsertBeforeOp = class(TRewriteOperation, IInsertBeforeOp)
|
|
protected
|
|
{ IRewriteOperation }
|
|
function Execute(const Buf: TStringBuilder): Integer; override;
|
|
end;
|
|
|
|
/// <summary>I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
|
|
/// instructions.
|
|
/// </summary>
|
|
IReplaceOp = interface(IRewriteOperation)
|
|
['{630C434A-99EA-4589-A65D-64A7B3DAC407}']
|
|
{ Property accessors }
|
|
function GetLastIndex: Integer;
|
|
procedure SetLastIndex(const Value: Integer);
|
|
|
|
{ Properties }
|
|
property LastIndex: Integer read GetLastIndex write SetLastIndex;
|
|
end;
|
|
|
|
TReplaceOp = class(TRewriteOperation, IReplaceOp)
|
|
private
|
|
FLastIndex: Integer;
|
|
protected
|
|
{ IRewriteOperation }
|
|
function Execute(const Buf: TStringBuilder): Integer; override;
|
|
protected
|
|
{ IReplaceOp }
|
|
function GetLastIndex: Integer;
|
|
procedure SetLastIndex(const Value: Integer);
|
|
public
|
|
constructor Create(const AStart, AStop: Integer;
|
|
const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
|
|
|
|
function ToString: String; override;
|
|
end;
|
|
|
|
IDeleteOp = interface(IRewriteOperation)
|
|
['{C39345BC-F170-4C3A-A989-65E6B9F0712B}']
|
|
end;
|
|
|
|
TDeleteOp = class(TReplaceOp)
|
|
public
|
|
function ToString: String; override;
|
|
end;
|
|
strict private
|
|
type
|
|
TRewriteOpComparer<T: IRewriteOperation> = class(TComparer<T>)
|
|
public
|
|
function Compare(const Left, Right: T): Integer; override;
|
|
end;
|
|
strict private
|
|
/// <summary>You may have multiple, named streams of rewrite operations.
|
|
/// I'm calling these things "programs."
|
|
/// Maps String (name) -> rewrite (IList)
|
|
/// </summary>
|
|
FPrograms: IDictionary<String, IList<IRewriteOperation>>;
|
|
|
|
/// <summary>Map String (program name) -> Integer index </summary>
|
|
FLastRewriteTokenIndexes: IDictionary<String, Integer>;
|
|
strict private
|
|
function InitializeProgram(const Name: String): IList<IRewriteOperation>;
|
|
protected
|
|
{ ITokenRewriteStream }
|
|
procedure Rollback(const InstructionIndex: Integer); overload; virtual;
|
|
procedure Rollback(const ProgramName: String;
|
|
const InstructionIndex: Integer); overload; virtual;
|
|
|
|
procedure DeleteProgram; overload; virtual;
|
|
procedure DeleteProgram(const ProgramName: String); overload; virtual;
|
|
|
|
procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertAfter(const ProgramName: String; const T: IToken;
|
|
const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertAfter(const ProgramName: String; const Index: Integer;
|
|
const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertAfter(const T: IToken; const Text: String); overload;
|
|
procedure InsertAfter(const Index: Integer; const Text: String); overload;
|
|
procedure InsertAfter(const ProgramName: String; const T: IToken;
|
|
const Text: String); overload;
|
|
procedure InsertAfter(const ProgramName: String; const Index: Integer;
|
|
const Text: String); overload;
|
|
|
|
procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertBefore(const ProgramName: String; const T: IToken;
|
|
const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertBefore(const ProgramName: String; const Index: Integer;
|
|
const Text: IANTLRInterface); overload; virtual;
|
|
procedure InsertBefore(const T: IToken; const Text: String); overload;
|
|
procedure InsertBefore(const Index: Integer; const Text: String); overload;
|
|
procedure InsertBefore(const ProgramName: String; const T: IToken;
|
|
const Text: String); overload;
|
|
procedure InsertBefore(const ProgramName: String; const Index: Integer;
|
|
const Text: String); overload;
|
|
|
|
procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
|
|
procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload; virtual;
|
|
procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload; virtual;
|
|
procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload; virtual;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
|
|
const Text: IANTLRInterface); overload; virtual;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
|
|
const Text: IANTLRInterface); overload; virtual;
|
|
procedure Replace(const Index: Integer; const Text: String); overload;
|
|
procedure Replace(const Start, Stop: Integer; const Text: String); overload;
|
|
procedure Replace(const IndexT: IToken; const Text: String); overload;
|
|
procedure Replace(const Start, Stop: IToken; const Text: String); overload;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
|
|
const Text: String); overload;
|
|
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
|
|
const Text: String); overload;
|
|
|
|
procedure Delete(const Index: Integer); overload; virtual;
|
|
procedure Delete(const Start, Stop: Integer); overload; virtual;
|
|
procedure Delete(const IndexT: IToken); overload; virtual;
|
|
procedure Delete(const Start, Stop: IToken); overload; virtual;
|
|
procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload; virtual;
|
|
procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload; virtual;
|
|
|
|
function GetLastRewriteTokenIndex: Integer; overload; virtual;
|
|
|
|
function ToOriginalString: String; overload; virtual;
|
|
function ToOriginalString(const Start, Stop: Integer): String; overload; virtual;
|
|
|
|
function ToString(const ProgramName: String): String; overload; virtual;
|
|
function ToString(const ProgramName: String;
|
|
const Start, Stop: Integer): String; overload; virtual;
|
|
|
|
function ToDebugString: String; overload; virtual;
|
|
function ToDebugString(const Start, Stop: Integer): String; overload; virtual;
|
|
protected
|
|
{ ITokenStream }
|
|
function ToString(const Start, Stop: Integer): String; overload; override;
|
|
strict protected
|
|
procedure Init; virtual;
|
|
function GetProgram(const Name: String): IList<IRewriteOperation>; virtual;
|
|
function GetLastRewriteTokenIndex(const ProgramName: String): Integer; overload; virtual;
|
|
procedure SetLastRewriteTokenIndex(const ProgramName: String; const I: Integer); overload; virtual;
|
|
|
|
/// <summary>
|
|
/// Return a map from token index to operation.
|
|
/// </summary>
|
|
/// <remarks>We need to combine operations and report invalid operations (like
|
|
/// overlapping replaces that are not completed nested). Inserts to
|
|
/// same index need to be combined etc... Here are the cases:
|
|
///
|
|
/// I.i.u I.j.v leave alone, nonoverlapping
|
|
/// I.i.u I.i.v combine: Iivu
|
|
///
|
|
/// R.i-j.u R.x-y.v | i-j in x-y delete first R
|
|
/// R.i-j.u R.i-j.v delete first R
|
|
/// R.i-j.u R.x-y.v | x-y in i-j ERROR
|
|
/// R.i-j.u R.x-y.v | boundaries overlap ERROR
|
|
///
|
|
/// I.i.u R.x-y.v | i in x-y delete I
|
|
/// I.i.u R.x-y.v | i not in x-y leave alone, nonoverlapping
|
|
/// R.x-y.v I.i.u | i in x-y ERROR
|
|
/// R.x-y.v I.x.u R.x-y.uv (combine, delete I)
|
|
/// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping
|
|
///
|
|
/// I.i.u = insert u before op @ index i
|
|
/// R.x-y.u = replace x-y indexed tokens with u
|
|
///
|
|
/// First we need to examine replaces. For any replace op:
|
|
///
|
|
/// 1. wipe out any insertions before op within that range.
|
|
/// 2. Drop any replace op before that is contained completely within
|
|
/// that range.
|
|
/// 3. Throw exception upon boundary overlap with any previous replace.
|
|
///
|
|
/// Then we can deal with inserts:
|
|
///
|
|
/// 1. for any inserts to same index, combine even if not adjacent.
|
|
/// 2. for any prior replace with same left boundary, combine this
|
|
/// insert with replace and delete this replace.
|
|
/// 3. throw exception if index in same range as previous replace
|
|
///
|
|
/// Don't actually delete; make op null in list. Easier to walk list.
|
|
/// Later we can throw as we add to index -> op map.
|
|
///
|
|
/// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
|
|
/// inserted stuff would be before the replace range. But, if you
|
|
/// add tokens in front of a method body '{' and then delete the method
|
|
/// body, I think the stuff before the '{' you added should disappear too.
|
|
/// </remarks>
|
|
function ReduceToSingleOperationPerIndex(
|
|
const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>;
|
|
|
|
function GetKindOfOps(const Rewrites: IList<IRewriteOperation>;
|
|
const Kind: TGUID): IList<IRewriteOperation>; overload;
|
|
/// <summary>
|
|
/// Get all operations before an index of a particular kind
|
|
/// </summary>
|
|
function GetKindOfOps(const Rewrites: IList<IRewriteOperation>;
|
|
const Kind: TGUID; const Before: Integer): IList<IRewriteOperation>; overload;
|
|
|
|
function CatOpText(const A, B: IANTLRInterface): IANTLRInterface;
|
|
public
|
|
constructor Create; overload;
|
|
constructor Create(const ATokenSource: ITokenSource); overload;
|
|
constructor Create(const ATokenSource: ITokenSource;
|
|
const AChannel: Integer); overload;
|
|
constructor Create(const ALexer: ILexer); overload;
|
|
constructor Create(const ALexer: ILexer;
|
|
const AChannel: Integer); overload;
|
|
|
|
function ToString: String; overload; override;
|
|
end;
|
|
|
|
{ These functions return X or, if X = nil, an empty default instance }
|
|
function Def(const X: IToken): IToken; overload;
|
|
function Def(const X: IRuleReturnScope): IRuleReturnScope; overload;
|
|
|
|
implementation
|
|
|
|
uses
|
|
StrUtils,
|
|
Math,
|
|
Antlr.Runtime.Tree;
|
|
|
|
{ ERecognitionException }
|
|
|
|
constructor ERecognitionException.Create;
|
|
begin
|
|
Create('', nil);
|
|
end;
|
|
|
|
constructor ERecognitionException.Create(const AMessage: String);
|
|
begin
|
|
Create(AMessage, nil);
|
|
end;
|
|
|
|
constructor ERecognitionException.Create(const AInput: IIntStream);
|
|
begin
|
|
Create('', AInput);
|
|
end;
|
|
|
|
constructor ERecognitionException.Create(const AMessage: String;
|
|
const AInput: IIntStream);
|
|
var
|
|
TokenStream: ITokenStream;
|
|
CharStream: ICharStream;
|
|
begin
|
|
inherited Create(AMessage);
|
|
FInput := AInput;
|
|
FIndex := AInput.Index;
|
|
|
|
if Supports(AInput, ITokenStream, TokenStream) then
|
|
begin
|
|
FToken := TokenStream.LT(1);
|
|
FLine := FToken.Line;
|
|
FCharPositionInLine := FToken.CharPositionInLine;
|
|
end;
|
|
|
|
if Supports(AInput, ITreeNodeStream) then
|
|
ExtractInformationFromTreeNodeStream(AInput)
|
|
else
|
|
begin
|
|
if Supports(AInput, ICharStream, CharStream) then
|
|
begin
|
|
FC := AInput.LA(1);
|
|
FLine := CharStream.Line;
|
|
FCharPositionInLine := CharStream.CharPositionInLine;
|
|
end
|
|
else
|
|
FC := AInput.LA(1);
|
|
end;
|
|
end;
|
|
|
|
procedure ERecognitionException.ExtractInformationFromTreeNodeStream(
|
|
const Input: IIntStream);
|
|
var
|
|
Nodes: ITreeNodeStream;
|
|
Adaptor: ITreeAdaptor;
|
|
Payload, PriorPayload: IToken;
|
|
I, NodeType: Integer;
|
|
PriorNode: IANTLRInterface;
|
|
Tree: ITree;
|
|
Text: String;
|
|
CommonTree: ICommonTree;
|
|
begin
|
|
Nodes := Input as ITreeNodeStream;
|
|
FNode := Nodes.LT(1);
|
|
Adaptor := Nodes.TreeAdaptor;
|
|
Payload := Adaptor.GetToken(FNode);
|
|
|
|
if Assigned(Payload) then
|
|
begin
|
|
FToken := Payload;
|
|
if (Payload.Line <= 0) then
|
|
begin
|
|
// imaginary node; no line/pos info; scan backwards
|
|
I := -1;
|
|
PriorNode := Nodes.LT(I);
|
|
while Assigned(PriorNode) do
|
|
begin
|
|
PriorPayload := Adaptor.GetToken(PriorNode);
|
|
if Assigned(PriorPayload) and (PriorPayload.Line > 0) then
|
|
begin
|
|
// we found the most recent real line / pos info
|
|
FLine := PriorPayload.Line;
|
|
FCharPositionInLine := PriorPayload.CharPositionInLine;
|
|
FApproximateLineInfo := True;
|
|
Break;
|
|
end;
|
|
Dec(I);
|
|
PriorNode := Nodes.LT(I)
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
// node created from real token
|
|
FLine := Payload.Line;
|
|
FCharPositionInLine := Payload.CharPositionInLine;
|
|
end;
|
|
end else
|
|
if Supports(FNode, ITree, Tree) then
|
|
begin
|
|
FLine := Tree.Line;
|
|
FCharPositionInLine := Tree.CharPositionInLine;
|
|
if Supports(FNode, ICommonTree, CommonTree) then
|
|
FToken := CommonTree.Token;
|
|
end
|
|
else
|
|
begin
|
|
NodeType := Adaptor.GetNodeType(FNode);
|
|
Text := Adaptor.GetNodeText(FNode);
|
|
FToken := TCommonToken.Create(NodeType, Text);
|
|
end;
|
|
end;
|
|
|
|
function ERecognitionException.GetUnexpectedType: Integer;
|
|
var
|
|
Nodes: ITreeNodeStream;
|
|
Adaptor: ITreeAdaptor;
|
|
begin
|
|
if Supports(FInput, ITokenStream) then
|
|
Result := FToken.TokenType
|
|
else
|
|
if Supports(FInput, ITreeNodeStream, Nodes) then
|
|
begin
|
|
Adaptor := Nodes.TreeAdaptor;
|
|
Result := Adaptor.GetNodeType(FNode);
|
|
end else
|
|
Result := FC;
|
|
end;
|
|
|
|
{ EMismatchedTokenException }
|
|
|
|
constructor EMismatchedTokenException.Create(const AExpecting: Integer;
|
|
const AInput: IIntStream);
|
|
begin
|
|
inherited Create(AInput);
|
|
FExpecting := AExpecting;
|
|
end;
|
|
|
|
function EMismatchedTokenException.ToString: String;
|
|
begin
|
|
Result := 'MismatchedTokenException(' + IntToStr(UnexpectedType)
|
|
+ '!=' + IntToStr(Expecting) + ')';
|
|
|
|
end;
|
|
|
|
{ EUnwantedTokenException }
|
|
|
|
function EUnwantedTokenException.GetUnexpectedToken: IToken;
|
|
begin
|
|
Result := FToken;
|
|
end;
|
|
|
|
function EUnwantedTokenException.ToString: String;
|
|
var
|
|
Exp: String;
|
|
begin
|
|
if (Expecting = TToken.INVALID_TOKEN_TYPE) then
|
|
Exp := ''
|
|
else
|
|
Exp := ', expected ' + IntToStr(Expecting);
|
|
if (Token = nil) then
|
|
Result := 'UnwantedTokenException(found=nil' + Exp + ')'
|
|
else
|
|
Result := 'UnwantedTokenException(found=' + Token.Text + Exp + ')'
|
|
end;
|
|
|
|
{ EMissingTokenException }
|
|
|
|
constructor EMissingTokenException.Create(const AExpecting: Integer;
|
|
const AInput: IIntStream; const AInserted: IANTLRInterface);
|
|
begin
|
|
inherited Create(AExpecting, AInput);
|
|
FInserted := AInserted;
|
|
end;
|
|
|
|
function EMissingTokenException.GetMissingType: Integer;
|
|
begin
|
|
Result := Expecting;
|
|
end;
|
|
|
|
function EMissingTokenException.ToString: String;
|
|
begin
|
|
if Assigned(FInserted) and Assigned(FToken) then
|
|
Result := 'MissingTokenException(inserted ' + FInserted.ToString
|
|
+ ' at ' + FToken.Text + ')'
|
|
else
|
|
if Assigned(FToken) then
|
|
Result := 'MissingTokenException(at ' + FToken.Text + ')'
|
|
else
|
|
Result := 'MissingTokenException';
|
|
end;
|
|
|
|
{ EMismatchedTreeNodeException }
|
|
|
|
constructor EMismatchedTreeNodeException.Create(const AExpecting: Integer;
|
|
const AInput: IIntStream);
|
|
begin
|
|
inherited Create(AInput);
|
|
FExpecting := AExpecting;
|
|
end;
|
|
|
|
function EMismatchedTreeNodeException.ToString: String;
|
|
begin
|
|
Result := 'MismatchedTreeNodeException(' + IntToStr(UnexpectedType)
|
|
+ '!=' + IntToStr(Expecting) + ')';
|
|
end;
|
|
|
|
{ ENoViableAltException }
|
|
|
|
constructor ENoViableAltException.Create(
|
|
const AGrammarDecisionDescription: String; const ADecisionNumber,
|
|
AStateNumber: Integer; const AInput: IIntStream);
|
|
begin
|
|
inherited Create(AInput);
|
|
FGrammarDecisionDescription := AGrammarDecisionDescription;
|
|
FDecisionNumber := ADecisionNumber;
|
|
FStateNumber := AStateNumber;
|
|
end;
|
|
|
|
function ENoViableAltException.ToString: String;
|
|
begin
|
|
if Supports(Input, ICharStream) then
|
|
Result := 'NoViableAltException(''' + Char(UnexpectedType) + '''@['
|
|
+ FGrammarDecisionDescription + '])'
|
|
else
|
|
Result := 'NoViableAltException(''' + IntToStr(UnexpectedType) + '''@['
|
|
+ FGrammarDecisionDescription + '])'
|
|
end;
|
|
|
|
{ EEarlyExitException }
|
|
|
|
constructor EEarlyExitException.Create(const ADecisionNumber: Integer;
|
|
const AInput: IIntStream);
|
|
begin
|
|
inherited Create(AInput);
|
|
FDecisionNumber := ADecisionNumber;
|
|
end;
|
|
|
|
{ EMismatchedSetException }
|
|
|
|
constructor EMismatchedSetException.Create(const AExpecting: IBitSet;
|
|
const AInput: IIntStream);
|
|
begin
|
|
inherited Create(AInput);
|
|
FExpecting := AExpecting;
|
|
end;
|
|
|
|
function EMismatchedSetException.ToString: String;
|
|
begin
|
|
Result := 'MismatchedSetException(' + IntToStr(UnexpectedType)
|
|
+ '!=' + Expecting.ToString + ')';
|
|
end;
|
|
|
|
{ EMismatchedNotSetException }
|
|
|
|
function EMismatchedNotSetException.ToString: String;
|
|
begin
|
|
Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType)
|
|
+ '!=' + Expecting.ToString + ')';
|
|
end;
|
|
|
|
{ EFailedPredicateException }
|
|
|
|
constructor EFailedPredicateException.Create(const AInput: IIntStream;
|
|
const ARuleName, APredicateText: String);
|
|
begin
|
|
inherited Create(AInput);
|
|
FRuleName := ARuleName;
|
|
FPredicateText := APredicateText;
|
|
end;
|
|
|
|
function EFailedPredicateException.ToString: String;
|
|
begin
|
|
Result := 'FailedPredicateException(' + FRuleName + ',{' + FPredicateText + '}?)';
|
|
end;
|
|
|
|
{ EMismatchedRangeException }
|
|
|
|
constructor EMismatchedRangeException.Create(const AA, AB: Integer;
|
|
const AInput: IIntStream);
|
|
begin
|
|
inherited Create(FInput);
|
|
FA := AA;
|
|
FB := AB;
|
|
end;
|
|
|
|
function EMismatchedRangeException.ToString: String;
|
|
begin
|
|
Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType)
|
|
+ ' not in [' + IntToStr(FA)+ ',' + IntToStr(FB) + '])';
|
|
end;
|
|
|
|
{ TCharStreamState }
|
|
|
|
function TCharStreamState.GetCharPositionInLine: Integer;
|
|
begin
|
|
Result := FCharPositionInLine;
|
|
end;
|
|
|
|
function TCharStreamState.GetLine: Integer;
|
|
begin
|
|
Result := FLine;
|
|
end;
|
|
|
|
function TCharStreamState.GetP: Integer;
|
|
begin
|
|
Result := FP;
|
|
end;
|
|
|
|
procedure TCharStreamState.SetCharPositionInLine(const Value: Integer);
|
|
begin
|
|
FCharPositionInLine := Value;
|
|
end;
|
|
|
|
procedure TCharStreamState.SetLine(const Value: Integer);
|
|
begin
|
|
FLine := Value;
|
|
end;
|
|
|
|
procedure TCharStreamState.SetP(const Value: Integer);
|
|
begin
|
|
FP := Value;
|
|
end;
|
|
|
|
{ TANTLRStringStream }
|
|
|
|
constructor TANTLRStringStream.Create(const AInput: String);
|
|
begin
|
|
inherited Create;
|
|
FLine := 1;
|
|
FOwnsData := True;
|
|
FN := Length(AInput);
|
|
if (FN > 0) then
|
|
begin
|
|
GetMem(FData,FN * SizeOf(Char));
|
|
Move(AInput[1],FData^,FN * SizeOf(Char));
|
|
end;
|
|
end;
|
|
|
|
procedure TANTLRStringStream.Consume;
|
|
begin
|
|
if (FP < FN) then
|
|
begin
|
|
Inc(FCharPositionInLine);
|
|
if (FData[FP] = #10) then
|
|
begin
|
|
Inc(FLine);
|
|
FCharPositionInLine := 0;
|
|
end;
|
|
Inc(FP);
|
|
end;
|
|
end;
|
|
|
|
constructor TANTLRStringStream.Create(const AData: PChar;
|
|
const ANumberOfActualCharsInArray: Integer);
|
|
begin
|
|
inherited Create;
|
|
FLine := 1;
|
|
FOwnsData := False;
|
|
FData := AData;
|
|
FN := ANumberOfActualCharsInArray;
|
|
end;
|
|
|
|
constructor TANTLRStringStream.Create;
|
|
begin
|
|
inherited Create;
|
|
FLine := 1;
|
|
end;
|
|
|
|
destructor TANTLRStringStream.Destroy;
|
|
begin
|
|
if (FOwnsData) then
|
|
FreeMem(FData);
|
|
inherited;
|
|
end;
|
|
|
|
function TANTLRStringStream.GetCharPositionInLine: Integer;
|
|
begin
|
|
Result := FCharPositionInLine;
|
|
end;
|
|
|
|
function TANTLRStringStream.GetLine: Integer;
|
|
begin
|
|
Result := FLine;
|
|
end;
|
|
|
|
function TANTLRStringStream.GetSourceName: String;
|
|
begin
|
|
Result := FName;
|
|
end;
|
|
|
|
function TANTLRStringStream.Index: Integer;
|
|
begin
|
|
Result := FP;
|
|
end;
|
|
|
|
function TANTLRStringStream.LA(I: Integer): Integer;
|
|
begin
|
|
if (I = 0) then
|
|
Result := 0 // undefined
|
|
else begin
|
|
if (I < 0) then
|
|
begin
|
|
Inc(I); // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
|
|
if ((FP + I - 1) < 0) then
|
|
begin
|
|
Result := Integer(cscEOF);
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
if ((FP + I - 1) >= FN) then
|
|
Result := Integer(cscEOF)
|
|
else
|
|
Result := Integer(FData[FP + I - 1]);
|
|
end;
|
|
end;
|
|
|
|
function TANTLRStringStream.LAChar(I: Integer): Char;
|
|
begin
|
|
Result := Char(LA(I));
|
|
end;
|
|
|
|
function TANTLRStringStream.LT(const I: Integer): Integer;
|
|
begin
|
|
Result := LA(I);
|
|
end;
|
|
|
|
function TANTLRStringStream.Mark: Integer;
|
|
var
|
|
State: ICharStreamState;
|
|
begin
|
|
if (FMarkers = nil) then
|
|
begin
|
|
FMarkers := TList<ICharStreamState>.Create;
|
|
FMarkers.Add(nil); // depth 0 means no backtracking, leave blank
|
|
end;
|
|
|
|
Inc(FMarkDepth);
|
|
if (FMarkDepth >= FMarkers.Count) then
|
|
begin
|
|
State := TCharStreamState.Create;
|
|
FMarkers.Add(State);
|
|
end
|
|
else
|
|
State := FMarkers[FMarkDepth];
|
|
|
|
State.P := FP;
|
|
State.Line := FLine;
|
|
State.CharPositionInLine := FCharPositionInLine;
|
|
FLastMarker := FMarkDepth;
|
|
Result := FMarkDepth;
|
|
end;
|
|
|
|
procedure TANTLRStringStream.Release(const Marker: Integer);
|
|
begin
|
|
// unwind any other markers made after m and release m
|
|
FMarkDepth := Marker;
|
|
// release this marker
|
|
Dec(FMarkDepth);
|
|
end;
|
|
|
|
procedure TANTLRStringStream.Reset;
|
|
begin
|
|
FP := 0;
|
|
FLine := 1;
|
|
FCharPositionInLine := 0;
|
|
FMarkDepth := 0;
|
|
end;
|
|
|
|
procedure TANTLRStringStream.Rewind(const Marker: Integer);
|
|
var
|
|
State: ICharStreamState;
|
|
begin
|
|
State := FMarkers[Marker];
|
|
// restore stream state
|
|
Seek(State.P);
|
|
FLine := State.Line;
|
|
FCharPositionInLine := State.CharPositionInLine;
|
|
Release(Marker);
|
|
end;
|
|
|
|
procedure TANTLRStringStream.Rewind;
|
|
begin
|
|
Rewind(FLastMarker);
|
|
end;
|
|
|
|
procedure TANTLRStringStream.Seek(const Index: Integer);
|
|
begin
|
|
if (Index <= FP) then
|
|
FP := Index // just jump; don't update stream state (line, ...)
|
|
else begin
|
|
// seek forward, consume until p hits index
|
|
while (FP < Index) do
|
|
Consume;
|
|
end;
|
|
end;
|
|
|
|
procedure TANTLRStringStream.SetCharPositionInLine(const Value: Integer);
|
|
begin
|
|
FCharPositionInLine := Value;
|
|
end;
|
|
|
|
procedure TANTLRStringStream.SetLine(const Value: Integer);
|
|
begin
|
|
FLine := Value;
|
|
end;
|
|
|
|
function TANTLRStringStream.Size: Integer;
|
|
begin
|
|
Result := FN;
|
|
end;
|
|
|
|
function TANTLRStringStream.Substring(const Start, Stop: Integer): String;
|
|
begin
|
|
Result := Copy(FData, Start + 1, Stop - Start + 1);
|
|
end;
|
|
|
|
{ TANTLRFileStream }
|
|
|
|
constructor TANTLRFileStream.Create(const AFileName: String);
|
|
begin
|
|
Create(AFilename,TEncoding.Default);
|
|
end;
|
|
|
|
constructor TANTLRFileStream.Create(const AFileName: String;
|
|
const AEncoding: TEncoding);
|
|
begin
|
|
inherited Create;
|
|
FFileName := AFileName;
|
|
Load(FFileName, AEncoding);
|
|
end;
|
|
|
|
function TANTLRFileStream.GetSourceName: String;
|
|
begin
|
|
Result := FFileName;
|
|
end;
|
|
|
|
procedure TANTLRFileStream.Load(const FileName: String;
|
|
const Encoding: TEncoding);
|
|
var
|
|
FR: TStreamReader;
|
|
S: String;
|
|
begin
|
|
if (FFileName <> '') then
|
|
begin
|
|
if (Encoding = nil) then
|
|
FR := TStreamReader.Create(FileName,TEncoding.Default)
|
|
else
|
|
FR := TStreamReader.Create(FileName,Encoding);
|
|
|
|
try
|
|
if (FOwnsData) then
|
|
begin
|
|
FreeMem(FData);
|
|
FData := nil;
|
|
end;
|
|
|
|
FOwnsData := True;
|
|
S := FR.ReadToEnd;
|
|
FN := Length(S);
|
|
if (FN > 0) then
|
|
begin
|
|
GetMem(FData,FN * SizeOf(Char));
|
|
Move(S[1],FData^,FN * SizeOf(Char));
|
|
end;
|
|
finally
|
|
FR.Free;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{ TBitSet }
|
|
|
|
class function TBitSet.BitSetOf(const El: Integer): IBitSet;
|
|
begin
|
|
Result := TBitSet.Create(El + 1);
|
|
Result.Add(El);
|
|
end;
|
|
|
|
class function TBitSet.BitSetOf(const A, B: Integer): IBitSet;
|
|
begin
|
|
Result := TBitSet.Create(Max(A,B) + 1);
|
|
Result.Add(A);
|
|
Result.Add(B);
|
|
end;
|
|
|
|
class function TBitSet.BitSetOf(const A, B, C: Integer): IBitSet;
|
|
begin
|
|
Result := TBitSet.Create;
|
|
Result.Add(A);
|
|
Result.Add(B);
|
|
Result.Add(C);
|
|
end;
|
|
|
|
class function TBitSet.BitSetOf(const A, B, C, D: Integer): IBitSet;
|
|
begin
|
|
Result := TBitSet.Create;
|
|
Result.Add(A);
|
|
Result.Add(B);
|
|
Result.Add(C);
|
|
Result.Add(D);
|
|
end;
|
|
|
|
procedure TBitSet.Add(const El: Integer);
|
|
var
|
|
N: Integer;
|
|
begin
|
|
N := WordNumber(El);
|
|
if (N >= Length(FBits)) then
|
|
GrowToInclude(El);
|
|
FBits[N] := FBits[N] or BitMask(El);
|
|
end;
|
|
|
|
class function TBitSet.BitMask(const BitNumber: Integer): UInt64;
|
|
var
|
|
BitPosition: Integer;
|
|
begin
|
|
BitPosition := BitNumber and MOD_MASK;
|
|
Result := UInt64(1) shl BitPosition;
|
|
end;
|
|
|
|
function TBitSet.BitSetOr(const A: IBitSet): IBitSet;
|
|
begin
|
|
Result := Clone as IBitSet;
|
|
Result.OrInPlace(A);
|
|
end;
|
|
|
|
function TBitSet.Clone: IANTLRInterface;
|
|
var
|
|
BS: TBitSet;
|
|
begin
|
|
BS := TBitSet.Create;
|
|
Result := BS;
|
|
SetLength(BS.FBits,Length(FBits));
|
|
if (Length(FBits) > 0) then
|
|
Move(FBits[0],BS.FBits[0],Length(FBits) * SizeOf(UInt64));
|
|
end;
|
|
|
|
constructor TBitSet.Create;
|
|
begin
|
|
Create(BITS);
|
|
end;
|
|
|
|
constructor TBitSet.Create(const ABits: array of UInt64);
|
|
begin
|
|
inherited Create;
|
|
SetLength(FBits, Length(ABits));
|
|
if (Length(ABits) > 0) then
|
|
Move(ABits[0], FBits[0], Length(ABits) * SizeOf(UInt64));
|
|
end;
|
|
|
|
constructor TBitSet.Create(const AItems: IList<Integer>);
|
|
var
|
|
V: Integer;
|
|
begin
|
|
Create(BITS);
|
|
for V in AItems do
|
|
Add(V);
|
|
end;
|
|
|
|
constructor TBitSet.Create(const ANBits: Integer);
|
|
begin
|
|
inherited Create;
|
|
SetLength(FBits,((ANBits - 1) shr LOG_BITS) + 1);
|
|
end;
|
|
|
|
function TBitSet.Equals(Obj: TObject): Boolean;
|
|
var
|
|
OtherSet: TBitSet absolute Obj;
|
|
I, N: Integer;
|
|
begin
|
|
Result := False;
|
|
if (Obj = nil) or (not (Obj is TBitSet)) then
|
|
Exit;
|
|
|
|
N := Min(Length(FBits), Length(OtherSet.FBits));
|
|
|
|
// for any bits in common, compare
|
|
for I := 0 to N - 1 do
|
|
begin
|
|
if (FBits[I] <> OtherSet.FBits[I]) then
|
|
Exit;
|
|
end;
|
|
|
|
// make sure any extra bits are off
|
|
if (Length(FBits) > N) then
|
|
begin
|
|
for I := N + 1 to Length(FBits) - 1 do
|
|
begin
|
|
if (FBits[I] <> 0) then
|
|
Exit;
|
|
end;
|
|
end
|
|
else
|
|
if (Length(OtherSet.FBits) > N) then
|
|
begin
|
|
for I := N + 1 to Length(OtherSet.FBits) - 1 do
|
|
begin
|
|
if (OtherSet.FBits[I] <> 0) then
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
Result := True;
|
|
end;
|
|
|
|
function TBitSet.GetIsNil: Boolean;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
for I := Length(FBits) - 1 downto 0 do
|
|
if (FBits[I] <> 0) then
|
|
begin
|
|
Result := False;
|
|
Exit;
|
|
end;
|
|
Result := True;
|
|
end;
|
|
|
|
procedure TBitSet.GrowToInclude(const Bit: Integer);
|
|
var
|
|
NewSize: Integer;
|
|
begin
|
|
NewSize := Max(Length(FBits) shl 1,NumWordsToHold(Bit));
|
|
SetLength(FBits,NewSize);
|
|
end;
|
|
|
|
function TBitSet.LengthInLongWords: Integer;
|
|
begin
|
|
Result := Length(FBits);
|
|
end;
|
|
|
|
function TBitSet.Member(const El: Integer): Boolean;
|
|
var
|
|
N: Integer;
|
|
begin
|
|
if (El < 0) then
|
|
Result := False
|
|
else
|
|
begin
|
|
N := WordNumber(El);
|
|
if (N >= Length(FBits)) then
|
|
Result := False
|
|
else
|
|
Result := ((FBits[N] and BitMask(El)) <> 0);
|
|
end;
|
|
end;
|
|
|
|
function TBitSet.NumBits: Integer;
|
|
begin
|
|
Result := Length(FBits) shl LOG_BITS;
|
|
end;
|
|
|
|
class function TBitSet.NumWordsToHold(const El: Integer): Integer;
|
|
begin
|
|
Result := (El shr LOG_BITS) + 1;
|
|
end;
|
|
|
|
procedure TBitSet.OrInPlace(const A: IBitSet);
|
|
var
|
|
I, M: Integer;
|
|
ABits: TUInt64Array;
|
|
begin
|
|
if Assigned(A) then
|
|
begin
|
|
// If this is smaller than a, grow this first
|
|
if (A.LengthInLongWords > Length(FBits)) then
|
|
SetLength(FBits,A.LengthInLongWords);
|
|
M := Min(Length(FBits), A.LengthInLongWords);
|
|
ABits := A.ToPackedArray;
|
|
for I := M - 1 downto 0 do
|
|
FBits[I] := FBits[I] or ABits[I];
|
|
end;
|
|
end;
|
|
|
|
procedure TBitSet.Remove(const El: Integer);
|
|
var
|
|
N: Integer;
|
|
begin
|
|
N := WordNumber(El);
|
|
if (N < Length(FBits)) then
|
|
FBits[N] := (FBits[N] and not BitMask(El));
|
|
end;
|
|
|
|
function TBitSet.Size: Integer;
|
|
var
|
|
I, Bit: Integer;
|
|
W: UInt64;
|
|
begin
|
|
Result := 0;
|
|
for I := Length(FBits) - 1 downto 0 do
|
|
begin
|
|
W := FBits[I];
|
|
if (W <> 0) then
|
|
begin
|
|
for Bit := BITS - 1 downto 0 do
|
|
begin
|
|
if ((W and (UInt64(1) shl Bit)) <> 0) then
|
|
Inc(Result);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TBitSet.ToArray: TIntegerArray;
|
|
var
|
|
I, En: Integer;
|
|
begin
|
|
SetLength(Result,Size);
|
|
En := 0;
|
|
for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do
|
|
begin
|
|
if Member(I) then
|
|
begin
|
|
Result[En] := I;
|
|
Inc(En);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TBitSet.ToPackedArray: TUInt64Array;
|
|
begin
|
|
Result := FBits;
|
|
end;
|
|
|
|
function TBitSet.ToString: String;
|
|
begin
|
|
Result := ToString(nil);
|
|
end;
|
|
|
|
function TBitSet.ToString(const TokenNames: TStringArray): String;
|
|
var
|
|
Buf: TStringBuilder;
|
|
I: Integer;
|
|
HavePrintedAnElement: Boolean;
|
|
begin
|
|
HavePrintedAnElement := False;
|
|
Buf := TStringBuilder.Create;
|
|
try
|
|
Buf.Append('{');
|
|
for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do
|
|
begin
|
|
if Member(I) then
|
|
begin
|
|
if (I > 0) and HavePrintedAnElement then
|
|
Buf.Append(',');
|
|
if Assigned(TokenNames) then
|
|
Buf.Append(TokenNames[I])
|
|
else
|
|
Buf.Append(I);
|
|
HavePrintedAnElement := True;
|
|
end;
|
|
end;
|
|
Buf.Append('}');
|
|
Result := Buf.ToString;
|
|
finally
|
|
Buf.Free;
|
|
end;
|
|
end;
|
|
|
|
class function TBitSet.WordNumber(const Bit: Integer): Integer;
|
|
begin
|
|
Result := Bit shr LOG_BITS; // Bit / BITS
|
|
end;
|
|
|
|
{ TRecognizerSharedState }
|
|
|
|
constructor TRecognizerSharedState.Create;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
inherited;
|
|
SetLength(FFollowing,TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE);
|
|
for I := 0 to TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE - 1 do
|
|
FFollowing[I] := TBitSet.Create;
|
|
FFollowingStackPointer := -1;
|
|
FLastErrorIndex := -1;
|
|
FTokenStartCharIndex := -1;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetBacktracking: Integer;
|
|
begin
|
|
Result := FBacktracking;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetChannel: Integer;
|
|
begin
|
|
Result := FChannel;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetErrorRecovery: Boolean;
|
|
begin
|
|
Result := FErrorRecovery;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetFailed: Boolean;
|
|
begin
|
|
Result := FFailed;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetFollowing: TBitSetArray;
|
|
begin
|
|
Result := FFollowing;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetFollowingStackPointer: Integer;
|
|
begin
|
|
Result := FFollowingStackPointer;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetLastErrorIndex: Integer;
|
|
begin
|
|
Result := FLastErrorIndex;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetRuleMemo: TDictionaryArray<Integer, Integer>;
|
|
begin
|
|
Result := FRuleMemo;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetRuleMemoCount: Integer;
|
|
begin
|
|
Result := Length(FRuleMemo);
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetSyntaxErrors: Integer;
|
|
begin
|
|
Result := FSyntaxErrors;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetText: String;
|
|
begin
|
|
Result := FText;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetToken: IToken;
|
|
begin
|
|
Result := FToken;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetTokenStartCharIndex: Integer;
|
|
begin
|
|
Result := FTokenStartCharIndex;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetTokenStartCharPositionInLine: Integer;
|
|
begin
|
|
Result := FTokenStartCharPositionInLine;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetTokenStartLine: Integer;
|
|
begin
|
|
Result := FTokenStartLine;
|
|
end;
|
|
|
|
function TRecognizerSharedState.GetTokenType: Integer;
|
|
begin
|
|
Result := FTokenType;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetBacktracking(const Value: Integer);
|
|
begin
|
|
FBacktracking := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetChannel(const Value: Integer);
|
|
begin
|
|
FChannel := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetErrorRecovery(const Value: Boolean);
|
|
begin
|
|
FErrorRecovery := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetFailed(const Value: Boolean);
|
|
begin
|
|
FFailed := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetFollowing(const Value: TBitSetArray);
|
|
begin
|
|
FFollowing := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetFollowingStackPointer(const Value: Integer);
|
|
begin
|
|
FFollowingStackPointer := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetLastErrorIndex(const Value: Integer);
|
|
begin
|
|
FLastErrorIndex := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetRuleMemoCount(const Value: Integer);
|
|
begin
|
|
SetLength(FRuleMemo, Value);
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetSyntaxErrors(const Value: Integer);
|
|
begin
|
|
FSyntaxErrors := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetText(const Value: String);
|
|
begin
|
|
FText := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetToken(const Value: IToken);
|
|
begin
|
|
FToken := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetTokenStartCharIndex(const Value: Integer);
|
|
begin
|
|
FTokenStartCharIndex := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetTokenStartCharPositionInLine(
|
|
const Value: Integer);
|
|
begin
|
|
FTokenStartCharPositionInLine := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetTokenStartLine(const Value: Integer);
|
|
begin
|
|
FTokenStartLine := Value;
|
|
end;
|
|
|
|
procedure TRecognizerSharedState.SetTokenType(const Value: Integer);
|
|
begin
|
|
FTokenType := Value;
|
|
end;
|
|
|
|
{ TCommonToken }
|
|
|
|
constructor TCommonToken.Create;
|
|
begin
|
|
inherited;
|
|
FChannel := TToken.DEFAULT_CHANNEL;
|
|
FCharPositionInLine := -1;
|
|
FIndex := -1;
|
|
end;
|
|
|
|
constructor TCommonToken.Create(const ATokenType: Integer);
|
|
begin
|
|
Create;
|
|
FTokenType := ATokenType;
|
|
end;
|
|
|
|
constructor TCommonToken.Create(const AInput: ICharStream; const ATokenType,
|
|
AChannel, AStart, AStop: Integer);
|
|
begin
|
|
Create;
|
|
FInput := AInput;
|
|
FTokenType := ATokenType;
|
|
FChannel := AChannel;
|
|
FStart := AStart;
|
|
FStop := AStop;
|
|
end;
|
|
|
|
constructor TCommonToken.Create(const ATokenType: Integer; const AText: String);
|
|
begin
|
|
Create;
|
|
FTokenType := ATokenType;
|
|
FChannel := TToken.DEFAULT_CHANNEL;
|
|
FText := AText;
|
|
end;
|
|
|
|
function TCommonToken.GetChannel: Integer;
|
|
begin
|
|
Result := FChannel;
|
|
end;
|
|
|
|
function TCommonToken.GetCharPositionInLine: Integer;
|
|
begin
|
|
Result := FCharPositionInLine;
|
|
end;
|
|
|
|
function TCommonToken.GetInputStream: ICharStream;
|
|
begin
|
|
Result := FInput;
|
|
end;
|
|
|
|
function TCommonToken.GetLine: Integer;
|
|
begin
|
|
Result := FLine;
|
|
end;
|
|
|
|
function TCommonToken.GetStartIndex: Integer;
|
|
begin
|
|
Result := FStart;
|
|
end;
|
|
|
|
function TCommonToken.GetStopIndex: Integer;
|
|
begin
|
|
Result := FStop;
|
|
end;
|
|
|
|
function TCommonToken.GetText: String;
|
|
begin
|
|
if (FText <> '') then
|
|
Result := FText
|
|
else
|
|
if (FInput = nil) then
|
|
Result := ''
|
|
else
|
|
Result := FInput.Substring(FStart, FStop);
|
|
end;
|
|
|
|
function TCommonToken.GetTokenIndex: Integer;
|
|
begin
|
|
Result := FIndex;
|
|
end;
|
|
|
|
function TCommonToken.GetTokenType: Integer;
|
|
begin
|
|
Result := FTokenType;
|
|
end;
|
|
|
|
procedure TCommonToken.SetChannel(const Value: Integer);
|
|
begin
|
|
FChannel := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetCharPositionInLine(const Value: Integer);
|
|
begin
|
|
FCharPositionInLine := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetInputStream(const Value: ICharStream);
|
|
begin
|
|
FInput := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetLine(const Value: Integer);
|
|
begin
|
|
FLine := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetStartIndex(const Value: Integer);
|
|
begin
|
|
FStart := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetStopIndex(const Value: Integer);
|
|
begin
|
|
FStop := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetText(const Value: String);
|
|
begin
|
|
(* Override the text for this token. The property getter
|
|
* will return this text rather than pulling from the buffer.
|
|
* Note that this does not mean that start/stop indexes are
|
|
* not valid. It means that the input was converted to a new
|
|
* string in the token object.
|
|
*)
|
|
FText := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetTokenIndex(const Value: Integer);
|
|
begin
|
|
FIndex := Value;
|
|
end;
|
|
|
|
procedure TCommonToken.SetTokenType(const Value: Integer);
|
|
begin
|
|
FTokenType := Value;
|
|
end;
|
|
|
|
function TCommonToken.ToString: String;
|
|
var
|
|
ChannelStr, Txt: String;
|
|
begin
|
|
if (FChannel > 0) then
|
|
ChannelStr := ',channel=' + IntToStr(FChannel)
|
|
else
|
|
ChannelStr := '';
|
|
|
|
Txt := GetText;
|
|
if (Txt <> '') then
|
|
begin
|
|
Txt := ReplaceStr(Txt,#10,'\n');
|
|
Txt := ReplaceStr(Txt,#13,'\r');
|
|
Txt := ReplaceStr(Txt,#9,'\t');
|
|
end else
|
|
Txt := '<no text>';
|
|
|
|
Result := Format('[@%d,%d:%d=''%s'',<%d>%s,%d:%d]',
|
|
[FIndex,FStart,FStop,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]);
|
|
end;
|
|
|
|
constructor TCommonToken.Create(const AOldToken: IToken);
|
|
var
|
|
OldCommonToken: ICommonToken;
|
|
begin
|
|
Create;
|
|
FText := AOldToken.Text;
|
|
FTokenType := AOldToken.TokenType;
|
|
FLine := AOldToken.Line;
|
|
FIndex := AOldToken.TokenIndex;
|
|
FCharPositionInLine := AOldToken.CharPositionInLine;
|
|
FChannel := AOldToken.Channel;
|
|
if Supports(AOldToken, ICommonToken, OldCommonToken) then
|
|
begin
|
|
FStart := OldCommonToken.StartIndex;
|
|
FStop := OldCommonToken.StopIndex;
|
|
end;
|
|
end;
|
|
|
|
{ TClassicToken }
|
|
|
|
constructor TClassicToken.Create(const AOldToken: IToken);
|
|
begin
|
|
inherited Create;
|
|
FText := AOldToken.Text;
|
|
FTokenType := AOldToken.TokenType;
|
|
FLine := AOldToken.Line;
|
|
FCharPositionInLine := AOldToken.CharPositionInLine;
|
|
FChannel := AOldToken.Channel;
|
|
end;
|
|
|
|
constructor TClassicToken.Create(const ATokenType: Integer);
|
|
begin
|
|
inherited Create;
|
|
FTokenType := ATokenType;
|
|
end;
|
|
|
|
constructor TClassicToken.Create(const ATokenType: Integer; const AText: String;
|
|
const AChannel: Integer);
|
|
begin
|
|
inherited Create;
|
|
FTokenType := ATokenType;
|
|
FText := AText;
|
|
FChannel := AChannel;
|
|
end;
|
|
|
|
constructor TClassicToken.Create(const ATokenType: Integer;
|
|
const AText: String);
|
|
begin
|
|
inherited Create;
|
|
FTokenType := ATokenType;
|
|
FText := AText;
|
|
end;
|
|
|
|
function TClassicToken.GetChannel: Integer;
|
|
begin
|
|
Result := FChannel;
|
|
end;
|
|
|
|
function TClassicToken.GetCharPositionInLine: Integer;
|
|
begin
|
|
Result := FCharPositionInLine;
|
|
end;
|
|
|
|
function TClassicToken.GetInputStream: ICharStream;
|
|
begin
|
|
// No default implementation
|
|
Result := nil;
|
|
end;
|
|
|
|
function TClassicToken.GetLine: Integer;
|
|
begin
|
|
Result := FLine;
|
|
end;
|
|
|
|
function TClassicToken.GetText: String;
|
|
begin
|
|
Result := FText;
|
|
end;
|
|
|
|
function TClassicToken.GetTokenIndex: Integer;
|
|
begin
|
|
Result := FIndex;
|
|
end;
|
|
|
|
function TClassicToken.GetTokenType: Integer;
|
|
begin
|
|
Result := FTokenType;
|
|
end;
|
|
|
|
procedure TClassicToken.SetChannel(const Value: Integer);
|
|
begin
|
|
FChannel := Value;
|
|
end;
|
|
|
|
procedure TClassicToken.SetCharPositionInLine(const Value: Integer);
|
|
begin
|
|
FCharPositionInLine := Value;
|
|
end;
|
|
|
|
procedure TClassicToken.SetInputStream(const Value: ICharStream);
|
|
begin
|
|
// No default implementation
|
|
end;
|
|
|
|
procedure TClassicToken.SetLine(const Value: Integer);
|
|
begin
|
|
FLine := Value;
|
|
end;
|
|
|
|
procedure TClassicToken.SetText(const Value: String);
|
|
begin
|
|
FText := Value;
|
|
end;
|
|
|
|
procedure TClassicToken.SetTokenIndex(const Value: Integer);
|
|
begin
|
|
FIndex := Value;
|
|
end;
|
|
|
|
procedure TClassicToken.SetTokenType(const Value: Integer);
|
|
begin
|
|
FTokenType := Value;
|
|
end;
|
|
|
|
function TClassicToken.ToString: String;
|
|
var
|
|
ChannelStr, Txt: String;
|
|
begin
|
|
if (FChannel > 0) then
|
|
ChannelStr := ',channel=' + IntToStr(FChannel)
|
|
else
|
|
ChannelStr := '';
|
|
Txt := FText;
|
|
if (Txt <> '') then
|
|
begin
|
|
Txt := ReplaceStr(Txt,#10,'\n');
|
|
Txt := ReplaceStr(Txt,#13,'\r');
|
|
Txt := ReplaceStr(Txt,#9,'\t');
|
|
end else
|
|
Txt := '<no text>';
|
|
|
|
Result := Format('[@%d,''%s'',<%d>%s,%d:%d]',
|
|
[FIndex,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]);
|
|
end;
|
|
|
|
{ TToken }
|
|
|
|
class procedure TToken.Initialize;
|
|
begin
|
|
EOF_TOKEN := TCommonToken.Create(EOF);
|
|
INVALID_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE);
|
|
SKIP_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE);
|
|
end;
|
|
|
|
{ TBaseRecognizer }
|
|
|
|
constructor TBaseRecognizer.Create;
|
|
begin
|
|
inherited;
|
|
FState := TRecognizerSharedState.Create;
|
|
end;
|
|
|
|
function TBaseRecognizer.AlreadyParsedRule(const Input: IIntStream;
|
|
const RuleIndex: Integer): Boolean;
|
|
var
|
|
StopIndex: Integer;
|
|
begin
|
|
StopIndex := GetRuleMemoization(RuleIndex, Input.Index);
|
|
Result := (StopIndex <> MEMO_RULE_UNKNOWN);
|
|
if Result then
|
|
begin
|
|
if (StopIndex = MEMO_RULE_FAILED) then
|
|
FState.Failed := True
|
|
else
|
|
Input.Seek(StopIndex + 1); // jump to one past stop token
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.BeginBacktrack(const Level: Integer);
|
|
begin
|
|
// No defeault implementation
|
|
end;
|
|
|
|
procedure TBaseRecognizer.BeginResync;
|
|
begin
|
|
// No defeault implementation
|
|
end;
|
|
|
|
procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream;
|
|
const TokenType: Integer);
|
|
var
|
|
TType: Integer;
|
|
begin
|
|
TType := Input.LA(1);
|
|
while (TType <> TToken.EOF) and (TType <> TokenType) do
|
|
begin
|
|
Input.Consume;
|
|
TType := Input.LA(1);
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.CombineFollows(const Exact: Boolean): IBitSet;
|
|
var
|
|
I, Top: Integer;
|
|
LocalFollowSet: IBitSet;
|
|
begin
|
|
Top := FState.FollowingStackPointer;
|
|
Result := TBitSet.Create;
|
|
for I := Top downto 0 do
|
|
begin
|
|
LocalFollowSet := FState.Following[I];
|
|
Result.OrInPlace(LocalFollowSet);
|
|
if (Exact) then
|
|
begin
|
|
// can we see end of rule?
|
|
if LocalFollowSet.Member(TToken.EOR_TOKEN_TYPE) then
|
|
begin
|
|
// Only leave EOR in set if at top (start rule); this lets
|
|
// us know if have to include follow(start rule); i.e., EOF
|
|
if (I > 0) then
|
|
Result.Remove(TToken.EOR_TOKEN_TYPE);
|
|
end
|
|
else
|
|
// can't see end of rule, quit
|
|
Break;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.ComputeContextSensitiveRuleFOLLOW: IBitSet;
|
|
begin
|
|
Result := CombineFollows(True);
|
|
end;
|
|
|
|
function TBaseRecognizer.ComputeErrorRecoverySet: IBitSet;
|
|
begin
|
|
Result := CombineFollows(False);
|
|
end;
|
|
|
|
procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream;
|
|
const BitSet: IBitSet);
|
|
var
|
|
TType: Integer;
|
|
begin
|
|
TType := Input.LA(1);
|
|
while (TType <> TToken.EOF) and (not BitSet.Member(TType)) do
|
|
begin
|
|
Input.Consume;
|
|
TType := Input.LA(1);
|
|
end;
|
|
end;
|
|
|
|
constructor TBaseRecognizer.Create(const AState: IRecognizerSharedState);
|
|
begin
|
|
if (AState = nil) then
|
|
Create
|
|
else
|
|
begin
|
|
inherited Create;
|
|
FState := AState;
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.DisplayRecognitionError(
|
|
const TokenNames: TStringArray; const E: ERecognitionException);
|
|
var
|
|
Hdr, Msg: String;
|
|
begin
|
|
Hdr := GetErrorHeader(E);
|
|
Msg := GetErrorMessage(E, TokenNames);
|
|
EmitErrorMessage(Hdr + ' ' + Msg);
|
|
end;
|
|
|
|
procedure TBaseRecognizer.EmitErrorMessage(const Msg: String);
|
|
begin
|
|
WriteLn(Msg);
|
|
end;
|
|
|
|
procedure TBaseRecognizer.EndBacktrack(const Level: Integer;
|
|
const Successful: Boolean);
|
|
begin
|
|
// No defeault implementation
|
|
end;
|
|
|
|
procedure TBaseRecognizer.EndResync;
|
|
begin
|
|
// No defeault implementation
|
|
end;
|
|
|
|
function TBaseRecognizer.GetBacktrackingLevel: Integer;
|
|
begin
|
|
Result := FState.Backtracking;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetCurrentInputSymbol(
|
|
const Input: IIntStream): IANTLRInterface;
|
|
begin
|
|
// No defeault implementation
|
|
Result := nil;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetErrorHeader(const E: ERecognitionException): String;
|
|
begin
|
|
Result := 'line ' + IntToStr(E.Line) + ':' + IntToStr(E.CharPositionInLine);
|
|
end;
|
|
|
|
function TBaseRecognizer.GetErrorMessage(const E: ERecognitionException;
|
|
const TokenNames: TStringArray): String;
|
|
var
|
|
UTE: EUnwantedTokenException absolute E;
|
|
MTE: EMissingTokenException absolute E;
|
|
MMTE: EMismatchedTokenException absolute E;
|
|
MTNE: EMismatchedTreeNodeException absolute E;
|
|
NVAE: ENoViableAltException absolute E;
|
|
EEE: EEarlyExitException absolute E;
|
|
MSE: EMismatchedSetException absolute E;
|
|
MNSE: EMismatchedNotSetException absolute E;
|
|
FPE: EFailedPredicateException absolute E;
|
|
TokenName: String;
|
|
begin
|
|
Result := E.Message;
|
|
if (E is EUnwantedTokenException) then
|
|
begin
|
|
if (UTE.Expecting = TToken.EOF) then
|
|
TokenName := 'EOF'
|
|
else
|
|
TokenName := TokenNames[UTE.Expecting];
|
|
Result := 'extraneous input ' + GetTokenErrorDisplay(UTE.UnexpectedToken)
|
|
+ ' expecting ' + TokenName;
|
|
end
|
|
else
|
|
if (E is EMissingTokenException) then
|
|
begin
|
|
if (MTE.Expecting = TToken.EOF) then
|
|
TokenName := 'EOF'
|
|
else
|
|
TokenName := TokenNames[MTE.Expecting];
|
|
Result := 'missing ' + TokenName + ' at ' + GetTokenErrorDisplay(E.Token);
|
|
end
|
|
else
|
|
if (E is EMismatchedTokenException) then
|
|
begin
|
|
if (MMTE.Expecting = TToken.EOF) then
|
|
TokenName := 'EOF'
|
|
else
|
|
TokenName := TokenNames[MMTE.Expecting];
|
|
Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
|
|
+ ' expecting ' + TokenName;
|
|
end
|
|
else
|
|
if (E is EMismatchedTreeNodeException) then
|
|
begin
|
|
if (MTNE.Expecting = TToken.EOF) then
|
|
Result := 'EOF'
|
|
else
|
|
Result := TokenNames[MTNE.Expecting];
|
|
// The ternary operator is only necessary because of a bug in the .NET framework
|
|
Result := 'mismatched tree node: ';
|
|
if (MTNE.Node <> nil) and (MTNE.Node.ToString <> '') then
|
|
Result := Result + MTNE.Node.ToString;
|
|
Result := Result + ' expecting ' + TokenName;
|
|
end
|
|
else
|
|
if (E is ENoViableAltException) then
|
|
begin
|
|
// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
|
|
// and "(decision="+nvae.decisionNumber+") and
|
|
// "state "+nvae.stateNumber
|
|
Result := 'no viable alternative at input ' + GetTokenErrorDisplay(E.Token);
|
|
end
|
|
else
|
|
if (E is EEarlyExitException) then
|
|
begin
|
|
// for development, can add "(decision="+eee.decisionNumber+")"
|
|
Result := 'required (...)+ loop did not match anyting at input '
|
|
+ GetTokenErrorDisplay(E.Token);
|
|
end else
|
|
if (E is EMismatchedSetException) then
|
|
begin
|
|
Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
|
|
+ ' expecting set ' + MSE.Expecting.ToString;
|
|
end
|
|
else
|
|
if (E is EMismatchedNotSetException) then
|
|
begin
|
|
Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
|
|
+ ' expecting set ' + MSE.Expecting.ToString;
|
|
end
|
|
else
|
|
if (E is EFailedPredicateException) then
|
|
begin
|
|
Result := 'rule ' + FPE.RuleName
|
|
+ ' failed predicate: {' + FPE.PredicateText + '}?';
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetGrammarFileName: String;
|
|
begin
|
|
// No defeault implementation
|
|
Result := '';
|
|
end;
|
|
|
|
function TBaseRecognizer.GetMissingSymbol(const Input: IIntStream;
|
|
const E: ERecognitionException; const ExpectedTokenType: Integer;
|
|
const Follow: IBitSet): IANTLRInterface;
|
|
begin
|
|
// No defeault implementation
|
|
Result := nil;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetNumberOfSyntaxErrors: Integer;
|
|
begin
|
|
Result := FState.SyntaxErrors;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetRuleMemoization(const RuleIndex,
|
|
RuleStartIndex: Integer): Integer;
|
|
var
|
|
Dict: IDictionary<Integer, Integer>;
|
|
begin
|
|
Dict := FState.RuleMemo[RuleIndex];
|
|
if (Dict = nil) then
|
|
begin
|
|
Dict := TDictionary<Integer, Integer>.Create;
|
|
FState.RuleMemo[RuleIndex] := Dict;
|
|
end;
|
|
if (not Dict.TryGetValue(RuleStartIndex, Result)) then
|
|
Result := MEMO_RULE_UNKNOWN;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetRuleMemoizationChaceSize: Integer;
|
|
var
|
|
RuleMap: IDictionary<Integer, Integer>;
|
|
begin
|
|
Result := 0;
|
|
if Assigned(FState.RuleMemo) then
|
|
begin
|
|
for RuleMap in FState.RuleMemo do
|
|
if Assigned(RuleMap) then
|
|
Inc(Result,RuleMap.Count); // how many input indexes are recorded?
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetState: IRecognizerSharedState;
|
|
begin
|
|
Result := FState;
|
|
end;
|
|
|
|
function TBaseRecognizer.GetTokenErrorDisplay(const T: IToken): String;
|
|
begin
|
|
Result := T.Text;
|
|
if (Result = '') then
|
|
begin
|
|
if (T.TokenType = TToken.EOF) then
|
|
Result := '<EOF>'
|
|
else
|
|
Result := '<' + IntToStr(T.TokenType) + '>';
|
|
end;
|
|
Result := ReplaceStr(Result,#10,'\n');
|
|
Result := ReplaceStr(Result,#13,'\r');
|
|
Result := ReplaceStr(Result,#9,'\t');
|
|
Result := '''' + Result + '''';
|
|
end;
|
|
|
|
function TBaseRecognizer.GetTokenNames: TStringArray;
|
|
begin
|
|
// no default implementation
|
|
Result := nil;
|
|
end;
|
|
|
|
function TBaseRecognizer.Match(const Input: IIntStream;
|
|
const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
|
|
begin
|
|
Result := GetCurrentInputSymbol(Input);
|
|
if (Input.LA(1) = TokenType) then
|
|
begin
|
|
Input.Consume;
|
|
FState.ErrorRecovery := False;
|
|
FState.Failed := False;
|
|
end else
|
|
begin
|
|
if (FState.Backtracking > 0) then
|
|
FState.Failed := True
|
|
else
|
|
begin
|
|
Mismatch(Input, TokenType, Follow);
|
|
Result := RecoverFromMismatchedToken(Input, TokenType, Follow);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.MatchAny(const Input: IIntStream);
|
|
begin
|
|
FState.ErrorRecovery := False;
|
|
FState.Failed := False;
|
|
Input.Consume;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.Memoize(const Input: IIntStream; const RuleIndex,
|
|
RuleStartIndex: Integer);
|
|
var
|
|
StopTokenIndex: Integer;
|
|
Dict: IDictionary<Integer, Integer>;
|
|
begin
|
|
Dict := FState.RuleMemo[RuleIndex];
|
|
if Assigned(Dict) then
|
|
begin
|
|
if FState.Failed then
|
|
StopTokenIndex := MEMO_RULE_FAILED
|
|
else
|
|
StopTokenIndex := Input.Index - 1;
|
|
Dict.AddOrSetValue(RuleStartIndex, StopTokenIndex);
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.Mismatch(const Input: IIntStream;
|
|
const TokenType: Integer; const Follow: IBitSet);
|
|
begin
|
|
if MismatchIsUnwantedToken(Input, TokenType) then
|
|
raise EUnwantedTokenException.Create(TokenType, Input)
|
|
else
|
|
if MismatchIsMissingToken(Input, Follow) then
|
|
raise EMissingTokenException.Create(TokenType, Input, nil)
|
|
else
|
|
raise EMismatchedTokenException.Create(TokenType, Input);
|
|
end;
|
|
|
|
function TBaseRecognizer.MismatchIsMissingToken(const Input: IIntStream;
|
|
const Follow: IBitSet): Boolean;
|
|
var
|
|
ViableTokensFollowingThisRule, Follow2: IBitSet;
|
|
begin
|
|
if (Follow = nil) then
|
|
// we have no information about the follow; we can only consume
|
|
// a single token and hope for the best
|
|
Result := False
|
|
else
|
|
begin
|
|
Follow2 := Follow;
|
|
// compute what can follow this grammar element reference
|
|
if (Follow.Member(TToken.EOR_TOKEN_TYPE)) then
|
|
begin
|
|
ViableTokensFollowingThisRule := ComputeContextSensitiveRuleFOLLOW();
|
|
Follow2 := Follow.BitSetOr(ViableTokensFollowingThisRule);
|
|
if (FState.FollowingStackPointer >= 0) then
|
|
// remove EOR if we're not the start symbol
|
|
Follow2.Remove(TToken.EOR_TOKEN_TYPE);
|
|
end;
|
|
|
|
// if current token is consistent with what could come after set
|
|
// then we know we're missing a token; error recovery is free to
|
|
// "insert" the missing token
|
|
|
|
// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
|
|
// in follow set to indicate that the fall of the start symbol is
|
|
// in the set (EOF can follow).
|
|
if (Follow2.Member(Input.LA(1)) or Follow2.Member(TToken.EOR_TOKEN_TYPE)) then
|
|
Result := True
|
|
else
|
|
Result := False;
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.MismatchIsUnwantedToken(const Input: IIntStream;
|
|
const TokenType: Integer): Boolean;
|
|
begin
|
|
Result := (Input.LA(2) = TokenType);
|
|
end;
|
|
|
|
procedure TBaseRecognizer.PushFollow(const FSet: IBitSet);
|
|
var
|
|
F: TBitSetArray;
|
|
I: Integer;
|
|
begin
|
|
if ((FState.FollowingStackPointer + 1) >= Length(FState.Following)) then
|
|
begin
|
|
SetLength(F, Length(FState.Following) * 2);
|
|
FillChar(F[0], Length(F) * SizeOf(IBitSet), 0);
|
|
for I := 0 to Length(FState.Following) - 1 do
|
|
F[I] := FState.Following[I];
|
|
FState.Following := F;
|
|
end;
|
|
FState.FollowingStackPointer := FState.FollowingStackPointer + 1;
|
|
FState.Following[FState.FollowingStackPointer] := FSet;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.Recover(const Input: IIntStream;
|
|
const RE: ERecognitionException);
|
|
var
|
|
FollowSet: IBitSet;
|
|
begin
|
|
if (FState.LastErrorIndex = Input.Index) then
|
|
// uh oh, another error at same token index; must be a case
|
|
// where LT(1) is in the recovery token set so nothing is
|
|
// consumed; consume a single token so at least to prevent
|
|
// an infinite loop; this is a failsafe.
|
|
Input.Consume;
|
|
FState.LastErrorIndex := Input.Index;
|
|
FollowSet := ComputeErrorRecoverySet;
|
|
BeginResync;
|
|
ConsumeUntil(Input,FollowSet);
|
|
EndResync;
|
|
end;
|
|
|
|
function TBaseRecognizer.RecoverFromMismatchedSet(const Input: IIntStream;
|
|
const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;
|
|
begin
|
|
if MismatchIsMissingToken(Input, Follow) then
|
|
begin
|
|
ReportError(E);
|
|
// we don't know how to conjure up a token for sets yet
|
|
Result := GetMissingSymbol(Input, E, TToken.INVALID_TOKEN_TYPE, Follow);
|
|
end
|
|
else
|
|
begin
|
|
// TODO do single token deletion like above for Token mismatch
|
|
Result := nil;
|
|
raise E;
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.RecoverFromMismatchedToken(const Input: IIntStream;
|
|
const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
|
|
var
|
|
E: ERecognitionException;
|
|
begin
|
|
// if next token is what we are looking for then "delete" this token
|
|
if MismatchIsUnwantedToken(Input, TokenType) then
|
|
begin
|
|
E := EUnwantedTokenException.Create(TokenType, Input);
|
|
BeginResync;
|
|
Input.Consume; // simply delete extra token
|
|
EndResync;
|
|
ReportError(E); // report after consuming so AW sees the token in the exception
|
|
// we want to return the token we're actually matching
|
|
Result := GetCurrentInputSymbol(Input);
|
|
Input.Consume; // move past ttype token as if all were ok
|
|
end
|
|
else
|
|
begin
|
|
// can't recover with single token deletion, try insertion
|
|
if MismatchIsMissingToken(Input, Follow) then
|
|
begin
|
|
E := nil;
|
|
Result := GetMissingSymbol(Input, E, TokenType, Follow);
|
|
E := EMissingTokenException.Create(TokenType, Input, Result);
|
|
ReportError(E); // report after inserting so AW sees the token in the exception
|
|
end
|
|
else
|
|
begin
|
|
// even that didn't work; must throw the exception
|
|
raise EMismatchedTokenException.Create(TokenType, Input);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.ReportError(const E: ERecognitionException);
|
|
begin
|
|
// if we've already reported an error and have not matched a token
|
|
// yet successfully, don't report any errors.
|
|
if (not FState.ErrorRecovery) then
|
|
begin
|
|
FState.SyntaxErrors := FState.SyntaxErrors + 1; // don't count spurious
|
|
FState.ErrorRecovery := True;
|
|
DisplayRecognitionError(GetTokenNames, E);
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.Reset;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
// wack everything related to error recovery
|
|
if (FState = nil) then
|
|
Exit; // no shared state work to do
|
|
|
|
FState.FollowingStackPointer := -1;
|
|
FState.ErrorRecovery := False;
|
|
FState.LastErrorIndex := -1;
|
|
FState.Failed := False;
|
|
FState.SyntaxErrors := 0;
|
|
|
|
// wack everything related to backtracking and memoization
|
|
FState.Backtracking := 0;
|
|
if Assigned(FState.RuleMemo) then
|
|
for I := 0 to Length(FState.RuleMemo) - 1 do
|
|
begin
|
|
// wipe cache
|
|
FState.RuleMemo[I] := nil;
|
|
end;
|
|
end;
|
|
|
|
function TBaseRecognizer.ToStrings(const Tokens: IList<IToken>): IList<String>;
|
|
var
|
|
Token: IToken;
|
|
begin
|
|
if (Tokens = nil) then
|
|
Result := nil
|
|
else
|
|
begin
|
|
Result := TList<String>.Create;
|
|
for Token in Tokens do
|
|
Result.Add(Token.Text);
|
|
end;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.TraceIn(const RuleName: String;
|
|
const RuleIndex: Integer; const InputSymbol: String);
|
|
begin
|
|
Write('enter ' + RuleName + ' ' + InputSymbol);
|
|
if (FState.Failed) then
|
|
WriteLn(' failed=True');
|
|
if (FState.Backtracking > 0) then
|
|
Write(' backtracking=' + IntToStr(FState.Backtracking));
|
|
WriteLn;
|
|
end;
|
|
|
|
procedure TBaseRecognizer.TraceOut(const RuleName: String;
|
|
const RuleIndex: Integer; const InputSymbol: String);
|
|
begin
|
|
Write('exit ' + RuleName + ' ' + InputSymbol);
|
|
if (FState.Failed) then
|
|
WriteLn(' failed=True');
|
|
if (FState.Backtracking > 0) then
|
|
Write(' backtracking=' + IntToStr(FState.Backtracking));
|
|
WriteLn;
|
|
end;
|
|
|
|
{ TCommonTokenStream }
|
|
|
|
procedure TCommonTokenStream.Consume;
|
|
begin
|
|
if (FP < FTokens.Count) then
|
|
begin
|
|
Inc(FP);
|
|
FP := SkipOffTokenChannels(FP); // leave p on valid token
|
|
end;
|
|
end;
|
|
|
|
constructor TCommonTokenStream.Create;
|
|
begin
|
|
inherited;
|
|
FP := -1;
|
|
FChannel := TToken.DEFAULT_CHANNEL;
|
|
FTokens := TList<IToken>.Create;
|
|
FTokens.Capacity := 500;
|
|
end;
|
|
|
|
constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource);
|
|
begin
|
|
Create;
|
|
FTokenSource := ATokenSource;
|
|
end;
|
|
|
|
procedure TCommonTokenStream.DiscardOffChannelTokens(const Discard: Boolean);
|
|
begin
|
|
FDiscardOffChannelTokens := Discard;
|
|
end;
|
|
|
|
procedure TCommonTokenStream.DiscardTokenType(const TType: Integer);
|
|
begin
|
|
if (FDiscardSet = nil) then
|
|
FDiscardSet := THashList<Integer, Integer>.Create;
|
|
FDiscardSet.Add(TType, TType);
|
|
end;
|
|
|
|
procedure TCommonTokenStream.FillBuffer;
|
|
var
|
|
Index: Integer;
|
|
T: IToken;
|
|
Discard: Boolean;
|
|
begin
|
|
Index := 0;
|
|
T := FTokenSource.NextToken;
|
|
while Assigned(T) and (T.TokenType <> Integer(cscEOF)) do
|
|
begin
|
|
Discard := False;
|
|
// is there a channel override for token type?
|
|
if Assigned(FChannelOverrideMap) then
|
|
if FChannelOverrideMap.ContainsKey(T.TokenType) then
|
|
T.Channel := FChannelOverrideMap[T.TokenType];
|
|
|
|
if Assigned(FDiscardSet) and FDiscardSet.ContainsKey(T.TokenType) then
|
|
Discard := True
|
|
else
|
|
if FDiscardOffChannelTokens and (T.Channel <> FChannel) then
|
|
Discard := True;
|
|
|
|
if (not Discard) then
|
|
begin
|
|
T.TokenIndex := Index;
|
|
FTokens.Add(T);
|
|
Inc(Index);
|
|
end;
|
|
|
|
T := FTokenSource.NextToken;
|
|
end;
|
|
// leave p pointing at first token on channel
|
|
FP := 0;
|
|
FP := SkipOffTokenChannels(FP);
|
|
end;
|
|
|
|
function TCommonTokenStream.Get(const I: Integer): IToken;
|
|
begin
|
|
Result := FTokens[I];
|
|
end;
|
|
|
|
function TCommonTokenStream.GetSourceName: String;
|
|
begin
|
|
Result := FTokenSource.SourceName;
|
|
end;
|
|
|
|
function TCommonTokenStream.GetTokens(const Start, Stop: Integer;
|
|
const Types: IList<Integer>): IList<IToken>;
|
|
begin
|
|
Result := GetTokens(Start, Stop, TBitSet.Create(Types));
|
|
end;
|
|
|
|
function TCommonTokenStream.GetTokens(const Start, Stop,
|
|
TokenType: Integer): IList<IToken>;
|
|
begin
|
|
Result := GetTokens(Start, Stop, TBitSet.BitSetOf(TokenType));
|
|
end;
|
|
|
|
function TCommonTokenStream.GetTokens(const Start, Stop: Integer;
|
|
const Types: IBitSet): IList<IToken>;
|
|
var
|
|
I, StartIndex, StopIndex: Integer;
|
|
T: IToken;
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
StopIndex := Min(Stop,FTokens.Count - 1);
|
|
StartIndex := Max(Start,0);
|
|
if (StartIndex > StopIndex) then
|
|
Result := nil
|
|
else
|
|
begin
|
|
Result := TList<IToken>.Create;
|
|
for I := StartIndex to StopIndex do
|
|
begin
|
|
T := FTokens[I];
|
|
if (Types = nil) or Types.Member(T.TokenType) then
|
|
Result.Add(T);
|
|
end;
|
|
if (Result.Count = 0) then
|
|
Result := nil;
|
|
end;
|
|
end;
|
|
|
|
function TCommonTokenStream.GetTokens: IList<IToken>;
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
Result := FTokens;
|
|
end;
|
|
|
|
function TCommonTokenStream.GetTokens(const Start,
|
|
Stop: Integer): IList<IToken>;
|
|
begin
|
|
Result := GetTokens(Start, Stop, IBitSet(nil));
|
|
end;
|
|
|
|
function TCommonTokenStream.GetTokenSource: ITokenSource;
|
|
begin
|
|
Result := FTokenSource;
|
|
end;
|
|
|
|
function TCommonTokenStream.Index: Integer;
|
|
begin
|
|
Result := FP;
|
|
end;
|
|
|
|
function TCommonTokenStream.LA(I: Integer): Integer;
|
|
begin
|
|
Result := LT(I).TokenType;
|
|
end;
|
|
|
|
function TCommonTokenStream.LAChar(I: Integer): Char;
|
|
begin
|
|
Result := Char(LA(I));
|
|
end;
|
|
|
|
function TCommonTokenStream.LB(const K: Integer): IToken;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
if (K = 0) then
|
|
Result := nil
|
|
else
|
|
if ((FP - K) < 0) then
|
|
Result := nil
|
|
else
|
|
begin
|
|
I := FP;
|
|
N := 1;
|
|
// find k good tokens looking backwards
|
|
while (N <= K) do
|
|
begin
|
|
// skip off-channel tokens
|
|
I := SkipOffTokenChannelsReverse(I - 1); // leave p on valid token
|
|
Inc(N);
|
|
end;
|
|
if (I < 0) then
|
|
Result := nil
|
|
else
|
|
Result := FTokens[I];
|
|
end;
|
|
end;
|
|
|
|
function TCommonTokenStream.LT(const K: Integer): IToken;
|
|
var
|
|
I, N: Integer;
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
if (K = 0) then
|
|
Result := nil
|
|
else
|
|
if (K < 0) then
|
|
Result := LB(-K)
|
|
else
|
|
if ((FP + K - 1) >= FTokens.Count) then
|
|
Result := TToken.EOF_TOKEN
|
|
else
|
|
begin
|
|
I := FP;
|
|
N := 1;
|
|
// find k good tokens
|
|
while (N < K) do
|
|
begin
|
|
// skip off-channel tokens
|
|
I := SkipOffTokenChannels(I + 1); // leave p on valid token
|
|
Inc(N);
|
|
end;
|
|
if (I >= FTokens.Count) then
|
|
Result := TToken.EOF_TOKEN
|
|
else
|
|
Result := FTokens[I];
|
|
end;
|
|
end;
|
|
|
|
function TCommonTokenStream.Mark: Integer;
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
FLastMarker := Index;
|
|
Result := FLastMarker;
|
|
end;
|
|
|
|
procedure TCommonTokenStream.Release(const Marker: Integer);
|
|
begin
|
|
// no resources to release
|
|
end;
|
|
|
|
procedure TCommonTokenStream.Reset;
|
|
begin
|
|
FP := 0;
|
|
FLastMarker := 0;
|
|
end;
|
|
|
|
procedure TCommonTokenStream.Rewind(const Marker: Integer);
|
|
begin
|
|
Seek(Marker);
|
|
end;
|
|
|
|
procedure TCommonTokenStream.Rewind;
|
|
begin
|
|
Seek(FLastMarker);
|
|
end;
|
|
|
|
procedure TCommonTokenStream.Seek(const Index: Integer);
|
|
begin
|
|
FP := Index;
|
|
end;
|
|
|
|
procedure TCommonTokenStream.SetTokenSource(const Value: ITokenSource);
|
|
begin
|
|
FTokenSource := Value;
|
|
FTokens.Clear;
|
|
FP := -1;
|
|
FChannel := TToken.DEFAULT_CHANNEL;
|
|
end;
|
|
|
|
procedure TCommonTokenStream.SetTokenTypeChannel(const TType, Channel: Integer);
|
|
begin
|
|
if (FChannelOverrideMap = nil) then
|
|
FChannelOverrideMap := TDictionary<Integer, Integer>.Create;
|
|
FChannelOverrideMap[TType] := Channel;
|
|
end;
|
|
|
|
function TCommonTokenStream.Size: Integer;
|
|
begin
|
|
Result := FTokens.Count;
|
|
end;
|
|
|
|
function TCommonTokenStream.SkipOffTokenChannels(const I: Integer): Integer;
|
|
var
|
|
N: Integer;
|
|
begin
|
|
Result := I;
|
|
N := FTokens.Count;
|
|
while (Result < N) and (FTokens[Result].Channel <> FChannel) do
|
|
Inc(Result);
|
|
end;
|
|
|
|
function TCommonTokenStream.SkipOffTokenChannelsReverse(
|
|
const I: Integer): Integer;
|
|
begin
|
|
Result := I;
|
|
while (Result >= 0) and (FTokens[Result].Channel <> FChannel) do
|
|
Dec(Result);
|
|
end;
|
|
|
|
function TCommonTokenStream.ToString: String;
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
Result := ToString(0, FTokens.Count - 1);
|
|
end;
|
|
|
|
function TCommonTokenStream.ToString(const Start, Stop: Integer): String;
|
|
var
|
|
I, Finish: Integer;
|
|
Buf: TStringBuilder;
|
|
T: IToken;
|
|
begin
|
|
if (Start < 0) or (Stop < 0) then
|
|
Result := ''
|
|
else
|
|
begin
|
|
if (FP = -1) then
|
|
FillBuffer;
|
|
if (Stop >= FTokens.Count) then
|
|
Finish := FTokens.Count - 1
|
|
else
|
|
Finish := Stop;
|
|
Buf := TStringBuilder.Create;
|
|
try
|
|
for I := Start to Finish do
|
|
begin
|
|
T := FTokens[I];
|
|
Buf.Append(T.Text);
|
|
end;
|
|
Result := Buf.ToString;
|
|
finally
|
|
Buf.Free;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TCommonTokenStream.ToString(const Start, Stop: IToken): String;
|
|
begin
|
|
if Assigned(Start) and Assigned(Stop) then
|
|
Result := ToString(Start.TokenIndex, Stop.TokenIndex)
|
|
else
|
|
Result := '';
|
|
end;
|
|
|
|
constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource;
|
|
const AChannel: Integer);
|
|
begin
|
|
Create(ATokenSource);
|
|
FChannel := AChannel;
|
|
end;
|
|
|
|
constructor TCommonTokenStream.Create(const ALexer: ILexer);
|
|
begin
|
|
Create(ALexer as ITokenSource);
|
|
end;
|
|
|
|
constructor TCommonTokenStream.Create(const ALexer: ILexer;
|
|
const AChannel: Integer);
|
|
begin
|
|
Create(ALexer as ITokenSource, AChannel);
|
|
end;
|
|
|
|
{ TDFA }
|
|
|
|
function TDFA.Description: String;
|
|
begin
|
|
Result := 'n/a';
|
|
end;
|
|
|
|
procedure TDFA.Error(const NVAE: ENoViableAltException);
|
|
begin
|
|
// No default implementation
|
|
end;
|
|
|
|
function TDFA.GetRecognizer: IBaseRecognizer;
|
|
begin
|
|
Result := IBaseRecognizer(FRecognizer);
|
|
end;
|
|
|
|
function TDFA.GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
|
|
begin
|
|
Result := FSpecialStateTransitionHandler;
|
|
end;
|
|
|
|
procedure TDFA.NoViableAlt(const S: Integer; const Input: IIntStream);
|
|
var
|
|
NVAE: ENoViableAltException;
|
|
begin
|
|
if (Recognizer.State.Backtracking > 0) then
|
|
Recognizer.State.Failed := True
|
|
else
|
|
begin
|
|
NVAE := ENoViableAltException.Create(Description, FDecisionNumber, S, Input);
|
|
Error(NVAE);
|
|
raise NVAE;
|
|
end;
|
|
end;
|
|
|
|
function TDFA.Predict(const Input: IIntStream): Integer;
|
|
var
|
|
Mark, S, SNext, SpecialState: Integer;
|
|
C: Char;
|
|
begin
|
|
Result := 0;
|
|
Mark := Input.Mark; // remember where decision started in input
|
|
S := 0; // we always start at s0
|
|
try
|
|
while True do
|
|
begin
|
|
SpecialState := FSpecial[S];
|
|
if (SpecialState >= 0) then
|
|
begin
|
|
S := FSpecialStateTransitionHandler(Self, SpecialState, Input);
|
|
if (S = -1) then
|
|
begin
|
|
NoViableAlt(S, Input);
|
|
Exit;
|
|
end;
|
|
Input.Consume;
|
|
Continue;
|
|
end;
|
|
|
|
if (FAccept[S] >= 1) then
|
|
begin
|
|
Result := FAccept[S];
|
|
Exit;
|
|
end;
|
|
|
|
// look for a normal char transition
|
|
C := Char(Input.LA(1)); // -1 == \uFFFF, all tokens fit in 65000 space
|
|
if (C >= FMin[S]) and (C <= FMax[S]) then
|
|
begin
|
|
SNext := FTransition[S,Integer(C) - Integer(FMin[S])]; // move to next state
|
|
if (SNext < 0) then
|
|
begin
|
|
// was in range but not a normal transition
|
|
// must check EOT, which is like the else clause.
|
|
// eot[s]>=0 indicates that an EOT edge goes to another
|
|
// state.
|
|
if (FEOT[S] >= 0) then // EOT Transition to accept state?
|
|
begin
|
|
S := FEOT[S];
|
|
Input.Consume;
|
|
// TODO: I had this as return accept[eot[s]]
|
|
// which assumed here that the EOT edge always
|
|
// went to an accept...faster to do this, but
|
|
// what about predicated edges coming from EOT
|
|
// target?
|
|
Continue;
|
|
end;
|
|
|
|
NoViableAlt(S, Input);
|
|
Exit;
|
|
end;
|
|
S := SNext;
|
|
Input.Consume;
|
|
Continue;
|
|
end;
|
|
|
|
if (FEOT[S] >= 0) then
|
|
begin
|
|
// EOT Transition?
|
|
S := FEOT[S];
|
|
Input.Consume;
|
|
Continue;
|
|
end;
|
|
|
|
if (C = Char(TToken.EOF)) and (FEOF[S] >= 0) then
|
|
begin
|
|
// EOF Transition to accept state?
|
|
Result := FAccept[FEOF[S]];
|
|
Exit;
|
|
end;
|
|
|
|
// not in range and not EOF/EOT, must be invalid symbol
|
|
NoViableAlt(S, Input);
|
|
Exit;
|
|
end;
|
|
finally
|
|
Input.Rewind(Mark);
|
|
end;
|
|
end;
|
|
|
|
procedure TDFA.SetRecognizer(const Value: IBaseRecognizer);
|
|
begin
|
|
FRecognizer := Pointer(Value);
|
|
end;
|
|
|
|
procedure TDFA.SetSpecialStateTransitionHandler(
|
|
const Value: TSpecialStateTransitionHandler);
|
|
begin
|
|
FSpecialStateTransitionHandler := Value;
|
|
end;
|
|
|
|
function TDFA.SpecialStateTransition(const S: Integer;
|
|
const Input: IIntStream): Integer;
|
|
begin
|
|
// No default implementation
|
|
Result := -1;
|
|
end;
|
|
|
|
function TDFA.SpecialTransition(const State, Symbol: Integer): Integer;
|
|
begin
|
|
Result := 0;
|
|
end;
|
|
|
|
class function TDFA.UnpackEncodedString(
|
|
const EncodedString: String): TSmallintArray;
|
|
var
|
|
I, J, DI, Size: Integer;
|
|
N, V: Char;
|
|
begin
|
|
Size := 0;
|
|
I := 1;
|
|
while (I <= Length(EncodedString)) do
|
|
begin
|
|
Inc(Size,Integer(EncodedString[I]));
|
|
Inc(I,2);
|
|
end;
|
|
|
|
SetLength(Result,Size);
|
|
DI := 0;
|
|
I := 1;
|
|
while (I <= Length(EncodedString)) do
|
|
begin
|
|
N := EncodedString[I];
|
|
V := EncodedString[I + 1];
|
|
// add v n times to data
|
|
for J := 1 to Integer(N) do
|
|
begin
|
|
Result[DI] := Smallint(V);
|
|
Inc(DI);
|
|
end;
|
|
Inc(I,2);
|
|
end;
|
|
end;
|
|
|
|
class function TDFA.UnpackEncodedStringArray(
|
|
const EncodedStrings: array of String): TSmallintMatrix;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
SetLength(Result,Length(EncodedStrings));
|
|
for I := 0 to Length(EncodedStrings) - 1 do
|
|
Result[I] := UnpackEncodedString(EncodedStrings[I]);
|
|
end;
|
|
|
|
class function TDFA.UnpackEncodedStringArray(
|
|
const EncodedStrings: TStringArray): TSmallintMatrix;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
SetLength(Result,Length(EncodedStrings));
|
|
for I := 0 to Length(EncodedStrings) - 1 do
|
|
Result[I] := UnpackEncodedString(EncodedStrings[I]);
|
|
end;
|
|
|
|
class function TDFA.UnpackEncodedStringToUnsignedChars(
|
|
const EncodedString: String): TCharArray;
|
|
var
|
|
I, J, DI, Size: Integer;
|
|
N, V: Char;
|
|
begin
|
|
Size := 0;
|
|
I := 1;
|
|
while (I <= Length(EncodedString)) do
|
|
begin
|
|
Inc(Size,Integer(EncodedString[I]));
|
|
Inc(I,2);
|
|
end;
|
|
|
|
SetLength(Result,Size);
|
|
DI := 0;
|
|
I := 1;
|
|
while (I <= Length(EncodedString)) do
|
|
begin
|
|
N := EncodedString[I];
|
|
V := EncodedString[I + 1];
|
|
// add v n times to data
|
|
for J := 1 to Integer(N) do
|
|
begin
|
|
Result[DI] := V;
|
|
Inc(DI);
|
|
end;
|
|
Inc(I,2);
|
|
end;
|
|
end;
|
|
|
|
{ TLexer }
|
|
|
|
constructor TLexer.Create;
|
|
begin
|
|
inherited;
|
|
end;
|
|
|
|
constructor TLexer.Create(const AInput: ICharStream);
|
|
begin
|
|
inherited Create;
|
|
FInput := AInput;
|
|
end;
|
|
|
|
constructor TLexer.Create(const AInput: ICharStream;
|
|
const AState: IRecognizerSharedState);
|
|
begin
|
|
inherited Create(AState);
|
|
FInput := AInput;
|
|
end;
|
|
|
|
function TLexer.Emit: IToken;
|
|
begin
|
|
Result := TCommonToken.Create(FInput, FState.TokenType, FState.Channel,
|
|
FState.TokenStartCharIndex, GetCharIndex - 1);
|
|
Result.Line := FState.TokenStartLine;
|
|
Result.Text := FState.Text;
|
|
Result.CharPositionInLine := FState.TokenStartCharPositionInLine;
|
|
Emit(Result);
|
|
end;
|
|
|
|
procedure TLexer.Emit(const Token: IToken);
|
|
begin
|
|
FState.Token := Token;
|
|
end;
|
|
|
|
function TLexer.GetCharErrorDisplay(const C: Integer): String;
|
|
begin
|
|
case C of
|
|
// TToken.EOF
|
|
TOKEN_dot_EOF:
|
|
Result := '<EOF>';
|
|
10:
|
|
Result := '\n';
|
|
9:
|
|
Result := '\t';
|
|
13:
|
|
Result := '\r';
|
|
else
|
|
Result := Char(C);
|
|
end;
|
|
Result := '''' + Result + '''';
|
|
end;
|
|
|
|
function TLexer.GetCharIndex: Integer;
|
|
begin
|
|
Result := FInput.Index;
|
|
end;
|
|
|
|
function TLexer.GetCharPositionInLine: Integer;
|
|
begin
|
|
Result := FInput.CharPositionInLine;
|
|
end;
|
|
|
|
function TLexer.GetCharStream: ICharStream;
|
|
begin
|
|
Result := FInput;
|
|
end;
|
|
|
|
function TLexer.GetErrorMessage(const E: ERecognitionException;
|
|
const TokenNames: TStringArray): String;
|
|
var
|
|
MTE: EMismatchedTokenException absolute E;
|
|
NVAE: ENoViableAltException absolute E;
|
|
EEE: EEarlyExitException absolute E;
|
|
MNSE: EMismatchedNotSetException absolute E;
|
|
MSE: EMismatchedSetException absolute E;
|
|
MRE: EMismatchedRangeException absolute E;
|
|
begin
|
|
if (E is EMismatchedTokenException) then
|
|
Result := 'mismatched character ' + GetCharErrorDisplay(E.Character)
|
|
+ ' expecting ' + GetCharErrorDisplay(MTE.Expecting)
|
|
else
|
|
if (E is ENoViableAltException) then
|
|
// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
|
|
// and "(decision="+nvae.decisionNumber+") and
|
|
// "state "+nvae.stateNumber
|
|
Result := 'no viable alternative at character ' + GetCharErrorDisplay(NVAE.Character)
|
|
else
|
|
if (E is EEarlyExitException) then
|
|
// for development, can add "(decision="+eee.decisionNumber+")"
|
|
Result := 'required (...)+ loop did not match anything at character '
|
|
+ GetCharErrorDisplay(EEE.Character)
|
|
else
|
|
if (E is EMismatchedNotSetException) then
|
|
Result := 'mismatched character ' + GetCharErrorDisplay(MNSE.Character)
|
|
+ ' expecting set ' + MNSE.Expecting.ToString
|
|
else
|
|
if (E is EMismatchedSetException) then
|
|
Result := 'mismatched character ' + GetCharErrorDisplay(MSE.Character)
|
|
+ ' expecting set ' + MSE.Expecting.ToString
|
|
else
|
|
if (E is EMismatchedRangeException) then
|
|
Result := 'mismatched character ' + GetCharErrorDisplay(MRE.Character)
|
|
+ ' expecting set ' + GetCharErrorDisplay(MRE.A) + '..'
|
|
+ GetCharErrorDisplay(MRE.B)
|
|
else
|
|
Result := inherited GetErrorMessage(E, TokenNames);
|
|
end;
|
|
|
|
function TLexer.GetInput: IIntStream;
|
|
begin
|
|
Result := FInput;
|
|
end;
|
|
|
|
function TLexer.GetLine: Integer;
|
|
begin
|
|
Result := FInput.Line;
|
|
end;
|
|
|
|
function TLexer.GetSourceName: String;
|
|
begin
|
|
Result := FInput.SourceName;
|
|
end;
|
|
|
|
function TLexer.GetText: String;
|
|
begin
|
|
if (FState.Text <> '') then
|
|
Result := FState.Text
|
|
else
|
|
Result := FInput.Substring(FState.TokenStartCharIndex, GetCharIndex - 1)
|
|
end;
|
|
|
|
procedure TLexer.Match(const S: String);
|
|
var
|
|
I: Integer;
|
|
MTE: EMismatchedTokenException;
|
|
begin
|
|
for I := 1 to Length(S) do
|
|
begin
|
|
if (FInput.LA(1) <> Integer(S[I])) then
|
|
begin
|
|
if (FState.Backtracking > 0) then
|
|
begin
|
|
FState.Failed := True;
|
|
Exit;
|
|
end;
|
|
MTE := EMismatchedTokenException.Create(Integer(S[I]), FInput);
|
|
Recover(MTE); // don't really recover; just consume in lexer
|
|
raise MTE;
|
|
end;
|
|
FInput.Consume;
|
|
FState.Failed := False;
|
|
end;
|
|
end;
|
|
|
|
procedure TLexer.Match(const C: Integer);
|
|
var
|
|
MTE: EMismatchedTokenException;
|
|
begin
|
|
if (FInput.LA(1) <> C) then
|
|
begin
|
|
if (FState.Backtracking > 0) then
|
|
begin
|
|
FState.Failed := True;
|
|
Exit;
|
|
end;
|
|
MTE := EMismatchedTokenException.Create(C, FInput);
|
|
Recover(MTE);
|
|
raise MTE;
|
|
end;
|
|
FInput.Consume;
|
|
FState.Failed := False;
|
|
end;
|
|
|
|
procedure TLexer.MatchAny;
|
|
begin
|
|
FInput.Consume;
|
|
end;
|
|
|
|
procedure TLexer.MatchRange(const A, B: Integer);
|
|
var
|
|
MRE: EMismatchedRangeException;
|
|
begin
|
|
if (FInput.LA(1) < A) or (FInput.LA(1) > B) then
|
|
begin
|
|
if (FState.Backtracking > 0) then
|
|
begin
|
|
FState.Failed := True;
|
|
Exit;
|
|
end;
|
|
MRE := EMismatchedRangeException.Create(A, B, FInput);
|
|
Recover(MRE);
|
|
raise MRE;
|
|
end;
|
|
FInput.Consume;
|
|
FState.Failed := False;
|
|
end;
|
|
|
|
function TLexer.NextToken: IToken;
|
|
begin
|
|
while True do
|
|
begin
|
|
FState.Token := nil;
|
|
FState.Channel := TToken.DEFAULT_CHANNEL;
|
|
FState.TokenStartCharIndex := FInput.Index;
|
|
FState.TokenStartCharPositionInLine := FInput.CharPositionInLine;
|
|
FState.TokenStartLine := Finput.Line;
|
|
FState.Text := '';
|
|
if (FInput.LA(1) = Integer(cscEOF)) then
|
|
begin
|
|
Result := TToken.EOF_TOKEN;
|
|
Exit;
|
|
end;
|
|
|
|
try
|
|
DoTokens;
|
|
if (FState.Token = nil) then
|
|
Emit
|
|
else
|
|
if (FState.Token = TToken.SKIP_TOKEN) then
|
|
Continue;
|
|
Exit(FState.Token);
|
|
except
|
|
on NVA: ENoViableAltException do
|
|
begin
|
|
ReportError(NVA);
|
|
Recover(NVA); // throw out current char and try again
|
|
end;
|
|
|
|
on RE: ERecognitionException do
|
|
begin
|
|
ReportError(RE);
|
|
// Match() routine has already called Recover()
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TLexer.Recover(const RE: ERecognitionException);
|
|
begin
|
|
FInput.Consume;
|
|
end;
|
|
|
|
procedure TLexer.ReportError(const E: ERecognitionException);
|
|
begin
|
|
DisplayRecognitionError(GetTokenNames, E);
|
|
end;
|
|
|
|
procedure TLexer.Reset;
|
|
begin
|
|
inherited; // reset all recognizer state variables
|
|
// wack Lexer state variables
|
|
if Assigned(FInput) then
|
|
FInput.Seek(0); // rewind the input
|
|
if (FState = nil) then
|
|
Exit; // no shared state work to do
|
|
FState.Token := nil;
|
|
FState.TokenType := TToken.INVALID_TOKEN_TYPE;
|
|
FState.Channel := TToken.DEFAULT_CHANNEL;
|
|
FState.TokenStartCharIndex := -1;
|
|
FState.TokenStartCharPositionInLine := -1;
|
|
FState.TokenStartLine := -1;
|
|
FState.Text := '';
|
|
end;
|
|
|
|
procedure TLexer.SetCharStream(const Value: ICharStream);
|
|
begin
|
|
FInput := nil;
|
|
Reset;
|
|
FInput := Value;
|
|
end;
|
|
|
|
procedure TLexer.SetText(const Value: String);
|
|
begin
|
|
FState.Text := Value;
|
|
end;
|
|
|
|
procedure TLexer.Skip;
|
|
begin
|
|
FState.Token := TToken.SKIP_TOKEN;
|
|
end;
|
|
|
|
procedure TLexer.TraceIn(const RuleName: String; const RuleIndex: Integer);
|
|
var
|
|
InputSymbol: String;
|
|
begin
|
|
InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':'
|
|
+ IntToStr(GetCharPositionInLine);
|
|
inherited TraceIn(RuleName, RuleIndex, InputSymbol);
|
|
end;
|
|
|
|
procedure TLexer.TraceOut(const RuleName: String; const RuleIndex: Integer);
|
|
var
|
|
InputSymbol: String;
|
|
begin
|
|
InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':'
|
|
+ IntToStr(GetCharPositionInLine);
|
|
inherited TraceOut(RuleName, RuleIndex, InputSymbol);
|
|
end;
|
|
|
|
{ TParser }
|
|
|
|
constructor TParser.Create(const AInput: ITokenStream);
|
|
begin
|
|
inherited Create; // highlight that we go to base class to set state object
|
|
SetTokenStream(AInput);
|
|
end;
|
|
|
|
constructor TParser.Create(const AInput: ITokenStream;
|
|
const AState: IRecognizerSharedState);
|
|
begin
|
|
inherited Create(AState); // share the state object with another parser
|
|
SetTokenStream(AInput);
|
|
end;
|
|
|
|
function TParser.GetCurrentInputSymbol(
|
|
const Input: IIntStream): IANTLRInterface;
|
|
begin
|
|
Result := FInput.LT(1)
|
|
end;
|
|
|
|
function TParser.GetInput: IIntStream;
|
|
begin
|
|
Result := FInput;
|
|
end;
|
|
|
|
function TParser.GetMissingSymbol(const Input: IIntStream;
|
|
const E: ERecognitionException; const ExpectedTokenType: Integer;
|
|
const Follow: IBitSet): IANTLRInterface;
|
|
var
|
|
TokenText: String;
|
|
T: ICommonToken;
|
|
Current: IToken;
|
|
begin
|
|
if (ExpectedTokenType = TToken.EOF) then
|
|
TokenText := '<missing EOF>'
|
|
else
|
|
TokenText := '<missing ' + GetTokenNames[ExpectedTokenType] + '>';
|
|
T := TCommonToken.Create(ExpectedTokenType, TokenText);
|
|
Current := FInput.LT(1);
|
|
if (Current.TokenType = TToken.EOF) then
|
|
Current := FInput.LT(-1);
|
|
T.Line := Current.Line;
|
|
T.CharPositionInLine := Current.CharPositionInLine;
|
|
T.Channel := DEFAULT_TOKEN_CHANNEL;
|
|
Result := T;
|
|
end;
|
|
|
|
function TParser.GetSourceName: String;
|
|
begin
|
|
Result := FInput.SourceName;
|
|
end;
|
|
|
|
function TParser.GetTokenStream: ITokenStream;
|
|
begin
|
|
Result := FInput;
|
|
end;
|
|
|
|
procedure TParser.Reset;
|
|
begin
|
|
inherited; // reset all recognizer state variables
|
|
if Assigned(FInput) then
|
|
FInput.Seek(0); // rewind the input
|
|
end;
|
|
|
|
procedure TParser.SetTokenStream(const Value: ITokenStream);
|
|
begin
|
|
FInput := nil;
|
|
Reset;
|
|
FInput := Value;
|
|
end;
|
|
|
|
procedure TParser.TraceIn(const RuleName: String; const RuleIndex: Integer);
|
|
begin
|
|
inherited TraceIn(RuleName, RuleIndex, FInput.LT(1).ToString);
|
|
end;
|
|
|
|
procedure TParser.TraceOut(const RuleName: String; const RuleIndex: Integer);
|
|
begin
|
|
inherited TraceOut(RuleName, RuleIndex, FInput.LT(1).ToString);
|
|
end;
|
|
|
|
{ TRuleReturnScope }
|
|
|
|
function TRuleReturnScope.GetStart: IANTLRInterface;
|
|
begin
|
|
Result := nil;
|
|
end;
|
|
|
|
function TRuleReturnScope.GetStop: IANTLRInterface;
|
|
begin
|
|
Result := nil;
|
|
end;
|
|
|
|
function TRuleReturnScope.GetTemplate: IANTLRInterface;
|
|
begin
|
|
Result := nil;
|
|
end;
|
|
|
|
function TRuleReturnScope.GetTree: IANTLRInterface;
|
|
begin
|
|
Result := nil;
|
|
end;
|
|
|
|
procedure TRuleReturnScope.SetStart(const Value: IANTLRInterface);
|
|
begin
|
|
raise EInvalidOperation.Create('Setter has not been defined for this property.');
|
|
end;
|
|
|
|
procedure TRuleReturnScope.SetStop(const Value: IANTLRInterface);
|
|
begin
|
|
raise EInvalidOperation.Create('Setter has not been defined for this property.');
|
|
end;
|
|
|
|
procedure TRuleReturnScope.SetTree(const Value: IANTLRInterface);
|
|
begin
|
|
raise EInvalidOperation.Create('Setter has not been defined for this property.');
|
|
end;
|
|
|
|
{ TParserRuleReturnScope }
|
|
|
|
function TParserRuleReturnScope.GetStart: IANTLRInterface;
|
|
begin
|
|
Result := FStart;
|
|
end;
|
|
|
|
function TParserRuleReturnScope.GetStop: IANTLRInterface;
|
|
begin
|
|
Result := FStop;
|
|
end;
|
|
|
|
procedure TParserRuleReturnScope.SetStart(const Value: IANTLRInterface);
|
|
begin
|
|
FStart := Value as IToken;
|
|
end;
|
|
|
|
procedure TParserRuleReturnScope.SetStop(const Value: IANTLRInterface);
|
|
begin
|
|
FStop := Value as IToken;
|
|
end;
|
|
|
|
{ TTokenRewriteStream }
|
|
|
|
procedure TTokenRewriteStream.Delete(const Start, Stop: IToken);
|
|
begin
|
|
Delete(DEFAULT_PROGRAM_NAME, Start, Stop);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Delete(const IndexT: IToken);
|
|
begin
|
|
Delete(DEFAULT_PROGRAM_NAME, IndexT, IndexT);
|
|
end;
|
|
|
|
constructor TTokenRewriteStream.Create;
|
|
begin
|
|
inherited;
|
|
Init;
|
|
end;
|
|
|
|
constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource);
|
|
begin
|
|
inherited Create(ATokenSource);
|
|
Init;
|
|
end;
|
|
|
|
constructor TTokenRewriteStream.Create(const ALexer: ILexer);
|
|
begin
|
|
Create(ALexer as ITokenSource);
|
|
end;
|
|
|
|
constructor TTokenRewriteStream.Create(const ALexer: ILexer;
|
|
const AChannel: Integer);
|
|
begin
|
|
Create(ALexer as ITokenSource, AChannel);
|
|
end;
|
|
|
|
function TTokenRewriteStream.CatOpText(const A, B: IANTLRInterface): IANTLRInterface;
|
|
var
|
|
X, Y: String;
|
|
begin
|
|
if Assigned(A) then
|
|
X := A.ToString
|
|
else
|
|
X := '';
|
|
|
|
if Assigned(B) then
|
|
Y := B.ToString
|
|
else
|
|
Y := '';
|
|
|
|
Result := TANTLRString.Create(X + Y);
|
|
end;
|
|
|
|
constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource;
|
|
const AChannel: Integer);
|
|
begin
|
|
inherited Create(ATokenSource, AChannel);
|
|
Init;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start,
|
|
Stop: IToken);
|
|
begin
|
|
Replace(ProgramName, Start, Stop, nil);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start,
|
|
Stop: Integer);
|
|
begin
|
|
Replace(ProgramName, Start, Stop, nil);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Delete(const Start, Stop: Integer);
|
|
begin
|
|
Delete(DEFAULT_PROGRAM_NAME, Start, Stop);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Delete(const Index: Integer);
|
|
begin
|
|
Delete(DEFAULT_PROGRAM_NAME, Index, Index);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.DeleteProgram(const ProgramName: String);
|
|
begin
|
|
Rollback(ProgramName, MIN_TOKEN_INDEX);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.DeleteProgram;
|
|
begin
|
|
DeleteProgram(DEFAULT_PROGRAM_NAME);
|
|
end;
|
|
|
|
function TTokenRewriteStream.GetLastRewriteTokenIndex: Integer;
|
|
begin
|
|
Result := GetLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
|
|
end;
|
|
|
|
function TTokenRewriteStream.GetKindOfOps(
|
|
const Rewrites: IList<IRewriteOperation>;
|
|
const Kind: TGUID): IList<IRewriteOperation>;
|
|
begin
|
|
Result := GetKindOfOps(Rewrites, Kind, Rewrites.Count);
|
|
end;
|
|
|
|
function TTokenRewriteStream.GetKindOfOps(
|
|
const Rewrites: IList<IRewriteOperation>; const Kind: TGUID;
|
|
const Before: Integer): IList<IRewriteOperation>;
|
|
var
|
|
I: Integer;
|
|
Op: IRewriteOperation;
|
|
Obj: IInterface;
|
|
begin
|
|
Result := TList<IRewriteOperation>.Create;
|
|
I := 0;
|
|
while (I < Before) and (I < Rewrites.Count) do
|
|
begin
|
|
Op := Rewrites[I];
|
|
if Assigned(Op) and (Op.QueryInterface(Kind, Obj) = 0) then
|
|
Result.Add(Op);
|
|
Inc(I);
|
|
end;
|
|
end;
|
|
|
|
function TTokenRewriteStream.GetLastRewriteTokenIndex(
|
|
const ProgramName: String): Integer;
|
|
begin
|
|
if (not FLastRewriteTokenIndexes.TryGetValue(ProgramName, Result)) then
|
|
Result := -1;
|
|
end;
|
|
|
|
function TTokenRewriteStream.GetProgram(
|
|
const Name: String): IList<IRewriteOperation>;
|
|
var
|
|
InstructionStream: IList<IRewriteOperation>;
|
|
begin
|
|
InstructionStream := FPrograms[Name];
|
|
if (InstructionStream = nil) then
|
|
InstructionStream := InitializeProgram(Name);
|
|
Result := InstructionStream;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
|
|
const T: IToken; const Text: IANTLRInterface);
|
|
begin
|
|
InsertAfter(ProgramName, T.TokenIndex, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Init;
|
|
var
|
|
List: IList<IRewriteOperation>;
|
|
begin
|
|
FPrograms := TDictionary<String, IList<IRewriteOperation>>.Create;
|
|
List := TList<IRewriteOperation>.Create;
|
|
List.Capacity := PROGRAM_INIT_SIZE;
|
|
FPrograms.Add(DEFAULT_PROGRAM_NAME, List);
|
|
FLastRewriteTokenIndexes := TDictionary<String, Integer>.Create;
|
|
end;
|
|
|
|
function TTokenRewriteStream.InitializeProgram(
|
|
const Name: String): IList<IRewriteOperation>;
|
|
begin
|
|
Result := TList<IRewriteOperation>.Create;
|
|
Result.Capacity := PROGRAM_INIT_SIZE;
|
|
FPrograms[Name] := Result;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
|
|
const Index: Integer; const Text: IANTLRInterface);
|
|
begin
|
|
// to insert after, just insert before next index (even if past end)
|
|
InsertBefore(ProgramName, Index + 1, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const T: IToken;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
InsertAfter(DEFAULT_PROGRAM_NAME, T, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const Index: Integer;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
InsertAfter(DEFAULT_PROGRAM_NAME, Index, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const Index: Integer;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
InsertBefore(DEFAULT_PROGRAM_NAME, Index, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
|
|
const T: IToken; const Text: IANTLRInterface);
|
|
begin
|
|
InsertBefore(ProgramName, T.TokenIndex, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
|
|
const Index: Integer; const Text: IANTLRInterface);
|
|
var
|
|
Op: IRewriteOperation;
|
|
begin
|
|
Op := TInsertBeforeOp.Create(Index, Text, Self);
|
|
GetProgram(ProgramName).Add(Op);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const T: IToken;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
InsertBefore(DEFAULT_PROGRAM_NAME, T, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const Start, Stop: IToken;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
Replace(DEFAULT_PROGRAM_NAME, Stop, Stop, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const IndexT: IToken;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
Replace(DEFAULT_PROGRAM_NAME, IndexT, IndexT, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
|
|
Stop: Integer; const Text: IANTLRInterface);
|
|
var
|
|
Op: IRewriteOperation;
|
|
Rewrites: IList<IRewriteOperation>;
|
|
begin
|
|
if (Start > Stop) or (Start < 0) or (Stop < 0) or (Stop >= GetTokens.Count) then
|
|
raise EArgumentOutOfRangeException.Create('replace: range invalid: '
|
|
+ IntToStr(Start) + '..' + IntToStr(Stop) + '(size='
|
|
+ IntToStr(GetTokens.Count) + ')');
|
|
|
|
Op := TReplaceOp.Create(Start, Stop, Text, Self);
|
|
Rewrites := GetProgram(ProgramName);
|
|
Op.InstructionIndex := Rewrites.Count;
|
|
Rewrites.Add(Op);
|
|
end;
|
|
|
|
function TTokenRewriteStream.ReduceToSingleOperationPerIndex(
|
|
const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>;
|
|
var
|
|
I, J: Integer;
|
|
Op: IRewriteOperation;
|
|
ROp, PrevROp: IReplaceOp;
|
|
IOp, PrevIOp: IInsertBeforeOp;
|
|
Inserts, PrevInserts, PrevReplaces: IList<IRewriteOperation>;
|
|
Disjoint, Same: Boolean;
|
|
begin
|
|
// WALK REPLACES
|
|
for I := 0 to Rewrites.Count - 1 do
|
|
begin
|
|
Op := Rewrites[I];
|
|
if (Op = nil) then
|
|
Continue;
|
|
if (not Supports(Op, IReplaceOp, ROp)) then
|
|
Continue;
|
|
|
|
// Wipe prior inserts within range
|
|
Inserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I);
|
|
for J := 0 to Inserts.Count - 1 do
|
|
begin
|
|
IOp := Inserts[J] as IInsertBeforeOp;
|
|
if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then
|
|
begin
|
|
// delete insert as it's a no-op.
|
|
Rewrites[IOp.InstructionIndex] := nil;
|
|
end;
|
|
end;
|
|
|
|
// Drop any prior replaces contained within
|
|
PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I);
|
|
for J := 0 to PrevReplaces.Count - 1 do
|
|
begin
|
|
PrevROp := PrevReplaces[J] as IReplaceOp;
|
|
if (PrevROp.Index >= ROp.Index) and (PrevROp.LastIndex <= ROp.LastIndex) then
|
|
begin
|
|
// delete replace as it's a no-op.
|
|
Rewrites[PrevROp.InstructionIndex] := nil;
|
|
Continue;
|
|
end;
|
|
// throw exception unless disjoint or identical
|
|
Disjoint := (PrevROp.LastIndex < ROp.Index) or (PrevROp.Index > ROp.LastIndex);
|
|
Same := (PrevROp.Index = ROp.Index) and (PrevROp.LastIndex = ROp.LastIndex);
|
|
if (not Disjoint) and (not Same) then
|
|
raise EArgumentOutOfRangeException.Create('replace of boundaries of '
|
|
+ ROp.ToString + ' overlap with previous ' + PrevROp.ToString);
|
|
end;
|
|
end;
|
|
|
|
// WALK INSERTS
|
|
for I := 0 to Rewrites.Count - 1 do
|
|
begin
|
|
Op := Rewrites[I];
|
|
if (Op = nil) then
|
|
Continue;
|
|
if (not Supports(Op, IInsertBeforeOp, IOp)) then
|
|
Continue;
|
|
|
|
// combine current insert with prior if any at same index
|
|
PrevInserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I);
|
|
for J := 0 to PrevInserts.Count - 1 do
|
|
begin
|
|
PrevIOp := PrevInserts[J] as IInsertBeforeOp;
|
|
if (PrevIOp.Index = IOp.Index) then
|
|
begin
|
|
// combine objects
|
|
// convert to strings...we're in process of toString'ing
|
|
// whole token buffer so no lazy eval issue with any templates
|
|
IOp.Text := CatOpText(IOp.Text, PrevIOp.Text);
|
|
// delete redundant prior insert
|
|
Rewrites[PrevIOp.InstructionIndex] := nil;
|
|
end;
|
|
end;
|
|
|
|
// look for replaces where iop.index is in range; error
|
|
PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I);
|
|
for J := 0 to PrevReplaces.Count - 1 do
|
|
begin
|
|
Rop := PrevReplaces[J] as IReplaceOp;
|
|
if (IOp.Index = ROp.Index) then
|
|
begin
|
|
ROp.Text := CatOpText(IOp.Text, ROp.Text);
|
|
Rewrites[I] := nil; // delete current insert
|
|
Continue;
|
|
end;
|
|
if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then
|
|
raise EArgumentOutOfRangeException.Create('insert op '
|
|
+ IOp.ToString + ' within boundaries of previous ' + ROp.ToString);
|
|
end;
|
|
end;
|
|
|
|
Result := TDictionary<Integer, IRewriteOperation>.Create;
|
|
for Op in Rewrites do
|
|
begin
|
|
if (Op = nil) then
|
|
Continue; // ignore deleted ops
|
|
if (Result.ContainsKey(Op.Index)) then
|
|
raise Exception.Create('should only be one op per index');
|
|
Result.Add(Op.Index, Op);
|
|
end;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
|
|
Stop: IToken; const Text: IANTLRInterface);
|
|
begin
|
|
Replace(ProgramName, Start.TokenIndex, Stop.TokenIndex, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const Index: Integer;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
Replace(DEFAULT_PROGRAM_NAME, Index, Index, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const Start, Stop: Integer;
|
|
const Text: IANTLRInterface);
|
|
begin
|
|
Replace(DEFAULT_PROGRAM_NAME, Start, Stop, Text);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Rollback(const InstructionIndex: Integer);
|
|
begin
|
|
Rollback(DEFAULT_PROGRAM_NAME, InstructionIndex);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Rollback(const ProgramName: String;
|
|
const InstructionIndex: Integer);
|
|
var
|
|
InstructionStream: IList<IRewriteOperation>;
|
|
begin
|
|
InstructionStream := FPrograms[ProgramName];
|
|
if Assigned(InstructionStream) then
|
|
FPrograms[ProgramName] := InstructionStream.GetRange(MIN_TOKEN_INDEX,
|
|
InstructionIndex - MIN_TOKEN_INDEX);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.SetLastRewriteTokenIndex(
|
|
const ProgramName: String; const I: Integer);
|
|
begin
|
|
FLastRewriteTokenIndexes[ProgramName] := I;
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToDebugString: String;
|
|
begin
|
|
Result := ToDebugString(MIN_TOKEN_INDEX, Size - 1);
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToDebugString(const Start, Stop: Integer): String;
|
|
var
|
|
Buf: TStringBuilder;
|
|
I: Integer;
|
|
begin
|
|
Buf := TStringBuilder.Create;
|
|
try
|
|
if (Start >= MIN_TOKEN_INDEX) then
|
|
for I := Start to Min(Stop,GetTokens.Count - 1) do
|
|
Buf.Append(Get(I).ToString);
|
|
finally
|
|
Buf.Free;
|
|
end;
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToOriginalString: String;
|
|
begin
|
|
Result := ToOriginalString(MIN_TOKEN_INDEX, Size - 1);
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToOriginalString(const Start,
|
|
Stop: Integer): String;
|
|
var
|
|
Buf: TStringBuilder;
|
|
I: Integer;
|
|
begin
|
|
Buf := TStringBuilder.Create;
|
|
try
|
|
if (Start >= MIN_TOKEN_INDEX) then
|
|
for I := Start to Min(Stop, GetTokens.Count - 1) do
|
|
Buf.Append(Get(I).Text);
|
|
Result := Buf.ToString;
|
|
finally
|
|
Buf.Free;
|
|
end;
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToString: String;
|
|
begin
|
|
Result := ToString(MIN_TOKEN_INDEX, Size - 1);
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToString(const ProgramName: String): String;
|
|
begin
|
|
Result := ToString(ProgramName, MIN_TOKEN_INDEX, Size - 1);
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToString(const ProgramName: String; const Start,
|
|
Stop: Integer): String;
|
|
var
|
|
Rewrites: IList<IRewriteOperation>;
|
|
I, StartIndex, StopIndex: Integer;
|
|
IndexToOp: IDictionary<Integer, IRewriteOperation>;
|
|
Buf: TStringBuilder;
|
|
Tokens: IList<IToken>;
|
|
T: IToken;
|
|
Op: IRewriteOperation;
|
|
Pair: TPair<Integer, IRewriteOperation>;
|
|
begin
|
|
Rewrites := FPrograms[ProgramName];
|
|
Tokens := GetTokens;
|
|
// ensure start/end are in range
|
|
StopIndex := Min(Stop,Tokens.Count - 1);
|
|
StartIndex := Max(Start,0);
|
|
|
|
if (Rewrites = nil) or (Rewrites.Count = 0) then
|
|
begin
|
|
// no instructions to execute
|
|
Result := ToOriginalString(StartIndex, StopIndex);
|
|
Exit;
|
|
end;
|
|
|
|
Buf := TStringBuilder.Create;
|
|
try
|
|
// First, optimize instruction stream
|
|
IndexToOp := ReduceToSingleOperationPerIndex(Rewrites);
|
|
|
|
// Walk buffer, executing instructions and emitting tokens
|
|
I := StartIndex;
|
|
while (I <= StopIndex) and (I < Tokens.Count) do
|
|
begin
|
|
if (not IndexToOp.TryGetValue(I, Op)) then
|
|
Op := nil;
|
|
IndexToOp.Remove(I); // remove so any left have index size-1
|
|
T := Tokens[I];
|
|
if (Op = nil) then
|
|
begin
|
|
// no operation at that index, just dump token
|
|
Buf.Append(T.Text);
|
|
Inc(I); // move to next token
|
|
end
|
|
else
|
|
I := Op.Execute(Buf); // execute operation and skip
|
|
end;
|
|
|
|
// include stuff after end if it's last index in buffer
|
|
// So, if they did an insertAfter(lastValidIndex, "foo"), include
|
|
// foo if end==lastValidIndex.
|
|
if (StopIndex = Tokens.Count - 1) then
|
|
begin
|
|
// Scan any remaining operations after last token
|
|
// should be included (they will be inserts).
|
|
for Pair in IndexToOp do
|
|
begin
|
|
if (Pair.Value.Index >= Tokens.Count - 1) then
|
|
Buf.Append(Pair.Value.Text.ToString);
|
|
end;
|
|
end;
|
|
Result := Buf.ToString;
|
|
finally
|
|
Buf.Free;
|
|
end;
|
|
end;
|
|
|
|
function TTokenRewriteStream.ToString(const Start, Stop: Integer): String;
|
|
begin
|
|
Result := ToString(DEFAULT_PROGRAM_NAME, Start, Stop);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const Index: Integer;
|
|
const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertBefore(Index, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const T: IToken; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertBefore(T, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
|
|
const Index: Integer; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertBefore(ProgramName, Index, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
|
|
const T: IToken; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertBefore(ProgramName, T, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const Index: Integer;
|
|
const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertAfter(Index,S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const T: IToken; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertAfter(T,S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
|
|
const Index: Integer; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertAfter(ProgramName,Index,S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
|
|
const T: IToken; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
InsertAfter(ProgramName,T,S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const IndexT: IToken; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
Replace(IndexT, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const Start, Stop: Integer;
|
|
const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
Replace(Start, Stop, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const Index: Integer; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
Replace(Index, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
|
|
Stop: IToken; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
Replace(ProgramName, Start, Stop, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
|
|
Stop: Integer; const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
Replace(ProgramName, Start, Stop, S);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.Replace(const Start, Stop: IToken;
|
|
const Text: String);
|
|
var
|
|
S: IANTLRString;
|
|
begin
|
|
S := TANTLRString.Create(Text);
|
|
Replace(Start, Stop, S);
|
|
end;
|
|
|
|
{ TTokenRewriteStream.TRewriteOperation }
|
|
|
|
constructor TTokenRewriteStream.TRewriteOperation.Create(const AIndex: Integer;
|
|
const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
|
|
begin
|
|
inherited Create;
|
|
FIndex := AIndex;
|
|
FText := AText;
|
|
FParent := Pointer(AParent);
|
|
end;
|
|
|
|
function TTokenRewriteStream.TRewriteOperation.Execute(
|
|
const Buf: TStringBuilder): Integer;
|
|
begin
|
|
Result := FIndex;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TRewriteOperation.GetIndex: Integer;
|
|
begin
|
|
Result := FIndex;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TRewriteOperation.GetInstructionIndex: Integer;
|
|
begin
|
|
Result := FInstructionIndex;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TRewriteOperation.GetParent: ITokenRewriteStream;
|
|
begin
|
|
Result := ITokenRewriteStream(FParent);
|
|
end;
|
|
|
|
function TTokenRewriteStream.TRewriteOperation.GetText: IANTLRInterface;
|
|
begin
|
|
Result := FText;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.TRewriteOperation.SetIndex(const Value: Integer);
|
|
begin
|
|
FIndex := Value;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.TRewriteOperation.SetInstructionIndex(
|
|
const Value: Integer);
|
|
begin
|
|
FInstructionIndex := Value;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.TRewriteOperation.SetParent(
|
|
const Value: ITokenRewriteStream);
|
|
begin
|
|
FParent := Pointer(Value);
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.TRewriteOperation.SetText(
|
|
const Value: IANTLRInterface);
|
|
begin
|
|
FText := Value;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TRewriteOperation.ToString: String;
|
|
var
|
|
OpName: String;
|
|
DollarIndex: Integer;
|
|
begin
|
|
OpName := ClassName;
|
|
DollarIndex := Pos('$',OpName) - 1; // Delphi strings are 1-based
|
|
if (DollarIndex >= 0) then
|
|
OpName := Copy(OpName,DollarIndex + 1,Length(OpName) - (DollarIndex + 1));
|
|
Result := '<' + OpName + '@' + IntToStr(FIndex) + ':"' + FText.ToString + '">';
|
|
end;
|
|
|
|
{ TTokenRewriteStream.TRewriteOpComparer<T> }
|
|
|
|
function TTokenRewriteStream.TRewriteOpComparer<T>.Compare(const Left,
|
|
Right: T): Integer;
|
|
begin
|
|
if (Left.GetIndex < Right.GetIndex) then
|
|
Result := -1
|
|
else
|
|
if (Left.GetIndex > Right.GetIndex) then
|
|
Result := 1
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
{ TTokenRewriteStream.TInsertBeforeOp }
|
|
|
|
function TTokenRewriteStream.TInsertBeforeOp.Execute(
|
|
const Buf: TStringBuilder): Integer;
|
|
begin
|
|
Buf.Append(Text.ToString);
|
|
Buf.Append(Parent.Get(Index).Text);
|
|
Result := Index + 1;
|
|
end;
|
|
|
|
{ TTokenRewriteStream.TReplaceOp }
|
|
|
|
constructor TTokenRewriteStream.TReplaceOp.Create(const AStart, AStop: Integer;
|
|
const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
|
|
begin
|
|
inherited Create(AStart, AText, AParent);
|
|
FLastIndex := AStop;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TReplaceOp.Execute(
|
|
const Buf: TStringBuilder): Integer;
|
|
begin
|
|
if (Text <> nil) then
|
|
Buf.Append(Text.ToString);
|
|
Result := FLastIndex + 1;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TReplaceOp.GetLastIndex: Integer;
|
|
begin
|
|
Result := FLastIndex;
|
|
end;
|
|
|
|
procedure TTokenRewriteStream.TReplaceOp.SetLastIndex(const Value: Integer);
|
|
begin
|
|
FLastIndex := Value;
|
|
end;
|
|
|
|
function TTokenRewriteStream.TReplaceOp.ToString: String;
|
|
begin
|
|
Result := '<ReplaceOp@' + IntToStr(Index) + '..' + IntToStr(FLastIndex)
|
|
+ ':"' + Text.ToString + '">';
|
|
end;
|
|
|
|
{ TTokenRewriteStream.TDeleteOp }
|
|
|
|
function TTokenRewriteStream.TDeleteOp.ToString: String;
|
|
begin
|
|
Result := '<DeleteOp@' + IntToStr(Index) + '..' + IntToStr(FLastIndex) + '>';
|
|
end;
|
|
|
|
{ Utilities }
|
|
|
|
var
|
|
EmptyToken: IToken = nil;
|
|
EmptyRuleReturnScope: IRuleReturnScope = nil;
|
|
|
|
function Def(const X: IToken): IToken; overload;
|
|
begin
|
|
if Assigned(X) then
|
|
Result := X
|
|
else
|
|
begin
|
|
if (EmptyToken = nil) then
|
|
EmptyToken := TCommonToken.Create;
|
|
Result := EmptyToken;
|
|
end;
|
|
end;
|
|
|
|
function Def(const X: IRuleReturnScope): IRuleReturnScope;
|
|
begin
|
|
if Assigned(X) then
|
|
Result := X
|
|
else
|
|
begin
|
|
if (EmptyRuleReturnScope = nil) then
|
|
EmptyRuleReturnScope := TRuleReturnScope.Create;
|
|
Result := EmptyRuleReturnScope;
|
|
end;
|
|
end;
|
|
|
|
initialization
|
|
TToken.Initialize;
|
|
|
|
end.
|