1 /*
2  * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.DefaultErrorStrategy;
8 
9 import antlr.v4.runtime.ANTLRErrorStrategy;
10 import antlr.v4.runtime.CharStream;
11 import antlr.v4.runtime.FailedPredicateException;
12 import antlr.v4.runtime.InputMismatchException;
13 import antlr.v4.runtime.NoViableAltException;
14 import antlr.v4.runtime.Parser;
15 import antlr.v4.runtime.ParserRuleContext;
16 import antlr.v4.runtime.RecognitionException;
17 import antlr.v4.runtime.RuleContext;
18 import antlr.v4.runtime.Token;
19 import antlr.v4.runtime.TokenConstantDefinition;
20 import antlr.v4.runtime.TokenSource;
21 import antlr.v4.runtime.TokenStream;
22 import antlr.v4.runtime.atn.ATN;
23 import antlr.v4.runtime.atn.ATNState;
24 import antlr.v4.runtime.atn.RuleTransition;
25 import antlr.v4.runtime.atn.StateNames;
26 import antlr.v4.runtime.misc.IntervalSet;
27 import std.array;
28 import std.format;
29 import std.stdio;
30 import std.typecons;
31 
32 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b");
33 
34 // Class DefaultErrorStrategy
35 /**
36  * This is the default implementation of {@link ANTLRErrorStrategy} used for
37  * error reporting and recovery in ANTLR parsers.
38  */
39 class DefaultErrorStrategy : ANTLRErrorStrategy
40 {
41 
42     /**
43      * Indicates whether the error strategy is currently "recovering from an
44      * error". This is used to suppress reporting multiple error messages while
45      * attempting to recover from a detected syntax error.
46      *
47      *  @see #inErrorRecoveryMode
48      */
49     protected bool errorRecoveryMode;
50 
51     /**
52      * The index into the input stream where the last error occurred.
53      * This is used to prevent infinite loops where an error is found
54      * but no token is consumed during recovery...another error is found,
55      * ad nauseum.  This is a failsafe mechanism to guarantee that at least
56      * one token/tree node is consumed for two errors.
57      */
58     protected int lastErrorIndex = -1;
59 
60     public IntervalSet lastErrorStates;
61 
62     /**
63      * This field is used to propagate information about the lookahead following
64      * the previous match. Since prediction prefers completing the current rule
65      * to error recovery efforts, error reporting may occur later than the
66      * original point where it was discoverable. The original context is used to
67      * compute the true expected sets as though the reporting occurred as early
68      * as possible.
69      */
70     protected ParserRuleContext nextTokensContext;
71 
72     /**
73      * @see #nextTokensContext
74      */
75     protected int nextTokensState;
76 
77     /**
78      * <p>The default implementation simply calls {@link #endErrorCondition} to
79      * ensure that the handler is not in error recovery mode.</p>
80      */
81     public void reset(Parser recognizer)
82     {
83         endErrorCondition(recognizer);
84     }
85 
86     /**
87      * This method is called to enter error recovery mode when a recognition
88      * exception is reported.
89      *
90      * @param recognizer the parser instance
91      */
92     protected void beginErrorCondition(Parser recognizer)
93     {
94         errorRecoveryMode = true;
95     }
96 
97     public bool inErrorRecoveryMode(Parser recognizer)
98     {
99         return errorRecoveryMode;
100     }
101 
102     /**
103      * This method is called to leave error recovery mode after recovering from
104      * a recognition exception.
105      *
106      * @param recognizer
107      */
108     protected void endErrorCondition(Parser recognizer)
109     {
110 	errorRecoveryMode = false;
111         lastErrorStates = null;
112         lastErrorIndex = -1;
113     }
114 
115     /**
116      * {@inheritDoc}
117      *
118      * <p>The default implementation simply calls {@link #endErrorCondition}.</p>
119      */
120     public void reportMatch(Parser recognizer)
121     {
122 	endErrorCondition(recognizer);
123     }
124 
125     /**
126      * {@inheritDoc}
127      *
128      * <p>The default implementation returns immediately if the handler is already
129      * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
130      * and dispatches the reporting task based on the runtime type of {@code e}
131      * according to the following table.</p>
132      *
133      * <ul>
134      * <li>{@link NoViableAltException}: Dispatches the call to
135      * {@link #reportNoViableAlternative}</li>
136      * <li>{@link InputMismatchException}: Dispatches the call to
137      * {@link #reportInputMismatch}</li>
138      * <li>{@link FailedPredicateException}: Dispatches the call to
139      * {@link #reportFailedPredicate}</li>
140      * <li>All other types: calls {@link Parser#notifyErrorListeners} to report
141      * the exception</li>
142      * </ul>
143      */
144     public void reportError(Parser recognizer, RecognitionException e)
145     {
146 	// if we've already reported an error and have not matched a token
147         // yet successfully, don't report any errors.
148         if (inErrorRecoveryMode(recognizer)) {
149             debug(DefaultErrorStrategy)
150             {
151                 writefln("DefaultErrorStrategy:[SPURIOUS]");
152             }
153             return; // don't report spurious errors
154         }
155         beginErrorCondition(recognizer);
156         if (cast(NoViableAltException)e) {
157             reportNoViableAlternative(recognizer, cast(NoViableAltException)e);
158         }
159         else if (cast(InputMismatchException)e) {
160             reportInputMismatch(recognizer, cast(InputMismatchException)e);
161         }
162         else if (cast(FailedPredicateException)e) {
163             reportFailedPredicate(recognizer, cast(FailedPredicateException)e);
164         }
165         else {
166             stderr.writefln("unknown recognition error type: %s", e.stringof);
167             recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e);
168         }
169     }
170 
171     /**
172      * {@inheritDoc}
173      *
174      * <p>The default implementation resynchronizes the parser by consuming tokens
175      * until we find one in the resynchronization set--loosely the set of tokens
176      * that can follow the current rule.</p>
177      */
178     public void recover(Parser recognizer, RecognitionException e)
179     {
180         debug(DefaultErrorStrategy)
181         {
182             writefln("recover in %s index=%s, lastErrorIndex=%s, states=%s",
183                      recognizer.getRuleInvocationStack,
184                      recognizer.getInputStream().index(),
185                      lastErrorIndex,
186                      lastErrorStates);
187         }
188         if (lastErrorIndex == recognizer.getInputStream.index() &&
189             lastErrorStates !is null &&
190             lastErrorStates.contains(recognizer.getState))
191             {
192                 // uh oh, another error at same token index and previously-visited
193                 // state in ATN; must be a case where LT(1) is in the recovery
194                 // token set so nothing got consumed. Consume a single token
195                 // at least to prevent an infinite loop; this is a failsafe.
196                 debug(DefaultErrorStrategy)
197                 {
198                     stderr.writefln("seen error condition before index= %s, states=%s",
199                                     lastErrorIndex, lastErrorStates);
200                     stderr.writefln("FAILSAFE consumes %s",
201                                     recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]);
202                 }
203                 recognizer.consume;
204             }
205         lastErrorIndex = recognizer.getInputStream.index;
206         if (!lastErrorStates)
207             lastErrorStates = new IntervalSet;
208         lastErrorStates.add(recognizer.getState);
209         IntervalSet followSet = getErrorRecoverySet(recognizer);
210         consumeUntil(recognizer, followSet);
211     }
212 
213     /**
214      * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
215      * that the current lookahead symbol is consistent with what were expecting
216      * at this point in the ATN. You can call this anytime but ANTLR only
217      * generates code to check before subrules/loops and each iteration.
218      *
219      * <p>Implements Jim Idle's magic sync mechanism in closures and optional
220      * subrules. E.g.,</p>
221      *
222      * <pre>
223      * a : sync ( stuff sync )* ;
224      * sync : {consume to what can follow sync} ;
225      * </pre>
226      *
227      * At the start of a sub rule upon error, {@link #sync} performs single
228      * token deletion, if possible. If it can't do that, it bails on the current
229      * rule and uses the default error recovery, which consumes until the
230      * resynchronization set of the current rule.
231      *
232      * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
233      * with an empty alternative), then the expected set includes what follows
234      * the subrule.</p>
235      *
236      * <p>During loop iteration, it consumes until it sees a token that can start a
237      * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
238      * stay in the loop as long as possible.</p>
239      *
240      * <p><strong>ORIGINS</strong></p>
241      *
242      * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
243      * A single mismatch token or missing token would force the parser to bail
244      * out of the entire rules surrounding the loop. So, for rule</p>
245      *
246      * <pre>
247      * classDef : 'class' ID '{' member* '}'
248      * </pre>
249      *
250      * input with an extra token between members would force the parser to
251      * consume until it found the next class definition rather than the next
252      * member definition of the current class.
253      *
254      * <p>This functionality cost a little bit of effort because the parser has to
255      * compare token set at the start of the loop and at each iteration. If for
256      * some reason speed is suffering for you, you can turn off this
257      * functionality by simply overriding this method as a blank { }.</p>
258      */
259     public void sync(Parser recognizer)
260     {
261 	ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState];
262         debug(ErrorHandling)
263             {
264                 writefln("sync @ %s=%s", s.stateNumber, s.classinfo);
265             }
266         // If already recovering, don't try to sync
267         if (inErrorRecoveryMode(recognizer)) {
268             return;
269         }
270         TokenStream tokens = recognizer.getInputStream;
271         int la = tokens.LA(1);
272 
273         // try cheaper subset first; might get lucky. seems to shave a wee bit off
274         IntervalSet nextTokens = recognizer.getATN.nextTokens(s);
275         if (nextTokens.contains(la)) {
276             // We are sure the token matches
277             nextTokensContext = null;
278             nextTokensState = ATNState.INVALID_STATE_NUMBER;
279             return;
280         }
281 
282 	if (nextTokens.contains(TokenConstantDefinition.EPSILON)) {
283             if (nextTokensContext is null) {
284                 // It's possible the next token won't match; information tracked
285                 // by sync is restricted for performance.
286                 nextTokensContext = recognizer.ctx;
287                 nextTokensState = recognizer.getState;
288             }
289             return;
290         }
291 
292         switch (s.getStateType) {
293         case StateNames.BLOCK_START:
294         case StateNames.STAR_BLOCK_START:
295         case StateNames.PLUS_BLOCK_START:
296         case StateNames.STAR_LOOP_ENTRY:
297             // report error and recover if possible
298             if (singleTokenDeletion(recognizer) !is null) {
299                 return;
300             }
301             throw new InputMismatchException(recognizer);
302 
303         case StateNames.PLUS_LOOP_BACK:
304         case StateNames.STAR_LOOP_BACK:
305             debug(ErrorHandling)
306             {
307                 stderr.writefln("at loop back: %s", s);
308             }
309             reportUnwantedToken(recognizer);
310             IntervalSet expecting = recognizer.getExpectedTokens;
311             IntervalSet whatFollowsLoopIterationOrRule =
312                 expecting.or(getErrorRecoverySet(recognizer));
313             consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
314             break;
315 
316         default:
317             // do nothing if we can't identify the exact kind of ATN state
318             break;
319         }
320     }
321 
322     /**
323      * This is called by {@link #reportError} when the exception is a
324      * {@link NoViableAltException}.
325      *
326      * @see #reportError
327      *
328      * @param recognizer the parser instance
329      * @param e the recognition exception
330      */
331     protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e)
332     {
333 	TokenStream tokens = recognizer.getInputStream();
334         string input;
335         if (tokens !is null) {
336             if (e.getStartToken.getType == TokenConstantDefinition.EOF)
337                 input = "<EOF>";
338             else
339                 input = tokens.getText(e.getStartToken, e.getOffendingToken);
340         }
341         else {
342             input = "<unknown input>";
343         }
344         string msg = format("no viable alternative at input %s", escapeWSAndQuote(input));
345         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
346     }
347 
348     /**
349      * This is called by {@link #reportError} when the exception is an
350      * {@link InputMismatchException}.
351      *
352      * @see #reportError
353      *
354      * @param recognizer the parser instance
355      * @param e the recognition exception
356      */
357     protected void reportInputMismatch(Parser recognizer, InputMismatchException e)
358     {
359         string msg = format("mismatched input %1$s expecting %2$s",
360                             getTokenErrorDisplay(e.getOffendingToken),
361                             e.getExpectedTokens.toString(recognizer.getVocabulary));
362         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
363     }
364 
365     /**
366      * This is called by {@link #reportError} when the exception is a
367      * {@link FailedPredicateException}.
368      *
369      * @see #reportError
370      *
371      * @param recognizer the parser instance
372      * @param e the recognition exception
373      */
374     protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e)
375     {
376 	string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()];
377         string msg = "rule " ~ ruleName ~ " " ~ e.msg;
378         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
379     }
380 
381     /**
382      * This method is called to report a syntax error which requires the removal
383      * of a token from the input stream. At the time this method is called, the
384      * erroneous symbol is current {@code LT(1)} symbol and has not yet been
385      * removed from the input stream. When this method returns,
386      * {@code recognizer} is in error recovery mode.
387      *
388      * <p>This method is called when {@link #singleTokenDeletion} identifies
389      * single-token deletion as a viable recovery strategy for a mismatched
390      * input error.</p>
391      *
392      * <p>The default implementation simply returns if the handler is already in
393      * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
394      * enter error recovery mode, followed by calling
395      * {@link Parser#notifyErrorListeners}.</p>
396      *
397      * @param recognizer the parser instance
398      */
399     protected void reportUnwantedToken(Parser recognizer)
400     {
401 	if (inErrorRecoveryMode(recognizer)) {
402             return;
403         }
404 
405         beginErrorCondition(recognizer);
406 
407         Token t = recognizer.getCurrentToken();
408         string tokenName = getTokenErrorDisplay(t);
409         IntervalSet expecting = getExpectedTokens(recognizer);
410         string msg = "extraneous input " ~ tokenName ~ " expecting " ~
411             expecting.toString(recognizer.getVocabulary());
412         recognizer.notifyErrorListeners(t, msg, null);
413     }
414 
415     /**
416      * This method is called to report a syntax error which requires the
417      * insertion of a missing token into the input stream. At the time this
418      * method is called, the missing token has not yet been inserted. When this
419      * method returns, {@code recognizer} is in error recovery mode.
420      *
421      * <p>This method is called when {@link #singleTokenInsertion} identifies
422      * single-token insertion as a viable recovery strategy for a mismatched
423      * input error.</p>
424      *
425      * <p>The default implementation simply returns if the handler is already in
426      * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
427      * enter error recovery mode, followed by calling
428      * {@link Parser#notifyErrorListeners}.</p>
429      *
430      * @param recognizer the parser instance
431      */
432     protected void reportMissingToken(Parser recognizer)
433     {
434 	if (inErrorRecoveryMode(recognizer)) {
435             return;
436         }
437 
438         beginErrorCondition(recognizer);
439 
440         Token t = recognizer.getCurrentToken();
441         IntervalSet expecting = getExpectedTokens(recognizer);
442         string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary),
443                             getTokenErrorDisplay(t));
444 
445         recognizer.notifyErrorListeners(t, msg, null);
446     }
447 
448     /**
449      * {@inheritDoc}
450      *
451      * <p>The default implementation attempts to recover from the mismatched input
452      * by using single token insertion and deletion as described below. If the
453      * recovery attempt fails, this method throws an
454      * {@link InputMismatchException}.</p>
455      *
456      * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
457      *
458      * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
459      * right token, however, then assume {@code LA(1)} is some extra spurious
460      * token and delete it. Then consume and return the next token (which was
461      * the {@code LA(2)} token) as the successful result of the match operation.</p>
462      *
463      * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
464      *
465      * <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
466      *
467      * <p>If current token (at {@code LA(1)}) is consistent with what could come
468      * after the expected {@code LA(1)} token, then assume the token is missing
469      * and use the parser's {@link TokenFactory} to create it on the fly. The
470      * "insertion" is performed by returning the created token as the successful
471      * result of the match operation.</p>
472      *
473      * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
474      *
475      * <p><strong>EXAMPLE</strong></p>
476      *
477      * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
478      * the parser returns from the nested call to {@code expr}, it will have
479      * call chain:</p>
480      *
481      * <pre>
482      * stat &rarr; expr &rarr; atom
483      * </pre>
484      *
485      * and it will be trying to match the {@code ')'} at this point in the
486      * derivation:
487      *
488      * <pre>
489      * =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
490      *                    ^
491      * </pre>
492      *
493      * The attempt to match {@code ')'} will fail when it sees {@code ';'} and
494      * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
495      * is in the set of tokens that can follow the {@code ')'} token reference
496      * in rule {@code atom}. It can assume that you forgot the {@code ')'}.
497      */
498     public Token recoverInline(Parser recognizer)
499     {
500 	// SINGLE TOKEN DELETION
501         Token matchedSymbol = singleTokenDeletion(recognizer);
502         if (matchedSymbol !is null) {
503             // we have deleted the extra token.
504             // now, move past ttype token as if all were ok
505             recognizer.consume();
506             return matchedSymbol;
507         }
508 
509         // SINGLE TOKEN INSERTION
510         if (singleTokenInsertion(recognizer)) {
511             return getMissingSymbol(recognizer);
512         }
513 
514         // even that didn't work; must throw the exception
515         InputMismatchException e;
516         if (nextTokensContext is null) {
517             e = new InputMismatchException(recognizer);
518         } else {
519             e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext);
520         }
521         throw e;
522     }
523 
524     /**
525      * This method implements the single-token insertion inline error recovery
526      * strategy. It is called by {@link #recoverInline} if the single-token
527      * deletion strategy fails to recover from the mismatched input. If this
528      * method returns {@code true}, {@code recognizer} will be in error recovery
529      * mode.
530      *
531      * <p>This method determines whether or not single-token insertion is viable by
532      * checking if the {@code LA(1)} input symbol could be successfully matched
533      * if it were instead the {@code LA(2)} symbol. If this method returns
534      * {@code true}, the caller is responsible for creating and inserting a
535      * token with the correct type to produce this behavior.</p>
536      *
537      * @param recognizer the parser instance
538      * @return {@code true} if single-token insertion is a viable recovery
539      * strategy for the current mismatched input, otherwise {@code false}
540      */
541     protected bool singleTokenInsertion(Parser recognizer)
542     {
543 	int currentSymbolType = recognizer.getInputStream().LA(1);
544         // if current token is consistent with what could come after current
545         // ATN state, then we know we're missing a token; error recovery
546         // is free to conjure up and insert the missing token
547         ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState];
548         ATNState next = currentState.transition(0).target;
549         ATN atn = recognizer.getInterpreter().atn;
550         IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx);
551         debug(ErrorHandling)
552             {
553                 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames));
554             }
555         if ( expectingAtLL2.contains(currentSymbolType) ) {
556             reportMissingToken(recognizer);
557             return true;
558         }
559         return false;
560     }
561 
562     /**
563      * This method implements the single-token deletion inline error recovery
564      * strategy. It is called by {@link #recoverInline} to attempt to recover
565      * from mismatched input. If this method returns null, the parser and error
566      * handler state will not have changed. If this method returns non-null,
567      * {@code recognizer} will <em>not</em> be in error recovery mode since the
568      * returned token was a successful match.
569      *
570      * <p>If the single-token deletion is successful, this method calls
571      * {@link #reportUnwantedToken} to report the error, followed by
572      * {@link Parser#consume} to actually "delete" the extraneous token. Then,
573      * before returning {@link #reportMatch} is called to signal a successful
574      * match.</p>
575      *
576      * @param recognizer the parser instance
577      * @return the successfully matched {@link Token} instance if single-token
578      * deletion successfully recovers from the mismatched input, otherwise
579      * {@code null}
580      */
581     protected Token singleTokenDeletion(Parser recognizer)
582     {
583 	int nextTokenType = recognizer.getInputStream().LA(2);
584         IntervalSet expecting = getExpectedTokens(recognizer);
585         if ( expecting.contains(nextTokenType) ) {
586             reportUnwantedToken(recognizer);
587             /*
588               System.err.println("recoverFromMismatchedToken deleting "+
589               ((TokenStream)recognizer.getInputStream()).LT(1)+
590               " since "+((TokenStream)recognizer.getInputStream()).LT(2)+
591               " is what we want");
592             */
593             recognizer.consume(); // simply delete extra token
594             // we want to return the token we're actually matching
595             Token matchedSymbol = recognizer.getCurrentToken();
596             reportMatch(recognizer);  // we know current token is correct
597             return matchedSymbol;
598         }
599         return null;
600     }
601 
602     /**
603      * Conjure up a missing token during error recovery.
604      *
605      *  The recognizer attempts to recover from single missing
606      *  symbols. But, actions might refer to that missing symbol.
607      *  For example, x=ID {f($x);}. The action clearly assumes
608      *  that there has been an identifier matched previously and that
609      *  $x points at that token. If that token is missing, but
610      *  the next token in the stream is what we want we assume that
611      *  this token is missing and we keep going. Because we
612      *  have to return some token to replace the missing token,
613      *  we have to conjure one up. This method gives the user control
614      *  over the tokens returned for missing tokens. Mostly,
615      *  you will want to create something special for identifier
616      *  tokens. For literals such as '{' and ',', the default
617      *  action in the parser or tree parser works. It simply creates
618      *  a CommonToken of the appropriate type. The text will be the token.
619      *  If you change what tokens must be created by the lexer,
620      *  override this method to create the appropriate tokens.
621      */
622     protected Token getMissingSymbol(Parser recognizer)
623     {
624 	Token currentSymbol = recognizer.getCurrentToken();
625         IntervalSet expecting = getExpectedTokens(recognizer);
626         int expectedTokenType = TokenConstantDefinition.INVALID_TYPE;
627         if (!expecting.isNil) {
628             expectedTokenType = expecting.getMinElement(); // get any element
629         }
630         string tokenText;
631         if (expectedTokenType == TokenConstantDefinition.EOF)
632             tokenText = "<missing EOF>";
633         else
634             tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType));
635         Token current = currentSymbol;
636         Token lookback = recognizer.getInputStream().LT(-1);
637         if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) {
638             current = lookback;
639         }
640         TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream());
641         return
642             recognizer.tokenFactory().create(tsp, expectedTokenType, tokenText,
643                                              TokenConstantDefinition.DEFAULT_CHANNEL,
644                                              -1, -1,
645                                              current.getLine(), current.getCharPositionInLine());
646 
647     }
648 
649     protected IntervalSet getExpectedTokens(Parser recognizer)
650     {
651 	return recognizer.getExpectedTokens;
652     }
653 
654     /**
655      * How should a token be displayed in an error message? The default
656      * is to display just the text, but during development you might
657      * want to have a lot of information spit out.  Override in that case
658      * to use t.toString() (which, for CommonToken, dumps everything about
659      * the token). This is better than forcing you to override a method in
660      * your token objects because you don't have to go modify your lexer
661      * so that it creates a new Java type.
662      */
663     protected string getTokenErrorDisplay(Token t)
664     {
665 	if (t is null) return "<no token>";
666         string s = getSymbolText(t);
667         if (s is null) {
668             if (getSymbolType(t) == TokenConstantDefinition.EOF) {
669                 s = "<EOF>";
670             }
671             else {
672                 s = format("<%s>", getSymbolType(t));
673             }
674         }
675         return escapeWSAndQuote(s);
676     }
677 
678     protected string getSymbolText(Token symbol)
679     {
680         return symbol.getText;
681     }
682 
683     protected int getSymbolType(Token symbol)
684     {
685 	return symbol.getType();
686     }
687 
688     protected string escapeWSAndQuote(string s)
689     {
690         //		if ( s==null ) return s;
691         s = s.replace("\n","\\n");
692         s = s.replace("\r","\\r");
693         s = s.replace("\t","\\t");
694         return "'" ~ s ~ "'";
695     }
696 
697     /**
698      * Compute the error recovery set for the current rule.  During
699      *  rule invocation, the parser pushes the set of tokens that can
700      *  follow that rule reference on the stack; this amounts to
701      *  computing FIRST of what follows the rule reference in the
702      *  enclosing rule. See LinearApproximator.FIRST().
703      *  This local follow set only includes tokens
704      *  from within the rule; i.e., the FIRST computation done by
705      *  ANTLR stops at the end of a rule.
706      *
707      *  EXAMPLE
708      *
709      *  When you find a "no viable alt exception", the input is not
710      *  consistent with any of the alternatives for rule r.  The best
711      *  thing to do is to consume tokens until you see something that
712      *  can legally follow a call to r *or* any rule that called r.
713      *  You don't want the exact set of viable next tokens because the
714      *  input might just be missing a token--you might consume the
715      *  rest of the input looking for one of the missing tokens.
716      *
717      *  Consider grammar:
718      *
719      *  a : '[' b ']'
720      *    | '(' b ')'
721      *    ;
722      *  b : c '^' INT ;
723      *  c : ID
724      *    | INT
725      *    ;
726      *
727      *  At each rule invocation, the set of tokens that could follow
728      *  that rule is pushed on a stack.  Here are the various
729      *  context-sensitive follow sets:
730      *
731      *  FOLLOW(b1_in_a) = FIRST(']') = ']'
732      *  FOLLOW(b2_in_a) = FIRST(')') = ')'
733      *  FOLLOW(c_in_b) = FIRST('^') = '^'
734      *
735      *  Upon erroneous input "[]", the call chain is
736      *
737      *  a -> b -> c
738      *
739      *  and, hence, the follow context stack is:
740      *
741      *  depth     follow set       start of rule execution
742      *    0         <EOF>                    a (from main())
743      *    1          ']'                     b
744      *    2          '^'                     c
745      *
746      *  Notice that ')' is not included, because b would have to have
747      *  been called from a different context in rule a for ')' to be
748      *  included.
749      *
750      *  For error recovery, we cannot consider FOLLOW(c)
751      *  (context-sensitive or otherwise).  We need the combined set of
752      *  all context-sensitive FOLLOW sets--the set of all tokens that
753      *  could follow any reference in the call chain.  We need to
754      *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
755      *  we resync'd to that token, we'd consume until EOF.  We need to
756      *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
757      *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
758      *  not consume anything. After printing an error, rule c would
759      *  return normally.  Rule b would not find the required '^' though.
760      *  At this point, it gets a mismatched token error and throws an
761      *  exception (since LA(1) is not in the viable following token
762      *  set).  The rule exception handler tries to recover, but finds
763      *  the same recovery set and doesn't consume anything.  Rule b
764      *  exits normally returning to rule a.  Now it finds the ']' (and
765      *  with the successful match exits errorRecovery mode).
766      *
767      *  So, you can see that the parser walks up the call chain looking
768      *  for the token that was a member of the recovery set.
769      *
770      *  Errors are not generated in errorRecovery mode.
771      *
772      *  ANTLR's error recovery mechanism is based upon original ideas:
773      *
774      *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
775      *
776      *  and
777      *
778      *  "A note on error recovery in recursive descent parsers":
779      *  http://portal.acm.org/citation.cfm?id=947902.947905
780      *
781      *  Later, Josef Grosch had some good ideas:
782      *
783      *  "Efficient and Comfortable Error Recovery in Recursive Descent
784      *  Parsers":
785      *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
786      *
787      *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
788      *  at run-time upon error to avoid overhead during parsing.
789      */
790     protected IntervalSet getErrorRecoverySet(Parser recognizer)
791     {
792 	ATN atn = recognizer.getInterpreter().atn;
793         RuleContext ctx = recognizer.ctx;
794         IntervalSet recoverSet = new IntervalSet();
795         while (ctx !is null && ctx.invokingState >= 0) {
796             // compute what follows who invoked us
797             ATNState invokingState = atn.states[ctx.invokingState];
798             RuleTransition rt = cast(RuleTransition)invokingState.transition(0);
799             IntervalSet follow = atn.nextTokens(rt.followState);
800             recoverSet.addAll(follow);
801             ctx = ctx.parent;
802         }
803         recoverSet.remove(TokenConstantDefinition.EPSILON);
804         //		System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
805         return recoverSet;
806     }
807 
808     /**
809      * Consume tokens until one matches the given token set.
810      */
811     protected void consumeUntil(Parser recognizer, IntervalSet set)
812     {
813 	//stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames()));
814         int ttype = recognizer.getInputStream().LA(1);
815         while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) {
816             debug(ErrorHandling)
817             {
818                 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]);
819             }
820             // recognizer.getInputStream().consume();
821             recognizer.consume();
822             ttype = recognizer.getInputStream().LA(1);
823         }
824     }
825 
826 }