1 /*
2  * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.DefaultErrorStrategy;
8 
9 import antlr.v4.runtime.ANTLRErrorStrategy;
10 import antlr.v4.runtime.CharStream;
11 import antlr.v4.runtime.FailedPredicateException;
12 import antlr.v4.runtime.InputMismatchException;
13 import antlr.v4.runtime.NoViableAltException;
14 import antlr.v4.runtime.Parser;
15 import antlr.v4.runtime.ParserRuleContext;
16 import antlr.v4.runtime.RecognitionException;
17 import antlr.v4.runtime.RuleContext;
18 import antlr.v4.runtime.Token;
19 import antlr.v4.runtime.TokenConstantDefinition;
20 import antlr.v4.runtime.TokenSource;
21 import antlr.v4.runtime.TokenStream;
22 import antlr.v4.runtime.atn.ATN;
23 import antlr.v4.runtime.atn.ATNState;
24 import antlr.v4.runtime.atn.RuleTransition;
25 import antlr.v4.runtime.atn.StateNames;
26 import antlr.v4.runtime.misc.IntervalSet;
27 import std.array;
28 import std.format;
29 import std.stdio;
30 import std.typecons;
31 
32 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b");
33 
34 /**
35  * This is the default implementation of {@link ANTLRErrorStrategy} used for
36  * error reporting and recovery in ANTLR parsers.
37  */
38 class DefaultErrorStrategy : ANTLRErrorStrategy
39 {
40 
41     /**
42      * Indicates whether the error strategy is currently "recovering from an
43      * error". This is used to suppress reporting multiple error messages while
44      * attempting to recover from a detected syntax error.
45      *
46      *  @see #inErrorRecoveryMode
47      */
48     protected bool errorRecoveryMode;
49 
50     /**
51      * The index into the input stream where the last error occurred.
52      * This is used to prevent infinite loops where an error is found
53      * but no token is consumed during recovery...another error is found,
54      * ad nauseum.  This is a failsafe mechanism to guarantee that at least
55      * one token/tree node is consumed for two errors.
56      */
57     protected int lastErrorIndex = -1;
58 
59     public IntervalSet lastErrorStates;
60 
61     /**
62      * This field is used to propagate information about the lookahead following
63      * the previous match. Since prediction prefers completing the current rule
64      * to error recovery efforts, error reporting may occur later than the
65      * original point where it was discoverable. The original context is used to
66      * compute the true expected sets as though the reporting occurred as early
67      * as possible.
68      */
69     protected ParserRuleContext nextTokensContext;
70 
71     /**
72      * @see #nextTokensContext
73      */
74     protected int nextTokensState;
75 
76     /**
77      * <p>The default implementation simply calls {@link #endErrorCondition} to
78      * ensure that the handler is not in error recovery mode.</p>
79      */
80     public void reset(Parser recognizer)
81     {
82         endErrorCondition(recognizer);
83     }
84 
85     /**
86      * This method is called to enter error recovery mode when a recognition
87      * exception is reported.
88      *
89      * @param recognizer the parser instance
90      */
91     protected void beginErrorCondition(Parser recognizer)
92     {
93         errorRecoveryMode = true;
94     }
95 
96     public bool inErrorRecoveryMode(Parser recognizer)
97     {
98         return errorRecoveryMode;
99     }
100 
101     /**
102      * This method is called to leave error recovery mode after recovering from
103      * a recognition exception.
104      *
105      * @param recognizer
106      */
107     protected void endErrorCondition(Parser recognizer)
108     {
109 	errorRecoveryMode = false;
110         lastErrorStates = null;
111         lastErrorIndex = -1;
112     }
113 
114     /**
115      * {@inheritDoc}
116      *
117      * <p>The default implementation simply calls {@link #endErrorCondition}.</p>
118      */
119     public void reportMatch(Parser recognizer)
120     {
121 	endErrorCondition(recognizer);
122     }
123 
124     /**
125      * {@inheritDoc}
126      *
127      * <p>The default implementation returns immediately if the handler is already
128      * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
129      * and dispatches the reporting task based on the runtime type of {@code e}
130      * according to the following table.</p>
131      *
132      * <ul>
133      * <li>{@link NoViableAltException}: Dispatches the call to
134      * {@link #reportNoViableAlternative}</li>
135      * <li>{@link InputMismatchException}: Dispatches the call to
136      * {@link #reportInputMismatch}</li>
137      * <li>{@link FailedPredicateException}: Dispatches the call to
138      * {@link #reportFailedPredicate}</li>
139      * <li>All other types: calls {@link Parser#notifyErrorListeners} to report
140      * the exception</li>
141      * </ul>
142      */
143     public void reportError(Parser recognizer, RecognitionException e)
144     {
145 	// if we've already reported an error and have not matched a token
146         // yet successfully, don't report any errors.
147         if (inErrorRecoveryMode(recognizer)) {
148             debug(DefaultErrorStrategy)
149             {
150                 writefln("DefaultErrorStrategy:[SPURIOUS]");
151             }
152             return; // don't report spurious errors
153         }
154         beginErrorCondition(recognizer);
155         if (cast(NoViableAltException)e) {
156             reportNoViableAlternative(recognizer, cast(NoViableAltException)e);
157         }
158         else if (cast(InputMismatchException)e) {
159             reportInputMismatch(recognizer, cast(InputMismatchException)e);
160         }
161         else if (cast(FailedPredicateException)e) {
162             reportFailedPredicate(recognizer, cast(FailedPredicateException)e);
163         }
164         else {
165             stderr.writefln("unknown recognition error type: %s", e.stringof);
166             recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e);
167         }
168     }
169 
170     /**
171      * {@inheritDoc}
172      *
173      * <p>The default implementation resynchronizes the parser by consuming tokens
174      * until we find one in the resynchronization set--loosely the set of tokens
175      * that can follow the current rule.</p>
176      */
177     public void recover(Parser recognizer, RecognitionException e)
178     {
179         debug(DefaultErrorStrategy)
180         {
181             writefln("recover in %s index=%s, lastErrorIndex=%s, states=%s",
182                      recognizer.getRuleInvocationStack,
183                      recognizer.getInputStream().index(),
184                      lastErrorIndex,
185                      lastErrorStates);
186         }
187         if (lastErrorIndex == recognizer.getInputStream.index() &&
188             lastErrorStates !is null &&
189             lastErrorStates.contains(recognizer.getState))
190             {
191                 // uh oh, another error at same token index and previously-visited
192                 // state in ATN; must be a case where LT(1) is in the recovery
193                 // token set so nothing got consumed. Consume a single token
194                 // at least to prevent an infinite loop; this is a failsafe.
195                 debug(DefaultErrorStrategy)
196                 {
197                     stderr.writefln("seen error condition before index= %s, states=%s",
198                                     lastErrorIndex, lastErrorStates);
199                     stderr.writefln("FAILSAFE consumes %s",
200                                     recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]);
201                 }
202                 recognizer.consume;
203             }
204         lastErrorIndex = recognizer.getInputStream.index;
205         if (!lastErrorStates)
206             lastErrorStates = new IntervalSet;
207         lastErrorStates.add(recognizer.getState);
208         IntervalSet followSet = getErrorRecoverySet(recognizer);
209         consumeUntil(recognizer, followSet);
210     }
211 
212     /**
213      * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
214      * that the current lookahead symbol is consistent with what were expecting
215      * at this point in the ATN. You can call this anytime but ANTLR only
216      * generates code to check before subrules/loops and each iteration.
217      *
218      * <p>Implements Jim Idle's magic sync mechanism in closures and optional
219      * subrules. E.g.,</p>
220      *
221      * <pre>
222      * a : sync ( stuff sync )* ;
223      * sync : {consume to what can follow sync} ;
224      * </pre>
225      *
226      * At the start of a sub rule upon error, {@link #sync} performs single
227      * token deletion, if possible. If it can't do that, it bails on the current
228      * rule and uses the default error recovery, which consumes until the
229      * resynchronization set of the current rule.
230      *
231      * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
232      * with an empty alternative), then the expected set includes what follows
233      * the subrule.</p>
234      *
235      * <p>During loop iteration, it consumes until it sees a token that can start a
236      * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
237      * stay in the loop as long as possible.</p>
238      *
239      * <p><strong>ORIGINS</strong></p>
240      *
241      * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
242      * A single mismatch token or missing token would force the parser to bail
243      * out of the entire rules surrounding the loop. So, for rule</p>
244      *
245      * <pre>
246      * classDef : 'class' ID '{' member* '}'
247      * </pre>
248      *
249      * input with an extra token between members would force the parser to
250      * consume until it found the next class definition rather than the next
251      * member definition of the current class.
252      *
253      * <p>This functionality cost a little bit of effort because the parser has to
254      * compare token set at the start of the loop and at each iteration. If for
255      * some reason speed is suffering for you, you can turn off this
256      * functionality by simply overriding this method as a blank { }.</p>
257      */
258     public void sync(Parser recognizer)
259     {
260 	ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState];
261         debug(ErrorHandling)
262             {
263                 writefln("sync @ %s=%s", s.stateNumber, s.classinfo);
264             }
265         // If already recovering, don't try to sync
266         if (inErrorRecoveryMode(recognizer)) {
267             return;
268         }
269         TokenStream tokens = recognizer.getInputStream;
270         int la = tokens.LA(1);
271 
272         // try cheaper subset first; might get lucky. seems to shave a wee bit off
273         IntervalSet nextTokens = recognizer.getATN.nextTokens(s);
274         if (nextTokens.contains(la)) {
275             // We are sure the token matches
276             nextTokensContext = null;
277             nextTokensState = ATNState.INVALID_STATE_NUMBER;
278             return;
279         }
280 
281 	if (nextTokens.contains(TokenConstantDefinition.EPSILON)) {
282             if (nextTokensContext is null) {
283                 // It's possible the next token won't match; information tracked
284                 // by sync is restricted for performance.
285                 nextTokensContext = recognizer.ctx;
286                 nextTokensState = recognizer.getState;
287             }
288             return;
289         }
290 
291         switch (s.getStateType) {
292         case StateNames.BLOCK_START:
293         case StateNames.STAR_BLOCK_START:
294         case StateNames.PLUS_BLOCK_START:
295         case StateNames.STAR_LOOP_ENTRY:
296             // report error and recover if possible
297             if (singleTokenDeletion(recognizer) !is null) {
298                 return;
299             }
300             throw new InputMismatchException(recognizer);
301 
302         case StateNames.PLUS_LOOP_BACK:
303         case StateNames.STAR_LOOP_BACK:
304             debug(ErrorHandling)
305             {
306                 stderr.writefln("at loop back: %s", s);
307             }
308             reportUnwantedToken(recognizer);
309             IntervalSet expecting = recognizer.getExpectedTokens;
310             IntervalSet whatFollowsLoopIterationOrRule =
311                 expecting.or(getErrorRecoverySet(recognizer));
312             consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
313             break;
314 
315         default:
316             // do nothing if we can't identify the exact kind of ATN state
317             break;
318         }
319     }
320 
321     /**
322      * This is called by {@link #reportError} when the exception is a
323      * {@link NoViableAltException}.
324      *
325      * @see #reportError
326      *
327      * @param recognizer the parser instance
328      * @param e the recognition exception
329      */
330     protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e)
331     {
332 	TokenStream tokens = recognizer.getInputStream();
333         string input;
334         if (tokens !is null) {
335             if (e.getStartToken.getType == TokenConstantDefinition.EOF)
336                 input = "<EOF>";
337             else
338                 input = tokens.getText(e.getStartToken, e.getOffendingToken);
339         }
340         else {
341             input = "<unknown input>";
342         }
343         string msg = format("no viable alternative at input %s", escapeWSAndQuote(input));
344         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
345     }
346 
347     /**
348      * This is called by {@link #reportError} when the exception is an
349      * {@link InputMismatchException}.
350      *
351      * @see #reportError
352      *
353      * @param recognizer the parser instance
354      * @param e the recognition exception
355      */
356     protected void reportInputMismatch(Parser recognizer, InputMismatchException e)
357     {
358         string msg = format("mismatched input %1$s expecting %2$s",
359                             getTokenErrorDisplay(e.getOffendingToken),
360                             e.getExpectedTokens.toString(recognizer.getVocabulary));
361         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
362     }
363 
364     /**
365      * This is called by {@link #reportError} when the exception is a
366      * {@link FailedPredicateException}.
367      *
368      * @see #reportError
369      *
370      * @param recognizer the parser instance
371      * @param e the recognition exception
372      */
373     protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e)
374     {
375 	string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()];
376         string msg = "rule " ~ ruleName ~ " " ~ e.msg;
377         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
378     }
379 
380     /**
381      * This method is called to report a syntax error which requires the removal
382      * of a token from the input stream. At the time this method is called, the
383      * erroneous symbol is current {@code LT(1)} symbol and has not yet been
384      * removed from the input stream. When this method returns,
385      * {@code recognizer} is in error recovery mode.
386      *
387      * <p>This method is called when {@link #singleTokenDeletion} identifies
388      * single-token deletion as a viable recovery strategy for a mismatched
389      * input error.</p>
390      *
391      * <p>The default implementation simply returns if the handler is already in
392      * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
393      * enter error recovery mode, followed by calling
394      * {@link Parser#notifyErrorListeners}.</p>
395      *
396      * @param recognizer the parser instance
397      */
398     protected void reportUnwantedToken(Parser recognizer)
399     {
400 	if (inErrorRecoveryMode(recognizer)) {
401             return;
402         }
403 
404         beginErrorCondition(recognizer);
405 
406         Token t = recognizer.getCurrentToken();
407         string tokenName = getTokenErrorDisplay(t);
408         IntervalSet expecting = getExpectedTokens(recognizer);
409         string msg = "extraneous input " ~ tokenName ~ " expecting " ~
410             expecting.toString(recognizer.getVocabulary());
411         recognizer.notifyErrorListeners(t, msg, null);
412     }
413 
414     /**
415      * This method is called to report a syntax error which requires the
416      * insertion of a missing token into the input stream. At the time this
417      * method is called, the missing token has not yet been inserted. When this
418      * method returns, {@code recognizer} is in error recovery mode.
419      *
420      * <p>This method is called when {@link #singleTokenInsertion} identifies
421      * single-token insertion as a viable recovery strategy for a mismatched
422      * input error.</p>
423      *
424      * <p>The default implementation simply returns if the handler is already in
425      * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
426      * enter error recovery mode, followed by calling
427      * {@link Parser#notifyErrorListeners}.</p>
428      *
429      * @param recognizer the parser instance
430      */
431     protected void reportMissingToken(Parser recognizer)
432     {
433 	if (inErrorRecoveryMode(recognizer)) {
434             return;
435         }
436 
437         beginErrorCondition(recognizer);
438 
439         Token t = recognizer.getCurrentToken();
440         IntervalSet expecting = getExpectedTokens(recognizer);
441         string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary),
442                             getTokenErrorDisplay(t));
443 
444         recognizer.notifyErrorListeners(t, msg, null);
445     }
446 
447     /**
448      * {@inheritDoc}
449      *
450      * <p>The default implementation attempts to recover from the mismatched input
451      * by using single token insertion and deletion as described below. If the
452      * recovery attempt fails, this method throws an
453      * {@link InputMismatchException}.</p>
454      *
455      * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
456      *
457      * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
458      * right token, however, then assume {@code LA(1)} is some extra spurious
459      * token and delete it. Then consume and return the next token (which was
460      * the {@code LA(2)} token) as the successful result of the match operation.</p>
461      *
462      * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
463      *
464      * <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
465      *
466      * <p>If current token (at {@code LA(1)}) is consistent with what could come
467      * after the expected {@code LA(1)} token, then assume the token is missing
468      * and use the parser's {@link TokenFactory} to create it on the fly. The
469      * "insertion" is performed by returning the created token as the successful
470      * result of the match operation.</p>
471      *
472      * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
473      *
474      * <p><strong>EXAMPLE</strong></p>
475      *
476      * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
477      * the parser returns from the nested call to {@code expr}, it will have
478      * call chain:</p>
479      *
480      * <pre>
481      * stat &rarr; expr &rarr; atom
482      * </pre>
483      *
484      * and it will be trying to match the {@code ')'} at this point in the
485      * derivation:
486      *
487      * <pre>
488      * =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
489      *                    ^
490      * </pre>
491      *
492      * The attempt to match {@code ')'} will fail when it sees {@code ';'} and
493      * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
494      * is in the set of tokens that can follow the {@code ')'} token reference
495      * in rule {@code atom}. It can assume that you forgot the {@code ')'}.
496      */
497     public Token recoverInline(Parser recognizer)
498     {
499 	// SINGLE TOKEN DELETION
500         Token matchedSymbol = singleTokenDeletion(recognizer);
501         if (matchedSymbol !is null) {
502             // we have deleted the extra token.
503             // now, move past ttype token as if all were ok
504             recognizer.consume();
505             return matchedSymbol;
506         }
507 
508         // SINGLE TOKEN INSERTION
509         if (singleTokenInsertion(recognizer)) {
510             return getMissingSymbol(recognizer);
511         }
512 
513         // even that didn't work; must throw the exception
514         InputMismatchException e;
515         if (nextTokensContext is null) {
516             e = new InputMismatchException(recognizer);
517         } else {
518             e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext);
519         }
520         throw e;
521     }
522 
523     /**
524      * This method implements the single-token insertion inline error recovery
525      * strategy. It is called by {@link #recoverInline} if the single-token
526      * deletion strategy fails to recover from the mismatched input. If this
527      * method returns {@code true}, {@code recognizer} will be in error recovery
528      * mode.
529      *
530      * <p>This method determines whether or not single-token insertion is viable by
531      * checking if the {@code LA(1)} input symbol could be successfully matched
532      * if it were instead the {@code LA(2)} symbol. If this method returns
533      * {@code true}, the caller is responsible for creating and inserting a
534      * token with the correct type to produce this behavior.</p>
535      *
536      * @param recognizer the parser instance
537      * @return {@code true} if single-token insertion is a viable recovery
538      * strategy for the current mismatched input, otherwise {@code false}
539      */
540     protected bool singleTokenInsertion(Parser recognizer)
541     {
542 	int currentSymbolType = recognizer.getInputStream().LA(1);
543         // if current token is consistent with what could come after current
544         // ATN state, then we know we're missing a token; error recovery
545         // is free to conjure up and insert the missing token
546         ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState];
547         ATNState next = currentState.transition(0).target;
548         ATN atn = recognizer.getInterpreter().atn;
549         IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx);
550         debug(ErrorHandling)
551             {
552                 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames));
553             }
554         if ( expectingAtLL2.contains(currentSymbolType) ) {
555             reportMissingToken(recognizer);
556             return true;
557         }
558         return false;
559     }
560 
561     /**
562      * This method implements the single-token deletion inline error recovery
563      * strategy. It is called by {@link #recoverInline} to attempt to recover
564      * from mismatched input. If this method returns null, the parser and error
565      * handler state will not have changed. If this method returns non-null,
566      * {@code recognizer} will <em>not</em> be in error recovery mode since the
567      * returned token was a successful match.
568      *
569      * <p>If the single-token deletion is successful, this method calls
570      * {@link #reportUnwantedToken} to report the error, followed by
571      * {@link Parser#consume} to actually "delete" the extraneous token. Then,
572      * before returning {@link #reportMatch} is called to signal a successful
573      * match.</p>
574      *
575      * @param recognizer the parser instance
576      * @return the successfully matched {@link Token} instance if single-token
577      * deletion successfully recovers from the mismatched input, otherwise
578      * {@code null}
579      */
580     protected Token singleTokenDeletion(Parser recognizer)
581     {
582 	int nextTokenType = recognizer.getInputStream().LA(2);
583         IntervalSet expecting = getExpectedTokens(recognizer);
584         if ( expecting.contains(nextTokenType) ) {
585             reportUnwantedToken(recognizer);
586             /*
587               System.err.println("recoverFromMismatchedToken deleting "+
588               ((TokenStream)recognizer.getInputStream()).LT(1)+
589               " since "+((TokenStream)recognizer.getInputStream()).LT(2)+
590               " is what we want");
591             */
592             recognizer.consume(); // simply delete extra token
593             // we want to return the token we're actually matching
594             Token matchedSymbol = recognizer.getCurrentToken();
595             reportMatch(recognizer);  // we know current token is correct
596             return matchedSymbol;
597         }
598         return null;
599     }
600 
601     /**
602      * Conjure up a missing token during error recovery.
603      *
604      *  The recognizer attempts to recover from single missing
605      *  symbols. But, actions might refer to that missing symbol.
606      *  For example, x=ID {f($x);}. The action clearly assumes
607      *  that there has been an identifier matched previously and that
608      *  $x points at that token. If that token is missing, but
609      *  the next token in the stream is what we want we assume that
610      *  this token is missing and we keep going. Because we
611      *  have to return some token to replace the missing token,
612      *  we have to conjure one up. This method gives the user control
613      *  over the tokens returned for missing tokens. Mostly,
614      *  you will want to create something special for identifier
615      *  tokens. For literals such as '{' and ',', the default
616      *  action in the parser or tree parser works. It simply creates
617      *  a CommonToken of the appropriate type. The text will be the token.
618      *  If you change what tokens must be created by the lexer,
619      *  override this method to create the appropriate tokens.
620      */
621     protected Token getMissingSymbol(Parser recognizer)
622     {
623 	Token currentSymbol = recognizer.getCurrentToken();
624         IntervalSet expecting = getExpectedTokens(recognizer);
625         int expectedTokenType = TokenConstantDefinition.INVALID_TYPE;
626         if (!expecting.isNil) {
627             expectedTokenType = expecting.getMinElement(); // get any element
628         }
629         string tokenText;
630         if (expectedTokenType == TokenConstantDefinition.EOF)
631             tokenText = "<missing EOF>";
632         else
633             tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType));
634         Token current = currentSymbol;
635         Token lookback = recognizer.getInputStream().LT(-1);
636         if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) {
637             current = lookback;
638         }
639         TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream());
640         return
641             recognizer.tokenFactory().create(tsp, expectedTokenType, tokenText,
642                                              TokenConstantDefinition.DEFAULT_CHANNEL,
643                                              -1, -1,
644                                              current.getLine(), current.getCharPositionInLine());
645 
646     }
647 
648     protected IntervalSet getExpectedTokens(Parser recognizer)
649     {
650 	return recognizer.getExpectedTokens;
651     }
652 
653     /**
654      * How should a token be displayed in an error message? The default
655      * is to display just the text, but during development you might
656      * want to have a lot of information spit out.  Override in that case
657      * to use t.toString() (which, for CommonToken, dumps everything about
658      * the token). This is better than forcing you to override a method in
659      * your token objects because you don't have to go modify your lexer
660      * so that it creates a new Java type.
661      */
662     protected string getTokenErrorDisplay(Token t)
663     {
664 	if (t is null) return "<no token>";
665         string s = getSymbolText(t);
666         if (s is null) {
667             if (getSymbolType(t) == TokenConstantDefinition.EOF) {
668                 s = "<EOF>";
669             }
670             else {
671                 s = format("<%s>", getSymbolType(t));
672             }
673         }
674         return escapeWSAndQuote(s);
675     }
676 
677     protected string getSymbolText(Token symbol)
678     {
679         return symbol.getText;
680     }
681 
682     protected int getSymbolType(Token symbol)
683     {
684 	return symbol.getType();
685     }
686 
687     protected string escapeWSAndQuote(string s)
688     {
689         //		if ( s==null ) return s;
690         s = s.replace("\n","\\n");
691         s = s.replace("\r","\\r");
692         s = s.replace("\t","\\t");
693         return "'" ~ s ~ "'";
694     }
695 
696     /**
697      * Compute the error recovery set for the current rule.  During
698      *  rule invocation, the parser pushes the set of tokens that can
699      *  follow that rule reference on the stack; this amounts to
700      *  computing FIRST of what follows the rule reference in the
701      *  enclosing rule. See LinearApproximator.FIRST().
702      *  This local follow set only includes tokens
703      *  from within the rule; i.e., the FIRST computation done by
704      *  ANTLR stops at the end of a rule.
705      *
706      *  EXAMPLE
707      *
708      *  When you find a "no viable alt exception", the input is not
709      *  consistent with any of the alternatives for rule r.  The best
710      *  thing to do is to consume tokens until you see something that
711      *  can legally follow a call to r *or* any rule that called r.
712      *  You don't want the exact set of viable next tokens because the
713      *  input might just be missing a token--you might consume the
714      *  rest of the input looking for one of the missing tokens.
715      *
716      *  Consider grammar:
717      *
718      *  a : '[' b ']'
719      *    | '(' b ')'
720      *    ;
721      *  b : c '^' INT ;
722      *  c : ID
723      *    | INT
724      *    ;
725      *
726      *  At each rule invocation, the set of tokens that could follow
727      *  that rule is pushed on a stack.  Here are the various
728      *  context-sensitive follow sets:
729      *
730      *  FOLLOW(b1_in_a) = FIRST(']') = ']'
731      *  FOLLOW(b2_in_a) = FIRST(')') = ')'
732      *  FOLLOW(c_in_b) = FIRST('^') = '^'
733      *
734      *  Upon erroneous input "[]", the call chain is
735      *
736      *  a -> b -> c
737      *
738      *  and, hence, the follow context stack is:
739      *
740      *  depth     follow set       start of rule execution
741      *    0         <EOF>                    a (from main())
742      *    1          ']'                     b
743      *    2          '^'                     c
744      *
745      *  Notice that ')' is not included, because b would have to have
746      *  been called from a different context in rule a for ')' to be
747      *  included.
748      *
749      *  For error recovery, we cannot consider FOLLOW(c)
750      *  (context-sensitive or otherwise).  We need the combined set of
751      *  all context-sensitive FOLLOW sets--the set of all tokens that
752      *  could follow any reference in the call chain.  We need to
753      *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
754      *  we resync'd to that token, we'd consume until EOF.  We need to
755      *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
756      *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
757      *  not consume anything. After printing an error, rule c would
758      *  return normally.  Rule b would not find the required '^' though.
759      *  At this point, it gets a mismatched token error and throws an
760      *  exception (since LA(1) is not in the viable following token
761      *  set).  The rule exception handler tries to recover, but finds
762      *  the same recovery set and doesn't consume anything.  Rule b
763      *  exits normally returning to rule a.  Now it finds the ']' (and
764      *  with the successful match exits errorRecovery mode).
765      *
766      *  So, you can see that the parser walks up the call chain looking
767      *  for the token that was a member of the recovery set.
768      *
769      *  Errors are not generated in errorRecovery mode.
770      *
771      *  ANTLR's error recovery mechanism is based upon original ideas:
772      *
773      *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
774      *
775      *  and
776      *
777      *  "A note on error recovery in recursive descent parsers":
778      *  http://portal.acm.org/citation.cfm?id=947902.947905
779      *
780      *  Later, Josef Grosch had some good ideas:
781      *
782      *  "Efficient and Comfortable Error Recovery in Recursive Descent
783      *  Parsers":
784      *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
785      *
786      *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
787      *  at run-time upon error to avoid overhead during parsing.
788      */
789     protected IntervalSet getErrorRecoverySet(Parser recognizer)
790     {
791 	ATN atn = recognizer.getInterpreter().atn;
792         RuleContext ctx = recognizer.ctx;
793         IntervalSet recoverSet = new IntervalSet();
794         while (ctx !is null && ctx.invokingState >= 0) {
795             // compute what follows who invoked us
796             ATNState invokingState = atn.states[ctx.invokingState];
797             RuleTransition rt = cast(RuleTransition)invokingState.transition(0);
798             IntervalSet follow = atn.nextTokens(rt.followState);
799             recoverSet.addAll(follow);
800             ctx = ctx.parent;
801         }
802         recoverSet.remove(TokenConstantDefinition.EPSILON);
803         //		System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
804         return recoverSet;
805     }
806 
807     /**
808      * Consume tokens until one matches the given token set.
809      */
810     protected void consumeUntil(Parser recognizer, IntervalSet set)
811     {
812 	//stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames()));
813         int ttype = recognizer.getInputStream().LA(1);
814         while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) {
815             debug(ErrorHandling)
816             {
817                 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]);
818             }
819             // recognizer.getInputStream().consume();
820             recognizer.consume();
821             ttype = recognizer.getInputStream().LA(1);
822         }
823     }
824 
825 }