antlr.v4.runtime.DefaultErrorStrategy source code

1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.DefaultErrorStrategy;
8 
9 import antlr.v4.runtime.ANTLRErrorStrategy;
10 import antlr.v4.runtime.CharStream;
11 import antlr.v4.runtime.FailedPredicateException;
12 import antlr.v4.runtime.InputMismatchException;
13 import antlr.v4.runtime.NoViableAltException;
14 import antlr.v4.runtime.Parser;
15 import antlr.v4.runtime.ParserRuleContext;
16 import antlr.v4.runtime.RecognitionException;
17 import antlr.v4.runtime.RuleContext;
18 import antlr.v4.runtime.Token;
19 import antlr.v4.runtime.TokenConstantDefinition;
20 import antlr.v4.runtime.TokenSource;
21 import antlr.v4.runtime.TokenStream;
22 import antlr.v4.runtime.atn.ATN;
23 import antlr.v4.runtime.atn.ATNState;
24 import antlr.v4.runtime.atn.RuleTransition;
25 import antlr.v4.runtime.atn.StateNames;
26 import antlr.v4.runtime.misc.IntervalSet;
27 import std.array;
28 import std.conv;
29 import std.format;
30 import std.stdio;
31 import std.typecons;
32 import std.variant;
33 
34 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b");
35 
36 /**
37  * This is the default implementation of {@link ANTLRErrorStrategy} used for
38  * error reporting and recovery in ANTLR parsers.
39  */
40 class DefaultErrorStrategy : ANTLRErrorStrategy
41 {
42 
43     /**
44      * Indicates whether the error strategy is currently "recovering from an
45      * error". This is used to suppress reporting multiple error messages while
46      * attempting to recover from a detected syntax error.
47      *
48      *  @see #inErrorRecoveryMode
49      */
50     protected bool errorRecoveryMode;
51 
52     /**
53      * The index into the input stream where the last error occurred.
54      * This is used to prevent infinite loops where an error is found
55      * but no token is consumed during recovery...another error is found,
56      * ad nauseum.  This is a failsafe mechanism to guarantee that at least
57      * one token/tree node is consumed for two errors.
58      */
59     protected int lastErrorIndex = -1;
60 
61     public IntervalSet lastErrorStates;
62 
63     /**
64      * This field is used to propagate information about the lookahead following
65      * the previous match. Since prediction prefers completing the current rule
66      * to error recovery efforts, error reporting may occur later than the
67      * original point where it was discoverable. The original context is used to
68      * compute the true expected sets as though the reporting occurred as early
69      * as possible.
70      */
71     protected ParserRuleContext nextTokensContext;
72 
73     /**
74      * @see #nextTokensContext
75      */
76     protected int nextTokensState;
77 
78     /**
79      * <p>The default implementation simply calls {@link #endErrorCondition} to
80      * ensure that the handler is not in error recovery mode.</p>
81      */
82     public void reset(Parser recognizer)
83     {
84         endErrorCondition(recognizer);
85     }
86 
87     /**
88      * This method is called to enter error recovery mode when a recognition
89      * exception is reported.
90      *
91      * @param recognizer the parser instance
92      */
93     protected void beginErrorCondition(Parser recognizer)
94     {
95         errorRecoveryMode = true;
96     }
97 
98     public bool inErrorRecoveryMode(Parser recognizer)
99     {
100         return errorRecoveryMode;
101     }
102 
103     /**
104      * This method is called to leave error recovery mode after recovering from
105      * a recognition exception.
106      *
107      * @param recognizer
108      */
109     protected void endErrorCondition(Parser recognizer)
110     {
111     errorRecoveryMode = false;
112         lastErrorStates = null;
113         lastErrorIndex = -1;
114     }
115 
116     /**
117      * {@inheritDoc}
118      *
119      * <p>The default implementation simply calls {@link #endErrorCondition}.</p>
120      */
121     public void reportMatch(Parser recognizer)
122     {
123     endErrorCondition(recognizer);
124     }
125 
126     /**
127      * {@inheritDoc}
128      *
129      * <p>The default implementation returns immediately if the handler is already
130      * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
131      * and dispatches the reporting task based on the runtime type of {@code e}
132      * according to the following table.</p>
133      *
134      * <ul>
135      * <li>{@link NoViableAltException}: Dispatches the call to
136      * {@link #reportNoViableAlternative}</li>
137      * <li>{@link InputMismatchException}: Dispatches the call to
138      * {@link #reportInputMismatch}</li>
139      * <li>{@link FailedPredicateException}: Dispatches the call to
140      * {@link #reportFailedPredicate}</li>
141      * <li>All other types: calls {@link Parser#notifyErrorListeners} to report
142      * the exception</li>
143      * </ul>
144      */
145     public void reportError(Parser recognizer, RecognitionException e)
146     {
147     // if we've already reported an error and have not matched a token
148         // yet successfully, don't report any errors.
149         if (inErrorRecoveryMode(recognizer)) {
150             debug(DefaultErrorStrategy)
151             {
152                 writefln("DefaultErrorStrategy:[SPURIOUS]");
153             }
154             return; // don't report spurious errors
155         }
156         beginErrorCondition(recognizer);
157         if (cast(NoViableAltException)e) {
158             reportNoViableAlternative(recognizer, cast(NoViableAltException)e);
159         }
160         else if (cast(InputMismatchException)e) {
161             reportInputMismatch(recognizer, cast(InputMismatchException)e);
162         }
163         else if (cast(FailedPredicateException)e) {
164             reportFailedPredicate(recognizer, cast(FailedPredicateException)e);
165         }
166         else {
167             stderr.writefln!"unknown recognition error type: %s"(e.stringof);
168             recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e);
169         }
170     }
171 
172     /**
173      * {@inheritDoc}
174      *
175      * <p>The default implementation resynchronizes the parser by consuming tokens
176      * until we find one in the resynchronization set--loosely the set of tokens
177      * that can follow the current rule.</p>
178      */
179     public void recover(Parser recognizer, RecognitionException e)
180     {
181         debug(DefaultErrorStrategy)
182         {
183             writefln!"recover in %s index=%s, lastErrorIndex=%s, states=%s"(
184                      recognizer.getRuleInvocationStack,
185                      recognizer.getInputStream().index(),
186                      lastErrorIndex,
187                      lastErrorStates);
188         }
189         if (lastErrorIndex == recognizer.getInputStream.index() &&
190             lastErrorStates !is null &&
191             lastErrorStates.contains(recognizer.getState))
192             {
193                 // uh oh, another error at same token index and previously-visited
194                 // state in ATN; must be a case where LT(1) is in the recovery
195                 // token set so nothing got consumed. Consume a single token
196                 // at least to prevent an infinite loop; this is a failsafe.
197                 debug(DefaultErrorStrategy)
198                 {
199                     stderr.writefln("seen error condition before index= %s, states=%s",
200                                     lastErrorIndex, lastErrorStates);
201                     stderr.writefln("FAILSAFE consumes %s",
202                                     recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]);
203                 }
204                 recognizer.consume;
205             }
206         lastErrorIndex = to!int(recognizer.getInputStream.index);
207         if (!lastErrorStates)
208             lastErrorStates = new IntervalSet;
209         lastErrorStates.add(recognizer.getState);
210         IntervalSet followSet = getErrorRecoverySet(recognizer);
211         consumeUntil(recognizer, followSet);
212     }
213 
214     /**
215      * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
216      * that the current lookahead symbol is consistent with what were expecting
217      * at this point in the ATN. You can call this anytime but ANTLR only
218      * generates code to check before subrules/loops and each iteration.
219      *
220      * <p>Implements Jim Idle's magic sync mechanism in closures and optional
221      * subrules. E.g.,</p>
222      *
223      * <pre>
224      * a : sync ( stuff sync )* ;
225      * sync : {consume to what can follow sync} ;
226      * </pre>
227      *
228      * At the start of a sub rule upon error, {@link #sync} performs single
229      * token deletion, if possible. If it can't do that, it bails on the current
230      * rule and uses the default error recovery, which consumes until the
231      * resynchronization set of the current rule.
232      *
233      * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
234      * with an empty alternative), then the expected set includes what follows
235      * the subrule.</p>
236      *
237      * <p>During loop iteration, it consumes until it sees a token that can start a
238      * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
239      * stay in the loop as long as possible.</p>
240      *
241      * <p><strong>ORIGINS</strong></p>
242      *
243      * <p>Previous versions of ANTLR did a poor job of their recovery within loops.
244      * A single mismatch token or missing token would force the parser to bail
245      * out of the entire rules surrounding the loop. So, for rule</p>
246      *
247      * <pre>
248      * classDef : 'class' ID '{' member* '}'
249      * </pre>
250      *
251      * input with an extra token between members would force the parser to
252      * consume until it found the next class definition rather than the next
253      * member definition of the current class.
254      *
255      * <p>This functionality cost a little bit of effort because the parser has to
256      * compare token set at the start of the loop and at each iteration. If for
257      * some reason speed is suffering for you, you can turn off this
258      * functionality by simply overriding this method as a blank { }.</p>
259      */
260     public void sync(Parser recognizer)
261     {
262     ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState];
263         debug(ErrorHandling)
264             {
265                 writefln("sync @ %s=%s", s.stateNumber, s.classinfo);
266             }
267         // If already recovering, don't try to sync
268         if (inErrorRecoveryMode(recognizer)) {
269             return;
270         }
271         TokenStream tokens = recognizer.getInputStream;
272         int la = tokens.LA(1);
273 
274         // try cheaper subset first; might get lucky. seems to shave a wee bit off
275         IntervalSet nextTokens = recognizer.getATN.nextTokens(s);
276         if (nextTokens.contains(la)) {
277             // We are sure the token matches
278             nextTokensContext = null;
279             nextTokensState = ATNState.INVALID_STATE_NUMBER;
280             return;
281         }
282 
283     if (nextTokens.contains(TokenConstantDefinition.EPSILON)) {
284             if (nextTokensContext is null) {
285                 // It's possible the next token won't match; information tracked
286                 // by sync is restricted for performance.
287                 nextTokensContext = recognizer.ctx;
288                 nextTokensState = recognizer.getState;
289             }
290             return;
291         }
292 
293         switch (s.getStateType) {
294         case StateNames.BLOCK_START:
295         case StateNames.STAR_BLOCK_START:
296         case StateNames.PLUS_BLOCK_START:
297         case StateNames.STAR_LOOP_ENTRY:
298             // report error and recover if possible
299             if (singleTokenDeletion(recognizer) !is null) {
300                 return;
301             }
302             throw new InputMismatchException(recognizer);
303 
304         case StateNames.PLUS_LOOP_BACK:
305         case StateNames.STAR_LOOP_BACK:
306             debug(ErrorHandling)
307             {
308                 stderr.writefln("at loop back: %s", s);
309             }
310             reportUnwantedToken(recognizer);
311             IntervalSet expecting = recognizer.getExpectedTokens;
312             IntervalSet whatFollowsLoopIterationOrRule =
313                 expecting.or(getErrorRecoverySet(recognizer));
314             consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
315             break;
316 
317         default:
318             // do nothing if we can't identify the exact kind of ATN state
319             break;
320         }
321     }
322 
323     /**
324      * This is called by {@link #reportError} when the exception is a
325      * {@link NoViableAltException}.
326      *
327      * @see #reportError
328      *
329      * @param recognizer the parser instance
330      * @param e the recognition exception
331      */
332     protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e)
333     {
334     TokenStream tokens = recognizer.getInputStream();
335         string input;
336         if (tokens) {
337             if (e.getStartToken.getType == TokenConstantDefinition.EOF)
338                 input = "<EOF>";
339             else
340                 input = to!string(tokens.getText(e.getStartToken, e.getOffendingToken));
341         }
342         else {
343             input = "<unknown input>";
344         }
345         string msg = format("no viable alternative at input %s", escapeWSAndQuote(input));
346         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
347     }
348 
349     /**
350      * This is called by {@link #reportError} when the exception is an
351      * {@link InputMismatchException}.
352      *
353      * @see #reportError
354      *
355      * @param recognizer the parser instance
356      * @param e the recognition exception
357      */
358     protected void reportInputMismatch(Parser recognizer, InputMismatchException e)
359     {
360         string msg = format!"mismatched input %1$s expecting %2$s"
361                             (
362                                 getTokenErrorDisplay(e.getOffendingToken),
363                                 e.getExpectedTokens.toString(recognizer.getVocabulary)
364                             );
365         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
366     }
367 
368     /**
369      * This is called by {@link #reportError} when the exception is a
370      * {@link FailedPredicateException}.
371      *
372      * @see #reportError
373      *
374      * @param recognizer the parser instance
375      * @param e the recognition exception
376      */
377     protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e)
378     {
379     string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()];
380         string msg = "rule " ~ ruleName ~ " " ~ e.msg;
381         recognizer.notifyErrorListeners(e.getOffendingToken, msg, e);
382     }
383 
384     /**
385      * This method is called to report a syntax error which requires the removal
386      * of a token from the input stream. At the time this method is called, the
387      * erroneous symbol is current {@code LT(1)} symbol and has not yet been
388      * removed from the input stream. When this method returns,
389      * {@code recognizer} is in error recovery mode.
390      *
391      * <p>This method is called when {@link #singleTokenDeletion} identifies
392      * single-token deletion as a viable recovery strategy for a mismatched
393      * input error.</p>
394      *
395      * <p>The default implementation simply returns if the handler is already in
396      * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
397      * enter error recovery mode, followed by calling
398      * {@link Parser#notifyErrorListeners}.</p>
399      *
400      * @param recognizer the parser instance
401      */
402     protected void reportUnwantedToken(Parser recognizer)
403     {
404     if (inErrorRecoveryMode(recognizer)) {
405             return;
406         }
407 
408         beginErrorCondition(recognizer);
409 
410         Token t = recognizer.getCurrentToken();
411         string tokenName = getTokenErrorDisplay(t);
412         IntervalSet expecting = getExpectedTokens(recognizer);
413         string msg = "extraneous input " ~ tokenName ~ " expecting " ~
414             expecting.toString(recognizer.getVocabulary());
415         recognizer.notifyErrorListeners(t, msg, null);
416     }
417 
418     /**
419      * This method is called to report a syntax error which requires the
420      * insertion of a missing token into the input stream. At the time this
421      * method is called, the missing token has not yet been inserted. When this
422      * method returns, {@code recognizer} is in error recovery mode.
423      *
424      * <p>This method is called when {@link #singleTokenInsertion} identifies
425      * single-token insertion as a viable recovery strategy for a mismatched
426      * input error.</p>
427      *
428      * <p>The default implementation simply returns if the handler is already in
429      * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
430      * enter error recovery mode, followed by calling
431      * {@link Parser#notifyErrorListeners}.</p>
432      *
433      * @param recognizer the parser instance
434      */
435     protected void reportMissingToken(Parser recognizer)
436     {
437     if (inErrorRecoveryMode(recognizer)) {
438             return;
439         }
440 
441         beginErrorCondition(recognizer);
442 
443         Token t = recognizer.getCurrentToken();
444         IntervalSet expecting = getExpectedTokens(recognizer);
445         string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary),
446                             getTokenErrorDisplay(t));
447 
448         recognizer.notifyErrorListeners(t, msg, null);
449     }
450 
451     /**
452      * {@inheritDoc}
453      *
454      * <p>The default implementation attempts to recover from the mismatched input
455      * by using single token insertion and deletion as described below. If the
456      * recovery attempt fails, this method throws an
457      * {@link InputMismatchException}.</p>
458      *
459      * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
460      *
461      * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
462      * right token, however, then assume {@code LA(1)} is some extra spurious
463      * token and delete it. Then consume and return the next token (which was
464      * the {@code LA(2)} token) as the successful result of the match operation.</p>
465      *
466      * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
467      *
468      * <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
469      *
470      * <p>If current token (at {@code LA(1)}) is consistent with what could come
471      * after the expected {@code LA(1)} token, then assume the token is missing
472      * and use the parser's {@link TokenFactory} to create it on the fly. The
473      * "insertion" is performed by returning the created token as the successful
474      * result of the match operation.</p>
475      *
476      * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
477      *
478      * <p><strong>EXAMPLE</strong></p>
479      *
480      * <p>For example, Input {@code i=(3;} is clearly missing the {@code '$(RPAREN)'}. When
481      * the parser returns from the nested call to {@code expr}, it will have
482      * call chain:</p>
483      *
484      * <pre>
485      * stat &rarr; expr &rarr; atom
486      * </pre>
487      *
488      * and it will be trying to match the {@code '$(RPAREN)'} at this point in the
489      * derivation:
490      *
491      * <pre>
492      * =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
493      *                    ^
494      * </pre>
495      *
496      * The attempt to match {@code '$(RPAREN)'} will fail when it sees {@code ';'} and
497      * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
498      * is in the set of tokens that can follow the {@code '$(RPAREN)'} token reference
499      * in rule {@code atom}. It can assume that you forgot the {@code ')'}.
500      */
501     public Token recoverInline(Parser recognizer)
502     {
503     // SINGLE TOKEN DELETION
504         Token matchedSymbol = singleTokenDeletion(recognizer);
505         if (matchedSymbol !is null) {
506             // we have deleted the extra token.
507             // now, move past ttype token as if all were ok
508             recognizer.consume();
509             return matchedSymbol;
510         }
511 
512         // SINGLE TOKEN INSERTION
513         if (singleTokenInsertion(recognizer)) {
514             return getMissingSymbol(recognizer);
515         }
516 
517         // even that didn't work; must throw the exception
518         InputMismatchException e;
519         if (nextTokensContext is null) {
520             e = new InputMismatchException(recognizer);
521         } else {
522             e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext);
523         }
524         throw e;
525     }
526 
527     /**
528      * This method implements the single-token insertion inline error recovery
529      * strategy. It is called by {@link #recoverInline} if the single-token
530      * deletion strategy fails to recover from the mismatched input. If this
531      * method returns {@code true}, {@code recognizer} will be in error recovery
532      * mode.
533      *
534      * <p>This method determines whether or not single-token insertion is viable by
535      * checking if the {@code LA(1)} input symbol could be successfully matched
536      * if it were instead the {@code LA(2)} symbol. If this method returns
537      * {@code true}, the caller is responsible for creating and inserting a
538      * token with the correct type to produce this behavior.</p>
539      *
540      * @param recognizer the parser instance
541      * @return {@code true} if single-token insertion is a viable recovery
542      * strategy for the current mismatched input, otherwise {@code false}
543      */
544     protected bool singleTokenInsertion(Parser recognizer)
545     {
546     int currentSymbolType = recognizer.getInputStream().LA(1);
547         // if current token is consistent with what could come after current
548         // ATN state, then we know we're missing a token; error recovery
549         // is free to conjure up and insert the missing token
550         ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState];
551         ATNState next = currentState.transition(0).target;
552         ATN atn = recognizer.getInterpreter().atn;
553         IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx);
554         debug(ErrorHandling)
555             {
556                 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames));
557             }
558         if ( expectingAtLL2.contains(currentSymbolType) ) {
559             reportMissingToken(recognizer);
560             return true;
561         }
562         return false;
563     }
564 
565     /**
566      * This method implements the single-token deletion inline error recovery
567      * strategy. It is called by {@link #recoverInline} to attempt to recover
568      * from mismatched input. If this method returns null, the parser and error
569      * handler state will not have changed. If this method returns non-null,
570      * {@code recognizer} will <em>not</em> be in error recovery mode since the
571      * returned token was a successful match.
572      *
573      * <p>If the single-token deletion is successful, this method calls
574      * {@link #reportUnwantedToken} to report the error, followed by
575      * {@link Parser#consume} to actually "delete" the extraneous token. Then,
576      * before returning {@link #reportMatch} is called to signal a successful
577      * match.</p>
578      *
579      * @param recognizer the parser instance
580      * @return the successfully matched {@link Token} instance if single-token
581      * deletion successfully recovers from the mismatched input, otherwise
582      * {@code null}
583      */
584     protected Token singleTokenDeletion(Parser recognizer)
585     {
586     int nextTokenType = recognizer.getInputStream().LA(2);
587         IntervalSet expecting = getExpectedTokens(recognizer);
588         if ( expecting.contains(nextTokenType) ) {
589             reportUnwantedToken(recognizer);
590             /*
591               System.err.println("recoverFromMismatchedToken deleting "+
592               ((TokenStream)recognizer.getInputStream()).LT(1)+
593               " since "+((TokenStream)recognizer.getInputStream()).LT(2)+
594               " is what we want");
595             */
596             recognizer.consume(); // simply delete extra token
597             // we want to return the token we're actually matching
598             Token matchedSymbol = recognizer.getCurrentToken();
599             reportMatch(recognizer);  // we know current token is correct
600             return matchedSymbol;
601         }
602         return null;
603     }
604 
605     /**
606      * Conjure up a missing token during error recovery.
607      *
608      *  The recognizer attempts to recover from single missing
609      *  symbols. But, actions might refer to that missing symbol.
610      *  For example, x=ID {f($x);}. The action clearly assumes
611      *  that there has been an identifier matched previously and that
612      *  $x points at that token. If that token is missing, but
613      *  the next token in the stream is what we want we assume that
614      *  this token is missing and we keep going. Because we
615      *  have to return some token to replace the missing token,
616      *  we have to conjure one up. This method gives the user control
617      *  over the tokens returned for missing tokens. Mostly,
618      *  you will want to create something special for identifier
619      *  tokens. For literals such as '{' and ',', the default
620      *  action in the parser or tree parser works. It simply creates
621      *  a CommonToken of the appropriate type. The text will be the token.
622      *  If you change what tokens must be created by the lexer,
623      *  override this method to create the appropriate tokens.
624      */
625     protected Token getMissingSymbol(Parser recognizer)
626     {
627     Token currentSymbol = recognizer.getCurrentToken();
628         IntervalSet expecting = getExpectedTokens(recognizer);
629         int expectedTokenType = TokenConstantDefinition.INVALID_TYPE;
630         if (!expecting.isNil) {
631             expectedTokenType = expecting.getMinElement(); // get any element
632         }
633         string tokenText;
634         if (expectedTokenType == TokenConstantDefinition.EOF)
635             tokenText = "<missing EOF>";
636         else
637             tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType));
638         Token current = currentSymbol;
639         Token lookback = recognizer.getInputStream().LT(-1);
640         if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) {
641             current = lookback;
642         }
643         TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream());
644         Variant v = tokenText;
645         return
646             recognizer.tokenFactory().create(tsp, expectedTokenType, v,
647                                              TokenConstantDefinition.DEFAULT_CHANNEL,
648                                              -1, -1,
649                                              current.getLine(), current.getCharPositionInLine());
650 
651     }
652 
653     protected IntervalSet getExpectedTokens(Parser recognizer)
654     {
655     return recognizer.getExpectedTokens;
656     }
657 
658     /**
659      * How should a token be displayed in an error message? The default
660      * is to display just the text, but during development you might
661      * want to have a lot of information spit out.  Override in that case
662      * to use t.toString() (which, for CommonToken, dumps everything about
663      * the token). This is better than forcing you to override a method in
664      * your token objects because you don't have to go modify your lexer
665      * so that it creates a new Java type.
666      */
667     protected string getTokenErrorDisplay(Token t)
668     {
669     if (t is null) return "<no token>";
670         string s = getSymbolText(t);
671         if (s is null) {
672             if (getSymbolType(t) == TokenConstantDefinition.EOF) {
673                 s = "<EOF>";
674             }
675             else {
676                 s = format("<%s>", getSymbolType(t));
677             }
678         }
679         return escapeWSAndQuote(s);
680     }
681 
682     protected string getSymbolText(Token symbol)
683     {
684         return to!string(symbol.getText);
685     }
686 
687     protected int getSymbolType(Token symbol)
688     {
689     return symbol.getType();
690     }
691 
692     protected string escapeWSAndQuote(string s)
693     {
694         s = s.replace("\n","\\n");
695         s = s.replace("\r","\\r");
696         s = s.replace("\t","\\t");
697         return "'" ~ s ~ "'";
698     }
699 
700     /**
701      * Compute the error recovery set for the current rule.  During
702      *  rule invocation, the parser pushes the set of tokens that can
703      *  follow that rule reference on the stack; this amounts to
704      *  computing FIRST of what follows the rule reference in the
705      *  enclosing rule. See LinearApproximator.FIRST().
706      *  This local follow set only includes tokens
707      *  from within the rule; i.e., the FIRST computation done by
708      *  ANTLR stops at the end of a rule.
709      *
710      *  EXAMPLE
711      *
712      *  When you find a "no viable alt exception", the input is not
713      *  consistent with any of the alternatives for rule r.  The best
714      *  thing to do is to consume tokens until you see something that
715      *  can legally follow a call to r *or* any rule that called r.
716      *  You don't want the exact set of viable next tokens because the
717      *  input might just be missing a token--you might consume the
718      *  rest of the input looking for one of the missing tokens.
719      *
720      *  Consider grammar:
721      *
722      *  a : '[' b ']'
723      *    | '(' b ')'
724      *    ;
725      *  b : c '^' INT ;
726      *  c : ID
727      *    | INT
728      *    ;
729      *
730      *  At each rule invocation, the set of tokens that could follow
731      *  that rule is pushed on a stack.  Here are the various
732      *  context-sensitive follow sets:
733      *
734      *  FOLLOW(b1_in_a) = FIRST(']') = ']'
735      *  FOLLOW(b2_in_a) = FIRST('$(RPAREN)') = '$(RPAREN)'
736      *  FOLLOW(c_in_b) = FIRST('^') = '^'
737      *
738      *  Upon erroneous input "[]", the call chain is
739      *
740      *  a -> b -> c
741      *
742      *  and, hence, the follow context stack is:
743      *
744      *  depth     follow set       start of rule execution
745      *    0         <EOF>                    a (from main())
746      *    1          ']'                     b
747      *    2          '^'                     c
748      *
749      *  Notice that '$(RPAREN)' is not included, because b would have to have
750      *  been called from a different context in rule a for '$(RPAREN)' to be
751      *  included.
752      *
753      *  For error recovery, we cannot consider FOLLOW(c)
754      *  (context-sensitive or otherwise).  We need the combined set of
755      *  all context-sensitive FOLLOW sets--the set of all tokens that
756      *  could follow any reference in the call chain.  We need to
757      *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
758      *  we resync'd to that token, we'd consume until EOF.  We need to
759      *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
760      *  In this case, for input "[]", LA(1) is ']' and in the set, so we would
761      *  not consume anything. After printing an error, rule c would
762      *  return normally.  Rule b would not find the required '^' though.
763      *  At this point, it gets a mismatched token error and throws an
764      *  exception (since LA(1) is not in the viable following token
765      *  set).  The rule exception handler tries to recover, but finds
766      *  the same recovery set and doesn't consume anything.  Rule b
767      *  exits normally returning to rule a.  Now it finds the ']' (and
768      *  with the successful match exits errorRecovery mode).
769      *
770      *  So, you can see that the parser walks up the call chain looking
771      *  for the token that was a member of the recovery set.
772      *
773      *  Errors are not generated in errorRecovery mode.
774      *
775      *  ANTLR's error recovery mechanism is based upon original ideas:
776      *
777      *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
778      *
779      *  and
780      *
781      *  "A note on error recovery in recursive descent parsers":
782      *  http://portal.acm.org/citation.cfm?id=947902.947905
783      *
784      *  Later, Josef Grosch had some good ideas:
785      *
786      *  "Efficient and Comfortable Error Recovery in Recursive Descent
787      *  Parsers":
788      *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
789      *
790      *  Like Grosch I implement context-sensitive FOLLOW sets that are combined
791      *  at run-time upon error to avoid overhead during parsing.
792      */
793     protected IntervalSet getErrorRecoverySet(Parser recognizer)
794     {
795     ATN atn = recognizer.getInterpreter().atn;
796         RuleContext ctx = recognizer.ctx;
797         IntervalSet recoverSet = new IntervalSet();
798         while (ctx !is null && ctx.invokingState >= 0) {
799             // compute what follows who invoked us
800             ATNState invokingState = atn.states[ctx.invokingState];
801             RuleTransition rt = cast(RuleTransition)invokingState.transition(0);
802             IntervalSet follow = atn.nextTokens(rt.followState);
803             recoverSet.addAll(follow);
804             ctx = ctx.parent;
805         }
806         recoverSet.remove(TokenConstantDefinition.EPSILON);
807         //      System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames()));
808         return recoverSet;
809     }
810 
811     /**
812      * Consume tokens until one matches the given token set.
813      */
814     protected void consumeUntil(Parser recognizer, IntervalSet set)
815     {
816     //stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames()));
817         int ttype = recognizer.getInputStream().LA(1);
818         while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) {
819             debug(ErrorHandling)
820             {
821                 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]);
822             }
823             // recognizer.getInputStream().consume();
824             recognizer.consume();
825             ttype = recognizer.getInputStream().LA(1);
826         }
827     }
828 
829 }