1 /* 2 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.DefaultErrorStrategy; 8 9 import antlr.v4.runtime.ANTLRErrorStrategy; 10 import antlr.v4.runtime.CharStream; 11 import antlr.v4.runtime.FailedPredicateException; 12 import antlr.v4.runtime.InputMismatchException; 13 import antlr.v4.runtime.NoViableAltException; 14 import antlr.v4.runtime.Parser; 15 import antlr.v4.runtime.ParserRuleContext; 16 import antlr.v4.runtime.RecognitionException; 17 import antlr.v4.runtime.RuleContext; 18 import antlr.v4.runtime.Token; 19 import antlr.v4.runtime.TokenConstantDefinition; 20 import antlr.v4.runtime.TokenSource; 21 import antlr.v4.runtime.TokenStream; 22 import antlr.v4.runtime.atn.ATN; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.RuleTransition; 25 import antlr.v4.runtime.atn.StateNames; 26 import antlr.v4.runtime.misc.IntervalSet; 27 import std.array; 28 import std.format; 29 import std.stdio; 30 import std.typecons; 31 32 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b"); 33 34 /** 35 * This is the default implementation of {@link ANTLRErrorStrategy} used for 36 * error reporting and recovery in ANTLR parsers. 37 */ 38 class DefaultErrorStrategy : ANTLRErrorStrategy 39 { 40 41 /** 42 * Indicates whether the error strategy is currently "recovering from an 43 * error". This is used to suppress reporting multiple error messages while 44 * attempting to recover from a detected syntax error. 45 * 46 * @see #inErrorRecoveryMode 47 */ 48 protected bool errorRecoveryMode; 49 50 /** 51 * The index into the input stream where the last error occurred. 52 * This is used to prevent infinite loops where an error is found 53 * but no token is consumed during recovery...another error is found, 54 * ad nauseum. This is a failsafe mechanism to guarantee that at least 55 * one token/tree node is consumed for two errors. 56 */ 57 protected int lastErrorIndex = -1; 58 59 public IntervalSet lastErrorStates; 60 61 /** 62 * This field is used to propagate information about the lookahead following 63 * the previous match. Since prediction prefers completing the current rule 64 * to error recovery efforts, error reporting may occur later than the 65 * original point where it was discoverable. The original context is used to 66 * compute the true expected sets as though the reporting occurred as early 67 * as possible. 68 */ 69 protected ParserRuleContext nextTokensContext; 70 71 /** 72 * @see #nextTokensContext 73 */ 74 protected int nextTokensState; 75 76 /** 77 * <p>The default implementation simply calls {@link #endErrorCondition} to 78 * ensure that the handler is not in error recovery mode.</p> 79 */ 80 public void reset(Parser recognizer) 81 { 82 endErrorCondition(recognizer); 83 } 84 85 /** 86 * This method is called to enter error recovery mode when a recognition 87 * exception is reported. 88 * 89 * @param recognizer the parser instance 90 */ 91 protected void beginErrorCondition(Parser recognizer) 92 { 93 errorRecoveryMode = true; 94 } 95 96 public bool inErrorRecoveryMode(Parser recognizer) 97 { 98 return errorRecoveryMode; 99 } 100 101 /** 102 * This method is called to leave error recovery mode after recovering from 103 * a recognition exception. 104 * 105 * @param recognizer 106 */ 107 protected void endErrorCondition(Parser recognizer) 108 { 109 errorRecoveryMode = false; 110 lastErrorStates = null; 111 lastErrorIndex = -1; 112 } 113 114 /** 115 * {@inheritDoc} 116 * 117 * <p>The default implementation simply calls {@link #endErrorCondition}.</p> 118 */ 119 public void reportMatch(Parser recognizer) 120 { 121 endErrorCondition(recognizer); 122 } 123 124 /** 125 * {@inheritDoc} 126 * 127 * <p>The default implementation returns immediately if the handler is already 128 * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} 129 * and dispatches the reporting task based on the runtime type of {@code e} 130 * according to the following table.</p> 131 * 132 * <ul> 133 * <li>{@link NoViableAltException}: Dispatches the call to 134 * {@link #reportNoViableAlternative}</li> 135 * <li>{@link InputMismatchException}: Dispatches the call to 136 * {@link #reportInputMismatch}</li> 137 * <li>{@link FailedPredicateException}: Dispatches the call to 138 * {@link #reportFailedPredicate}</li> 139 * <li>All other types: calls {@link Parser#notifyErrorListeners} to report 140 * the exception</li> 141 * </ul> 142 */ 143 public void reportError(Parser recognizer, RecognitionException e) 144 { 145 // if we've already reported an error and have not matched a token 146 // yet successfully, don't report any errors. 147 if (inErrorRecoveryMode(recognizer)) { 148 debug(DefaultErrorStrategy) 149 { 150 writefln("DefaultErrorStrategy:[SPURIOUS]"); 151 } 152 return; // don't report spurious errors 153 } 154 beginErrorCondition(recognizer); 155 if (cast(NoViableAltException)e) { 156 reportNoViableAlternative(recognizer, cast(NoViableAltException)e); 157 } 158 else if (cast(InputMismatchException)e) { 159 reportInputMismatch(recognizer, cast(InputMismatchException)e); 160 } 161 else if (cast(FailedPredicateException)e) { 162 reportFailedPredicate(recognizer, cast(FailedPredicateException)e); 163 } 164 else { 165 stderr.writefln("unknown recognition error type: %s", e.stringof); 166 recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e); 167 } 168 } 169 170 /** 171 * {@inheritDoc} 172 * 173 * <p>The default implementation resynchronizes the parser by consuming tokens 174 * until we find one in the resynchronization set--loosely the set of tokens 175 * that can follow the current rule.</p> 176 */ 177 public void recover(Parser recognizer, RecognitionException e) 178 { 179 debug(DefaultErrorStrategy) 180 { 181 writefln("recover in %s index=%s, lastErrorIndex=%s, states=%s", 182 recognizer.getRuleInvocationStack, 183 recognizer.getInputStream().index(), 184 lastErrorIndex, 185 lastErrorStates); 186 } 187 if (lastErrorIndex == recognizer.getInputStream.index() && 188 lastErrorStates !is null && 189 lastErrorStates.contains(recognizer.getState)) 190 { 191 // uh oh, another error at same token index and previously-visited 192 // state in ATN; must be a case where LT(1) is in the recovery 193 // token set so nothing got consumed. Consume a single token 194 // at least to prevent an infinite loop; this is a failsafe. 195 debug(DefaultErrorStrategy) 196 { 197 stderr.writefln("seen error condition before index= %s, states=%s", 198 lastErrorIndex, lastErrorStates); 199 stderr.writefln("FAILSAFE consumes %s", 200 recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]); 201 } 202 recognizer.consume; 203 } 204 lastErrorIndex = recognizer.getInputStream.index; 205 if (!lastErrorStates) 206 lastErrorStates = new IntervalSet; 207 lastErrorStates.add(recognizer.getState); 208 IntervalSet followSet = getErrorRecoverySet(recognizer); 209 consumeUntil(recognizer, followSet); 210 } 211 212 /** 213 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure 214 * that the current lookahead symbol is consistent with what were expecting 215 * at this point in the ATN. You can call this anytime but ANTLR only 216 * generates code to check before subrules/loops and each iteration. 217 * 218 * <p>Implements Jim Idle's magic sync mechanism in closures and optional 219 * subrules. E.g.,</p> 220 * 221 * <pre> 222 * a : sync ( stuff sync )* ; 223 * sync : {consume to what can follow sync} ; 224 * </pre> 225 * 226 * At the start of a sub rule upon error, {@link #sync} performs single 227 * token deletion, if possible. If it can't do that, it bails on the current 228 * rule and uses the default error recovery, which consumes until the 229 * resynchronization set of the current rule. 230 * 231 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block 232 * with an empty alternative), then the expected set includes what follows 233 * the subrule.</p> 234 * 235 * <p>During loop iteration, it consumes until it sees a token that can start a 236 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to 237 * stay in the loop as long as possible.</p> 238 * 239 * <p><strong>ORIGINS</strong></p> 240 * 241 * <p>Previous versions of ANTLR did a poor job of their recovery within loops. 242 * A single mismatch token or missing token would force the parser to bail 243 * out of the entire rules surrounding the loop. So, for rule</p> 244 * 245 * <pre> 246 * classDef : 'class' ID '{' member* '}' 247 * </pre> 248 * 249 * input with an extra token between members would force the parser to 250 * consume until it found the next class definition rather than the next 251 * member definition of the current class. 252 * 253 * <p>This functionality cost a little bit of effort because the parser has to 254 * compare token set at the start of the loop and at each iteration. If for 255 * some reason speed is suffering for you, you can turn off this 256 * functionality by simply overriding this method as a blank { }.</p> 257 */ 258 public void sync(Parser recognizer) 259 { 260 ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState]; 261 debug(ErrorHandling) 262 { 263 writefln("sync @ %s=%s", s.stateNumber, s.classinfo); 264 } 265 // If already recovering, don't try to sync 266 if (inErrorRecoveryMode(recognizer)) { 267 return; 268 } 269 TokenStream tokens = recognizer.getInputStream; 270 int la = tokens.LA(1); 271 272 // try cheaper subset first; might get lucky. seems to shave a wee bit off 273 IntervalSet nextTokens = recognizer.getATN.nextTokens(s); 274 if (nextTokens.contains(la)) { 275 // We are sure the token matches 276 nextTokensContext = null; 277 nextTokensState = ATNState.INVALID_STATE_NUMBER; 278 return; 279 } 280 281 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 282 if (nextTokensContext is null) { 283 // It's possible the next token won't match; information tracked 284 // by sync is restricted for performance. 285 nextTokensContext = recognizer.ctx; 286 nextTokensState = recognizer.getState; 287 } 288 return; 289 } 290 291 switch (s.getStateType) { 292 case StateNames.BLOCK_START: 293 case StateNames.STAR_BLOCK_START: 294 case StateNames.PLUS_BLOCK_START: 295 case StateNames.STAR_LOOP_ENTRY: 296 // report error and recover if possible 297 if (singleTokenDeletion(recognizer) !is null) { 298 return; 299 } 300 throw new InputMismatchException(recognizer); 301 302 case StateNames.PLUS_LOOP_BACK: 303 case StateNames.STAR_LOOP_BACK: 304 debug(ErrorHandling) 305 { 306 stderr.writefln("at loop back: %s", s); 307 } 308 reportUnwantedToken(recognizer); 309 IntervalSet expecting = recognizer.getExpectedTokens; 310 IntervalSet whatFollowsLoopIterationOrRule = 311 expecting.or(getErrorRecoverySet(recognizer)); 312 consumeUntil(recognizer, whatFollowsLoopIterationOrRule); 313 break; 314 315 default: 316 // do nothing if we can't identify the exact kind of ATN state 317 break; 318 } 319 } 320 321 /** 322 * This is called by {@link #reportError} when the exception is a 323 * {@link NoViableAltException}. 324 * 325 * @see #reportError 326 * 327 * @param recognizer the parser instance 328 * @param e the recognition exception 329 */ 330 protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e) 331 { 332 TokenStream tokens = recognizer.getInputStream(); 333 string input; 334 if (tokens !is null) { 335 if (e.getStartToken.getType == TokenConstantDefinition.EOF) 336 input = "<EOF>"; 337 else 338 input = tokens.getText(e.getStartToken, e.getOffendingToken); 339 } 340 else { 341 input = "<unknown input>"; 342 } 343 string msg = format("no viable alternative at input %s", escapeWSAndQuote(input)); 344 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 345 } 346 347 /** 348 * This is called by {@link #reportError} when the exception is an 349 * {@link InputMismatchException}. 350 * 351 * @see #reportError 352 * 353 * @param recognizer the parser instance 354 * @param e the recognition exception 355 */ 356 protected void reportInputMismatch(Parser recognizer, InputMismatchException e) 357 { 358 string msg = format("mismatched input %1$s expecting %2$s", 359 getTokenErrorDisplay(e.getOffendingToken), 360 e.getExpectedTokens.toString(recognizer.getVocabulary)); 361 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 362 } 363 364 /** 365 * This is called by {@link #reportError} when the exception is a 366 * {@link FailedPredicateException}. 367 * 368 * @see #reportError 369 * 370 * @param recognizer the parser instance 371 * @param e the recognition exception 372 */ 373 protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e) 374 { 375 string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()]; 376 string msg = "rule " ~ ruleName ~ " " ~ e.msg; 377 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 378 } 379 380 /** 381 * This method is called to report a syntax error which requires the removal 382 * of a token from the input stream. At the time this method is called, the 383 * erroneous symbol is current {@code LT(1)} symbol and has not yet been 384 * removed from the input stream. When this method returns, 385 * {@code recognizer} is in error recovery mode. 386 * 387 * <p>This method is called when {@link #singleTokenDeletion} identifies 388 * single-token deletion as a viable recovery strategy for a mismatched 389 * input error.</p> 390 * 391 * <p>The default implementation simply returns if the handler is already in 392 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 393 * enter error recovery mode, followed by calling 394 * {@link Parser#notifyErrorListeners}.</p> 395 * 396 * @param recognizer the parser instance 397 */ 398 protected void reportUnwantedToken(Parser recognizer) 399 { 400 if (inErrorRecoveryMode(recognizer)) { 401 return; 402 } 403 404 beginErrorCondition(recognizer); 405 406 Token t = recognizer.getCurrentToken(); 407 string tokenName = getTokenErrorDisplay(t); 408 IntervalSet expecting = getExpectedTokens(recognizer); 409 string msg = "extraneous input " ~ tokenName ~ " expecting " ~ 410 expecting.toString(recognizer.getVocabulary()); 411 recognizer.notifyErrorListeners(t, msg, null); 412 } 413 414 /** 415 * This method is called to report a syntax error which requires the 416 * insertion of a missing token into the input stream. At the time this 417 * method is called, the missing token has not yet been inserted. When this 418 * method returns, {@code recognizer} is in error recovery mode. 419 * 420 * <p>This method is called when {@link #singleTokenInsertion} identifies 421 * single-token insertion as a viable recovery strategy for a mismatched 422 * input error.</p> 423 * 424 * <p>The default implementation simply returns if the handler is already in 425 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 426 * enter error recovery mode, followed by calling 427 * {@link Parser#notifyErrorListeners}.</p> 428 * 429 * @param recognizer the parser instance 430 */ 431 protected void reportMissingToken(Parser recognizer) 432 { 433 if (inErrorRecoveryMode(recognizer)) { 434 return; 435 } 436 437 beginErrorCondition(recognizer); 438 439 Token t = recognizer.getCurrentToken(); 440 IntervalSet expecting = getExpectedTokens(recognizer); 441 string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary), 442 getTokenErrorDisplay(t)); 443 444 recognizer.notifyErrorListeners(t, msg, null); 445 } 446 447 /** 448 * {@inheritDoc} 449 * 450 * <p>The default implementation attempts to recover from the mismatched input 451 * by using single token insertion and deletion as described below. If the 452 * recovery attempt fails, this method throws an 453 * {@link InputMismatchException}.</p> 454 * 455 * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p> 456 * 457 * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the 458 * right token, however, then assume {@code LA(1)} is some extra spurious 459 * token and delete it. Then consume and return the next token (which was 460 * the {@code LA(2)} token) as the successful result of the match operation.</p> 461 * 462 * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p> 463 * 464 * <p><strong>MISSING TOKEN</strong> (single token insertion)</p> 465 * 466 * <p>If current token (at {@code LA(1)}) is consistent with what could come 467 * after the expected {@code LA(1)} token, then assume the token is missing 468 * and use the parser's {@link TokenFactory} to create it on the fly. The 469 * "insertion" is performed by returning the created token as the successful 470 * result of the match operation.</p> 471 * 472 * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p> 473 * 474 * <p><strong>EXAMPLE</strong></p> 475 * 476 * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When 477 * the parser returns from the nested call to {@code expr}, it will have 478 * call chain:</p> 479 * 480 * <pre> 481 * stat → expr → atom 482 * </pre> 483 * 484 * and it will be trying to match the {@code ')'} at this point in the 485 * derivation: 486 * 487 * <pre> 488 * => ID '=' '(' INT ')' ('+' atom)* ';' 489 * ^ 490 * </pre> 491 * 492 * The attempt to match {@code ')'} will fail when it sees {@code ';'} and 493 * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} 494 * is in the set of tokens that can follow the {@code ')'} token reference 495 * in rule {@code atom}. It can assume that you forgot the {@code ')'}. 496 */ 497 public Token recoverInline(Parser recognizer) 498 { 499 // SINGLE TOKEN DELETION 500 Token matchedSymbol = singleTokenDeletion(recognizer); 501 if (matchedSymbol !is null) { 502 // we have deleted the extra token. 503 // now, move past ttype token as if all were ok 504 recognizer.consume(); 505 return matchedSymbol; 506 } 507 508 // SINGLE TOKEN INSERTION 509 if (singleTokenInsertion(recognizer)) { 510 return getMissingSymbol(recognizer); 511 } 512 513 // even that didn't work; must throw the exception 514 InputMismatchException e; 515 if (nextTokensContext is null) { 516 e = new InputMismatchException(recognizer); 517 } else { 518 e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext); 519 } 520 throw e; 521 } 522 523 /** 524 * This method implements the single-token insertion inline error recovery 525 * strategy. It is called by {@link #recoverInline} if the single-token 526 * deletion strategy fails to recover from the mismatched input. If this 527 * method returns {@code true}, {@code recognizer} will be in error recovery 528 * mode. 529 * 530 * <p>This method determines whether or not single-token insertion is viable by 531 * checking if the {@code LA(1)} input symbol could be successfully matched 532 * if it were instead the {@code LA(2)} symbol. If this method returns 533 * {@code true}, the caller is responsible for creating and inserting a 534 * token with the correct type to produce this behavior.</p> 535 * 536 * @param recognizer the parser instance 537 * @return {@code true} if single-token insertion is a viable recovery 538 * strategy for the current mismatched input, otherwise {@code false} 539 */ 540 protected bool singleTokenInsertion(Parser recognizer) 541 { 542 int currentSymbolType = recognizer.getInputStream().LA(1); 543 // if current token is consistent with what could come after current 544 // ATN state, then we know we're missing a token; error recovery 545 // is free to conjure up and insert the missing token 546 ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState]; 547 ATNState next = currentState.transition(0).target; 548 ATN atn = recognizer.getInterpreter().atn; 549 IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx); 550 debug(ErrorHandling) 551 { 552 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames)); 553 } 554 if ( expectingAtLL2.contains(currentSymbolType) ) { 555 reportMissingToken(recognizer); 556 return true; 557 } 558 return false; 559 } 560 561 /** 562 * This method implements the single-token deletion inline error recovery 563 * strategy. It is called by {@link #recoverInline} to attempt to recover 564 * from mismatched input. If this method returns null, the parser and error 565 * handler state will not have changed. If this method returns non-null, 566 * {@code recognizer} will <em>not</em> be in error recovery mode since the 567 * returned token was a successful match. 568 * 569 * <p>If the single-token deletion is successful, this method calls 570 * {@link #reportUnwantedToken} to report the error, followed by 571 * {@link Parser#consume} to actually "delete" the extraneous token. Then, 572 * before returning {@link #reportMatch} is called to signal a successful 573 * match.</p> 574 * 575 * @param recognizer the parser instance 576 * @return the successfully matched {@link Token} instance if single-token 577 * deletion successfully recovers from the mismatched input, otherwise 578 * {@code null} 579 */ 580 protected Token singleTokenDeletion(Parser recognizer) 581 { 582 int nextTokenType = recognizer.getInputStream().LA(2); 583 IntervalSet expecting = getExpectedTokens(recognizer); 584 if ( expecting.contains(nextTokenType) ) { 585 reportUnwantedToken(recognizer); 586 /* 587 System.err.println("recoverFromMismatchedToken deleting "+ 588 ((TokenStream)recognizer.getInputStream()).LT(1)+ 589 " since "+((TokenStream)recognizer.getInputStream()).LT(2)+ 590 " is what we want"); 591 */ 592 recognizer.consume(); // simply delete extra token 593 // we want to return the token we're actually matching 594 Token matchedSymbol = recognizer.getCurrentToken(); 595 reportMatch(recognizer); // we know current token is correct 596 return matchedSymbol; 597 } 598 return null; 599 } 600 601 /** 602 * Conjure up a missing token during error recovery. 603 * 604 * The recognizer attempts to recover from single missing 605 * symbols. But, actions might refer to that missing symbol. 606 * For example, x=ID {f($x);}. The action clearly assumes 607 * that there has been an identifier matched previously and that 608 * $x points at that token. If that token is missing, but 609 * the next token in the stream is what we want we assume that 610 * this token is missing and we keep going. Because we 611 * have to return some token to replace the missing token, 612 * we have to conjure one up. This method gives the user control 613 * over the tokens returned for missing tokens. Mostly, 614 * you will want to create something special for identifier 615 * tokens. For literals such as '{' and ',', the default 616 * action in the parser or tree parser works. It simply creates 617 * a CommonToken of the appropriate type. The text will be the token. 618 * If you change what tokens must be created by the lexer, 619 * override this method to create the appropriate tokens. 620 */ 621 protected Token getMissingSymbol(Parser recognizer) 622 { 623 Token currentSymbol = recognizer.getCurrentToken(); 624 IntervalSet expecting = getExpectedTokens(recognizer); 625 int expectedTokenType = TokenConstantDefinition.INVALID_TYPE; 626 if (!expecting.isNil) { 627 expectedTokenType = expecting.getMinElement(); // get any element 628 } 629 string tokenText; 630 if (expectedTokenType == TokenConstantDefinition.EOF) 631 tokenText = "<missing EOF>"; 632 else 633 tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType)); 634 Token current = currentSymbol; 635 Token lookback = recognizer.getInputStream().LT(-1); 636 if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) { 637 current = lookback; 638 } 639 TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream()); 640 return 641 recognizer.tokenFactory().create(tsp, expectedTokenType, tokenText, 642 TokenConstantDefinition.DEFAULT_CHANNEL, 643 -1, -1, 644 current.getLine(), current.getCharPositionInLine()); 645 646 } 647 648 protected IntervalSet getExpectedTokens(Parser recognizer) 649 { 650 return recognizer.getExpectedTokens; 651 } 652 653 /** 654 * How should a token be displayed in an error message? The default 655 * is to display just the text, but during development you might 656 * want to have a lot of information spit out. Override in that case 657 * to use t.toString() (which, for CommonToken, dumps everything about 658 * the token). This is better than forcing you to override a method in 659 * your token objects because you don't have to go modify your lexer 660 * so that it creates a new Java type. 661 */ 662 protected string getTokenErrorDisplay(Token t) 663 { 664 if (t is null) return "<no token>"; 665 string s = getSymbolText(t); 666 if (s is null) { 667 if (getSymbolType(t) == TokenConstantDefinition.EOF) { 668 s = "<EOF>"; 669 } 670 else { 671 s = format("<%s>", getSymbolType(t)); 672 } 673 } 674 return escapeWSAndQuote(s); 675 } 676 677 protected string getSymbolText(Token symbol) 678 { 679 return symbol.getText; 680 } 681 682 protected int getSymbolType(Token symbol) 683 { 684 return symbol.getType(); 685 } 686 687 protected string escapeWSAndQuote(string s) 688 { 689 // if ( s==null ) return s; 690 s = s.replace("\n","\\n"); 691 s = s.replace("\r","\\r"); 692 s = s.replace("\t","\\t"); 693 return "'" ~ s ~ "'"; 694 } 695 696 /** 697 * Compute the error recovery set for the current rule. During 698 * rule invocation, the parser pushes the set of tokens that can 699 * follow that rule reference on the stack; this amounts to 700 * computing FIRST of what follows the rule reference in the 701 * enclosing rule. See LinearApproximator.FIRST(). 702 * This local follow set only includes tokens 703 * from within the rule; i.e., the FIRST computation done by 704 * ANTLR stops at the end of a rule. 705 * 706 * EXAMPLE 707 * 708 * When you find a "no viable alt exception", the input is not 709 * consistent with any of the alternatives for rule r. The best 710 * thing to do is to consume tokens until you see something that 711 * can legally follow a call to r *or* any rule that called r. 712 * You don't want the exact set of viable next tokens because the 713 * input might just be missing a token--you might consume the 714 * rest of the input looking for one of the missing tokens. 715 * 716 * Consider grammar: 717 * 718 * a : '[' b ']' 719 * | '(' b ')' 720 * ; 721 * b : c '^' INT ; 722 * c : ID 723 * | INT 724 * ; 725 * 726 * At each rule invocation, the set of tokens that could follow 727 * that rule is pushed on a stack. Here are the various 728 * context-sensitive follow sets: 729 * 730 * FOLLOW(b1_in_a) = FIRST(']') = ']' 731 * FOLLOW(b2_in_a) = FIRST(')') = ')' 732 * FOLLOW(c_in_b) = FIRST('^') = '^' 733 * 734 * Upon erroneous input "[]", the call chain is 735 * 736 * a -> b -> c 737 * 738 * and, hence, the follow context stack is: 739 * 740 * depth follow set start of rule execution 741 * 0 <EOF> a (from main()) 742 * 1 ']' b 743 * 2 '^' c 744 * 745 * Notice that ')' is not included, because b would have to have 746 * been called from a different context in rule a for ')' to be 747 * included. 748 * 749 * For error recovery, we cannot consider FOLLOW(c) 750 * (context-sensitive or otherwise). We need the combined set of 751 * all context-sensitive FOLLOW sets--the set of all tokens that 752 * could follow any reference in the call chain. We need to 753 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 754 * we resync'd to that token, we'd consume until EOF. We need to 755 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 756 * In this case, for input "[]", LA(1) is ']' and in the set, so we would 757 * not consume anything. After printing an error, rule c would 758 * return normally. Rule b would not find the required '^' though. 759 * At this point, it gets a mismatched token error and throws an 760 * exception (since LA(1) is not in the viable following token 761 * set). The rule exception handler tries to recover, but finds 762 * the same recovery set and doesn't consume anything. Rule b 763 * exits normally returning to rule a. Now it finds the ']' (and 764 * with the successful match exits errorRecovery mode). 765 * 766 * So, you can see that the parser walks up the call chain looking 767 * for the token that was a member of the recovery set. 768 * 769 * Errors are not generated in errorRecovery mode. 770 * 771 * ANTLR's error recovery mechanism is based upon original ideas: 772 * 773 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 774 * 775 * and 776 * 777 * "A note on error recovery in recursive descent parsers": 778 * http://portal.acm.org/citation.cfm?id=947902.947905 779 * 780 * Later, Josef Grosch had some good ideas: 781 * 782 * "Efficient and Comfortable Error Recovery in Recursive Descent 783 * Parsers": 784 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 785 * 786 * Like Grosch I implement context-sensitive FOLLOW sets that are combined 787 * at run-time upon error to avoid overhead during parsing. 788 */ 789 protected IntervalSet getErrorRecoverySet(Parser recognizer) 790 { 791 ATN atn = recognizer.getInterpreter().atn; 792 RuleContext ctx = recognizer.ctx; 793 IntervalSet recoverSet = new IntervalSet(); 794 while (ctx !is null && ctx.invokingState >= 0) { 795 // compute what follows who invoked us 796 ATNState invokingState = atn.states[ctx.invokingState]; 797 RuleTransition rt = cast(RuleTransition)invokingState.transition(0); 798 IntervalSet follow = atn.nextTokens(rt.followState); 799 recoverSet.addAll(follow); 800 ctx = ctx.parent; 801 } 802 recoverSet.remove(TokenConstantDefinition.EPSILON); 803 // System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); 804 return recoverSet; 805 } 806 807 /** 808 * Consume tokens until one matches the given token set. 809 */ 810 protected void consumeUntil(Parser recognizer, IntervalSet set) 811 { 812 //stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames())); 813 int ttype = recognizer.getInputStream().LA(1); 814 while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) { 815 debug(ErrorHandling) 816 { 817 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]); 818 } 819 // recognizer.getInputStream().consume(); 820 recognizer.consume(); 821 ttype = recognizer.getInputStream().LA(1); 822 } 823 } 824 825 }