1 /* 2 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.DefaultErrorStrategy; 8 9 import antlr.v4.runtime.ANTLRErrorStrategy; 10 import antlr.v4.runtime.CharStream; 11 import antlr.v4.runtime.FailedPredicateException; 12 import antlr.v4.runtime.InputMismatchException; 13 import antlr.v4.runtime.NoViableAltException; 14 import antlr.v4.runtime.Parser; 15 import antlr.v4.runtime.ParserRuleContext; 16 import antlr.v4.runtime.RecognitionException; 17 import antlr.v4.runtime.RuleContext; 18 import antlr.v4.runtime.Token; 19 import antlr.v4.runtime.TokenConstantDefinition; 20 import antlr.v4.runtime.TokenSource; 21 import antlr.v4.runtime.TokenStream; 22 import antlr.v4.runtime.atn.ATN; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.RuleTransition; 25 import antlr.v4.runtime.atn.StateNames; 26 import antlr.v4.runtime.misc.IntervalSet; 27 import std.array; 28 import std.conv; 29 import std.format; 30 import std.stdio; 31 import std.typecons; 32 import std.variant; 33 34 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b"); 35 36 /** 37 * This is the default implementation of {@link ANTLRErrorStrategy} used for 38 * error reporting and recovery in ANTLR parsers. 39 */ 40 class DefaultErrorStrategy : ANTLRErrorStrategy 41 { 42 43 /** 44 * Indicates whether the error strategy is currently "recovering from an 45 * error". This is used to suppress reporting multiple error messages while 46 * attempting to recover from a detected syntax error. 47 * 48 * @see #inErrorRecoveryMode 49 */ 50 protected bool errorRecoveryMode; 51 52 /** 53 * The index into the input stream where the last error occurred. 54 * This is used to prevent infinite loops where an error is found 55 * but no token is consumed during recovery...another error is found, 56 * ad nauseum. This is a failsafe mechanism to guarantee that at least 57 * one token/tree node is consumed for two errors. 58 */ 59 protected int lastErrorIndex = -1; 60 61 public IntervalSet lastErrorStates; 62 63 /** 64 * This field is used to propagate information about the lookahead following 65 * the previous match. Since prediction prefers completing the current rule 66 * to error recovery efforts, error reporting may occur later than the 67 * original point where it was discoverable. The original context is used to 68 * compute the true expected sets as though the reporting occurred as early 69 * as possible. 70 */ 71 protected ParserRuleContext nextTokensContext; 72 73 /** 74 * @see #nextTokensContext 75 */ 76 protected int nextTokensState; 77 78 /** 79 * <p>The default implementation simply calls {@link #endErrorCondition} to 80 * ensure that the handler is not in error recovery mode.</p> 81 */ 82 public void reset(Parser recognizer) 83 { 84 endErrorCondition(recognizer); 85 } 86 87 /** 88 * This method is called to enter error recovery mode when a recognition 89 * exception is reported. 90 * 91 * @param recognizer the parser instance 92 */ 93 protected void beginErrorCondition(Parser recognizer) 94 { 95 errorRecoveryMode = true; 96 } 97 98 public bool inErrorRecoveryMode(Parser recognizer) 99 { 100 return errorRecoveryMode; 101 } 102 103 /** 104 * This method is called to leave error recovery mode after recovering from 105 * a recognition exception. 106 * 107 * @param recognizer 108 */ 109 protected void endErrorCondition(Parser recognizer) 110 { 111 errorRecoveryMode = false; 112 lastErrorStates = null; 113 lastErrorIndex = -1; 114 } 115 116 /** 117 * {@inheritDoc} 118 * 119 * <p>The default implementation simply calls {@link #endErrorCondition}.</p> 120 */ 121 public void reportMatch(Parser recognizer) 122 { 123 endErrorCondition(recognizer); 124 } 125 126 /** 127 * {@inheritDoc} 128 * 129 * <p>The default implementation returns immediately if the handler is already 130 * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} 131 * and dispatches the reporting task based on the runtime type of {@code e} 132 * according to the following table.</p> 133 * 134 * <ul> 135 * <li>{@link NoViableAltException}: Dispatches the call to 136 * {@link #reportNoViableAlternative}</li> 137 * <li>{@link InputMismatchException}: Dispatches the call to 138 * {@link #reportInputMismatch}</li> 139 * <li>{@link FailedPredicateException}: Dispatches the call to 140 * {@link #reportFailedPredicate}</li> 141 * <li>All other types: calls {@link Parser#notifyErrorListeners} to report 142 * the exception</li> 143 * </ul> 144 */ 145 public void reportError(Parser recognizer, RecognitionException e) 146 { 147 // if we've already reported an error and have not matched a token 148 // yet successfully, don't report any errors. 149 if (inErrorRecoveryMode(recognizer)) { 150 debug(DefaultErrorStrategy) 151 { 152 writefln("DefaultErrorStrategy:[SPURIOUS]"); 153 } 154 return; // don't report spurious errors 155 } 156 beginErrorCondition(recognizer); 157 if (cast(NoViableAltException)e) { 158 reportNoViableAlternative(recognizer, cast(NoViableAltException)e); 159 } 160 else if (cast(InputMismatchException)e) { 161 reportInputMismatch(recognizer, cast(InputMismatchException)e); 162 } 163 else if (cast(FailedPredicateException)e) { 164 reportFailedPredicate(recognizer, cast(FailedPredicateException)e); 165 } 166 else { 167 stderr.writefln!"unknown recognition error type: %s"(e.stringof); 168 recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e); 169 } 170 } 171 172 /** 173 * {@inheritDoc} 174 * 175 * <p>The default implementation resynchronizes the parser by consuming tokens 176 * until we find one in the resynchronization set--loosely the set of tokens 177 * that can follow the current rule.</p> 178 */ 179 public void recover(Parser recognizer, RecognitionException e) 180 { 181 debug(DefaultErrorStrategy) 182 { 183 writefln!"recover in %s index=%s, lastErrorIndex=%s, states=%s"( 184 recognizer.getRuleInvocationStack, 185 recognizer.getInputStream().index(), 186 lastErrorIndex, 187 lastErrorStates); 188 } 189 if (lastErrorIndex == recognizer.getInputStream.index() && 190 lastErrorStates !is null && 191 lastErrorStates.contains(recognizer.getState)) 192 { 193 // uh oh, another error at same token index and previously-visited 194 // state in ATN; must be a case where LT(1) is in the recovery 195 // token set so nothing got consumed. Consume a single token 196 // at least to prevent an infinite loop; this is a failsafe. 197 debug(DefaultErrorStrategy) 198 { 199 stderr.writefln("seen error condition before index= %s, states=%s", 200 lastErrorIndex, lastErrorStates); 201 stderr.writefln("FAILSAFE consumes %s", 202 recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]); 203 } 204 recognizer.consume; 205 } 206 lastErrorIndex = to!int(recognizer.getInputStream.index); 207 if (!lastErrorStates) 208 lastErrorStates = new IntervalSet; 209 lastErrorStates.add(recognizer.getState); 210 IntervalSet followSet = getErrorRecoverySet(recognizer); 211 consumeUntil(recognizer, followSet); 212 } 213 214 /** 215 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure 216 * that the current lookahead symbol is consistent with what were expecting 217 * at this point in the ATN. You can call this anytime but ANTLR only 218 * generates code to check before subrules/loops and each iteration. 219 * 220 * <p>Implements Jim Idle's magic sync mechanism in closures and optional 221 * subrules. E.g.,</p> 222 * 223 * <pre> 224 * a : sync ( stuff sync )* ; 225 * sync : {consume to what can follow sync} ; 226 * </pre> 227 * 228 * At the start of a sub rule upon error, {@link #sync} performs single 229 * token deletion, if possible. If it can't do that, it bails on the current 230 * rule and uses the default error recovery, which consumes until the 231 * resynchronization set of the current rule. 232 * 233 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block 234 * with an empty alternative), then the expected set includes what follows 235 * the subrule.</p> 236 * 237 * <p>During loop iteration, it consumes until it sees a token that can start a 238 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to 239 * stay in the loop as long as possible.</p> 240 * 241 * <p><strong>ORIGINS</strong></p> 242 * 243 * <p>Previous versions of ANTLR did a poor job of their recovery within loops. 244 * A single mismatch token or missing token would force the parser to bail 245 * out of the entire rules surrounding the loop. So, for rule</p> 246 * 247 * <pre> 248 * classDef : 'class' ID '{' member* '}' 249 * </pre> 250 * 251 * input with an extra token between members would force the parser to 252 * consume until it found the next class definition rather than the next 253 * member definition of the current class. 254 * 255 * <p>This functionality cost a little bit of effort because the parser has to 256 * compare token set at the start of the loop and at each iteration. If for 257 * some reason speed is suffering for you, you can turn off this 258 * functionality by simply overriding this method as a blank { }.</p> 259 */ 260 public void sync(Parser recognizer) 261 { 262 ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState]; 263 debug(ErrorHandling) 264 { 265 writefln("sync @ %s=%s", s.stateNumber, s.classinfo); 266 } 267 // If already recovering, don't try to sync 268 if (inErrorRecoveryMode(recognizer)) { 269 return; 270 } 271 TokenStream tokens = recognizer.getInputStream; 272 int la = tokens.LA(1); 273 274 // try cheaper subset first; might get lucky. seems to shave a wee bit off 275 IntervalSet nextTokens = recognizer.getATN.nextTokens(s); 276 if (nextTokens.contains(la)) { 277 // We are sure the token matches 278 nextTokensContext = null; 279 nextTokensState = ATNState.INVALID_STATE_NUMBER; 280 return; 281 } 282 283 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 284 if (nextTokensContext is null) { 285 // It's possible the next token won't match; information tracked 286 // by sync is restricted for performance. 287 nextTokensContext = recognizer.ctx; 288 nextTokensState = recognizer.getState; 289 } 290 return; 291 } 292 293 switch (s.getStateType) { 294 case StateNames.BLOCK_START: 295 case StateNames.STAR_BLOCK_START: 296 case StateNames.PLUS_BLOCK_START: 297 case StateNames.STAR_LOOP_ENTRY: 298 // report error and recover if possible 299 if (singleTokenDeletion(recognizer) !is null) { 300 return; 301 } 302 throw new InputMismatchException(recognizer); 303 304 case StateNames.PLUS_LOOP_BACK: 305 case StateNames.STAR_LOOP_BACK: 306 debug(ErrorHandling) 307 { 308 stderr.writefln("at loop back: %s", s); 309 } 310 reportUnwantedToken(recognizer); 311 IntervalSet expecting = recognizer.getExpectedTokens; 312 IntervalSet whatFollowsLoopIterationOrRule = 313 expecting.or(getErrorRecoverySet(recognizer)); 314 consumeUntil(recognizer, whatFollowsLoopIterationOrRule); 315 break; 316 317 default: 318 // do nothing if we can't identify the exact kind of ATN state 319 break; 320 } 321 } 322 323 /** 324 * This is called by {@link #reportError} when the exception is a 325 * {@link NoViableAltException}. 326 * 327 * @see #reportError 328 * 329 * @param recognizer the parser instance 330 * @param e the recognition exception 331 */ 332 protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e) 333 { 334 TokenStream tokens = recognizer.getInputStream(); 335 string input; 336 if (tokens) { 337 if (e.getStartToken.getType == TokenConstantDefinition.EOF) 338 input = "<EOF>"; 339 else 340 input = to!string(tokens.getText(e.getStartToken, e.getOffendingToken)); 341 } 342 else { 343 input = "<unknown input>"; 344 } 345 string msg = format("no viable alternative at input %s", escapeWSAndQuote(input)); 346 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 347 } 348 349 /** 350 * This is called by {@link #reportError} when the exception is an 351 * {@link InputMismatchException}. 352 * 353 * @see #reportError 354 * 355 * @param recognizer the parser instance 356 * @param e the recognition exception 357 */ 358 protected void reportInputMismatch(Parser recognizer, InputMismatchException e) 359 { 360 string msg = format!"mismatched input %1$s expecting %2$s" 361 ( 362 getTokenErrorDisplay(e.getOffendingToken), 363 e.getExpectedTokens.toString(recognizer.getVocabulary) 364 ); 365 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 366 } 367 368 /** 369 * This is called by {@link #reportError} when the exception is a 370 * {@link FailedPredicateException}. 371 * 372 * @see #reportError 373 * 374 * @param recognizer the parser instance 375 * @param e the recognition exception 376 */ 377 protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e) 378 { 379 string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()]; 380 string msg = "rule " ~ ruleName ~ " " ~ e.msg; 381 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 382 } 383 384 /** 385 * This method is called to report a syntax error which requires the removal 386 * of a token from the input stream. At the time this method is called, the 387 * erroneous symbol is current {@code LT(1)} symbol and has not yet been 388 * removed from the input stream. When this method returns, 389 * {@code recognizer} is in error recovery mode. 390 * 391 * <p>This method is called when {@link #singleTokenDeletion} identifies 392 * single-token deletion as a viable recovery strategy for a mismatched 393 * input error.</p> 394 * 395 * <p>The default implementation simply returns if the handler is already in 396 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 397 * enter error recovery mode, followed by calling 398 * {@link Parser#notifyErrorListeners}.</p> 399 * 400 * @param recognizer the parser instance 401 */ 402 protected void reportUnwantedToken(Parser recognizer) 403 { 404 if (inErrorRecoveryMode(recognizer)) { 405 return; 406 } 407 408 beginErrorCondition(recognizer); 409 410 Token t = recognizer.getCurrentToken(); 411 string tokenName = getTokenErrorDisplay(t); 412 IntervalSet expecting = getExpectedTokens(recognizer); 413 string msg = "extraneous input " ~ tokenName ~ " expecting " ~ 414 expecting.toString(recognizer.getVocabulary()); 415 recognizer.notifyErrorListeners(t, msg, null); 416 } 417 418 /** 419 * This method is called to report a syntax error which requires the 420 * insertion of a missing token into the input stream. At the time this 421 * method is called, the missing token has not yet been inserted. When this 422 * method returns, {@code recognizer} is in error recovery mode. 423 * 424 * <p>This method is called when {@link #singleTokenInsertion} identifies 425 * single-token insertion as a viable recovery strategy for a mismatched 426 * input error.</p> 427 * 428 * <p>The default implementation simply returns if the handler is already in 429 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 430 * enter error recovery mode, followed by calling 431 * {@link Parser#notifyErrorListeners}.</p> 432 * 433 * @param recognizer the parser instance 434 */ 435 protected void reportMissingToken(Parser recognizer) 436 { 437 if (inErrorRecoveryMode(recognizer)) { 438 return; 439 } 440 441 beginErrorCondition(recognizer); 442 443 Token t = recognizer.getCurrentToken(); 444 IntervalSet expecting = getExpectedTokens(recognizer); 445 string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary), 446 getTokenErrorDisplay(t)); 447 448 recognizer.notifyErrorListeners(t, msg, null); 449 } 450 451 /** 452 * {@inheritDoc} 453 * 454 * <p>The default implementation attempts to recover from the mismatched input 455 * by using single token insertion and deletion as described below. If the 456 * recovery attempt fails, this method throws an 457 * {@link InputMismatchException}.</p> 458 * 459 * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p> 460 * 461 * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the 462 * right token, however, then assume {@code LA(1)} is some extra spurious 463 * token and delete it. Then consume and return the next token (which was 464 * the {@code LA(2)} token) as the successful result of the match operation.</p> 465 * 466 * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p> 467 * 468 * <p><strong>MISSING TOKEN</strong> (single token insertion)</p> 469 * 470 * <p>If current token (at {@code LA(1)}) is consistent with what could come 471 * after the expected {@code LA(1)} token, then assume the token is missing 472 * and use the parser's {@link TokenFactory} to create it on the fly. The 473 * "insertion" is performed by returning the created token as the successful 474 * result of the match operation.</p> 475 * 476 * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p> 477 * 478 * <p><strong>EXAMPLE</strong></p> 479 * 480 * <p>For example, Input {@code i=(3;} is clearly missing the {@code '$(RPAREN)'}. When 481 * the parser returns from the nested call to {@code expr}, it will have 482 * call chain:</p> 483 * 484 * <pre> 485 * stat → expr → atom 486 * </pre> 487 * 488 * and it will be trying to match the {@code '$(RPAREN)'} at this point in the 489 * derivation: 490 * 491 * <pre> 492 * => ID '=' '(' INT ')' ('+' atom)* ';' 493 * ^ 494 * </pre> 495 * 496 * The attempt to match {@code '$(RPAREN)'} will fail when it sees {@code ';'} and 497 * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} 498 * is in the set of tokens that can follow the {@code '$(RPAREN)'} token reference 499 * in rule {@code atom}. It can assume that you forgot the {@code ')'}. 500 */ 501 public Token recoverInline(Parser recognizer) 502 { 503 // SINGLE TOKEN DELETION 504 Token matchedSymbol = singleTokenDeletion(recognizer); 505 if (matchedSymbol !is null) { 506 // we have deleted the extra token. 507 // now, move past ttype token as if all were ok 508 recognizer.consume(); 509 return matchedSymbol; 510 } 511 512 // SINGLE TOKEN INSERTION 513 if (singleTokenInsertion(recognizer)) { 514 return getMissingSymbol(recognizer); 515 } 516 517 // even that didn't work; must throw the exception 518 InputMismatchException e; 519 if (nextTokensContext is null) { 520 e = new InputMismatchException(recognizer); 521 } else { 522 e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext); 523 } 524 throw e; 525 } 526 527 /** 528 * This method implements the single-token insertion inline error recovery 529 * strategy. It is called by {@link #recoverInline} if the single-token 530 * deletion strategy fails to recover from the mismatched input. If this 531 * method returns {@code true}, {@code recognizer} will be in error recovery 532 * mode. 533 * 534 * <p>This method determines whether or not single-token insertion is viable by 535 * checking if the {@code LA(1)} input symbol could be successfully matched 536 * if it were instead the {@code LA(2)} symbol. If this method returns 537 * {@code true}, the caller is responsible for creating and inserting a 538 * token with the correct type to produce this behavior.</p> 539 * 540 * @param recognizer the parser instance 541 * @return {@code true} if single-token insertion is a viable recovery 542 * strategy for the current mismatched input, otherwise {@code false} 543 */ 544 protected bool singleTokenInsertion(Parser recognizer) 545 { 546 int currentSymbolType = recognizer.getInputStream().LA(1); 547 // if current token is consistent with what could come after current 548 // ATN state, then we know we're missing a token; error recovery 549 // is free to conjure up and insert the missing token 550 ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState]; 551 ATNState next = currentState.transition(0).target; 552 ATN atn = recognizer.getInterpreter().atn; 553 IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx); 554 debug(ErrorHandling) 555 { 556 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames)); 557 } 558 if ( expectingAtLL2.contains(currentSymbolType) ) { 559 reportMissingToken(recognizer); 560 return true; 561 } 562 return false; 563 } 564 565 /** 566 * This method implements the single-token deletion inline error recovery 567 * strategy. It is called by {@link #recoverInline} to attempt to recover 568 * from mismatched input. If this method returns null, the parser and error 569 * handler state will not have changed. If this method returns non-null, 570 * {@code recognizer} will <em>not</em> be in error recovery mode since the 571 * returned token was a successful match. 572 * 573 * <p>If the single-token deletion is successful, this method calls 574 * {@link #reportUnwantedToken} to report the error, followed by 575 * {@link Parser#consume} to actually "delete" the extraneous token. Then, 576 * before returning {@link #reportMatch} is called to signal a successful 577 * match.</p> 578 * 579 * @param recognizer the parser instance 580 * @return the successfully matched {@link Token} instance if single-token 581 * deletion successfully recovers from the mismatched input, otherwise 582 * {@code null} 583 */ 584 protected Token singleTokenDeletion(Parser recognizer) 585 { 586 int nextTokenType = recognizer.getInputStream().LA(2); 587 IntervalSet expecting = getExpectedTokens(recognizer); 588 if ( expecting.contains(nextTokenType) ) { 589 reportUnwantedToken(recognizer); 590 /* 591 System.err.println("recoverFromMismatchedToken deleting "+ 592 ((TokenStream)recognizer.getInputStream()).LT(1)+ 593 " since "+((TokenStream)recognizer.getInputStream()).LT(2)+ 594 " is what we want"); 595 */ 596 recognizer.consume(); // simply delete extra token 597 // we want to return the token we're actually matching 598 Token matchedSymbol = recognizer.getCurrentToken(); 599 reportMatch(recognizer); // we know current token is correct 600 return matchedSymbol; 601 } 602 return null; 603 } 604 605 /** 606 * Conjure up a missing token during error recovery. 607 * 608 * The recognizer attempts to recover from single missing 609 * symbols. But, actions might refer to that missing symbol. 610 * For example, x=ID {f($x);}. The action clearly assumes 611 * that there has been an identifier matched previously and that 612 * $x points at that token. If that token is missing, but 613 * the next token in the stream is what we want we assume that 614 * this token is missing and we keep going. Because we 615 * have to return some token to replace the missing token, 616 * we have to conjure one up. This method gives the user control 617 * over the tokens returned for missing tokens. Mostly, 618 * you will want to create something special for identifier 619 * tokens. For literals such as '{' and ',', the default 620 * action in the parser or tree parser works. It simply creates 621 * a CommonToken of the appropriate type. The text will be the token. 622 * If you change what tokens must be created by the lexer, 623 * override this method to create the appropriate tokens. 624 */ 625 protected Token getMissingSymbol(Parser recognizer) 626 { 627 Token currentSymbol = recognizer.getCurrentToken(); 628 IntervalSet expecting = getExpectedTokens(recognizer); 629 int expectedTokenType = TokenConstantDefinition.INVALID_TYPE; 630 if (!expecting.isNil) { 631 expectedTokenType = expecting.getMinElement(); // get any element 632 } 633 string tokenText; 634 if (expectedTokenType == TokenConstantDefinition.EOF) 635 tokenText = "<missing EOF>"; 636 else 637 tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType)); 638 Token current = currentSymbol; 639 Token lookback = recognizer.getInputStream().LT(-1); 640 if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) { 641 current = lookback; 642 } 643 TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream()); 644 Variant v = tokenText; 645 return 646 recognizer.tokenFactory().create(tsp, expectedTokenType, v, 647 TokenConstantDefinition.DEFAULT_CHANNEL, 648 -1, -1, 649 current.getLine(), current.getCharPositionInLine()); 650 651 } 652 653 protected IntervalSet getExpectedTokens(Parser recognizer) 654 { 655 return recognizer.getExpectedTokens; 656 } 657 658 /** 659 * How should a token be displayed in an error message? The default 660 * is to display just the text, but during development you might 661 * want to have a lot of information spit out. Override in that case 662 * to use t.toString() (which, for CommonToken, dumps everything about 663 * the token). This is better than forcing you to override a method in 664 * your token objects because you don't have to go modify your lexer 665 * so that it creates a new Java type. 666 */ 667 protected string getTokenErrorDisplay(Token t) 668 { 669 if (t is null) return "<no token>"; 670 string s = getSymbolText(t); 671 if (s is null) { 672 if (getSymbolType(t) == TokenConstantDefinition.EOF) { 673 s = "<EOF>"; 674 } 675 else { 676 s = format("<%s>", getSymbolType(t)); 677 } 678 } 679 return escapeWSAndQuote(s); 680 } 681 682 protected string getSymbolText(Token symbol) 683 { 684 return to!string(symbol.getText); 685 } 686 687 protected int getSymbolType(Token symbol) 688 { 689 return symbol.getType(); 690 } 691 692 protected string escapeWSAndQuote(string s) 693 { 694 s = s.replace("\n","\\n"); 695 s = s.replace("\r","\\r"); 696 s = s.replace("\t","\\t"); 697 return "'" ~ s ~ "'"; 698 } 699 700 /** 701 * Compute the error recovery set for the current rule. During 702 * rule invocation, the parser pushes the set of tokens that can 703 * follow that rule reference on the stack; this amounts to 704 * computing FIRST of what follows the rule reference in the 705 * enclosing rule. See LinearApproximator.FIRST(). 706 * This local follow set only includes tokens 707 * from within the rule; i.e., the FIRST computation done by 708 * ANTLR stops at the end of a rule. 709 * 710 * EXAMPLE 711 * 712 * When you find a "no viable alt exception", the input is not 713 * consistent with any of the alternatives for rule r. The best 714 * thing to do is to consume tokens until you see something that 715 * can legally follow a call to r *or* any rule that called r. 716 * You don't want the exact set of viable next tokens because the 717 * input might just be missing a token--you might consume the 718 * rest of the input looking for one of the missing tokens. 719 * 720 * Consider grammar: 721 * 722 * a : '[' b ']' 723 * | '(' b ')' 724 * ; 725 * b : c '^' INT ; 726 * c : ID 727 * | INT 728 * ; 729 * 730 * At each rule invocation, the set of tokens that could follow 731 * that rule is pushed on a stack. Here are the various 732 * context-sensitive follow sets: 733 * 734 * FOLLOW(b1_in_a) = FIRST(']') = ']' 735 * FOLLOW(b2_in_a) = FIRST('$(RPAREN)') = '$(RPAREN)' 736 * FOLLOW(c_in_b) = FIRST('^') = '^' 737 * 738 * Upon erroneous input "[]", the call chain is 739 * 740 * a -> b -> c 741 * 742 * and, hence, the follow context stack is: 743 * 744 * depth follow set start of rule execution 745 * 0 <EOF> a (from main()) 746 * 1 ']' b 747 * 2 '^' c 748 * 749 * Notice that '$(RPAREN)' is not included, because b would have to have 750 * been called from a different context in rule a for '$(RPAREN)' to be 751 * included. 752 * 753 * For error recovery, we cannot consider FOLLOW(c) 754 * (context-sensitive or otherwise). We need the combined set of 755 * all context-sensitive FOLLOW sets--the set of all tokens that 756 * could follow any reference in the call chain. We need to 757 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 758 * we resync'd to that token, we'd consume until EOF. We need to 759 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 760 * In this case, for input "[]", LA(1) is ']' and in the set, so we would 761 * not consume anything. After printing an error, rule c would 762 * return normally. Rule b would not find the required '^' though. 763 * At this point, it gets a mismatched token error and throws an 764 * exception (since LA(1) is not in the viable following token 765 * set). The rule exception handler tries to recover, but finds 766 * the same recovery set and doesn't consume anything. Rule b 767 * exits normally returning to rule a. Now it finds the ']' (and 768 * with the successful match exits errorRecovery mode). 769 * 770 * So, you can see that the parser walks up the call chain looking 771 * for the token that was a member of the recovery set. 772 * 773 * Errors are not generated in errorRecovery mode. 774 * 775 * ANTLR's error recovery mechanism is based upon original ideas: 776 * 777 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 778 * 779 * and 780 * 781 * "A note on error recovery in recursive descent parsers": 782 * http://portal.acm.org/citation.cfm?id=947902.947905 783 * 784 * Later, Josef Grosch had some good ideas: 785 * 786 * "Efficient and Comfortable Error Recovery in Recursive Descent 787 * Parsers": 788 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 789 * 790 * Like Grosch I implement context-sensitive FOLLOW sets that are combined 791 * at run-time upon error to avoid overhead during parsing. 792 */ 793 protected IntervalSet getErrorRecoverySet(Parser recognizer) 794 { 795 ATN atn = recognizer.getInterpreter().atn; 796 RuleContext ctx = recognizer.ctx; 797 IntervalSet recoverSet = new IntervalSet(); 798 while (ctx !is null && ctx.invokingState >= 0) { 799 // compute what follows who invoked us 800 ATNState invokingState = atn.states[ctx.invokingState]; 801 RuleTransition rt = cast(RuleTransition)invokingState.transition(0); 802 IntervalSet follow = atn.nextTokens(rt.followState); 803 recoverSet.addAll(follow); 804 ctx = ctx.parent; 805 } 806 recoverSet.remove(TokenConstantDefinition.EPSILON); 807 // System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); 808 return recoverSet; 809 } 810 811 /** 812 * Consume tokens until one matches the given token set. 813 */ 814 protected void consumeUntil(Parser recognizer, IntervalSet set) 815 { 816 //stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames())); 817 int ttype = recognizer.getInputStream().LA(1); 818 while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) { 819 debug(ErrorHandling) 820 { 821 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]); 822 } 823 // recognizer.getInputStream().consume(); 824 recognizer.consume(); 825 ttype = recognizer.getInputStream().LA(1); 826 } 827 } 828 829 }