1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.DefaultErrorStrategy; 8 9 import antlr.v4.runtime.ANTLRErrorStrategy; 10 import antlr.v4.runtime.CharStream; 11 import antlr.v4.runtime.FailedPredicateException; 12 import antlr.v4.runtime.InputMismatchException; 13 import antlr.v4.runtime.NoViableAltException; 14 import antlr.v4.runtime.Parser; 15 import antlr.v4.runtime.ParserRuleContext; 16 import antlr.v4.runtime.RecognitionException; 17 import antlr.v4.runtime.RuleContext; 18 import antlr.v4.runtime.Token; 19 import antlr.v4.runtime.TokenConstantDefinition; 20 import antlr.v4.runtime.TokenSource; 21 import antlr.v4.runtime.TokenStream; 22 import antlr.v4.runtime.atn.ATN; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.RuleTransition; 25 import antlr.v4.runtime.atn.StateNames; 26 import antlr.v4.runtime.misc.IntervalSet; 27 import std.array; 28 import std.conv; 29 import std.format; 30 import std.stdio; 31 import std.typecons; 32 import std.variant; 33 34 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b"); 35 36 /** 37 * This is the default implementation of {@link ANTLRErrorStrategy} used for 38 * error reporting and recovery in ANTLR parsers. 39 */ 40 class DefaultErrorStrategy : ANTLRErrorStrategy 41 { 42 43 /** 44 * Indicates whether the error strategy is currently "recovering from an 45 * error". This is used to suppress reporting multiple error messages while 46 * attempting to recover from a detected syntax error. 47 * 48 * @see #inErrorRecoveryMode 49 */ 50 protected bool errorRecoveryMode; 51 52 /** 53 * The index into the input stream where the last error occurred. 54 * This is used to prevent infinite loops where an error is found 55 * but no token is consumed during recovery...another error is found, 56 * ad nauseum. This is a failsafe mechanism to guarantee that at least 57 * one token/tree node is consumed for two errors. 58 */ 59 protected int lastErrorIndex = -1; 60 61 public IntervalSet lastErrorStates; 62 63 /** 64 * This field is used to propagate information about the lookahead following 65 * the previous match. Since prediction prefers completing the current rule 66 * to error recovery efforts, error reporting may occur later than the 67 * original point where it was discoverable. The original context is used to 68 * compute the true expected sets as though the reporting occurred as early 69 * as possible. 70 */ 71 protected ParserRuleContext nextTokensContext; 72 73 /** 74 * @see #nextTokensContext 75 */ 76 protected int nextTokensState; 77 78 /** 79 * <p>The default implementation simply calls {@link #endErrorCondition} to 80 * ensure that the handler is not in error recovery mode.</p> 81 */ 82 public void reset(Parser recognizer) 83 { 84 endErrorCondition(recognizer); 85 } 86 87 /** 88 * This method is called to enter error recovery mode when a recognition 89 * exception is reported. 90 * 91 * @param recognizer the parser instance 92 */ 93 protected void beginErrorCondition(Parser recognizer) 94 { 95 errorRecoveryMode = true; 96 } 97 98 public bool inErrorRecoveryMode(Parser recognizer) 99 { 100 return errorRecoveryMode; 101 } 102 103 /** 104 * This method is called to leave error recovery mode after recovering from 105 * a recognition exception. 106 * 107 * @param recognizer 108 */ 109 protected void endErrorCondition(Parser recognizer) 110 { 111 errorRecoveryMode = false; 112 lastErrorStates = null; 113 lastErrorIndex = -1; 114 } 115 116 /** 117 * {@inheritDoc} 118 * 119 * <p>The default implementation simply calls {@link #endErrorCondition}.</p> 120 */ 121 public void reportMatch(Parser recognizer) 122 { 123 endErrorCondition(recognizer); 124 } 125 126 /** 127 * {@inheritDoc} 128 * 129 * <p>The default implementation returns immediately if the handler is already 130 * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} 131 * and dispatches the reporting task based on the runtime type of {@code e} 132 * according to the following table.</p> 133 * 134 * <ul> 135 * <li>{@link NoViableAltException}: Dispatches the call to 136 * {@link #reportNoViableAlternative}</li> 137 * <li>{@link InputMismatchException}: Dispatches the call to 138 * {@link #reportInputMismatch}</li> 139 * <li>{@link FailedPredicateException}: Dispatches the call to 140 * {@link #reportFailedPredicate}</li> 141 * <li>All other types: calls {@link Parser#notifyErrorListeners} to report 142 * the exception</li> 143 * </ul> 144 */ 145 public void reportError(Parser recognizer, RecognitionException e) 146 { 147 // if we've already reported an error and have not matched a token 148 // yet successfully, don't report any errors. 149 if (inErrorRecoveryMode(recognizer)) { 150 debug(DefaultErrorStrategy) 151 { 152 writefln("DefaultErrorStrategy:[SPURIOUS]"); 153 } 154 return; // don't report spurious errors 155 } 156 beginErrorCondition(recognizer); 157 if (cast(NoViableAltException)e) { 158 reportNoViableAlternative(recognizer, cast(NoViableAltException)e); 159 } 160 else if (cast(InputMismatchException)e) { 161 reportInputMismatch(recognizer, cast(InputMismatchException)e); 162 } 163 else if (cast(FailedPredicateException)e) { 164 reportFailedPredicate(recognizer, cast(FailedPredicateException)e); 165 } 166 else { 167 stderr.writefln("unknown recognition error type: %s", e.stringof); 168 recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e); 169 } 170 } 171 172 /** 173 * {@inheritDoc} 174 * 175 * <p>The default implementation resynchronizes the parser by consuming tokens 176 * until we find one in the resynchronization set--loosely the set of tokens 177 * that can follow the current rule.</p> 178 */ 179 public void recover(Parser recognizer, RecognitionException e) 180 { 181 debug(DefaultErrorStrategy) 182 { 183 writefln("recover in %s index=%s, lastErrorIndex=%s, states=%s", 184 recognizer.getRuleInvocationStack, 185 recognizer.getInputStream().index(), 186 lastErrorIndex, 187 lastErrorStates); 188 } 189 if (lastErrorIndex == recognizer.getInputStream.index() && 190 lastErrorStates !is null && 191 lastErrorStates.contains(recognizer.getState)) 192 { 193 // uh oh, another error at same token index and previously-visited 194 // state in ATN; must be a case where LT(1) is in the recovery 195 // token set so nothing got consumed. Consume a single token 196 // at least to prevent an infinite loop; this is a failsafe. 197 debug(DefaultErrorStrategy) 198 { 199 stderr.writefln("seen error condition before index= %s, states=%s", 200 lastErrorIndex, lastErrorStates); 201 stderr.writefln("FAILSAFE consumes %s", 202 recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]); 203 } 204 recognizer.consume; 205 } 206 lastErrorIndex = recognizer.getInputStream.index; 207 if (!lastErrorStates) 208 lastErrorStates = new IntervalSet; 209 lastErrorStates.add(recognizer.getState); 210 IntervalSet followSet = getErrorRecoverySet(recognizer); 211 consumeUntil(recognizer, followSet); 212 } 213 214 /** 215 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure 216 * that the current lookahead symbol is consistent with what were expecting 217 * at this point in the ATN. You can call this anytime but ANTLR only 218 * generates code to check before subrules/loops and each iteration. 219 * 220 * <p>Implements Jim Idle's magic sync mechanism in closures and optional 221 * subrules. E.g.,</p> 222 * 223 * <pre> 224 * a : sync ( stuff sync )* ; 225 * sync : {consume to what can follow sync} ; 226 * </pre> 227 * 228 * At the start of a sub rule upon error, {@link #sync} performs single 229 * token deletion, if possible. If it can't do that, it bails on the current 230 * rule and uses the default error recovery, which consumes until the 231 * resynchronization set of the current rule. 232 * 233 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block 234 * with an empty alternative), then the expected set includes what follows 235 * the subrule.</p> 236 * 237 * <p>During loop iteration, it consumes until it sees a token that can start a 238 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to 239 * stay in the loop as long as possible.</p> 240 * 241 * <p><strong>ORIGINS</strong></p> 242 * 243 * <p>Previous versions of ANTLR did a poor job of their recovery within loops. 244 * A single mismatch token or missing token would force the parser to bail 245 * out of the entire rules surrounding the loop. So, for rule</p> 246 * 247 * <pre> 248 * classDef : 'class' ID '{' member* '}' 249 * </pre> 250 * 251 * input with an extra token between members would force the parser to 252 * consume until it found the next class definition rather than the next 253 * member definition of the current class. 254 * 255 * <p>This functionality cost a little bit of effort because the parser has to 256 * compare token set at the start of the loop and at each iteration. If for 257 * some reason speed is suffering for you, you can turn off this 258 * functionality by simply overriding this method as a blank { }.</p> 259 */ 260 public void sync(Parser recognizer) 261 { 262 ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState]; 263 debug(ErrorHandling) 264 { 265 writefln("sync @ %s=%s", s.stateNumber, s.classinfo); 266 } 267 // If already recovering, don't try to sync 268 if (inErrorRecoveryMode(recognizer)) { 269 return; 270 } 271 TokenStream tokens = recognizer.getInputStream; 272 int la = tokens.LA(1); 273 274 // try cheaper subset first; might get lucky. seems to shave a wee bit off 275 IntervalSet nextTokens = recognizer.getATN.nextTokens(s); 276 if (nextTokens.contains(la)) { 277 // We are sure the token matches 278 nextTokensContext = null; 279 nextTokensState = ATNState.INVALID_STATE_NUMBER; 280 return; 281 } 282 283 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 284 if (nextTokensContext is null) { 285 // It's possible the next token won't match; information tracked 286 // by sync is restricted for performance. 287 nextTokensContext = recognizer.ctx; 288 nextTokensState = recognizer.getState; 289 } 290 return; 291 } 292 293 switch (s.getStateType) { 294 case StateNames.BLOCK_START: 295 case StateNames.STAR_BLOCK_START: 296 case StateNames.PLUS_BLOCK_START: 297 case StateNames.STAR_LOOP_ENTRY: 298 // report error and recover if possible 299 if (singleTokenDeletion(recognizer) !is null) { 300 return; 301 } 302 throw new InputMismatchException(recognizer); 303 304 case StateNames.PLUS_LOOP_BACK: 305 case StateNames.STAR_LOOP_BACK: 306 debug(ErrorHandling) 307 { 308 stderr.writefln("at loop back: %s", s); 309 } 310 reportUnwantedToken(recognizer); 311 IntervalSet expecting = recognizer.getExpectedTokens; 312 IntervalSet whatFollowsLoopIterationOrRule = 313 expecting.or(getErrorRecoverySet(recognizer)); 314 consumeUntil(recognizer, whatFollowsLoopIterationOrRule); 315 break; 316 317 default: 318 // do nothing if we can't identify the exact kind of ATN state 319 break; 320 } 321 } 322 323 /** 324 * This is called by {@link #reportError} when the exception is a 325 * {@link NoViableAltException}. 326 * 327 * @see #reportError 328 * 329 * @param recognizer the parser instance 330 * @param e the recognition exception 331 */ 332 protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e) 333 { 334 TokenStream tokens = recognizer.getInputStream(); 335 string input; 336 if (tokens !is null) { 337 if (e.getStartToken.getType == TokenConstantDefinition.EOF) 338 input = "<EOF>"; 339 else 340 input = to!string(tokens.getText(e.getStartToken, e.getOffendingToken)); 341 } 342 else { 343 input = "<unknown input>"; 344 } 345 string msg = format("no viable alternative at input %s", escapeWSAndQuote(input)); 346 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 347 } 348 349 /** 350 * This is called by {@link #reportError} when the exception is an 351 * {@link InputMismatchException}. 352 * 353 * @see #reportError 354 * 355 * @param recognizer the parser instance 356 * @param e the recognition exception 357 */ 358 protected void reportInputMismatch(Parser recognizer, InputMismatchException e) 359 { 360 string msg = format("mismatched input %1$s expecting %2$s", 361 getTokenErrorDisplay(e.getOffendingToken), 362 e.getExpectedTokens.toString(recognizer.getVocabulary)); 363 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 364 } 365 366 /** 367 * This is called by {@link #reportError} when the exception is a 368 * {@link FailedPredicateException}. 369 * 370 * @see #reportError 371 * 372 * @param recognizer the parser instance 373 * @param e the recognition exception 374 */ 375 protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e) 376 { 377 string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()]; 378 string msg = "rule " ~ ruleName ~ " " ~ e.msg; 379 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 380 } 381 382 /** 383 * This method is called to report a syntax error which requires the removal 384 * of a token from the input stream. At the time this method is called, the 385 * erroneous symbol is current {@code LT(1)} symbol and has not yet been 386 * removed from the input stream. When this method returns, 387 * {@code recognizer} is in error recovery mode. 388 * 389 * <p>This method is called when {@link #singleTokenDeletion} identifies 390 * single-token deletion as a viable recovery strategy for a mismatched 391 * input error.</p> 392 * 393 * <p>The default implementation simply returns if the handler is already in 394 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 395 * enter error recovery mode, followed by calling 396 * {@link Parser#notifyErrorListeners}.</p> 397 * 398 * @param recognizer the parser instance 399 */ 400 protected void reportUnwantedToken(Parser recognizer) 401 { 402 if (inErrorRecoveryMode(recognizer)) { 403 return; 404 } 405 406 beginErrorCondition(recognizer); 407 408 Token t = recognizer.getCurrentToken(); 409 string tokenName = getTokenErrorDisplay(t); 410 IntervalSet expecting = getExpectedTokens(recognizer); 411 string msg = "extraneous input " ~ tokenName ~ " expecting " ~ 412 expecting.toString(recognizer.getVocabulary()); 413 recognizer.notifyErrorListeners(t, msg, null); 414 } 415 416 /** 417 * This method is called to report a syntax error which requires the 418 * insertion of a missing token into the input stream. At the time this 419 * method is called, the missing token has not yet been inserted. When this 420 * method returns, {@code recognizer} is in error recovery mode. 421 * 422 * <p>This method is called when {@link #singleTokenInsertion} identifies 423 * single-token insertion as a viable recovery strategy for a mismatched 424 * input error.</p> 425 * 426 * <p>The default implementation simply returns if the handler is already in 427 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 428 * enter error recovery mode, followed by calling 429 * {@link Parser#notifyErrorListeners}.</p> 430 * 431 * @param recognizer the parser instance 432 */ 433 protected void reportMissingToken(Parser recognizer) 434 { 435 if (inErrorRecoveryMode(recognizer)) { 436 return; 437 } 438 439 beginErrorCondition(recognizer); 440 441 Token t = recognizer.getCurrentToken(); 442 IntervalSet expecting = getExpectedTokens(recognizer); 443 string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary), 444 getTokenErrorDisplay(t)); 445 446 recognizer.notifyErrorListeners(t, msg, null); 447 } 448 449 /** 450 * {@inheritDoc} 451 * 452 * <p>The default implementation attempts to recover from the mismatched input 453 * by using single token insertion and deletion as described below. If the 454 * recovery attempt fails, this method throws an 455 * {@link InputMismatchException}.</p> 456 * 457 * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p> 458 * 459 * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the 460 * right token, however, then assume {@code LA(1)} is some extra spurious 461 * token and delete it. Then consume and return the next token (which was 462 * the {@code LA(2)} token) as the successful result of the match operation.</p> 463 * 464 * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p> 465 * 466 * <p><strong>MISSING TOKEN</strong> (single token insertion)</p> 467 * 468 * <p>If current token (at {@code LA(1)}) is consistent with what could come 469 * after the expected {@code LA(1)} token, then assume the token is missing 470 * and use the parser's {@link TokenFactory} to create it on the fly. The 471 * "insertion" is performed by returning the created token as the successful 472 * result of the match operation.</p> 473 * 474 * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p> 475 * 476 * <p><strong>EXAMPLE</strong></p> 477 * 478 * <p>For example, Input {@code i=(3;} is clearly missing the {@code '$(RPAREN)'}. When 479 * the parser returns from the nested call to {@code expr}, it will have 480 * call chain:</p> 481 * 482 * <pre> 483 * stat → expr → atom 484 * </pre> 485 * 486 * and it will be trying to match the {@code '$(RPAREN)'} at this point in the 487 * derivation: 488 * 489 * <pre> 490 * => ID '=' '(' INT ')' ('+' atom)* ';' 491 * ^ 492 * </pre> 493 * 494 * The attempt to match {@code '$(RPAREN)'} will fail when it sees {@code ';'} and 495 * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} 496 * is in the set of tokens that can follow the {@code '$(RPAREN)'} token reference 497 * in rule {@code atom}. It can assume that you forgot the {@code ')'}. 498 */ 499 public Token recoverInline(Parser recognizer) 500 { 501 // SINGLE TOKEN DELETION 502 Token matchedSymbol = singleTokenDeletion(recognizer); 503 if (matchedSymbol !is null) { 504 // we have deleted the extra token. 505 // now, move past ttype token as if all were ok 506 recognizer.consume(); 507 return matchedSymbol; 508 } 509 510 // SINGLE TOKEN INSERTION 511 if (singleTokenInsertion(recognizer)) { 512 return getMissingSymbol(recognizer); 513 } 514 515 // even that didn't work; must throw the exception 516 InputMismatchException e; 517 if (nextTokensContext is null) { 518 e = new InputMismatchException(recognizer); 519 } else { 520 e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext); 521 } 522 throw e; 523 } 524 525 /** 526 * This method implements the single-token insertion inline error recovery 527 * strategy. It is called by {@link #recoverInline} if the single-token 528 * deletion strategy fails to recover from the mismatched input. If this 529 * method returns {@code true}, {@code recognizer} will be in error recovery 530 * mode. 531 * 532 * <p>This method determines whether or not single-token insertion is viable by 533 * checking if the {@code LA(1)} input symbol could be successfully matched 534 * if it were instead the {@code LA(2)} symbol. If this method returns 535 * {@code true}, the caller is responsible for creating and inserting a 536 * token with the correct type to produce this behavior.</p> 537 * 538 * @param recognizer the parser instance 539 * @return {@code true} if single-token insertion is a viable recovery 540 * strategy for the current mismatched input, otherwise {@code false} 541 */ 542 protected bool singleTokenInsertion(Parser recognizer) 543 { 544 int currentSymbolType = recognizer.getInputStream().LA(1); 545 // if current token is consistent with what could come after current 546 // ATN state, then we know we're missing a token; error recovery 547 // is free to conjure up and insert the missing token 548 ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState]; 549 ATNState next = currentState.transition(0).target; 550 ATN atn = recognizer.getInterpreter().atn; 551 IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx); 552 debug(ErrorHandling) 553 { 554 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames)); 555 } 556 if ( expectingAtLL2.contains(currentSymbolType) ) { 557 reportMissingToken(recognizer); 558 return true; 559 } 560 return false; 561 } 562 563 /** 564 * This method implements the single-token deletion inline error recovery 565 * strategy. It is called by {@link #recoverInline} to attempt to recover 566 * from mismatched input. If this method returns null, the parser and error 567 * handler state will not have changed. If this method returns non-null, 568 * {@code recognizer} will <em>not</em> be in error recovery mode since the 569 * returned token was a successful match. 570 * 571 * <p>If the single-token deletion is successful, this method calls 572 * {@link #reportUnwantedToken} to report the error, followed by 573 * {@link Parser#consume} to actually "delete" the extraneous token. Then, 574 * before returning {@link #reportMatch} is called to signal a successful 575 * match.</p> 576 * 577 * @param recognizer the parser instance 578 * @return the successfully matched {@link Token} instance if single-token 579 * deletion successfully recovers from the mismatched input, otherwise 580 * {@code null} 581 */ 582 protected Token singleTokenDeletion(Parser recognizer) 583 { 584 int nextTokenType = recognizer.getInputStream().LA(2); 585 IntervalSet expecting = getExpectedTokens(recognizer); 586 if ( expecting.contains(nextTokenType) ) { 587 reportUnwantedToken(recognizer); 588 /* 589 System.err.println("recoverFromMismatchedToken deleting "+ 590 ((TokenStream)recognizer.getInputStream()).LT(1)+ 591 " since "+((TokenStream)recognizer.getInputStream()).LT(2)+ 592 " is what we want"); 593 */ 594 recognizer.consume(); // simply delete extra token 595 // we want to return the token we're actually matching 596 Token matchedSymbol = recognizer.getCurrentToken(); 597 reportMatch(recognizer); // we know current token is correct 598 return matchedSymbol; 599 } 600 return null; 601 } 602 603 /** 604 * Conjure up a missing token during error recovery. 605 * 606 * The recognizer attempts to recover from single missing 607 * symbols. But, actions might refer to that missing symbol. 608 * For example, x=ID {f($x);}. The action clearly assumes 609 * that there has been an identifier matched previously and that 610 * $x points at that token. If that token is missing, but 611 * the next token in the stream is what we want we assume that 612 * this token is missing and we keep going. Because we 613 * have to return some token to replace the missing token, 614 * we have to conjure one up. This method gives the user control 615 * over the tokens returned for missing tokens. Mostly, 616 * you will want to create something special for identifier 617 * tokens. For literals such as '{' and ',', the default 618 * action in the parser or tree parser works. It simply creates 619 * a CommonToken of the appropriate type. The text will be the token. 620 * If you change what tokens must be created by the lexer, 621 * override this method to create the appropriate tokens. 622 */ 623 protected Token getMissingSymbol(Parser recognizer) 624 { 625 Token currentSymbol = recognizer.getCurrentToken(); 626 IntervalSet expecting = getExpectedTokens(recognizer); 627 int expectedTokenType = TokenConstantDefinition.INVALID_TYPE; 628 if (!expecting.isNil) { 629 expectedTokenType = expecting.getMinElement(); // get any element 630 } 631 string tokenText; 632 if (expectedTokenType == TokenConstantDefinition.EOF) 633 tokenText = "<missing EOF>"; 634 else 635 tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType)); 636 Token current = currentSymbol; 637 Token lookback = recognizer.getInputStream().LT(-1); 638 if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) { 639 current = lookback; 640 } 641 TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream()); 642 Variant v = tokenText; 643 return 644 recognizer.tokenFactory().create(tsp, expectedTokenType, v, 645 TokenConstantDefinition.DEFAULT_CHANNEL, 646 -1, -1, 647 current.getLine(), current.getCharPositionInLine()); 648 649 } 650 651 protected IntervalSet getExpectedTokens(Parser recognizer) 652 { 653 return recognizer.getExpectedTokens; 654 } 655 656 /** 657 * How should a token be displayed in an error message? The default 658 * is to display just the text, but during development you might 659 * want to have a lot of information spit out. Override in that case 660 * to use t.toString() (which, for CommonToken, dumps everything about 661 * the token). This is better than forcing you to override a method in 662 * your token objects because you don't have to go modify your lexer 663 * so that it creates a new Java type. 664 */ 665 protected string getTokenErrorDisplay(Token t) 666 { 667 if (t is null) return "<no token>"; 668 string s = getSymbolText(t); 669 if (s is null) { 670 if (getSymbolType(t) == TokenConstantDefinition.EOF) { 671 s = "<EOF>"; 672 } 673 else { 674 s = format("<%s>", getSymbolType(t)); 675 } 676 } 677 return escapeWSAndQuote(s); 678 } 679 680 protected string getSymbolText(Token symbol) 681 { 682 return to!string(symbol.getText); 683 } 684 685 protected int getSymbolType(Token symbol) 686 { 687 return symbol.getType(); 688 } 689 690 protected string escapeWSAndQuote(string s) 691 { 692 // if ( s==null ) return s; 693 s = s.replace("\n","\\n"); 694 s = s.replace("\r","\\r"); 695 s = s.replace("\t","\\t"); 696 return "'" ~ s ~ "'"; 697 } 698 699 /** 700 * Compute the error recovery set for the current rule. During 701 * rule invocation, the parser pushes the set of tokens that can 702 * follow that rule reference on the stack; this amounts to 703 * computing FIRST of what follows the rule reference in the 704 * enclosing rule. See LinearApproximator.FIRST(). 705 * This local follow set only includes tokens 706 * from within the rule; i.e., the FIRST computation done by 707 * ANTLR stops at the end of a rule. 708 * 709 * EXAMPLE 710 * 711 * When you find a "no viable alt exception", the input is not 712 * consistent with any of the alternatives for rule r. The best 713 * thing to do is to consume tokens until you see something that 714 * can legally follow a call to r *or* any rule that called r. 715 * You don't want the exact set of viable next tokens because the 716 * input might just be missing a token--you might consume the 717 * rest of the input looking for one of the missing tokens. 718 * 719 * Consider grammar: 720 * 721 * a : '[' b ']' 722 * | '(' b ')' 723 * ; 724 * b : c '^' INT ; 725 * c : ID 726 * | INT 727 * ; 728 * 729 * At each rule invocation, the set of tokens that could follow 730 * that rule is pushed on a stack. Here are the various 731 * context-sensitive follow sets: 732 * 733 * FOLLOW(b1_in_a) = FIRST(']') = ']' 734 * FOLLOW(b2_in_a) = FIRST('$(RPAREN)') = '$(RPAREN)' 735 * FOLLOW(c_in_b) = FIRST('^') = '^' 736 * 737 * Upon erroneous input "[]", the call chain is 738 * 739 * a -> b -> c 740 * 741 * and, hence, the follow context stack is: 742 * 743 * depth follow set start of rule execution 744 * 0 <EOF> a (from main()) 745 * 1 ']' b 746 * 2 '^' c 747 * 748 * Notice that '$(RPAREN)' is not included, because b would have to have 749 * been called from a different context in rule a for '$(RPAREN)' to be 750 * included. 751 * 752 * For error recovery, we cannot consider FOLLOW(c) 753 * (context-sensitive or otherwise). We need the combined set of 754 * all context-sensitive FOLLOW sets--the set of all tokens that 755 * could follow any reference in the call chain. We need to 756 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 757 * we resync'd to that token, we'd consume until EOF. We need to 758 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 759 * In this case, for input "[]", LA(1) is ']' and in the set, so we would 760 * not consume anything. After printing an error, rule c would 761 * return normally. Rule b would not find the required '^' though. 762 * At this point, it gets a mismatched token error and throws an 763 * exception (since LA(1) is not in the viable following token 764 * set). The rule exception handler tries to recover, but finds 765 * the same recovery set and doesn't consume anything. Rule b 766 * exits normally returning to rule a. Now it finds the ']' (and 767 * with the successful match exits errorRecovery mode). 768 * 769 * So, you can see that the parser walks up the call chain looking 770 * for the token that was a member of the recovery set. 771 * 772 * Errors are not generated in errorRecovery mode. 773 * 774 * ANTLR's error recovery mechanism is based upon original ideas: 775 * 776 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 777 * 778 * and 779 * 780 * "A note on error recovery in recursive descent parsers": 781 * http://portal.acm.org/citation.cfm?id=947902.947905 782 * 783 * Later, Josef Grosch had some good ideas: 784 * 785 * "Efficient and Comfortable Error Recovery in Recursive Descent 786 * Parsers": 787 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 788 * 789 * Like Grosch I implement context-sensitive FOLLOW sets that are combined 790 * at run-time upon error to avoid overhead during parsing. 791 */ 792 protected IntervalSet getErrorRecoverySet(Parser recognizer) 793 { 794 ATN atn = recognizer.getInterpreter().atn; 795 RuleContext ctx = recognizer.ctx; 796 IntervalSet recoverSet = new IntervalSet(); 797 while (ctx !is null && ctx.invokingState >= 0) { 798 // compute what follows who invoked us 799 ATNState invokingState = atn.states[ctx.invokingState]; 800 RuleTransition rt = cast(RuleTransition)invokingState.transition(0); 801 IntervalSet follow = atn.nextTokens(rt.followState); 802 recoverSet.addAll(follow); 803 ctx = ctx.parent; 804 } 805 recoverSet.remove(TokenConstantDefinition.EPSILON); 806 // System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); 807 return recoverSet; 808 } 809 810 /** 811 * Consume tokens until one matches the given token set. 812 */ 813 protected void consumeUntil(Parser recognizer, IntervalSet set) 814 { 815 //stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames())); 816 int ttype = recognizer.getInputStream().LA(1); 817 while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) { 818 debug(ErrorHandling) 819 { 820 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]); 821 } 822 // recognizer.getInputStream().consume(); 823 recognizer.consume(); 824 ttype = recognizer.getInputStream().LA(1); 825 } 826 } 827 828 }