1 /* 2 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.DefaultErrorStrategy; 8 9 import antlr.v4.runtime.ANTLRErrorStrategy; 10 import antlr.v4.runtime.CharStream; 11 import antlr.v4.runtime.FailedPredicateException; 12 import antlr.v4.runtime.InputMismatchException; 13 import antlr.v4.runtime.NoViableAltException; 14 import antlr.v4.runtime.Parser; 15 import antlr.v4.runtime.ParserRuleContext; 16 import antlr.v4.runtime.RecognitionException; 17 import antlr.v4.runtime.RuleContext; 18 import antlr.v4.runtime.Token; 19 import antlr.v4.runtime.TokenConstantDefinition; 20 import antlr.v4.runtime.TokenSource; 21 import antlr.v4.runtime.TokenStream; 22 import antlr.v4.runtime.atn.ATN; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.RuleTransition; 25 import antlr.v4.runtime.atn.StateNames; 26 import antlr.v4.runtime.misc.IntervalSet; 27 import std.array; 28 import std.format; 29 import std.stdio; 30 import std.typecons; 31 32 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b"); 33 34 // Class DefaultErrorStrategy 35 /** 36 * This is the default implementation of {@link ANTLRErrorStrategy} used for 37 * error reporting and recovery in ANTLR parsers. 38 */ 39 class DefaultErrorStrategy : ANTLRErrorStrategy 40 { 41 42 /** 43 * Indicates whether the error strategy is currently "recovering from an 44 * error". This is used to suppress reporting multiple error messages while 45 * attempting to recover from a detected syntax error. 46 * 47 * @see #inErrorRecoveryMode 48 */ 49 protected bool errorRecoveryMode; 50 51 /** 52 * The index into the input stream where the last error occurred. 53 * This is used to prevent infinite loops where an error is found 54 * but no token is consumed during recovery...another error is found, 55 * ad nauseum. This is a failsafe mechanism to guarantee that at least 56 * one token/tree node is consumed for two errors. 57 */ 58 protected int lastErrorIndex = -1; 59 60 public IntervalSet lastErrorStates; 61 62 /** 63 * This field is used to propagate information about the lookahead following 64 * the previous match. Since prediction prefers completing the current rule 65 * to error recovery efforts, error reporting may occur later than the 66 * original point where it was discoverable. The original context is used to 67 * compute the true expected sets as though the reporting occurred as early 68 * as possible. 69 */ 70 protected ParserRuleContext nextTokensContext; 71 72 /** 73 * @see #nextTokensContext 74 */ 75 protected int nextTokensState; 76 77 /** 78 * <p>The default implementation simply calls {@link #endErrorCondition} to 79 * ensure that the handler is not in error recovery mode.</p> 80 */ 81 public void reset(Parser recognizer) 82 { 83 endErrorCondition(recognizer); 84 } 85 86 /** 87 * This method is called to enter error recovery mode when a recognition 88 * exception is reported. 89 * 90 * @param recognizer the parser instance 91 */ 92 protected void beginErrorCondition(Parser recognizer) 93 { 94 errorRecoveryMode = true; 95 } 96 97 public bool inErrorRecoveryMode(Parser recognizer) 98 { 99 return errorRecoveryMode; 100 } 101 102 /** 103 * This method is called to leave error recovery mode after recovering from 104 * a recognition exception. 105 * 106 * @param recognizer 107 */ 108 protected void endErrorCondition(Parser recognizer) 109 { 110 errorRecoveryMode = false; 111 lastErrorStates = null; 112 lastErrorIndex = -1; 113 } 114 115 /** 116 * {@inheritDoc} 117 * 118 * <p>The default implementation simply calls {@link #endErrorCondition}.</p> 119 */ 120 public void reportMatch(Parser recognizer) 121 { 122 endErrorCondition(recognizer); 123 } 124 125 /** 126 * {@inheritDoc} 127 * 128 * <p>The default implementation returns immediately if the handler is already 129 * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} 130 * and dispatches the reporting task based on the runtime type of {@code e} 131 * according to the following table.</p> 132 * 133 * <ul> 134 * <li>{@link NoViableAltException}: Dispatches the call to 135 * {@link #reportNoViableAlternative}</li> 136 * <li>{@link InputMismatchException}: Dispatches the call to 137 * {@link #reportInputMismatch}</li> 138 * <li>{@link FailedPredicateException}: Dispatches the call to 139 * {@link #reportFailedPredicate}</li> 140 * <li>All other types: calls {@link Parser#notifyErrorListeners} to report 141 * the exception</li> 142 * </ul> 143 */ 144 public void reportError(Parser recognizer, RecognitionException e) 145 { 146 // if we've already reported an error and have not matched a token 147 // yet successfully, don't report any errors. 148 if (inErrorRecoveryMode(recognizer)) { 149 debug(DefaultErrorStrategy) 150 { 151 writefln("DefaultErrorStrategy:[SPURIOUS]"); 152 } 153 return; // don't report spurious errors 154 } 155 beginErrorCondition(recognizer); 156 if (cast(NoViableAltException)e) { 157 reportNoViableAlternative(recognizer, cast(NoViableAltException)e); 158 } 159 else if (cast(InputMismatchException)e) { 160 reportInputMismatch(recognizer, cast(InputMismatchException)e); 161 } 162 else if (cast(FailedPredicateException)e) { 163 reportFailedPredicate(recognizer, cast(FailedPredicateException)e); 164 } 165 else { 166 stderr.writefln("unknown recognition error type: %s", e.stringof); 167 recognizer.notifyErrorListeners(e.getOffendingToken(), e.msg, e); 168 } 169 } 170 171 /** 172 * {@inheritDoc} 173 * 174 * <p>The default implementation resynchronizes the parser by consuming tokens 175 * until we find one in the resynchronization set--loosely the set of tokens 176 * that can follow the current rule.</p> 177 */ 178 public void recover(Parser recognizer, RecognitionException e) 179 { 180 debug(DefaultErrorStrategy) 181 { 182 writefln("recover in %s index=%s, lastErrorIndex=%s, states=%s", 183 recognizer.getRuleInvocationStack, 184 recognizer.getInputStream().index(), 185 lastErrorIndex, 186 lastErrorStates); 187 } 188 if (lastErrorIndex == recognizer.getInputStream.index() && 189 lastErrorStates !is null && 190 lastErrorStates.contains(recognizer.getState)) 191 { 192 // uh oh, another error at same token index and previously-visited 193 // state in ATN; must be a case where LT(1) is in the recovery 194 // token set so nothing got consumed. Consume a single token 195 // at least to prevent an infinite loop; this is a failsafe. 196 debug(DefaultErrorStrategy) 197 { 198 stderr.writefln("seen error condition before index= %s, states=%s", 199 lastErrorIndex, lastErrorStates); 200 stderr.writefln("FAILSAFE consumes %s", 201 recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]); 202 } 203 recognizer.consume; 204 } 205 lastErrorIndex = recognizer.getInputStream.index; 206 if (!lastErrorStates) 207 lastErrorStates = new IntervalSet; 208 lastErrorStates.add(recognizer.getState); 209 IntervalSet followSet = getErrorRecoverySet(recognizer); 210 consumeUntil(recognizer, followSet); 211 } 212 213 /** 214 * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure 215 * that the current lookahead symbol is consistent with what were expecting 216 * at this point in the ATN. You can call this anytime but ANTLR only 217 * generates code to check before subrules/loops and each iteration. 218 * 219 * <p>Implements Jim Idle's magic sync mechanism in closures and optional 220 * subrules. E.g.,</p> 221 * 222 * <pre> 223 * a : sync ( stuff sync )* ; 224 * sync : {consume to what can follow sync} ; 225 * </pre> 226 * 227 * At the start of a sub rule upon error, {@link #sync} performs single 228 * token deletion, if possible. If it can't do that, it bails on the current 229 * rule and uses the default error recovery, which consumes until the 230 * resynchronization set of the current rule. 231 * 232 * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block 233 * with an empty alternative), then the expected set includes what follows 234 * the subrule.</p> 235 * 236 * <p>During loop iteration, it consumes until it sees a token that can start a 237 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to 238 * stay in the loop as long as possible.</p> 239 * 240 * <p><strong>ORIGINS</strong></p> 241 * 242 * <p>Previous versions of ANTLR did a poor job of their recovery within loops. 243 * A single mismatch token or missing token would force the parser to bail 244 * out of the entire rules surrounding the loop. So, for rule</p> 245 * 246 * <pre> 247 * classDef : 'class' ID '{' member* '}' 248 * </pre> 249 * 250 * input with an extra token between members would force the parser to 251 * consume until it found the next class definition rather than the next 252 * member definition of the current class. 253 * 254 * <p>This functionality cost a little bit of effort because the parser has to 255 * compare token set at the start of the loop and at each iteration. If for 256 * some reason speed is suffering for you, you can turn off this 257 * functionality by simply overriding this method as a blank { }.</p> 258 */ 259 public void sync(Parser recognizer) 260 { 261 ATNState s = recognizer.getInterpreter.atn.states[recognizer.getState]; 262 debug(ErrorHandling) 263 { 264 writefln("sync @ %s=%s", s.stateNumber, s.classinfo); 265 } 266 // If already recovering, don't try to sync 267 if (inErrorRecoveryMode(recognizer)) { 268 return; 269 } 270 TokenStream tokens = recognizer.getInputStream; 271 int la = tokens.LA(1); 272 273 // try cheaper subset first; might get lucky. seems to shave a wee bit off 274 IntervalSet nextTokens = recognizer.getATN.nextTokens(s); 275 if (nextTokens.contains(la)) { 276 // We are sure the token matches 277 nextTokensContext = null; 278 nextTokensState = ATNState.INVALID_STATE_NUMBER; 279 return; 280 } 281 282 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 283 if (nextTokensContext is null) { 284 // It's possible the next token won't match; information tracked 285 // by sync is restricted for performance. 286 nextTokensContext = recognizer.ctx; 287 nextTokensState = recognizer.getState; 288 } 289 return; 290 } 291 292 switch (s.getStateType) { 293 case StateNames.BLOCK_START: 294 case StateNames.STAR_BLOCK_START: 295 case StateNames.PLUS_BLOCK_START: 296 case StateNames.STAR_LOOP_ENTRY: 297 // report error and recover if possible 298 if (singleTokenDeletion(recognizer) !is null) { 299 return; 300 } 301 throw new InputMismatchException(recognizer); 302 303 case StateNames.PLUS_LOOP_BACK: 304 case StateNames.STAR_LOOP_BACK: 305 debug(ErrorHandling) 306 { 307 stderr.writefln("at loop back: %s", s); 308 } 309 reportUnwantedToken(recognizer); 310 IntervalSet expecting = recognizer.getExpectedTokens; 311 IntervalSet whatFollowsLoopIterationOrRule = 312 expecting.or(getErrorRecoverySet(recognizer)); 313 consumeUntil(recognizer, whatFollowsLoopIterationOrRule); 314 break; 315 316 default: 317 // do nothing if we can't identify the exact kind of ATN state 318 break; 319 } 320 } 321 322 /** 323 * This is called by {@link #reportError} when the exception is a 324 * {@link NoViableAltException}. 325 * 326 * @see #reportError 327 * 328 * @param recognizer the parser instance 329 * @param e the recognition exception 330 */ 331 protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e) 332 { 333 TokenStream tokens = recognizer.getInputStream(); 334 string input; 335 if (tokens !is null) { 336 if (e.getStartToken.getType == TokenConstantDefinition.EOF) 337 input = "<EOF>"; 338 else 339 input = tokens.getText(e.getStartToken, e.getOffendingToken); 340 } 341 else { 342 input = "<unknown input>"; 343 } 344 string msg = format("no viable alternative at input %s", escapeWSAndQuote(input)); 345 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 346 } 347 348 /** 349 * This is called by {@link #reportError} when the exception is an 350 * {@link InputMismatchException}. 351 * 352 * @see #reportError 353 * 354 * @param recognizer the parser instance 355 * @param e the recognition exception 356 */ 357 protected void reportInputMismatch(Parser recognizer, InputMismatchException e) 358 { 359 string msg = format("mismatched input %1$s expecting %2$s", 360 getTokenErrorDisplay(e.getOffendingToken), 361 e.getExpectedTokens.toString(recognizer.getVocabulary)); 362 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 363 } 364 365 /** 366 * This is called by {@link #reportError} when the exception is a 367 * {@link FailedPredicateException}. 368 * 369 * @see #reportError 370 * 371 * @param recognizer the parser instance 372 * @param e the recognition exception 373 */ 374 protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e) 375 { 376 string ruleName = recognizer.getRuleNames[recognizer.ctx.getRuleIndex()]; 377 string msg = "rule " ~ ruleName ~ " " ~ e.msg; 378 recognizer.notifyErrorListeners(e.getOffendingToken, msg, e); 379 } 380 381 /** 382 * This method is called to report a syntax error which requires the removal 383 * of a token from the input stream. At the time this method is called, the 384 * erroneous symbol is current {@code LT(1)} symbol and has not yet been 385 * removed from the input stream. When this method returns, 386 * {@code recognizer} is in error recovery mode. 387 * 388 * <p>This method is called when {@link #singleTokenDeletion} identifies 389 * single-token deletion as a viable recovery strategy for a mismatched 390 * input error.</p> 391 * 392 * <p>The default implementation simply returns if the handler is already in 393 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 394 * enter error recovery mode, followed by calling 395 * {@link Parser#notifyErrorListeners}.</p> 396 * 397 * @param recognizer the parser instance 398 */ 399 protected void reportUnwantedToken(Parser recognizer) 400 { 401 if (inErrorRecoveryMode(recognizer)) { 402 return; 403 } 404 405 beginErrorCondition(recognizer); 406 407 Token t = recognizer.getCurrentToken(); 408 string tokenName = getTokenErrorDisplay(t); 409 IntervalSet expecting = getExpectedTokens(recognizer); 410 string msg = "extraneous input " ~ tokenName ~ " expecting " ~ 411 expecting.toString(recognizer.getVocabulary()); 412 recognizer.notifyErrorListeners(t, msg, null); 413 } 414 415 /** 416 * This method is called to report a syntax error which requires the 417 * insertion of a missing token into the input stream. At the time this 418 * method is called, the missing token has not yet been inserted. When this 419 * method returns, {@code recognizer} is in error recovery mode. 420 * 421 * <p>This method is called when {@link #singleTokenInsertion} identifies 422 * single-token insertion as a viable recovery strategy for a mismatched 423 * input error.</p> 424 * 425 * <p>The default implementation simply returns if the handler is already in 426 * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to 427 * enter error recovery mode, followed by calling 428 * {@link Parser#notifyErrorListeners}.</p> 429 * 430 * @param recognizer the parser instance 431 */ 432 protected void reportMissingToken(Parser recognizer) 433 { 434 if (inErrorRecoveryMode(recognizer)) { 435 return; 436 } 437 438 beginErrorCondition(recognizer); 439 440 Token t = recognizer.getCurrentToken(); 441 IntervalSet expecting = getExpectedTokens(recognizer); 442 string msg = format("missing %1$s at %2$s", expecting.toString(recognizer.getVocabulary), 443 getTokenErrorDisplay(t)); 444 445 recognizer.notifyErrorListeners(t, msg, null); 446 } 447 448 /** 449 * {@inheritDoc} 450 * 451 * <p>The default implementation attempts to recover from the mismatched input 452 * by using single token insertion and deletion as described below. If the 453 * recovery attempt fails, this method throws an 454 * {@link InputMismatchException}.</p> 455 * 456 * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p> 457 * 458 * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the 459 * right token, however, then assume {@code LA(1)} is some extra spurious 460 * token and delete it. Then consume and return the next token (which was 461 * the {@code LA(2)} token) as the successful result of the match operation.</p> 462 * 463 * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p> 464 * 465 * <p><strong>MISSING TOKEN</strong> (single token insertion)</p> 466 * 467 * <p>If current token (at {@code LA(1)}) is consistent with what could come 468 * after the expected {@code LA(1)} token, then assume the token is missing 469 * and use the parser's {@link TokenFactory} to create it on the fly. The 470 * "insertion" is performed by returning the created token as the successful 471 * result of the match operation.</p> 472 * 473 * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p> 474 * 475 * <p><strong>EXAMPLE</strong></p> 476 * 477 * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When 478 * the parser returns from the nested call to {@code expr}, it will have 479 * call chain:</p> 480 * 481 * <pre> 482 * stat → expr → atom 483 * </pre> 484 * 485 * and it will be trying to match the {@code ')'} at this point in the 486 * derivation: 487 * 488 * <pre> 489 * => ID '=' '(' INT ')' ('+' atom)* ';' 490 * ^ 491 * </pre> 492 * 493 * The attempt to match {@code ')'} will fail when it sees {@code ';'} and 494 * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} 495 * is in the set of tokens that can follow the {@code ')'} token reference 496 * in rule {@code atom}. It can assume that you forgot the {@code ')'}. 497 */ 498 public Token recoverInline(Parser recognizer) 499 { 500 // SINGLE TOKEN DELETION 501 Token matchedSymbol = singleTokenDeletion(recognizer); 502 if (matchedSymbol !is null) { 503 // we have deleted the extra token. 504 // now, move past ttype token as if all were ok 505 recognizer.consume(); 506 return matchedSymbol; 507 } 508 509 // SINGLE TOKEN INSERTION 510 if (singleTokenInsertion(recognizer)) { 511 return getMissingSymbol(recognizer); 512 } 513 514 // even that didn't work; must throw the exception 515 InputMismatchException e; 516 if (nextTokensContext is null) { 517 e = new InputMismatchException(recognizer); 518 } else { 519 e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext); 520 } 521 throw e; 522 } 523 524 /** 525 * This method implements the single-token insertion inline error recovery 526 * strategy. It is called by {@link #recoverInline} if the single-token 527 * deletion strategy fails to recover from the mismatched input. If this 528 * method returns {@code true}, {@code recognizer} will be in error recovery 529 * mode. 530 * 531 * <p>This method determines whether or not single-token insertion is viable by 532 * checking if the {@code LA(1)} input symbol could be successfully matched 533 * if it were instead the {@code LA(2)} symbol. If this method returns 534 * {@code true}, the caller is responsible for creating and inserting a 535 * token with the correct type to produce this behavior.</p> 536 * 537 * @param recognizer the parser instance 538 * @return {@code true} if single-token insertion is a viable recovery 539 * strategy for the current mismatched input, otherwise {@code false} 540 */ 541 protected bool singleTokenInsertion(Parser recognizer) 542 { 543 int currentSymbolType = recognizer.getInputStream().LA(1); 544 // if current token is consistent with what could come after current 545 // ATN state, then we know we're missing a token; error recovery 546 // is free to conjure up and insert the missing token 547 ATNState currentState = recognizer.getInterpreter().atn.states[recognizer.getState]; 548 ATNState next = currentState.transition(0).target; 549 ATN atn = recognizer.getInterpreter().atn; 550 IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer.ctx); 551 debug(ErrorHandling) 552 { 553 //writefln("LT(2) set=%s", expectingAtLL2.toString(recognizer.getTokenNames)); 554 } 555 if ( expectingAtLL2.contains(currentSymbolType) ) { 556 reportMissingToken(recognizer); 557 return true; 558 } 559 return false; 560 } 561 562 /** 563 * This method implements the single-token deletion inline error recovery 564 * strategy. It is called by {@link #recoverInline} to attempt to recover 565 * from mismatched input. If this method returns null, the parser and error 566 * handler state will not have changed. If this method returns non-null, 567 * {@code recognizer} will <em>not</em> be in error recovery mode since the 568 * returned token was a successful match. 569 * 570 * <p>If the single-token deletion is successful, this method calls 571 * {@link #reportUnwantedToken} to report the error, followed by 572 * {@link Parser#consume} to actually "delete" the extraneous token. Then, 573 * before returning {@link #reportMatch} is called to signal a successful 574 * match.</p> 575 * 576 * @param recognizer the parser instance 577 * @return the successfully matched {@link Token} instance if single-token 578 * deletion successfully recovers from the mismatched input, otherwise 579 * {@code null} 580 */ 581 protected Token singleTokenDeletion(Parser recognizer) 582 { 583 int nextTokenType = recognizer.getInputStream().LA(2); 584 IntervalSet expecting = getExpectedTokens(recognizer); 585 if ( expecting.contains(nextTokenType) ) { 586 reportUnwantedToken(recognizer); 587 /* 588 System.err.println("recoverFromMismatchedToken deleting "+ 589 ((TokenStream)recognizer.getInputStream()).LT(1)+ 590 " since "+((TokenStream)recognizer.getInputStream()).LT(2)+ 591 " is what we want"); 592 */ 593 recognizer.consume(); // simply delete extra token 594 // we want to return the token we're actually matching 595 Token matchedSymbol = recognizer.getCurrentToken(); 596 reportMatch(recognizer); // we know current token is correct 597 return matchedSymbol; 598 } 599 return null; 600 } 601 602 /** 603 * Conjure up a missing token during error recovery. 604 * 605 * The recognizer attempts to recover from single missing 606 * symbols. But, actions might refer to that missing symbol. 607 * For example, x=ID {f($x);}. The action clearly assumes 608 * that there has been an identifier matched previously and that 609 * $x points at that token. If that token is missing, but 610 * the next token in the stream is what we want we assume that 611 * this token is missing and we keep going. Because we 612 * have to return some token to replace the missing token, 613 * we have to conjure one up. This method gives the user control 614 * over the tokens returned for missing tokens. Mostly, 615 * you will want to create something special for identifier 616 * tokens. For literals such as '{' and ',', the default 617 * action in the parser or tree parser works. It simply creates 618 * a CommonToken of the appropriate type. The text will be the token. 619 * If you change what tokens must be created by the lexer, 620 * override this method to create the appropriate tokens. 621 */ 622 protected Token getMissingSymbol(Parser recognizer) 623 { 624 Token currentSymbol = recognizer.getCurrentToken(); 625 IntervalSet expecting = getExpectedTokens(recognizer); 626 int expectedTokenType = TokenConstantDefinition.INVALID_TYPE; 627 if (!expecting.isNil) { 628 expectedTokenType = expecting.getMinElement(); // get any element 629 } 630 string tokenText; 631 if (expectedTokenType == TokenConstantDefinition.EOF) 632 tokenText = "<missing EOF>"; 633 else 634 tokenText = format("<missing %s>", recognizer.getVocabulary.getDisplayName(expectedTokenType)); 635 Token current = currentSymbol; 636 Token lookback = recognizer.getInputStream().LT(-1); 637 if ( current.getType() == TokenConstantDefinition.EOF && lookback !is null) { 638 current = lookback; 639 } 640 TokenFactorySourcePair tsp = tuple(current.getTokenSource(), current.getTokenSource().getInputStream()); 641 return 642 recognizer.tokenFactory().create(tsp, expectedTokenType, tokenText, 643 TokenConstantDefinition.DEFAULT_CHANNEL, 644 -1, -1, 645 current.getLine(), current.getCharPositionInLine()); 646 647 } 648 649 protected IntervalSet getExpectedTokens(Parser recognizer) 650 { 651 return recognizer.getExpectedTokens; 652 } 653 654 /** 655 * How should a token be displayed in an error message? The default 656 * is to display just the text, but during development you might 657 * want to have a lot of information spit out. Override in that case 658 * to use t.toString() (which, for CommonToken, dumps everything about 659 * the token). This is better than forcing you to override a method in 660 * your token objects because you don't have to go modify your lexer 661 * so that it creates a new Java type. 662 */ 663 protected string getTokenErrorDisplay(Token t) 664 { 665 if (t is null) return "<no token>"; 666 string s = getSymbolText(t); 667 if (s is null) { 668 if (getSymbolType(t) == TokenConstantDefinition.EOF) { 669 s = "<EOF>"; 670 } 671 else { 672 s = format("<%s>", getSymbolType(t)); 673 } 674 } 675 return escapeWSAndQuote(s); 676 } 677 678 protected string getSymbolText(Token symbol) 679 { 680 return symbol.getText; 681 } 682 683 protected int getSymbolType(Token symbol) 684 { 685 return symbol.getType(); 686 } 687 688 protected string escapeWSAndQuote(string s) 689 { 690 // if ( s==null ) return s; 691 s = s.replace("\n","\\n"); 692 s = s.replace("\r","\\r"); 693 s = s.replace("\t","\\t"); 694 return "'" ~ s ~ "'"; 695 } 696 697 /** 698 * Compute the error recovery set for the current rule. During 699 * rule invocation, the parser pushes the set of tokens that can 700 * follow that rule reference on the stack; this amounts to 701 * computing FIRST of what follows the rule reference in the 702 * enclosing rule. See LinearApproximator.FIRST(). 703 * This local follow set only includes tokens 704 * from within the rule; i.e., the FIRST computation done by 705 * ANTLR stops at the end of a rule. 706 * 707 * EXAMPLE 708 * 709 * When you find a "no viable alt exception", the input is not 710 * consistent with any of the alternatives for rule r. The best 711 * thing to do is to consume tokens until you see something that 712 * can legally follow a call to r *or* any rule that called r. 713 * You don't want the exact set of viable next tokens because the 714 * input might just be missing a token--you might consume the 715 * rest of the input looking for one of the missing tokens. 716 * 717 * Consider grammar: 718 * 719 * a : '[' b ']' 720 * | '(' b ')' 721 * ; 722 * b : c '^' INT ; 723 * c : ID 724 * | INT 725 * ; 726 * 727 * At each rule invocation, the set of tokens that could follow 728 * that rule is pushed on a stack. Here are the various 729 * context-sensitive follow sets: 730 * 731 * FOLLOW(b1_in_a) = FIRST(']') = ']' 732 * FOLLOW(b2_in_a) = FIRST(')') = ')' 733 * FOLLOW(c_in_b) = FIRST('^') = '^' 734 * 735 * Upon erroneous input "[]", the call chain is 736 * 737 * a -> b -> c 738 * 739 * and, hence, the follow context stack is: 740 * 741 * depth follow set start of rule execution 742 * 0 <EOF> a (from main()) 743 * 1 ']' b 744 * 2 '^' c 745 * 746 * Notice that ')' is not included, because b would have to have 747 * been called from a different context in rule a for ')' to be 748 * included. 749 * 750 * For error recovery, we cannot consider FOLLOW(c) 751 * (context-sensitive or otherwise). We need the combined set of 752 * all context-sensitive FOLLOW sets--the set of all tokens that 753 * could follow any reference in the call chain. We need to 754 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 755 * we resync'd to that token, we'd consume until EOF. We need to 756 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 757 * In this case, for input "[]", LA(1) is ']' and in the set, so we would 758 * not consume anything. After printing an error, rule c would 759 * return normally. Rule b would not find the required '^' though. 760 * At this point, it gets a mismatched token error and throws an 761 * exception (since LA(1) is not in the viable following token 762 * set). The rule exception handler tries to recover, but finds 763 * the same recovery set and doesn't consume anything. Rule b 764 * exits normally returning to rule a. Now it finds the ']' (and 765 * with the successful match exits errorRecovery mode). 766 * 767 * So, you can see that the parser walks up the call chain looking 768 * for the token that was a member of the recovery set. 769 * 770 * Errors are not generated in errorRecovery mode. 771 * 772 * ANTLR's error recovery mechanism is based upon original ideas: 773 * 774 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 775 * 776 * and 777 * 778 * "A note on error recovery in recursive descent parsers": 779 * http://portal.acm.org/citation.cfm?id=947902.947905 780 * 781 * Later, Josef Grosch had some good ideas: 782 * 783 * "Efficient and Comfortable Error Recovery in Recursive Descent 784 * Parsers": 785 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 786 * 787 * Like Grosch I implement context-sensitive FOLLOW sets that are combined 788 * at run-time upon error to avoid overhead during parsing. 789 */ 790 protected IntervalSet getErrorRecoverySet(Parser recognizer) 791 { 792 ATN atn = recognizer.getInterpreter().atn; 793 RuleContext ctx = recognizer.ctx; 794 IntervalSet recoverSet = new IntervalSet(); 795 while (ctx !is null && ctx.invokingState >= 0) { 796 // compute what follows who invoked us 797 ATNState invokingState = atn.states[ctx.invokingState]; 798 RuleTransition rt = cast(RuleTransition)invokingState.transition(0); 799 IntervalSet follow = atn.nextTokens(rt.followState); 800 recoverSet.addAll(follow); 801 ctx = ctx.parent; 802 } 803 recoverSet.remove(TokenConstantDefinition.EPSILON); 804 // System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); 805 return recoverSet; 806 } 807 808 /** 809 * Consume tokens until one matches the given token set. 810 */ 811 protected void consumeUntil(Parser recognizer, IntervalSet set) 812 { 813 //stderr.writefln("consumeUntil(%s)", set.toString(recognizer.getTokenNames())); 814 int ttype = recognizer.getInputStream().LA(1); 815 while (ttype != TokenConstantDefinition.EOF && !set.contains(ttype) ) { 816 debug(ErrorHandling) 817 { 818 //writefln("consume during recover LA(1)=%s", getTokenNames[input.LA(1)]); 819 } 820 // recognizer.getInputStream().consume(); 821 recognizer.consume(); 822 ttype = recognizer.getInputStream().LA(1); 823 } 824 } 825 826 }