1 /* 2 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ParserATNSimulator; 8 9 import antlr.v4.runtime.IntStream; 10 import antlr.v4.runtime.IntStreamConstant; 11 import antlr.v4.runtime.NoViableAltException; 12 import antlr.v4.runtime.Parser; 13 import antlr.v4.runtime.ParserRuleContext; 14 import antlr.v4.runtime.RuleContext; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.Vocabulary; 18 import antlr.v4.runtime.VocabularyImpl; 19 import antlr.v4.runtime.atn.ATN; 20 import antlr.v4.runtime.atn.ATNConfig; 21 import antlr.v4.runtime.atn.ATNConfigSet; 22 import antlr.v4.runtime.atn.ATNSimulator; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.ActionTransition; 25 import antlr.v4.runtime.atn.AtomTransition; 26 import antlr.v4.runtime.atn.DecisionState; 27 import antlr.v4.runtime.atn.EpsilonTransition; 28 import antlr.v4.runtime.atn.InterfaceParserATNSimulator; 29 import antlr.v4.runtime.atn.NotSetTransition; 30 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 31 import antlr.v4.runtime.atn.PredicateTransition; 32 import antlr.v4.runtime.atn.PredictionContext; 33 import antlr.v4.runtime.atn.PredictionContextCache; 34 import antlr.v4.runtime.atn.PredictionMode; 35 import antlr.v4.runtime.atn.PredictionModeConst; 36 import antlr.v4.runtime.atn.RuleStopState; 37 import antlr.v4.runtime.atn.RuleTransition; 38 import antlr.v4.runtime.atn.SemanticContext; 39 import antlr.v4.runtime.atn.SetTransition; 40 import antlr.v4.runtime.atn.SingletonPredictionContext; 41 import antlr.v4.runtime.atn.Transition; 42 import antlr.v4.runtime.atn.TransitionStates; 43 import antlr.v4.runtime.dfa.DFA; 44 import antlr.v4.runtime.dfa.DFAState; 45 import antlr.v4.runtime.dfa.PredPrediction; 46 import antlr.v4.runtime.misc; 47 import std.algorithm.searching; 48 import std.conv; 49 import std.format; 50 import std.stdio; 51 import std.typecons; 52 53 alias ATNConfigSetATNConfigSetPair = Tuple!(ATNConfigSet, "a", ATNConfigSet, "b"); 54 55 // Class ParserATNSimulator 56 /** 57 * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. 58 * 59 * <p> 60 * The basic complexity of the adaptive strategy makes it harder to understand. 61 * We begin with ATN simulation to build paths in a DFA. Subsequent prediction 62 * requests go through the DFA first. If they reach a state without an edge for 63 * the current symbol, the algorithm fails over to the ATN simulation to 64 * complete the DFA path for the current input (until it finds a conflict state 65 * or uniquely predicting state).</p> 66 * 67 * <p> 68 * All of that is done without using the outer context because we want to create 69 * a DFA that is not dependent upon the rule invocation stack when we do a 70 * prediction. One DFA works in all contexts. We avoid using context not 71 * necessarily because it's slower, although it can be, but because of the DFA 72 * caching problem. The closure routine only considers the rule invocation stack 73 * created during prediction beginning in the decision rule. For example, if 74 * prediction occurs without invoking another rule's ATN, there are no context 75 * stacks in the configurations. When lack of context leads to a conflict, we 76 * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing 77 * strategy (versus full LL(*)).</p> 78 * 79 * <p> 80 * When SLL yields a configuration set with conflict, we rewind the input and 81 * retry the ATN simulation, this time using full outer context without adding 82 * to the DFA. Configuration context stacks will be the full invocation stacks 83 * from the start rule. If we get a conflict using full context, then we can 84 * definitively say we have a true ambiguity for that input sequence. If we 85 * don't get a conflict, it implies that the decision is sensitive to the outer 86 * context. (It is not context-sensitive in the sense of context-sensitive 87 * grammars.)</p> 88 * 89 * <p> 90 * The next time we reach this DFA state with an SLL conflict, through DFA 91 * simulation, we will again retry the ATN simulation using full context mode. 92 * This is slow because we can't save the results and have to "interpret" the 93 * ATN each time we get that input.</p> 94 * 95 * <p> 96 * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> 97 * 98 * <p> 99 * We could cache results from full context to predicted alternative easily and 100 * that saves a lot of time but doesn't work in presence of predicates. The set 101 * of visible predicates from the ATN start state changes depending on the 102 * context, because closure can fall off the end of a rule. I tried to cache 103 * tuples (stack context, semantic context, predicted alt) but it was slower 104 * than interpreting and much more complicated. Also required a huge amount of 105 * memory. The goal is not to create the world's fastest parser anyway. I'd like 106 * to keep this algorithm simple. By launching multiple threads, we can improve 107 * the speed of parsing across a large number of files.</p> 108 * 109 * <p> 110 * There is no strict ordering between the amount of input used by SLL vs LL, 111 * which makes it really hard to build a cache for full context. Let's say that 112 * we have input A B C that leads to an SLL conflict with full context X. That 113 * implies that using X we might only use A B but we could also use A B C D to 114 * resolve conflict. Input A B C D could predict alternative 1 in one position 115 * in the input and A B C E could predict alternative 2 in another position in 116 * input. The conflicting SLL configurations could still be non-unique in the 117 * full context prediction, which would lead us to requiring more input than the 118 * original A B C. To make a prediction cache work, we have to track the exact 119 * input used during the previous prediction. That amounts to a cache that maps 120 * X to a specific DFA for that context.</p> 121 * 122 * <p> 123 * Something should be done for left-recursive expression predictions. They are 124 * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry 125 * with full LL thing Sam does.</p> 126 * 127 * <p> 128 * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> 129 * 130 * <p> 131 * We avoid doing full context retry when the outer context is empty, we did not 132 * dip into the outer context by falling off the end of the decision state rule, 133 * or when we force SLL mode.</p> 134 * 135 * <p> 136 * As an example of the not dip into outer context case, consider as super 137 * constructor calls versus function calls. One grammar might look like 138 * this:</p> 139 * 140 * <pre> 141 * ctorBody 142 * : '{' superCall? stat* '}' 143 * ; 144 * </pre> 145 * 146 * <p> 147 * Or, you might see something like</p> 148 * 149 * <pre> 150 * stat 151 * : superCall ';' 152 * | expression ';' 153 * | ... 154 * ; 155 * </pre> 156 * 157 * <p> 158 * In both cases I believe that no closure operations will dip into the outer 159 * context. In the first case ctorBody in the worst case will stop at the '}'. 160 * In the 2nd case it should stop at the ';'. Both cases should stay within the 161 * entry rule and not dip into the outer context.</p> 162 * 163 * <p> 164 * <strong>PREDICATES</strong></p> 165 * 166 * <p> 167 * Predicates are always evaluated if present in either SLL or LL both. SLL and 168 * LL simulation deals with predicates differently. SLL collects predicates as 169 * it performs closure operations like ANTLR v3 did. It delays predicate 170 * evaluation until it reaches and accept state. This allows us to cache the SLL 171 * ATN simulation whereas, if we had evaluated predicates on-the-fly during 172 * closure, the DFA state configuration sets would be different and we couldn't 173 * build up a suitable DFA.</p> 174 * 175 * <p> 176 * When building a DFA accept state during ATN simulation, we evaluate any 177 * predicates and return the sole semantically valid alternative. If there is 178 * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, 179 * we throw an exception. Alternatives without predicates act like they have 180 * true predicates. The simple way to think about it is to strip away all 181 * alternatives with false predicates and choose the minimum alternative that 182 * remains.</p> 183 * 184 * <p> 185 * When we start in the DFA and reach an accept state that's predicated, we test 186 * those and return the minimum semantically viable alternative. If no 187 * alternatives are viable, we throw an exception.</p> 188 * 189 * <p> 190 * During full LL ATN simulation, closure always evaluates predicates and 191 * on-the-fly. This is crucial to reducing the configuration set size during 192 * closure. It hits a landmine when parsing with the Java grammar, for example, 193 * without this on-the-fly evaluation.</p> 194 * 195 * <p> 196 * <strong>SHARING DFA</strong></p> 197 * 198 * <p> 199 * All instances of the same parser share the same decision DFAs through a 200 * static field. Each instance gets its own ATN simulator but they share the 201 * same {@link #decisionToDFA} field. They also share a 202 * {@link PredictionContextCache} object that makes sure that all 203 * {@link PredictionContext} objects are shared among the DFA states. This makes 204 * a big size difference.</p> 205 * 206 * <p> 207 * <strong>THREAD SAFETY</strong></p> 208 * 209 * <p> 210 * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when 211 * it adds a new DFA object to that array. {@link #addDFAEdge} 212 * locks on the DFA for the current decision when setting the 213 * {@link DFAState#edges} field. {@link #addDFAState} locks on 214 * the DFA for the current decision when looking up a DFA state to see if it 215 * already exists. We must make sure that all requests to add DFA states that 216 * are equivalent result in the same shared DFA object. This is because lots of 217 * threads will be trying to update the DFA at once. The 218 * {@link #addDFAState} method also locks inside the DFA lock 219 * but this time on the shared context cache when it rebuilds the 220 * configurations' {@link PredictionContext} objects using cached 221 * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is 222 * safe as long as we can guarantee that all threads referencing 223 * {@code s.edge[t]} get the same physical target {@link DFAState}, or 224 * {@code null}. Once into the DFA, the DFA simulation does not reference the 225 * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new 226 * targets. The DFA simulator will either find {@link DFAState#edges} to be 227 * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or 228 * {@code dfa.edges[t]} to be non-null. The 229 * {@link #addDFAEdge} method could be racing to set the field 230 * but in either case the DFA simulator works; if {@code null}, and requests ATN 231 * simulation. It could also race trying to get {@code dfa.edges[t]}, but either 232 * way it will work because it's not doing a test and set operation.</p> 233 * 234 * <p> 235 * <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage 236 * Parsing)</strong></p> 237 * 238 * <p> 239 * Sam pointed out that if SLL does not give a syntax error, then there is no 240 * point in doing full LL, which is slower. We only have to try LL if we get a 241 * syntax error. For maximum speed, Sam starts the parser set to pure SLL 242 * mode with the {@link BailErrorStrategy}:</p> 243 * 244 * <pre> 245 * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; 246 * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); 247 * </pre> 248 * 249 * <p> 250 * If it does not get a syntax error, then we're done. If it does get a syntax 251 * error, we need to retry with the combined SLL/LL strategy.</p> 252 * 253 * <p> 254 * The reason this works is as follows. If there are no SLL conflicts, then the 255 * grammar is SLL (at least for that input set). If there is an SLL conflict, 256 * the full LL analysis must yield a set of viable alternatives which is a 257 * subset of the alternatives reported by SLL. If the LL set is a singleton, 258 * then the grammar is LL but not SLL. If the LL set is the same size as the SLL 259 * set, the decision is SLL. If the LL set has size > 1, then that decision 260 * is truly ambiguous on the current input. If the LL set is smaller, then the 261 * SLL conflict resolution might choose an alternative that the full LL would 262 * rule out as a possibility based upon better context information. If that's 263 * the case, then the SLL parse will definitely get an error because the full LL 264 * analysis says it's not viable. If SLL conflict resolution chooses an 265 * alternative within the LL set, them both SLL and LL would choose the same 266 * alternative because they both choose the minimum of multiple conflicting 267 * alternatives.</p> 268 * 269 * <p> 270 * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and 271 * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL 272 * parsing will get an error because SLL will pursue alternative 1. If 273 * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will 274 * choose the same alternative because alternative one is the minimum of either 275 * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax 276 * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> 277 * 278 * <p> 279 * Of course, if the input is invalid, then we will get an error for sure in 280 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over 281 * the input.</p> 282 */ 283 class ParserATNSimulator : ATNSimulator, InterfaceParserATNSimulator 284 { 285 286 protected Parser parser; 287 288 public DFA[] decisionToDFA; 289 290 public PredictionModeConst mode = PredictionModeConst.LL; 291 292 /** 293 * Each prediction operation uses a cache for merge of prediction contexts. 294 * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap 295 * isn't synchronized but we're ok since two threads shouldn't reuse same 296 * parser/atnsim object because it can only handle one input at a time. 297 * This maps graphs a and b to merged result c. (a,b)→c. We can avoid 298 * the merge if we ever see a and b again. Note that (b,a)→c should 299 * also be examined during cache lookup. 300 * @uml 301 * @__gshared 302 */ 303 public static __gshared DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext) mergeCache; 304 305 protected DFA _dfa; 306 307 protected TokenStream _input; 308 309 protected int _startIndex; 310 311 protected ParserRuleContext _outerContext; 312 313 /** 314 * @uml 315 * Testing only! 316 */ 317 protected this(ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 318 { 319 this(null, atn, decisionToDFA, sharedContextCache); 320 } 321 322 public this(Parser parser, ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 323 { 324 super(atn, sharedContextCache); 325 this.parser = parser; 326 this.decisionToDFA = decisionToDFA; 327 if (mergeCache is null) 328 this.mergeCache = new typeof(this.mergeCache); 329 } 330 331 /** 332 * @uml 333 * @override 334 */ 335 public override void reset() 336 { 337 } 338 339 /** 340 * @uml 341 * @override 342 */ 343 public override void clearDFA() 344 { 345 for (int d = 0; d < decisionToDFA.length; d++) { 346 decisionToDFA[d] = new DFA(atn.getDecisionState(d), d); 347 } 348 } 349 350 public int adaptivePredict(TokenStream input, int decision, ParserRuleContext outerContext) 351 { 352 debug(ParserATNSimulator) { 353 writefln("adaptivePredict decision %1$s"~ 354 " exec LA(1)==%2$s"~ 355 " line %3$s:%4$s", 356 decision, getLookaheadName(input), input.LT(1).getLine, 357 input.LT(1).getCharPositionInLine()); 358 } 359 360 _input = input; 361 _startIndex = input.index(); 362 _outerContext = outerContext; 363 DFA dfa = decisionToDFA[decision]; 364 _dfa = dfa; 365 366 int m = input.mark(); 367 int index = _startIndex; 368 // Now we are certain to have a specific decision's DFA 369 // But, do we still need an initial state? 370 try { 371 DFAState s0; 372 if (dfa.isPrecedenceDfa) { 373 // the start state for a precedence DFA depends on the current 374 // parser precedence, and is provided by a DFA method. 375 s0 = dfa.getPrecedenceStartState(parser.getPrecedence()); 376 } 377 else { 378 // the start state for a "regular" DFA is just s0 379 s0 = dfa.s0; 380 } 381 382 if (s0 is null) { 383 if ( outerContext is null ) 384 outerContext = ParserRuleContext.EMPTY; 385 debug(ParserATNSimulator) { 386 writefln("predictATN decision = %1$s"~ 387 " exec LA(1)==%2$s"~ 388 ", outerContext=%3$s", 389 dfa.decision, getLookaheadName(input), 390 outerContext); 391 } 392 393 bool fullCtx = false; 394 ATNConfigSet s0_closure = 395 computeStartState(dfa.atnStartState, 396 ParserRuleContext.EMPTY, 397 fullCtx); 398 if (dfa.isPrecedenceDfa) { 399 /* If this is a precedence DFA, we use applyPrecedenceFilter 400 * to convert the computed start state to a precedence start 401 * state. We then use DFA.setPrecedenceStartState to set the 402 * appropriate start state for the precedence level rather 403 * than simply setting DFA.s0. 404 */ 405 dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway 406 s0_closure = applyPrecedenceFilter(s0_closure); 407 s0 = addDFAState(dfa, new DFAState(s0_closure)); 408 dfa.setPrecedenceStartState(parser.getPrecedence(), s0); 409 } 410 else { 411 s0 = addDFAState(dfa, new DFAState(s0_closure)); 412 dfa.s0 = s0; 413 } 414 } 415 int alt = execATN(dfa, s0, input, index, outerContext); 416 debug(ParserATNSimulator) 417 writefln("DFA after predictATN: %1$s, alt = %s, dfa.states = %s", dfa.toString(parser.getVocabulary), alt, dfa); 418 return alt; 419 } 420 finally { 421 mergeCache = null; // wack cache after each prediction 422 _dfa = null; 423 input.seek(index); 424 input.release(m); 425 } 426 } 427 428 /** 429 * There are some key conditions we're looking for after computing a new 430 * set of ATN configs (proposed DFA state): 431 * * if the set is empty, there is no viable alternative for current symbol 432 * * does the state uniquely predict an alternative? 433 * * does the state have a conflict that would prevent us from 434 * putting it on the work list? 435 * 436 * We also have some key operations to do: 437 * * add an edge from previous DFA state to potentially new DFA state, D, 438 * upon current symbol but only if adding to work list, which means in all 439 * cases except no viable alternative (and possibly non-greedy decisions?) 440 * * collecting predicates and adding semantic context to DFA accept states 441 * * adding rule context to context-sensitive DFA accept states 442 * * consuming an input symbol 443 * * reporting a conflict 444 * * reporting an ambiguity 445 * * reporting a context sensitivity 446 * * reporting insufficient predicates 447 * 448 * cover these cases: 449 * dead end 450 * single alt 451 * single alt + preds 452 * conflict 453 * conflict + preds 454 */ 455 protected int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, ParserRuleContext outerContext) 456 { 457 debug { 458 writefln("execATN decision %1$s"~ 459 " exec LA(1)==%2$s line %3$s:%4$s", 460 dfa.decision, 461 getLookaheadName(input), 462 input.LT(1).getLine, 463 input.LT(1).getCharPositionInLine()); 464 } 465 466 DFAState previousD = s0; 467 468 debug 469 writefln("s0 = %1$s", s0); 470 471 int t = input.LA(1); 472 473 while (true) { // while more work 474 DFAState D = getExistingTargetState(previousD, t); 475 if (D is null) { 476 D = computeTargetState(dfa, previousD, t); 477 } 478 479 if (D == ERROR) { 480 // if any configs in previous dipped into outer context, that 481 // means that input up to t actually finished entry rule 482 // at least for SLL decision. Full LL doesn't dip into outer 483 // so don't need special case. 484 // We will get an error no matter what so delay until after 485 // decision; better error message. Also, no reachable target 486 // ATN states in SLL implies LL will also get nowhere. 487 // If conflict in states that dip out, choose min since we 488 // will get error no matter what. 489 NoViableAltException e = noViableAlt(input, outerContext, previousD.configs, startIndex); 490 input.seek(startIndex); 491 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext); 492 if (alt != ATN.INVALID_ALT_NUMBER) { 493 return alt; 494 } 495 throw e; 496 } 497 498 if (D.requiresFullContext && mode != PredictionModeConst.SLL) { 499 // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) 500 BitSet conflictingAlts = D.configs.conflictingAlts; 501 if (D.predicates != null) { 502 debug 503 writeln("DFA state has preds in DFA sim LL failover"); 504 int conflictIndex = input.index(); 505 if (conflictIndex != startIndex) { 506 input.seek(startIndex); 507 } 508 509 conflictingAlts = evalSemanticContext(D.predicates, outerContext, true); 510 if (conflictingAlts.cardinality ==1) { 511 debug 512 writeln("Full LL avoided"); 513 return conflictingAlts.nextSetBit(0); 514 } 515 516 if (conflictIndex != startIndex) { 517 // restore the index so reporting the fallback to full 518 // context occurs with the index at the correct spot 519 input.seek(conflictIndex); 520 } 521 } 522 523 debug(dfa_debug) 524 writefln("ctx sensitive state %1$ in %2$s", 525 outerContext, D); 526 bool fullCtx = true; 527 ATNConfigSet s0_closure = 528 computeStartState(dfa.atnStartState, outerContext, 529 fullCtx); 530 reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index()); 531 int alt = execATNWithFullContext(dfa, D, s0_closure, 532 input, startIndex, 533 outerContext); 534 return alt; 535 } 536 537 if ( D.isAcceptState ) { 538 if (D.predicates == null) { 539 return D.prediction; 540 } 541 542 int stopIndex = input.index(); 543 input.seek(startIndex); 544 BitSet alts = evalSemanticContext(D.predicates, outerContext, true); 545 switch (alts.cardinality) { 546 case 0: 547 throw noViableAlt(input, outerContext, D.configs, startIndex); 548 549 case 1: 550 return alts.nextSetBit(0); 551 552 default: 553 // report ambiguity after predicate evaluation to make sure the correct 554 // set of ambig alts is reported. 555 reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs); 556 return alts.nextSetBit(0); 557 } 558 } 559 560 previousD = D; 561 562 if (t != IntStreamConstant.EOF) { 563 input.consume(); 564 t = input.LA(1); 565 } 566 } 567 568 } 569 570 /** 571 * Get an existing target state for an edge in the DFA. If the target state 572 * for the edge has not yet been computed or is otherwise not available, 573 * this method returns {@code null}. 574 * 575 * @param previousD The current DFA state 576 * @param t The next input symbol 577 * @return The existing target DFA state for the given input symbol 578 * {@code t}, or {@code null} if the target state for this edge is not 579 * already cached 580 */ 581 public DFAState getExistingTargetState(DFAState previousD, int t) 582 { 583 DFAState[] edges = previousD.edges; 584 if (edges == null || t + 1 < 0 || t + 1 >= edges.length) { 585 return null; 586 } 587 return edges[t + 1]; 588 } 589 590 /** 591 * Compute a target state for an edge in the DFA, and attempt to add the 592 * computed state and corresponding edge to the DFA. 593 * 594 * @param dfa The DFA 595 * @param previousD The current DFA state 596 * @param t The next input symbol 597 * 598 * @return The computed target DFA state for the given input symbol 599 * {@code t}. If {@code t} does not lead to a valid DFA state, this method 600 * returns {@link #ERROR}. 601 */ 602 public DFAState computeTargetState(DFA dfa, DFAState previousD, int t) 603 { 604 ATNConfigSet reach = computeReachSet(previousD.configs, t, false); 605 if (reach is null) { 606 addDFAEdge(dfa, previousD, t, ERROR); 607 return ERROR; 608 } 609 // create new target state; we'll add to DFA after it's complete 610 DFAState D = new DFAState(reach); 611 int predictedAlt = getUniqueAlt(reach); 612 613 debug { 614 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 615 writefln("SLL altSubSets=%1$s"~ 616 ", configs=%2$s"~ 617 ", predict=%3$s, allSubsetsConflict=%4$s, "~ 618 "conflictingAlts=%5$s", 619 altSubSets, 620 reach, 621 predictedAlt, 622 PredictionMode.allSubsetsConflict(altSubSets), 623 getConflictingAlts(reach)); 624 } 625 626 if (predictedAlt != ATN.INVALID_ALT_NUMBER) { 627 // NO CONFLICT, UNIQUELY PREDICTED ALT 628 D.isAcceptState = true; 629 D.configs.uniqueAlt = predictedAlt; 630 D.prediction = predictedAlt; 631 } 632 else if ( PredictionMode.hasSLLConflictTerminatingPrediction(mode, reach)) { 633 // MORE THAN ONE VIABLE ALTERNATIVE 634 D.configs.conflictingAlts = getConflictingAlts(reach); 635 D.requiresFullContext = true; 636 // in SLL-only mode, we will stop at this state and return the minimum alt 637 D.isAcceptState = true; 638 D.prediction = D.configs.conflictingAlts.nextSetBit(0); 639 } 640 641 if ( D.isAcceptState && D.configs.hasSemanticContext ) { 642 predicateDFAState(D, atn.getDecisionState(dfa.decision)); 643 if (D.predicates != null) { 644 D.prediction = ATN.INVALID_ALT_NUMBER; 645 } 646 } 647 // all adds to dfa are done after we've created full D state 648 D = addDFAEdge(dfa, previousD, t, D); 649 return D; 650 } 651 652 public void predicateDFAState(DFAState dfaState, DecisionState decisionState) 653 { 654 // We need to test all predicates, even in DFA states that 655 // uniquely predict alternative. 656 int nalts = decisionState.getNumberOfTransitions(); 657 // Update DFA so reach becomes accept state with (predicate,alt) 658 // pairs if preds found for conflicting alts 659 BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState.configs); 660 SemanticContext[] altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); 661 if ( altToPred!=null ) { 662 dfaState.predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); 663 dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds 664 } 665 else { 666 // There are preds in configs but they might go away 667 // when OR'd together like {p}? || NONE == NONE. If neither 668 // alt has preds, resolve to min alt 669 dfaState.prediction = altsToCollectPredsFrom.nextSetBit(0); 670 } 671 } 672 673 /** 674 * @uml 675 * comes back with reach.uniqueAlt set to a valid alt 676 */ 677 protected int execATNWithFullContext(DFA dfa, DFAState D, ATNConfigSet s0, TokenStream input, 678 int startIndex, ParserRuleContext outerContext) 679 { 680 debug { 681 writefln("execATNWithFullContext %s", s0); 682 } 683 bool fullCtx = true; 684 bool foundExactAmbig = false; 685 ATNConfigSet reach = null; 686 ATNConfigSet previous = s0; 687 input.seek(startIndex); 688 int t = input.LA(1); 689 int predictedAlt; 690 while (true) { // while more work 691 // System.out.println("LL REACH "+getLookaheadName(input)+ 692 // " from configs.size="+previous.size()+ 693 // " line "+input.LT(1).getLine()+":"+input.LT(1).getCharPositionInLine()); 694 reach = computeReachSet(previous, t, fullCtx); 695 if (reach is null) { 696 // if any configs in previous dipped into outer context, that 697 // means that input up to t actually finished entry rule 698 // at least for LL decision. Full LL doesn't dip into outer 699 // so don't need special case. 700 // We will get an error no matter what so delay until after 701 // decision; better error message. Also, no reachable target 702 // ATN states in SLL implies LL will also get nowhere. 703 // If conflict in states that dip out, choose min since we 704 // will get error no matter what. 705 NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex); 706 input.seek(startIndex); 707 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); 708 if ( alt!=ATN.INVALID_ALT_NUMBER ) { 709 return alt; 710 } 711 throw e; 712 } 713 714 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 715 debug { 716 writefln("LL altSubSets=%1$s, PredictionMode.getUniqueAlt(altSubSets)" ~ 717 ", resolvesToJustOneViableAlt=%3$s", 718 altSubSets, 719 PredictionMode.getUniqueAlt(altSubSets), 720 PredictionMode.resolvesToJustOneViableAlt(altSubSets)); 721 } 722 723 // System.out.println("altSubSets: "+altSubSets); 724 // System.err.println("reach="+reach+", "+reach.conflictingAlts); 725 reach.uniqueAlt = getUniqueAlt(reach); 726 // unique prediction? 727 if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) { 728 predictedAlt = reach.uniqueAlt; 729 break; 730 } 731 if ( mode != PredictionModeConst.LL_EXACT_AMBIG_DETECTION ) { 732 predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets); 733 if ( predictedAlt != ATN.INVALID_ALT_NUMBER ) { 734 break; 735 } 736 } 737 else { 738 // In exact ambiguity mode, we never try to terminate early. 739 // Just keeps scarfing until we know what the conflict is 740 if ( PredictionMode.allSubsetsConflict(altSubSets) && 741 PredictionMode.allSubsetsEqual(altSubSets) ) 742 { 743 foundExactAmbig = true; 744 predictedAlt = PredictionMode.getSingleViableAlt(altSubSets); 745 break; 746 } 747 // else there are multiple non-conflicting subsets or 748 // we're not sure what the ambiguity is yet. 749 // So, keep going. 750 } 751 752 previous = reach; 753 if (t != IntStreamConstant.EOF) { 754 input.consume(); 755 t = input.LA(1); 756 } 757 } 758 759 // If the configuration set uniquely predicts an alternative, 760 // without conflict, then we know that it's a full LL decision 761 // not SLL. 762 if ( reach.uniqueAlt != ATN.INVALID_ALT_NUMBER ) { 763 reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index()); 764 return predictedAlt; 765 } 766 767 // We do not check predicates here because we have checked them 768 // on-the-fly when doing full context prediction. 769 770 /* 771 In non-exact ambiguity detection mode, we might actually be able to 772 detect an exact ambiguity, but I'm not going to spend the cycles 773 needed to check. We only emit ambiguity warnings in exact ambiguity 774 mode. 775 776 For example, we might know that we have conflicting configurations. 777 But, that does not mean that there is no way forward without a 778 conflict. It's possible to have nonconflicting alt subsets as in: 779 780 LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] 781 782 from 783 784 [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), 785 (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] 786 787 In this case, (17,1,[5 $]) indicates there is some next sequence that 788 would resolve this without conflict to alternative 1. Any other viable 789 next sequence, however, is associated with a conflict. We stop 790 looking for input because no amount of further lookahead will alter 791 the fact that we should predict alternative 1. We just can't say for 792 sure that there is an ambiguity without looking further. 793 */ 794 reportAmbiguity(dfa, D, startIndex, input.index(), foundExactAmbig, 795 reach.getAlts(), reach); 796 797 return predictedAlt; 798 } 799 800 public ATNConfigSet computeReachSet(ATNConfigSet closure, int t, bool fullCtx) 801 { 802 debug 803 writefln("in computeReachSet, starting closure: %s", closure); 804 805 if (mergeCache is null) { 806 mergeCache = new DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext); 807 } 808 809 ATNConfigSet intermediate = new ATNConfigSet(fullCtx); 810 811 /* Configurations already in a rule stop state indicate reaching the end 812 * of the decision rule (local context) or end of the start rule (full 813 * context). Once reached, these configurations are never updated by a 814 * closure operation, so they are handled separately for the performance 815 * advantage of having a smaller intermediate set when calling closure. 816 * 817 * For full-context reach operations, separate handling is required to 818 * ensure that the alternative matching the longest overall sequence is 819 * chosen when multiple such configurations can match the input. 820 */ 821 ATNConfig[] skippedStopStates; 822 823 // First figure out where we can reach on input t 824 foreach (c; closure.configs) { 825 debug 826 writefln("testing %1$s at %2$s", getTokenName(t), c.toString); 827 828 if (cast(RuleStopState)c.state) { 829 assert(c.context.isEmpty); 830 if (fullCtx || t == IntStreamConstant.EOF) { 831 skippedStopStates ~= c; 832 } 833 continue; 834 } 835 836 foreach (trans; c.state.transitions) { 837 ATNState target = getReachableTarget(trans, t); 838 if (target) { 839 intermediate.add(new ATNConfig(c, target), mergeCache); 840 } 841 } 842 } 843 844 // Now figure out where the reach operation can take us... 845 846 ATNConfigSet reach = null; 847 848 /* This block optimizes the reach operation for intermediate sets which 849 * trivially indicate a termination state for the overall 850 * adaptivePredict operation. 851 * 852 * The conditions assume that intermediate 853 * contains all configurations relevant to the reach set, but this 854 * condition is not true when one or more configurations have been 855 * withheld in skippedStopStates, or when the current symbol is EOF. 856 */ 857 if (skippedStopStates is null && t != TokenConstantDefinition.EOF) { 858 if (intermediate.size() == 1 ) { 859 // Don't pursue the closure if there is just one state. 860 // It can only have one alternative; just add to result 861 // Also don't pursue the closure if there is unique alternative 862 // among the configurations. 863 reach = intermediate; 864 } 865 else if (getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER ) { 866 // Also don't pursue the closure if there is unique alternative 867 // among the configurations. 868 reach = intermediate; 869 } 870 } 871 872 /* If the reach set could not be trivially determined, perform a closure 873 * operation on the intermediate set to compute its initial value. 874 */ 875 if (reach is null) { 876 reach = new ATNConfigSet(fullCtx); 877 ATNConfig[] closureBusy; 878 bool treatEofAsEpsilon = t == TokenConstantDefinition.EOF; 879 foreach (c; intermediate.configs) 880 { 881 closureATN(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon); 882 } 883 } 884 885 if (t == IntStreamConstant.EOF) { 886 /* After consuming EOF no additional input is possible, so we are 887 * only interested in configurations which reached the end of the 888 * decision rule (local context) or end of the start rule (full 889 * context). Update reach to contain only these configurations. This 890 * handles both explicit EOF transitions in the grammar and implicit 891 * EOF transitions following the end of the decision or start rule. 892 * 893 * When reach==intermediate, no closure operation was performed. In 894 * this case, removeAllConfigsNotInRuleStopState needs to check for 895 * reachable rule stop states as well as configurations already in 896 * a rule stop state. 897 * 898 * This is handled before the configurations in skippedStopStates, 899 * because any configurations potentially added from that list are 900 * already guaranteed to meet this condition whether or not it's 901 * required. 902 */ 903 reach = removeAllConfigsNotInRuleStopState(reach, reach.configs == intermediate.configs); 904 } 905 906 /* If skippedStopStates is not null, then it contains at least one 907 * configuration. For full-context reach operations, these 908 * configurations reached the end of the start rule, in which case we 909 * only add them back to reach if no configuration during the current 910 * closure operation reached such a state. This ensures adaptivePredict 911 * chooses an alternative matching the longest overall sequence when 912 * multiple alternatives are viable. 913 */ 914 if (skippedStopStates !is null && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))) { 915 assert(skippedStopStates.length > 0); 916 foreach (c; skippedStopStates) { 917 reach.add(c, mergeCache); 918 } 919 } 920 921 if (reach.isEmpty) 922 return null; 923 return reach; 924 } 925 926 /** 927 * Return a configuration set containing only the configurations from 928 * {@code configs} which are in a {@link RuleStopState}. If all 929 * configurations in {@code configs} are already in a rule stop state, this 930 * method simply returns {@code configs}. 931 * 932 * <p>When {@code lookToEndOfRule} is true, this method uses 933 * {@link ATN#nextTokens} for each configuration in {@code configs} which is 934 * not already in a rule stop state to see if a rule stop state is reachable 935 * from the configuration via epsilon-only transitions.</p> 936 * 937 * @param configs the configuration set to update 938 * @param lookToEndOfRule when true, this method checks for rule stop states 939 * reachable by epsilon-only transitions from each configuration in 940 * {@code configs}. 941 * 942 * @return {@code configs} if all configurations in {@code configs} are in a 943 * rule stop state, otherwise return a new configuration set containing only 944 * the configurations from {@code configs} which are in a rule stop state 945 */ 946 protected ATNConfigSet removeAllConfigsNotInRuleStopState(ATNConfigSet configs, bool lookToEndOfRule) 947 { 948 if (PredictionMode.allConfigsInRuleStopStates(configs)) { 949 return configs; 950 } 951 952 ATNConfigSet result = new ATNConfigSet(configs.fullCtx); 953 foreach (ATNConfig config; configs.configs) { 954 if (config.state.classinfo == RuleStopState.classinfo) { 955 result.add(config, mergeCache); 956 continue; 957 } 958 959 if (lookToEndOfRule && config.state.onlyHasEpsilonTransitions()) { 960 IntervalSet nextTokens = atn.nextTokens(config.state); 961 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 962 ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex]; 963 result.add(new ATNConfig(config, endOfRuleState), mergeCache); 964 } 965 } 966 } 967 return result; 968 } 969 970 public ATNConfigSet computeStartState(ATNState p, RuleContext ctx, bool fullCtx) 971 { 972 // always at least the implicit call to start rule 973 PredictionContext initialContext = PredictionContext.fromRuleContext(atn, ctx); 974 ATNConfigSet configs = new ATNConfigSet(fullCtx); 975 976 for (int i=0; i<p.getNumberOfTransitions(); i++) { 977 ATNState target = p.transition(i).target; 978 ATNConfig c = new ATNConfig(target, i+1, initialContext); 979 ATNConfig[] closureBusy; 980 closureATN(c, configs, closureBusy, true, fullCtx, false); 981 } 982 983 return configs; 984 } 985 986 public ATNConfigSet applyPrecedenceFilter(ATNConfigSet configs) 987 { 988 PredictionContext[int] statesFromAlt1; 989 ATNConfigSet configSet = new ATNConfigSet(configs.fullCtx); 990 foreach (config; configs.configs) { 991 // handle alt 1 first 992 if (config.alt != 1) { 993 continue; 994 } 995 996 SemanticContext updatedContext = config.semanticContext.evalPrecedence(parser, _outerContext); 997 if (updatedContext is null) { 998 // the configuration was eliminated 999 continue; 1000 } 1001 1002 statesFromAlt1[config.state.stateNumber] = config.context; 1003 if (updatedContext != config.semanticContext) { 1004 configSet.add(new ATNConfig(config, updatedContext), mergeCache); 1005 } 1006 else { 1007 configSet.add(config, mergeCache); 1008 } 1009 } 1010 1011 foreach (config; configs.configs) { 1012 if (config.alt == 1) { 1013 // already handled 1014 continue; 1015 } 1016 1017 if (!config.isPrecedenceFilterSuppressed()) { 1018 /* In the future, this elimination step could be updated to also 1019 * filter the prediction context for alternatives predicting alt>1 1020 * (basically a graph subtraction algorithm). 1021 */ 1022 if (config.state.stateNumber in statesFromAlt1 && 1023 statesFromAlt1[config.state.stateNumber].opEquals(config.context)) { 1024 // eliminated 1025 continue; 1026 } 1027 } 1028 1029 configSet.add(config, mergeCache); 1030 } 1031 1032 return configSet; 1033 1034 } 1035 1036 public ATNState getReachableTarget(Transition trans, int ttype) 1037 { 1038 if (trans.matches(ttype, 0, atn.maxTokenType)) { 1039 return trans.target; 1040 } 1041 return null; 1042 } 1043 1044 public SemanticContext[] getPredsForAmbigAlts(BitSet ambigAlts, ATNConfigSet configs, 1045 int nalts) 1046 { 1047 // REACH=[1|1|[]|0:0, 1|2|[]|0:1] 1048 /* altToPred starts as an array of all null contexts. The entry at index i 1049 * corresponds to alternative i. altToPred[i] may have one of three values: 1050 * 1. null: no ATNConfig c is found such that c.alt==i 1051 * 2. SemanticContext.NONE: At least one ATNConfig c exists such that 1052 * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, 1053 * alt i has at least one unpredicated config. 1054 * 3. Non-NONE Semantic Context: There exists at least one, and for all 1055 * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. 1056 * 1057 * From this, it is clear that NONE||anything==NONE. 1058 */ 1059 SemanticContext[] altToPred = new SemanticContext[nalts + 1]; 1060 foreach (ATNConfig c; configs.configs) { 1061 if ( ambigAlts.get(c.alt) ) { 1062 altToPred[c.alt] = SemanticContext.or(altToPred[c.alt], c.semanticContext); 1063 } 1064 } 1065 1066 int nPredAlts = 0; 1067 for (int i = 1; i <= nalts; i++) { 1068 if (altToPred[i] is null) { 1069 altToPred[i] = SemanticContext.NONE; 1070 } 1071 else if (altToPred[i] != SemanticContext.NONE) { 1072 nPredAlts++; 1073 } 1074 } 1075 if (nPredAlts == 0) altToPred = null; 1076 debug 1077 writefln("getPredsForAmbigAlts result %s", to!string(altToPred)); 1078 return altToPred; 1079 } 1080 1081 protected PredPrediction[] getPredicatePredictions(BitSet ambigAlts, SemanticContext[] altToPred) 1082 { 1083 PredPrediction[] pairs; 1084 bool containsPredicate = false; 1085 1086 for (int i = 1; i < altToPred.length; i++) { 1087 SemanticContext pred = altToPred[i]; 1088 1089 // unpredicated is indicated by SemanticContext.NONE 1090 assert(pred !is null); 1091 1092 if (ambigAlts.length > 0 && ambigAlts.get(i)) { 1093 pairs ~= new PredPrediction(pred, i); 1094 } 1095 if (pred != SemanticContext.NONE) 1096 containsPredicate = true; 1097 } 1098 1099 if (!containsPredicate) { 1100 return null; 1101 } 1102 1103 // System.out.println(Arrays.toString(altToPred)+"->"+pairs); 1104 return pairs; 1105 1106 } 1107 1108 protected int getAltThatFinishedDecisionEntryRule(ATNConfigSet configs) 1109 { 1110 IntervalSet alts = new IntervalSet(); 1111 foreach (ATNConfig c; configs.configs) { 1112 if ( c.getOuterContextDepth() > 0 || (c.state.classinfo == RuleStopState.classinfo && c.context.hasEmptyPath()) ) { 1113 alts.add(c.alt); 1114 } 1115 } 1116 if ( alts.size()==0 ) return ATN.INVALID_ALT_NUMBER; 1117 return alts.getMinElement(); 1118 } 1119 1120 /** 1121 * This method is used to improve the localization of error messages by 1122 * choosing an alternative rather than throwing a 1123 * {@link NoViableAltException} in particular prediction scenarios where the 1124 * {@link #ERROR} state was reached during ATN simulation. 1125 * 1126 * <p> 1127 * The default implementation of this method uses the following 1128 * algorithm to identify an ATN configuration which successfully parsed the 1129 * decision entry rule. Choosing such an alternative ensures that the 1130 * {@link ParserRuleContext} returned by the calling rule will be complete 1131 * and valid, and the syntax error will be reported later at a more 1132 * localized location.</p> 1133 * 1134 * <ul> 1135 * <li>If a syntactically valid path or paths reach the end of the decision rule and 1136 * they are semantically valid if predicated, return the min associated alt.</li> 1137 * <li>Else, if a semantically invalid but syntactically valid path exist 1138 * or paths exist, return the minimum associated alt. 1139 * </li> 1140 * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> 1141 * </ul> 1142 * 1143 * <p> 1144 * In some scenarios, the algorithm described above could predict an 1145 * alternative which will result in a {@link FailedPredicateException} in 1146 * the parser. Specifically, this could occur if the <em>only</em> configuration 1147 * capable of successfully parsing to the end of the decision rule is 1148 * blocked by a semantic predicate. By choosing this alternative within 1149 * {@link #adaptivePredict} instead of throwing a 1150 * {@link NoViableAltException}, the resulting 1151 * {@link FailedPredicateException} in the parser will identify the specific 1152 * predicate which is preventing the parser from successfully parsing the 1153 * decision rule, which helps developers identify and correct logic errors 1154 * in semantic predicates. 1155 * </p> 1156 * 1157 * @param configs The ATN configurations which were valid immediately before 1158 * the {@link #ERROR} state was reached 1159 * @param outerContext The is the \gamma_0 initial parser context from the paper 1160 * or the parser stack at the instant before prediction commences. 1161 * 1162 * @return The value to return from {@link #adaptivePredict}, or 1163 * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not 1164 * identified and {@link #adaptivePredict} should report an error instead. 1165 */ 1166 protected int getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet configs, 1167 ParserRuleContext outerContext) 1168 { 1169 IntervalSet alts = new IntervalSet(); 1170 foreach (ATNConfig c; configs.configs) { 1171 if (c.getOuterContextDepth > 0 || (c.state.classinfo == RuleStopState.classinfo && c.context.hasEmptyPath()) ) { 1172 alts.add(c.alt); 1173 } 1174 } 1175 if (alts.size == 0) 1176 return ATN.INVALID_ALT_NUMBER; 1177 return alts.getMinElement(); 1178 } 1179 1180 protected ATNConfigSetATNConfigSetPair splitAccordingToSemanticValidity(ATNConfigSet configs, 1181 ParserRuleContext outerContext) 1182 { 1183 ATNConfigSet succeeded = new ATNConfigSet(configs.fullCtx); 1184 ATNConfigSet failed = new ATNConfigSet(configs.fullCtx); 1185 foreach (ATNConfig c; configs.configs) { 1186 if (c.semanticContext != SemanticContext.NONE ) { 1187 bool predicateEvaluationResult = evalSemanticContext(c.semanticContext, outerContext, c.alt, configs.fullCtx); 1188 if (predicateEvaluationResult) { 1189 succeeded.add(c); 1190 } 1191 else { 1192 failed.add(c); 1193 } 1194 } 1195 else { 1196 succeeded.add(c); 1197 } 1198 } 1199 ATNConfigSetATNConfigSetPair res; 1200 res.a = succeeded; 1201 res.b = failed; 1202 return res; 1203 } 1204 1205 /** 1206 * pairs that win. A {@code NONE} predicate indicates an alt containing an 1207 * unpredicated config which behaves as "always true." If !complete 1208 * then we stop at the first predicate that evaluates to true. This 1209 * includes pairs with null predicates. 1210 */ 1211 protected BitSet evalSemanticContext(PredPrediction[] predPredictions, ParserRuleContext outerContext, 1212 bool complete) 1213 { 1214 BitSet predictions; 1215 foreach (pair; predPredictions) { 1216 if (pair.pred == SemanticContext.NONE ) { 1217 predictions.set(pair.alt, true); 1218 if (!complete) { 1219 break; 1220 } 1221 continue; 1222 } 1223 1224 bool fullCtx = false; // in dfa 1225 bool predicateEvaluationResult = evalSemanticContext(pair.pred, outerContext, pair.alt, fullCtx); 1226 debug(dfa_debug) { 1227 writefln("eval pred %1$s=%2$s", pair, predicateEvaluationResult); 1228 } 1229 1230 if ( predicateEvaluationResult ) { 1231 debug(dfa_debug) 1232 writefln("PREDICT ", pair.alt); 1233 predictions.set(pair.alt, true); 1234 if (!complete) { 1235 break; 1236 } 1237 } 1238 } 1239 1240 return predictions; 1241 } 1242 1243 public bool evalSemanticContext(SemanticContext pred, ParserRuleContext parserCallStack, 1244 int alt, bool fullCtx) 1245 { 1246 return pred.eval(parser, parserCallStack); 1247 } 1248 1249 protected void closureATN(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1250 bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) 1251 { 1252 int initialDepth = 0; 1253 closureCheckingStopState(config, configs, closureBusy, collectPredicates, 1254 fullCtx, 1255 initialDepth, treatEofAsEpsilon); 1256 assert (!fullCtx || !configs.dipsIntoOuterContext); 1257 } 1258 1259 protected void closureCheckingStopState(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1260 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1261 { 1262 debug { 1263 writefln("\nclosureCheckingStopState: closure(%s)", config.toString(parser, true)); 1264 } 1265 if (cast(RuleStopState)config.state) { 1266 // We hit rule end. If we have context info, use it 1267 // run thru all possible stack tops in ctx 1268 if (!config.context.isEmpty) { 1269 for (int i = 0; i < config.context.size; i++) { 1270 if (config.context.getReturnState(i) == PredictionContext.EMPTY_RETURN_STATE) { 1271 if (fullCtx) { 1272 configs.add(new ATNConfig(config, config.state, 1273 cast(PredictionContext)PredictionContext.EMPTY), mergeCache); 1274 continue; 1275 } 1276 else { 1277 // we have no context info, just chase follow links (if greedy) 1278 debug 1279 writefln("1FALLING off rule %s", 1280 getRuleName(config.state.ruleIndex)); 1281 closure_(config, configs, closureBusy, collectPredicates, 1282 fullCtx, depth, treatEofAsEpsilon); 1283 } 1284 continue; 1285 } 1286 ATNState returnState = atn.states[config.context.getReturnState(i)]; 1287 PredictionContext newContext = config.context.getParent(i); // "pop" return state 1288 ATNConfig c = new ATNConfig(returnState, config.alt, newContext, 1289 config.semanticContext); 1290 // While we have context to pop back from, we may have 1291 // gotten that context AFTER having falling off a rule. 1292 // Make sure we track that we are now out of context. 1293 // 1294 // This assignment also propagates the 1295 // isPrecedenceFilterSuppressed() value to the new 1296 // configuration. 1297 c.reachesIntoOuterContext = config.reachesIntoOuterContext; 1298 assert(depth > int.min); 1299 closureCheckingStopState(c, configs, closureBusy, collectPredicates, 1300 fullCtx, depth - 1, treatEofAsEpsilon); 1301 } 1302 return; 1303 } 1304 else if (fullCtx) { 1305 // reached end of start rule 1306 configs.add(config, mergeCache); 1307 return; 1308 } 1309 else { 1310 // else if we have no context info, just chase follow links (if greedy) 1311 debug 1312 writefln("FALLING off rule %s", 1313 getRuleName(config.state.ruleIndex)); 1314 } 1315 } 1316 closure_(config, configs, closureBusy, collectPredicates, 1317 fullCtx, depth, treatEofAsEpsilon); 1318 } 1319 1320 /** 1321 * Do the actual work of walking epsilon edges 1322 */ 1323 protected void closure_(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1324 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1325 { 1326 ATNState p = config.state; 1327 1328 // optimization 1329 if (!p.onlyHasEpsilonTransitions) { 1330 configs.add(config, mergeCache); 1331 // make sure to not return here, because EOF transitions can act as 1332 // both epsilon transitions and non-epsilon transitions. 1333 // if ( debug ) System.out.println("added config "+configs); 1334 } 1335 for (int i=0; i<p.getNumberOfTransitions(); i++) { 1336 if ( i==0 && canDropLoopEntryEdgeInLeftRecursiveRule(config)) 1337 continue; 1338 Transition t = p.transition(i); 1339 bool continueCollecting = 1340 collectPredicates && !(cast(ActionTransition)t); 1341 ATNConfig c = getEpsilonTarget(config, t, continueCollecting, 1342 depth == 0, fullCtx, treatEofAsEpsilon); 1343 if (c !is null) { 1344 int newDepth = depth; 1345 if (cast(RuleStopState)config.state) { 1346 assert (!fullCtx); 1347 1348 // target fell off end of rule; mark resulting c as having dipped into outer context 1349 // We can't get here if incoming config was rule stop and we had context 1350 // track how far we dip into outer context. Might 1351 // come in handy and we avoid evaluating context dependent 1352 // preds if this is > 0. 1353 if (_dfa !is null && _dfa.isPrecedenceDfa) { 1354 int outermostPrecedenceReturn = (cast(EpsilonTransition)t).outermostPrecedenceReturn; 1355 if (outermostPrecedenceReturn == _dfa.atnStartState.ruleIndex) { 1356 c.setPrecedenceFilterSuppressed(true); 1357 } 1358 } 1359 1360 c.reachesIntoOuterContext++; 1361 1362 if (count(closureBusy, c) > 0) { 1363 // avoid infinite recursion for right-recursive rules 1364 continue; 1365 } 1366 closureBusy ~= c; 1367 1368 configs.dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method 1369 assert (newDepth > int.min); 1370 newDepth--; 1371 debug 1372 writefln("dips into outer ctx: %s", c); 1373 } 1374 else { 1375 if (!t.isEpsilon) { 1376 if (count(closureBusy, c) > 0) { 1377 // avoid infinite recursion for right-recursive rules 1378 continue; 1379 } 1380 closureBusy ~= c; 1381 } 1382 if (cast(RuleTransition)t) { 1383 // latch when newDepth goes negative - once we step out of the entry context we can't return 1384 if (newDepth >= 0) { 1385 newDepth++; 1386 } 1387 } 1388 } 1389 closureCheckingStopState(c, configs, closureBusy, continueCollecting, 1390 fullCtx, newDepth, treatEofAsEpsilon); 1391 } 1392 } 1393 1394 } 1395 1396 public string getRuleName(int index) 1397 { 1398 if (parser !is null && index>=0 ) return parser.getRuleNames()[index]; 1399 return format("<rule %s>",index); 1400 } 1401 1402 public ATNConfig getEpsilonTarget(ATNConfig config, Transition t, bool collectPredicates, 1403 bool inContext, bool fullCtx, bool treatEofAsEpsilon) 1404 { 1405 switch (t.getSerializationType()) { 1406 case TransitionStates.RULE: 1407 return ruleTransition(config, cast(RuleTransition)t); 1408 1409 case TransitionStates.PRECEDENCE: 1410 return precedenceTransition(config, cast(PrecedencePredicateTransition)t, collectPredicates, inContext, fullCtx); 1411 1412 case TransitionStates.PREDICATE: 1413 return predTransition(config, cast(PredicateTransition)t, 1414 collectPredicates, 1415 inContext, 1416 fullCtx); 1417 1418 case TransitionStates.ACTION: 1419 return actionTransition(config, cast(ActionTransition)t); 1420 1421 case TransitionStates.EPSILON: 1422 return new ATNConfig(config, t.target); 1423 1424 case TransitionStates.ATOM: 1425 case TransitionStates.RANGE: 1426 case TransitionStates.SET: 1427 // EOF transitions act like epsilon transitions after the first EOF 1428 // transition is traversed 1429 if (treatEofAsEpsilon) { 1430 if (t.matches(TokenConstantDefinition.EOF, 0, 1)) { 1431 return new ATNConfig(config, t.target); 1432 } 1433 } 1434 1435 return null; 1436 1437 default: 1438 return null; 1439 } 1440 1441 } 1442 1443 protected ATNConfig actionTransition(ATNConfig config, ActionTransition t) 1444 { 1445 debug writefln("ACTION edge %1$s:%2$s", t.ruleIndex, t.actionIndex); 1446 return new ATNConfig(config, t.target); 1447 } 1448 1449 public ATNConfig precedenceTransition(ATNConfig config, PrecedencePredicateTransition pt, 1450 bool collectPredicates, bool inContext, bool fullCtx) 1451 { 1452 debug { 1453 writefln("PRED (collectPredicates=%1$s) %2$s" ~ 1454 ">=_p, ctx dependent=true", collectPredicates, pt.precedence); 1455 if ( parser !is null ) { 1456 writefln("context surrounding pred is %s", 1457 parser.getRuleInvocationStack); 1458 //parser.classinfo); 1459 } 1460 } 1461 1462 ATNConfig c = null; 1463 if (collectPredicates && inContext) { 1464 if ( fullCtx ) { 1465 // In full context mode, we can evaluate predicates on-the-fly 1466 // during closure, which dramatically reduces the size of 1467 // the config sets. It also obviates the need to test predicates 1468 // later during conflict resolution. 1469 int currentPosition = _input.index(); 1470 _input.seek(_startIndex); 1471 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1472 _input.seek(currentPosition); 1473 if ( predSucceeds ) { 1474 c = new ATNConfig(config, pt.target); // no pred context 1475 } 1476 } 1477 else { 1478 SemanticContext newSemCtx = 1479 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1480 c = new ATNConfig(config, pt.target, newSemCtx); 1481 } 1482 } 1483 else { 1484 c = new ATNConfig(config, pt.target); 1485 } 1486 1487 debug 1488 writefln("config from pred transition=%s", c); 1489 return c; 1490 1491 } 1492 1493 protected ATNConfig predTransition(ATNConfig config, PredicateTransition pt, bool collectPredicates, 1494 bool inContext, bool fullCtx) 1495 { 1496 debug { 1497 writefln("PRED (collectPredicates=%1$s) %2$s:%3$s, ctx dependent=%4$s", 1498 collectPredicates, pt.ruleIndex, 1499 pt.predIndex, pt.isCtxDependent); 1500 if ( parser !is null ) { 1501 writefln("context surrounding pred is %s", 1502 parser.getRuleInvocationStack()); 1503 } 1504 } 1505 1506 ATNConfig c = null; 1507 if ( collectPredicates && 1508 (!pt.isCtxDependent || (pt.isCtxDependent&&inContext)) ) 1509 { 1510 if ( fullCtx ) { 1511 // In full context mode, we can evaluate predicates on-the-fly 1512 // during closure, which dramatically reduces the size of 1513 // the config sets. It also obviates the need to test predicates 1514 // later during conflict resolution. 1515 int currentPosition = _input.index(); 1516 _input.seek(_startIndex); 1517 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1518 _input.seek(currentPosition); 1519 if (predSucceeds) { 1520 c = new ATNConfig(config, pt.target); // no pred context 1521 } 1522 } 1523 else { 1524 SemanticContext newSemCtx = 1525 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1526 c = new ATNConfig(config, pt.target, newSemCtx); 1527 } 1528 } 1529 else { 1530 c = new ATNConfig(config, pt.target); 1531 } 1532 1533 debug writefln("config from pred transition=",c); 1534 return c; 1535 } 1536 1537 public ATNConfig ruleTransition(ATNConfig config, RuleTransition t) 1538 { 1539 debug { 1540 writefln("CALL rule %1$s, ctx=%2$s", getRuleName(t.target.ruleIndex), config.context); 1541 } 1542 ATNState returnState = t.followState; 1543 PredictionContext newContext = 1544 SingletonPredictionContext.create(config.context, returnState.stateNumber); 1545 return new ATNConfig(config, t.target, newContext); 1546 } 1547 1548 /** 1549 * Gets a {@link BitSet} containing the alternatives in {@code configs} 1550 * which are part of one or more conflicting alternative subsets. 1551 * 1552 * @param configs The {@link ATNConfigSet} to analyze. 1553 * @return The alternatives in {@code configs} which are part of one or more 1554 * conflicting alternative subsets. If {@code configs} does not contain any 1555 * conflicting subsets, this method returns an empty {@link BitSet}. 1556 */ 1557 public BitSet getConflictingAlts(ATNConfigSet configs) 1558 { 1559 BitSet[] altsets = PredictionMode.getConflictingAltSubsets(configs); 1560 return PredictionMode.getAlts(altsets); 1561 } 1562 1563 /** 1564 * Sam pointed out a problem with the previous definition, v3, of 1565 * ambiguous states. If we have another state associated with conflicting 1566 * alternatives, we should keep going. For example, the following grammar 1567 * 1568 * s : (ID | ID ID?) ';' ; 1569 * 1570 * When the ATN simulation reaches the state before ';', it has a DFA 1571 * state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 1572 * 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node 1573 * because alternative to has another way to continue, via [6|2|[]]. 1574 * The key is that we have a single state that has config's only associated 1575 * with a single alternative, 2, and crucially the state transitions 1576 * among the configurations are all non-epsilon transitions. That means 1577 * we don't consider any conflicts that include alternative 2. So, we 1578 * ignore the conflict between alts 1 and 2. We ignore a set of 1579 * conflicting alts when there is an intersection with an alternative 1580 * associated with a single alt state in the state→config-list map. 1581 * 1582 * It's also the case that we might have two conflicting configurations but 1583 * also a 3rd nonconflicting configuration for a different alternative: 1584 * [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: 1585 * 1586 * a : A | A | A B ; 1587 * 1588 * After matching input A, we reach the stop state for rule A, state 1. 1589 * State 8 is the state right before B. Clearly alternatives 1 and 2 1590 * conflict and no amount of further lookahead will separate the two. 1591 * However, alternative 3 will be able to continue and so we do not 1592 * stop working on this state. In the previous example, we're concerned 1593 * with states associated with the conflicting alternatives. Here alt 1594 * 3 is not associated with the conflicting configs, but since we can continue 1595 * looking for input reasonably, I don't declare the state done. We 1596 * ignore a set of conflicting alts when we have an alternative 1597 * that we still need to pursue. 1598 */ 1599 public BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) 1600 { 1601 auto conflictingAlts = new BitSet(1); 1602 if (configs.uniqueAlt != ATN.INVALID_ALT_NUMBER) { 1603 conflictingAlts.set(configs.uniqueAlt, true); 1604 } 1605 else { 1606 *conflictingAlts = configs.conflictingAlts; 1607 } 1608 return *conflictingAlts; 1609 } 1610 1611 public string getTokenName(int t) 1612 { 1613 if (t == TokenConstantDefinition.EOF) { 1614 return "EOF"; 1615 } 1616 1617 Vocabulary vocabulary = parser !is null ? parser.getVocabulary() : new VocabularyImpl(null, null, null); 1618 string displayName = vocabulary.getDisplayName(t); 1619 if (displayName == to!string(t)) { 1620 return displayName; 1621 } 1622 1623 return displayName ~ "<" ~ to!string(t) ~ ">"; 1624 } 1625 1626 public string getLookaheadName(TokenStream input) 1627 { 1628 return getTokenName(input.LA(1)); 1629 } 1630 1631 /** 1632 * Used for debugging in adaptivePredict around execATN but I cut 1633 * it out for clarity now that alg. works well. We can leave this 1634 * "dead" code for a bit. 1635 */ 1636 public void dumpDeadEndConfigs(NoViableAltException nvae) 1637 { 1638 debug 1639 writefln("dead end configs: "); 1640 foreach (ATNConfig c; nvae.getDeadEndConfigs().configs) { 1641 string trans = "no edges"; 1642 if (c.state.getNumberOfTransitions > 0) { 1643 Transition t = c.state.transition(0); 1644 if (t.classinfo == AtomTransition.classinfo) { 1645 AtomTransition at = cast(AtomTransition)t; 1646 trans = "Atom " ~ getTokenName(at._label); 1647 } 1648 else if (t.classinfo == SetTransition.classinfo) { 1649 SetTransition st = cast(SetTransition)t; 1650 bool not = (st.classinfo == NotSetTransition.classinfo); 1651 trans = (not?"~":"") ~ "Set "~ st.set.toString(); 1652 } 1653 } 1654 debug 1655 writefln("%1$s:%2$s", c.toString(parser, true), trans); 1656 } 1657 } 1658 1659 protected NoViableAltException noViableAlt(TokenStream input, ParserRuleContext outerContext, 1660 ATNConfigSet configs, int startIndex) 1661 { 1662 return new NoViableAltException(parser, input, 1663 input.get(startIndex), 1664 input.LT(1), 1665 configs, outerContext); 1666 } 1667 1668 protected static int getUniqueAlt(ATNConfigSet configs) 1669 { 1670 int alt = ATN.INVALID_ALT_NUMBER; 1671 foreach (ATNConfig c; configs.configs) { 1672 if (alt == ATN.INVALID_ALT_NUMBER) { 1673 alt = c.alt; // found first alt 1674 } 1675 else if (c.alt != alt) { 1676 return ATN.INVALID_ALT_NUMBER; 1677 } 1678 } 1679 return alt; 1680 } 1681 1682 /** 1683 * Add an edge to the DFA, if possible. This method calls 1684 * {@link #addDFAState} to ensure the {@code to} state is present in the 1685 * DFA. If {@code from} is {@code null}, or if {@code t} is outside the 1686 * range of edges that can be represented in the DFA tables, this method 1687 * returns without adding the edge to the DFA. 1688 * 1689 * <p>If {@code to} is {@code null}, this method returns {@code null}. 1690 * Otherwise, this method returns the {@link DFAState} returned by calling 1691 * {@link #addDFAState} for the {@code to} state.</p> 1692 * 1693 * @param dfa The DFA 1694 * @param from The source state for the edge 1695 * @param t The input symbol 1696 * @param to The target state for the edge 1697 * 1698 * @return If {@code to} is {@code null}, this method returns {@code null}; 1699 * otherwise this method returns the result of calling {@link #addDFAState} 1700 * on {@code to} 1701 */ 1702 protected DFAState addDFAEdge(ref DFA dfa, DFAState from, int t, DFAState to) 1703 { 1704 debug { 1705 writefln("\nEDGE %1$s -> %2$s upon %3$s", from, to, getTokenName(t)); 1706 } 1707 if (to is null) { 1708 return null; 1709 } 1710 to = addDFAState(dfa, to); // used existing if possible not incoming 1711 if (from is null || t < -1 || t > atn.maxTokenType) { 1712 return to; 1713 } 1714 synchronized (from) { 1715 if (from.edges == null) { 1716 from.edges = new DFAState[atn.maxTokenType+1+1]; 1717 } 1718 from.edges[t+1] = to; // connect 1719 } 1720 1721 debug { 1722 writefln("end dfa.decision = %s, dfa.states = %s", dfa.decision, dfa.states); 1723 } 1724 1725 return to; 1726 } 1727 1728 /** 1729 * Add state {@code D} to the DFA if it is not already present, and return 1730 * the actual instance stored in the DFA. If a state equivalent to {@code D} 1731 * is already in the DFA, the existing state is returned. Otherwise this 1732 * method returns {@code D} after adding it to the DFA. 1733 * 1734 * <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and 1735 * does not change the DFA.</p> 1736 * 1737 * @param dfa The dfa 1738 * @param D The DFA state to add 1739 * @return The state stored in the DFA. This will be either the existing 1740 * state if {@code D} is already in the DFA, or {@code D} itself if the 1741 * state was not already present. 1742 */ 1743 protected DFAState addDFAState(ref DFA dfa, DFAState D) 1744 { 1745 if (D == ERROR) { 1746 return D; 1747 } 1748 debug 1749 writefln("adding new dfa: D = %s, dfa.states = %s, \n(D in dfa.states) = %s", D, dfa.states, D in dfa.states); 1750 if (D in dfa.states) 1751 return dfa.states[D]; 1752 D.stateNumber = to!int(dfa.states.length); 1753 if (!D.configs.readonly) { 1754 D.configs.optimizeConfigs(this); 1755 D.configs.readonly(true); 1756 } 1757 dfa.states[D] = D; 1758 debug 1759 writefln("adding new DFA state end: D = %1$s, dfa.states = %2$s", D, dfa.states); 1760 return D; 1761 } 1762 1763 protected void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, ATNConfigSet configs, 1764 int startIndex, int stopIndex) 1765 { 1766 debug(retry_debug) { 1767 import antlr.v4.runtime.misc.Interval; 1768 Interval interval = Interval.of(startIndex, stopIndex); 1769 writefln("reportAttemptingFullContext decision=%1$s:%2$s, input=%3$s", 1770 dfa.decision, configs, 1771 parser.getTokenStream().getText(interval)); 1772 } 1773 if (parser !is null) parser.getErrorListenerDispatch().reportAttemptingFullContext(parser, 1774 dfa, 1775 startIndex, 1776 stopIndex, 1777 conflictingAlts, 1778 configs); 1779 } 1780 1781 protected void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, 1782 int startIndex, int stopIndex) 1783 { 1784 debug(retry_debug) { 1785 import antlr.v4.runtime.misc.Interval; 1786 Interval interval = Interval.of(startIndex, stopIndex); 1787 writefln("reportContextSensitivity decision=%1$s:%2$s, input=%3$s", 1788 dfa.decision, configs, parser.getTokenStream().getText(interval)); 1789 } 1790 if (parser !is null) 1791 parser.getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); 1792 } 1793 1794 protected void reportAmbiguity(DFA dfa, DFAState D, int startIndex, int stopIndex, bool exact, 1795 BitSet ambigAlts, ATNConfigSet configs) 1796 { 1797 debug(retry_debug) { 1798 import antlr.v4.runtime.misc.Interval; 1799 Interval interval = Interval.of(startIndex, stopIndex); 1800 writefln("reportAmbiguity %1$s:%2$s, input=%3$s", 1801 ambigAlts, configs, parser.getTokenStream().getText(interval)); 1802 } 1803 if (parser !is null) parser.getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, 1804 exact, ambigAlts, configs); 1805 } 1806 1807 public void setPredictionMode(PredictionModeConst mode) 1808 { 1809 this.mode = mode; 1810 } 1811 1812 public PredictionModeConst getPredictionMode() 1813 { 1814 return mode; 1815 } 1816 1817 public Parser getParser() 1818 { 1819 return parser; 1820 } 1821 1822 protected bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) 1823 { 1824 auto TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = true; 1825 if ( TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT ) 1826 return false; 1827 return false; 1828 } 1829 1830 }