1 /* 2 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ParserATNSimulator; 8 9 import antlr.v4.runtime.IntStream; 10 import antlr.v4.runtime.IntStreamConstant; 11 import antlr.v4.runtime.NoViableAltException; 12 import antlr.v4.runtime.Parser; 13 import antlr.v4.runtime.ParserRuleContext; 14 import antlr.v4.runtime.RuleContext; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.Vocabulary; 18 import antlr.v4.runtime.VocabularyImpl; 19 import antlr.v4.runtime.atn.ATN; 20 import antlr.v4.runtime.atn.ATNConfig; 21 import antlr.v4.runtime.atn.ATNConfigSet; 22 import antlr.v4.runtime.atn.ATNSimulator; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.ActionTransition; 25 import antlr.v4.runtime.atn.AtomTransition; 26 import antlr.v4.runtime.atn.DecisionState; 27 import antlr.v4.runtime.atn.EpsilonTransition; 28 import antlr.v4.runtime.atn.InterfaceParserATNSimulator; 29 import antlr.v4.runtime.atn.NotSetTransition; 30 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 31 import antlr.v4.runtime.atn.PredicateTransition; 32 import antlr.v4.runtime.atn.PredictionContext; 33 import antlr.v4.runtime.atn.PredictionContextCache; 34 import antlr.v4.runtime.atn.PredictionMode; 35 import antlr.v4.runtime.atn.PredictionModeConst; 36 import antlr.v4.runtime.atn.RuleStopState; 37 import antlr.v4.runtime.atn.RuleTransition; 38 import antlr.v4.runtime.atn.SemanticContext; 39 import antlr.v4.runtime.atn.SetTransition; 40 import antlr.v4.runtime.atn.SingletonPredictionContext; 41 import antlr.v4.runtime.atn.Transition; 42 import antlr.v4.runtime.atn.TransitionStates; 43 import antlr.v4.runtime.dfa.DFA; 44 import antlr.v4.runtime.dfa.DFAState; 45 import antlr.v4.runtime.dfa.PredPrediction; 46 import antlr.v4.runtime.misc; 47 import std.algorithm.searching; 48 import std.conv; 49 import std.format; 50 import std.stdio; 51 import std.typecons; 52 53 alias ATNConfigSetATNConfigSetPair = Tuple!(ATNConfigSet, "a", ATNConfigSet, "b"); 54 55 // Class ParserATNSimulator 56 /** 57 * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. 58 * 59 * <p> 60 * The basic complexity of the adaptive strategy makes it harder to understand. 61 * We begin with ATN simulation to build paths in a DFA. Subsequent prediction 62 * requests go through the DFA first. If they reach a state without an edge for 63 * the current symbol, the algorithm fails over to the ATN simulation to 64 * complete the DFA path for the current input (until it finds a conflict state 65 * or uniquely predicting state).</p> 66 * 67 * <p> 68 * All of that is done without using the outer context because we want to create 69 * a DFA that is not dependent upon the rule invocation stack when we do a 70 * prediction. One DFA works in all contexts. We avoid using context not 71 * necessarily because it's slower, although it can be, but because of the DFA 72 * caching problem. The closure routine only considers the rule invocation stack 73 * created during prediction beginning in the decision rule. For example, if 74 * prediction occurs without invoking another rule's ATN, there are no context 75 * stacks in the configurations. When lack of context leads to a conflict, we 76 * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing 77 * strategy (versus full LL(*)).</p> 78 * 79 * <p> 80 * When SLL yields a configuration set with conflict, we rewind the input and 81 * retry the ATN simulation, this time using full outer context without adding 82 * to the DFA. Configuration context stacks will be the full invocation stacks 83 * from the start rule. If we get a conflict using full context, then we can 84 * definitively say we have a true ambiguity for that input sequence. If we 85 * don't get a conflict, it implies that the decision is sensitive to the outer 86 * context. (It is not context-sensitive in the sense of context-sensitive 87 * grammars.)</p> 88 * 89 * <p> 90 * The next time we reach this DFA state with an SLL conflict, through DFA 91 * simulation, we will again retry the ATN simulation using full context mode. 92 * This is slow because we can't save the results and have to "interpret" the 93 * ATN each time we get that input.</p> 94 * 95 * <p> 96 * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> 97 * 98 * <p> 99 * We could cache results from full context to predicted alternative easily and 100 * that saves a lot of time but doesn't work in presence of predicates. The set 101 * of visible predicates from the ATN start state changes depending on the 102 * context, because closure can fall off the end of a rule. I tried to cache 103 * tuples (stack context, semantic context, predicted alt) but it was slower 104 * than interpreting and much more complicated. Also required a huge amount of 105 * memory. The goal is not to create the world's fastest parser anyway. I'd like 106 * to keep this algorithm simple. By launching multiple threads, we can improve 107 * the speed of parsing across a large number of files.</p> 108 * 109 * <p> 110 * There is no strict ordering between the amount of input used by SLL vs LL, 111 * which makes it really hard to build a cache for full context. Let's say that 112 * we have input A B C that leads to an SLL conflict with full context X. That 113 * implies that using X we might only use A B but we could also use A B C D to 114 * resolve conflict. Input A B C D could predict alternative 1 in one position 115 * in the input and A B C E could predict alternative 2 in another position in 116 * input. The conflicting SLL configurations could still be non-unique in the 117 * full context prediction, which would lead us to requiring more input than the 118 * original A B C. To make a prediction cache work, we have to track the exact 119 * input used during the previous prediction. That amounts to a cache that maps 120 * X to a specific DFA for that context.</p> 121 * 122 * <p> 123 * Something should be done for left-recursive expression predictions. They are 124 * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry 125 * with full LL thing Sam does.</p> 126 * 127 * <p> 128 * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> 129 * 130 * <p> 131 * We avoid doing full context retry when the outer context is empty, we did not 132 * dip into the outer context by falling off the end of the decision state rule, 133 * or when we force SLL mode.</p> 134 * 135 * <p> 136 * As an example of the not dip into outer context case, consider as super 137 * constructor calls versus function calls. One grammar might look like 138 * this:</p> 139 * 140 * <pre> 141 * ctorBody 142 * : '{' superCall? stat* '}' 143 * ; 144 * </pre> 145 * 146 * <p> 147 * Or, you might see something like</p> 148 * 149 * <pre> 150 * stat 151 * : superCall ';' 152 * | expression ';' 153 * | ... 154 * ; 155 * </pre> 156 * 157 * <p> 158 * In both cases I believe that no closure operations will dip into the outer 159 * context. In the first case ctorBody in the worst case will stop at the '}'. 160 * In the 2nd case it should stop at the ';'. Both cases should stay within the 161 * entry rule and not dip into the outer context.</p> 162 * 163 * <p> 164 * <strong>PREDICATES</strong></p> 165 * 166 * <p> 167 * Predicates are always evaluated if present in either SLL or LL both. SLL and 168 * LL simulation deals with predicates differently. SLL collects predicates as 169 * it performs closure operations like ANTLR v3 did. It delays predicate 170 * evaluation until it reaches and accept state. This allows us to cache the SLL 171 * ATN simulation whereas, if we had evaluated predicates on-the-fly during 172 * closure, the DFA state configuration sets would be different and we couldn't 173 * build up a suitable DFA.</p> 174 * 175 * <p> 176 * When building a DFA accept state during ATN simulation, we evaluate any 177 * predicates and return the sole semantically valid alternative. If there is 178 * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, 179 * we throw an exception. Alternatives without predicates act like they have 180 * true predicates. The simple way to think about it is to strip away all 181 * alternatives with false predicates and choose the minimum alternative that 182 * remains.</p> 183 * 184 * <p> 185 * When we start in the DFA and reach an accept state that's predicated, we test 186 * those and return the minimum semantically viable alternative. If no 187 * alternatives are viable, we throw an exception.</p> 188 * 189 * <p> 190 * During full LL ATN simulation, closure always evaluates predicates and 191 * on-the-fly. This is crucial to reducing the configuration set size during 192 * closure. It hits a landmine when parsing with the Java grammar, for example, 193 * without this on-the-fly evaluation.</p> 194 * 195 * <p> 196 * <strong>SHARING DFA</strong></p> 197 * 198 * <p> 199 * All instances of the same parser share the same decision DFAs through a 200 * static field. Each instance gets its own ATN simulator but they share the 201 * same {@link #decisionToDFA} field. They also share a 202 * {@link PredictionContextCache} object that makes sure that all 203 * {@link PredictionContext} objects are shared among the DFA states. This makes 204 * a big size difference.</p> 205 * 206 * <p> 207 * <strong>THREAD SAFETY</strong></p> 208 * 209 * <p> 210 * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when 211 * it adds a new DFA object to that array. {@link #addDFAEdge} 212 * locks on the DFA for the current decision when setting the 213 * {@link DFAState#edges} field. {@link #addDFAState} locks on 214 * the DFA for the current decision when looking up a DFA state to see if it 215 * already exists. We must make sure that all requests to add DFA states that 216 * are equivalent result in the same shared DFA object. This is because lots of 217 * threads will be trying to update the DFA at once. The 218 * {@link #addDFAState} method also locks inside the DFA lock 219 * but this time on the shared context cache when it rebuilds the 220 * configurations' {@link PredictionContext} objects using cached 221 * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is 222 * safe as long as we can guarantee that all threads referencing 223 * {@code s.edge[t]} get the same physical target {@link DFAState}, or 224 * {@code null}. Once into the DFA, the DFA simulation does not reference the 225 * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new 226 * targets. The DFA simulator will either find {@link DFAState#edges} to be 227 * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or 228 * {@code dfa.edges[t]} to be non-null. The 229 * {@link #addDFAEdge} method could be racing to set the field 230 * but in either case the DFA simulator works; if {@code null}, and requests ATN 231 * simulation. It could also race trying to get {@code dfa.edges[t]}, but either 232 * way it will work because it's not doing a test and set operation.</p> 233 * 234 * <p> 235 * <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage 236 * Parsing)</strong></p> 237 * 238 * <p> 239 * Sam pointed out that if SLL does not give a syntax error, then there is no 240 * point in doing full LL, which is slower. We only have to try LL if we get a 241 * syntax error. For maximum speed, Sam starts the parser set to pure SLL 242 * mode with the {@link BailErrorStrategy}:</p> 243 * 244 * <pre> 245 * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; 246 * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); 247 * </pre> 248 * 249 * <p> 250 * If it does not get a syntax error, then we're done. If it does get a syntax 251 * error, we need to retry with the combined SLL/LL strategy.</p> 252 * 253 * <p> 254 * The reason this works is as follows. If there are no SLL conflicts, then the 255 * grammar is SLL (at least for that input set). If there is an SLL conflict, 256 * the full LL analysis must yield a set of viable alternatives which is a 257 * subset of the alternatives reported by SLL. If the LL set is a singleton, 258 * then the grammar is LL but not SLL. If the LL set is the same size as the SLL 259 * set, the decision is SLL. If the LL set has size > 1, then that decision 260 * is truly ambiguous on the current input. If the LL set is smaller, then the 261 * SLL conflict resolution might choose an alternative that the full LL would 262 * rule out as a possibility based upon better context information. If that's 263 * the case, then the SLL parse will definitely get an error because the full LL 264 * analysis says it's not viable. If SLL conflict resolution chooses an 265 * alternative within the LL set, them both SLL and LL would choose the same 266 * alternative because they both choose the minimum of multiple conflicting 267 * alternatives.</p> 268 * 269 * <p> 270 * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and 271 * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL 272 * parsing will get an error because SLL will pursue alternative 1. If 273 * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will 274 * choose the same alternative because alternative one is the minimum of either 275 * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax 276 * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> 277 * 278 * <p> 279 * Of course, if the input is invalid, then we will get an error for sure in 280 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over 281 * the input.</p> 282 */ 283 class ParserATNSimulator : ATNSimulator, InterfaceParserATNSimulator 284 { 285 286 protected Parser parser; 287 288 public DFA[] decisionToDFA; 289 290 public PredictionModeConst mode = PredictionModeConst.LL; 291 292 /** 293 * Each prediction operation uses a cache for merge of prediction contexts. 294 * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap 295 * isn't synchronized but we're ok since two threads shouldn't reuse same 296 * parser/atnsim object because it can only handle one input at a time. 297 * This maps graphs a and b to merged result c. (a,b)→c. We can avoid 298 * the merge if we ever see a and b again. Note that (b,a)→c should 299 * also be examined during cache lookup. 300 * @uml 301 * @__gshared 302 */ 303 public static __gshared DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext) mergeCache; 304 305 protected DFA _dfa; 306 307 protected TokenStream _input; 308 309 protected int _startIndex; 310 311 protected ParserRuleContext _outerContext; 312 313 /** 314 * @uml 315 * Testing only! 316 */ 317 protected this(ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 318 { 319 this(null, atn, decisionToDFA, sharedContextCache); 320 } 321 322 public this(Parser parser, ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 323 { 324 super(atn, sharedContextCache); 325 this.parser = parser; 326 this.decisionToDFA = decisionToDFA; 327 if (mergeCache is null) 328 this.mergeCache = new typeof(this.mergeCache); 329 } 330 331 /** 332 * @uml 333 * @override 334 */ 335 public override void reset() 336 { 337 } 338 339 /** 340 * @uml 341 * @override 342 */ 343 public override void clearDFA() 344 { 345 for (int d = 0; d < decisionToDFA.length; d++) { 346 decisionToDFA[d] = new DFA(atn.getDecisionState(d), d); 347 } 348 } 349 350 public int adaptivePredict(TokenStream input, int decision, ParserRuleContext outerContext) 351 { 352 debug(ParserATNSimulator) { 353 writefln("adaptivePredict decision %1$s"~ 354 " exec LA(1)==%2$s"~ 355 " line %3$s:%4$s", 356 decision, getLookaheadName(input), input.LT(1).getLine, 357 input.LT(1).getCharPositionInLine()); 358 } 359 360 _input = input; 361 _startIndex = input.index(); 362 _outerContext = outerContext; 363 DFA dfa = decisionToDFA[decision]; 364 _dfa = dfa; 365 366 int m = input.mark(); 367 int index = _startIndex; 368 // Now we are certain to have a specific decision's DFA 369 // But, do we still need an initial state? 370 try { 371 DFAState s0; 372 if (dfa.isPrecedenceDfa) { 373 // the start state for a precedence DFA depends on the current 374 // parser precedence, and is provided by a DFA method. 375 s0 = dfa.getPrecedenceStartState(parser.getPrecedence()); 376 } 377 else { 378 // the start state for a "regular" DFA is just s0 379 s0 = dfa.s0; 380 } 381 382 if (s0 is null) { 383 if ( outerContext is null ) 384 outerContext = ParserRuleContext.EMPTY; 385 debug(ParserATNSimulator) { 386 writefln("predictATN decision = %1$s"~ 387 " exec LA(1)==%2$s"~ 388 ", outerContext=%3$s", 389 dfa.decision, getLookaheadName(input), 390 outerContext); 391 } 392 393 bool fullCtx = false; 394 ATNConfigSet s0_closure = 395 computeStartState(dfa.atnStartState, 396 ParserRuleContext.EMPTY, 397 fullCtx); 398 if (dfa.isPrecedenceDfa) { 399 /* If this is a precedence DFA, we use applyPrecedenceFilter 400 * to convert the computed start state to a precedence start 401 * state. We then use DFA.setPrecedenceStartState to set the 402 * appropriate start state for the precedence level rather 403 * than simply setting DFA.s0. 404 */ 405 dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway 406 s0_closure = applyPrecedenceFilter(s0_closure); 407 s0 = addDFAState(dfa, new DFAState(s0_closure)); 408 dfa.setPrecedenceStartState(parser.getPrecedence(), s0); 409 } 410 else { 411 s0 = addDFAState(dfa, new DFAState(s0_closure)); 412 dfa.s0 = s0; 413 } 414 } 415 int alt = execATN(dfa, s0, input, index, outerContext); 416 debug(ParserATNSimulator) 417 writefln("DFA after predictATN: %1$s, alt = %s, dfa.states = %s", dfa.toString(parser.getVocabulary), alt, dfa); 418 return alt; 419 } 420 finally { 421 mergeCache = null; // wack cache after each prediction 422 _dfa = null; 423 input.seek(index); 424 input.release(m); 425 } 426 } 427 428 /** 429 * There are some key conditions we're looking for after computing a new 430 * set of ATN configs (proposed DFA state): 431 * * if the set is empty, there is no viable alternative for current symbol 432 * * does the state uniquely predict an alternative? 433 * * does the state have a conflict that would prevent us from 434 * putting it on the work list? 435 * 436 * We also have some key operations to do: 437 * * add an edge from previous DFA state to potentially new DFA state, D, 438 * upon current symbol but only if adding to work list, which means in all 439 * cases except no viable alternative (and possibly non-greedy decisions?) 440 * * collecting predicates and adding semantic context to DFA accept states 441 * * adding rule context to context-sensitive DFA accept states 442 * * consuming an input symbol 443 * * reporting a conflict 444 * * reporting an ambiguity 445 * * reporting a context sensitivity 446 * * reporting insufficient predicates 447 * 448 * cover these cases: 449 * dead end 450 * single alt 451 * single alt + preds 452 * conflict 453 * conflict + preds 454 */ 455 protected int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, ParserRuleContext outerContext) 456 { 457 debug { 458 writefln("execATN decision %1$s"~ 459 " exec LA(1)==%2$s line %3$s:%4$s", 460 dfa.decision, 461 getLookaheadName(input), 462 input.LT(1).getLine, 463 input.LT(1).getCharPositionInLine()); 464 } 465 466 DFAState previousD = s0; 467 468 debug 469 writefln("s0 = %1$s", s0); 470 471 int t = input.LA(1); 472 473 while (true) { // while more work 474 DFAState D = getExistingTargetState(previousD, t); 475 if (D is null) { 476 D = computeTargetState(dfa, previousD, t); 477 } 478 479 if (D == ERROR) { 480 // if any configs in previous dipped into outer context, that 481 // means that input up to t actually finished entry rule 482 // at least for SLL decision. Full LL doesn't dip into outer 483 // so don't need special case. 484 // We will get an error no matter what so delay until after 485 // decision; better error message. Also, no reachable target 486 // ATN states in SLL implies LL will also get nowhere. 487 // If conflict in states that dip out, choose min since we 488 // will get error no matter what. 489 NoViableAltException e = noViableAlt(input, outerContext, previousD.configs, startIndex); 490 input.seek(startIndex); 491 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext); 492 if (alt != ATN.INVALID_ALT_NUMBER) { 493 return alt; 494 } 495 throw e; 496 } 497 498 if (D.requiresFullContext && mode != PredictionModeConst.SLL) { 499 // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) 500 BitSet conflictingAlts = D.configs.conflictingAlts; 501 if (D.predicates != null) { 502 debug 503 writeln("DFA state has preds in DFA sim LL failover"); 504 int conflictIndex = input.index(); 505 if (conflictIndex != startIndex) { 506 input.seek(startIndex); 507 } 508 509 conflictingAlts = evalSemanticContext(D.predicates, outerContext, true); 510 if (conflictingAlts.cardinality ==1) { 511 debug 512 writeln("Full LL avoided"); 513 return conflictingAlts.nextSetBit(0); 514 } 515 516 if (conflictIndex != startIndex) { 517 // restore the index so reporting the fallback to full 518 // context occurs with the index at the correct spot 519 input.seek(conflictIndex); 520 } 521 } 522 523 debug(dfa_debug) 524 writefln("ctx sensitive state %1$ in %2$s", 525 outerContext, D); 526 bool fullCtx = true; 527 ATNConfigSet s0_closure = 528 computeStartState(dfa.atnStartState, outerContext, 529 fullCtx); 530 reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index()); 531 int alt = execATNWithFullContext(dfa, D, s0_closure, 532 input, startIndex, 533 outerContext); 534 return alt; 535 } 536 537 if ( D.isAcceptState ) { 538 if (D.predicates == null) { 539 return D.prediction; 540 } 541 542 int stopIndex = input.index(); 543 input.seek(startIndex); 544 BitSet alts = evalSemanticContext(D.predicates, outerContext, true); 545 switch (alts.cardinality) { 546 case 0: 547 throw noViableAlt(input, outerContext, D.configs, startIndex); 548 549 case 1: 550 return alts.nextSetBit(0); 551 552 default: 553 // report ambiguity after predicate evaluation to make sure the correct 554 // set of ambig alts is reported. 555 reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs); 556 return alts.nextSetBit(0); 557 } 558 } 559 560 previousD = D; 561 562 if (t != IntStreamConstant.EOF) { 563 input.consume(); 564 t = input.LA(1); 565 } 566 } 567 568 } 569 570 /** 571 * Get an existing target state for an edge in the DFA. If the target state 572 * for the edge has not yet been computed or is otherwise not available, 573 * this method returns {@code null}. 574 * 575 * @param previousD The current DFA state 576 * @param t The next input symbol 577 * @return The existing target DFA state for the given input symbol 578 * {@code t}, or {@code null} if the target state for this edge is not 579 * already cached 580 */ 581 public DFAState getExistingTargetState(DFAState previousD, int t) 582 { 583 DFAState[] edges = previousD.edges; 584 if (edges == null || t + 1 < 0 || t + 1 >= edges.length) { 585 return null; 586 } 587 return edges[t + 1]; 588 } 589 590 /** 591 * Compute a target state for an edge in the DFA, and attempt to add the 592 * computed state and corresponding edge to the DFA. 593 * 594 * @param dfa The DFA 595 * @param previousD The current DFA state 596 * @param t The next input symbol 597 * 598 * @return The computed target DFA state for the given input symbol 599 * {@code t}. If {@code t} does not lead to a valid DFA state, this method 600 * returns {@link #ERROR}. 601 */ 602 public DFAState computeTargetState(ref DFA dfa, DFAState previousD, int t) 603 { 604 ATNConfigSet reach = computeReachSet(previousD.configs, t, false); 605 if (reach is null) { 606 addDFAEdge(dfa, previousD, t, ERROR); 607 return ERROR; 608 } 609 // create new target state; we'll add to DFA after it's complete 610 DFAState D = new DFAState(reach); 611 int predictedAlt = getUniqueAlt(reach); 612 debug { 613 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 614 writefln("SLL altSubSets=%1$s"~ 615 ", configs=%2$s"~ 616 ", predict=%3$s, allSubsetsConflict=%4$s, "~ 617 "conflictingAlts=%5$s", 618 altSubSets, 619 reach, 620 predictedAlt, 621 PredictionMode.allSubsetsConflict(altSubSets), 622 getConflictingAlts(reach)); 623 } 624 625 if (predictedAlt != ATN.INVALID_ALT_NUMBER) { 626 // NO CONFLICT, UNIQUELY PREDICTED ALT 627 D.isAcceptState = true; 628 D.configs.uniqueAlt = predictedAlt; 629 D.prediction = predictedAlt; 630 } 631 else if ( PredictionMode.hasSLLConflictTerminatingPrediction(mode, reach)) { 632 // MORE THAN ONE VIABLE ALTERNATIVE 633 D.configs.conflictingAlts = getConflictingAlts(reach); 634 D.requiresFullContext = true; 635 // in SLL-only mode, we will stop at this state and return the minimum alt 636 D.isAcceptState = true; 637 D.prediction = D.configs.conflictingAlts.nextSetBit(0); 638 } 639 640 if ( D.isAcceptState && D.configs.hasSemanticContext ) { 641 predicateDFAState(D, atn.getDecisionState(dfa.decision)); 642 if (D.predicates != null) { 643 D.prediction = ATN.INVALID_ALT_NUMBER; 644 } 645 } 646 // all adds to dfa are done after we've created full D state 647 D = addDFAEdge(dfa, previousD, t, D); 648 return D; 649 } 650 651 public void predicateDFAState(DFAState dfaState, DecisionState decisionState) 652 { 653 // We need to test all predicates, even in DFA states that 654 // uniquely predict alternative. 655 int nalts = decisionState.getNumberOfTransitions(); 656 // Update DFA so reach becomes accept state with (predicate,alt) 657 // pairs if preds found for conflicting alts 658 BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState.configs); 659 SemanticContext[] altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); 660 if ( altToPred!=null ) { 661 dfaState.predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); 662 dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds 663 } 664 else { 665 // There are preds in configs but they might go away 666 // when OR'd together like {p}? || NONE == NONE. If neither 667 // alt has preds, resolve to min alt 668 dfaState.prediction = altsToCollectPredsFrom.nextSetBit(0); 669 } 670 } 671 672 /** 673 * @uml 674 * comes back with reach.uniqueAlt set to a valid alt 675 */ 676 protected int execATNWithFullContext(DFA dfa, DFAState D, ATNConfigSet s0, TokenStream input, 677 int startIndex, ParserRuleContext outerContext) 678 { 679 debug { 680 writefln("execATNWithFullContext %s", s0); 681 } 682 bool fullCtx = true; 683 bool foundExactAmbig = false; 684 ATNConfigSet reach = null; 685 ATNConfigSet previous = s0; 686 input.seek(startIndex); 687 int t = input.LA(1); 688 int predictedAlt; 689 while (true) { // while more work 690 // System.out.println("LL REACH "+getLookaheadName(input)+ 691 // " from configs.size="+previous.size()+ 692 // " line "+input.LT(1).getLine()+":"+input.LT(1).getCharPositionInLine()); 693 reach = computeReachSet(previous, t, fullCtx); 694 if (reach is null) { 695 // if any configs in previous dipped into outer context, that 696 // means that input up to t actually finished entry rule 697 // at least for LL decision. Full LL doesn't dip into outer 698 // so don't need special case. 699 // We will get an error no matter what so delay until after 700 // decision; better error message. Also, no reachable target 701 // ATN states in SLL implies LL will also get nowhere. 702 // If conflict in states that dip out, choose min since we 703 // will get error no matter what. 704 NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex); 705 input.seek(startIndex); 706 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); 707 if ( alt!=ATN.INVALID_ALT_NUMBER ) { 708 return alt; 709 } 710 throw e; 711 } 712 713 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 714 debug { 715 writefln("LL altSubSets=%1$s, PredictionMode.getUniqueAlt(altSubSets)" ~ 716 ", resolvesToJustOneViableAlt=%3$s", 717 altSubSets, 718 PredictionMode.getUniqueAlt(altSubSets), 719 PredictionMode.resolvesToJustOneViableAlt(altSubSets)); 720 } 721 722 // System.out.println("altSubSets: "+altSubSets); 723 // System.err.println("reach="+reach+", "+reach.conflictingAlts); 724 reach.uniqueAlt = getUniqueAlt(reach); 725 // unique prediction? 726 if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) { 727 predictedAlt = reach.uniqueAlt; 728 break; 729 } 730 if ( mode != PredictionModeConst.LL_EXACT_AMBIG_DETECTION ) { 731 predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets); 732 if ( predictedAlt != ATN.INVALID_ALT_NUMBER ) { 733 break; 734 } 735 } 736 else { 737 // In exact ambiguity mode, we never try to terminate early. 738 // Just keeps scarfing until we know what the conflict is 739 if ( PredictionMode.allSubsetsConflict(altSubSets) && 740 PredictionMode.allSubsetsEqual(altSubSets) ) 741 { 742 foundExactAmbig = true; 743 predictedAlt = PredictionMode.getSingleViableAlt(altSubSets); 744 break; 745 } 746 // else there are multiple non-conflicting subsets or 747 // we're not sure what the ambiguity is yet. 748 // So, keep going. 749 } 750 751 previous = reach; 752 if (t != IntStreamConstant.EOF) { 753 input.consume(); 754 t = input.LA(1); 755 } 756 } 757 758 // If the configuration set uniquely predicts an alternative, 759 // without conflict, then we know that it's a full LL decision 760 // not SLL. 761 if ( reach.uniqueAlt != ATN.INVALID_ALT_NUMBER ) { 762 reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index()); 763 return predictedAlt; 764 } 765 766 // We do not check predicates here because we have checked them 767 // on-the-fly when doing full context prediction. 768 769 /* 770 In non-exact ambiguity detection mode, we might actually be able to 771 detect an exact ambiguity, but I'm not going to spend the cycles 772 needed to check. We only emit ambiguity warnings in exact ambiguity 773 mode. 774 775 For example, we might know that we have conflicting configurations. 776 But, that does not mean that there is no way forward without a 777 conflict. It's possible to have nonconflicting alt subsets as in: 778 779 LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] 780 781 from 782 783 [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), 784 (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] 785 786 In this case, (17,1,[5 $]) indicates there is some next sequence that 787 would resolve this without conflict to alternative 1. Any other viable 788 next sequence, however, is associated with a conflict. We stop 789 looking for input because no amount of further lookahead will alter 790 the fact that we should predict alternative 1. We just can't say for 791 sure that there is an ambiguity without looking further. 792 */ 793 reportAmbiguity(dfa, D, startIndex, input.index(), foundExactAmbig, 794 reach.getAlts(), reach); 795 796 return predictedAlt; 797 } 798 799 public ATNConfigSet computeReachSet(ATNConfigSet closure, int t, bool fullCtx) 800 { 801 debug 802 writefln("in computeReachSet, starting closure: %s", closure); 803 804 if (mergeCache is null) { 805 mergeCache = new DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext); 806 } 807 808 ATNConfigSet intermediate = new ATNConfigSet(fullCtx); 809 810 /* Configurations already in a rule stop state indicate reaching the end 811 * of the decision rule (local context) or end of the start rule (full 812 * context). Once reached, these configurations are never updated by a 813 * closure operation, so they are handled separately for the performance 814 * advantage of having a smaller intermediate set when calling closure. 815 * 816 * For full-context reach operations, separate handling is required to 817 * ensure that the alternative matching the longest overall sequence is 818 * chosen when multiple such configurations can match the input. 819 */ 820 ATNConfig[] skippedStopStates; 821 822 // First figure out where we can reach on input t 823 foreach (c; closure.configs) { 824 debug 825 writefln("testing %1$s at %2$s", getTokenName(t), c.toString); 826 827 if (cast(RuleStopState)c.state) { 828 assert(c.context.isEmpty); 829 if (fullCtx || t == IntStreamConstant.EOF) { 830 skippedStopStates ~= c; 831 } 832 continue; 833 } 834 835 foreach (trans; c.state.transitions) { 836 ATNState target = getReachableTarget(trans, t); 837 if (target) { 838 intermediate.add(new ATNConfig(c, target), mergeCache); 839 } 840 } 841 } 842 843 // Now figure out where the reach operation can take us... 844 845 ATNConfigSet reach = null; 846 847 /* This block optimizes the reach operation for intermediate sets which 848 * trivially indicate a termination state for the overall 849 * adaptivePredict operation. 850 * 851 * The conditions assume that intermediate 852 * contains all configurations relevant to the reach set, but this 853 * condition is not true when one or more configurations have been 854 * withheld in skippedStopStates, or when the current symbol is EOF. 855 */ 856 if (skippedStopStates is null && t != TokenConstantDefinition.EOF) { 857 if (intermediate.size() == 1 ) { 858 // Don't pursue the closure if there is just one state. 859 // It can only have one alternative; just add to result 860 // Also don't pursue the closure if there is unique alternative 861 // among the configurations. 862 reach = intermediate; 863 } 864 else if (getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER ) { 865 // Also don't pursue the closure if there is unique alternative 866 // among the configurations. 867 reach = intermediate; 868 } 869 } 870 871 /* If the reach set could not be trivially determined, perform a closure 872 * operation on the intermediate set to compute its initial value. 873 */ 874 if (reach is null) { 875 reach = new ATNConfigSet(fullCtx); 876 ATNConfig[] closureBusy; 877 bool treatEofAsEpsilon = t == TokenConstantDefinition.EOF; 878 foreach (c; intermediate.configs) 879 { 880 closureATN(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon); 881 } 882 } 883 884 if (t == IntStreamConstant.EOF) { 885 /* After consuming EOF no additional input is possible, so we are 886 * only interested in configurations which reached the end of the 887 * decision rule (local context) or end of the start rule (full 888 * context). Update reach to contain only these configurations. This 889 * handles both explicit EOF transitions in the grammar and implicit 890 * EOF transitions following the end of the decision or start rule. 891 * 892 * When reach==intermediate, no closure operation was performed. In 893 * this case, removeAllConfigsNotInRuleStopState needs to check for 894 * reachable rule stop states as well as configurations already in 895 * a rule stop state. 896 * 897 * This is handled before the configurations in skippedStopStates, 898 * because any configurations potentially added from that list are 899 * already guaranteed to meet this condition whether or not it's 900 * required. 901 */ 902 reach = removeAllConfigsNotInRuleStopState(reach, reach.configs == intermediate.configs); 903 } 904 905 /* If skippedStopStates is not null, then it contains at least one 906 * configuration. For full-context reach operations, these 907 * configurations reached the end of the start rule, in which case we 908 * only add them back to reach if no configuration during the current 909 * closure operation reached such a state. This ensures adaptivePredict 910 * chooses an alternative matching the longest overall sequence when 911 * multiple alternatives are viable. 912 */ 913 if (skippedStopStates !is null && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))) { 914 assert(skippedStopStates.length > 0); 915 foreach (c; skippedStopStates) { 916 reach.add(c, mergeCache); 917 } 918 } 919 920 if (reach.isEmpty) 921 return null; 922 return reach; 923 } 924 925 /** 926 * Return a configuration set containing only the configurations from 927 * {@code configs} which are in a {@link RuleStopState}. If all 928 * configurations in {@code configs} are already in a rule stop state, this 929 * method simply returns {@code configs}. 930 * 931 * <p>When {@code lookToEndOfRule} is true, this method uses 932 * {@link ATN#nextTokens} for each configuration in {@code configs} which is 933 * not already in a rule stop state to see if a rule stop state is reachable 934 * from the configuration via epsilon-only transitions.</p> 935 * 936 * @param configs the configuration set to update 937 * @param lookToEndOfRule when true, this method checks for rule stop states 938 * reachable by epsilon-only transitions from each configuration in 939 * {@code configs}. 940 * 941 * @return {@code configs} if all configurations in {@code configs} are in a 942 * rule stop state, otherwise return a new configuration set containing only 943 * the configurations from {@code configs} which are in a rule stop state 944 */ 945 protected ATNConfigSet removeAllConfigsNotInRuleStopState(ATNConfigSet configs, bool lookToEndOfRule) 946 { 947 if (PredictionMode.allConfigsInRuleStopStates(configs)) { 948 return configs; 949 } 950 951 ATNConfigSet result = new ATNConfigSet(configs.fullCtx); 952 foreach (ATNConfig config; configs.configs) { 953 if (config.state.classinfo == RuleStopState.classinfo) { 954 result.add(config, mergeCache); 955 continue; 956 } 957 958 if (lookToEndOfRule && config.state.onlyHasEpsilonTransitions()) { 959 IntervalSet nextTokens = atn.nextTokens(config.state); 960 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 961 ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex]; 962 result.add(new ATNConfig(config, endOfRuleState), mergeCache); 963 } 964 } 965 } 966 return result; 967 } 968 969 public ATNConfigSet computeStartState(ATNState p, RuleContext ctx, bool fullCtx) 970 { 971 // always at least the implicit call to start rule 972 PredictionContext initialContext = PredictionContext.fromRuleContext(atn, ctx); 973 ATNConfigSet configs = new ATNConfigSet(fullCtx); 974 975 for (int i=0; i<p.getNumberOfTransitions(); i++) { 976 ATNState target = p.transition(i).target; 977 ATNConfig c = new ATNConfig(target, i+1, initialContext); 978 ATNConfig[] closureBusy; 979 closureATN(c, configs, closureBusy, true, fullCtx, false); 980 } 981 982 return configs; 983 } 984 985 public ATNConfigSet applyPrecedenceFilter(ATNConfigSet configs) 986 { 987 PredictionContext[int] statesFromAlt1; 988 ATNConfigSet configSet = new ATNConfigSet(configs.fullCtx); 989 foreach (config; configs.configs) { 990 // handle alt 1 first 991 if (config.alt != 1) { 992 continue; 993 } 994 995 SemanticContext updatedContext = config.semanticContext.evalPrecedence(parser, _outerContext); 996 if (updatedContext is null) { 997 // the configuration was eliminated 998 continue; 999 } 1000 1001 statesFromAlt1[config.state.stateNumber] = config.context; 1002 if (updatedContext != config.semanticContext) { 1003 configSet.add(new ATNConfig(config, updatedContext), mergeCache); 1004 } 1005 else { 1006 configSet.add(config, mergeCache); 1007 } 1008 } 1009 1010 foreach (config; configs.configs) { 1011 if (config.alt == 1) { 1012 // already handled 1013 continue; 1014 } 1015 1016 if (!config.isPrecedenceFilterSuppressed()) { 1017 /* In the future, this elimination step could be updated to also 1018 * filter the prediction context for alternatives predicting alt>1 1019 * (basically a graph subtraction algorithm). 1020 */ 1021 if (config.state.stateNumber in statesFromAlt1 && 1022 statesFromAlt1[config.state.stateNumber].opEquals(config.context)) { 1023 // eliminated 1024 continue; 1025 } 1026 } 1027 1028 configSet.add(config, mergeCache); 1029 } 1030 1031 return configSet; 1032 1033 } 1034 1035 public ATNState getReachableTarget(Transition trans, int ttype) 1036 { 1037 if (trans.matches(ttype, 0, atn.maxTokenType)) { 1038 return trans.target; 1039 } 1040 return null; 1041 } 1042 1043 public SemanticContext[] getPredsForAmbigAlts(BitSet ambigAlts, ATNConfigSet configs, 1044 int nalts) 1045 { 1046 // REACH=[1|1|[]|0:0, 1|2|[]|0:1] 1047 /* altToPred starts as an array of all null contexts. The entry at index i 1048 * corresponds to alternative i. altToPred[i] may have one of three values: 1049 * 1. null: no ATNConfig c is found such that c.alt==i 1050 * 2. SemanticContext.NONE: At least one ATNConfig c exists such that 1051 * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, 1052 * alt i has at least one unpredicated config. 1053 * 3. Non-NONE Semantic Context: There exists at least one, and for all 1054 * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. 1055 * 1056 * From this, it is clear that NONE||anything==NONE. 1057 */ 1058 SemanticContext[] altToPred = new SemanticContext[nalts + 1]; 1059 foreach (ATNConfig c; configs.configs) { 1060 if ( ambigAlts.get(c.alt) ) { 1061 altToPred[c.alt] = SemanticContext.or(altToPred[c.alt], c.semanticContext); 1062 } 1063 } 1064 1065 int nPredAlts = 0; 1066 for (int i = 1; i <= nalts; i++) { 1067 if (altToPred[i] is null) { 1068 altToPred[i] = SemanticContext.NONE; 1069 } 1070 else if (altToPred[i] != SemanticContext.NONE) { 1071 nPredAlts++; 1072 } 1073 } 1074 if (nPredAlts == 0) altToPred = null; 1075 debug 1076 writefln("getPredsForAmbigAlts result %s", to!string(altToPred)); 1077 return altToPred; 1078 } 1079 1080 protected PredPrediction[] getPredicatePredictions(BitSet ambigAlts, SemanticContext[] altToPred) 1081 { 1082 PredPrediction[] pairs; 1083 bool containsPredicate = false; 1084 1085 for (int i = 1; i < altToPred.length; i++) { 1086 SemanticContext pred = altToPred[i]; 1087 1088 // unpredicated is indicated by SemanticContext.NONE 1089 assert(pred !is null); 1090 1091 if (ambigAlts.length > 0 && ambigAlts.get(i)) { 1092 pairs ~= new PredPrediction(pred, i); 1093 } 1094 if (pred != SemanticContext.NONE) 1095 containsPredicate = true; 1096 } 1097 1098 if (!containsPredicate) { 1099 return null; 1100 } 1101 1102 // System.out.println(Arrays.toString(altToPred)+"->"+pairs); 1103 return pairs; 1104 1105 } 1106 1107 protected int getAltThatFinishedDecisionEntryRule(ATNConfigSet configs) 1108 { 1109 IntervalSet alts = new IntervalSet(); 1110 foreach (ATNConfig c; configs.configs) { 1111 if ( c.getOuterContextDepth() > 0 || (c.state.classinfo == RuleStopState.classinfo && c.context.hasEmptyPath()) ) { 1112 alts.add(c.alt); 1113 } 1114 } 1115 if ( alts.size()==0 ) return ATN.INVALID_ALT_NUMBER; 1116 return alts.getMinElement(); 1117 } 1118 1119 /** 1120 * This method is used to improve the localization of error messages by 1121 * choosing an alternative rather than throwing a 1122 * {@link NoViableAltException} in particular prediction scenarios where the 1123 * {@link #ERROR} state was reached during ATN simulation. 1124 * 1125 * <p> 1126 * The default implementation of this method uses the following 1127 * algorithm to identify an ATN configuration which successfully parsed the 1128 * decision entry rule. Choosing such an alternative ensures that the 1129 * {@link ParserRuleContext} returned by the calling rule will be complete 1130 * and valid, and the syntax error will be reported later at a more 1131 * localized location.</p> 1132 * 1133 * <ul> 1134 * <li>If a syntactically valid path or paths reach the end of the decision rule and 1135 * they are semantically valid if predicated, return the min associated alt.</li> 1136 * <li>Else, if a semantically invalid but syntactically valid path exist 1137 * or paths exist, return the minimum associated alt. 1138 * </li> 1139 * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> 1140 * </ul> 1141 * 1142 * <p> 1143 * In some scenarios, the algorithm described above could predict an 1144 * alternative which will result in a {@link FailedPredicateException} in 1145 * the parser. Specifically, this could occur if the <em>only</em> configuration 1146 * capable of successfully parsing to the end of the decision rule is 1147 * blocked by a semantic predicate. By choosing this alternative within 1148 * {@link #adaptivePredict} instead of throwing a 1149 * {@link NoViableAltException}, the resulting 1150 * {@link FailedPredicateException} in the parser will identify the specific 1151 * predicate which is preventing the parser from successfully parsing the 1152 * decision rule, which helps developers identify and correct logic errors 1153 * in semantic predicates. 1154 * </p> 1155 * 1156 * @param configs The ATN configurations which were valid immediately before 1157 * the {@link #ERROR} state was reached 1158 * @param outerContext The is the \gamma_0 initial parser context from the paper 1159 * or the parser stack at the instant before prediction commences. 1160 * 1161 * @return The value to return from {@link #adaptivePredict}, or 1162 * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not 1163 * identified and {@link #adaptivePredict} should report an error instead. 1164 */ 1165 protected int getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet configs, 1166 ParserRuleContext outerContext) 1167 { 1168 IntervalSet alts = new IntervalSet(); 1169 foreach (ATNConfig c; configs.configs) { 1170 if (c.getOuterContextDepth > 0 || (c.state.classinfo == RuleStopState.classinfo && c.context.hasEmptyPath()) ) { 1171 alts.add(c.alt); 1172 } 1173 } 1174 if (alts.size == 0) 1175 return ATN.INVALID_ALT_NUMBER; 1176 return alts.getMinElement(); 1177 } 1178 1179 protected ATNConfigSetATNConfigSetPair splitAccordingToSemanticValidity(ATNConfigSet configs, 1180 ParserRuleContext outerContext) 1181 { 1182 ATNConfigSet succeeded = new ATNConfigSet(configs.fullCtx); 1183 ATNConfigSet failed = new ATNConfigSet(configs.fullCtx); 1184 foreach (ATNConfig c; configs.configs) { 1185 if (c.semanticContext != SemanticContext.NONE ) { 1186 bool predicateEvaluationResult = evalSemanticContext(c.semanticContext, outerContext, c.alt, configs.fullCtx); 1187 if (predicateEvaluationResult) { 1188 succeeded.add(c); 1189 } 1190 else { 1191 failed.add(c); 1192 } 1193 } 1194 else { 1195 succeeded.add(c); 1196 } 1197 } 1198 ATNConfigSetATNConfigSetPair res; 1199 res.a = succeeded; 1200 res.b = failed; 1201 return res; 1202 } 1203 1204 /** 1205 * pairs that win. A {@code NONE} predicate indicates an alt containing an 1206 * unpredicated config which behaves as "always true." If !complete 1207 * then we stop at the first predicate that evaluates to true. This 1208 * includes pairs with null predicates. 1209 */ 1210 protected BitSet evalSemanticContext(PredPrediction[] predPredictions, ParserRuleContext outerContext, 1211 bool complete) 1212 { 1213 BitSet predictions; 1214 foreach (pair; predPredictions) { 1215 if (pair.pred == SemanticContext.NONE ) { 1216 predictions.set(pair.alt, true); 1217 if (!complete) { 1218 break; 1219 } 1220 continue; 1221 } 1222 1223 bool fullCtx = false; // in dfa 1224 bool predicateEvaluationResult = evalSemanticContext(pair.pred, outerContext, pair.alt, fullCtx); 1225 debug(dfa_debug) { 1226 writefln("eval pred %1$s=%2$s", pair, predicateEvaluationResult); 1227 } 1228 1229 if ( predicateEvaluationResult ) { 1230 debug(dfa_debug) 1231 writefln("PREDICT ", pair.alt); 1232 predictions.set(pair.alt, true); 1233 if (!complete) { 1234 break; 1235 } 1236 } 1237 } 1238 1239 return predictions; 1240 } 1241 1242 public bool evalSemanticContext(SemanticContext pred, ParserRuleContext parserCallStack, 1243 int alt, bool fullCtx) 1244 { 1245 return pred.eval(parser, parserCallStack); 1246 } 1247 1248 protected void closureATN(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1249 bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) 1250 { 1251 int initialDepth = 0; 1252 closureCheckingStopState(config, configs, closureBusy, collectPredicates, 1253 fullCtx, 1254 initialDepth, treatEofAsEpsilon); 1255 assert (!fullCtx || !configs.dipsIntoOuterContext); 1256 } 1257 1258 protected void closureCheckingStopState(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1259 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1260 { 1261 debug { 1262 writefln("\nclosureCheckingStopState: closure(%s)", config.toString(parser, true)); 1263 } 1264 if (cast(RuleStopState)config.state) { 1265 // We hit rule end. If we have context info, use it 1266 // run thru all possible stack tops in ctx 1267 if (!config.context.isEmpty) { 1268 for (int i = 0; i < config.context.size; i++) { 1269 if (config.context.getReturnState(i) == PredictionContext.EMPTY_RETURN_STATE) { 1270 if (fullCtx) { 1271 configs.add(new ATNConfig(config, config.state, 1272 cast(PredictionContext)PredictionContext.EMPTY), mergeCache); 1273 continue; 1274 } 1275 else { 1276 // we have no context info, just chase follow links (if greedy) 1277 debug 1278 writefln("1FALLING off rule %s", 1279 getRuleName(config.state.ruleIndex)); 1280 closure_(config, configs, closureBusy, collectPredicates, 1281 fullCtx, depth, treatEofAsEpsilon); 1282 } 1283 continue; 1284 } 1285 ATNState returnState = atn.states[config.context.getReturnState(i)]; 1286 PredictionContext newContext = config.context.getParent(i); // "pop" return state 1287 ATNConfig c = new ATNConfig(returnState, config.alt, newContext, 1288 config.semanticContext); 1289 // While we have context to pop back from, we may have 1290 // gotten that context AFTER having falling off a rule. 1291 // Make sure we track that we are now out of context. 1292 // 1293 // This assignment also propagates the 1294 // isPrecedenceFilterSuppressed() value to the new 1295 // configuration. 1296 c.reachesIntoOuterContext = config.reachesIntoOuterContext; 1297 assert(depth > int.min); 1298 closureCheckingStopState(c, configs, closureBusy, collectPredicates, 1299 fullCtx, depth - 1, treatEofAsEpsilon); 1300 } 1301 return; 1302 } 1303 else if (fullCtx) { 1304 // reached end of start rule 1305 configs.add(config, mergeCache); 1306 return; 1307 } 1308 else { 1309 // else if we have no context info, just chase follow links (if greedy) 1310 debug 1311 writefln("FALLING off rule %s", 1312 getRuleName(config.state.ruleIndex)); 1313 } 1314 } 1315 closure_(config, configs, closureBusy, collectPredicates, 1316 fullCtx, depth, treatEofAsEpsilon); 1317 } 1318 1319 /** 1320 * Do the actual work of walking epsilon edges 1321 */ 1322 protected void closure_(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1323 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1324 { 1325 ATNState p = config.state; 1326 1327 // optimization 1328 if (!p.onlyHasEpsilonTransitions) { 1329 configs.add(config, mergeCache); 1330 // make sure to not return here, because EOF transitions can act as 1331 // both epsilon transitions and non-epsilon transitions. 1332 // if ( debug ) System.out.println("added config "+configs); 1333 } 1334 for (int i=0; i<p.getNumberOfTransitions(); i++) { 1335 if ( i==0 && canDropLoopEntryEdgeInLeftRecursiveRule(config)) 1336 continue; 1337 Transition t = p.transition(i); 1338 bool continueCollecting = 1339 collectPredicates && !(cast(ActionTransition)t); 1340 ATNConfig c = getEpsilonTarget(config, t, continueCollecting, 1341 depth == 0, fullCtx, treatEofAsEpsilon); 1342 if (c !is null) { 1343 int newDepth = depth; 1344 if (cast(RuleStopState)config.state) { 1345 assert (!fullCtx); 1346 1347 // target fell off end of rule; mark resulting c as having dipped into outer context 1348 // We can't get here if incoming config was rule stop and we had context 1349 // track how far we dip into outer context. Might 1350 // come in handy and we avoid evaluating context dependent 1351 // preds if this is > 0. 1352 if (_dfa !is null && _dfa.isPrecedenceDfa) { 1353 int outermostPrecedenceReturn = (cast(EpsilonTransition)t).outermostPrecedenceReturn; 1354 if (outermostPrecedenceReturn == _dfa.atnStartState.ruleIndex) { 1355 c.setPrecedenceFilterSuppressed(true); 1356 } 1357 } 1358 1359 c.reachesIntoOuterContext++; 1360 1361 if (count(closureBusy, c) > 0) { 1362 // avoid infinite recursion for right-recursive rules 1363 continue; 1364 } 1365 closureBusy ~= c; 1366 1367 configs.dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method 1368 assert (newDepth > int.min); 1369 newDepth--; 1370 debug 1371 writefln("dips into outer ctx: %s", c); 1372 } 1373 else { 1374 if (!t.isEpsilon) { 1375 if (count(closureBusy, c) > 0) { 1376 // avoid infinite recursion for right-recursive rules 1377 continue; 1378 } 1379 closureBusy ~= c; 1380 } 1381 if (cast(RuleTransition)t) { 1382 // latch when newDepth goes negative - once we step out of the entry context we can't return 1383 if (newDepth >= 0) { 1384 newDepth++; 1385 } 1386 } 1387 } 1388 closureCheckingStopState(c, configs, closureBusy, continueCollecting, 1389 fullCtx, newDepth, treatEofAsEpsilon); 1390 } 1391 } 1392 1393 } 1394 1395 public string getRuleName(int index) 1396 { 1397 if (parser !is null && index>=0 ) return parser.getRuleNames()[index]; 1398 return format("<rule %s>",index); 1399 } 1400 1401 public ATNConfig getEpsilonTarget(ATNConfig config, Transition t, bool collectPredicates, 1402 bool inContext, bool fullCtx, bool treatEofAsEpsilon) 1403 { 1404 switch (t.getSerializationType()) { 1405 case TransitionStates.RULE: 1406 return ruleTransition(config, cast(RuleTransition)t); 1407 1408 case TransitionStates.PRECEDENCE: 1409 return precedenceTransition(config, cast(PrecedencePredicateTransition)t, collectPredicates, inContext, fullCtx); 1410 1411 case TransitionStates.PREDICATE: 1412 return predTransition(config, cast(PredicateTransition)t, 1413 collectPredicates, 1414 inContext, 1415 fullCtx); 1416 1417 case TransitionStates.ACTION: 1418 return actionTransition(config, cast(ActionTransition)t); 1419 1420 case TransitionStates.EPSILON: 1421 return new ATNConfig(config, t.target); 1422 1423 case TransitionStates.ATOM: 1424 case TransitionStates.RANGE: 1425 case TransitionStates.SET: 1426 // EOF transitions act like epsilon transitions after the first EOF 1427 // transition is traversed 1428 if (treatEofAsEpsilon) { 1429 if (t.matches(TokenConstantDefinition.EOF, 0, 1)) { 1430 return new ATNConfig(config, t.target); 1431 } 1432 } 1433 1434 return null; 1435 1436 default: 1437 return null; 1438 } 1439 1440 } 1441 1442 protected ATNConfig actionTransition(ATNConfig config, ActionTransition t) 1443 { 1444 debug writefln("ACTION edge %1$s:%2$s", t.ruleIndex, t.actionIndex); 1445 return new ATNConfig(config, t.target); 1446 } 1447 1448 public ATNConfig precedenceTransition(ATNConfig config, PrecedencePredicateTransition pt, 1449 bool collectPredicates, bool inContext, bool fullCtx) 1450 { 1451 debug { 1452 writefln("PRED (collectPredicates=%1$s) %2$s" ~ 1453 ">=_p, ctx dependent=true", collectPredicates, pt.precedence); 1454 if ( parser !is null ) { 1455 writefln("context surrounding pred is %s", 1456 parser.getRuleInvocationStack); 1457 //parser.classinfo); 1458 } 1459 } 1460 1461 ATNConfig c = null; 1462 if (collectPredicates && inContext) { 1463 if ( fullCtx ) { 1464 // In full context mode, we can evaluate predicates on-the-fly 1465 // during closure, which dramatically reduces the size of 1466 // the config sets. It also obviates the need to test predicates 1467 // later during conflict resolution. 1468 int currentPosition = _input.index(); 1469 _input.seek(_startIndex); 1470 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1471 _input.seek(currentPosition); 1472 if ( predSucceeds ) { 1473 c = new ATNConfig(config, pt.target); // no pred context 1474 } 1475 } 1476 else { 1477 SemanticContext newSemCtx = 1478 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1479 c = new ATNConfig(config, pt.target, newSemCtx); 1480 } 1481 } 1482 else { 1483 c = new ATNConfig(config, pt.target); 1484 } 1485 1486 debug 1487 writefln("config from pred transition=%s", c); 1488 return c; 1489 1490 } 1491 1492 protected ATNConfig predTransition(ATNConfig config, PredicateTransition pt, bool collectPredicates, 1493 bool inContext, bool fullCtx) 1494 { 1495 debug { 1496 writefln("PRED (collectPredicates=%1$s) %2$s:%3$s, ctx dependent=%4$s", 1497 collectPredicates, pt.ruleIndex, 1498 pt.predIndex, pt.isCtxDependent); 1499 if ( parser !is null ) { 1500 writefln("context surrounding pred is %s", 1501 parser.getRuleInvocationStack()); 1502 } 1503 } 1504 1505 ATNConfig c = null; 1506 if ( collectPredicates && 1507 (!pt.isCtxDependent || (pt.isCtxDependent&&inContext)) ) 1508 { 1509 if ( fullCtx ) { 1510 // In full context mode, we can evaluate predicates on-the-fly 1511 // during closure, which dramatically reduces the size of 1512 // the config sets. It also obviates the need to test predicates 1513 // later during conflict resolution. 1514 int currentPosition = _input.index(); 1515 _input.seek(_startIndex); 1516 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1517 _input.seek(currentPosition); 1518 if (predSucceeds) { 1519 c = new ATNConfig(config, pt.target); // no pred context 1520 } 1521 } 1522 else { 1523 SemanticContext newSemCtx = 1524 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1525 c = new ATNConfig(config, pt.target, newSemCtx); 1526 } 1527 } 1528 else { 1529 c = new ATNConfig(config, pt.target); 1530 } 1531 1532 debug writefln("config from pred transition=",c); 1533 return c; 1534 } 1535 1536 public ATNConfig ruleTransition(ATNConfig config, RuleTransition t) 1537 { 1538 debug { 1539 writefln("CALL rule %1$s, ctx=%2$s", getRuleName(t.target.ruleIndex), config.context); 1540 } 1541 ATNState returnState = t.followState; 1542 PredictionContext newContext = 1543 SingletonPredictionContext.create(config.context, returnState.stateNumber); 1544 return new ATNConfig(config, t.target, newContext); 1545 } 1546 1547 /** 1548 * Gets a {@link BitSet} containing the alternatives in {@code configs} 1549 * which are part of one or more conflicting alternative subsets. 1550 * 1551 * @param configs The {@link ATNConfigSet} to analyze. 1552 * @return The alternatives in {@code configs} which are part of one or more 1553 * conflicting alternative subsets. If {@code configs} does not contain any 1554 * conflicting subsets, this method returns an empty {@link BitSet}. 1555 */ 1556 public BitSet getConflictingAlts(ATNConfigSet configs) 1557 { 1558 BitSet[] altsets = PredictionMode.getConflictingAltSubsets(configs); 1559 return PredictionMode.getAlts(altsets); 1560 } 1561 1562 /** 1563 * Sam pointed out a problem with the previous definition, v3, of 1564 * ambiguous states. If we have another state associated with conflicting 1565 * alternatives, we should keep going. For example, the following grammar 1566 * 1567 * s : (ID | ID ID?) ';' ; 1568 * 1569 * When the ATN simulation reaches the state before ';', it has a DFA 1570 * state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 1571 * 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node 1572 * because alternative to has another way to continue, via [6|2|[]]. 1573 * The key is that we have a single state that has config's only associated 1574 * with a single alternative, 2, and crucially the state transitions 1575 * among the configurations are all non-epsilon transitions. That means 1576 * we don't consider any conflicts that include alternative 2. So, we 1577 * ignore the conflict between alts 1 and 2. We ignore a set of 1578 * conflicting alts when there is an intersection with an alternative 1579 * associated with a single alt state in the state→config-list map. 1580 * 1581 * It's also the case that we might have two conflicting configurations but 1582 * also a 3rd nonconflicting configuration for a different alternative: 1583 * [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: 1584 * 1585 * a : A | A | A B ; 1586 * 1587 * After matching input A, we reach the stop state for rule A, state 1. 1588 * State 8 is the state right before B. Clearly alternatives 1 and 2 1589 * conflict and no amount of further lookahead will separate the two. 1590 * However, alternative 3 will be able to continue and so we do not 1591 * stop working on this state. In the previous example, we're concerned 1592 * with states associated with the conflicting alternatives. Here alt 1593 * 3 is not associated with the conflicting configs, but since we can continue 1594 * looking for input reasonably, I don't declare the state done. We 1595 * ignore a set of conflicting alts when we have an alternative 1596 * that we still need to pursue. 1597 */ 1598 public BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) 1599 { 1600 auto conflictingAlts = new BitSet(1); 1601 if (configs.uniqueAlt != ATN.INVALID_ALT_NUMBER) { 1602 conflictingAlts.set(configs.uniqueAlt, true); 1603 } 1604 else { 1605 *conflictingAlts = configs.conflictingAlts; 1606 } 1607 return *conflictingAlts; 1608 } 1609 1610 public string getTokenName(int t) 1611 { 1612 if (t == TokenConstantDefinition.EOF) { 1613 return "EOF"; 1614 } 1615 1616 Vocabulary vocabulary = parser !is null ? parser.getVocabulary() : new VocabularyImpl(null, null, null); 1617 string displayName = vocabulary.getDisplayName(t); 1618 if (displayName == to!string(t)) { 1619 return displayName; 1620 } 1621 1622 return displayName ~ "<" ~ to!string(t) ~ ">"; 1623 } 1624 1625 public string getLookaheadName(TokenStream input) 1626 { 1627 return getTokenName(input.LA(1)); 1628 } 1629 1630 /** 1631 * Used for debugging in adaptivePredict around execATN but I cut 1632 * it out for clarity now that alg. works well. We can leave this 1633 * "dead" code for a bit. 1634 */ 1635 public void dumpDeadEndConfigs(NoViableAltException nvae) 1636 { 1637 debug 1638 writefln("dead end configs: "); 1639 foreach (ATNConfig c; nvae.getDeadEndConfigs().configs) { 1640 string trans = "no edges"; 1641 if (c.state.getNumberOfTransitions > 0) { 1642 Transition t = c.state.transition(0); 1643 if (t.classinfo == AtomTransition.classinfo) { 1644 AtomTransition at = cast(AtomTransition)t; 1645 trans = "Atom " ~ getTokenName(at._label); 1646 } 1647 else if (t.classinfo == SetTransition.classinfo) { 1648 SetTransition st = cast(SetTransition)t; 1649 bool not = (st.classinfo == NotSetTransition.classinfo); 1650 trans = (not?"~":"") ~ "Set "~ st.set.toString(); 1651 } 1652 } 1653 debug 1654 writefln("%1$s:%2$s", c.toString(parser, true), trans); 1655 } 1656 } 1657 1658 protected NoViableAltException noViableAlt(TokenStream input, ParserRuleContext outerContext, 1659 ATNConfigSet configs, int startIndex) 1660 { 1661 return new NoViableAltException(parser, input, 1662 input.get(startIndex), 1663 input.LT(1), 1664 configs, outerContext); 1665 } 1666 1667 protected static int getUniqueAlt(ATNConfigSet configs) 1668 { 1669 int alt = ATN.INVALID_ALT_NUMBER; 1670 foreach (ATNConfig c; configs.configs) { 1671 if (alt == ATN.INVALID_ALT_NUMBER) { 1672 alt = c.alt; // found first alt 1673 } 1674 else if (c.alt != alt) { 1675 return ATN.INVALID_ALT_NUMBER; 1676 } 1677 } 1678 return alt; 1679 } 1680 1681 /** 1682 * Add an edge to the DFA, if possible. This method calls 1683 * {@link #addDFAState} to ensure the {@code to} state is present in the 1684 * DFA. If {@code from} is {@code null}, or if {@code t} is outside the 1685 * range of edges that can be represented in the DFA tables, this method 1686 * returns without adding the edge to the DFA. 1687 * 1688 * <p>If {@code to} is {@code null}, this method returns {@code null}. 1689 * Otherwise, this method returns the {@link DFAState} returned by calling 1690 * {@link #addDFAState} for the {@code to} state.</p> 1691 * 1692 * @param dfa The DFA 1693 * @param from The source state for the edge 1694 * @param t The input symbol 1695 * @param to The target state for the edge 1696 * 1697 * @return If {@code to} is {@code null}, this method returns {@code null}; 1698 * otherwise this method returns the result of calling {@link #addDFAState} 1699 * on {@code to} 1700 */ 1701 protected DFAState addDFAEdge(ref DFA dfa, DFAState from, int t, DFAState to) 1702 { 1703 debug { 1704 writefln("\nEDGE %1$s -> %2$s upon %3$s", from, to, getTokenName(t)); 1705 } 1706 if (to is null) { 1707 return null; 1708 } 1709 to = addDFAState(dfa, to); // used existing if possible not incoming 1710 if (from is null || t < -1 || t > atn.maxTokenType) { 1711 return to; 1712 } 1713 synchronized (from) { 1714 if (from.edges == null) { 1715 from.edges = new DFAState[atn.maxTokenType+1+1]; 1716 } 1717 from.edges[t+1] = to; // connect 1718 } 1719 1720 debug { 1721 writefln("end dfa.decision = %s, dfa.states = %s", dfa.decision, dfa.states); 1722 } 1723 1724 return to; 1725 } 1726 1727 /** 1728 * Add state {@code D} to the DFA if it is not already present, and return 1729 * the actual instance stored in the DFA. If a state equivalent to {@code D} 1730 * is already in the DFA, the existing state is returned. Otherwise this 1731 * method returns {@code D} after adding it to the DFA. 1732 * 1733 * <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and 1734 * does not change the DFA.</p> 1735 * 1736 * @param dfa The dfa 1737 * @param D The DFA state to add 1738 * @return The state stored in the DFA. This will be either the existing 1739 * state if {@code D} is already in the DFA, or {@code D} itself if the 1740 * state was not already present. 1741 */ 1742 protected DFAState addDFAState(ref DFA dfa, DFAState D) 1743 { 1744 if (D == ERROR) { 1745 return D; 1746 } 1747 debug 1748 writefln("adding new dfa: D = %s, dfa.states = %s", D, dfa.states); 1749 if (D in dfa.states) 1750 return dfa.states[D]; 1751 D.stateNumber = to!int(dfa.states.length); 1752 if (!D.configs.readonly) { 1753 D.configs.optimizeConfigs(this); 1754 D.configs.readonly(true); 1755 } 1756 dfa.states[D] = D; 1757 debug 1758 writefln("adding new DFA state end: D = %1$s, dfa.states = %2$s", D, dfa.states); 1759 return D; 1760 } 1761 1762 protected void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, ATNConfigSet configs, 1763 int startIndex, int stopIndex) 1764 { 1765 debug(retry_debug) { 1766 import antlr.v4.runtime.misc.Interval; 1767 Interval interval = Interval.of(startIndex, stopIndex); 1768 writefln("reportAttemptingFullContext decision=%1$s:%2$s, input=%3$s", 1769 dfa.decision, configs, 1770 parser.getTokenStream().getText(interval)); 1771 } 1772 if (parser !is null) parser.getErrorListenerDispatch().reportAttemptingFullContext(parser, 1773 dfa, 1774 startIndex, 1775 stopIndex, 1776 conflictingAlts, 1777 configs); 1778 } 1779 1780 protected void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, 1781 int startIndex, int stopIndex) 1782 { 1783 debug(retry_debug) { 1784 import antlr.v4.runtime.misc.Interval; 1785 Interval interval = Interval.of(startIndex, stopIndex); 1786 writefln("reportContextSensitivity decision=%1$s:%2$s, input=%3$s", 1787 dfa.decision, configs, parser.getTokenStream().getText(interval)); 1788 } 1789 if (parser !is null) 1790 parser.getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); 1791 } 1792 1793 protected void reportAmbiguity(DFA dfa, DFAState D, int startIndex, int stopIndex, bool exact, 1794 BitSet ambigAlts, ATNConfigSet configs) 1795 { 1796 debug(retry_debug) { 1797 import antlr.v4.runtime.misc.Interval; 1798 Interval interval = Interval.of(startIndex, stopIndex); 1799 writefln("reportAmbiguity %1$s:%2$s, input=%3$s", 1800 ambigAlts, configs, parser.getTokenStream().getText(interval)); 1801 } 1802 if (parser !is null) parser.getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, 1803 exact, ambigAlts, configs); 1804 } 1805 1806 public void setPredictionMode(PredictionModeConst mode) 1807 { 1808 this.mode = mode; 1809 } 1810 1811 public PredictionModeConst getPredictionMode() 1812 { 1813 return mode; 1814 } 1815 1816 public Parser getParser() 1817 { 1818 return parser; 1819 } 1820 1821 protected bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) 1822 { 1823 auto TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = true; 1824 if ( TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT ) 1825 return false; 1826 return false; 1827 } 1828 1829 }