1 /* 2 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ParserATNSimulator; 8 9 import antlr.v4.runtime.IntStream; 10 import antlr.v4.runtime.IntStreamConstant; 11 import antlr.v4.runtime.NoViableAltException; 12 import antlr.v4.runtime.Parser; 13 import antlr.v4.runtime.ParserRuleContext; 14 import antlr.v4.runtime.RuleContext; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.Vocabulary; 18 import antlr.v4.runtime.VocabularyImpl; 19 import antlr.v4.runtime.atn.ATN; 20 import antlr.v4.runtime.atn.ATNConfig; 21 import antlr.v4.runtime.atn.ATNConfigSet; 22 import antlr.v4.runtime.atn.ATNSimulator; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.ActionTransition; 25 import antlr.v4.runtime.atn.AtomTransition; 26 import antlr.v4.runtime.atn.DecisionState; 27 import antlr.v4.runtime.atn.EpsilonTransition; 28 import antlr.v4.runtime.atn.InterfaceParserATNSimulator; 29 import antlr.v4.runtime.atn.NotSetTransition; 30 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 31 import antlr.v4.runtime.atn.PredicateTransition; 32 import antlr.v4.runtime.atn.PredictionContext; 33 import antlr.v4.runtime.atn.PredictionContextCache; 34 import antlr.v4.runtime.atn.PredictionMode; 35 import antlr.v4.runtime.atn.PredictionModeConst; 36 import antlr.v4.runtime.atn.RuleStopState; 37 import antlr.v4.runtime.atn.RuleTransition; 38 import antlr.v4.runtime.atn.SemanticContext; 39 import antlr.v4.runtime.atn.SetTransition; 40 import antlr.v4.runtime.atn.SingletonPredictionContext; 41 import antlr.v4.runtime.atn.Transition; 42 import antlr.v4.runtime.atn.TransitionStates; 43 import antlr.v4.runtime.dfa.DFA; 44 import antlr.v4.runtime.dfa.DFAState; 45 import antlr.v4.runtime.dfa.PredPrediction; 46 import antlr.v4.runtime.misc; 47 import std.algorithm.searching; 48 import std.conv; 49 import std.format; 50 import std.stdio; 51 import std.typecons; 52 53 alias ATNConfigSetATNConfigSetPair = Tuple!(ATNConfigSet, "a", ATNConfigSet, "b"); 54 55 /** 56 * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. 57 * 58 * <p> 59 * The basic complexity of the adaptive strategy makes it harder to understand. 60 * We begin with ATN simulation to build paths in a DFA. Subsequent prediction 61 * requests go through the DFA first. If they reach a state without an edge for 62 * the current symbol, the algorithm fails over to the ATN simulation to 63 * complete the DFA path for the current input (until it finds a conflict state 64 * or uniquely predicting state).</p> 65 * 66 * <p> 67 * All of that is done without using the outer context because we want to create 68 * a DFA that is not dependent upon the rule invocation stack when we do a 69 * prediction. One DFA works in all contexts. We avoid using context not 70 * necessarily because it's slower, although it can be, but because of the DFA 71 * caching problem. The closure routine only considers the rule invocation stack 72 * created during prediction beginning in the decision rule. For example, if 73 * prediction occurs without invoking another rule's ATN, there are no context 74 * stacks in the configurations. When lack of context leads to a conflict, we 75 * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing 76 * strategy (versus full LL(*)).</p> 77 * 78 * <p> 79 * When SLL yields a configuration set with conflict, we rewind the input and 80 * retry the ATN simulation, this time using full outer context without adding 81 * to the DFA. Configuration context stacks will be the full invocation stacks 82 * from the start rule. If we get a conflict using full context, then we can 83 * definitively say we have a true ambiguity for that input sequence. If we 84 * don't get a conflict, it implies that the decision is sensitive to the outer 85 * context. (It is not context-sensitive in the sense of context-sensitive 86 * grammars.)</p> 87 * 88 * <p> 89 * The next time we reach this DFA state with an SLL conflict, through DFA 90 * simulation, we will again retry the ATN simulation using full context mode. 91 * This is slow because we can't save the results and have to "interpret" the 92 * ATN each time we get that input.</p> 93 * 94 * <p> 95 * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> 96 * 97 * <p> 98 * We could cache results from full context to predicted alternative easily and 99 * that saves a lot of time but doesn't work in presence of predicates. The set 100 * of visible predicates from the ATN start state changes depending on the 101 * context, because closure can fall off the end of a rule. I tried to cache 102 * tuples (stack context, semantic context, predicted alt) but it was slower 103 * than interpreting and much more complicated. Also required a huge amount of 104 * memory. The goal is not to create the world's fastest parser anyway. I'd like 105 * to keep this algorithm simple. By launching multiple threads, we can improve 106 * the speed of parsing across a large number of files.</p> 107 * 108 * <p> 109 * There is no strict ordering between the amount of input used by SLL vs LL, 110 * which makes it really hard to build a cache for full context. Let's say that 111 * we have input A B C that leads to an SLL conflict with full context X. That 112 * implies that using X we might only use A B but we could also use A B C D to 113 * resolve conflict. Input A B C D could predict alternative 1 in one position 114 * in the input and A B C E could predict alternative 2 in another position in 115 * input. The conflicting SLL configurations could still be non-unique in the 116 * full context prediction, which would lead us to requiring more input than the 117 * original A B C. To make a prediction cache work, we have to track the exact 118 * input used during the previous prediction. That amounts to a cache that maps 119 * X to a specific DFA for that context.</p> 120 * 121 * <p> 122 * Something should be done for left-recursive expression predictions. They are 123 * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry 124 * with full LL thing Sam does.</p> 125 * 126 * <p> 127 * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> 128 * 129 * <p> 130 * We avoid doing full context retry when the outer context is empty, we did not 131 * dip into the outer context by falling off the end of the decision state rule, 132 * or when we force SLL mode.</p> 133 * 134 * <p> 135 * As an example of the not dip into outer context case, consider as super 136 * constructor calls versus function calls. One grammar might look like 137 * this:</p> 138 * 139 * <pre> 140 * ctorBody 141 * : '{' superCall? stat* '}' 142 * ; 143 * </pre> 144 * 145 * <p> 146 * Or, you might see something like</p> 147 * 148 * <pre> 149 * stat 150 * : superCall ';' 151 * | expression ';' 152 * | ... 153 * ; 154 * </pre> 155 * 156 * <p> 157 * In both cases I believe that no closure operations will dip into the outer 158 * context. In the first case ctorBody in the worst case will stop at the '}'. 159 * In the 2nd case it should stop at the ';'. Both cases should stay within the 160 * entry rule and not dip into the outer context.</p> 161 * 162 * <p> 163 * <strong>PREDICATES</strong></p> 164 * 165 * <p> 166 * Predicates are always evaluated if present in either SLL or LL both. SLL and 167 * LL simulation deals with predicates differently. SLL collects predicates as 168 * it performs closure operations like ANTLR v3 did. It delays predicate 169 * evaluation until it reaches and accept state. This allows us to cache the SLL 170 * ATN simulation whereas, if we had evaluated predicates on-the-fly during 171 * closure, the DFA state configuration sets would be different and we couldn't 172 * build up a suitable DFA.</p> 173 * 174 * <p> 175 * When building a DFA accept state during ATN simulation, we evaluate any 176 * predicates and return the sole semantically valid alternative. If there is 177 * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, 178 * we throw an exception. Alternatives without predicates act like they have 179 * true predicates. The simple way to think about it is to strip away all 180 * alternatives with false predicates and choose the minimum alternative that 181 * remains.</p> 182 * 183 * <p> 184 * When we start in the DFA and reach an accept state that's predicated, we test 185 * those and return the minimum semantically viable alternative. If no 186 * alternatives are viable, we throw an exception.</p> 187 * 188 * <p> 189 * During full LL ATN simulation, closure always evaluates predicates and 190 * on-the-fly. This is crucial to reducing the configuration set size during 191 * closure. It hits a landmine when parsing with the Java grammar, for example, 192 * without this on-the-fly evaluation.</p> 193 * 194 * <p> 195 * <strong>SHARING DFA</strong></p> 196 * 197 * <p> 198 * All instances of the same parser share the same decision DFAs through a 199 * static field. Each instance gets its own ATN simulator but they share the 200 * same {@link #decisionToDFA} field. They also share a 201 * {@link PredictionContextCache} object that makes sure that all 202 * {@link PredictionContext} objects are shared among the DFA states. This makes 203 * a big size difference.</p> 204 * 205 * <p> 206 * <strong>THREAD SAFETY</strong></p> 207 * 208 * <p> 209 * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when 210 * it adds a new DFA object to that array. {@link #addDFAEdge} 211 * locks on the DFA for the current decision when setting the 212 * {@link DFAState#edges} field. {@link #addDFAState} locks on 213 * the DFA for the current decision when looking up a DFA state to see if it 214 * already exists. We must make sure that all requests to add DFA states that 215 * are equivalent result in the same shared DFA object. This is because lots of 216 * threads will be trying to update the DFA at once. The 217 * {@link #addDFAState} method also locks inside the DFA lock 218 * but this time on the shared context cache when it rebuilds the 219 * configurations' {@link PredictionContext} objects using cached 220 * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is 221 * safe as long as we can guarantee that all threads referencing 222 * {@code s.edge[t]} get the same physical target {@link DFAState}, or 223 * {@code null}. Once into the DFA, the DFA simulation does not reference the 224 * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new 225 * targets. The DFA simulator will either find {@link DFAState#edges} to be 226 * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or 227 * {@code dfa.edges[t]} to be non-null. The 228 * {@link #addDFAEdge} method could be racing to set the field 229 * but in either case the DFA simulator works; if {@code null}, and requests ATN 230 * simulation. It could also race trying to get {@code dfa.edges[t]}, but either 231 * way it will work because it's not doing a test and set operation.</p> 232 * 233 * <p> 234 * <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage 235 * Parsing)</strong></p> 236 * 237 * <p> 238 * Sam pointed out that if SLL does not give a syntax error, then there is no 239 * point in doing full LL, which is slower. We only have to try LL if we get a 240 * syntax error. For maximum speed, Sam starts the parser set to pure SLL 241 * mode with the {@link BailErrorStrategy}:</p> 242 * 243 * <pre> 244 * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; 245 * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); 246 * </pre> 247 * 248 * <p> 249 * If it does not get a syntax error, then we're done. If it does get a syntax 250 * error, we need to retry with the combined SLL/LL strategy.</p> 251 * 252 * <p> 253 * The reason this works is as follows. If there are no SLL conflicts, then the 254 * grammar is SLL (at least for that input set). If there is an SLL conflict, 255 * the full LL analysis must yield a set of viable alternatives which is a 256 * subset of the alternatives reported by SLL. If the LL set is a singleton, 257 * then the grammar is LL but not SLL. If the LL set is the same size as the SLL 258 * set, the decision is SLL. If the LL set has size > 1, then that decision 259 * is truly ambiguous on the current input. If the LL set is smaller, then the 260 * SLL conflict resolution might choose an alternative that the full LL would 261 * rule out as a possibility based upon better context information. If that's 262 * the case, then the SLL parse will definitely get an error because the full LL 263 * analysis says it's not viable. If SLL conflict resolution chooses an 264 * alternative within the LL set, them both SLL and LL would choose the same 265 * alternative because they both choose the minimum of multiple conflicting 266 * alternatives.</p> 267 * 268 * <p> 269 * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and 270 * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL 271 * parsing will get an error because SLL will pursue alternative 1. If 272 * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will 273 * choose the same alternative because alternative one is the minimum of either 274 * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax 275 * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> 276 * 277 * <p> 278 * Of course, if the input is invalid, then we will get an error for sure in 279 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over 280 * the input.</p> 281 */ 282 class ParserATNSimulator : ATNSimulator, InterfaceParserATNSimulator 283 { 284 285 protected Parser parser; 286 287 public DFA[] decisionToDFA; 288 289 public PredictionModeConst mode = PredictionModeConst.LL; 290 291 /** 292 * Each prediction operation uses a cache for merge of prediction contexts. 293 * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap 294 * isn't synchronized but we're ok since two threads shouldn't reuse same 295 * parser/atnsim object because it can only handle one input at a time. 296 * This maps graphs a and b to merged result c. (a,b)→c. We can avoid 297 * the merge if we ever see a and b again. Note that (b,a)→c should 298 * also be examined during cache lookup. 299 * @uml 300 * @__gshared 301 */ 302 public static __gshared DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext) mergeCache; 303 304 protected DFA _dfa; 305 306 protected TokenStream _input; 307 308 protected int _startIndex; 309 310 protected ParserRuleContext _outerContext; 311 312 /** 313 * @uml 314 * Testing only! 315 */ 316 protected this(ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 317 { 318 this(null, atn, decisionToDFA, sharedContextCache); 319 } 320 321 public this(Parser parser, ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 322 { 323 super(atn, sharedContextCache); 324 this.parser = parser; 325 this.decisionToDFA = decisionToDFA; 326 if (mergeCache is null) 327 this.mergeCache = new typeof(this.mergeCache); 328 } 329 330 /** 331 * @uml 332 * @override 333 */ 334 public override void reset() 335 { 336 } 337 338 /** 339 * @uml 340 * @override 341 */ 342 public override void clearDFA() 343 { 344 for (int d = 0; d < decisionToDFA.length; d++) { 345 decisionToDFA[d] = new DFA(atn.getDecisionState(d), d); 346 } 347 } 348 349 public int adaptivePredict(TokenStream input, int decision, ParserRuleContext outerContext) 350 { 351 debug(ParserATNSimulator) { 352 writefln("adaptivePredict decision %1$s"~ 353 " exec LA(1)==%2$s"~ 354 " line %3$s:%4$s", 355 decision, getLookaheadName(input), input.LT(1).getLine, 356 input.LT(1).getCharPositionInLine()); 357 } 358 359 _input = input; 360 _startIndex = input.index(); 361 _outerContext = outerContext; 362 DFA dfa = decisionToDFA[decision]; 363 _dfa = dfa; 364 365 int m = input.mark(); 366 int index = _startIndex; 367 // Now we are certain to have a specific decision's DFA 368 // But, do we still need an initial state? 369 try { 370 DFAState s0; 371 if (dfa.isPrecedenceDfa) { 372 // the start state for a precedence DFA depends on the current 373 // parser precedence, and is provided by a DFA method. 374 s0 = dfa.getPrecedenceStartState(parser.getPrecedence()); 375 } 376 else { 377 // the start state for a "regular" DFA is just s0 378 s0 = dfa.s0; 379 } 380 381 if (s0 is null) { 382 if ( outerContext is null ) 383 outerContext = ParserRuleContext.EMPTY; 384 debug(ParserATNSimulator) { 385 writefln("predictATN decision = %1$s"~ 386 " exec LA(1)==%2$s"~ 387 ", outerContext=%3$s", 388 dfa.decision, getLookaheadName(input), 389 outerContext); 390 } 391 392 bool fullCtx = false; 393 ATNConfigSet s0_closure = 394 computeStartState(dfa.atnStartState, 395 ParserRuleContext.EMPTY, 396 fullCtx); 397 if (dfa.isPrecedenceDfa) { 398 /* If this is a precedence DFA, we use applyPrecedenceFilter 399 * to convert the computed start state to a precedence start 400 * state. We then use DFA.setPrecedenceStartState to set the 401 * appropriate start state for the precedence level rather 402 * than simply setting DFA.s0. 403 */ 404 dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway 405 s0_closure = applyPrecedenceFilter(s0_closure); 406 s0 = addDFAState(dfa, new DFAState(s0_closure)); 407 dfa.setPrecedenceStartState(parser.getPrecedence(), s0); 408 } 409 else { 410 s0 = addDFAState(dfa, new DFAState(s0_closure)); 411 dfa.s0 = s0; 412 } 413 } 414 int alt = execATN(dfa, s0, input, index, outerContext); 415 debug(ParserATNSimulator) 416 writefln("DFA after predictATN: %1$s, alt = %s, dfa.states = %s", dfa.toString(parser.getVocabulary), alt, dfa); 417 return alt; 418 } 419 finally { 420 mergeCache = null; // wack cache after each prediction 421 _dfa = null; 422 input.seek(index); 423 input.release(m); 424 } 425 } 426 427 /** 428 * There are some key conditions we're looking for after computing a new 429 * set of ATN configs (proposed DFA state): 430 * * if the set is empty, there is no viable alternative for current symbol 431 * * does the state uniquely predict an alternative? 432 * * does the state have a conflict that would prevent us from 433 * putting it on the work list? 434 * 435 * We also have some key operations to do: 436 * * add an edge from previous DFA state to potentially new DFA state, D, 437 * upon current symbol but only if adding to work list, which means in all 438 * cases except no viable alternative (and possibly non-greedy decisions?) 439 * * collecting predicates and adding semantic context to DFA accept states 440 * * adding rule context to context-sensitive DFA accept states 441 * * consuming an input symbol 442 * * reporting a conflict 443 * * reporting an ambiguity 444 * * reporting a context sensitivity 445 * * reporting insufficient predicates 446 * 447 * cover these cases: 448 * dead end 449 * single alt 450 * single alt + preds 451 * conflict 452 * conflict + preds 453 */ 454 protected int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, ParserRuleContext outerContext) 455 { 456 debug { 457 writefln("execATN decision %1$s"~ 458 " exec LA(1)==%2$s line %3$s:%4$s", 459 dfa.decision, 460 getLookaheadName(input), 461 input.LT(1).getLine, 462 input.LT(1).getCharPositionInLine()); 463 } 464 465 DFAState previousD = s0; 466 467 debug 468 writefln("s0 = %1$s", s0); 469 470 int t = input.LA(1); 471 472 while (true) { // while more work 473 DFAState D = getExistingTargetState(previousD, t); 474 if (D is null) { 475 D = computeTargetState(dfa, previousD, t); 476 } 477 478 if (D == ERROR) { 479 // if any configs in previous dipped into outer context, that 480 // means that input up to t actually finished entry rule 481 // at least for SLL decision. Full LL doesn't dip into outer 482 // so don't need special case. 483 // We will get an error no matter what so delay until after 484 // decision; better error message. Also, no reachable target 485 // ATN states in SLL implies LL will also get nowhere. 486 // If conflict in states that dip out, choose min since we 487 // will get error no matter what. 488 NoViableAltException e = noViableAlt(input, outerContext, previousD.configs, startIndex); 489 input.seek(startIndex); 490 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext); 491 if (alt != ATN.INVALID_ALT_NUMBER) { 492 return alt; 493 } 494 throw e; 495 } 496 497 if (D.requiresFullContext && mode != PredictionModeConst.SLL) { 498 // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) 499 BitSet conflictingAlts = D.configs.conflictingAlts; 500 if (D.predicates != null) { 501 debug 502 writeln("DFA state has preds in DFA sim LL failover"); 503 int conflictIndex = input.index(); 504 if (conflictIndex != startIndex) { 505 input.seek(startIndex); 506 } 507 508 conflictingAlts = evalSemanticContext(D.predicates, outerContext, true); 509 if (conflictingAlts.cardinality ==1) { 510 debug 511 writeln("Full LL avoided"); 512 return conflictingAlts.nextSetBit(0); 513 } 514 515 if (conflictIndex != startIndex) { 516 // restore the index so reporting the fallback to full 517 // context occurs with the index at the correct spot 518 input.seek(conflictIndex); 519 } 520 } 521 522 debug(dfa_debug) 523 writefln("ctx sensitive state %1$ in %2$s", 524 outerContext, D); 525 bool fullCtx = true; 526 ATNConfigSet s0_closure = 527 computeStartState(dfa.atnStartState, outerContext, 528 fullCtx); 529 reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index()); 530 int alt = execATNWithFullContext(dfa, D, s0_closure, 531 input, startIndex, 532 outerContext); 533 return alt; 534 } 535 536 if ( D.isAcceptState ) { 537 if (D.predicates == null) { 538 return D.prediction; 539 } 540 541 int stopIndex = input.index(); 542 input.seek(startIndex); 543 BitSet alts = evalSemanticContext(D.predicates, outerContext, true); 544 switch (alts.cardinality) { 545 case 0: 546 throw noViableAlt(input, outerContext, D.configs, startIndex); 547 548 case 1: 549 return alts.nextSetBit(0); 550 551 default: 552 // report ambiguity after predicate evaluation to make sure the correct 553 // set of ambig alts is reported. 554 reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs); 555 return alts.nextSetBit(0); 556 } 557 } 558 559 previousD = D; 560 561 if (t != IntStreamConstant.EOF) { 562 input.consume(); 563 t = input.LA(1); 564 } 565 } 566 567 } 568 569 /** 570 * Get an existing target state for an edge in the DFA. If the target state 571 * for the edge has not yet been computed or is otherwise not available, 572 * this method returns {@code null}. 573 * 574 * @param previousD The current DFA state 575 * @param t The next input symbol 576 * @return The existing target DFA state for the given input symbol 577 * {@code t}, or {@code null} if the target state for this edge is not 578 * already cached 579 */ 580 public DFAState getExistingTargetState(DFAState previousD, int t) 581 { 582 DFAState[] edges = previousD.edges; 583 if (edges == null || t + 1 < 0 || t + 1 >= edges.length) { 584 return null; 585 } 586 return edges[t + 1]; 587 } 588 589 /** 590 * Compute a target state for an edge in the DFA, and attempt to add the 591 * computed state and corresponding edge to the DFA. 592 * 593 * @param dfa The DFA 594 * @param previousD The current DFA state 595 * @param t The next input symbol 596 * 597 * @return The computed target DFA state for the given input symbol 598 * {@code t}. If {@code t} does not lead to a valid DFA state, this method 599 * returns {@link #ERROR}. 600 */ 601 public DFAState computeTargetState(DFA dfa, DFAState previousD, int t) 602 { 603 ATNConfigSet reach = computeReachSet(previousD.configs, t, false); 604 if (reach is null) { 605 addDFAEdge(dfa, previousD, t, ERROR); 606 return ERROR; 607 } 608 // create new target state; we'll add to DFA after it's complete 609 DFAState D = new DFAState(reach); 610 int predictedAlt = getUniqueAlt(reach); 611 612 debug { 613 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 614 writefln("SLL altSubSets=%1$s"~ 615 ", configs=%2$s"~ 616 ", predict=%3$s, allSubsetsConflict=%4$s, "~ 617 "conflictingAlts=%5$s", 618 altSubSets, 619 reach, 620 predictedAlt, 621 PredictionMode.allSubsetsConflict(altSubSets), 622 getConflictingAlts(reach)); 623 } 624 625 if (predictedAlt != ATN.INVALID_ALT_NUMBER) { 626 // NO CONFLICT, UNIQUELY PREDICTED ALT 627 D.isAcceptState = true; 628 D.configs.uniqueAlt = predictedAlt; 629 D.prediction = predictedAlt; 630 } 631 else if ( PredictionMode.hasSLLConflictTerminatingPrediction(mode, reach)) { 632 // MORE THAN ONE VIABLE ALTERNATIVE 633 D.configs.conflictingAlts = getConflictingAlts(reach); 634 D.requiresFullContext = true; 635 // in SLL-only mode, we will stop at this state and return the minimum alt 636 D.isAcceptState = true; 637 D.prediction = D.configs.conflictingAlts.nextSetBit(0); 638 } 639 640 if ( D.isAcceptState && D.configs.hasSemanticContext ) { 641 predicateDFAState(D, atn.getDecisionState(dfa.decision)); 642 if (D.predicates != null) { 643 D.prediction = ATN.INVALID_ALT_NUMBER; 644 } 645 } 646 // all adds to dfa are done after we've created full D state 647 D = addDFAEdge(dfa, previousD, t, D); 648 return D; 649 } 650 651 public void predicateDFAState(DFAState dfaState, DecisionState decisionState) 652 { 653 // We need to test all predicates, even in DFA states that 654 // uniquely predict alternative. 655 int nalts = decisionState.getNumberOfTransitions(); 656 // Update DFA so reach becomes accept state with (predicate,alt) 657 // pairs if preds found for conflicting alts 658 BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState.configs); 659 SemanticContext[] altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); 660 if ( altToPred!=null ) { 661 dfaState.predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); 662 dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds 663 } 664 else { 665 // There are preds in configs but they might go away 666 // when OR'd together like {p}? || NONE == NONE. If neither 667 // alt has preds, resolve to min alt 668 dfaState.prediction = altsToCollectPredsFrom.nextSetBit(0); 669 } 670 } 671 672 /** 673 * @uml 674 * comes back with reach.uniqueAlt set to a valid alt 675 */ 676 protected int execATNWithFullContext(DFA dfa, DFAState D, ATNConfigSet s0, TokenStream input, 677 int startIndex, ParserRuleContext outerContext) 678 { 679 debug { 680 writefln("execATNWithFullContext %s", s0); 681 } 682 bool fullCtx = true; 683 bool foundExactAmbig = false; 684 ATNConfigSet reach = null; 685 ATNConfigSet previous = s0; 686 input.seek(startIndex); 687 int t = input.LA(1); 688 int predictedAlt; 689 while (true) { // while more work 690 // System.out.println("LL REACH "+getLookaheadName(input)+ 691 // " from configs.size="+previous.size()+ 692 // " line "+input.LT(1).getLine()+":"+input.LT(1).getCharPositionInLine()); 693 reach = computeReachSet(previous, t, fullCtx); 694 if (reach is null) { 695 // if any configs in previous dipped into outer context, that 696 // means that input up to t actually finished entry rule 697 // at least for LL decision. Full LL doesn't dip into outer 698 // so don't need special case. 699 // We will get an error no matter what so delay until after 700 // decision; better error message. Also, no reachable target 701 // ATN states in SLL implies LL will also get nowhere. 702 // If conflict in states that dip out, choose min since we 703 // will get error no matter what. 704 NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex); 705 input.seek(startIndex); 706 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); 707 if ( alt!=ATN.INVALID_ALT_NUMBER ) { 708 return alt; 709 } 710 throw e; 711 } 712 713 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 714 debug { 715 writefln("LL altSubSets=%1$s, PredictionMode.getUniqueAlt(altSubSets)" ~ 716 ", resolvesToJustOneViableAlt=%3$s", 717 altSubSets, 718 PredictionMode.getUniqueAlt(altSubSets), 719 PredictionMode.resolvesToJustOneViableAlt(altSubSets)); 720 } 721 722 // System.out.println("altSubSets: "+altSubSets); 723 // System.err.println("reach="+reach+", "+reach.conflictingAlts); 724 reach.uniqueAlt = getUniqueAlt(reach); 725 // unique prediction? 726 if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) { 727 predictedAlt = reach.uniqueAlt; 728 break; 729 } 730 if ( mode != PredictionModeConst.LL_EXACT_AMBIG_DETECTION ) { 731 predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets); 732 if ( predictedAlt != ATN.INVALID_ALT_NUMBER ) { 733 break; 734 } 735 } 736 else { 737 // In exact ambiguity mode, we never try to terminate early. 738 // Just keeps scarfing until we know what the conflict is 739 if ( PredictionMode.allSubsetsConflict(altSubSets) && 740 PredictionMode.allSubsetsEqual(altSubSets) ) 741 { 742 foundExactAmbig = true; 743 predictedAlt = PredictionMode.getSingleViableAlt(altSubSets); 744 break; 745 } 746 // else there are multiple non-conflicting subsets or 747 // we're not sure what the ambiguity is yet. 748 // So, keep going. 749 } 750 751 previous = reach; 752 if (t != IntStreamConstant.EOF) { 753 input.consume(); 754 t = input.LA(1); 755 } 756 } 757 758 // If the configuration set uniquely predicts an alternative, 759 // without conflict, then we know that it's a full LL decision 760 // not SLL. 761 if ( reach.uniqueAlt != ATN.INVALID_ALT_NUMBER ) { 762 reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index()); 763 return predictedAlt; 764 } 765 766 // We do not check predicates here because we have checked them 767 // on-the-fly when doing full context prediction. 768 769 /* 770 In non-exact ambiguity detection mode, we might actually be able to 771 detect an exact ambiguity, but I'm not going to spend the cycles 772 needed to check. We only emit ambiguity warnings in exact ambiguity 773 mode. 774 775 For example, we might know that we have conflicting configurations. 776 But, that does not mean that there is no way forward without a 777 conflict. It's possible to have nonconflicting alt subsets as in: 778 779 LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] 780 781 from 782 783 [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), 784 (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] 785 786 In this case, (17,1,[5 $]) indicates there is some next sequence that 787 would resolve this without conflict to alternative 1. Any other viable 788 next sequence, however, is associated with a conflict. We stop 789 looking for input because no amount of further lookahead will alter 790 the fact that we should predict alternative 1. We just can't say for 791 sure that there is an ambiguity without looking further. 792 */ 793 reportAmbiguity(dfa, D, startIndex, input.index(), foundExactAmbig, 794 reach.getAlts(), reach); 795 796 return predictedAlt; 797 } 798 799 public ATNConfigSet computeReachSet(ATNConfigSet closure, int t, bool fullCtx) 800 { 801 debug 802 writefln("in computeReachSet, starting closure: %s", closure); 803 804 if (mergeCache is null) { 805 mergeCache = new DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext); 806 } 807 808 ATNConfigSet intermediate = new ATNConfigSet(fullCtx); 809 810 /* Configurations already in a rule stop state indicate reaching the end 811 * of the decision rule (local context) or end of the start rule (full 812 * context). Once reached, these configurations are never updated by a 813 * closure operation, so they are handled separately for the performance 814 * advantage of having a smaller intermediate set when calling closure. 815 * 816 * For full-context reach operations, separate handling is required to 817 * ensure that the alternative matching the longest overall sequence is 818 * chosen when multiple such configurations can match the input. 819 */ 820 ATNConfig[] skippedStopStates; 821 822 // First figure out where we can reach on input t 823 foreach (c; closure.configs) { 824 debug 825 writefln("testing %1$s at %2$s", getTokenName(t), c.toString); 826 827 if (cast(RuleStopState)c.state) { 828 assert(c.context.isEmpty); 829 if (fullCtx || t == IntStreamConstant.EOF) { 830 skippedStopStates ~= c; 831 } 832 continue; 833 } 834 835 foreach (trans; c.state.transitions) { 836 ATNState target = getReachableTarget(trans, t); 837 if (target) { 838 intermediate.add(new ATNConfig(c, target), mergeCache); 839 } 840 } 841 } 842 843 // Now figure out where the reach operation can take us... 844 845 ATNConfigSet reach = null; 846 847 /* This block optimizes the reach operation for intermediate sets which 848 * trivially indicate a termination state for the overall 849 * adaptivePredict operation. 850 * 851 * The conditions assume that intermediate 852 * contains all configurations relevant to the reach set, but this 853 * condition is not true when one or more configurations have been 854 * withheld in skippedStopStates, or when the current symbol is EOF. 855 */ 856 if (skippedStopStates is null && t != TokenConstantDefinition.EOF) { 857 if (intermediate.size() == 1 ) { 858 // Don't pursue the closure if there is just one state. 859 // It can only have one alternative; just add to result 860 // Also don't pursue the closure if there is unique alternative 861 // among the configurations. 862 reach = intermediate; 863 } 864 else if (getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER ) { 865 // Also don't pursue the closure if there is unique alternative 866 // among the configurations. 867 reach = intermediate; 868 } 869 } 870 871 /* If the reach set could not be trivially determined, perform a closure 872 * operation on the intermediate set to compute its initial value. 873 */ 874 if (reach is null) { 875 reach = new ATNConfigSet(fullCtx); 876 ATNConfig[] closureBusy; 877 bool treatEofAsEpsilon = t == TokenConstantDefinition.EOF; 878 foreach (c; intermediate.configs) 879 { 880 closureATN(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon); 881 } 882 } 883 884 if (t == IntStreamConstant.EOF) { 885 /* After consuming EOF no additional input is possible, so we are 886 * only interested in configurations which reached the end of the 887 * decision rule (local context) or end of the start rule (full 888 * context). Update reach to contain only these configurations. This 889 * handles both explicit EOF transitions in the grammar and implicit 890 * EOF transitions following the end of the decision or start rule. 891 * 892 * When reach==intermediate, no closure operation was performed. In 893 * this case, removeAllConfigsNotInRuleStopState needs to check for 894 * reachable rule stop states as well as configurations already in 895 * a rule stop state. 896 * 897 * This is handled before the configurations in skippedStopStates, 898 * because any configurations potentially added from that list are 899 * already guaranteed to meet this condition whether or not it's 900 * required. 901 */ 902 reach = removeAllConfigsNotInRuleStopState(reach, reach.configs == intermediate.configs); 903 } 904 905 /* If skippedStopStates is not null, then it contains at least one 906 * configuration. For full-context reach operations, these 907 * configurations reached the end of the start rule, in which case we 908 * only add them back to reach if no configuration during the current 909 * closure operation reached such a state. This ensures adaptivePredict 910 * chooses an alternative matching the longest overall sequence when 911 * multiple alternatives are viable. 912 */ 913 if (skippedStopStates !is null && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))) { 914 assert(skippedStopStates.length > 0); 915 foreach (c; skippedStopStates) { 916 reach.add(c, mergeCache); 917 } 918 } 919 920 if (reach.isEmpty) 921 return null; 922 return reach; 923 } 924 925 /** 926 * Return a configuration set containing only the configurations from 927 * {@code configs} which are in a {@link RuleStopState}. If all 928 * configurations in {@code configs} are already in a rule stop state, this 929 * method simply returns {@code configs}. 930 * 931 * <p>When {@code lookToEndOfRule} is true, this method uses 932 * {@link ATN#nextTokens} for each configuration in {@code configs} which is 933 * not already in a rule stop state to see if a rule stop state is reachable 934 * from the configuration via epsilon-only transitions.</p> 935 * 936 * @param configs the configuration set to update 937 * @param lookToEndOfRule when true, this method checks for rule stop states 938 * reachable by epsilon-only transitions from each configuration in 939 * {@code configs}. 940 * 941 * @return {@code configs} if all configurations in {@code configs} are in a 942 * rule stop state, otherwise return a new configuration set containing only 943 * the configurations from {@code configs} which are in a rule stop state 944 */ 945 protected ATNConfigSet removeAllConfigsNotInRuleStopState(ATNConfigSet configs, bool lookToEndOfRule) 946 { 947 if (PredictionMode.allConfigsInRuleStopStates(configs)) { 948 return configs; 949 } 950 951 ATNConfigSet result = new ATNConfigSet(configs.fullCtx); 952 foreach (ATNConfig config; configs.configs) { 953 if (config.state.classinfo == RuleStopState.classinfo) { 954 result.add(config, mergeCache); 955 continue; 956 } 957 958 if (lookToEndOfRule && config.state.onlyHasEpsilonTransitions()) { 959 IntervalSet nextTokens = atn.nextTokens(config.state); 960 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 961 ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex]; 962 result.add(new ATNConfig(config, endOfRuleState), mergeCache); 963 } 964 } 965 } 966 return result; 967 } 968 969 public ATNConfigSet computeStartState(ATNState p, RuleContext ctx, bool fullCtx) 970 { 971 // always at least the implicit call to start rule 972 PredictionContext initialContext = PredictionContext.fromRuleContext(atn, ctx); 973 ATNConfigSet configs = new ATNConfigSet(fullCtx); 974 975 for (int i=0; i<p.getNumberOfTransitions(); i++) { 976 ATNState target = p.transition(i).target; 977 ATNConfig c = new ATNConfig(target, i+1, initialContext); 978 ATNConfig[] closureBusy; 979 closureATN(c, configs, closureBusy, true, fullCtx, false); 980 } 981 982 return configs; 983 } 984 985 public ATNConfigSet applyPrecedenceFilter(ATNConfigSet configs) 986 { 987 PredictionContext[int] statesFromAlt1; 988 ATNConfigSet configSet = new ATNConfigSet(configs.fullCtx); 989 foreach (config; configs.configs) { 990 // handle alt 1 first 991 if (config.alt != 1) { 992 continue; 993 } 994 995 SemanticContext updatedContext = config.semanticContext.evalPrecedence(parser, _outerContext); 996 if (updatedContext is null) { 997 // the configuration was eliminated 998 continue; 999 } 1000 1001 statesFromAlt1[config.state.stateNumber] = config.context; 1002 if (updatedContext != config.semanticContext) { 1003 configSet.add(new ATNConfig(config, updatedContext), mergeCache); 1004 } 1005 else { 1006 configSet.add(config, mergeCache); 1007 } 1008 } 1009 1010 foreach (config; configs.configs) { 1011 if (config.alt == 1) { 1012 // already handled 1013 continue; 1014 } 1015 1016 if (!config.isPrecedenceFilterSuppressed()) { 1017 /* In the future, this elimination step could be updated to also 1018 * filter the prediction context for alternatives predicting alt>1 1019 * (basically a graph subtraction algorithm). 1020 */ 1021 if (config.state.stateNumber in statesFromAlt1 && 1022 statesFromAlt1[config.state.stateNumber].opEquals(config.context)) { 1023 // eliminated 1024 continue; 1025 } 1026 } 1027 1028 configSet.add(config, mergeCache); 1029 } 1030 1031 return configSet; 1032 1033 } 1034 1035 public ATNState getReachableTarget(Transition trans, int ttype) 1036 { 1037 if (trans.matches(ttype, 0, atn.maxTokenType)) { 1038 return trans.target; 1039 } 1040 return null; 1041 } 1042 1043 public SemanticContext[] getPredsForAmbigAlts(BitSet ambigAlts, ATNConfigSet configs, 1044 int nalts) 1045 { 1046 // REACH=[1|1|[]|0:0, 1|2|[]|0:1] 1047 /* altToPred starts as an array of all null contexts. The entry at index i 1048 * corresponds to alternative i. altToPred[i] may have one of three values: 1049 * 1. null: no ATNConfig c is found such that c.alt==i 1050 * 2. SemanticContext.NONE: At least one ATNConfig c exists such that 1051 * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, 1052 * alt i has at least one unpredicated config. 1053 * 3. Non-NONE Semantic Context: There exists at least one, and for all 1054 * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. 1055 * 1056 * From this, it is clear that NONE||anything==NONE. 1057 */ 1058 SemanticContext[] altToPred = new SemanticContext[nalts + 1]; 1059 foreach (ATNConfig c; configs.configs) { 1060 if ( ambigAlts.get(c.alt) ) { 1061 altToPred[c.alt] = SemanticContext.or(altToPred[c.alt], c.semanticContext); 1062 } 1063 } 1064 1065 int nPredAlts = 0; 1066 if (!SemanticContext.NONE) { 1067 auto sp = new SemanticContext; 1068 SemanticContext.NONE = sp..new SemanticContext.Predicate; 1069 } 1070 for (int i = 1; i <= nalts; i++) { 1071 if (altToPred[i] is null) { 1072 altToPred[i] = SemanticContext.NONE; 1073 } 1074 else if (altToPred[i] != SemanticContext.NONE) { 1075 nPredAlts++; 1076 } 1077 } 1078 if (nPredAlts == 0) altToPred = null; 1079 debug 1080 writefln("getPredsForAmbigAlts result %s", to!string(altToPred)); 1081 return altToPred; 1082 } 1083 1084 protected PredPrediction[] getPredicatePredictions(BitSet ambigAlts, SemanticContext[] altToPred) 1085 { 1086 PredPrediction[] pairs; 1087 bool containsPredicate = false; 1088 1089 for (int i = 1; i < altToPred.length; i++) { 1090 SemanticContext pred = altToPred[i]; 1091 1092 // unpredicated is indicated by SemanticContext.NONE 1093 assert(pred !is null); 1094 1095 if (ambigAlts.length > 0 && ambigAlts.get(i)) { 1096 pairs ~= new PredPrediction(pred, i); 1097 } 1098 if (pred != SemanticContext.NONE) 1099 containsPredicate = true; 1100 } 1101 1102 if (!containsPredicate) { 1103 return null; 1104 } 1105 1106 // System.out.println(Arrays.toString(altToPred)+"->"+pairs); 1107 return pairs; 1108 1109 } 1110 1111 protected int getAltThatFinishedDecisionEntryRule(ATNConfigSet configs) 1112 { 1113 IntervalSet alts = new IntervalSet(); 1114 foreach (ATNConfig c; configs.configs) { 1115 if ( c.getOuterContextDepth() > 0 || (c.state.classinfo == RuleStopState.classinfo && c.context.hasEmptyPath()) ) { 1116 alts.add(c.alt); 1117 } 1118 } 1119 if ( alts.size()==0 ) return ATN.INVALID_ALT_NUMBER; 1120 return alts.getMinElement(); 1121 } 1122 1123 /** 1124 * This method is used to improve the localization of error messages by 1125 * choosing an alternative rather than throwing a 1126 * {@link NoViableAltException} in particular prediction scenarios where the 1127 * {@link #ERROR} state was reached during ATN simulation. 1128 * 1129 * <p> 1130 * The default implementation of this method uses the following 1131 * algorithm to identify an ATN configuration which successfully parsed the 1132 * decision entry rule. Choosing such an alternative ensures that the 1133 * {@link ParserRuleContext} returned by the calling rule will be complete 1134 * and valid, and the syntax error will be reported later at a more 1135 * localized location.</p> 1136 * 1137 * <ul> 1138 * <li>If a syntactically valid path or paths reach the end of the decision rule and 1139 * they are semantically valid if predicated, return the min associated alt.</li> 1140 * <li>Else, if a semantically invalid but syntactically valid path exist 1141 * or paths exist, return the minimum associated alt. 1142 * </li> 1143 * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> 1144 * </ul> 1145 * 1146 * <p> 1147 * In some scenarios, the algorithm described above could predict an 1148 * alternative which will result in a {@link FailedPredicateException} in 1149 * the parser. Specifically, this could occur if the <em>only</em> configuration 1150 * capable of successfully parsing to the end of the decision rule is 1151 * blocked by a semantic predicate. By choosing this alternative within 1152 * {@link #adaptivePredict} instead of throwing a 1153 * {@link NoViableAltException}, the resulting 1154 * {@link FailedPredicateException} in the parser will identify the specific 1155 * predicate which is preventing the parser from successfully parsing the 1156 * decision rule, which helps developers identify and correct logic errors 1157 * in semantic predicates. 1158 * </p> 1159 * 1160 * @param configs The ATN configurations which were valid immediately before 1161 * the {@link #ERROR} state was reached 1162 * @param outerContext The is the \gamma_0 initial parser context from the paper 1163 * or the parser stack at the instant before prediction commences. 1164 * 1165 * @return The value to return from {@link #adaptivePredict}, or 1166 * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not 1167 * identified and {@link #adaptivePredict} should report an error instead. 1168 */ 1169 protected int getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet configs, 1170 ParserRuleContext outerContext) 1171 { 1172 IntervalSet alts = new IntervalSet(); 1173 foreach (ATNConfig c; configs.configs) { 1174 if (c.getOuterContextDepth > 0 || (c.state.classinfo == RuleStopState.classinfo && c.context.hasEmptyPath()) ) { 1175 alts.add(c.alt); 1176 } 1177 } 1178 if (alts.size == 0) 1179 return ATN.INVALID_ALT_NUMBER; 1180 return alts.getMinElement(); 1181 } 1182 1183 protected ATNConfigSetATNConfigSetPair splitAccordingToSemanticValidity(ATNConfigSet configs, 1184 ParserRuleContext outerContext) 1185 { 1186 ATNConfigSet succeeded = new ATNConfigSet(configs.fullCtx); 1187 ATNConfigSet failed = new ATNConfigSet(configs.fullCtx); 1188 foreach (ATNConfig c; configs.configs) { 1189 if (c.semanticContext != SemanticContext.NONE ) { 1190 bool predicateEvaluationResult = evalSemanticContext(c.semanticContext, outerContext, c.alt, configs.fullCtx); 1191 if (predicateEvaluationResult) { 1192 succeeded.add(c); 1193 } 1194 else { 1195 failed.add(c); 1196 } 1197 } 1198 else { 1199 succeeded.add(c); 1200 } 1201 } 1202 ATNConfigSetATNConfigSetPair res; 1203 res.a = succeeded; 1204 res.b = failed; 1205 return res; 1206 } 1207 1208 /** 1209 * pairs that win. A {@code NONE} predicate indicates an alt containing an 1210 * unpredicated config which behaves as "always true." If !complete 1211 * then we stop at the first predicate that evaluates to true. This 1212 * includes pairs with null predicates. 1213 */ 1214 protected BitSet evalSemanticContext(PredPrediction[] predPredictions, ParserRuleContext outerContext, 1215 bool complete) 1216 { 1217 BitSet predictions; 1218 foreach (pair; predPredictions) { 1219 if (pair.pred == SemanticContext.NONE ) { 1220 predictions.set(pair.alt, true); 1221 if (!complete) { 1222 break; 1223 } 1224 continue; 1225 } 1226 1227 bool fullCtx = false; // in dfa 1228 bool predicateEvaluationResult = evalSemanticContext(pair.pred, outerContext, pair.alt, fullCtx); 1229 debug(dfa_debug) { 1230 writefln("eval pred %1$s=%2$s", pair, predicateEvaluationResult); 1231 } 1232 1233 if ( predicateEvaluationResult ) { 1234 debug(dfa_debug) 1235 writefln("PREDICT ", pair.alt); 1236 predictions.set(pair.alt, true); 1237 if (!complete) { 1238 break; 1239 } 1240 } 1241 } 1242 1243 return predictions; 1244 } 1245 1246 public bool evalSemanticContext(SemanticContext pred, ParserRuleContext parserCallStack, 1247 int alt, bool fullCtx) 1248 { 1249 return pred.eval(parser, parserCallStack); 1250 } 1251 1252 protected void closureATN(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1253 bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) 1254 { 1255 int initialDepth = 0; 1256 closureCheckingStopState(config, configs, closureBusy, collectPredicates, 1257 fullCtx, 1258 initialDepth, treatEofAsEpsilon); 1259 assert (!fullCtx || !configs.dipsIntoOuterContext); 1260 } 1261 1262 protected void closureCheckingStopState(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1263 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1264 { 1265 debug { 1266 writefln("\nclosureCheckingStopState: closure(%s)", config.toString(parser, true)); 1267 } 1268 if (cast(RuleStopState)config.state) { 1269 // We hit rule end. If we have context info, use it 1270 // run thru all possible stack tops in ctx 1271 if (!config.context.isEmpty) { 1272 for (int i = 0; i < config.context.size; i++) { 1273 if (config.context.getReturnState(i) == PredictionContext.EMPTY_RETURN_STATE) { 1274 if (fullCtx) { 1275 configs.add(new ATNConfig(config, config.state, 1276 cast(PredictionContext)PredictionContext.EMPTY), mergeCache); 1277 continue; 1278 } 1279 else { 1280 // we have no context info, just chase follow links (if greedy) 1281 debug 1282 writefln("1FALLING off rule %s", 1283 getRuleName(config.state.ruleIndex)); 1284 closure_(config, configs, closureBusy, collectPredicates, 1285 fullCtx, depth, treatEofAsEpsilon); 1286 } 1287 continue; 1288 } 1289 ATNState returnState = atn.states[config.context.getReturnState(i)]; 1290 PredictionContext newContext = config.context.getParent(i); // "pop" return state 1291 ATNConfig c = new ATNConfig(returnState, config.alt, newContext, 1292 config.semanticContext); 1293 // While we have context to pop back from, we may have 1294 // gotten that context AFTER having falling off a rule. 1295 // Make sure we track that we are now out of context. 1296 // 1297 // This assignment also propagates the 1298 // isPrecedenceFilterSuppressed() value to the new 1299 // configuration. 1300 c.reachesIntoOuterContext = config.reachesIntoOuterContext; 1301 assert(depth > int.min); 1302 closureCheckingStopState(c, configs, closureBusy, collectPredicates, 1303 fullCtx, depth - 1, treatEofAsEpsilon); 1304 } 1305 return; 1306 } 1307 else if (fullCtx) { 1308 // reached end of start rule 1309 configs.add(config, mergeCache); 1310 return; 1311 } 1312 else { 1313 // else if we have no context info, just chase follow links (if greedy) 1314 debug 1315 writefln("FALLING off rule %s", 1316 getRuleName(config.state.ruleIndex)); 1317 } 1318 } 1319 closure_(config, configs, closureBusy, collectPredicates, 1320 fullCtx, depth, treatEofAsEpsilon); 1321 } 1322 1323 /** 1324 * Do the actual work of walking epsilon edges 1325 */ 1326 protected void closure_(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1327 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1328 { 1329 ATNState p = config.state; 1330 1331 // optimization 1332 if (!p.onlyHasEpsilonTransitions) { 1333 configs.add(config, mergeCache); 1334 // make sure to not return here, because EOF transitions can act as 1335 // both epsilon transitions and non-epsilon transitions. 1336 // if ( debug ) System.out.println("added config "+configs); 1337 } 1338 for (int i=0; i<p.getNumberOfTransitions(); i++) { 1339 if ( i==0 && canDropLoopEntryEdgeInLeftRecursiveRule(config)) 1340 continue; 1341 Transition t = p.transition(i); 1342 bool continueCollecting = 1343 collectPredicates && !(cast(ActionTransition)t); 1344 ATNConfig c = getEpsilonTarget(config, t, continueCollecting, 1345 depth == 0, fullCtx, treatEofAsEpsilon); 1346 if (c !is null) { 1347 int newDepth = depth; 1348 if (cast(RuleStopState)config.state) { 1349 assert (!fullCtx); 1350 1351 // target fell off end of rule; mark resulting c as having dipped into outer context 1352 // We can't get here if incoming config was rule stop and we had context 1353 // track how far we dip into outer context. Might 1354 // come in handy and we avoid evaluating context dependent 1355 // preds if this is > 0. 1356 if (_dfa !is null && _dfa.isPrecedenceDfa) { 1357 int outermostPrecedenceReturn = (cast(EpsilonTransition)t).outermostPrecedenceReturn; 1358 if (outermostPrecedenceReturn == _dfa.atnStartState.ruleIndex) { 1359 c.setPrecedenceFilterSuppressed(true); 1360 } 1361 } 1362 1363 c.reachesIntoOuterContext++; 1364 1365 if (count(closureBusy, c) > 0) { 1366 // avoid infinite recursion for right-recursive rules 1367 continue; 1368 } 1369 closureBusy ~= c; 1370 1371 configs.dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method 1372 assert (newDepth > int.min); 1373 newDepth--; 1374 debug 1375 writefln("dips into outer ctx: %s", c); 1376 } 1377 else { 1378 if (!t.isEpsilon) { 1379 if (count(closureBusy, c) > 0) { 1380 // avoid infinite recursion for right-recursive rules 1381 continue; 1382 } 1383 closureBusy ~= c; 1384 } 1385 if (cast(RuleTransition)t) { 1386 // latch when newDepth goes negative - once we step out of the entry context we can't return 1387 if (newDepth >= 0) { 1388 newDepth++; 1389 } 1390 } 1391 } 1392 closureCheckingStopState(c, configs, closureBusy, continueCollecting, 1393 fullCtx, newDepth, treatEofAsEpsilon); 1394 } 1395 } 1396 1397 } 1398 1399 public string getRuleName(int index) 1400 { 1401 if (parser !is null && index>=0 ) return parser.getRuleNames()[index]; 1402 return format("<rule %s>",index); 1403 } 1404 1405 public ATNConfig getEpsilonTarget(ATNConfig config, Transition t, bool collectPredicates, 1406 bool inContext, bool fullCtx, bool treatEofAsEpsilon) 1407 { 1408 switch (t.getSerializationType()) { 1409 case TransitionStates.RULE: 1410 return ruleTransition(config, cast(RuleTransition)t); 1411 1412 case TransitionStates.PRECEDENCE: 1413 return precedenceTransition(config, cast(PrecedencePredicateTransition)t, collectPredicates, inContext, fullCtx); 1414 1415 case TransitionStates.PREDICATE: 1416 return predTransition(config, cast(PredicateTransition)t, 1417 collectPredicates, 1418 inContext, 1419 fullCtx); 1420 1421 case TransitionStates.ACTION: 1422 return actionTransition(config, cast(ActionTransition)t); 1423 1424 case TransitionStates.EPSILON: 1425 return new ATNConfig(config, t.target); 1426 1427 case TransitionStates.ATOM: 1428 case TransitionStates.RANGE: 1429 case TransitionStates.SET: 1430 // EOF transitions act like epsilon transitions after the first EOF 1431 // transition is traversed 1432 if (treatEofAsEpsilon) { 1433 if (t.matches(TokenConstantDefinition.EOF, 0, 1)) { 1434 return new ATNConfig(config, t.target); 1435 } 1436 } 1437 1438 return null; 1439 1440 default: 1441 return null; 1442 } 1443 1444 } 1445 1446 protected ATNConfig actionTransition(ATNConfig config, ActionTransition t) 1447 { 1448 debug writefln("ACTION edge %1$s:%2$s", t.ruleIndex, t.actionIndex); 1449 return new ATNConfig(config, t.target); 1450 } 1451 1452 public ATNConfig precedenceTransition(ATNConfig config, PrecedencePredicateTransition pt, 1453 bool collectPredicates, bool inContext, bool fullCtx) 1454 { 1455 debug { 1456 writefln("PRED (collectPredicates=%1$s) %2$s" ~ 1457 ">=_p, ctx dependent=true", collectPredicates, pt.precedence); 1458 if ( parser !is null ) { 1459 writefln("context surrounding pred is %s", 1460 parser.getRuleInvocationStack); 1461 //parser.classinfo); 1462 } 1463 } 1464 1465 ATNConfig c = null; 1466 if (collectPredicates && inContext) { 1467 if ( fullCtx ) { 1468 // In full context mode, we can evaluate predicates on-the-fly 1469 // during closure, which dramatically reduces the size of 1470 // the config sets. It also obviates the need to test predicates 1471 // later during conflict resolution. 1472 int currentPosition = _input.index(); 1473 _input.seek(_startIndex); 1474 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1475 _input.seek(currentPosition); 1476 if ( predSucceeds ) { 1477 c = new ATNConfig(config, pt.target); // no pred context 1478 } 1479 } 1480 else { 1481 SemanticContext newSemCtx = 1482 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1483 c = new ATNConfig(config, pt.target, newSemCtx); 1484 } 1485 } 1486 else { 1487 c = new ATNConfig(config, pt.target); 1488 } 1489 1490 debug 1491 writefln("config from pred transition=%s", c); 1492 return c; 1493 1494 } 1495 1496 protected ATNConfig predTransition(ATNConfig config, PredicateTransition pt, bool collectPredicates, 1497 bool inContext, bool fullCtx) 1498 { 1499 debug { 1500 writefln("PRED (collectPredicates=%1$s) %2$s:%3$s, ctx dependent=%4$s", 1501 collectPredicates, pt.ruleIndex, 1502 pt.predIndex, pt.isCtxDependent); 1503 if ( parser !is null ) { 1504 writefln("context surrounding pred is %s", 1505 parser.getRuleInvocationStack()); 1506 } 1507 } 1508 1509 ATNConfig c = null; 1510 if ( collectPredicates && 1511 (!pt.isCtxDependent || (pt.isCtxDependent&&inContext)) ) 1512 { 1513 if ( fullCtx ) { 1514 // In full context mode, we can evaluate predicates on-the-fly 1515 // during closure, which dramatically reduces the size of 1516 // the config sets. It also obviates the need to test predicates 1517 // later during conflict resolution. 1518 int currentPosition = _input.index(); 1519 _input.seek(_startIndex); 1520 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1521 _input.seek(currentPosition); 1522 if (predSucceeds) { 1523 c = new ATNConfig(config, pt.target); // no pred context 1524 } 1525 } 1526 else { 1527 SemanticContext newSemCtx = 1528 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1529 c = new ATNConfig(config, pt.target, newSemCtx); 1530 } 1531 } 1532 else { 1533 c = new ATNConfig(config, pt.target); 1534 } 1535 1536 debug writefln("config from pred transition=",c); 1537 return c; 1538 } 1539 1540 public ATNConfig ruleTransition(ATNConfig config, RuleTransition t) 1541 { 1542 debug { 1543 writefln("CALL rule %1$s, ctx=%2$s", getRuleName(t.target.ruleIndex), config.context); 1544 } 1545 ATNState returnState = t.followState; 1546 PredictionContext newContext = 1547 SingletonPredictionContext.create(config.context, returnState.stateNumber); 1548 return new ATNConfig(config, t.target, newContext); 1549 } 1550 1551 /** 1552 * Gets a {@link BitSet} containing the alternatives in {@code configs} 1553 * which are part of one or more conflicting alternative subsets. 1554 * 1555 * @param configs The {@link ATNConfigSet} to analyze. 1556 * @return The alternatives in {@code configs} which are part of one or more 1557 * conflicting alternative subsets. If {@code configs} does not contain any 1558 * conflicting subsets, this method returns an empty {@link BitSet}. 1559 */ 1560 public BitSet getConflictingAlts(ATNConfigSet configs) 1561 { 1562 BitSet[] altsets = PredictionMode.getConflictingAltSubsets(configs); 1563 return PredictionMode.getAlts(altsets); 1564 } 1565 1566 /** 1567 * Sam pointed out a problem with the previous definition, v3, of 1568 * ambiguous states. If we have another state associated with conflicting 1569 * alternatives, we should keep going. For example, the following grammar 1570 * 1571 * s : (ID | ID ID?) ';' ; 1572 * 1573 * When the ATN simulation reaches the state before ';', it has a DFA 1574 * state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 1575 * 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node 1576 * because alternative to has another way to continue, via [6|2|[]]. 1577 * The key is that we have a single state that has config's only associated 1578 * with a single alternative, 2, and crucially the state transitions 1579 * among the configurations are all non-epsilon transitions. That means 1580 * we don't consider any conflicts that include alternative 2. So, we 1581 * ignore the conflict between alts 1 and 2. We ignore a set of 1582 * conflicting alts when there is an intersection with an alternative 1583 * associated with a single alt state in the state→config-list map. 1584 * 1585 * It's also the case that we might have two conflicting configurations but 1586 * also a 3rd nonconflicting configuration for a different alternative: 1587 * [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: 1588 * 1589 * a : A | A | A B ; 1590 * 1591 * After matching input A, we reach the stop state for rule A, state 1. 1592 * State 8 is the state right before B. Clearly alternatives 1 and 2 1593 * conflict and no amount of further lookahead will separate the two. 1594 * However, alternative 3 will be able to continue and so we do not 1595 * stop working on this state. In the previous example, we're concerned 1596 * with states associated with the conflicting alternatives. Here alt 1597 * 3 is not associated with the conflicting configs, but since we can continue 1598 * looking for input reasonably, I don't declare the state done. We 1599 * ignore a set of conflicting alts when we have an alternative 1600 * that we still need to pursue. 1601 */ 1602 public BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) 1603 { 1604 auto conflictingAlts = new BitSet(1); 1605 if (configs.uniqueAlt != ATN.INVALID_ALT_NUMBER) { 1606 conflictingAlts.set(configs.uniqueAlt, true); 1607 } 1608 else { 1609 *conflictingAlts = configs.conflictingAlts; 1610 } 1611 return *conflictingAlts; 1612 } 1613 1614 public string getTokenName(int t) 1615 { 1616 if (t == TokenConstantDefinition.EOF) { 1617 return "EOF"; 1618 } 1619 1620 Vocabulary vocabulary = parser !is null ? parser.getVocabulary() : new VocabularyImpl(null, null, null); 1621 string displayName = vocabulary.getDisplayName(t); 1622 if (displayName == to!string(t)) { 1623 return displayName; 1624 } 1625 1626 return displayName ~ "<" ~ to!string(t) ~ ">"; 1627 } 1628 1629 public string getLookaheadName(TokenStream input) 1630 { 1631 return getTokenName(input.LA(1)); 1632 } 1633 1634 /** 1635 * Used for debugging in adaptivePredict around execATN but I cut 1636 * it out for clarity now that alg. works well. We can leave this 1637 * "dead" code for a bit. 1638 */ 1639 public void dumpDeadEndConfigs(NoViableAltException nvae) 1640 { 1641 debug 1642 writefln("dead end configs: "); 1643 foreach (ATNConfig c; nvae.getDeadEndConfigs().configs) { 1644 string trans = "no edges"; 1645 if (c.state.getNumberOfTransitions > 0) { 1646 Transition t = c.state.transition(0); 1647 if (t.classinfo == AtomTransition.classinfo) { 1648 AtomTransition at = cast(AtomTransition)t; 1649 trans = "Atom " ~ getTokenName(at._label); 1650 } 1651 else if (t.classinfo == SetTransition.classinfo) { 1652 SetTransition st = cast(SetTransition)t; 1653 bool not = (st.classinfo == NotSetTransition.classinfo); 1654 trans = (not?"~":"") ~ "Set "~ st.set.toString(); 1655 } 1656 } 1657 debug 1658 writefln("%1$s:%2$s", c.toString(parser, true), trans); 1659 } 1660 } 1661 1662 protected NoViableAltException noViableAlt(TokenStream input, ParserRuleContext outerContext, 1663 ATNConfigSet configs, int startIndex) 1664 { 1665 return new NoViableAltException(parser, input, 1666 input.get(startIndex), 1667 input.LT(1), 1668 configs, outerContext); 1669 } 1670 1671 protected static int getUniqueAlt(ATNConfigSet configs) 1672 { 1673 int alt = ATN.INVALID_ALT_NUMBER; 1674 foreach (ATNConfig c; configs.configs) { 1675 if (alt == ATN.INVALID_ALT_NUMBER) { 1676 alt = c.alt; // found first alt 1677 } 1678 else if (c.alt != alt) { 1679 return ATN.INVALID_ALT_NUMBER; 1680 } 1681 } 1682 return alt; 1683 } 1684 1685 /** 1686 * Add an edge to the DFA, if possible. This method calls 1687 * {@link #addDFAState} to ensure the {@code to} state is present in the 1688 * DFA. If {@code from} is {@code null}, or if {@code t} is outside the 1689 * range of edges that can be represented in the DFA tables, this method 1690 * returns without adding the edge to the DFA. 1691 * 1692 * <p>If {@code to} is {@code null}, this method returns {@code null}. 1693 * Otherwise, this method returns the {@link DFAState} returned by calling 1694 * {@link #addDFAState} for the {@code to} state.</p> 1695 * 1696 * @param dfa The DFA 1697 * @param from The source state for the edge 1698 * @param t The input symbol 1699 * @param to The target state for the edge 1700 * 1701 * @return If {@code to} is {@code null}, this method returns {@code null}; 1702 * otherwise this method returns the result of calling {@link #addDFAState} 1703 * on {@code to} 1704 */ 1705 protected DFAState addDFAEdge(ref DFA dfa, DFAState from, int t, DFAState to) 1706 { 1707 debug { 1708 writefln("\nEDGE %1$s -> %2$s upon %3$s", from, to, getTokenName(t)); 1709 } 1710 if (to is null) { 1711 return null; 1712 } 1713 to = addDFAState(dfa, to); // used existing if possible not incoming 1714 if (from is null || t < -1 || t > atn.maxTokenType) { 1715 return to; 1716 } 1717 synchronized (from) { 1718 if (from.edges == null) { 1719 from.edges = new DFAState[atn.maxTokenType+1+1]; 1720 } 1721 from.edges[t+1] = to; // connect 1722 } 1723 1724 debug { 1725 writefln("end dfa.decision = %s, dfa.states = %s", dfa.decision, dfa.states); 1726 } 1727 1728 return to; 1729 } 1730 1731 /** 1732 * Add state {@code D} to the DFA if it is not already present, and return 1733 * the actual instance stored in the DFA. If a state equivalent to {@code D} 1734 * is already in the DFA, the existing state is returned. Otherwise this 1735 * method returns {@code D} after adding it to the DFA. 1736 * 1737 * <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and 1738 * does not change the DFA.</p> 1739 * 1740 * @param dfa The dfa 1741 * @param D The DFA state to add 1742 * @return The state stored in the DFA. This will be either the existing 1743 * state if {@code D} is already in the DFA, or {@code D} itself if the 1744 * state was not already present. 1745 */ 1746 protected DFAState addDFAState(ref DFA dfa, DFAState D) 1747 { 1748 if (D == ERROR) { 1749 return D; 1750 } 1751 debug 1752 writefln("adding new dfa: D = %s, dfa.states = %s, \n(D in dfa.states) = %s", D, dfa.states, D in dfa.states); 1753 if (D in dfa.states) 1754 return dfa.states[D]; 1755 D.stateNumber = to!int(dfa.states.length); 1756 if (!D.configs.readonly) { 1757 D.configs.optimizeConfigs(this); 1758 D.configs.readonly(true); 1759 } 1760 dfa.states[D] = D; 1761 debug 1762 writefln("adding new DFA state end: D = %1$s, dfa.states = %2$s", D, dfa.states); 1763 return D; 1764 } 1765 1766 protected void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, ATNConfigSet configs, 1767 int startIndex, int stopIndex) 1768 { 1769 debug(retry_debug) { 1770 import antlr.v4.runtime.misc.Interval; 1771 Interval interval = Interval.of(startIndex, stopIndex); 1772 writefln("reportAttemptingFullContext decision=%1$s:%2$s, input=%3$s", 1773 dfa.decision, configs, 1774 parser.getTokenStream().getText(interval)); 1775 } 1776 if (parser !is null) parser.getErrorListenerDispatch().reportAttemptingFullContext(parser, 1777 dfa, 1778 startIndex, 1779 stopIndex, 1780 conflictingAlts, 1781 configs); 1782 } 1783 1784 protected void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, 1785 int startIndex, int stopIndex) 1786 { 1787 debug(retry_debug) { 1788 import antlr.v4.runtime.misc.Interval; 1789 Interval interval = Interval.of(startIndex, stopIndex); 1790 writefln("reportContextSensitivity decision=%1$s:%2$s, input=%3$s", 1791 dfa.decision, configs, parser.getTokenStream().getText(interval)); 1792 } 1793 if (parser !is null) 1794 parser.getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); 1795 } 1796 1797 protected void reportAmbiguity(DFA dfa, DFAState D, int startIndex, int stopIndex, bool exact, 1798 BitSet ambigAlts, ATNConfigSet configs) 1799 { 1800 debug(retry_debug) { 1801 import antlr.v4.runtime.misc.Interval; 1802 Interval interval = Interval.of(startIndex, stopIndex); 1803 writefln("reportAmbiguity %1$s:%2$s, input=%3$s", 1804 ambigAlts, configs, parser.getTokenStream().getText(interval)); 1805 } 1806 if (parser !is null) parser.getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, 1807 exact, ambigAlts, configs); 1808 } 1809 1810 public void setPredictionMode(PredictionModeConst mode) 1811 { 1812 this.mode = mode; 1813 } 1814 1815 public PredictionModeConst getPredictionMode() 1816 { 1817 return mode; 1818 } 1819 1820 public Parser getParser() 1821 { 1822 return parser; 1823 } 1824 1825 protected bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) 1826 { 1827 auto TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = true; 1828 if ( TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT ) 1829 return false; 1830 return false; 1831 } 1832 1833 }