1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ParserATNSimulator; 8 9 import antlr.v4.runtime.IntStream; 10 import antlr.v4.runtime.IntStreamConstant; 11 import antlr.v4.runtime.NoViableAltException; 12 import antlr.v4.runtime.Parser; 13 import antlr.v4.runtime.ParserRuleContext; 14 import antlr.v4.runtime.RuleContext; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.Vocabulary; 18 import antlr.v4.runtime.VocabularyImpl; 19 import antlr.v4.runtime.atn.ATN; 20 import antlr.v4.runtime.atn.ATNConfig; 21 import antlr.v4.runtime.atn.ATNConfigSet; 22 import antlr.v4.runtime.atn.ATNSimulator; 23 import antlr.v4.runtime.atn.ATNState; 24 import antlr.v4.runtime.atn.ActionTransition; 25 import antlr.v4.runtime.atn.AtomTransition; 26 import antlr.v4.runtime.atn.DecisionState; 27 import antlr.v4.runtime.atn.EpsilonTransition; 28 import antlr.v4.runtime.atn.InterfaceParserATNSimulator; 29 import antlr.v4.runtime.atn.NotSetTransition; 30 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 31 import antlr.v4.runtime.atn.PredicateTransition; 32 import antlr.v4.runtime.atn.PredictionContext; 33 import antlr.v4.runtime.atn.PredictionContextCache; 34 import antlr.v4.runtime.atn.PredictionMode; 35 import antlr.v4.runtime.atn.PredictionModeConst; 36 import antlr.v4.runtime.atn.RuleStopState; 37 import antlr.v4.runtime.atn.RuleTransition; 38 import antlr.v4.runtime.atn.SemanticContext; 39 import antlr.v4.runtime.atn.SetTransition; 40 import antlr.v4.runtime.atn.SingletonPredictionContext; 41 import antlr.v4.runtime.atn.Transition; 42 import antlr.v4.runtime.atn.TransitionStates; 43 import antlr.v4.runtime.dfa.DFA; 44 import antlr.v4.runtime.dfa.DFAState; 45 import antlr.v4.runtime.dfa.PredPrediction; 46 import antlr.v4.runtime.misc; 47 import std.algorithm.searching; 48 import std.conv; 49 import std.format; 50 import std.stdio; 51 import std.typecons; 52 53 alias ATNConfigSetATNConfigSetPair = Tuple!(ATNConfigSet, "a", ATNConfigSet, "b"); 54 55 /** 56 * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. 57 * 58 * <p> 59 * The basic complexity of the adaptive strategy makes it harder to understand. 60 * We begin with ATN simulation to build paths in a DFA. Subsequent prediction 61 * requests go through the DFA first. If they reach a state without an edge for 62 * the current symbol, the algorithm fails over to the ATN simulation to 63 * complete the DFA path for the current input (until it finds a conflict state 64 * or uniquely predicting state).</p> 65 * 66 * <p> 67 * All of that is done without using the outer context because we want to create 68 * a DFA that is not dependent upon the rule invocation stack when we do a 69 * prediction. One DFA works in all contexts. We avoid using context not 70 * necessarily because it's slower, although it can be, but because of the DFA 71 * caching problem. The closure routine only considers the rule invocation stack 72 * created during prediction beginning in the decision rule. For example, if 73 * prediction occurs without invoking another rule's ATN, there are no context 74 * stacks in the configurations. When lack of context leads to a conflict, we 75 * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing 76 * strategy (versus full LL(*)).</p> 77 * 78 * <p> 79 * When SLL yields a configuration set with conflict, we rewind the input and 80 * retry the ATN simulation, this time using full outer context without adding 81 * to the DFA. Configuration context stacks will be the full invocation stacks 82 * from the start rule. If we get a conflict using full context, then we can 83 * definitively say we have a true ambiguity for that input sequence. If we 84 * don't get a conflict, it implies that the decision is sensitive to the outer 85 * context. (It is not context-sensitive in the sense of context-sensitive 86 * grammars.)</p> 87 * 88 * <p> 89 * The next time we reach this DFA state with an SLL conflict, through DFA 90 * simulation, we will again retry the ATN simulation using full context mode. 91 * This is slow because we can't save the results and have to "interpret" the 92 * ATN each time we get that input.</p> 93 * 94 * <p> 95 * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> 96 * 97 * <p> 98 * We could cache results from full context to predicted alternative easily and 99 * that saves a lot of time but doesn't work in presence of predicates. The set 100 * of visible predicates from the ATN start state changes depending on the 101 * context, because closure can fall off the end of a rule. I tried to cache 102 * tuples (stack context, semantic context, predicted alt) but it was slower 103 * than interpreting and much more complicated. Also required a huge amount of 104 * memory. The goal is not to create the world's fastest parser anyway. I'd like 105 * to keep this algorithm simple. By launching multiple threads, we can improve 106 * the speed of parsing across a large number of files.</p> 107 * 108 * <p> 109 * There is no strict ordering between the amount of input used by SLL vs LL, 110 * which makes it really hard to build a cache for full context. Let's say that 111 * we have input A B C that leads to an SLL conflict with full context X. That 112 * implies that using X we might only use A B but we could also use A B C D to 113 * resolve conflict. Input A B C D could predict alternative 1 in one position 114 * in the input and A B C E could predict alternative 2 in another position in 115 * input. The conflicting SLL configurations could still be non-unique in the 116 * full context prediction, which would lead us to requiring more input than the 117 * original A B C. To make a prediction cache work, we have to track the exact 118 * input used during the previous prediction. That amounts to a cache that maps 119 * X to a specific DFA for that context.</p> 120 * 121 * <p> 122 * Something should be done for left-recursive expression predictions. They are 123 * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry 124 * with full LL thing Sam does.</p> 125 * 126 * <p> 127 * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> 128 * 129 * <p> 130 * We avoid doing full context retry when the outer context is empty, we did not 131 * dip into the outer context by falling off the end of the decision state rule, 132 * or when we force SLL mode.</p> 133 * 134 * <p> 135 * As an example of the not dip into outer context case, consider as super 136 * constructor calls versus function calls. One grammar might look like 137 * this:</p> 138 * 139 * <pre> 140 * ctorBody 141 * : '{' superCall? stat* '}' 142 * ; 143 * </pre> 144 * 145 * <p> 146 * Or, you might see something like</p> 147 * 148 * <pre> 149 * stat 150 * : superCall ';' 151 * | expression ';' 152 * | ... 153 * ; 154 * </pre> 155 * 156 * <p> 157 * In both cases I believe that no closure operations will dip into the outer 158 * context. In the first case ctorBody in the worst case will stop at the '}'. 159 * In the 2nd case it should stop at the ';'. Both cases should stay within the 160 * entry rule and not dip into the outer context.</p> 161 * 162 * <p> 163 * <strong>PREDICATES</strong></p> 164 * 165 * <p> 166 * Predicates are always evaluated if present in either SLL or LL both. SLL and 167 * LL simulation deals with predicates differently. SLL collects predicates as 168 * it performs closure operations like ANTLR v3 did. It delays predicate 169 * evaluation until it reaches and accept state. This allows us to cache the SLL 170 * ATN simulation whereas, if we had evaluated predicates on-the-fly during 171 * closure, the DFA state configuration sets would be different and we couldn't 172 * build up a suitable DFA.</p> 173 * 174 * <p> 175 * When building a DFA accept state during ATN simulation, we evaluate any 176 * predicates and return the sole semantically valid alternative. If there is 177 * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, 178 * we throw an exception. Alternatives without predicates act like they have 179 * true predicates. The simple way to think about it is to strip away all 180 * alternatives with false predicates and choose the minimum alternative that 181 * remains.</p> 182 * 183 * <p> 184 * When we start in the DFA and reach an accept state that's predicated, we test 185 * those and return the minimum semantically viable alternative. If no 186 * alternatives are viable, we throw an exception.</p> 187 * 188 * <p> 189 * During full LL ATN simulation, closure always evaluates predicates and 190 * on-the-fly. This is crucial to reducing the configuration set size during 191 * closure. It hits a landmine when parsing with the Java grammar, for example, 192 * without this on-the-fly evaluation.</p> 193 * 194 * <p> 195 * <strong>SHARING DFA</strong></p> 196 * 197 * <p> 198 * All instances of the same parser share the same decision DFAs through a 199 * static field. Each instance gets its own ATN simulator but they share the 200 * same {@link #decisionToDFA} field. They also share a 201 * {@link PredictionContextCache} object that makes sure that all 202 * {@link PredictionContext} objects are shared among the DFA states. This makes 203 * a big size difference.</p> 204 * 205 * <p> 206 * <strong>THREAD SAFETY</strong></p> 207 * 208 * <p> 209 * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when 210 * it adds a new DFA object to that array. {@link #addDFAEdge} 211 * locks on the DFA for the current decision when setting the 212 * {@link DFAState#edges} field. {@link #addDFAState} locks on 213 * the DFA for the current decision when looking up a DFA state to see if it 214 * already exists. We must make sure that all requests to add DFA states that 215 * are equivalent result in the same shared DFA object. This is because lots of 216 * threads will be trying to update the DFA at once. The 217 * {@link #addDFAState} method also locks inside the DFA lock 218 * but this time on the shared context cache when it rebuilds the 219 * configurations' {@link PredictionContext} objects using cached 220 * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is 221 * safe as long as we can guarantee that all threads referencing 222 * {@code s.edge[t]} get the same physical target {@link DFAState}, or 223 * {@code null}. Once into the DFA, the DFA simulation does not reference the 224 * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new 225 * targets. The DFA simulator will either find {@link DFAState#edges} to be 226 * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or 227 * {@code dfa.edges[t]} to be non-null. The 228 * {@link #addDFAEdge} method could be racing to set the field 229 * but in either case the DFA simulator works; if {@code null}, and requests ATN 230 * simulation. It could also race trying to get {@code dfa.edges[t]}, but either 231 * way it will work because it's not doing a test and set operation.</p> 232 * 233 * <p> 234 * <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage 235 * Parsing)</strong></p> 236 * 237 * <p> 238 * Sam pointed out that if SLL does not give a syntax error, then there is no 239 * point in doing full LL, which is slower. We only have to try LL if we get a 240 * syntax error. For maximum speed, Sam starts the parser set to pure SLL 241 * mode with the {@link BailErrorStrategy}:</p> 242 * 243 * <pre> 244 * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; 245 * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); 246 * </pre> 247 * 248 * <p> 249 * If it does not get a syntax error, then we're done. If it does get a syntax 250 * error, we need to retry with the combined SLL/LL strategy.</p> 251 * 252 * <p> 253 * The reason this works is as follows. If there are no SLL conflicts, then the 254 * grammar is SLL (at least for that input set). If there is an SLL conflict, 255 * the full LL analysis must yield a set of viable alternatives which is a 256 * subset of the alternatives reported by SLL. If the LL set is a singleton, 257 * then the grammar is LL but not SLL. If the LL set is the same size as the SLL 258 * set, the decision is SLL. If the LL set has size > 1, then that decision 259 * is truly ambiguous on the current input. If the LL set is smaller, then the 260 * SLL conflict resolution might choose an alternative that the full LL would 261 * rule out as a possibility based upon better context information. If that's 262 * the case, then the SLL parse will definitely get an error because the full LL 263 * analysis says it's not viable. If SLL conflict resolution chooses an 264 * alternative within the LL set, them both SLL and LL would choose the same 265 * alternative because they both choose the minimum of multiple conflicting 266 * alternatives.</p> 267 * 268 * <p> 269 * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and 270 * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL 271 * parsing will get an error because SLL will pursue alternative 1. If 272 * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will 273 * choose the same alternative because alternative one is the minimum of either 274 * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax 275 * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> 276 * 277 * <p> 278 * Of course, if the input is invalid, then we will get an error for sure in 279 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over 280 * the input.</p> 281 */ 282 class ParserATNSimulator : ATNSimulator, InterfaceParserATNSimulator 283 { 284 285 protected Parser parser; 286 287 public DFA[] decisionToDFA; 288 289 public PredictionModeConst mode = PredictionModeConst.LL; 290 291 /** 292 * Each prediction operation uses a cache for merge of prediction contexts. 293 * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap 294 * isn't synchronized but we're ok since two threads shouldn't reuse same 295 * parser/atnsim object because it can only handle one input at a time. 296 * This maps graphs a and b to merged result c. (a,b)→c. We can avoid 297 * the merge if we ever see a and b again. Note that (b,a)→c should 298 * also be examined during cache lookup. 299 * @uml 300 * @__gshared 301 */ 302 public static __gshared DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext) mergeCache; 303 304 protected DFA _dfa; 305 306 protected TokenStream _input; 307 308 protected int _startIndex; 309 310 protected ParserRuleContext _outerContext; 311 312 /** 313 * @uml 314 * Testing only! 315 */ 316 protected this(ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 317 { 318 this(null, atn, decisionToDFA, sharedContextCache); 319 } 320 321 public this(Parser parser, ATN atn, DFA[] decisionToDFA, PredictionContextCache sharedContextCache) 322 { 323 super(atn, sharedContextCache); 324 this.parser = parser; 325 this.decisionToDFA = decisionToDFA; 326 if (mergeCache is null) 327 this.mergeCache = new typeof(this.mergeCache); 328 } 329 330 /** 331 * @uml 332 * @override 333 */ 334 public override void reset() 335 { 336 } 337 338 /** 339 * @uml 340 * @override 341 */ 342 public override void clearDFA() 343 { 344 for (int d = 0; d < decisionToDFA.length; d++) { 345 decisionToDFA[d] = new DFA(atn.getDecisionState(d), d); 346 } 347 } 348 349 public int adaptivePredict(TokenStream input, int decision, ParserRuleContext outerContext) 350 { 351 debug(ParserATNSimulator) { 352 writefln("adaptivePredict decision %1$s"~ 353 " exec LA(1)==%2$s"~ 354 " line %3$s:%4$s", 355 decision, getLookaheadName(input), input.LT(1).getLine, 356 input.LT(1).getCharPositionInLine()); 357 } 358 359 _input = input; 360 _startIndex = input.index(); 361 _outerContext = outerContext; 362 DFA dfa = decisionToDFA[decision]; 363 _dfa = dfa; 364 365 int m = input.mark(); 366 int index = _startIndex; 367 // Now we are certain to have a specific decision's DFA 368 // But, do we still need an initial state? 369 try { 370 DFAState s0; 371 if (dfa.isPrecedenceDfa) { 372 // the start state for a precedence DFA depends on the current 373 // parser precedence, and is provided by a DFA method. 374 s0 = dfa.getPrecedenceStartState(parser.getPrecedence()); 375 } 376 else { 377 // the start state for a "regular" DFA is just s0 378 s0 = dfa.s0; 379 } 380 381 if (s0 is null) { 382 if ( outerContext is null ) 383 outerContext = ParserRuleContext.EMPTY; 384 debug(ParserATNSimulator) { 385 writefln("predictATN decision = %1$s"~ 386 " exec LA(1)==%2$s"~ 387 ", outerContext=%3$s", 388 dfa.decision, getLookaheadName(input), 389 outerContext); 390 } 391 392 bool fullCtx = false; 393 ATNConfigSet s0_closure = 394 computeStartState(dfa.atnStartState, 395 ParserRuleContext.EMPTY, 396 fullCtx); 397 if (dfa.isPrecedenceDfa) { 398 /* If this is a precedence DFA, we use applyPrecedenceFilter 399 * to convert the computed start state to a precedence start 400 * state. We then use DFA.setPrecedenceStartState to set the 401 * appropriate start state for the precedence level rather 402 * than simply setting DFA.s0. 403 */ 404 dfa.s0.configs = s0_closure; // not used for prediction but useful to know start configs anyway 405 s0_closure = applyPrecedenceFilter(s0_closure); 406 s0 = addDFAState(dfa, new DFAState(s0_closure)); 407 dfa.setPrecedenceStartState(parser.getPrecedence(), s0); 408 } 409 else { 410 s0 = addDFAState(dfa, new DFAState(s0_closure)); 411 dfa.s0 = s0; 412 } 413 } 414 int alt = execATN(dfa, s0, input, index, outerContext); 415 debug(ParserATNSimulator) 416 writefln("DFA after predictATN: %1$s, alt = %s, dfa.states = %s", dfa.toString(parser.getVocabulary), alt, dfa); 417 return alt; 418 } 419 finally { 420 mergeCache = null; // wack cache after each prediction 421 _dfa = null; 422 input.seek(index); 423 input.release(m); 424 } 425 } 426 427 /** 428 * There are some key conditions we're looking for after computing a new 429 * set of ATN configs (proposed DFA state): 430 * <br>- if the set is empty, there is no viable alternative for current symbol 431 * <br>- does the state uniquely predict an alternative? 432 * <br>- does the state have a conflict that would prevent us from 433 * putting it on the work list? 434 * <br><br>We also have some key operations to do: 435 * <br>- add an edge from previous DFA state to potentially new DFA state, D, 436 * upon current symbol but only if adding to work list, which means in all 437 * cases except no viable alternative (and possibly non-greedy decisions?) 438 * <br>- collecting predicates and adding semantic context to DFA accept states 439 * <br>- adding rule context to context-sensitive DFA accept states 440 * <br>- consuming an input symbol 441 * <br>- reporting a conflict 442 * <br>- reporting an ambiguity 443 * <br>- reporting a context sensitivity 444 * <br>- reporting insufficient predicates 445 * <br><br>cover these cases: 446 * <br>- dead end 447 * <br>- single alt 448 * <br>- single alt + preds 449 * <br>- conflict 450 * <br>- conflict + preds 451 */ 452 protected int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, ParserRuleContext outerContext) 453 { 454 debug { 455 writefln("execATN decision %1$s"~ 456 " exec LA(1)==%2$s line %3$s:%4$s", 457 dfa.decision, 458 getLookaheadName(input), 459 input.LT(1).getLine, 460 input.LT(1).getCharPositionInLine()); 461 } 462 463 DFAState previousD = s0; 464 465 debug 466 writefln("s0 = %1$s", s0); 467 468 int t = input.LA(1); 469 470 while (true) { // while more work 471 DFAState D = getExistingTargetState(previousD, t); 472 if (D is null) { 473 D = computeTargetState(dfa, previousD, t); 474 } 475 476 if (D == ERROR) { 477 // if any configs in previous dipped into outer context, that 478 // means that input up to t actually finished entry rule 479 // at least for SLL decision. Full LL doesn't dip into outer 480 // so don't need special case. 481 // We will get an error no matter what so delay until after 482 // decision; better error message. Also, no reachable target 483 // ATN states in SLL implies LL will also get nowhere. 484 // If conflict in states that dip out, choose min since we 485 // will get error no matter what. 486 NoViableAltException e = noViableAlt(input, outerContext, previousD.configs, startIndex); 487 input.seek(startIndex); 488 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext); 489 if (alt != ATN.INVALID_ALT_NUMBER) { 490 return alt; 491 } 492 throw e; 493 } 494 495 if (D.requiresFullContext && mode != PredictionModeConst.SLL) { 496 // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) 497 BitSet conflictingAlts = D.configs.conflictingAlts; 498 if (D.predicates != null) { 499 debug 500 writeln("DFA state has preds in DFA sim LL failover"); 501 int conflictIndex = input.index(); 502 if (conflictIndex != startIndex) { 503 input.seek(startIndex); 504 } 505 506 conflictingAlts = evalSemanticContext(D.predicates, outerContext, true); 507 if (conflictingAlts.cardinality ==1) { 508 debug 509 writeln("Full LL avoided"); 510 return conflictingAlts.nextSetBit(0); 511 } 512 513 if (conflictIndex != startIndex) { 514 // restore the index so reporting the fallback to full 515 // context occurs with the index at the correct spot 516 input.seek(conflictIndex); 517 } 518 } 519 520 debug(dfa_debug) 521 writefln("ctx sensitive state %1$ in %2$s", 522 outerContext, D); 523 bool fullCtx = true; 524 ATNConfigSet s0_closure = 525 computeStartState(dfa.atnStartState, outerContext, 526 fullCtx); 527 reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index()); 528 int alt = execATNWithFullContext(dfa, D, s0_closure, 529 input, startIndex, 530 outerContext); 531 return alt; 532 } 533 534 if ( D.isAcceptState ) { 535 if (D.predicates == null) { 536 return D.prediction; 537 } 538 539 int stopIndex = input.index(); 540 input.seek(startIndex); 541 BitSet alts = evalSemanticContext(D.predicates, outerContext, true); 542 switch (alts.cardinality) { 543 case 0: 544 throw noViableAlt(input, outerContext, D.configs, startIndex); 545 546 case 1: 547 return alts.nextSetBit(0); 548 549 default: 550 // report ambiguity after predicate evaluation to make sure the correct 551 // set of ambig alts is reported. 552 reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs); 553 return alts.nextSetBit(0); 554 } 555 } 556 557 previousD = D; 558 559 if (t != IntStreamConstant.EOF) { 560 input.consume(); 561 t = input.LA(1); 562 } 563 } 564 565 } 566 567 /** 568 * Get an existing target state for an edge in the DFA. If the target state 569 * for the edge has not yet been computed or is otherwise not available, 570 * this method returns {@code null}. 571 * 572 * @param previousD The current DFA state 573 * @param t The next input symbol 574 * @return The existing target DFA state for the given input symbol 575 * {@code t}, or {@code null} if the target state for this edge is not 576 * already cached 577 */ 578 public DFAState getExistingTargetState(DFAState previousD, int t) 579 { 580 DFAState[] edges = previousD.edges; 581 if (edges is null || t + 1 < 0 || t + 1 >= edges.length) { 582 return null; 583 } 584 return edges[t + 1]; 585 } 586 587 /** 588 * Compute a target state for an edge in the DFA, and attempt to add the 589 * computed state and corresponding edge to the DFA. 590 * 591 * @param dfa The DFA 592 * @param previousD The current DFA state 593 * @param t The next input symbol 594 * 595 * @return The computed target DFA state for the given input symbol 596 * {@code t}. If {@code t} does not lead to a valid DFA state, this method 597 * returns {@link #ERROR}. 598 */ 599 public DFAState computeTargetState(DFA dfa, DFAState previousD, int t) 600 { 601 ATNConfigSet reach = computeReachSet(previousD.configs, t, false); 602 if (reach is null) { 603 addDFAEdge(dfa, previousD, t, ERROR); 604 return ERROR; 605 } 606 // create new target state; we'll add to DFA after it's complete 607 DFAState D = new DFAState(reach); 608 int predictedAlt = getUniqueAlt(reach); 609 610 debug { 611 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 612 writefln("SLL altSubSets=%1$s"~ 613 ", configs=%2$s"~ 614 ", predict=%3$s, allSubsetsConflict=%4$s, "~ 615 "conflictingAlts=%5$s", 616 altSubSets, 617 reach, 618 predictedAlt, 619 PredictionMode.allSubsetsConflict(altSubSets), 620 getConflictingAlts(reach)); 621 } 622 623 if (predictedAlt != ATN.INVALID_ALT_NUMBER) { 624 // NO CONFLICT, UNIQUELY PREDICTED ALT 625 D.isAcceptState = true; 626 D.configs.uniqueAlt = predictedAlt; 627 D.prediction = predictedAlt; 628 } 629 else if ( PredictionMode.hasSLLConflictTerminatingPrediction(mode, reach)) { 630 // MORE THAN ONE VIABLE ALTERNATIVE 631 D.configs.conflictingAlts = getConflictingAlts(reach); 632 D.requiresFullContext = true; 633 // in SLL-only mode, we will stop at this state and return the minimum alt 634 D.isAcceptState = true; 635 D.prediction = D.configs.conflictingAlts.nextSetBit(0); 636 } 637 638 if ( D.isAcceptState && D.configs.hasSemanticContext ) { 639 predicateDFAState(D, atn.getDecisionState(dfa.decision)); 640 if (D.predicates != null) { 641 D.prediction = ATN.INVALID_ALT_NUMBER; 642 } 643 } 644 // all adds to dfa are done after we've created full D state 645 D = addDFAEdge(dfa, previousD, t, D); 646 return D; 647 } 648 649 public void predicateDFAState(DFAState dfaState, DecisionState decisionState) 650 { 651 // We need to test all predicates, even in DFA states that 652 // uniquely predict alternative. 653 int nalts = decisionState.getNumberOfTransitions(); 654 // Update DFA so reach becomes accept state with (predicate,alt) 655 // pairs if preds found for conflicting alts 656 BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState.configs); 657 SemanticContext[] altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); 658 if ( altToPred !is null ) { 659 dfaState.predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); 660 dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds 661 } 662 else { 663 // There are preds in configs but they might go away 664 // when OR'd together like {p}? || NONE == NONE. If neither 665 // alt has preds, resolve to min alt 666 dfaState.prediction = altsToCollectPredsFrom.nextSetBit(0); 667 } 668 } 669 670 /** 671 * @uml 672 * comes back with reach.uniqueAlt set to a valid alt 673 */ 674 protected int execATNWithFullContext(DFA dfa, DFAState D, ATNConfigSet s0, TokenStream input, 675 int startIndex, ParserRuleContext outerContext) 676 { 677 debug { 678 writefln("execATNWithFullContext %s", s0); 679 } 680 bool fullCtx = true; 681 bool foundExactAmbig = false; 682 ATNConfigSet reach = null; 683 ATNConfigSet previous = s0; 684 input.seek(startIndex); 685 int t = input.LA(1); 686 int predictedAlt; 687 while (true) { // while more work 688 // System.out.println("LL REACH "+getLookaheadName(input)+ 689 // " from configs.size="+previous.size()+ 690 // " line "+input.LT(1).getLine()+":"+input.LT(1).getCharPositionInLine()); 691 reach = computeReachSet(previous, t, fullCtx); 692 if (reach is null) { 693 // if any configs in previous dipped into outer context, that 694 // means that input up to t actually finished entry rule 695 // at least for LL decision. Full LL doesn't dip into outer 696 // so don't need special case. 697 // We will get an error no matter what so delay until after 698 // decision; better error message. Also, no reachable target 699 // ATN states in SLL implies LL will also get nowhere. 700 // If conflict in states that dip out, choose min since we 701 // will get error no matter what. 702 NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex); 703 input.seek(startIndex); 704 int alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); 705 if ( alt!=ATN.INVALID_ALT_NUMBER ) { 706 return alt; 707 } 708 throw e; 709 } 710 711 BitSet[] altSubSets = PredictionMode.getConflictingAltSubsets(reach); 712 debug { 713 writefln("LL altSubSets=%1$s, PredictionMode.getUniqueAlt(altSubSets)" ~ 714 ", resolvesToJustOneViableAlt=%3$s", 715 altSubSets, 716 PredictionMode.getUniqueAlt(altSubSets), 717 PredictionMode.resolvesToJustOneViableAlt(altSubSets)); 718 } 719 720 // System.out.println("altSubSets: "+altSubSets); 721 // System.err.println("reach="+reach+", "+reach.conflictingAlts); 722 reach.uniqueAlt = getUniqueAlt(reach); 723 // unique prediction? 724 if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) { 725 predictedAlt = reach.uniqueAlt; 726 break; 727 } 728 if ( mode != PredictionModeConst.LL_EXACT_AMBIG_DETECTION ) { 729 predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets); 730 if ( predictedAlt != ATN.INVALID_ALT_NUMBER ) { 731 break; 732 } 733 } 734 else { 735 // In exact ambiguity mode, we never try to terminate early. 736 // Just keeps scarfing until we know what the conflict is 737 if ( PredictionMode.allSubsetsConflict(altSubSets) && 738 PredictionMode.allSubsetsEqual(altSubSets) ) 739 { 740 foundExactAmbig = true; 741 predictedAlt = PredictionMode.getSingleViableAlt(altSubSets); 742 break; 743 } 744 // else there are multiple non-conflicting subsets or 745 // we're not sure what the ambiguity is yet. 746 // So, keep going. 747 } 748 749 previous = reach; 750 if (t != IntStreamConstant.EOF) { 751 input.consume(); 752 t = input.LA(1); 753 } 754 } 755 756 // If the configuration set uniquely predicts an alternative, 757 // without conflict, then we know that it's a full LL decision 758 // not SLL. 759 if ( reach.uniqueAlt != ATN.INVALID_ALT_NUMBER ) { 760 reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index()); 761 return predictedAlt; 762 } 763 764 // We do not check predicates here because we have checked them 765 // on-the-fly when doing full context prediction. 766 767 /* 768 In non-exact ambiguity detection mode, we might actually be able to 769 detect an exact ambiguity, but I'm not going to spend the cycles 770 needed to check. We only emit ambiguity warnings in exact ambiguity 771 mode. 772 773 For example, we might know that we have conflicting configurations. 774 But, that does not mean that there is no way forward without a 775 conflict. It's possible to have nonconflicting alt subsets as in: 776 777 LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] 778 779 from 780 781 [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), 782 (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] 783 784 In this case, (17,1,[5 $]) indicates there is some next sequence that 785 would resolve this without conflict to alternative 1. Any other viable 786 next sequence, however, is associated with a conflict. We stop 787 looking for input because no amount of further lookahead will alter 788 the fact that we should predict alternative 1. We just can't say for 789 sure that there is an ambiguity without looking further. 790 */ 791 reportAmbiguity(dfa, D, startIndex, input.index(), foundExactAmbig, 792 reach.getAlts(), reach); 793 794 return predictedAlt; 795 } 796 797 public ATNConfigSet computeReachSet(ATNConfigSet closure, int t, bool fullCtx) 798 { 799 debug 800 writefln("in computeReachSet, starting closure: %s", closure); 801 802 if (mergeCache is null) { 803 mergeCache = new DoubleKeyMap!(PredictionContext, PredictionContext, PredictionContext); 804 } 805 806 ATNConfigSet intermediate = new ATNConfigSet(fullCtx); 807 808 /* Configurations already in a rule stop state indicate reaching the end 809 * of the decision rule (local context) or end of the start rule (full 810 * context). Once reached, these configurations are never updated by a 811 * closure operation, so they are handled separately for the performance 812 * advantage of having a smaller intermediate set when calling closure. 813 * 814 * For full-context reach operations, separate handling is required to 815 * ensure that the alternative matching the longest overall sequence is 816 * chosen when multiple such configurations can match the input. 817 */ 818 ATNConfig[] skippedStopStates; 819 820 // First figure out where we can reach on input t 821 foreach (c; closure.configs) { 822 debug 823 writefln("testing %1$s at %2$s", getTokenName(t), c.toString); 824 825 if (cast(RuleStopState)c.state) { 826 assert(c.context.isEmpty); 827 if (fullCtx || t == IntStreamConstant.EOF) { 828 skippedStopStates ~= c; 829 } 830 continue; 831 } 832 833 foreach (trans; c.state.transitions) { 834 ATNState target = getReachableTarget(trans, t); 835 if (target) { 836 intermediate.add(new ATNConfig(c, target), mergeCache); 837 } 838 } 839 } 840 841 // Now figure out where the reach operation can take us... 842 843 ATNConfigSet reach = null; 844 845 /* This block optimizes the reach operation for intermediate sets which 846 * trivially indicate a termination state for the overall 847 * adaptivePredict operation. 848 * 849 * The conditions assume that intermediate 850 * contains all configurations relevant to the reach set, but this 851 * condition is not true when one or more configurations have been 852 * withheld in skippedStopStates, or when the current symbol is EOF. 853 */ 854 if (skippedStopStates is null && t != TokenConstantDefinition.EOF) { 855 if (intermediate.size() == 1 ) { 856 // Don't pursue the closure if there is just one state. 857 // It can only have one alternative; just add to result 858 // Also don't pursue the closure if there is unique alternative 859 // among the configurations. 860 reach = intermediate; 861 } 862 else if (getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER ) { 863 // Also don't pursue the closure if there is unique alternative 864 // among the configurations. 865 reach = intermediate; 866 } 867 } 868 869 /* If the reach set could not be trivially determined, perform a closure 870 * operation on the intermediate set to compute its initial value. 871 */ 872 if (reach is null) { 873 reach = new ATNConfigSet(fullCtx); 874 ATNConfig[] closureBusy; 875 bool treatEofAsEpsilon = t == TokenConstantDefinition.EOF; 876 foreach (c; intermediate.configs) 877 { 878 closureATN(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon); 879 } 880 } 881 882 if (t == IntStreamConstant.EOF) { 883 /* After consuming EOF no additional input is possible, so we are 884 * only interested in configurations which reached the end of the 885 * decision rule (local context) or end of the start rule (full 886 * context). Update reach to contain only these configurations. This 887 * handles both explicit EOF transitions in the grammar and implicit 888 * EOF transitions following the end of the decision or start rule. 889 * 890 * When reach==intermediate, no closure operation was performed. In 891 * this case, removeAllConfigsNotInRuleStopState needs to check for 892 * reachable rule stop states as well as configurations already in 893 * a rule stop state. 894 * 895 * This is handled before the configurations in skippedStopStates, 896 * because any configurations potentially added from that list are 897 * already guaranteed to meet this condition whether or not it's 898 * required. 899 */ 900 reach = removeAllConfigsNotInRuleStopState(reach, reach.configs == intermediate.configs); 901 } 902 903 /* If skippedStopStates is not null, then it contains at least one 904 * configuration. For full-context reach operations, these 905 * configurations reached the end of the start rule, in which case we 906 * only add them back to reach if no configuration during the current 907 * closure operation reached such a state. This ensures adaptivePredict 908 * chooses an alternative matching the longest overall sequence when 909 * multiple alternatives are viable. 910 */ 911 if (skippedStopStates !is null && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))) { 912 assert(skippedStopStates.length > 0); 913 foreach (c; skippedStopStates) { 914 reach.add(c, mergeCache); 915 } 916 } 917 918 if (reach.isEmpty) 919 return null; 920 return reach; 921 } 922 923 /** 924 * Return a configuration set containing only the configurations from 925 * {@code configs} which are in a {@link RuleStopState}. If all 926 * configurations in {@code configs} are already in a rule stop state, this 927 * method simply returns {@code configs}. 928 * 929 * <p>When {@code lookToEndOfRule} is true, this method uses 930 * {@link ATN#nextTokens} for each configuration in {@code configs} which is 931 * not already in a rule stop state to see if a rule stop state is reachable 932 * from the configuration via epsilon-only transitions.</p> 933 * 934 * @param configs the configuration set to update 935 * @param lookToEndOfRule when true, this method checks for rule stop states 936 * reachable by epsilon-only transitions from each configuration in 937 * {@code configs}. 938 * 939 * @return {@code configs} if all configurations in {@code configs} are in a 940 * rule stop state, otherwise return a new configuration set containing only 941 * the configurations from {@code configs} which are in a rule stop state 942 */ 943 protected ATNConfigSet removeAllConfigsNotInRuleStopState(ATNConfigSet configs, bool lookToEndOfRule) 944 { 945 if (PredictionMode.allConfigsInRuleStopStates(configs)) { 946 return configs; 947 } 948 949 ATNConfigSet result = new ATNConfigSet(configs.fullCtx); 950 foreach (ATNConfig config; configs.configs) { 951 if (cast(RuleStopState)config.state) { 952 result.add(config, mergeCache); 953 continue; 954 } 955 956 if (lookToEndOfRule && config.state.onlyHasEpsilonTransitions()) { 957 IntervalSet nextTokens = atn.nextTokens(config.state); 958 if (nextTokens.contains(TokenConstantDefinition.EPSILON)) { 959 ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex]; 960 result.add(new ATNConfig(config, endOfRuleState), mergeCache); 961 } 962 } 963 } 964 return result; 965 } 966 967 public ATNConfigSet computeStartState(ATNState p, RuleContext ctx, bool fullCtx) 968 { 969 // always at least the implicit call to start rule 970 PredictionContext initialContext = PredictionContext.fromRuleContext(atn, ctx); 971 ATNConfigSet configs = new ATNConfigSet(fullCtx); 972 973 for (int i=0; i<p.getNumberOfTransitions(); i++) { 974 ATNState target = p.transition(i).target; 975 ATNConfig c = new ATNConfig(target, i+1, initialContext); 976 ATNConfig[] closureBusy; 977 closureATN(c, configs, closureBusy, true, fullCtx, false); 978 } 979 980 return configs; 981 } 982 983 public ATNConfigSet applyPrecedenceFilter(ATNConfigSet configs) 984 { 985 PredictionContext[int] statesFromAlt1; 986 ATNConfigSet configSet = new ATNConfigSet(configs.fullCtx); 987 foreach (config; configs.configs) { 988 // handle alt 1 first 989 if (config.alt != 1) { 990 continue; 991 } 992 993 SemanticContext updatedContext = config.semanticContext.evalPrecedence(parser, _outerContext); 994 if (updatedContext is null) { 995 // the configuration was eliminated 996 continue; 997 } 998 999 statesFromAlt1[config.state.stateNumber] = config.context; 1000 if (updatedContext != config.semanticContext) { 1001 configSet.add(new ATNConfig(config, updatedContext), mergeCache); 1002 } 1003 else { 1004 configSet.add(config, mergeCache); 1005 } 1006 } 1007 1008 foreach (config; configs.configs) { 1009 if (config.alt == 1) { 1010 // already handled 1011 continue; 1012 } 1013 1014 if (!config.isPrecedenceFilterSuppressed()) { 1015 /* In the future, this elimination step could be updated to also 1016 * filter the prediction context for alternatives predicting alt>1 1017 * (basically a graph subtraction algorithm). 1018 */ 1019 if (config.state.stateNumber in statesFromAlt1 && 1020 statesFromAlt1[config.state.stateNumber].opEquals(config.context)) { 1021 // eliminated 1022 continue; 1023 } 1024 } 1025 1026 configSet.add(config, mergeCache); 1027 } 1028 1029 return configSet; 1030 1031 } 1032 1033 public ATNState getReachableTarget(Transition trans, int ttype) 1034 { 1035 if (trans.matches(ttype, 0, atn.maxTokenType)) { 1036 return trans.target; 1037 } 1038 return null; 1039 } 1040 1041 public SemanticContext[] getPredsForAmbigAlts(BitSet ambigAlts, ATNConfigSet configs, 1042 int nalts) 1043 { 1044 // REACH=[1|1|[]|0:0, 1|2|[]|0:1] 1045 /* altToPred starts as an array of all null contexts. The entry at index i 1046 * corresponds to alternative i. altToPred[i] may have one of three values: 1047 * 1. null: no ATNConfig c is found such that c.alt==i 1048 * 2. SemanticContext.NONE: At least one ATNConfig c exists such that 1049 * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, 1050 * alt i has at least one unpredicated config. 1051 * 3. Non-NONE Semantic Context: There exists at least one, and for all 1052 * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. 1053 * 1054 * From this, it is clear that NONE||anything==NONE. 1055 */ 1056 SemanticContext[] altToPred = new SemanticContext[nalts + 1]; 1057 foreach (ATNConfig c; configs.configs) { 1058 if ( ambigAlts.get(c.alt) ) { 1059 altToPred[c.alt] = SemanticContext.or(altToPred[c.alt], c.semanticContext); 1060 } 1061 } 1062 1063 int nPredAlts = 0; 1064 if (!SemanticContext.NONE) { 1065 auto sp = new SemanticContext; 1066 SemanticContext.NONE = sp..new SemanticContext.Predicate; 1067 } 1068 for (int i = 1; i <= nalts; i++) { 1069 if (altToPred[i] is null) { 1070 altToPred[i] = SemanticContext.NONE; 1071 } 1072 else if (altToPred[i] != SemanticContext.NONE) { 1073 nPredAlts++; 1074 } 1075 } 1076 if (nPredAlts == 0) altToPred = null; 1077 debug 1078 writefln("getPredsForAmbigAlts result %s", to!string(altToPred)); 1079 return altToPred; 1080 } 1081 1082 protected PredPrediction[] getPredicatePredictions(BitSet ambigAlts, SemanticContext[] altToPred) 1083 { 1084 PredPrediction[] pairs; 1085 bool containsPredicate = false; 1086 1087 for (int i = 1; i < altToPred.length; i++) { 1088 SemanticContext pred = altToPred[i]; 1089 1090 // unpredicated is indicated by SemanticContext.NONE 1091 assert(pred !is null); 1092 1093 if (ambigAlts.length > 0 && ambigAlts.get(i)) { 1094 pairs ~= new PredPrediction(pred, i); 1095 } 1096 if (pred != SemanticContext.NONE) 1097 containsPredicate = true; 1098 } 1099 1100 if (!containsPredicate) { 1101 return null; 1102 } 1103 1104 // System.out.println(Arrays.toString(altToPred)+"->"+pairs); 1105 return pairs; 1106 1107 } 1108 1109 protected int getAltThatFinishedDecisionEntryRule(ATNConfigSet configs) 1110 { 1111 IntervalSet alts = new IntervalSet(); 1112 foreach (ATNConfig c; configs.configs) { 1113 if ( c.getOuterContextDepth() > 0 || (cast(RuleStopState)c.state && c.context.hasEmptyPath) ) { 1114 alts.add(c.alt); 1115 } 1116 } 1117 if ( alts.size()==0 ) return ATN.INVALID_ALT_NUMBER; 1118 return alts.getMinElement(); 1119 } 1120 1121 /** 1122 * This method is used to improve the localization of error messages by 1123 * choosing an alternative rather than throwing a 1124 * {@link NoViableAltException} in particular prediction scenarios where the 1125 * {@link #ERROR} state was reached during ATN simulation. 1126 * 1127 * <p> 1128 * The default implementation of this method uses the following 1129 * algorithm to identify an ATN configuration which successfully parsed the 1130 * decision entry rule. Choosing such an alternative ensures that the 1131 * {@link ParserRuleContext} returned by the calling rule will be complete 1132 * and valid, and the syntax error will be reported later at a more 1133 * localized location.</p> 1134 * 1135 * <ul> 1136 * <li>If a syntactically valid path or paths reach the end of the decision rule and 1137 * they are semantically valid if predicated, return the min associated alt.</li> 1138 * <li>Else, if a semantically invalid but syntactically valid path exist 1139 * or paths exist, return the minimum associated alt. 1140 * </li> 1141 * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> 1142 * </ul> 1143 * 1144 * <p> 1145 * In some scenarios, the algorithm described above could predict an 1146 * alternative which will result in a {@link FailedPredicateException} in 1147 * the parser. Specifically, this could occur if the <em>only</em> configuration 1148 * capable of successfully parsing to the end of the decision rule is 1149 * blocked by a semantic predicate. By choosing this alternative within 1150 * {@link #adaptivePredict} instead of throwing a 1151 * {@link NoViableAltException}, the resulting 1152 * {@link FailedPredicateException} in the parser will identify the specific 1153 * predicate which is preventing the parser from successfully parsing the 1154 * decision rule, which helps developers identify and correct logic errors 1155 * in semantic predicates. 1156 * </p> 1157 * 1158 * @param configs The ATN configurations which were valid immediately before 1159 * the {@link #ERROR} state was reached 1160 * @param outerContext The is the \gamma_0 initial parser context from the paper 1161 * or the parser stack at the instant before prediction commences. 1162 * 1163 * @return The value to return from {@link #adaptivePredict}, or 1164 * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not 1165 * identified and {@link #adaptivePredict} should report an error instead. 1166 */ 1167 protected int getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet configs, 1168 ParserRuleContext outerContext) 1169 { 1170 IntervalSet alts = new IntervalSet(); 1171 foreach (ATNConfig c; configs.configs) { 1172 if (c.getOuterContextDepth > 0 || (cast(RuleStopState)c.state && c.context.hasEmptyPath) ) { 1173 alts.add(c.alt); 1174 } 1175 } 1176 if (alts.size == 0) 1177 return ATN.INVALID_ALT_NUMBER; 1178 return alts.getMinElement(); 1179 } 1180 1181 protected ATNConfigSetATNConfigSetPair splitAccordingToSemanticValidity(ATNConfigSet configs, 1182 ParserRuleContext outerContext) 1183 { 1184 ATNConfigSet succeeded = new ATNConfigSet(configs.fullCtx); 1185 ATNConfigSet failed = new ATNConfigSet(configs.fullCtx); 1186 foreach (ATNConfig c; configs.configs) { 1187 if (c.semanticContext != SemanticContext.NONE ) { 1188 bool predicateEvaluationResult = evalSemanticContext(c.semanticContext, outerContext, c.alt, configs.fullCtx); 1189 if (predicateEvaluationResult) { 1190 succeeded.add(c); 1191 } 1192 else { 1193 failed.add(c); 1194 } 1195 } 1196 else { 1197 succeeded.add(c); 1198 } 1199 } 1200 ATNConfigSetATNConfigSetPair res; 1201 res.a = succeeded; 1202 res.b = failed; 1203 return res; 1204 } 1205 1206 /** 1207 * Look through a list of predicate/alt pairs, returning alts for the 1208 * pairs that win. A {@code NONE} predicate indicates an alt containing an 1209 * unpredicated config which behaves as "always true." If !complete 1210 * then we stop at the first predicate that evaluates to true. This 1211 * includes pairs with null predicates. 1212 */ 1213 protected BitSet evalSemanticContext(PredPrediction[] predPredictions, ParserRuleContext outerContext, 1214 bool complete) 1215 { 1216 BitSet predictions; 1217 foreach (pair; predPredictions) { 1218 if (pair.pred == SemanticContext.NONE ) { 1219 predictions.set(pair.alt, true); 1220 if (!complete) { 1221 break; 1222 } 1223 continue; 1224 } 1225 1226 bool fullCtx = false; // in dfa 1227 bool predicateEvaluationResult = evalSemanticContext(pair.pred, outerContext, pair.alt, fullCtx); 1228 debug(dfa_debug) { 1229 writefln("eval pred %1$s=%2$s", pair, predicateEvaluationResult); 1230 } 1231 1232 if ( predicateEvaluationResult ) { 1233 debug(dfa_debug) 1234 writefln("PREDICT ", pair.alt); 1235 predictions.set(pair.alt, true); 1236 if (!complete) { 1237 break; 1238 } 1239 } 1240 } 1241 1242 return predictions; 1243 } 1244 1245 public bool evalSemanticContext(SemanticContext pred, ParserRuleContext parserCallStack, 1246 int alt, bool fullCtx) 1247 { 1248 return pred.eval(parser, parserCallStack); 1249 } 1250 1251 protected void closureATN(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1252 bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) 1253 { 1254 int initialDepth = 0; 1255 closureCheckingStopState(config, configs, closureBusy, collectPredicates, 1256 fullCtx, 1257 initialDepth, treatEofAsEpsilon); 1258 assert (!fullCtx || !configs.dipsIntoOuterContext); 1259 } 1260 1261 protected void closureCheckingStopState(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1262 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1263 { 1264 if (cast(RuleStopState)config.state) { 1265 // We hit rule end. If we have context info, use it 1266 // run thru all possible stack tops in ctx 1267 if (!config.context.isEmpty) { 1268 for (int i = 0; i < config.context.size; i++) { 1269 if (config.context.getReturnState(i) == PredictionContext.EMPTY_RETURN_STATE) { 1270 if (fullCtx) { 1271 configs.add(new ATNConfig(config, config.state, 1272 cast(PredictionContext)PredictionContext.EMPTY), mergeCache); 1273 continue; 1274 } 1275 else { 1276 // we have no context info, just chase follow links (if greedy) 1277 closure_(config, configs, closureBusy, collectPredicates, 1278 fullCtx, depth, treatEofAsEpsilon); 1279 } 1280 continue; 1281 } 1282 ATNState returnState = atn.states[config.context.getReturnState(i)]; 1283 PredictionContext newContext = config.context.getParent(i); // "pop" return state 1284 ATNConfig c = new ATNConfig(returnState, config.alt, newContext, 1285 config.semanticContext); 1286 // While we have context to pop back from, we may have 1287 // gotten that context AFTER having falling off a rule. 1288 // Make sure we track that we are now out of context. 1289 // 1290 // This assignment also propagates the 1291 // isPrecedenceFilterSuppressed() value to the new 1292 // configuration. 1293 c.reachesIntoOuterContext = config.reachesIntoOuterContext; 1294 assert(depth > int.min); 1295 closureCheckingStopState(c, configs, closureBusy, collectPredicates, 1296 fullCtx, depth - 1, treatEofAsEpsilon); 1297 } 1298 return; 1299 } 1300 else if (fullCtx) { 1301 // reached end of start rule 1302 configs.add(config, mergeCache); 1303 return; 1304 } 1305 else { 1306 // else if we have no context info, just chase follow links (if greedy) 1307 debug 1308 writefln("FALLING off rule %s", 1309 getRuleName(config.state.ruleIndex)); 1310 } 1311 } 1312 closure_(config, configs, closureBusy, collectPredicates, 1313 fullCtx, depth, treatEofAsEpsilon); 1314 } 1315 1316 /** 1317 * Do the actual work of walking epsilon edges 1318 */ 1319 protected void closure_(ATNConfig config, ATNConfigSet configs, ref ATNConfig[] closureBusy, 1320 bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) 1321 { 1322 ATNState p = config.state; 1323 1324 // optimization 1325 if (!p.onlyHasEpsilonTransitions) { 1326 configs.add(config, mergeCache); 1327 // make sure to not return here, because EOF transitions can act as 1328 // both epsilon transitions and non-epsilon transitions. 1329 // if ( debug ) System.out.println("added config "+configs); 1330 } 1331 for (int i=0; i<p.getNumberOfTransitions(); i++) { 1332 if ( i==0 && canDropLoopEntryEdgeInLeftRecursiveRule(config)) 1333 continue; 1334 Transition t = p.transition(i); 1335 bool continueCollecting = 1336 collectPredicates && !(cast(ActionTransition)t); 1337 ATNConfig c = getEpsilonTarget(config, t, continueCollecting, 1338 depth == 0, fullCtx, treatEofAsEpsilon); 1339 if (c !is null) { 1340 int newDepth = depth; 1341 if (cast(RuleStopState)config.state) { 1342 assert (!fullCtx); 1343 1344 // target fell off end of rule; mark resulting c as having dipped into outer context 1345 // We can't get here if incoming config was rule stop and we had context 1346 // track how far we dip into outer context. Might 1347 // come in handy and we avoid evaluating context dependent 1348 // preds if this is > 0. 1349 if (_dfa !is null && _dfa.isPrecedenceDfa) { 1350 int outermostPrecedenceReturn = (cast(EpsilonTransition)t).outermostPrecedenceReturn; 1351 if (outermostPrecedenceReturn == _dfa.atnStartState.ruleIndex) { 1352 c.setPrecedenceFilterSuppressed(true); 1353 } 1354 } 1355 1356 c.reachesIntoOuterContext++; 1357 1358 if (count(closureBusy, c) > 0) { 1359 // avoid infinite recursion for right-recursive rules 1360 continue; 1361 } 1362 closureBusy ~= c; 1363 1364 configs.dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method 1365 assert (newDepth > int.min); 1366 newDepth--; 1367 debug 1368 writefln("dips into outer ctx: %s", c); 1369 } 1370 else { 1371 if (!t.isEpsilon) { 1372 if (count(closureBusy, c) > 0) { 1373 // avoid infinite recursion for right-recursive rules 1374 continue; 1375 } 1376 closureBusy ~= c; 1377 } 1378 if (cast(RuleTransition)t) { 1379 // latch when newDepth goes negative - once we step out of the entry context we can't return 1380 if (newDepth >= 0) { 1381 newDepth++; 1382 } 1383 } 1384 } 1385 closureCheckingStopState(c, configs, closureBusy, continueCollecting, 1386 fullCtx, newDepth, treatEofAsEpsilon); 1387 } 1388 } 1389 1390 } 1391 1392 public string getRuleName(int index) 1393 { 1394 if (parser !is null && index>=0 ) return parser.getRuleNames()[index]; 1395 return format("<rule %s>",index); 1396 } 1397 1398 public ATNConfig getEpsilonTarget(ATNConfig config, Transition t, bool collectPredicates, 1399 bool inContext, bool fullCtx, bool treatEofAsEpsilon) 1400 { 1401 switch (t.getSerializationType()) { 1402 case TransitionStates.RULE: 1403 return ruleTransition(config, cast(RuleTransition)t); 1404 1405 case TransitionStates.PRECEDENCE: 1406 return precedenceTransition(config, cast(PrecedencePredicateTransition)t, collectPredicates, inContext, fullCtx); 1407 1408 case TransitionStates.PREDICATE: 1409 return predTransition(config, cast(PredicateTransition)t, 1410 collectPredicates, 1411 inContext, 1412 fullCtx); 1413 1414 case TransitionStates.ACTION: 1415 return actionTransition(config, cast(ActionTransition)t); 1416 1417 case TransitionStates.EPSILON: 1418 return new ATNConfig(config, t.target); 1419 1420 case TransitionStates.ATOM: 1421 case TransitionStates.RANGE: 1422 case TransitionStates.SET: 1423 // EOF transitions act like epsilon transitions after the first EOF 1424 // transition is traversed 1425 if (treatEofAsEpsilon) { 1426 if (t.matches(TokenConstantDefinition.EOF, 0, 1)) { 1427 return new ATNConfig(config, t.target); 1428 } 1429 } 1430 1431 return null; 1432 1433 default: 1434 return null; 1435 } 1436 1437 } 1438 1439 protected ATNConfig actionTransition(ATNConfig config, ActionTransition t) 1440 { 1441 debug writefln("ACTION edge %1$s:%2$s", t.ruleIndex, t.actionIndex); 1442 return new ATNConfig(config, t.target); 1443 } 1444 1445 public ATNConfig precedenceTransition(ATNConfig config, PrecedencePredicateTransition pt, 1446 bool collectPredicates, bool inContext, bool fullCtx) 1447 { 1448 debug { 1449 writefln("PRED (collectPredicates=%1$s) %2$s" ~ 1450 ">=_p, ctx dependent=true", collectPredicates, pt.precedence); 1451 if ( parser !is null ) { 1452 writefln("context surrounding pred is %s", 1453 parser.getRuleInvocationStack); 1454 //parser.classinfo); 1455 } 1456 } 1457 1458 ATNConfig c = null; 1459 if (collectPredicates && inContext) { 1460 if ( fullCtx ) { 1461 // In full context mode, we can evaluate predicates on-the-fly 1462 // during closure, which dramatically reduces the size of 1463 // the config sets. It also obviates the need to test predicates 1464 // later during conflict resolution. 1465 int currentPosition = _input.index(); 1466 _input.seek(_startIndex); 1467 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1468 _input.seek(currentPosition); 1469 if ( predSucceeds ) { 1470 c = new ATNConfig(config, pt.target); // no pred context 1471 } 1472 } 1473 else { 1474 SemanticContext newSemCtx = 1475 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1476 c = new ATNConfig(config, pt.target, newSemCtx); 1477 } 1478 } 1479 else { 1480 c = new ATNConfig(config, pt.target); 1481 } 1482 1483 debug 1484 writefln("config from pred transition=%s", c); 1485 return c; 1486 1487 } 1488 1489 protected ATNConfig predTransition(ATNConfig config, PredicateTransition pt, bool collectPredicates, 1490 bool inContext, bool fullCtx) 1491 { 1492 debug { 1493 writefln("PRED (collectPredicates=%1$s) %2$s:%3$s, ctx dependent=%4$s", 1494 collectPredicates, pt.ruleIndex, 1495 pt.predIndex, pt.isCtxDependent); 1496 if ( parser !is null ) { 1497 writefln("context surrounding pred is %s", 1498 parser.getRuleInvocationStack()); 1499 } 1500 } 1501 1502 ATNConfig c = null; 1503 if ( collectPredicates && 1504 (!pt.isCtxDependent || (pt.isCtxDependent&&inContext)) ) 1505 { 1506 if ( fullCtx ) { 1507 // In full context mode, we can evaluate predicates on-the-fly 1508 // during closure, which dramatically reduces the size of 1509 // the config sets. It also obviates the need to test predicates 1510 // later during conflict resolution. 1511 int currentPosition = _input.index(); 1512 _input.seek(_startIndex); 1513 bool predSucceeds = evalSemanticContext(pt.getPredicate(), _outerContext, config.alt, fullCtx); 1514 _input.seek(currentPosition); 1515 if (predSucceeds) { 1516 c = new ATNConfig(config, pt.target); // no pred context 1517 } 1518 } 1519 else { 1520 SemanticContext newSemCtx = 1521 SemanticContext.and(config.semanticContext, pt.getPredicate()); 1522 c = new ATNConfig(config, pt.target, newSemCtx); 1523 } 1524 } 1525 else { 1526 c = new ATNConfig(config, pt.target); 1527 } 1528 1529 debug writefln("config from pred transition=",c); 1530 return c; 1531 } 1532 1533 public ATNConfig ruleTransition(ATNConfig config, RuleTransition t) 1534 { 1535 debug { 1536 writefln("CALL rule %1$s, ctx=%2$s", getRuleName(t.target.ruleIndex), config.context); 1537 } 1538 ATNState returnState = t.followState; 1539 PredictionContext newContext = 1540 SingletonPredictionContext.create(config.context, returnState.stateNumber); 1541 return new ATNConfig(config, t.target, newContext); 1542 } 1543 1544 /** 1545 * Gets a {@link BitSet} containing the alternatives in {@code configs} 1546 * which are part of one or more conflicting alternative subsets. 1547 * 1548 * @param configs The {@link ATNConfigSet} to analyze. 1549 * @return The alternatives in {@code configs} which are part of one or more 1550 * conflicting alternative subsets. If {@code configs} does not contain any 1551 * conflicting subsets, this method returns an empty {@link BitSet}. 1552 */ 1553 public BitSet getConflictingAlts(ATNConfigSet configs) 1554 { 1555 BitSet[] altsets = PredictionMode.getConflictingAltSubsets(configs); 1556 return PredictionMode.getAlts(altsets); 1557 } 1558 1559 /** 1560 * Sam pointed out a problem with the previous definition, v3, of 1561 * ambiguous states. If we have another state associated with conflicting 1562 * alternatives, we should keep going. For example, the following grammar 1563 * 1564 * s : (ID | ID ID?) ';' ; 1565 * 1566 * When the ATN simulation reaches the state before ';', it has a DFA 1567 * state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 1568 * 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node 1569 * because alternative to has another way to continue, via [6|2|[]]. 1570 * The key is that we have a single state that has config's only associated 1571 * with a single alternative, 2, and crucially the state transitions 1572 * among the configurations are all non-epsilon transitions. That means 1573 * we don't consider any conflicts that include alternative 2. So, we 1574 * ignore the conflict between alts 1 and 2. We ignore a set of 1575 * conflicting alts when there is an intersection with an alternative 1576 * associated with a single alt state in the state→config-list map. 1577 * 1578 * It's also the case that we might have two conflicting configurations but 1579 * also a 3rd nonconflicting configuration for a different alternative: 1580 * [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: 1581 * 1582 * a : A | A | A B ; 1583 * 1584 * After matching input A, we reach the stop state for rule A, state 1. 1585 * State 8 is the state right before B. Clearly alternatives 1 and 2 1586 * conflict and no amount of further lookahead will separate the two. 1587 * However, alternative 3 will be able to continue and so we do not 1588 * stop working on this state. In the previous example, we're concerned 1589 * with states associated with the conflicting alternatives. Here alt 1590 * 3 is not associated with the conflicting configs, but since we can continue 1591 * looking for input reasonably, I don't declare the state done. We 1592 * ignore a set of conflicting alts when we have an alternative 1593 * that we still need to pursue. 1594 */ 1595 public BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) 1596 { 1597 auto conflictingAlts = new BitSet(1); 1598 if (configs.uniqueAlt != ATN.INVALID_ALT_NUMBER) { 1599 conflictingAlts.set(configs.uniqueAlt, true); 1600 } 1601 else { 1602 *conflictingAlts = configs.conflictingAlts; 1603 } 1604 return *conflictingAlts; 1605 } 1606 1607 public string getTokenName(int t) 1608 { 1609 if (t == TokenConstantDefinition.EOF) { 1610 return "EOF"; 1611 } 1612 1613 Vocabulary vocabulary = parser !is null ? parser.getVocabulary() : new VocabularyImpl(null, null, null); 1614 string displayName = vocabulary.getDisplayName(t); 1615 if (displayName == to!string(t)) { 1616 return displayName; 1617 } 1618 1619 return displayName ~ "<" ~ to!string(t) ~ ">"; 1620 } 1621 1622 public string getLookaheadName(TokenStream input) 1623 { 1624 return getTokenName(input.LA(1)); 1625 } 1626 1627 /** 1628 * Used for debugging in adaptivePredict around execATN but I cut 1629 * it out for clarity now that alg. works well. We can leave this 1630 * "dead" code for a bit. 1631 */ 1632 public void dumpDeadEndConfigs(NoViableAltException nvae) 1633 { 1634 debug 1635 writefln("dead end configs: "); 1636 foreach (ATNConfig c; nvae.getDeadEndConfigs().configs) { 1637 string trans = "no edges"; 1638 if (c.state.getNumberOfTransitions > 0) { 1639 Transition t = c.state.transition(0); 1640 if (t.classinfo == AtomTransition.classinfo) { 1641 AtomTransition at = cast(AtomTransition)t; 1642 trans = "Atom " ~ getTokenName(at._label); 1643 } 1644 else if (t.classinfo == SetTransition.classinfo) { 1645 SetTransition st = cast(SetTransition)t; 1646 bool not = (st.classinfo == NotSetTransition.classinfo); 1647 trans = (not?"~":"") ~ "Set "~ st.set.toString(); 1648 } 1649 } 1650 debug 1651 writefln("%1$s:%2$s", c.toString(parser, true), trans); 1652 } 1653 } 1654 1655 protected NoViableAltException noViableAlt(TokenStream input, ParserRuleContext outerContext, 1656 ATNConfigSet configs, int startIndex) 1657 { 1658 return new NoViableAltException(parser, input, 1659 input.get(startIndex), 1660 input.LT(1), 1661 configs, outerContext); 1662 } 1663 1664 protected static int getUniqueAlt(ATNConfigSet configs) 1665 { 1666 int alt = ATN.INVALID_ALT_NUMBER; 1667 foreach (ATNConfig c; configs.configs) { 1668 if (alt == ATN.INVALID_ALT_NUMBER) { 1669 alt = c.alt; // found first alt 1670 } 1671 else if (c.alt != alt) { 1672 return ATN.INVALID_ALT_NUMBER; 1673 } 1674 } 1675 return alt; 1676 } 1677 1678 /** 1679 * Add an edge to the DFA, if possible. This method calls 1680 * {@link #addDFAState} to ensure the {@code to} state is present in the 1681 * DFA. If {@code from} is {@code null}, or if {@code t} is outside the 1682 * range of edges that can be represented in the DFA tables, this method 1683 * returns without adding the edge to the DFA. 1684 * 1685 * <p>If {@code to} is {@code null}, this method returns {@code null}. 1686 * Otherwise, this method returns the {@link DFAState} returned by calling 1687 * {@link #addDFAState} for the {@code to} state.</p> 1688 * 1689 * @param dfa The DFA 1690 * @param from The source state for the edge 1691 * @param t The input symbol 1692 * @param to The target state for the edge 1693 * 1694 * @return If {@code to} is {@code null}, this method returns {@code null}; 1695 * otherwise this method returns the result of calling {@link #addDFAState} 1696 * on {@code to} 1697 */ 1698 protected DFAState addDFAEdge(ref DFA dfa, DFAState from, int t, DFAState to) 1699 { 1700 debug { 1701 writefln("\nEDGE %1$s -> %2$s upon %3$s", from, to, getTokenName(t)); 1702 } 1703 if (to is null) { 1704 return null; 1705 } 1706 to = addDFAState(dfa, to); // used existing if possible not incoming 1707 if (from is null || t < -1 || t > atn.maxTokenType) { 1708 return to; 1709 } 1710 synchronized (from) { 1711 if (from.edges == null) { 1712 from.edges = new DFAState[atn.maxTokenType+1+1]; 1713 } 1714 from.edges[t+1] = to; // connect 1715 } 1716 1717 debug { 1718 writefln("end dfa.decision = %s, dfa.states = %s", dfa.decision, dfa.states); 1719 } 1720 1721 return to; 1722 } 1723 1724 /** 1725 * Add state {@code D} to the DFA if it is not already present, and return 1726 * the actual instance stored in the DFA. If a state equivalent to {@code D} 1727 * is already in the DFA, the existing state is returned. Otherwise this 1728 * method returns {@code D} after adding it to the DFA. 1729 * 1730 * <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and 1731 * does not change the DFA.</p> 1732 * 1733 * @param dfa The dfa 1734 * @param D The DFA state to add 1735 * @return The state stored in the DFA. This will be either the existing 1736 * state if {@code D} is already in the DFA, or {@code D} itself if the 1737 * state was not already present. 1738 */ 1739 protected DFAState addDFAState(ref DFA dfa, DFAState D) 1740 { 1741 if (D == ERROR) { 1742 return D; 1743 } 1744 debug 1745 writefln("adding new dfa: D = %s, dfa.states = %s,\n(D in dfa.states) = %s", D, dfa.states, D in dfa.states); 1746 if (D in dfa.states) 1747 return dfa.states[D]; 1748 D.stateNumber = to!int(dfa.states.length); 1749 if (!D.configs.readonly) { 1750 D.configs.optimizeConfigs(this); 1751 D.configs.readonly(true); 1752 } 1753 dfa.states[D] = D; 1754 debug 1755 writefln("adding new DFA state end: D = %1$s, dfa.states = %2$s", D, dfa.states); 1756 return D; 1757 } 1758 1759 protected void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, ATNConfigSet configs, 1760 int startIndex, int stopIndex) 1761 { 1762 debug(retry_debug) { 1763 import antlr.v4.runtime.misc.Interval; 1764 Interval interval = Interval.of(startIndex, stopIndex); 1765 writefln("reportAttemptingFullContext decision=%1$s:%2$s, input=%3$s", 1766 dfa.decision, configs, 1767 parser.getTokenStream().getText(interval)); 1768 } 1769 if (parser) 1770 parser.getErrorListenerDispatch().reportAttemptingFullContext(parser, 1771 dfa, 1772 startIndex, 1773 stopIndex, 1774 conflictingAlts, 1775 configs); 1776 } 1777 1778 protected void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, 1779 int startIndex, int stopIndex) 1780 { 1781 debug(retry_debug) { 1782 import antlr.v4.runtime.misc.Interval; 1783 Interval interval = Interval.of(startIndex, stopIndex); 1784 writefln("reportContextSensitivity decision=%1$s:%2$s, input=%3$s", 1785 dfa.decision, configs, parser.getTokenStream().getText(interval)); 1786 } 1787 if (parser !is null) 1788 parser.getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); 1789 } 1790 1791 protected void reportAmbiguity(DFA dfa, DFAState D, int startIndex, int stopIndex, bool exact, 1792 BitSet ambigAlts, ATNConfigSet configs) 1793 { 1794 debug(retry_debug) { 1795 import antlr.v4.runtime.misc.Interval; 1796 Interval interval = Interval.of(startIndex, stopIndex); 1797 writefln("reportAmbiguity %1$s:%2$s, input=%3$s", 1798 ambigAlts, configs, parser.getTokenStream().getText(interval)); 1799 } 1800 if (parser !is null) parser.getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, 1801 exact, ambigAlts, configs); 1802 } 1803 1804 public void setPredictionMode(PredictionModeConst mode) 1805 { 1806 this.mode = mode; 1807 } 1808 1809 public PredictionModeConst getPredictionMode() 1810 { 1811 return mode; 1812 } 1813 1814 public Parser getParser() 1815 { 1816 return parser; 1817 } 1818 1819 protected bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) 1820 { 1821 auto TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = true; 1822 if ( TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT ) 1823 return false; 1824 return false; 1825 } 1826 1827 }