1 /* 2 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.PredictionMode; 8 9 import antlr.v4.runtime.atn.ATN; 10 import antlr.v4.runtime.atn.ATNConfig; 11 import antlr.v4.runtime.atn.ATNConfigSet; 12 import antlr.v4.runtime.atn.ATNState; 13 import antlr.v4.runtime.atn.AltAndContextMap; 14 import antlr.v4.runtime.atn.PredictionModeConst; 15 import antlr.v4.runtime.atn.RuleStopState; 16 import antlr.v4.runtime.atn.ContextMapObjectEqualityComparator; 17 import antlr.v4.runtime.atn.SemanticContext; 18 import antlr.v4.runtime.misc.BitSet; 19 import std.typecons; 20 21 /** 22 * This enumeration defines the prediction modes available in ANTLR 4 along with 23 * utility methods for analyzing configuration sets for conflicts and/or 24 * ambiguities. 25 */ 26 class PredictionMode 27 { 28 29 /** 30 * Computes the SLL prediction termination condition. 31 * 32 * <p> 33 * This method computes the SLL prediction termination condition for both of 34 * the following cases.</p> 35 * 36 * <ul> 37 * <li>The usual SLL+LL fallback upon SLL conflict</li> 38 * <li>Pure SLL without LL fallback</li> 39 * </ul> 40 * 41 * <p><strong>COMBINED SLL+LL PARSING</strong></p> 42 * 43 * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are 44 * ensured regardless of how the termination condition is computed by this 45 * method. Due to the substantially higher cost of LL prediction, the 46 * prediction should only fall back to LL when the additional lookahead 47 * cannot lead to a unique SLL prediction.</p> 48 * 49 * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only 50 * conflicting subsets should fall back to full LL, even if the 51 * configuration sets don't resolve to the same alternative (e.g. 52 * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting 53 * configuration, SLL could continue with the hopes that more lookahead will 54 * resolve via one of those non-conflicting configurations.</p> 55 * 56 * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing) 57 * stops when it sees only conflicting configuration subsets. In contrast, 58 * full LL keeps going when there is uncertainty.</p> 59 * 60 * <p><strong>HEURISTIC</strong></p> 61 * 62 * <p>As a heuristic, we stop prediction when we see any conflicting subset 63 * unless we see a state that only has one alternative associated with it. 64 * The single-alt-state thing lets prediction continue upon rules like 65 * (otherwise, it would admit defeat too soon):</p> 66 * 67 * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p> 68 * 69 * <p>When the ATN simulation reaches the state before {@code ';'}, it has a 70 * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally 71 * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop 72 * processing this node because alternative to has another way to continue, 73 * via {@code [6|2|[]]}.</p> 74 * 75 * <p>It also let's us continue for this rule:</p> 76 * 77 * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p> 78 * 79 * <p>After matching input A, we reach the stop state for rule A, state 1. 80 * State 8 is the state right before B. Clearly alternatives 1 and 2 81 * conflict and no amount of further lookahead will separate the two. 82 * However, alternative 3 will be able to continue and so we do not stop 83 * working on this state. In the previous example, we're concerned with 84 * states associated with the conflicting alternatives. Here alt 3 is not 85 * associated with the conflicting configs, but since we can continue 86 * looking for input reasonably, don't declare the state done.</p> 87 * 88 * <p><strong>PURE SLL PARSING</strong></p> 89 * 90 * <p>To handle pure SLL parsing, all we have to do is make sure that we 91 * combine stack contexts for configurations that differ only by semantic 92 * predicate. From there, we can do the usual SLL termination heuristic.</p> 93 * 94 * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p> 95 * 96 * <p>SLL decisions don't evaluate predicates until after they reach DFA stop 97 * states because they need to create the DFA cache that works in all 98 * semantic situations. In contrast, full LL evaluates predicates collected 99 * during start state computation so it can ignore predicates thereafter. 100 * This means that SLL termination detection can totally ignore semantic 101 * predicates.</p> 102 * 103 * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not 104 * semantic predicate contexts so we might see two configurations like the 105 * following.</p> 106 * 107 * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p> 108 * 109 * <p>Before testing these configurations against others, we have to merge 110 * {@code x} and {@code x'} (without modifying the existing configurations). 111 * For example, we test {@code (x+x')==x''} when looking for conflicts in 112 * the following configurations.</p> 113 * 114 * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p> 115 * 116 * <p>If the configuration set has predicates (as indicated by 117 * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of 118 * the configurations to strip out all of the predicates so that a standard 119 * {@link ATNConfigSet} will merge everything ignoring predicates.</p> 120 */ 121 public static bool hasSLLConflictTerminatingPrediction(PredictionModeConst mode, ATNConfigSet configs) 122 { 123 /* Configs in rule stop states indicate reaching the end of the decision 124 * rule (local context) or end of start rule (full context). If all 125 * configs meet this condition, then none of the configurations is able 126 * to match additional input so we terminate prediction. 127 */ 128 if (allConfigsInRuleStopStates(configs)) { 129 return true; 130 } 131 132 // pure SLL mode parsing 133 if (mode == PredictionModeConst.SLL) { 134 // Don't bother with combining configs from different semantic 135 // contexts if we can fail over to full LL; costs more time 136 // since we'll often fail over anyway. 137 if (configs.hasSemanticContext) { 138 // dup configs, tossing out semantic predicates 139 ATNConfigSet dupli = new ATNConfigSet(); 140 auto _ = new SemanticContext(); 141 foreach (ATNConfig c; configs.configs) { 142 c = new ATNConfig(c, SemanticContext.NONE); 143 dupli.add(c); 144 } 145 configs = dupli; 146 } 147 // now we have combined contexts for configs with dissimilar preds 148 } 149 150 // pure SLL or combined SLL+LL mode parsing 151 152 BitSet[] altsets = getConflictingAltSubsets(configs); 153 bool heuristic = 154 hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); 155 return heuristic; 156 157 } 158 159 /** 160 * Checks if any configuration in {@code configs} is in a 161 * {@link RuleStopState}. Configurations meeting this condition have reached 162 * the end of the decision rule (local context) or end of start rule (full 163 * context). 164 * 165 * @param configs the configuration set to test 166 * @return {@code true} if any configuration in {@code configs} is in a 167 * {@link RuleStopState}, otherwise {@code false} 168 */ 169 public static bool hasConfigInRuleStopState(ATNConfigSet configs) 170 { 171 foreach (ATNConfig c; configs.configs) 172 { 173 if (cast(RuleStopState)c.state) { 174 return true; 175 } 176 } 177 return false; 178 } 179 180 /** 181 * Checks if all configurations in {@code configs} are in a 182 * {@link RuleStopState}. Configurations meeting this condition have reached 183 * the end of the decision rule (local context) or end of start rule (full 184 * context). 185 * 186 * @param configs the configuration set to test 187 * @return {@code true} if all configurations in {@code configs} are in a 188 * {@link RuleStopState}, otherwise {@code false} 189 */ 190 public static bool allConfigsInRuleStopStates(ATNConfigSet configs) 191 { 192 foreach (ATNConfig config; configs.configs) { 193 if (! cast(RuleStopState)config.state) { 194 return false; 195 } 196 } 197 return true; 198 } 199 200 /** 201 * Full LL prediction termination. 202 * 203 * <p>Can we stop looking ahead during ATN simulation or is there some 204 * uncertainty as to which alternative we will ultimately pick, after 205 * consuming more input? Even if there are partial conflicts, we might know 206 * that everything is going to resolve to the same minimum alternative. That 207 * means we can stop since no more lookahead will change that fact. On the 208 * other hand, there might be multiple conflicts that resolve to different 209 * minimums. That means we need more look ahead to decide which of those 210 * alternatives we should predict.</p> 211 * 212 * <p>The basic idea is to split the set of configurations {@code C}, into 213 * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with 214 * non-conflicting configurations. Two configurations conflict if they have 215 * identical {@link ATNConfig#state} and {@link ATNConfig#context} values 216 * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} 217 * and {@code (s, j, ctx, _)} for {@code i!=j}.</p> 218 * 219 * <p>Reduce these configuration subsets to the set of possible alternatives. 220 * You can compute the alternative subsets in one pass as follows:</p> 221 * 222 * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in 223 * {@code C} holding {@code s} and {@code ctx} fixed.</p> 224 * 225 * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p> 226 * 227 * <pre> 228 * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not 229 * alt and not pred 230 * </pre> 231 * 232 * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p> 233 * 234 * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with 235 * {@code s} and {@code ctx}.</p> 236 * 237 * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If 238 * the union of these alternative subsets is a singleton, then no amount of 239 * more lookahead will help us. We will always pick that alternative. If, 240 * however, there is more than one alternative, then we are uncertain which 241 * alternative to predict and must continue looking for resolution. We may 242 * or may not discover an ambiguity in the future, even if there are no 243 * conflicting subsets this round.</p> 244 * 245 * <p>The biggest sin is to terminate early because it means we've made a 246 * decision but were uncertain as to the eventual outcome. We haven't used 247 * enough lookahead. On the other hand, announcing a conflict too late is no 248 * big deal; you will still have the conflict. It's just inefficient. It 249 * might even look until the end of file.</p> 250 * 251 * <p>No special consideration for semantic predicates is required because 252 * predicates are evaluated on-the-fly for full LL prediction, ensuring that 253 * no configuration contains a semantic context during the termination 254 * check.</p> 255 * 256 * <p><strong>CONFLICTING CONFIGS</strong></p> 257 * 258 * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict 259 * when {@code i!=j} but {@code x=x'}. Because we merge all 260 * {@code (s, i, _)} configurations together, that means that there are at 261 * most {@code n} configurations associated with state {@code s} for 262 * {@code n} possible alternatives in the decision. The merged stacks 263 * complicate the comparison of configuration contexts {@code x} and 264 * {@code x'}. Sam checks to see if one is a subset of the other by calling 265 * merge and checking to see if the merged result is either {@code x} or 266 * {@code x'}. If the {@code x} associated with lowest alternative {@code i} 267 * is the superset, then {@code i} is the only possible prediction since the 268 * others resolve to {@code min(i)} as well. However, if {@code x} is 269 * associated with {@code j>i} then at least one stack configuration for 270 * {@code j} is not in conflict with alternative {@code i}. The algorithm 271 * should keep going, looking for more lookahead due to the uncertainty.</p> 272 * 273 * <p>For simplicity, I'm doing a equality check between {@code x} and 274 * {@code x'} that lets the algorithm continue to consume lookahead longer 275 * than necessary. The reason I like the equality is of course the 276 * simplicity but also because that is the test you need to detect the 277 * alternatives that are actually in conflict.</p> 278 * 279 * <p><strong>CONTINUE/STOP RULE</strong></p> 280 * 281 * <p>Continue if union of resolved alternative sets from non-conflicting and 282 * conflicting alternative subsets has more than one alternative. We are 283 * uncertain about which alternative to predict.</p> 284 * 285 * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which 286 * alternatives are still in the running for the amount of input we've 287 * consumed at this point. The conflicting sets let us to strip away 288 * configurations that won't lead to more states because we resolve 289 * conflicts to the configuration with a minimum alternate for the 290 * conflicting set.</p> 291 * 292 * <p><strong>CASES</strong></p> 293 * 294 * <ul> 295 * 296 * <li>no conflicts and more than 1 alternative in set => continue</li> 297 * 298 * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, 299 * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set 300 * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = 301 * {@code {1,3}} => continue 302 * </li> 303 * 304 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, 305 * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set 306 * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = 307 * {@code {1}} => stop and predict 1</li> 308 * 309 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, 310 * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U 311 * {@code {1}} = {@code {1}} => stop and predict 1, can announce 312 * ambiguity {@code {1,2}}</li> 313 * 314 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, 315 * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U 316 * {@code {2}} = {@code {1,2}} => continue</li> 317 * 318 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, 319 * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U 320 * {@code {3}} = {@code {1,3}} => continue</li> 321 * 322 * </ul> 323 * 324 * <p><strong>EXACT AMBIGUITY DETECTION</strong></p> 325 * 326 * <p>If all states report the same conflicting set of alternatives, then we 327 * know we have the exact ambiguity set.</p> 328 * 329 * <p><code>|A_<em>i</em>|>1</code> and 330 * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p> 331 * 332 * <p>In other words, we continue examining lookahead until all {@code A_i} 333 * have more than one alternative and all {@code A_i} are the same. If 334 * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate 335 * because the resolved set is {@code {1}}. To determine what the real 336 * ambiguity is, we have to know whether the ambiguity is between one and 337 * two or one and three so we keep going. We can only stop prediction when 338 * we need exact ambiguity detection when the sets look like 339 * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p> 340 */ 341 public static int resolvesToJustOneViableAlt(BitSet[] altsets) 342 { 343 return getSingleViableAlt(altsets); 344 } 345 346 /** 347 * Determines if every alternative subset in {@code altsets} contains more 348 * than one alternative. 349 * 350 * @param altsets a collection of alternative subsets 351 * @return {@code true} if every {@link BitSet} in {@code altsets} has 352 * {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} 353 */ 354 public static bool allSubsetsConflict(BitSet[] altsets) 355 { 356 return !hasNonConflictingAltSet(altsets); 357 } 358 359 /** 360 * Determines if any single alternative subset in {@code altsets} contains 361 * exactly one alternative. 362 * 363 * @param altsets a collection of alternative subsets 364 * @return {@code true} if {@code altsets} contains a {@link BitSet} with 365 * {@link BitSet#cardinality cardinality} 1, otherwise {@code false} 366 */ 367 public static bool hasNonConflictingAltSet(BitSet[] altsets) 368 { 369 foreach (BitSet alts; altsets) 370 { 371 if (alts.cardinality == 1) { 372 return true; 373 } 374 } 375 return false; 376 } 377 378 /** 379 * Determines if any single alternative subset in {@code altsets} contains 380 * more than one alternative. 381 * 382 * @param altsets a collection of alternative subsets 383 * @return {@code true} if {@code altsets} contains a {@link BitSet} with 384 * {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} 385 */ 386 public static bool hasConflictingAltSet(BitSet[] altsets) 387 { 388 foreach (BitSet alts; altsets) { 389 if (alts.cardinality >1) { 390 return true; 391 } 392 } 393 return false; 394 } 395 396 /** 397 * Determines if every alternative subset in {@code altsets} is equivalent. 398 * 399 * @param altsets a collection of alternative subsets 400 * @return {@code true} if every member of {@code altsets} is equal to the 401 * others, otherwise {@code false} 402 */ 403 public static bool allSubsetsEqual(BitSet[] altsets) 404 { 405 BitSet first; 406 foreach (i, el; altsets) { 407 if (i == 0) 408 first = el; 409 else 410 if (!el.opEquals(first)) return false; 411 } 412 return true; 413 } 414 415 /** 416 * Returns the unique alternative predicted by all alternative subsets in 417 * {@code altsets}. If no such alternative exists, this method returns 418 * {@link ATN#INVALID_ALT_NUMBER}. 419 * 420 * @param altsets a collection of alternative subsets 421 */ 422 public static int getUniqueAlt(BitSet[] altsets) 423 { 424 BitSet all = getAlts(altsets); 425 if (all.cardinality == 1) 426 return all.nextSetBit(0); 427 return ATN.INVALID_ALT_NUMBER; 428 } 429 430 /** 431 * Gets the complete set of represented alternatives for a collection of 432 * alternative subsets. This method returns the union of each {@link BitSet} 433 * in {@code altsets}. 434 * 435 * @param altsets a collection of alternative subsets 436 * @return the set of represented alternatives in {@code altsets} 437 */ 438 public static BitSet getAlts(BitSet[] altsets) 439 { 440 BitSet all; 441 foreach (BitSet alts; altsets) { 442 all = all.or(alts); 443 } 444 return all; 445 } 446 447 public static BitSet getAlts(ATNConfigSet configs) 448 { 449 BitSet *alts; 450 alts = new BitSet; 451 foreach (ATNConfig config; configs.configs) { 452 alts.set(config.alt, true); 453 } 454 return *alts; 455 } 456 457 /** 458 * This function gets the conflicting alt subsets from a configuration set. 459 * For each configuration {@code c} in {@code configs}: 460 * 461 * <pre> 462 * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses stack and context, not 463 * alt and not pred 464 * </pre> 465 */ 466 public static BitSet[] getConflictingAltSubsets(ATNConfigSet configs) 467 { 468 AltAndContextMap configToAlts; 469 BitSet *alts; 470 471 foreach (ATNConfig c; configs.configs) { 472 auto c_copy = new ATNConfig(c); 473 if (!configToAlts.hasKey(c_copy)) 474 { 475 alts = new BitSet(); 476 alts.set(0, false); // initialise alts 477 configToAlts.put(c_copy, *alts); 478 } 479 else { 480 *alts = configToAlts.get(c_copy); 481 } 482 alts.set(c_copy.alt, true); 483 } 484 return configToAlts.altAndContextMap.values; 485 } 486 487 /** 488 * Get a map from state to alt subset from a configuration set. For each 489 * configuration {@code c} in {@code configs}: 490 * 491 * <pre> 492 * map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt} 493 * </pre> 494 */ 495 public static BitSet[ATNState] getStateToAltMap(ATNConfigSet configs) 496 { 497 BitSet[ATNState] m; 498 foreach (ATNConfig c; configs.configs) { 499 if (!(c.state in m)) 500 { 501 BitSet alts; 502 alts.set(c.alt, true); 503 m[c.state] = alts; 504 } 505 else 506 { 507 m[c.state].set(c.alt, true); 508 } 509 } 510 return m; 511 } 512 513 public static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs) 514 { 515 BitSet[ATNState] x = getStateToAltMap(configs); 516 foreach (alts; x.values) { 517 if (alts.cardinality == 1) 518 return true; 519 } 520 return false; 521 } 522 523 public static int getSingleViableAlt(BitSet[] altsets) 524 { 525 BitSet viableAlts; 526 foreach (BitSet alts; altsets) { 527 int minAlt = alts.nextSetBit(0); 528 viableAlts.set(minAlt, true); 529 if (viableAlts.cardinality > 1) { // more than 1 viable alt 530 return ATN.INVALID_ALT_NUMBER; 531 } 532 } 533 return viableAlts.nextSetBit(0); 534 } 535 536 }