1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.PredictionMode; 8 9 import std.typecons; 10 import antlr.v4.runtime.atn.ATN; 11 import antlr.v4.runtime.atn.ATNConfig; 12 import antlr.v4.runtime.atn.PredictionModeConst; 13 import antlr.v4.runtime.atn.ATNConfigSet; 14 import antlr.v4.runtime.atn.ATNState; 15 import antlr.v4.runtime.atn.AltAndContextMap; 16 import antlr.v4.runtime.atn.RuleStopState; 17 import antlr.v4.runtime.atn.SemanticContext; 18 import antlr.v4.runtime.misc.BitSet; 19 20 // Class PredictionMode 21 /** 22 * Utility methods for analyzing configuration sets for conflicts and/or 23 * ambiguities. 24 */ 25 class PredictionMode 26 { 27 28 /** 29 * Computes the SLL prediction termination condition. 30 * 31 * <p> 32 * This method computes the SLL prediction termination condition for both of 33 * the following cases.</p> 34 * 35 * <ul> 36 * <li>The usual SLL+LL fallback upon SLL conflict</li> 37 * <li>Pure SLL without LL fallback</li> 38 * </ul> 39 * 40 * <p><strong>COMBINED SLL+LL PARSING</strong></p> 41 * 42 * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are 43 * ensured regardless of how the termination condition is computed by this 44 * method. Due to the substantially higher cost of LL prediction, the 45 * prediction should only fall back to LL when the additional lookahead 46 * cannot lead to a unique SLL prediction.</p> 47 * 48 * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only 49 * conflicting subsets should fall back to full LL, even if the 50 * configuration sets don't resolve to the same alternative (e.g. 51 * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting 52 * configuration, SLL could continue with the hopes that more lookahead will 53 * resolve via one of those non-conflicting configurations.</p> 54 * 55 * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing) 56 * stops when it sees only conflicting configuration subsets. In contrast, 57 * full LL keeps going when there is uncertainty.</p> 58 * 59 * <p><strong>HEURISTIC</strong></p> 60 * 61 * <p>As a heuristic, we stop prediction when we see any conflicting subset 62 * unless we see a state that only has one alternative associated with it. 63 * The single-alt-state thing lets prediction continue upon rules like 64 * (otherwise, it would admit defeat too soon):</p> 65 * 66 * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p> 67 * 68 * <p>When the ATN simulation reaches the state before {@code ';'}, it has a 69 * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally 70 * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop 71 * processing this node because alternative to has another way to continue, 72 * via {@code [6|2|[]]}.</p> 73 * 74 * <p>It also let's us continue for this rule:</p> 75 * 76 * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p> 77 * 78 * <p>After matching input A, we reach the stop state for rule A, state 1. 79 * State 8 is the state right before B. Clearly alternatives 1 and 2 80 * conflict and no amount of further lookahead will separate the two. 81 * However, alternative 3 will be able to continue and so we do not stop 82 * working on this state. In the previous example, we're concerned with 83 * states associated with the conflicting alternatives. Here alt 3 is not 84 * associated with the conflicting configs, but since we can continue 85 * looking for input reasonably, don't declare the state done.</p> 86 * 87 * <p><strong>PURE SLL PARSING</strong></p> 88 * 89 * <p>To handle pure SLL parsing, all we have to do is make sure that we 90 * combine stack contexts for configurations that differ only by semantic 91 * predicate. From there, we can do the usual SLL termination heuristic.</p> 92 * 93 * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p> 94 * 95 * <p>SLL decisions don't evaluate predicates until after they reach DFA stop 96 * states because they need to create the DFA cache that works in all 97 * semantic situations. In contrast, full LL evaluates predicates collected 98 * during start state computation so it can ignore predicates thereafter. 99 * This means that SLL termination detection can totally ignore semantic 100 * predicates.</p> 101 * 102 * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not 103 * semantic predicate contexts so we might see two configurations like the 104 * following.</p> 105 * 106 * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p> 107 * 108 * <p>Before testing these configurations against others, we have to merge 109 * {@code x} and {@code x'} (without modifying the existing configurations). 110 * For example, we test {@code (x+x')==x''} when looking for conflicts in 111 * the following configurations.</p> 112 * 113 * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p> 114 * 115 * <p>If the configuration set has predicates (as indicated by 116 * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of 117 * the configurations to strip out all of the predicates so that a standard 118 * {@link ATNConfigSet} will merge everything ignoring predicates.</p> 119 */ 120 public static bool hasSLLConflictTerminatingPrediction(PredictionModeConst mode, ATNConfigSet configs) 121 { 122 /* Configs in rule stop states indicate reaching the end of the decision 123 * rule (local context) or end of start rule (full context). If all 124 * configs meet this condition, then none of the configurations is able 125 * to match additional input so we terminate prediction. 126 */ 127 if (allConfigsInRuleStopStates(configs)) { 128 return true; 129 } 130 131 // pure SLL mode parsing 132 if (mode == PredictionModeConst.SLL) { 133 // Don't bother with combining configs from different semantic 134 // contexts if we can fail over to full LL; costs more time 135 // since we'll often fail over anyway. 136 if (configs.hasSemanticContext) { 137 // dup configs, tossing out semantic predicates 138 ATNConfigSet dupli = new ATNConfigSet(); 139 foreach (ATNConfig c; configs.configs) { 140 c = new ATNConfig(c,SemanticContext.NONE); 141 dupli.add(c); 142 } 143 configs = dupli; 144 } 145 // now we have combined contexts for configs with dissimilar preds 146 } 147 148 // pure SLL or combined SLL+LL mode parsing 149 150 BitSet[] altsets = getConflictingAltSubsets(configs); 151 bool heuristic = 152 hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); 153 return heuristic; 154 155 } 156 157 /** 158 * Checks if any configuration in {@code configs} is in a 159 * {@link RuleStopState}. Configurations meeting this condition have reached 160 * the end of the decision rule (local context) or end of start rule (full 161 * context). 162 * 163 * @param configs the configuration set to test 164 * @return {@code true} if any configuration in {@code configs} is in a 165 * {@link RuleStopState}, otherwise {@code false} 166 */ 167 public static bool hasConfigInRuleStopState(ATNConfigSet configs) 168 { 169 foreach (ATNConfig c; configs.configs) { 170 if (c.state.classinfo == RuleStopState.classinfo) { 171 return true; 172 } 173 } 174 return false; 175 } 176 177 /** 178 * Checks if all configurations in {@code configs} are in a 179 * {@link RuleStopState}. Configurations meeting this condition have reached 180 * the end of the decision rule (local context) or end of start rule (full 181 * context). 182 * 183 * @param configs the configuration set to test 184 * @return {@code true} if all configurations in {@code configs} are in a 185 * {@link RuleStopState}, otherwise {@code false} 186 */ 187 public static bool allConfigsInRuleStopStates(ATNConfigSet configs) 188 { 189 foreach (ATNConfig config; configs.configs) { 190 if (config.state.classinfo != RuleStopState.classinfo) { 191 return false; 192 } 193 } 194 return true; 195 } 196 197 /** 198 * Full LL prediction termination. 199 * 200 * <p>Can we stop looking ahead during ATN simulation or is there some 201 * uncertainty as to which alternative we will ultimately pick, after 202 * consuming more input? Even if there are partial conflicts, we might know 203 * that everything is going to resolve to the same minimum alternative. That 204 * means we can stop since no more lookahead will change that fact. On the 205 * other hand, there might be multiple conflicts that resolve to different 206 * minimums. That means we need more look ahead to decide which of those 207 * alternatives we should predict.</p> 208 * 209 * <p>The basic idea is to split the set of configurations {@code C}, into 210 * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with 211 * non-conflicting configurations. Two configurations conflict if they have 212 * identical {@link ATNConfig#state} and {@link ATNConfig#context} values 213 * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} 214 * and {@code (s, j, ctx, _)} for {@code i!=j}.</p> 215 * 216 * <p>Reduce these configuration subsets to the set of possible alternatives. 217 * You can compute the alternative subsets in one pass as follows:</p> 218 * 219 * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in 220 * {@code C} holding {@code s} and {@code ctx} fixed.</p> 221 * 222 * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p> 223 * 224 * <pre> 225 * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not 226 * alt and not pred 227 * </pre> 228 * 229 * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p> 230 * 231 * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with 232 * {@code s} and {@code ctx}.</p> 233 * 234 * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If 235 * the union of these alternative subsets is a singleton, then no amount of 236 * more lookahead will help us. We will always pick that alternative. If, 237 * however, there is more than one alternative, then we are uncertain which 238 * alternative to predict and must continue looking for resolution. We may 239 * or may not discover an ambiguity in the future, even if there are no 240 * conflicting subsets this round.</p> 241 * 242 * <p>The biggest sin is to terminate early because it means we've made a 243 * decision but were uncertain as to the eventual outcome. We haven't used 244 * enough lookahead. On the other hand, announcing a conflict too late is no 245 * big deal; you will still have the conflict. It's just inefficient. It 246 * might even look until the end of file.</p> 247 * 248 * <p>No special consideration for semantic predicates is required because 249 * predicates are evaluated on-the-fly for full LL prediction, ensuring that 250 * no configuration contains a semantic context during the termination 251 * check.</p> 252 * 253 * <p><strong>CONFLICTING CONFIGS</strong></p> 254 * 255 * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict 256 * when {@code i!=j} but {@code x=x'}. Because we merge all 257 * {@code (s, i, _)} configurations together, that means that there are at 258 * most {@code n} configurations associated with state {@code s} for 259 * {@code n} possible alternatives in the decision. The merged stacks 260 * complicate the comparison of configuration contexts {@code x} and 261 * {@code x'}. Sam checks to see if one is a subset of the other by calling 262 * merge and checking to see if the merged result is either {@code x} or 263 * {@code x'}. If the {@code x} associated with lowest alternative {@code i} 264 * is the superset, then {@code i} is the only possible prediction since the 265 * others resolve to {@code min(i)} as well. However, if {@code x} is 266 * associated with {@code j>i} then at least one stack configuration for 267 * {@code j} is not in conflict with alternative {@code i}. The algorithm 268 * should keep going, looking for more lookahead due to the uncertainty.</p> 269 * 270 * <p>For simplicity, I'm doing a equality check between {@code x} and 271 * {@code x'} that lets the algorithm continue to consume lookahead longer 272 * than necessary. The reason I like the equality is of course the 273 * simplicity but also because that is the test you need to detect the 274 * alternatives that are actually in conflict.</p> 275 * 276 * <p><strong>CONTINUE/STOP RULE</strong></p> 277 * 278 * <p>Continue if union of resolved alternative sets from non-conflicting and 279 * conflicting alternative subsets has more than one alternative. We are 280 * uncertain about which alternative to predict.</p> 281 * 282 * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which 283 * alternatives are still in the running for the amount of input we've 284 * consumed at this point. The conflicting sets let us to strip away 285 * configurations that won't lead to more states because we resolve 286 * conflicts to the configuration with a minimum alternate for the 287 * conflicting set.</p> 288 * 289 * <p><strong>CASES</strong></p> 290 * 291 * <ul> 292 * 293 * <li>no conflicts and more than 1 alternative in set => continue</li> 294 * 295 * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, 296 * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set 297 * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = 298 * {@code {1,3}} => continue 299 * </li> 300 * 301 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, 302 * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set 303 * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = 304 * {@code {1}} => stop and predict 1</li> 305 * 306 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, 307 * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U 308 * {@code {1}} = {@code {1}} => stop and predict 1, can announce 309 * ambiguity {@code {1,2}}</li> 310 * 311 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, 312 * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U 313 * {@code {2}} = {@code {1,2}} => continue</li> 314 * 315 * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, 316 * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U 317 * {@code {3}} = {@code {1,3}} => continue</li> 318 * 319 * </ul> 320 * 321 * <p><strong>EXACT AMBIGUITY DETECTION</strong></p> 322 * 323 * <p>If all states report the same conflicting set of alternatives, then we 324 * know we have the exact ambiguity set.</p> 325 * 326 * <p><code>|A_<em>i</em>|>1</code> and 327 * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p> 328 * 329 * <p>In other words, we continue examining lookahead until all {@code A_i} 330 * have more than one alternative and all {@code A_i} are the same. If 331 * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate 332 * because the resolved set is {@code {1}}. To determine what the real 333 * ambiguity is, we have to know whether the ambiguity is between one and 334 * two or one and three so we keep going. We can only stop prediction when 335 * we need exact ambiguity detection when the sets look like 336 * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p> 337 */ 338 public static int resolvesToJustOneViableAlt(BitSet[] altsets) 339 { 340 return getSingleViableAlt(altsets); 341 } 342 343 /** 344 * Determines if every alternative subset in {@code altsets} contains more 345 * than one alternative. 346 * 347 * @param altsets a collection of alternative subsets 348 * @return {@code true} if every {@link BitSet} in {@code altsets} has 349 * {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} 350 */ 351 public static bool allSubsetsConflict(BitSet[] altsets) 352 { 353 return !hasNonConflictingAltSet(altsets); 354 } 355 356 /** 357 * Determines if any single alternative subset in {@code altsets} contains 358 * exactly one alternative. 359 * 360 * @param altsets a collection of alternative subsets 361 * @return {@code true} if {@code altsets} contains a {@link BitSet} with 362 * {@link BitSet#cardinality cardinality} 1, otherwise {@code false} 363 */ 364 public static bool hasNonConflictingAltSet(BitSet[] altsets) 365 { 366 foreach (BitSet alts; altsets) { 367 if (alts.cardinality == 1) { 368 return true; 369 } 370 } 371 return false; 372 } 373 374 /** 375 * Determines if any single alternative subset in {@code altsets} contains 376 * more than one alternative. 377 * 378 * @param altsets a collection of alternative subsets 379 * @return {@code true} if {@code altsets} contains a {@link BitSet} with 380 * {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} 381 */ 382 public static bool hasConflictingAltSet(BitSet[] altsets) 383 { 384 foreach (BitSet alts; altsets) { 385 if (alts.cardinality >1) { 386 return true; 387 } 388 } 389 return false; 390 } 391 392 /** 393 * Determines if every alternative subset in {@code altsets} is equivalent. 394 * 395 * @param altsets a collection of alternative subsets 396 * @return {@code true} if every member of {@code altsets} is equal to the 397 * others, otherwise {@code false} 398 */ 399 public static bool allSubsetsEqual(BitSet[] altsets) 400 { 401 BitSet first; 402 foreach (i, el; altsets) { 403 if (i == 0) 404 first = el; 405 else 406 if (!el.opEquals(first)) return false; 407 } 408 return true; 409 } 410 411 /** 412 * Returns the unique alternative predicted by all alternative subsets in 413 * {@code altsets}. If no such alternative exists, this method returns 414 * {@link ATN#INVALID_ALT_NUMBER}. 415 * 416 * @param altsets a collection of alternative subsets 417 */ 418 public static int getUniqueAlt(BitSet[] altsets) 419 { 420 BitSet all = getAlts(altsets); 421 if (all.cardinality == 1) 422 return all.nextSetBit(0); 423 return ATN.INVALID_ALT_NUMBER; 424 } 425 426 /** 427 * Gets the complete set of represented alternatives for a collection of 428 * alternative subsets. This method returns the union of each {@link BitSet} 429 * in {@code altsets}. 430 * 431 * @param altsets a collection of alternative subsets 432 * @return the set of represented alternatives in {@code altsets} 433 */ 434 public static BitSet getAlts(BitSet[] altsets) 435 { 436 BitSet all; 437 foreach (BitSet alts; altsets) { 438 all = all.or(alts); 439 } 440 return all; 441 } 442 443 public static BitSet getAlts(ATNConfigSet configs) 444 { 445 BitSet *alts; 446 alts = new BitSet; 447 foreach (ATNConfig config; configs.configs) { 448 alts.set(config.alt, true); 449 } 450 return *alts; 451 } 452 453 /** 454 * This function gets the conflicting alt subsets from a configuration set. 455 * For each configuration {@code c} in {@code configs}: 456 * 457 * <pre> 458 * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not 459 * alt and not pred 460 * </pre> 461 */ 462 public static BitSet[] getConflictingAltSubsets(ATNConfigSet configs) 463 { 464 AltAndContextMap configToAlts; 465 BitSet *alts; 466 467 foreach (ATNConfig c; configs.configs) { 468 // c.hashOfFp = &ContextMapObjectEqualityComparator.toHash; 469 // c.opEqualsFp = &ContextMapObjectEqualityComparator.opEquals; 470 if (!configToAlts.hasKey(c)) 471 { 472 alts = new BitSet(); 473 alts.set(0, false); 474 configToAlts.put(c, *alts); 475 } 476 else { 477 *alts = configToAlts.get(c); 478 } 479 alts.set(c.alt, true); 480 } 481 return configToAlts.altAndContextMap.values; 482 } 483 484 /** 485 * Get a map from state to alt subset from a configuration set. For each 486 * configuration {@code c} in {@code configs}: 487 * 488 * <pre> 489 * map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt} 490 * </pre> 491 */ 492 public static BitSet[ATNState] getStateToAltMap(ATNConfigSet configs) 493 { 494 BitSet[ATNState] m; 495 foreach (ATNConfig c; configs.configs) { 496 BitSet alts; 497 if (!(c.state in m)){ 498 alts.clear; 499 m[c.state] = alts; 500 } 501 alts.set(c.alt, true); 502 } 503 return m; 504 } 505 506 public static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs) 507 { 508 BitSet[ATNState] x = getStateToAltMap(configs); 509 foreach (alts; x.values) { 510 if (alts.cardinality == 1) 511 return true; 512 } 513 return false; 514 } 515 516 public static int getSingleViableAlt(BitSet[] altsets) 517 { 518 BitSet viableAlts; 519 foreach (BitSet alts; altsets) { 520 int minAlt = alts.nextSetBit(0); 521 viableAlts.set(minAlt, true); 522 if (viableAlts.cardinality > 1) { // more than 1 viable alt 523 return ATN.INVALID_ALT_NUMBER; 524 } 525 } 526 return viableAlts.nextSetBit(0); 527 } 528 529 }