antlr.v4.runtime.atn.PredictionMode source code

1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.atn.PredictionMode;
8 
9 import std.typecons;
10 import antlr.v4.runtime.atn.ATN;
11 import antlr.v4.runtime.atn.ATNConfig;
12 import antlr.v4.runtime.atn.PredictionModeConst;
13 import antlr.v4.runtime.atn.ATNConfigSet;
14 import antlr.v4.runtime.atn.ATNState;
15 import antlr.v4.runtime.atn.AltAndContextMap;
16 import antlr.v4.runtime.atn.RuleStopState;
17 import antlr.v4.runtime.atn.SemanticContext;
18 import antlr.v4.runtime.misc.BitSet;
19 
20 // Class PredictionMode
21 /**
22  * Utility methods for analyzing configuration sets for conflicts and/or
23  * ambiguities.
24  */
25 class PredictionMode
26 {
27 
28     /**
29      * Computes the SLL prediction termination condition.
30      *
31      * <p>
32      * This method computes the SLL prediction termination condition for both of
33      * the following cases.</p>
34      *
35      * <ul>
36      * <li>The usual SLL+LL fallback upon SLL conflict</li>
37      * <li>Pure SLL without LL fallback</li>
38      * </ul>
39      *
40      * <p><strong>COMBINED SLL+LL PARSING</strong></p>
41      *
42      * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
43      * ensured regardless of how the termination condition is computed by this
44      * method. Due to the substantially higher cost of LL prediction, the
45      * prediction should only fall back to LL when the additional lookahead
46      * cannot lead to a unique SLL prediction.</p>
47      *
48      * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
49      * conflicting subsets should fall back to full LL, even if the
50      * configuration sets don't resolve to the same alternative (e.g.
51      * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
52      * configuration, SLL could continue with the hopes that more lookahead will
53      * resolve via one of those non-conflicting configurations.</p>
54      *
55      * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
56      * stops when it sees only conflicting configuration subsets. In contrast,
57      * full LL keeps going when there is uncertainty.</p>
58      *
59      * <p><strong>HEURISTIC</strong></p>
60      *
61      * <p>As a heuristic, we stop prediction when we see any conflicting subset
62      * unless we see a state that only has one alternative associated with it.
63      * The single-alt-state thing lets prediction continue upon rules like
64      * (otherwise, it would admit defeat too soon):</p>
65      *
66      * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
67      *
68      * <p>When the ATN simulation reaches the state before {@code ';'}, it has a
69      * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
70      * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
71      * processing this node because alternative to has another way to continue,
72      * via {@code [6|2|[]]}.</p>
73      *
74      * <p>It also let's us continue for this rule:</p>
75      *
76      * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
77      *
78      * <p>After matching input A, we reach the stop state for rule A, state 1.
79      * State 8 is the state right before B. Clearly alternatives 1 and 2
80      * conflict and no amount of further lookahead will separate the two.
81      * However, alternative 3 will be able to continue and so we do not stop
82      * working on this state. In the previous example, we're concerned with
83      * states associated with the conflicting alternatives. Here alt 3 is not
84      * associated with the conflicting configs, but since we can continue
85      * looking for input reasonably, don't declare the state done.</p>
86      *
87      * <p><strong>PURE SLL PARSING</strong></p>
88      *
89      * <p>To handle pure SLL parsing, all we have to do is make sure that we
90      * combine stack contexts for configurations that differ only by semantic
91      * predicate. From there, we can do the usual SLL termination heuristic.</p>
92      *
93      * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
94      *
95      * <p>SLL decisions don't evaluate predicates until after they reach DFA stop
96      * states because they need to create the DFA cache that works in all
97      * semantic situations. In contrast, full LL evaluates predicates collected
98      * during start state computation so it can ignore predicates thereafter.
99      * This means that SLL termination detection can totally ignore semantic
100      * predicates.</p>
101      *
102      * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
103      * semantic predicate contexts so we might see two configurations like the
104      * following.</p>
105      *
106      * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
107      *
108      * <p>Before testing these configurations against others, we have to merge
109      * {@code x} and {@code x'} (without modifying the existing configurations).
110      * For example, we test {@code (x+x')==x''} when looking for conflicts in
111      * the following configurations.</p>
112      *
113      * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
114      *
115      * <p>If the configuration set has predicates (as indicated by
116      * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
117      * the configurations to strip out all of the predicates so that a standard
118      * {@link ATNConfigSet} will merge everything ignoring predicates.</p>
119      */
120     public static bool hasSLLConflictTerminatingPrediction(PredictionModeConst mode, ATNConfigSet configs)
121     {
122 	/* Configs in rule stop states indicate reaching the end of the decision
123          * rule (local context) or end of start rule (full context). If all
124          * configs meet this condition, then none of the configurations is able
125          * to match additional input so we terminate prediction.
126          */
127         if (allConfigsInRuleStopStates(configs)) {
128             return true;
129         }
130 
131         // pure SLL mode parsing
132         if (mode == PredictionModeConst.SLL) {
133             // Don't bother with combining configs from different semantic
134             // contexts if we can fail over to full LL; costs more time
135             // since we'll often fail over anyway.
136             if (configs.hasSemanticContext) {
137                 // dup configs, tossing out semantic predicates
138                 ATNConfigSet dupli = new ATNConfigSet();
139                 foreach (ATNConfig c; configs.configs) {
140                     c = new ATNConfig(c,SemanticContext.NONE);
141                     dupli.add(c);
142                 }
143                 configs = dupli;
144             }
145             // now we have combined contexts for configs with dissimilar preds
146         }
147 
148         // pure SLL or combined SLL+LL mode parsing
149 
150         BitSet[] altsets = getConflictingAltSubsets(configs);
151         bool heuristic =
152             hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs);
153         return heuristic;
154 
155     }
156 
157     /**
158      * Checks if any configuration in {@code configs} is in a
159      * {@link RuleStopState}. Configurations meeting this condition have reached
160      * the end of the decision rule (local context) or end of start rule (full
161      * context).
162      *
163      * @param configs the configuration set to test
164      * @return {@code true} if any configuration in {@code configs} is in a
165      * {@link RuleStopState}, otherwise {@code false}
166      */
167     public static bool hasConfigInRuleStopState(ATNConfigSet configs)
168     {
169 	foreach (ATNConfig c; configs.configs) {
170             if (c.state.classinfo == RuleStopState.classinfo) {
171                 return true;
172             }
173         }
174         return false;
175     }
176 
177     /**
178      * Checks if all configurations in {@code configs} are in a
179      * {@link RuleStopState}. Configurations meeting this condition have reached
180      * the end of the decision rule (local context) or end of start rule (full
181      * context).
182      *
183      * @param configs the configuration set to test
184      * @return {@code true} if all configurations in {@code configs} are in a
185      * {@link RuleStopState}, otherwise {@code false}
186      */
187     public static bool allConfigsInRuleStopStates(ATNConfigSet configs)
188     {
189 	foreach (ATNConfig config; configs.configs) {
190             if (config.state.classinfo != RuleStopState.classinfo) {
191                 return false;
192             }
193         }
194         return true;
195     }
196 
197     /**
198      * Full LL prediction termination.
199      *
200      * <p>Can we stop looking ahead during ATN simulation or is there some
201      * uncertainty as to which alternative we will ultimately pick, after
202      * consuming more input? Even if there are partial conflicts, we might know
203      * that everything is going to resolve to the same minimum alternative. That
204      * means we can stop since no more lookahead will change that fact. On the
205      * other hand, there might be multiple conflicts that resolve to different
206      * minimums. That means we need more look ahead to decide which of those
207      * alternatives we should predict.</p>
208      *
209      * <p>The basic idea is to split the set of configurations {@code C}, into
210      * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
211      * non-conflicting configurations. Two configurations conflict if they have
212      * identical {@link ATNConfig#state} and {@link ATNConfig#context} values
213      * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
214      * and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
215      *
216      * <p>Reduce these configuration subsets to the set of possible alternatives.
217      * You can compute the alternative subsets in one pass as follows:</p>
218      *
219      * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
220      * {@code C} holding {@code s} and {@code ctx} fixed.</p>
221      *
222      * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
223      *
224      * <pre>
225      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
226      * alt and not pred
227      * </pre>
228      *
229      * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
230      *
231      * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
232      * {@code s} and {@code ctx}.</p>
233      *
234      * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
235      * the union of these alternative subsets is a singleton, then no amount of
236      * more lookahead will help us. We will always pick that alternative. If,
237      * however, there is more than one alternative, then we are uncertain which
238      * alternative to predict and must continue looking for resolution. We may
239      * or may not discover an ambiguity in the future, even if there are no
240      * conflicting subsets this round.</p>
241      *
242      * <p>The biggest sin is to terminate early because it means we've made a
243      * decision but were uncertain as to the eventual outcome. We haven't used
244      * enough lookahead. On the other hand, announcing a conflict too late is no
245      * big deal; you will still have the conflict. It's just inefficient. It
246      * might even look until the end of file.</p>
247      *
248      * <p>No special consideration for semantic predicates is required because
249      * predicates are evaluated on-the-fly for full LL prediction, ensuring that
250      * no configuration contains a semantic context during the termination
251      * check.</p>
252      *
253      * <p><strong>CONFLICTING CONFIGS</strong></p>
254      *
255      * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
256      * when {@code i!=j} but {@code x=x'}. Because we merge all
257      * {@code (s, i, _)} configurations together, that means that there are at
258      * most {@code n} configurations associated with state {@code s} for
259      * {@code n} possible alternatives in the decision. The merged stacks
260      * complicate the comparison of configuration contexts {@code x} and
261      * {@code x'}. Sam checks to see if one is a subset of the other by calling
262      * merge and checking to see if the merged result is either {@code x} or
263      * {@code x'}. If the {@code x} associated with lowest alternative {@code i}
264      * is the superset, then {@code i} is the only possible prediction since the
265      * others resolve to {@code min(i)} as well. However, if {@code x} is
266      * associated with {@code j>i} then at least one stack configuration for
267      * {@code j} is not in conflict with alternative {@code i}. The algorithm
268      * should keep going, looking for more lookahead due to the uncertainty.</p>
269      *
270      * <p>For simplicity, I'm doing a equality check between {@code x} and
271      * {@code x'} that lets the algorithm continue to consume lookahead longer
272      * than necessary. The reason I like the equality is of course the
273      * simplicity but also because that is the test you need to detect the
274      * alternatives that are actually in conflict.</p>
275      *
276      * <p><strong>CONTINUE/STOP RULE</strong></p>
277      *
278      * <p>Continue if union of resolved alternative sets from non-conflicting and
279      * conflicting alternative subsets has more than one alternative. We are
280      * uncertain about which alternative to predict.</p>
281      *
282      * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
283      * alternatives are still in the running for the amount of input we've
284      * consumed at this point. The conflicting sets let us to strip away
285      * configurations that won't lead to more states because we resolve
286      * conflicts to the configuration with a minimum alternate for the
287      * conflicting set.</p>
288      *
289      * <p><strong>CASES</strong></p>
290      *
291      * <ul>
292      *
293      * <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
294      *
295      * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
296      * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
297      * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
298      * {@code {1,3}} =&gt; continue
299      * </li>
300      *
301      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
302      * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
303      * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
304      * {@code {1}} =&gt; stop and predict 1</li>
305      *
306      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
307      * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
308      * {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
309      * ambiguity {@code {1,2}}</li>
310      *
311      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
312      * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
313      * {@code {2}} = {@code {1,2}} =&gt; continue</li>
314      *
315      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
316      * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
317      * {@code {3}} = {@code {1,3}} =&gt; continue</li>
318      *
319      * </ul>
320      *
321      * <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
322      *
323      * <p>If all states report the same conflicting set of alternatives, then we
324      * know we have the exact ambiguity set.</p>
325      *
326      * <p><code>|A_<em>i</em>|&gt;1</code> and
327      * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
328      *
329      * <p>In other words, we continue examining lookahead until all {@code A_i}
330      * have more than one alternative and all {@code A_i} are the same. If
331      * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
332      * because the resolved set is {@code {1}}. To determine what the real
333      * ambiguity is, we have to know whether the ambiguity is between one and
334      * two or one and three so we keep going. We can only stop prediction when
335      * we need exact ambiguity detection when the sets look like
336      * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
337      */
338     public static int resolvesToJustOneViableAlt(BitSet[] altsets)
339     {
340 	return getSingleViableAlt(altsets);
341     }
342 
343     /**
344      * Determines if every alternative subset in {@code altsets} contains more
345      * than one alternative.
346      *
347      * @param altsets a collection of alternative subsets
348      * @return {@code true} if every {@link BitSet} in {@code altsets} has
349      * {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
350      */
351     public static bool allSubsetsConflict(BitSet[] altsets)
352     {
353 	return !hasNonConflictingAltSet(altsets);
354     }
355 
356     /**
357      * Determines if any single alternative subset in {@code altsets} contains
358      * exactly one alternative.
359      *
360      * @param altsets a collection of alternative subsets
361      * @return {@code true} if {@code altsets} contains a {@link BitSet} with
362      * {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
363      */
364     public static bool hasNonConflictingAltSet(BitSet[] altsets)
365     {
366 	foreach (BitSet alts; altsets) {
367             if (alts.cardinality == 1) {
368                 return true;
369             }
370         }
371         return false;
372     }
373 
374     /**
375      * Determines if any single alternative subset in {@code altsets} contains
376      * more than one alternative.
377      *
378      * @param altsets a collection of alternative subsets
379      * @return {@code true} if {@code altsets} contains a {@link BitSet} with
380      * {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
381      */
382     public static bool hasConflictingAltSet(BitSet[] altsets)
383     {
384 	foreach (BitSet alts; altsets) {
385             if (alts.cardinality >1) {
386                 return true;
387             }
388         }
389         return false;
390     }
391 
392     /**
393      * Determines if every alternative subset in {@code altsets} is equivalent.
394      *
395      * @param altsets a collection of alternative subsets
396      * @return {@code true} if every member of {@code altsets} is equal to the
397      * others, otherwise {@code false}
398      */
399     public static bool allSubsetsEqual(BitSet[] altsets)
400     {
401         BitSet first;
402         foreach (i, el; altsets) {
403             if (i == 0)
404                 first = el;
405             else
406                 if (!el.opEquals(first)) return false;
407         }
408         return true;
409     }
410 
411     /**
412      * Returns the unique alternative predicted by all alternative subsets in
413      * {@code altsets}. If no such alternative exists, this method returns
414      * {@link ATN#INVALID_ALT_NUMBER}.
415      *
416      * @param altsets a collection of alternative subsets
417      */
418     public static int getUniqueAlt(BitSet[] altsets)
419     {
420 	BitSet all = getAlts(altsets);
421         if (all.cardinality == 1)
422             return all.nextSetBit(0);
423         return ATN.INVALID_ALT_NUMBER;
424     }
425 
426     /**
427      * Gets the complete set of represented alternatives for a collection of
428      * alternative subsets. This method returns the union of each {@link BitSet}
429      * in {@code altsets}.
430      *
431      * @param altsets a collection of alternative subsets
432      * @return the set of represented alternatives in {@code altsets}
433      */
434     public static BitSet getAlts(BitSet[] altsets)
435     {
436 	BitSet all;
437         foreach (BitSet alts; altsets) {
438             all = all.or(alts);
439         }
440         return all;
441     }
442 
443     public static BitSet getAlts(ATNConfigSet configs)
444     {
445 	BitSet *alts;
446         alts = new BitSet;
447         foreach (ATNConfig config; configs.configs) {
448             alts.set(config.alt, true);
449         }
450         return *alts;
451     }
452 
453     /**
454      * This function gets the conflicting alt subsets from a configuration set.
455      * For each configuration {@code c} in {@code configs}:
456      *
457      * <pre>
458      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
459      * alt and not pred
460      * </pre>
461      */
462     public static BitSet[] getConflictingAltSubsets(ATNConfigSet configs)
463     {
464 	AltAndContextMap configToAlts;
465         BitSet *alts;
466 
467         foreach (ATNConfig c; configs.configs) {
468             // c.hashOfFp = &ContextMapObjectEqualityComparator.toHash;
469             // c.opEqualsFp = &ContextMapObjectEqualityComparator.opEquals;
470             if (!configToAlts.hasKey(c))
471                 {
472                     alts = new BitSet();
473                     alts.set(0, false);
474                     configToAlts.put(c, *alts);
475                 }
476             else {
477                 *alts = configToAlts.get(c);
478             }
479             alts.set(c.alt, true);
480         }
481         return configToAlts.altAndContextMap.values;
482     }
483 
484     /**
485      * Get a map from state to alt subset from a configuration set. For each
486      * configuration {@code c} in {@code configs}:
487      *
488      * <pre>
489      * map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
490      * </pre>
491      */
492     public static BitSet[ATNState] getStateToAltMap(ATNConfigSet configs)
493     {
494 	BitSet[ATNState] m;
495         foreach (ATNConfig c; configs.configs) {
496             BitSet alts;
497             if (!(c.state in m)){
498                 alts.clear;
499                 m[c.state] = alts;
500             }
501             alts.set(c.alt, true);
502         }
503         return m;
504     }
505 
506     public static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs)
507     {
508 	BitSet[ATNState] x = getStateToAltMap(configs);
509         foreach (alts; x.values) {
510             if (alts.cardinality == 1)
511                 return true;
512         }
513         return false;
514     }
515 
516     public static int getSingleViableAlt(BitSet[] altsets)
517     {
518 	BitSet viableAlts;
519         foreach (BitSet alts; altsets) {
520             int minAlt = alts.nextSetBit(0);
521             viableAlts.set(minAlt, true);
522             if (viableAlts.cardinality > 1) { // more than 1 viable alt
523                 return ATN.INVALID_ALT_NUMBER;
524             }
525         }
526         return viableAlts.nextSetBit(0);
527     }
528 
529 }