antlr.v4.runtime.atn.PredictionMode source code

1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.atn.PredictionMode;
8 
9 import antlr.v4.runtime.atn.ATN;
10 import antlr.v4.runtime.atn.ATNConfig;
11 import antlr.v4.runtime.atn.ATNConfigSet;
12 import antlr.v4.runtime.atn.ATNState;
13 import antlr.v4.runtime.atn.AltAndContextMap;
14 import antlr.v4.runtime.atn.PredictionModeConst;
15 import antlr.v4.runtime.atn.RuleStopState;
16 import antlr.v4.runtime.atn.ContextMapObjectEqualityComparator;
17 import antlr.v4.runtime.atn.SemanticContext;
18 import antlr.v4.runtime.misc.BitSet;
19 import std.typecons;
20 
21 /**
22  * This enumeration defines the prediction modes available in ANTLR 4 along with
23  * utility methods for analyzing configuration sets for conflicts and/or
24  * ambiguities.
25  */
26 class PredictionMode
27 {
28 
29     /**
30      * Computes the SLL prediction termination condition.
31      *
32      * <p>
33      * This method computes the SLL prediction termination condition for both of
34      * the following cases.</p>
35      *
36      * <ul>
37      * <li>The usual SLL+LL fallback upon SLL conflict</li>
38      * <li>Pure SLL without LL fallback</li>
39      * </ul>
40      *
41      * <p><strong>COMBINED SLL+LL PARSING</strong></p>
42      *
43      * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
44      * ensured regardless of how the termination condition is computed by this
45      * method. Due to the substantially higher cost of LL prediction, the
46      * prediction should only fall back to LL when the additional lookahead
47      * cannot lead to a unique SLL prediction.</p>
48      *
49      * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
50      * conflicting subsets should fall back to full LL, even if the
51      * configuration sets don't resolve to the same alternative (e.g.
52      * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
53      * configuration, SLL could continue with the hopes that more lookahead will
54      * resolve via one of those non-conflicting configurations.</p>
55      *
56      * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
57      * stops when it sees only conflicting configuration subsets. In contrast,
58      * full LL keeps going when there is uncertainty.</p>
59      *
60      * <p><strong>HEURISTIC</strong></p>
61      *
62      * <p>As a heuristic, we stop prediction when we see any conflicting subset
63      * unless we see a state that only has one alternative associated with it.
64      * The single-alt-state thing lets prediction continue upon rules like
65      * (otherwise, it would admit defeat too soon):</p>
66      *
67      * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
68      *
69      * <p>When the ATN simulation reaches the state before {@code ';'}, it has a
70      * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
71      * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
72      * processing this node because alternative to has another way to continue,
73      * via {@code [6|2|[]]}.</p>
74      *
75      * <p>It also let's us continue for this rule:</p>
76      *
77      * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
78      *
79      * <p>After matching input A, we reach the stop state for rule A, state 1.
80      * State 8 is the state right before B. Clearly alternatives 1 and 2
81      * conflict and no amount of further lookahead will separate the two.
82      * However, alternative 3 will be able to continue and so we do not stop
83      * working on this state. In the previous example, we're concerned with
84      * states associated with the conflicting alternatives. Here alt 3 is not
85      * associated with the conflicting configs, but since we can continue
86      * looking for input reasonably, don't declare the state done.</p>
87      *
88      * <p><strong>PURE SLL PARSING</strong></p>
89      *
90      * <p>To handle pure SLL parsing, all we have to do is make sure that we
91      * combine stack contexts for configurations that differ only by semantic
92      * predicate. From there, we can do the usual SLL termination heuristic.</p>
93      *
94      * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
95      *
96      * <p>SLL decisions don't evaluate predicates until after they reach DFA stop
97      * states because they need to create the DFA cache that works in all
98      * semantic situations. In contrast, full LL evaluates predicates collected
99      * during start state computation so it can ignore predicates thereafter.
100      * This means that SLL termination detection can totally ignore semantic
101      * predicates.</p>
102      *
103      * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
104      * semantic predicate contexts so we might see two configurations like the
105      * following.</p>
106      *
107      * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
108      *
109      * <p>Before testing these configurations against others, we have to merge
110      * {@code x} and {@code x'} (without modifying the existing configurations).
111      * For example, we test {@code (x+x')==x''} when looking for conflicts in
112      * the following configurations.</p>
113      *
114      * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
115      *
116      * <p>If the configuration set has predicates (as indicated by
117      * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
118      * the configurations to strip out all of the predicates so that a standard
119      * {@link ATNConfigSet} will merge everything ignoring predicates.</p>
120      */
121     public static bool hasSLLConflictTerminatingPrediction(PredictionModeConst mode, ATNConfigSet configs)
122     {
123 	/* Configs in rule stop states indicate reaching the end of the decision
124          * rule (local context) or end of start rule (full context). If all
125          * configs meet this condition, then none of the configurations is able
126          * to match additional input so we terminate prediction.
127          */
128         if (allConfigsInRuleStopStates(configs)) {
129             return true;
130         }
131 
132         // pure SLL mode parsing
133         if (mode == PredictionModeConst.SLL) {
134             // Don't bother with combining configs from different semantic
135             // contexts if we can fail over to full LL; costs more time
136             // since we'll often fail over anyway.
137             if (configs.hasSemanticContext) {
138                 // dup configs, tossing out semantic predicates
139                 ATNConfigSet dupli = new ATNConfigSet();
140                 auto _ = new SemanticContext();
141                 foreach (ATNConfig c; configs.configs) {
142                     c = new ATNConfig(c, SemanticContext.NONE);
143                     dupli.add(c);
144                 }
145                 configs = dupli;
146             }
147             // now we have combined contexts for configs with dissimilar preds
148         }
149 
150         // pure SLL or combined SLL+LL mode parsing
151 
152         BitSet[] altsets = getConflictingAltSubsets(configs);
153         bool heuristic =
154             hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs);
155         return heuristic;
156 
157     }
158 
159     /**
160      * Checks if any configuration in {@code configs} is in a
161      * {@link RuleStopState}. Configurations meeting this condition have reached
162      * the end of the decision rule (local context) or end of start rule (full
163      * context).
164      *
165      * @param configs the configuration set to test
166      * @return {@code true} if any configuration in {@code configs} is in a
167      * {@link RuleStopState}, otherwise {@code false}
168      */
169     public static bool hasConfigInRuleStopState(ATNConfigSet configs)
170     {
171 	foreach (ATNConfig c; configs.configs) {
172             if (c.state.classinfo == RuleStopState.classinfo) {
173                 return true;
174             }
175         }
176         return false;
177     }
178 
179     /**
180      * Checks if all configurations in {@code configs} are in a
181      * {@link RuleStopState}. Configurations meeting this condition have reached
182      * the end of the decision rule (local context) or end of start rule (full
183      * context).
184      *
185      * @param configs the configuration set to test
186      * @return {@code true} if all configurations in {@code configs} are in a
187      * {@link RuleStopState}, otherwise {@code false}
188      */
189     public static bool allConfigsInRuleStopStates(ATNConfigSet configs)
190     {
191 	foreach (ATNConfig config; configs.configs) {
192             if (config.state.classinfo != RuleStopState.classinfo) {
193                 return false;
194             }
195         }
196         return true;
197     }
198 
199     /**
200      * Full LL prediction termination.
201      *
202      * <p>Can we stop looking ahead during ATN simulation or is there some
203      * uncertainty as to which alternative we will ultimately pick, after
204      * consuming more input? Even if there are partial conflicts, we might know
205      * that everything is going to resolve to the same minimum alternative. That
206      * means we can stop since no more lookahead will change that fact. On the
207      * other hand, there might be multiple conflicts that resolve to different
208      * minimums. That means we need more look ahead to decide which of those
209      * alternatives we should predict.</p>
210      *
211      * <p>The basic idea is to split the set of configurations {@code C}, into
212      * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
213      * non-conflicting configurations. Two configurations conflict if they have
214      * identical {@link ATNConfig#state} and {@link ATNConfig#context} values
215      * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
216      * and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
217      *
218      * <p>Reduce these configuration subsets to the set of possible alternatives.
219      * You can compute the alternative subsets in one pass as follows:</p>
220      *
221      * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
222      * {@code C} holding {@code s} and {@code ctx} fixed.</p>
223      *
224      * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
225      *
226      * <pre>
227      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
228      * alt and not pred
229      * </pre>
230      *
231      * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
232      *
233      * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
234      * {@code s} and {@code ctx}.</p>
235      *
236      * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
237      * the union of these alternative subsets is a singleton, then no amount of
238      * more lookahead will help us. We will always pick that alternative. If,
239      * however, there is more than one alternative, then we are uncertain which
240      * alternative to predict and must continue looking for resolution. We may
241      * or may not discover an ambiguity in the future, even if there are no
242      * conflicting subsets this round.</p>
243      *
244      * <p>The biggest sin is to terminate early because it means we've made a
245      * decision but were uncertain as to the eventual outcome. We haven't used
246      * enough lookahead. On the other hand, announcing a conflict too late is no
247      * big deal; you will still have the conflict. It's just inefficient. It
248      * might even look until the end of file.</p>
249      *
250      * <p>No special consideration for semantic predicates is required because
251      * predicates are evaluated on-the-fly for full LL prediction, ensuring that
252      * no configuration contains a semantic context during the termination
253      * check.</p>
254      *
255      * <p><strong>CONFLICTING CONFIGS</strong></p>
256      *
257      * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
258      * when {@code i!=j} but {@code x=x'}. Because we merge all
259      * {@code (s, i, _)} configurations together, that means that there are at
260      * most {@code n} configurations associated with state {@code s} for
261      * {@code n} possible alternatives in the decision. The merged stacks
262      * complicate the comparison of configuration contexts {@code x} and
263      * {@code x'}. Sam checks to see if one is a subset of the other by calling
264      * merge and checking to see if the merged result is either {@code x} or
265      * {@code x'}. If the {@code x} associated with lowest alternative {@code i}
266      * is the superset, then {@code i} is the only possible prediction since the
267      * others resolve to {@code min(i)} as well. However, if {@code x} is
268      * associated with {@code j>i} then at least one stack configuration for
269      * {@code j} is not in conflict with alternative {@code i}. The algorithm
270      * should keep going, looking for more lookahead due to the uncertainty.</p>
271      *
272      * <p>For simplicity, I'm doing a equality check between {@code x} and
273      * {@code x'} that lets the algorithm continue to consume lookahead longer
274      * than necessary. The reason I like the equality is of course the
275      * simplicity but also because that is the test you need to detect the
276      * alternatives that are actually in conflict.</p>
277      *
278      * <p><strong>CONTINUE/STOP RULE</strong></p>
279      *
280      * <p>Continue if union of resolved alternative sets from non-conflicting and
281      * conflicting alternative subsets has more than one alternative. We are
282      * uncertain about which alternative to predict.</p>
283      *
284      * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
285      * alternatives are still in the running for the amount of input we've
286      * consumed at this point. The conflicting sets let us to strip away
287      * configurations that won't lead to more states because we resolve
288      * conflicts to the configuration with a minimum alternate for the
289      * conflicting set.</p>
290      *
291      * <p><strong>CASES</strong></p>
292      *
293      * <ul>
294      *
295      * <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
296      *
297      * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
298      * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
299      * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
300      * {@code {1,3}} =&gt; continue
301      * </li>
302      *
303      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
304      * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
305      * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
306      * {@code {1}} =&gt; stop and predict 1</li>
307      *
308      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
309      * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
310      * {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
311      * ambiguity {@code {1,2}}</li>
312      *
313      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
314      * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
315      * {@code {2}} = {@code {1,2}} =&gt; continue</li>
316      *
317      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
318      * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
319      * {@code {3}} = {@code {1,3}} =&gt; continue</li>
320      *
321      * </ul>
322      *
323      * <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
324      *
325      * <p>If all states report the same conflicting set of alternatives, then we
326      * know we have the exact ambiguity set.</p>
327      *
328      * <p><code>|A_<em>i</em>|&gt;1</code> and
329      * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
330      *
331      * <p>In other words, we continue examining lookahead until all {@code A_i}
332      * have more than one alternative and all {@code A_i} are the same. If
333      * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
334      * because the resolved set is {@code {1}}. To determine what the real
335      * ambiguity is, we have to know whether the ambiguity is between one and
336      * two or one and three so we keep going. We can only stop prediction when
337      * we need exact ambiguity detection when the sets look like
338      * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
339      */
340     public static int resolvesToJustOneViableAlt(BitSet[] altsets)
341     {
342 	return getSingleViableAlt(altsets);
343     }
344 
345     /**
346      * Determines if every alternative subset in {@code altsets} contains more
347      * than one alternative.
348      *
349      * @param altsets a collection of alternative subsets
350      * @return {@code true} if every {@link BitSet} in {@code altsets} has
351      * {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
352      */
353     public static bool allSubsetsConflict(BitSet[] altsets)
354     {
355 	return !hasNonConflictingAltSet(altsets);
356     }
357 
358     /**
359      * Determines if any single alternative subset in {@code altsets} contains
360      * exactly one alternative.
361      *
362      * @param altsets a collection of alternative subsets
363      * @return {@code true} if {@code altsets} contains a {@link BitSet} with
364      * {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
365      */
366     public static bool hasNonConflictingAltSet(BitSet[] altsets)
367     {
368 	foreach (BitSet alts; altsets) {
369             if (alts.cardinality == 1) {
370                 return true;
371             }
372         }
373         return false;
374     }
375 
376     /**
377      * Determines if any single alternative subset in {@code altsets} contains
378      * more than one alternative.
379      *
380      * @param altsets a collection of alternative subsets
381      * @return {@code true} if {@code altsets} contains a {@link BitSet} with
382      * {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
383      */
384     public static bool hasConflictingAltSet(BitSet[] altsets)
385     {
386 	foreach (BitSet alts; altsets) {
387             if (alts.cardinality >1) {
388                 return true;
389             }
390         }
391         return false;
392     }
393 
394     /**
395      * Determines if every alternative subset in {@code altsets} is equivalent.
396      *
397      * @param altsets a collection of alternative subsets
398      * @return {@code true} if every member of {@code altsets} is equal to the
399      * others, otherwise {@code false}
400      */
401     public static bool allSubsetsEqual(BitSet[] altsets)
402     {
403         BitSet first;
404         foreach (i, el; altsets) {
405             if (i == 0)
406                 first = el;
407             else
408                 if (!el.opEquals(first)) return false;
409         }
410         return true;
411     }
412 
413     /**
414      * Returns the unique alternative predicted by all alternative subsets in
415      * {@code altsets}. If no such alternative exists, this method returns
416      * {@link ATN#INVALID_ALT_NUMBER}.
417      *
418      * @param altsets a collection of alternative subsets
419      */
420     public static int getUniqueAlt(BitSet[] altsets)
421     {
422 	BitSet all = getAlts(altsets);
423         if (all.cardinality == 1)
424             return all.nextSetBit(0);
425         return ATN.INVALID_ALT_NUMBER;
426     }
427 
428     /**
429      * Gets the complete set of represented alternatives for a collection of
430      * alternative subsets. This method returns the union of each {@link BitSet}
431      * in {@code altsets}.
432      *
433      * @param altsets a collection of alternative subsets
434      * @return the set of represented alternatives in {@code altsets}
435      */
436     public static BitSet getAlts(BitSet[] altsets)
437     {
438 	BitSet all;
439         foreach (BitSet alts; altsets) {
440             all = all.or(alts);
441         }
442         return all;
443     }
444 
445     public static BitSet getAlts(ATNConfigSet configs)
446     {
447 	BitSet *alts;
448         alts = new BitSet;
449         foreach (ATNConfig config; configs.configs) {
450             alts.set(config.alt, true);
451         }
452         return *alts;
453     }
454 
455     /**
456      * This function gets the conflicting alt subsets from a configuration set.
457      * For each configuration {@code c} in {@code configs}:
458      *
459      * <pre>
460      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses stack and context, not
461      * alt and not pred
462      * </pre>
463      */
464     public static BitSet[] getConflictingAltSubsets(ATNConfigSet configs)
465     {
466 	AltAndContextMap configToAlts;
467         BitSet *alts;
468 
469         foreach (ATNConfig c; configs.configs) {
470             auto c_copy = new ATNConfig(c);
471             if (!configToAlts.hasKey(c_copy))
472                 {
473                     alts = new BitSet();
474                     alts.set(0, false); //  initialise alts
475                     configToAlts.put(c_copy, *alts);
476                 }
477             else {
478                 *alts = configToAlts.get(c_copy);
479             }
480             alts.set(c_copy.alt, true);
481         }
482         return configToAlts.altAndContextMap.values;
483     }
484 
485     /**
486      * Get a map from state to alt subset from a configuration set. For each
487      * configuration {@code c} in {@code configs}:
488      *
489      * <pre>
490      * map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
491      * </pre>
492      */
493     public static BitSet[ATNState] getStateToAltMap(ATNConfigSet configs)
494     {
495 	BitSet[ATNState] m;
496         foreach (ATNConfig c; configs.configs) {
497             BitSet alts;
498             if (!(c.state in m)){
499                 alts.clear;
500                 m[c.state] = alts;
501             }
502             alts.set(c.alt, true);
503         }
504         return m;
505     }
506 
507     public static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs)
508     {
509 	BitSet[ATNState] x = getStateToAltMap(configs);
510         foreach (alts; x.values) {
511             if (alts.cardinality == 1)
512                 return true;
513         }
514         return false;
515     }
516 
517     public static int getSingleViableAlt(BitSet[] altsets)
518     {
519 	BitSet viableAlts;
520         foreach (BitSet alts; altsets) {
521             int minAlt = alts.nextSetBit(0);
522             viableAlts.set(minAlt, true);
523             if (viableAlts.cardinality > 1) { // more than 1 viable alt
524                 return ATN.INVALID_ALT_NUMBER;
525             }
526         }
527         return viableAlts.nextSetBit(0);
528     }
529 
530 }