antlr.v4.runtime.atn.PredictionMode source code

1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.atn.PredictionMode;
8 
9 import antlr.v4.runtime.atn.ATN;
10 import antlr.v4.runtime.atn.ATNConfig;
11 import antlr.v4.runtime.atn.ATNConfigSet;
12 import antlr.v4.runtime.atn.ATNState;
13 import antlr.v4.runtime.atn.AltAndContextMap;
14 import antlr.v4.runtime.atn.PredictionModeConst;
15 import antlr.v4.runtime.atn.RuleStopState;
16 import antlr.v4.runtime.atn.ContextMapObjectEqualityComparator;
17 import antlr.v4.runtime.atn.SemanticContext;
18 import antlr.v4.runtime.misc.BitSet;
19 import std.typecons;
20 
21 /**
22  * This enumeration defines the prediction modes available in ANTLR 4 along with
23  * utility methods for analyzing configuration sets for conflicts and/or
24  * ambiguities.
25  */
26 class PredictionMode
27 {
28 
29     /**
30      * Computes the SLL prediction termination condition.
31      *
32      * <p>
33      * This method computes the SLL prediction termination condition for both of
34      * the following cases.</p>
35      *
36      * <ul>
37      * <li>The usual SLL+LL fallback upon SLL conflict</li>
38      * <li>Pure SLL without LL fallback</li>
39      * </ul>
40      *
41      * <p><strong>COMBINED SLL+LL PARSING</strong></p>
42      *
43      * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are
44      * ensured regardless of how the termination condition is computed by this
45      * method. Due to the substantially higher cost of LL prediction, the
46      * prediction should only fall back to LL when the additional lookahead
47      * cannot lead to a unique SLL prediction.</p>
48      *
49      * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only
50      * conflicting subsets should fall back to full LL, even if the
51      * configuration sets don't resolve to the same alternative (e.g.
52      * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting
53      * configuration, SLL could continue with the hopes that more lookahead will
54      * resolve via one of those non-conflicting configurations.</p>
55      *
56      * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing)
57      * stops when it sees only conflicting configuration subsets. In contrast,
58      * full LL keeps going when there is uncertainty.</p>
59      *
60      * <p><strong>HEURISTIC</strong></p>
61      *
62      * <p>As a heuristic, we stop prediction when we see any conflicting subset
63      * unless we see a state that only has one alternative associated with it.
64      * The single-alt-state thing lets prediction continue upon rules like
65      * (otherwise, it would admit defeat too soon):</p>
66      *
67      * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p>
68      *
69      * <p>When the ATN simulation reaches the state before {@code ';'}, it has a
70      * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
71      * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
72      * processing this node because alternative to has another way to continue,
73      * via {@code [6|2|[]]}.</p>
74      *
75      * <p>It also let's us continue for this rule:</p>
76      *
77      * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p>
78      *
79      * <p>After matching input A, we reach the stop state for rule A, state 1.
80      * State 8 is the state right before B. Clearly alternatives 1 and 2
81      * conflict and no amount of further lookahead will separate the two.
82      * However, alternative 3 will be able to continue and so we do not stop
83      * working on this state. In the previous example, we're concerned with
84      * states associated with the conflicting alternatives. Here alt 3 is not
85      * associated with the conflicting configs, but since we can continue
86      * looking for input reasonably, don't declare the state done.</p>
87      *
88      * <p><strong>PURE SLL PARSING</strong></p>
89      *
90      * <p>To handle pure SLL parsing, all we have to do is make sure that we
91      * combine stack contexts for configurations that differ only by semantic
92      * predicate. From there, we can do the usual SLL termination heuristic.</p>
93      *
94      * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p>
95      *
96      * <p>SLL decisions don't evaluate predicates until after they reach DFA stop
97      * states because they need to create the DFA cache that works in all
98      * semantic situations. In contrast, full LL evaluates predicates collected
99      * during start state computation so it can ignore predicates thereafter.
100      * This means that SLL termination detection can totally ignore semantic
101      * predicates.</p>
102      *
103      * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
104      * semantic predicate contexts so we might see two configurations like the
105      * following.</p>
106      *
107      * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p>
108      *
109      * <p>Before testing these configurations against others, we have to merge
110      * {@code x} and {@code x'} (without modifying the existing configurations).
111      * For example, we test {@code (x+x')==x''} when looking for conflicts in
112      * the following configurations.</p>
113      *
114      * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p>
115      *
116      * <p>If the configuration set has predicates (as indicated by
117      * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
118      * the configurations to strip out all of the predicates so that a standard
119      * {@link ATNConfigSet} will merge everything ignoring predicates.</p>
120      */
121     public static bool hasSLLConflictTerminatingPrediction(PredictionModeConst mode, ATNConfigSet configs)
122     {
123         /* Configs in rule stop states indicate reaching the end of the decision
124          * rule (local context) or end of start rule (full context). If all
125          * configs meet this condition, then none of the configurations is able
126          * to match additional input so we terminate prediction.
127          */
128         if (allConfigsInRuleStopStates(configs)) {
129             return true;
130         }
131 
132         // pure SLL mode parsing
133         if (mode == PredictionModeConst.SLL) {
134             // Don't bother with combining configs from different semantic
135             // contexts if we can fail over to full LL; costs more time
136             // since we'll often fail over anyway.
137             if (configs.hasSemanticContext) {
138                 // dup configs, tossing out semantic predicates
139                 ATNConfigSet dupli = new ATNConfigSet();
140                 auto _ = new SemanticContext();
141                 foreach (ATNConfig c; configs.configs) {
142                     c = new ATNConfig(c, SemanticContext.NONE);
143                     dupli.add(c);
144                 }
145                 configs = dupli;
146             }
147             // now we have combined contexts for configs with dissimilar preds
148         }
149 
150         // pure SLL or combined SLL+LL mode parsing
151 
152         BitSet[] altsets = getConflictingAltSubsets(configs);
153         bool heuristic =
154             hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs);
155         return heuristic;
156 
157     }
158 
159     /**
160      * Checks if any configuration in {@code configs} is in a
161      * {@link RuleStopState}. Configurations meeting this condition have reached
162      * the end of the decision rule (local context) or end of start rule (full
163      * context).
164      *
165      * @param configs the configuration set to test
166      * @return {@code true} if any configuration in {@code configs} is in a
167      * {@link RuleStopState}, otherwise {@code false}
168      */
169     public static bool hasConfigInRuleStopState(ATNConfigSet configs)
170     {
171         foreach (ATNConfig c; configs.configs)
172         {
173             if (cast(RuleStopState)c.state) {
174                 return true;
175             }
176         }
177         return false;
178     }
179 
180     /**
181      * Checks if all configurations in {@code configs} are in a
182      * {@link RuleStopState}. Configurations meeting this condition have reached
183      * the end of the decision rule (local context) or end of start rule (full
184      * context).
185      *
186      * @param configs the configuration set to test
187      * @return {@code true} if all configurations in {@code configs} are in a
188      * {@link RuleStopState}, otherwise {@code false}
189      */
190     public static bool allConfigsInRuleStopStates(ATNConfigSet configs)
191     {
192         foreach (ATNConfig config; configs.configs) {
193             if (! cast(RuleStopState)config.state) {
194                 return false;
195             }
196         }
197         return true;
198     }
199 
200     /**
201      * Full LL prediction termination.
202      *
203      * <p>Can we stop looking ahead during ATN simulation or is there some
204      * uncertainty as to which alternative we will ultimately pick, after
205      * consuming more input? Even if there are partial conflicts, we might know
206      * that everything is going to resolve to the same minimum alternative. That
207      * means we can stop since no more lookahead will change that fact. On the
208      * other hand, there might be multiple conflicts that resolve to different
209      * minimums. That means we need more look ahead to decide which of those
210      * alternatives we should predict.</p>
211      *
212      * <p>The basic idea is to split the set of configurations {@code C}, into
213      * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
214      * non-conflicting configurations. Two configurations conflict if they have
215      * identical {@link ATNConfig#state} and {@link ATNConfig#context} values
216      * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
217      * and {@code (s, j, ctx, _)} for {@code i!=j}.</p>
218      *
219      * <p>Reduce these configuration subsets to the set of possible alternatives.
220      * You can compute the alternative subsets in one pass as follows:</p>
221      *
222      * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
223      * {@code C} holding {@code s} and {@code ctx} fixed.</p>
224      *
225      * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p>
226      *
227      * <pre>
228      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
229      * alt and not pred
230      * </pre>
231      *
232      * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p>
233      *
234      * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with
235      * {@code s} and {@code ctx}.</p>
236      *
237      * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If
238      * the union of these alternative subsets is a singleton, then no amount of
239      * more lookahead will help us. We will always pick that alternative. If,
240      * however, there is more than one alternative, then we are uncertain which
241      * alternative to predict and must continue looking for resolution. We may
242      * or may not discover an ambiguity in the future, even if there are no
243      * conflicting subsets this round.</p>
244      *
245      * <p>The biggest sin is to terminate early because it means we've made a
246      * decision but were uncertain as to the eventual outcome. We haven't used
247      * enough lookahead. On the other hand, announcing a conflict too late is no
248      * big deal; you will still have the conflict. It's just inefficient. It
249      * might even look until the end of file.</p>
250      *
251      * <p>No special consideration for semantic predicates is required because
252      * predicates are evaluated on-the-fly for full LL prediction, ensuring that
253      * no configuration contains a semantic context during the termination
254      * check.</p>
255      *
256      * <p><strong>CONFLICTING CONFIGS</strong></p>
257      *
258      * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
259      * when {@code i!=j} but {@code x=x'}. Because we merge all
260      * {@code (s, i, _)} configurations together, that means that there are at
261      * most {@code n} configurations associated with state {@code s} for
262      * {@code n} possible alternatives in the decision. The merged stacks
263      * complicate the comparison of configuration contexts {@code x} and
264      * {@code x'}. Sam checks to see if one is a subset of the other by calling
265      * merge and checking to see if the merged result is either {@code x} or
266      * {@code x'}. If the {@code x} associated with lowest alternative {@code i}
267      * is the superset, then {@code i} is the only possible prediction since the
268      * others resolve to {@code min(i)} as well. However, if {@code x} is
269      * associated with {@code j>i} then at least one stack configuration for
270      * {@code j} is not in conflict with alternative {@code i}. The algorithm
271      * should keep going, looking for more lookahead due to the uncertainty.</p>
272      *
273      * <p>For simplicity, I'm doing a equality check between {@code x} and
274      * {@code x'} that lets the algorithm continue to consume lookahead longer
275      * than necessary. The reason I like the equality is of course the
276      * simplicity but also because that is the test you need to detect the
277      * alternatives that are actually in conflict.</p>
278      *
279      * <p><strong>CONTINUE/STOP RULE</strong></p>
280      *
281      * <p>Continue if union of resolved alternative sets from non-conflicting and
282      * conflicting alternative subsets has more than one alternative. We are
283      * uncertain about which alternative to predict.</p>
284      *
285      * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
286      * alternatives are still in the running for the amount of input we've
287      * consumed at this point. The conflicting sets let us to strip away
288      * configurations that won't lead to more states because we resolve
289      * conflicts to the configuration with a minimum alternate for the
290      * conflicting set.</p>
291      *
292      * <p><strong>CASES</strong></p>
293      *
294      * <ul>
295      *
296      * <li>no conflicts and more than 1 alternative in set =&gt; continue</li>
297      *
298      * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
299      * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
300      * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
301      * {@code {1,3}} =&gt; continue
302      * </li>
303      *
304      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
305      * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
306      * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
307      * {@code {1}} =&gt; stop and predict 1</li>
308      *
309      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
310      * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
311      * {@code {1}} = {@code {1}} =&gt; stop and predict 1, can announce
312      * ambiguity {@code {1,2}}</li>
313      *
314      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
315      * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
316      * {@code {2}} = {@code {1,2}} =&gt; continue</li>
317      *
318      * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
319      * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
320      * {@code {3}} = {@code {1,3}} =&gt; continue</li>
321      *
322      * </ul>
323      *
324      * <p><strong>EXACT AMBIGUITY DETECTION</strong></p>
325      *
326      * <p>If all states report the same conflicting set of alternatives, then we
327      * know we have the exact ambiguity set.</p>
328      *
329      * <p><code>|A_<em>i</em>|&gt;1</code> and
330      * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p>
331      *
332      * <p>In other words, we continue examining lookahead until all {@code A_i}
333      * have more than one alternative and all {@code A_i} are the same. If
334      * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
335      * because the resolved set is {@code {1}}. To determine what the real
336      * ambiguity is, we have to know whether the ambiguity is between one and
337      * two or one and three so we keep going. We can only stop prediction when
338      * we need exact ambiguity detection when the sets look like
339      * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p>
340      */
341     public static int resolvesToJustOneViableAlt(BitSet[] altsets)
342     {
343         return getSingleViableAlt(altsets);
344     }
345 
346     /**
347      * Determines if every alternative subset in {@code altsets} contains more
348      * than one alternative.
349      *
350      * @param altsets a collection of alternative subsets
351      * @return {@code true} if every {@link BitSet} in {@code altsets} has
352      * {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
353      */
354     public static bool allSubsetsConflict(BitSet[] altsets)
355     {
356         return !hasNonConflictingAltSet(altsets);
357     }
358 
359     /**
360      * Determines if any single alternative subset in {@code altsets} contains
361      * exactly one alternative.
362      *
363      * @param altsets a collection of alternative subsets
364      * @return {@code true} if {@code altsets} contains a {@link BitSet} with
365      * {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
366      */
367     public static bool hasNonConflictingAltSet(BitSet[] altsets)
368     {
369         foreach (BitSet alts; altsets)
370         {
371             if (alts.cardinality == 1) {
372                 return true;
373             }
374         }
375         return false;
376     }
377 
378     /**
379      * Determines if any single alternative subset in {@code altsets} contains
380      * more than one alternative.
381      *
382      * @param altsets a collection of alternative subsets
383      * @return {@code true} if {@code altsets} contains a {@link BitSet} with
384      * {@link BitSet#cardinality cardinality} &gt; 1, otherwise {@code false}
385      */
386     public static bool hasConflictingAltSet(BitSet[] altsets)
387     {
388         foreach (BitSet alts; altsets) {
389             if (alts.cardinality >1) {
390                 return true;
391             }
392         }
393         return false;
394     }
395 
396     /**
397      * Determines if every alternative subset in {@code altsets} is equivalent.
398      *
399      * @param altsets a collection of alternative subsets
400      * @return {@code true} if every member of {@code altsets} is equal to the
401      * others, otherwise {@code false}
402      */
403     public static bool allSubsetsEqual(BitSet[] altsets)
404     {
405         BitSet first;
406         foreach (i, el; altsets) {
407             if (i == 0)
408                 first = el;
409             else
410                 if (!el.opEquals(first)) return false;
411         }
412         return true;
413     }
414 
415     /**
416      * Returns the unique alternative predicted by all alternative subsets in
417      * {@code altsets}. If no such alternative exists, this method returns
418      * {@link ATN#INVALID_ALT_NUMBER}.
419      *
420      * @param altsets a collection of alternative subsets
421      */
422     public static int getUniqueAlt(BitSet[] altsets)
423     {
424         BitSet all = getAlts(altsets);
425         if (all.cardinality == 1)
426             return all.nextSetBit(0);
427         return ATN.INVALID_ALT_NUMBER;
428     }
429 
430     /**
431      * Gets the complete set of represented alternatives for a collection of
432      * alternative subsets. This method returns the union of each {@link BitSet}
433      * in {@code altsets}.
434      *
435      * @param altsets a collection of alternative subsets
436      * @return the set of represented alternatives in {@code altsets}
437      */
438     public static BitSet getAlts(BitSet[] altsets)
439     {
440         BitSet all;
441         foreach (BitSet alts; altsets) {
442             all = all.or(alts);
443         }
444         return all;
445     }
446 
447     public static BitSet getAlts(ATNConfigSet configs)
448     {
449         BitSet *alts;
450         alts = new BitSet;
451         foreach (ATNConfig config; configs.configs) {
452             alts.set(config.alt, true);
453         }
454         return *alts;
455     }
456 
457     /**
458      * This function gets the conflicting alt subsets from a configuration set.
459      * For each configuration {@code c} in {@code configs}:
460      *
461      * <pre>
462      * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses stack and context, not
463      * alt and not pred
464      * </pre>
465      */
466     public static BitSet[] getConflictingAltSubsets(ATNConfigSet configs)
467     {
468         AltAndContextMap configToAlts;
469         BitSet *alts;
470 
471         foreach (ATNConfig c; configs.configs) {
472             auto c_copy = new ATNConfig(c);
473             if (!configToAlts.hasKey(c_copy))
474                 {
475                     alts = new BitSet();
476                     alts.set(0, false); //  initialise alts
477                     configToAlts.put(c_copy, *alts);
478                 }
479             else {
480                 *alts = configToAlts.get(c_copy);
481             }
482             alts.set(c_copy.alt, true);
483         }
484         return configToAlts.altAndContextMap.values;
485     }
486 
487     /**
488      * Get a map from state to alt subset from a configuration set. For each
489      * configuration {@code c} in {@code configs}:
490      *
491      * <pre>
492      * map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
493      * </pre>
494      */
495     public static BitSet[ATNState] getStateToAltMap(ATNConfigSet configs)
496     {
497         BitSet[ATNState] m;
498         foreach (ATNConfig c; configs.configs) {
499             if (!(c.state in m))
500             {
501                 BitSet alts;
502                 alts.set(c.alt, true);
503                 m[c.state] = alts;
504             }
505             else
506             {
507                 m[c.state].set(c.alt, true);
508             }
509         }
510         return m;
511     }
512 
513     public static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs)
514     {
515         BitSet[ATNState] x = getStateToAltMap(configs);
516         foreach (alts; x.values) {
517             if (alts.cardinality == 1)
518                 return true;
519         }
520         return false;
521     }
522 
523     public static int getSingleViableAlt(BitSet[] altsets)
524     {
525         BitSet viableAlts;
526         foreach (BitSet alts; altsets) {
527             int minAlt = alts.nextSetBit(0);
528             viableAlts.set(minAlt, true);
529             if (viableAlts.cardinality > 1) { // more than 1 viable alt
530                 return ATN.INVALID_ALT_NUMBER;
531             }
532         }
533         return viableAlts.nextSetBit(0);
534     }
535 
536 }