antlr.v4.runtime.ParserInterpreter source code

1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.ParserInterpreter;
8 
9 import std.typecons;
10 import std.format;
11 import std.container : DList;
12 import antlr.v4.runtime.Parser;
13 import antlr.v4.runtime.CharStream;
14 import antlr.v4.runtime.InterpreterRuleContext;
15 import antlr.v4.runtime.ParserRuleContext;
16 import antlr.v4.runtime.RecognitionException;
17 import antlr.v4.runtime.FailedPredicateException;
18 import antlr.v4.runtime.UnsupportedOperationException;
19 import antlr.v4.runtime.InputMismatchException;
20 import antlr.v4.runtime.Token;
21 import antlr.v4.runtime.TokenConstantDefinition;
22 import antlr.v4.runtime.TokenStream;
23 import antlr.v4.runtime.TokenSource;
24 import antlr.v4.runtime.Vocabulary;
25 import antlr.v4.runtime.VocabularyImpl;
26 import antlr.v4.runtime.atn.ATN;
27 import antlr.v4.runtime.atn.ATNState;
28 import antlr.v4.runtime.atn.AtomTransition;
29 import antlr.v4.runtime.atn.StateNames;
30 import antlr.v4.runtime.atn.StarLoopEntryState;
31 import antlr.v4.runtime.atn.ActionTransition;
32 import antlr.v4.runtime.atn.RuleTransition;
33 import antlr.v4.runtime.atn.PredicateTransition;
34 import antlr.v4.runtime.atn.PrecedencePredicateTransition;
35 import antlr.v4.runtime.atn.Transition;
36 import antlr.v4.runtime.atn.LoopEndState;
37 import antlr.v4.runtime.atn.ParserATNSimulator;
38 import antlr.v4.runtime.atn.RuleStartState;
39 import antlr.v4.runtime.atn.TransitionStates;
40 import antlr.v4.runtime.dfa.DFA;
41 import antlr.v4.runtime.atn.DecisionState;
42 import antlr.v4.runtime.atn.PredictionContextCache;
43 
44 alias ParentContextPair = Tuple!(ParserRuleContext, "a", int, "b");
45 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b");
46 
47 /**
48  * @uml
49  * A parser simulator that mimics what ANTLR's generated
50  * parser code does. A ParserATNSimulator is used to make
51  * predictions via adaptivePredict but this class moves a pointer through the
52  * ATN to simulate parsing. ParserATNSimulator just
53  * makes us efficient rather than having to backtrack, for example.
54  *
55  * This properly creates parse trees even for left recursive rules.
56  *
57  * We rely on the left recursive rule invocation and special predicate
58  * transitions to make left recursive rules work.
59  *
60  * See TestParserInterpreter for examples.
61  */
62 class ParserInterpreter : Parser
63 {
64 
65     protected string grammarFileName;
66 
67     protected ATN atn;
68 
69     /**
70      * @uml
71      * not shared like it is for generated parsers
72      */
73     protected DFA[] decisionToDFA;
74 
75     protected PredictionContextCache sharedContextCache ;
76 
77     protected string[] tokenNames;
78 
79     protected string[] ruleNames;
80 
81     private Vocabulary vocabulary;
82 
83     /**
84      * @uml
85      * This stack corresponds to the _parentctx, _parentState pair of locals
86      * that would exist on call stack frames with a recursive descent parser;
87      * in the generated function for a left-recursive rule you'd see:
88      *
89      *  private EContext e(int _p) throws RecognitionException {
90      *      ParserRuleContext _parentctx = _ctx;    // Pair.a
91      *      int _parentState = getState();          // Pair.b
92      *      ...
93      *   }
94      *
95      * Those values are used to create new recursive rule invocation contexts
96      * associated with left operand of an alt like "expr '*' expr".
97      */
98     protected DList!ParentContextPair _parentContextStack;
99 
100     /**
101      * @uml
102      * We need a map from (decision,inputIndex)->forced alt for computing ambiguous
103      * parse trees. For now, we allow exactly one override.
104      */
105     protected int overrideDecision = -1;
106 
107     protected int overrideDecisionInputIndex = -1;
108 
109     protected int overrideDecisionAlt = -1;
110 
111     /**
112      * @uml
113      * latch and only override once; error might trigger infinite loop
114      */
115     protected bool overrideDecisionReached = false;;
116 
117     /**
118      * @uml
119      * What is the current context when we override a decisions?
120      * This tellsus what the root of the parse tree is when using override
121      * for an ambiguity/lookahead check.
122      */
123     protected InterpreterRuleContext overrideDecisionRoot = null;
124 
125     protected InterpreterRuleContext rootContext;
126 
127     /**
128      * @uml
129      * deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead.
130      */
131     public this(string grammarFileName, string[] tokenNames, string[] ruleNames, ATN atn,
132         TokenStream input)
133     {
134         this(grammarFileName,
135              VocabularyImpl.fromTokenNames(tokenNames),
136              ruleNames, atn, input);
137     }
138 
139     public this(string grammarFileName, Vocabulary vocabulary, string[] ruleNames, ATN atn,
140         TokenStream input)
141     {
142     super(input);
143         this.grammarFileName = grammarFileName;
144         this.atn = atn;
145         for (int i = 0; i < tokenNames.length; i++) {
146             tokenNames ~= vocabulary.getDisplayName(i);
147         }
148 
149         this.ruleNames = ruleNames;
150         this.vocabulary = vocabulary;
151 
152         // init decision DFA
153         int numberOfDecisions = atn.getNumberOfDecisions();
154         for (int i = 0; i < numberOfDecisions; i++) {
155             DecisionState decisionState = atn.getDecisionState(i);
156             decisionToDFA ~= new DFA(decisionState, i);
157         }
158 
159         // get atn simulator that knows how to do predictions
160         setInterpreter(new ParserATNSimulator(this, atn,
161                                               decisionToDFA,
162                                               sharedContextCache));
163     }
164 
165     /**
166      * @uml
167      * @override
168      */
169     public override void reset()
170     {
171         super.reset();
172         overrideDecisionReached = false;
173         overrideDecisionRoot = null;
174     }
175 
176     /**
177      * @uml
178      * @override
179      */
180     public override ATN getATN()
181     {
182         return atn;
183     }
184 
185     /**
186      * @uml
187      * @override
188      */
189     public override string[] getTokenNames()
190     {
191         return tokenNames;
192     }
193 
194     /**
195      * @uml
196      * @override
197      */
198     public override Vocabulary getVocabulary()
199     {
200         return vocabulary;
201     }
202 
203     /**
204      * @uml
205      * @override
206      */
207     public override string[] getRuleNames()
208     {
209         return ruleNames;
210     }
211 
212     /**
213      * @uml
214      * @override
215      */
216     public override string getGrammarFileName()
217     {
218         return grammarFileName;
219     }
220 
221     /**
222      * @uml
223      * Begin parsing at startRuleIndex
224      */
225     public ParserRuleContext parse(int startRuleIndex)
226     {
227     RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex];
228         rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
229         if (startRuleStartState.isLeftRecursiveRule) {
230             enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
231         }
232         else {
233             enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex);
234         }
235 
236         while ( true ) {
237             ATNState p = getATNState();
238             switch ( p.getStateType() ) {
239             case StateNames.RULE_STOP :
240                 // pop; return from rule
241                 if (ctx_.isEmpty() ) {
242                     if (startRuleStartState.isLeftRecursiveRule) {
243                         ParserRuleContext result = ctx_;
244                         ParentContextPair parentContext = _parentContextStack.back;
245                         _parentContextStack.removeBack();
246                         unrollRecursionContexts(parentContext.a);
247                         return result;
248                     }
249                     else {
250                         exitRule();
251                         return rootContext;
252                     }
253                 }
254                 visitRuleStopState(p);
255                 break;
256             default :
257                 try {
258                     visitState(p);
259                 }
260                 catch (RecognitionException e) {
261                     setState(atn.ruleToStopState[p.ruleIndex].stateNumber);
262                     ctx_.exception = e;
263                     getErrorHandler.reportError(this, e);
264                     recover(e);
265                 }
266 
267                      break;
268             }
269         }
270     }
271 
272     /**
273      * @uml
274      * @override
275      */
276     public override void enterRecursionRule(ParserRuleContext localctx, int state, int ruleIndex,
277         int precedence)
278     {
279         ParentContextPair pair = tuple(ctx_, localctx.invokingState);
280         _parentContextStack.insert(pair);
281         super.enterRecursionRule(localctx, state, ruleIndex, precedence);
282     }
283 
284     protected ATNState getATNState()
285     {
286         return atn.states[getState];
287     }
288 
289     protected void visitState(ATNState p)
290     {
291         //      System.out.println("visitState "+p.stateNumber);
292         int predictedAlt = 1;
293         if (cast(DecisionState)p) {
294             predictedAlt = visitDecisionState(cast(DecisionState)p);
295         }
296 
297         Transition transition = p.transition(predictedAlt - 1);
298         switch (transition.getSerializationType()) {
299         case TransitionStates.EPSILON:
300             if ( p.getStateType()== StateNames.STAR_LOOP_ENTRY &&
301                  (cast(StarLoopEntryState)p).isPrecedenceDecision &&
302                  !cast(LoopEndState)transition.target)
303                 {
304                     // We are at the start of a left recursive rule's (...)* loop
305                     // and we're not taking the exit branch of loop.
306                     InterpreterRuleContext localctx =
307                         createInterpreterRuleContext(_parentContextStack.front.a,
308                                                      _parentContextStack.front.b,
309                                                      ctx_.getRuleIndex());
310                     pushNewRecursionContext(localctx,
311                                             atn.ruleToStartState[p.ruleIndex].stateNumber,
312                                             ctx_.getRuleIndex());
313                 }
314             break;
315         case TransitionStates.ATOM:
316             match((cast(AtomTransition)transition)._label);
317             break;
318         case TransitionStates.RANGE:
319         case TransitionStates.SET:
320         case TransitionStates.NOT_SET:
321             if (!transition.matches(_input.LA(1), TokenConstantDefinition.MIN_USER_TOKEN_TYPE, 65535)) {
322                 recoverInline();
323             }
324             matchWildcard();
325             break;
326 
327         case TransitionStates.WILDCARD:
328             matchWildcard();
329             break;
330 
331         case TransitionStates.RULE:
332             RuleStartState ruleStartState = cast(RuleStartState)transition.target;
333             int ruleIndex = ruleStartState.ruleIndex;
334             InterpreterRuleContext newctx = createInterpreterRuleContext(ctx_, p.stateNumber, ruleIndex);
335             if (ruleStartState.isLeftRecursiveRule) {
336                 enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, (cast(RuleTransition)transition).precedence);
337             }
338             else {
339                 enterRule(newctx, transition.target.stateNumber, ruleIndex);
340             }
341             break;
342 
343         case TransitionStates.PREDICATE:
344             PredicateTransition predicateTransition = cast(PredicateTransition)transition;
345             if (!sempred(ctx_, predicateTransition.ruleIndex, predicateTransition.predIndex)) {
346                 throw new FailedPredicateException(this);
347             }
348 
349             break;
350         case TransitionStates.ACTION:
351             ActionTransition actionTransition = cast(ActionTransition)transition;
352             action(ctx_, actionTransition.ruleIndex, actionTransition.actionIndex);
353             break;
354         case TransitionStates.PRECEDENCE:
355             if (!precpred(ctx_, (cast(PrecedencePredicateTransition)transition).precedence)) {
356                 throw new FailedPredicateException(this, format("precpred(ctx_, %d)", (cast(PrecedencePredicateTransition)transition).precedence));
357             }
358             break;
359         default:
360             throw new UnsupportedOperationException("Unrecognized ATN transition type.");
361         }
362         setState(transition.target.stateNumber);
363     }
364 
365     protected int visitDecisionState(DecisionState p)
366     {
367     int predictedAlt = 1;
368         if (p.getNumberOfTransitions() > 1) {
369             getErrorHandler.sync(this);
370             int decision = p.decision;
371             if (decision == overrideDecision && _input.index() == overrideDecisionInputIndex &&
372                 !overrideDecisionReached )
373                 {
374                     predictedAlt = overrideDecisionAlt;
375                     overrideDecisionReached = true;
376                 }
377             else {
378                 predictedAlt = getInterpreter().adaptivePredict(_input, decision, ctx_);
379             }
380         }
381         return predictedAlt;
382     }
383 
384     /**
385      * @uml
386      * Provide simple "factory" for InterpreterRuleContext's.
387      */
388     protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent,
389         int invokingStateNumber, size_t ruleIndex)
390     {
391         return new InterpreterRuleContext(parent, invokingStateNumber, ruleIndex);
392     }
393 
394     protected void visitRuleStopState(ATNState p)
395     {
396         RuleStartState ruleStartState = atn.ruleToStartState[p.ruleIndex];
397         if (ruleStartState.isLeftRecursiveRule) {
398             ParentContextPair parentContext = _parentContextStack.back;
399             _parentContextStack.removeBack;
400             unrollRecursionContexts(parentContext.a);
401             setState(parentContext.b);
402         }
403         else {
404             exitRule;
405         }
406 
407         RuleTransition ruleTransition = cast(RuleTransition)atn.states[getState].transition(0);
408         setState(ruleTransition.followState.stateNumber);
409     }
410 
411     /**
412      * @uml
413      * Override this parser interpreters normal decision-making process
414      * at a particular decision and input token index. Instead of
415      * allowing the adaptive prediction mechanism to choose the
416      * first alternative within a block that leads to a successful parse,
417      * force it to take the alternative, 1..n for n alternatives.
418      *
419      * As an implementation limitation right now, you can only specify one
420      * override. This is sufficient to allow construction of different
421      * parse trees for ambiguous input. It means re-parsing the entire input
422      * in general because you're never sure where an ambiguous sequence would
423      * live in the various parse trees. For example, in one interpretation,
424      * an ambiguous input sequence would be matched completely in expression
425      * but in another it could match all the way back to the root.
426      *
427      * s : e '!'? ;
428      * e : ID
429      *   | ID '!'
430      *   ;
431      *
432      * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
433      * case, the ambiguous sequence is fully contained only by the root.
434      * In the second case, the ambiguous sequences fully contained within just
435      * e, as in: (e ID !).
436      *
437      * Rather than trying to optimize this and make
438      * some intelligent decisions for optimization purposes, I settled on
439      * just re-parsing the whole input and then using
440      * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
441      * subtree that contains the ambiguous sequence. I originally tried to
442      * record the call stack at the point the parser detected and ambiguity but
443      * left recursive rules create a parse tree stack that does not reflect
444      * the actual call stack. That impedance mismatch was enough to make
445      * it it challenging to restart the parser at a deeply nested rule
446      * invocation.
447      *
448      * Only parser interpreters can override decisions so as to avoid inserting
449      * override checking code in the critical ALL(*) prediction execution path.
450      */
451     public void addDecisionOverride(int decision, int tokenIndex, int forcedAlt)
452     {
453         overrideDecision = decision;
454         overrideDecisionInputIndex = tokenIndex;
455         overrideDecisionAlt = forcedAlt;
456     }
457 
458     public InterpreterRuleContext getOverrideDecisionRoot()
459     {
460         return overrideDecisionRoot;
461     }
462 
463     /**
464      * @uml
465      * Rely on the error handler for this parser but, if no tokens are consumed
466      * to recover, add an error node. Otherwise, nothing is seen in the parse
467      * tree.
468      */
469     protected void recover(RecognitionException e)
470     {
471         TokenFactorySourcePair tokenFactorySourcePair;
472         auto i = _input.index();
473         getErrorHandler.recover(this, e);
474         if ( _input.index()==i ) {
475             // no input consumed, better add an error node
476             if (cast(InputMismatchException)e) {
477                 InputMismatchException ime = cast(InputMismatchException)e;
478                 Token tok = e.getOffendingToken();
479                 int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element
480                 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream());
481                 auto errToken =
482                     tokenFactory().create(tokenFactorySourcePair,
483                                              expectedTokenType, tok.getText(),
484                                              TokenConstantDefinition.DEFAULT_CHANNEL,
485                                              -1, -1, // invalid start/stop
486                                              tok.getLine(), tok.getCharPositionInLine());
487                 ctx_.addErrorNode(errToken);
488             }
489             else { // NoViableAlt
490                 auto tok = e.getOffendingToken;
491                 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream());
492                 auto errToken =
493                     tokenFactory().create(tokenFactorySourcePair,
494                                              TokenConstantDefinition.INVALID_TYPE, tok.getText(),
495                                              TokenConstantDefinition.DEFAULT_CHANNEL,
496                                              -1, -1, // invalid start/stop
497                                              tok.getLine(), tok.getCharPositionInLine());
498                 ctx_.addErrorNode(errToken);
499             }
500         }
501     }
502 
503     protected Token recoverInline()
504     {
505         return _errHandler.recoverInline(this);
506     }
507 
508     /**
509      * @uml
510      * Return the root of the parse, which can be useful if the parser
511      * bails out. You still can access the top node. Note that,
512      * because of the way left recursive rules add children, it's possible
513      * that the root will not have any children if the start rule immediately
514      * called and left recursive rule that fails.
515      */
516     public InterpreterRuleContext getRootContext()
517     {
518         return rootContext;
519     }
520 
521 }