antlr.v4.runtime.ParserInterpreter source code

1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.ParserInterpreter;
8 
9 import std.typecons;
10 import std.format;
11 import std.container : DList;
12 import antlr.v4.runtime.Parser;
13 import antlr.v4.runtime.CharStream;
14 import antlr.v4.runtime.InterpreterRuleContext;
15 import antlr.v4.runtime.ParserRuleContext;
16 import antlr.v4.runtime.RecognitionException;
17 import antlr.v4.runtime.FailedPredicateException;
18 import antlr.v4.runtime.UnsupportedOperationException;
19 import antlr.v4.runtime.InputMismatchException;
20 import antlr.v4.runtime.Token;
21 import antlr.v4.runtime.TokenConstantDefinition;
22 import antlr.v4.runtime.TokenStream;
23 import antlr.v4.runtime.TokenSource;
24 import antlr.v4.runtime.Vocabulary;
25 import antlr.v4.runtime.VocabularyImpl;
26 import antlr.v4.runtime.atn.ATN;
27 import antlr.v4.runtime.atn.ATNState;
28 import antlr.v4.runtime.atn.AtomTransition;
29 import antlr.v4.runtime.atn.StateNames;
30 import antlr.v4.runtime.atn.StarLoopEntryState;
31 import antlr.v4.runtime.atn.ActionTransition;
32 import antlr.v4.runtime.atn.RuleTransition;
33 import antlr.v4.runtime.atn.PredicateTransition;
34 import antlr.v4.runtime.atn.PrecedencePredicateTransition;
35 import antlr.v4.runtime.atn.Transition;
36 import antlr.v4.runtime.atn.LoopEndState;
37 import antlr.v4.runtime.atn.ParserATNSimulator;
38 import antlr.v4.runtime.atn.RuleStartState;
39 import antlr.v4.runtime.atn.TransitionStates;
40 import antlr.v4.runtime.dfa.DFA;
41 import antlr.v4.runtime.atn.DecisionState;
42 import antlr.v4.runtime.atn.PredictionContextCache;
43 
44 alias ParentContextPair = Tuple!(ParserRuleContext, "a", int, "b");
45 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b");
46 
47 // Class ParserInterpreter
48 /**
49  * @uml
50  * A parser simulator that mimics what ANTLR's generated
51  * parser code does. A ParserATNSimulator is used to make
52  * predictions via adaptivePredict but this class moves a pointer through the
53  * ATN to simulate parsing. ParserATNSimulator just
54  * makes us efficient rather than having to backtrack, for example.
55  *
56  * This properly creates parse trees even for left recursive rules.
57  *
58  * We rely on the left recursive rule invocation and special predicate
59  * transitions to make left recursive rules work.
60  *
61  * See TestParserInterpreter for examples.
62  */
63 class ParserInterpreter : Parser
64 {
65 
66     protected string grammarFileName;
67 
68     protected ATN atn;
69 
70     /**
71      * @uml
72      * not shared like it is for generated parsers
73      */
74     protected DFA[] decisionToDFA;
75 
76     protected PredictionContextCache sharedContextCache ;
77 
78     protected string[] tokenNames;
79 
80     protected string[] ruleNames;
81 
82     private Vocabulary vocabulary;
83 
84     /**
85      * @uml
86      * This stack corresponds to the _parentctx, _parentState pair of locals
87      * that would exist on call stack frames with a recursive descent parser;
88      * in the generated function for a left-recursive rule you'd see:
89      *
90      *  private EContext e(int _p) throws RecognitionException {
91      *      ParserRuleContext _parentctx = _ctx;    // Pair.a
92      *      int _parentState = getState();          // Pair.b
93      *      ...
94      *   }
95      *
96      * Those values are used to create new recursive rule invocation contexts
97      * associated with left operand of an alt like "expr '*' expr".
98      */
99     protected DList!ParentContextPair _parentContextStack;
100 
101     /**
102      * @uml
103      * We need a map from (decision,inputIndex)->forced alt for computing ambiguous
104      * parse trees. For now, we allow exactly one override.
105      */
106     protected int overrideDecision = -1;
107 
108     protected int overrideDecisionInputIndex = -1;
109 
110     protected int overrideDecisionAlt = -1;
111 
112     /**
113      * @uml
114      * latch and only override once; error might trigger infinite loop
115      */
116     protected bool overrideDecisionReached = false;;
117 
118     /**
119      * @uml
120      * What is the current context when we override a decisions?
121      * This tellsus what the root of the parse tree is when using override
122      * for an ambiguity/lookahead check.
123      */
124     protected InterpreterRuleContext overrideDecisionRoot = null;
125 
126     protected InterpreterRuleContext rootContext;
127 
128     /**
129      * @uml
130      * deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead.
131      */
132     public this(string grammarFileName, string[] tokenNames, string[] ruleNames, ATN atn,
133         TokenStream input)
134     {
135         this(grammarFileName,
136              VocabularyImpl.fromTokenNames(tokenNames),
137              ruleNames, atn, input);
138     }
139 
140     public this(string grammarFileName, Vocabulary vocabulary, string[] ruleNames, ATN atn,
141         TokenStream input)
142     {
143 	super(input);
144         this.grammarFileName = grammarFileName;
145         this.atn = atn;
146         for (int i = 0; i < tokenNames.length; i++) {
147             tokenNames ~= vocabulary.getDisplayName(i);
148         }
149 
150         this.ruleNames = ruleNames;
151         this.vocabulary = vocabulary;
152 
153         // init decision DFA
154         int numberOfDecisions = atn.getNumberOfDecisions();
155         for (int i = 0; i < numberOfDecisions; i++) {
156             DecisionState decisionState = atn.getDecisionState(i);
157             decisionToDFA ~= new DFA(decisionState, i);
158         }
159 
160         // get atn simulator that knows how to do predictions
161         setInterpreter(new ParserATNSimulator(this, atn,
162                                               decisionToDFA,
163                                               sharedContextCache));
164     }
165 
166     /**
167      * @uml
168      * @override
169      */
170     public override void reset()
171     {
172         super.reset();
173         overrideDecisionReached = false;
174         overrideDecisionRoot = null;
175     }
176 
177     /**
178      * @uml
179      * @override
180      */
181     public override ATN getATN()
182     {
183         return atn;
184     }
185 
186     /**
187      * @uml
188      * @override
189      */
190     public override string[] getTokenNames()
191     {
192         return tokenNames;
193     }
194 
195     /**
196      * @uml
197      * @override
198      */
199     public override Vocabulary getVocabulary()
200     {
201         return vocabulary;
202     }
203 
204     /**
205      * @uml
206      * @override
207      */
208     public override string[] getRuleNames()
209     {
210         return ruleNames;
211     }
212 
213     /**
214      * @uml
215      * @override
216      */
217     public override string getGrammarFileName()
218     {
219         return grammarFileName;
220     }
221 
222     /**
223      * @uml
224      * Begin parsing at startRuleIndex
225      */
226     public ParserRuleContext parse(int startRuleIndex)
227     {
228 	RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex];
229         rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
230         if (startRuleStartState.isLeftRecursiveRule) {
231             enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
232         }
233         else {
234             enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex);
235         }
236 
237         while ( true ) {
238             ATNState p = getATNState();
239             switch ( p.getStateType() ) {
240             case StateNames.RULE_STOP :
241                 // pop; return from rule
242                 if (ctx_.isEmpty() ) {
243                     if (startRuleStartState.isLeftRecursiveRule) {
244                         ParserRuleContext result = ctx_;
245                         ParentContextPair parentContext = _parentContextStack.back;
246                         _parentContextStack.removeBack();
247                         unrollRecursionContexts(parentContext.a);
248                         return result;
249                     }
250                     else {
251                         exitRule();
252                         return rootContext;
253                     }
254                 }
255                 visitRuleStopState(p);
256                 break;
257             default :
258                 try {
259                     visitState(p);
260                 }
261                 catch (RecognitionException e) {
262                     setState(atn.ruleToStopState[p.ruleIndex].stateNumber);
263                     ctx_.exception = e;
264                     getErrorHandler.reportError(this, e);
265                     recover(e);
266                 }
267 
268                      break;
269             }
270         }
271     }
272 
273     /**
274      * @uml
275      * @override
276      */
277     public override void enterRecursionRule(ParserRuleContext localctx, int state, int ruleIndex,
278         int precedence)
279     {
280         ParentContextPair pair = tuple(ctx_, localctx.invokingState);
281         _parentContextStack.insert(pair);
282         super.enterRecursionRule(localctx, state, ruleIndex, precedence);
283     }
284 
285     protected ATNState getATNState()
286     {
287         return atn.states[getState];
288     }
289 
290     protected void visitState(ATNState p)
291     {
292         //		System.out.println("visitState "+p.stateNumber);
293         int predictedAlt = 1;
294         if (p.classinfo == DecisionState.classinfo) {
295             predictedAlt = visitDecisionState(cast(DecisionState) p);
296         }
297 
298         Transition transition = p.transition(predictedAlt - 1);
299         switch (transition.getSerializationType()) {
300         case TransitionStates.EPSILON:
301             if ( p.getStateType()== StateNames.STAR_LOOP_ENTRY &&
302                  (cast(StarLoopEntryState)p).isPrecedenceDecision &&
303                  !(transition.target.classinfo == LoopEndState.classinfo))
304                 {
305                     // We are at the start of a left recursive rule's (...)* loop
306                     // and we're not taking the exit branch of loop.
307                     InterpreterRuleContext localctx =
308                         createInterpreterRuleContext(_parentContextStack.front.a,
309                                                      _parentContextStack.front.b,
310                                                      ctx_.getRuleIndex());
311                     pushNewRecursionContext(localctx,
312                                             atn.ruleToStartState[p.ruleIndex].stateNumber,
313                                             ctx_.getRuleIndex());
314                 }
315             break;
316         case TransitionStates.ATOM:
317             match((cast(AtomTransition)transition)._label);
318             break;
319         case TransitionStates.RANGE:
320         case TransitionStates.SET:
321         case TransitionStates.NOT_SET:
322             if (!transition.matches(_input.LA(1), TokenConstantDefinition.MIN_USER_TOKEN_TYPE, 65535)) {
323                 recoverInline();
324             }
325             matchWildcard();
326             break;
327 
328         case TransitionStates.WILDCARD:
329             matchWildcard();
330             break;
331 
332         case TransitionStates.RULE:
333             RuleStartState ruleStartState = cast(RuleStartState)transition.target;
334             int ruleIndex = ruleStartState.ruleIndex;
335             InterpreterRuleContext newctx = createInterpreterRuleContext(ctx_, p.stateNumber, ruleIndex);
336             if (ruleStartState.isLeftRecursiveRule) {
337                 enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, (cast(RuleTransition)transition).precedence);
338             }
339             else {
340                 enterRule(newctx, transition.target.stateNumber, ruleIndex);
341             }
342             break;
343 
344         case TransitionStates.PREDICATE:
345             PredicateTransition predicateTransition = cast(PredicateTransition)transition;
346             if (!sempred(ctx_, predicateTransition.ruleIndex, predicateTransition.predIndex)) {
347                 throw new FailedPredicateException(this);
348             }
349 
350             break;
351         case TransitionStates.ACTION:
352             ActionTransition actionTransition = cast(ActionTransition)transition;
353             action(ctx_, actionTransition.ruleIndex, actionTransition.actionIndex);
354             break;
355         case TransitionStates.PRECEDENCE:
356             if (!precpred(ctx_, (cast(PrecedencePredicateTransition)transition).precedence)) {
357                 throw new FailedPredicateException(this, format("precpred(ctx_, %d)", (cast(PrecedencePredicateTransition)transition).precedence));
358             }
359             break;
360         default:
361             throw new UnsupportedOperationException("Unrecognized ATN transition type.");
362         }
363         setState(transition.target.stateNumber);
364     }
365 
366     protected int visitDecisionState(DecisionState p)
367     {
368 	int predictedAlt = 1;
369         if (p.getNumberOfTransitions() > 1) {
370             getErrorHandler.sync(this);
371             int decision = p.decision;
372             if (decision == overrideDecision && _input.index() == overrideDecisionInputIndex &&
373                 !overrideDecisionReached )
374                 {
375                     predictedAlt = overrideDecisionAlt;
376                     overrideDecisionReached = true;
377                 }
378             else {
379                 predictedAlt = getInterpreter().adaptivePredict(_input, decision, ctx_);
380             }
381         }
382         return predictedAlt;
383     }
384 
385     /**
386      * @uml
387      * Provide simple "factory" for InterpreterRuleContext's.
388      */
389     protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent,
390         int invokingStateNumber, int ruleIndex)
391     {
392         return new InterpreterRuleContext(parent, invokingStateNumber, ruleIndex);
393     }
394 
395     protected void visitRuleStopState(ATNState p)
396     {
397     }
398 
399     /**
400      * @uml
401      * Override this parser interpreters normal decision-making process
402      * at a particular decision and input token index. Instead of
403      * allowing the adaptive prediction mechanism to choose the
404      * first alternative within a block that leads to a successful parse,
405      * force it to take the alternative, 1..n for n alternatives.
406      *
407      * As an implementation limitation right now, you can only specify one
408      * override. This is sufficient to allow construction of different
409      * parse trees for ambiguous input. It means re-parsing the entire input
410      * in general because you're never sure where an ambiguous sequence would
411      * live in the various parse trees. For example, in one interpretation,
412      * an ambiguous input sequence would be matched completely in expression
413      * but in another it could match all the way back to the root.
414      *
415      * s : e '!'? ;
416      * e : ID
417      *   | ID '!'
418      *   ;
419      *
420      * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
421      * case, the ambiguous sequence is fully contained only by the root.
422      * In the second case, the ambiguous sequences fully contained within just
423      * e, as in: (e ID !).
424      *
425      * Rather than trying to optimize this and make
426      * some intelligent decisions for optimization purposes, I settled on
427      * just re-parsing the whole input and then using
428      * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
429      * subtree that contains the ambiguous sequence. I originally tried to
430      * record the call stack at the point the parser detected and ambiguity but
431      * left recursive rules create a parse tree stack that does not reflect
432      * the actual call stack. That impedance mismatch was enough to make
433      * it it challenging to restart the parser at a deeply nested rule
434      * invocation.
435      *
436      * Only parser interpreters can override decisions so as to avoid inserting
437      * override checking code in the critical ALL(*) prediction execution path.
438      */
439     public void addDecisionOverride(int decision, int tokenIndex, int forcedAlt)
440     {
441         overrideDecision = decision;
442         overrideDecisionInputIndex = tokenIndex;
443         overrideDecisionAlt = forcedAlt;
444     }
445 
446     public InterpreterRuleContext getOverrideDecisionRoot()
447     {
448         return overrideDecisionRoot;
449     }
450 
451     /**
452      * @uml
453      * Rely on the error handler for this parser but, if no tokens are consumed
454      * to recover, add an error node. Otherwise, nothing is seen in the parse
455      * tree.
456      */
457     protected void recover(RecognitionException e)
458     {
459         TokenFactorySourcePair tokenFactorySourcePair;
460 	int i = _input.index();
461         getErrorHandler.recover(this, e);
462         if ( _input.index()==i ) {
463             // no input consumed, better add an error node
464             if (e.classinfo == InputMismatchException.classinfo) {
465                 InputMismatchException ime = cast(InputMismatchException)e;
466                 Token tok = e.getOffendingToken();
467                 int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element
468                 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream());
469                 auto errToken =
470                     tokenFactory().create(tokenFactorySourcePair,
471                                              expectedTokenType, tok.getText(),
472                                              TokenConstantDefinition.DEFAULT_CHANNEL,
473                                              -1, -1, // invalid start/stop
474                                              tok.getLine(), tok.getCharPositionInLine());
475                 ctx_.addErrorNode(errToken);
476             }
477             else { // NoViableAlt
478                 auto tok = e.getOffendingToken;
479                 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream());
480                 auto errToken =
481                     tokenFactory().create(tokenFactorySourcePair,
482                                              TokenConstantDefinition.INVALID_TYPE, tok.getText(),
483                                              TokenConstantDefinition.DEFAULT_CHANNEL,
484                                              -1, -1, // invalid start/stop
485                                              tok.getLine(), tok.getCharPositionInLine());
486                 ctx_.addErrorNode(errToken);
487             }
488         }
489     }
490 
491     protected Token recoverInline()
492     {
493         return _errHandler.recoverInline(this);
494     }
495 
496     /**
497      * @uml
498      * Return the root of the parse, which can be useful if the parser
499      * bails out. You still can access the top node. Note that,
500      * because of the way left recursive rules add children, it's possible
501      * that the root will not have any children if the start rule immediately
502      * called and left recursive rule that fails.
503      */
504     public InterpreterRuleContext getRootContext()
505     {
506         return rootContext;
507     }
508 
509 }