antlr.v4.runtime.ParserInterpreter source code

1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.ParserInterpreter;
8 
9 import std.typecons;
10 import std.format;
11 import std.container : DList;
12 import antlr.v4.runtime.Parser;
13 import antlr.v4.runtime.CharStream;
14 import antlr.v4.runtime.InterpreterRuleContext;
15 import antlr.v4.runtime.ParserRuleContext;
16 import antlr.v4.runtime.RecognitionException;
17 import antlr.v4.runtime.FailedPredicateException;
18 import antlr.v4.runtime.UnsupportedOperationException;
19 import antlr.v4.runtime.InputMismatchException;
20 import antlr.v4.runtime.Token;
21 import antlr.v4.runtime.TokenConstantDefinition;
22 import antlr.v4.runtime.TokenStream;
23 import antlr.v4.runtime.TokenSource;
24 import antlr.v4.runtime.Vocabulary;
25 import antlr.v4.runtime.VocabularyImpl;
26 import antlr.v4.runtime.atn.ATN;
27 import antlr.v4.runtime.atn.ATNState;
28 import antlr.v4.runtime.atn.AtomTransition;
29 import antlr.v4.runtime.atn.StateNames;
30 import antlr.v4.runtime.atn.StarLoopEntryState;
31 import antlr.v4.runtime.atn.ActionTransition;
32 import antlr.v4.runtime.atn.RuleTransition;
33 import antlr.v4.runtime.atn.PredicateTransition;
34 import antlr.v4.runtime.atn.PrecedencePredicateTransition;
35 import antlr.v4.runtime.atn.Transition;
36 import antlr.v4.runtime.atn.LoopEndState;
37 import antlr.v4.runtime.atn.ParserATNSimulator;
38 import antlr.v4.runtime.atn.RuleStartState;
39 import antlr.v4.runtime.atn.TransitionStates;
40 import antlr.v4.runtime.dfa.DFA;
41 import antlr.v4.runtime.atn.DecisionState;
42 import antlr.v4.runtime.atn.PredictionContextCache;
43 
44 alias ParentContextPair = Tuple!(ParserRuleContext, "a", int, "b");
45 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b");
46 
47 /**
48  * @uml
49  * A parser simulator that mimics what ANTLR's generated
50  * parser code does. A ParserATNSimulator is used to make
51  * predictions via adaptivePredict but this class moves a pointer through the
52  * ATN to simulate parsing. ParserATNSimulator just
53  * makes us efficient rather than having to backtrack, for example.
54  *
55  * This properly creates parse trees even for left recursive rules.
56  *
57  * We rely on the left recursive rule invocation and special predicate
58  * transitions to make left recursive rules work.
59  *
60  * See TestParserInterpreter for examples.
61  */
62 class ParserInterpreter : Parser
63 {
64 
65     protected string grammarFileName;
66 
67     protected ATN atn;
68 
69     /**
70      * @uml
71      * not shared like it is for generated parsers
72      */
73     protected DFA[] decisionToDFA;
74 
75     protected PredictionContextCache sharedContextCache ;
76 
77     protected string[] tokenNames;
78 
79     protected string[] ruleNames;
80 
81     private Vocabulary vocabulary;
82 
83     /**
84      * @uml
85      * This stack corresponds to the _parentctx, _parentState pair of locals
86      * that would exist on call stack frames with a recursive descent parser;
87      * in the generated function for a left-recursive rule you'd see:
88      *
89      *  private EContext e(int _p) throws RecognitionException {
90      *      ParserRuleContext _parentctx = _ctx;    // Pair.a
91      *      int _parentState = getState();          // Pair.b
92      *      ...
93      *   }
94      *
95      * Those values are used to create new recursive rule invocation contexts
96      * associated with left operand of an alt like "expr '*' expr".
97      */
98     protected DList!ParentContextPair _parentContextStack;
99 
100     /**
101      * @uml
102      * We need a map from (decision,inputIndex)->forced alt for computing ambiguous
103      * parse trees. For now, we allow exactly one override.
104      */
105     protected int overrideDecision = -1;
106 
107     protected int overrideDecisionInputIndex = -1;
108 
109     protected int overrideDecisionAlt = -1;
110 
111     /**
112      * @uml
113      * latch and only override once; error might trigger infinite loop
114      */
115     protected bool overrideDecisionReached = false;;
116 
117     /**
118      * @uml
119      * What is the current context when we override a decisions?
120      * This tellsus what the root of the parse tree is when using override
121      * for an ambiguity/lookahead check.
122      */
123     protected InterpreterRuleContext overrideDecisionRoot = null;
124 
125     protected InterpreterRuleContext rootContext;
126 
127     /**
128      * @uml
129      * deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead.
130      */
131     public this(string grammarFileName, string[] tokenNames, string[] ruleNames, ATN atn,
132         TokenStream input)
133     {
134         this(grammarFileName,
135              VocabularyImpl.fromTokenNames(tokenNames),
136              ruleNames, atn, input);
137     }
138 
139     public this(string grammarFileName, Vocabulary vocabulary, string[] ruleNames, ATN atn,
140         TokenStream input)
141     {
142 	super(input);
143         this.grammarFileName = grammarFileName;
144         this.atn = atn;
145         for (int i = 0; i < tokenNames.length; i++) {
146             tokenNames ~= vocabulary.getDisplayName(i);
147         }
148 
149         this.ruleNames = ruleNames;
150         this.vocabulary = vocabulary;
151 
152         // init decision DFA
153         int numberOfDecisions = atn.getNumberOfDecisions();
154         for (int i = 0; i < numberOfDecisions; i++) {
155             DecisionState decisionState = atn.getDecisionState(i);
156             decisionToDFA ~= new DFA(decisionState, i);
157         }
158 
159         // get atn simulator that knows how to do predictions
160         setInterpreter(new ParserATNSimulator(this, atn,
161                                               decisionToDFA,
162                                               sharedContextCache));
163     }
164 
165     /**
166      * @uml
167      * @override
168      */
169     public override void reset()
170     {
171         super.reset();
172         overrideDecisionReached = false;
173         overrideDecisionRoot = null;
174     }
175 
176     /**
177      * @uml
178      * @override
179      */
180     public override ATN getATN()
181     {
182         return atn;
183     }
184 
185     /**
186      * @uml
187      * @override
188      */
189     public override string[] getTokenNames()
190     {
191         return tokenNames;
192     }
193 
194     /**
195      * @uml
196      * @override
197      */
198     public override Vocabulary getVocabulary()
199     {
200         return vocabulary;
201     }
202 
203     /**
204      * @uml
205      * @override
206      */
207     public override string[] getRuleNames()
208     {
209         return ruleNames;
210     }
211 
212     /**
213      * @uml
214      * @override
215      */
216     public override string getGrammarFileName()
217     {
218         return grammarFileName;
219     }
220 
221     /**
222      * @uml
223      * Begin parsing at startRuleIndex
224      */
225     public ParserRuleContext parse(int startRuleIndex)
226     {
227 	RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex];
228         rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
229         if (startRuleStartState.isLeftRecursiveRule) {
230             enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
231         }
232         else {
233             enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex);
234         }
235 
236         while ( true ) {
237             ATNState p = getATNState();
238             switch ( p.getStateType() ) {
239             case StateNames.RULE_STOP :
240                 // pop; return from rule
241                 if (ctx_.isEmpty() ) {
242                     if (startRuleStartState.isLeftRecursiveRule) {
243                         ParserRuleContext result = ctx_;
244                         ParentContextPair parentContext = _parentContextStack.back;
245                         _parentContextStack.removeBack();
246                         unrollRecursionContexts(parentContext.a);
247                         return result;
248                     }
249                     else {
250                         exitRule();
251                         return rootContext;
252                     }
253                 }
254                 visitRuleStopState(p);
255                 break;
256             default :
257                 try {
258                     visitState(p);
259                 }
260                 catch (RecognitionException e) {
261                     setState(atn.ruleToStopState[p.ruleIndex].stateNumber);
262                     ctx_.exception = e;
263                     getErrorHandler.reportError(this, e);
264                     recover(e);
265                 }
266 
267                      break;
268             }
269         }
270     }
271 
272     /**
273      * @uml
274      * @override
275      */
276     public override void enterRecursionRule(ParserRuleContext localctx, int state, int ruleIndex,
277         int precedence)
278     {
279         ParentContextPair pair = tuple(ctx_, localctx.invokingState);
280         _parentContextStack.insert(pair);
281         super.enterRecursionRule(localctx, state, ruleIndex, precedence);
282     }
283 
284     protected ATNState getATNState()
285     {
286         return atn.states[getState];
287     }
288 
289     protected void visitState(ATNState p)
290     {
291         //		System.out.println("visitState "+p.stateNumber);
292         int predictedAlt = 1;
293         if (p.classinfo == DecisionState.classinfo) {
294             predictedAlt = visitDecisionState(cast(DecisionState) p);
295         }
296 
297         Transition transition = p.transition(predictedAlt - 1);
298         switch (transition.getSerializationType()) {
299         case TransitionStates.EPSILON:
300             if ( p.getStateType()== StateNames.STAR_LOOP_ENTRY &&
301                  (cast(StarLoopEntryState)p).isPrecedenceDecision &&
302                  !(transition.target.classinfo == LoopEndState.classinfo))
303                 {
304                     // We are at the start of a left recursive rule's (...)* loop
305                     // and we're not taking the exit branch of loop.
306                     InterpreterRuleContext localctx =
307                         createInterpreterRuleContext(_parentContextStack.front.a,
308                                                      _parentContextStack.front.b,
309                                                      ctx_.getRuleIndex());
310                     pushNewRecursionContext(localctx,
311                                             atn.ruleToStartState[p.ruleIndex].stateNumber,
312                                             ctx_.getRuleIndex());
313                 }
314             break;
315         case TransitionStates.ATOM:
316             match((cast(AtomTransition)transition)._label);
317             break;
318         case TransitionStates.RANGE:
319         case TransitionStates.SET:
320         case TransitionStates.NOT_SET:
321             if (!transition.matches(_input.LA(1), TokenConstantDefinition.MIN_USER_TOKEN_TYPE, 65535)) {
322                 recoverInline();
323             }
324             matchWildcard();
325             break;
326 
327         case TransitionStates.WILDCARD:
328             matchWildcard();
329             break;
330 
331         case TransitionStates.RULE:
332             RuleStartState ruleStartState = cast(RuleStartState)transition.target;
333             int ruleIndex = ruleStartState.ruleIndex;
334             InterpreterRuleContext newctx = createInterpreterRuleContext(ctx_, p.stateNumber, ruleIndex);
335             if (ruleStartState.isLeftRecursiveRule) {
336                 enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, (cast(RuleTransition)transition).precedence);
337             }
338             else {
339                 enterRule(newctx, transition.target.stateNumber, ruleIndex);
340             }
341             break;
342 
343         case TransitionStates.PREDICATE:
344             PredicateTransition predicateTransition = cast(PredicateTransition)transition;
345             if (!sempred(ctx_, predicateTransition.ruleIndex, predicateTransition.predIndex)) {
346                 throw new FailedPredicateException(this);
347             }
348 
349             break;
350         case TransitionStates.ACTION:
351             ActionTransition actionTransition = cast(ActionTransition)transition;
352             action(ctx_, actionTransition.ruleIndex, actionTransition.actionIndex);
353             break;
354         case TransitionStates.PRECEDENCE:
355             if (!precpred(ctx_, (cast(PrecedencePredicateTransition)transition).precedence)) {
356                 throw new FailedPredicateException(this, format("precpred(ctx_, %d)", (cast(PrecedencePredicateTransition)transition).precedence));
357             }
358             break;
359         default:
360             throw new UnsupportedOperationException("Unrecognized ATN transition type.");
361         }
362         setState(transition.target.stateNumber);
363     }
364 
365     protected int visitDecisionState(DecisionState p)
366     {
367 	int predictedAlt = 1;
368         if (p.getNumberOfTransitions() > 1) {
369             getErrorHandler.sync(this);
370             int decision = p.decision;
371             if (decision == overrideDecision && _input.index() == overrideDecisionInputIndex &&
372                 !overrideDecisionReached )
373                 {
374                     predictedAlt = overrideDecisionAlt;
375                     overrideDecisionReached = true;
376                 }
377             else {
378                 predictedAlt = getInterpreter().adaptivePredict(_input, decision, ctx_);
379             }
380         }
381         return predictedAlt;
382     }
383 
384     /**
385      * @uml
386      * Provide simple "factory" for InterpreterRuleContext's.
387      */
388     protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent,
389         int invokingStateNumber, int ruleIndex)
390     {
391         return new InterpreterRuleContext(parent, invokingStateNumber, ruleIndex);
392     }
393 
394     protected void visitRuleStopState(ATNState p)
395     {
396     }
397 
398     /**
399      * @uml
400      * Override this parser interpreters normal decision-making process
401      * at a particular decision and input token index. Instead of
402      * allowing the adaptive prediction mechanism to choose the
403      * first alternative within a block that leads to a successful parse,
404      * force it to take the alternative, 1..n for n alternatives.
405      *
406      * As an implementation limitation right now, you can only specify one
407      * override. This is sufficient to allow construction of different
408      * parse trees for ambiguous input. It means re-parsing the entire input
409      * in general because you're never sure where an ambiguous sequence would
410      * live in the various parse trees. For example, in one interpretation,
411      * an ambiguous input sequence would be matched completely in expression
412      * but in another it could match all the way back to the root.
413      *
414      * s : e '!'? ;
415      * e : ID
416      *   | ID '!'
417      *   ;
418      *
419      * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
420      * case, the ambiguous sequence is fully contained only by the root.
421      * In the second case, the ambiguous sequences fully contained within just
422      * e, as in: (e ID !).
423      *
424      * Rather than trying to optimize this and make
425      * some intelligent decisions for optimization purposes, I settled on
426      * just re-parsing the whole input and then using
427      * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
428      * subtree that contains the ambiguous sequence. I originally tried to
429      * record the call stack at the point the parser detected and ambiguity but
430      * left recursive rules create a parse tree stack that does not reflect
431      * the actual call stack. That impedance mismatch was enough to make
432      * it it challenging to restart the parser at a deeply nested rule
433      * invocation.
434      *
435      * Only parser interpreters can override decisions so as to avoid inserting
436      * override checking code in the critical ALL(*) prediction execution path.
437      */
438     public void addDecisionOverride(int decision, int tokenIndex, int forcedAlt)
439     {
440         overrideDecision = decision;
441         overrideDecisionInputIndex = tokenIndex;
442         overrideDecisionAlt = forcedAlt;
443     }
444 
445     public InterpreterRuleContext getOverrideDecisionRoot()
446     {
447         return overrideDecisionRoot;
448     }
449 
450     /**
451      * @uml
452      * Rely on the error handler for this parser but, if no tokens are consumed
453      * to recover, add an error node. Otherwise, nothing is seen in the parse
454      * tree.
455      */
456     protected void recover(RecognitionException e)
457     {
458         TokenFactorySourcePair tokenFactorySourcePair;
459 	int i = _input.index();
460         getErrorHandler.recover(this, e);
461         if ( _input.index()==i ) {
462             // no input consumed, better add an error node
463             if (e.classinfo == InputMismatchException.classinfo) {
464                 InputMismatchException ime = cast(InputMismatchException)e;
465                 Token tok = e.getOffendingToken();
466                 int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element
467                 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream());
468                 auto errToken =
469                     tokenFactory().create(tokenFactorySourcePair,
470                                              expectedTokenType, tok.getText(),
471                                              TokenConstantDefinition.DEFAULT_CHANNEL,
472                                              -1, -1, // invalid start/stop
473                                              tok.getLine(), tok.getCharPositionInLine());
474                 ctx_.addErrorNode(errToken);
475             }
476             else { // NoViableAlt
477                 auto tok = e.getOffendingToken;
478                 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream());
479                 auto errToken =
480                     tokenFactory().create(tokenFactorySourcePair,
481                                              TokenConstantDefinition.INVALID_TYPE, tok.getText(),
482                                              TokenConstantDefinition.DEFAULT_CHANNEL,
483                                              -1, -1, // invalid start/stop
484                                              tok.getLine(), tok.getCharPositionInLine());
485                 ctx_.addErrorNode(errToken);
486             }
487         }
488     }
489 
490     protected Token recoverInline()
491     {
492         return _errHandler.recoverInline(this);
493     }
494 
495     /**
496      * @uml
497      * Return the root of the parse, which can be useful if the parser
498      * bails out. You still can access the top node. Note that,
499      * because of the way left recursive rules add children, it's possible
500      * that the root will not have any children if the start rule immediately
501      * called and left recursive rule that fails.
502      */
503     public InterpreterRuleContext getRootContext()
504     {
505         return rootContext;
506     }
507 
508 }