1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.ParserInterpreter; 8 9 import std.typecons; 10 import std.format; 11 import std.container : DList; 12 import antlr.v4.runtime.Parser; 13 import antlr.v4.runtime.CharStream; 14 import antlr.v4.runtime.InterpreterRuleContext; 15 import antlr.v4.runtime.ParserRuleContext; 16 import antlr.v4.runtime.RecognitionException; 17 import antlr.v4.runtime.FailedPredicateException; 18 import antlr.v4.runtime.UnsupportedOperationException; 19 import antlr.v4.runtime.InputMismatchException; 20 import antlr.v4.runtime.Token; 21 import antlr.v4.runtime.TokenConstantDefinition; 22 import antlr.v4.runtime.TokenStream; 23 import antlr.v4.runtime.TokenSource; 24 import antlr.v4.runtime.Vocabulary; 25 import antlr.v4.runtime.VocabularyImpl; 26 import antlr.v4.runtime.atn.ATN; 27 import antlr.v4.runtime.atn.ATNState; 28 import antlr.v4.runtime.atn.AtomTransition; 29 import antlr.v4.runtime.atn.StateNames; 30 import antlr.v4.runtime.atn.StarLoopEntryState; 31 import antlr.v4.runtime.atn.ActionTransition; 32 import antlr.v4.runtime.atn.RuleTransition; 33 import antlr.v4.runtime.atn.PredicateTransition; 34 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 35 import antlr.v4.runtime.atn.Transition; 36 import antlr.v4.runtime.atn.LoopEndState; 37 import antlr.v4.runtime.atn.ParserATNSimulator; 38 import antlr.v4.runtime.atn.RuleStartState; 39 import antlr.v4.runtime.atn.TransitionStates; 40 import antlr.v4.runtime.dfa.DFA; 41 import antlr.v4.runtime.atn.DecisionState; 42 import antlr.v4.runtime.atn.PredictionContextCache; 43 44 alias ParentContextPair = Tuple!(ParserRuleContext, "a", int, "b"); 45 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b"); 46 47 // Class ParserInterpreter 48 /** 49 * @uml 50 * A parser simulator that mimics what ANTLR's generated 51 * parser code does. A ParserATNSimulator is used to make 52 * predictions via adaptivePredict but this class moves a pointer through the 53 * ATN to simulate parsing. ParserATNSimulator just 54 * makes us efficient rather than having to backtrack, for example. 55 * 56 * This properly creates parse trees even for left recursive rules. 57 * 58 * We rely on the left recursive rule invocation and special predicate 59 * transitions to make left recursive rules work. 60 * 61 * See TestParserInterpreter for examples. 62 */ 63 class ParserInterpreter : Parser 64 { 65 66 protected string grammarFileName; 67 68 protected ATN atn; 69 70 /** 71 * @uml 72 * not shared like it is for generated parsers 73 */ 74 protected DFA[] decisionToDFA; 75 76 protected PredictionContextCache sharedContextCache ; 77 78 protected string[] tokenNames; 79 80 protected string[] ruleNames; 81 82 private Vocabulary vocabulary; 83 84 /** 85 * @uml 86 * This stack corresponds to the _parentctx, _parentState pair of locals 87 * that would exist on call stack frames with a recursive descent parser; 88 * in the generated function for a left-recursive rule you'd see: 89 * 90 * private EContext e(int _p) throws RecognitionException { 91 * ParserRuleContext _parentctx = _ctx; // Pair.a 92 * int _parentState = getState(); // Pair.b 93 * ... 94 * } 95 * 96 * Those values are used to create new recursive rule invocation contexts 97 * associated with left operand of an alt like "expr '*' expr". 98 */ 99 protected DList!ParentContextPair _parentContextStack; 100 101 /** 102 * @uml 103 * We need a map from (decision,inputIndex)->forced alt for computing ambiguous 104 * parse trees. For now, we allow exactly one override. 105 */ 106 protected int overrideDecision = -1; 107 108 protected int overrideDecisionInputIndex = -1; 109 110 protected int overrideDecisionAlt = -1; 111 112 /** 113 * @uml 114 * latch and only override once; error might trigger infinite loop 115 */ 116 protected bool overrideDecisionReached = false;; 117 118 /** 119 * @uml 120 * What is the current context when we override a decisions? 121 * This tellsus what the root of the parse tree is when using override 122 * for an ambiguity/lookahead check. 123 */ 124 protected InterpreterRuleContext overrideDecisionRoot = null; 125 126 protected InterpreterRuleContext rootContext; 127 128 /** 129 * @uml 130 * deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead. 131 */ 132 public this(string grammarFileName, string[] tokenNames, string[] ruleNames, ATN atn, 133 TokenStream input) 134 { 135 this(grammarFileName, 136 VocabularyImpl.fromTokenNames(tokenNames), 137 ruleNames, atn, input); 138 } 139 140 public this(string grammarFileName, Vocabulary vocabulary, string[] ruleNames, ATN atn, 141 TokenStream input) 142 { 143 super(input); 144 this.grammarFileName = grammarFileName; 145 this.atn = atn; 146 for (int i = 0; i < tokenNames.length; i++) { 147 tokenNames ~= vocabulary.getDisplayName(i); 148 } 149 150 this.ruleNames = ruleNames; 151 this.vocabulary = vocabulary; 152 153 // init decision DFA 154 int numberOfDecisions = atn.getNumberOfDecisions(); 155 for (int i = 0; i < numberOfDecisions; i++) { 156 DecisionState decisionState = atn.getDecisionState(i); 157 decisionToDFA ~= new DFA(decisionState, i); 158 } 159 160 // get atn simulator that knows how to do predictions 161 setInterpreter(new ParserATNSimulator(this, atn, 162 decisionToDFA, 163 sharedContextCache)); 164 } 165 166 /** 167 * @uml 168 * @override 169 */ 170 public override void reset() 171 { 172 super.reset(); 173 overrideDecisionReached = false; 174 overrideDecisionRoot = null; 175 } 176 177 /** 178 * @uml 179 * @override 180 */ 181 public override ATN getATN() 182 { 183 return atn; 184 } 185 186 /** 187 * @uml 188 * @override 189 */ 190 public override string[] getTokenNames() 191 { 192 return tokenNames; 193 } 194 195 /** 196 * @uml 197 * @override 198 */ 199 public override Vocabulary getVocabulary() 200 { 201 return vocabulary; 202 } 203 204 /** 205 * @uml 206 * @override 207 */ 208 public override string[] getRuleNames() 209 { 210 return ruleNames; 211 } 212 213 /** 214 * @uml 215 * @override 216 */ 217 public override string getGrammarFileName() 218 { 219 return grammarFileName; 220 } 221 222 /** 223 * @uml 224 * Begin parsing at startRuleIndex 225 */ 226 public ParserRuleContext parse(int startRuleIndex) 227 { 228 RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex]; 229 rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex); 230 if (startRuleStartState.isLeftRecursiveRule) { 231 enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0); 232 } 233 else { 234 enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex); 235 } 236 237 while ( true ) { 238 ATNState p = getATNState(); 239 switch ( p.getStateType() ) { 240 case StateNames.RULE_STOP : 241 // pop; return from rule 242 if (ctx_.isEmpty() ) { 243 if (startRuleStartState.isLeftRecursiveRule) { 244 ParserRuleContext result = ctx_; 245 ParentContextPair parentContext = _parentContextStack.back; 246 _parentContextStack.removeBack(); 247 unrollRecursionContexts(parentContext.a); 248 return result; 249 } 250 else { 251 exitRule(); 252 return rootContext; 253 } 254 } 255 visitRuleStopState(p); 256 break; 257 default : 258 try { 259 visitState(p); 260 } 261 catch (RecognitionException e) { 262 setState(atn.ruleToStopState[p.ruleIndex].stateNumber); 263 ctx_.exception = e; 264 getErrorHandler.reportError(this, e); 265 recover(e); 266 } 267 268 break; 269 } 270 } 271 } 272 273 /** 274 * @uml 275 * @override 276 */ 277 public override void enterRecursionRule(ParserRuleContext localctx, int state, int ruleIndex, 278 int precedence) 279 { 280 ParentContextPair pair = tuple(ctx_, localctx.invokingState); 281 _parentContextStack.insert(pair); 282 super.enterRecursionRule(localctx, state, ruleIndex, precedence); 283 } 284 285 protected ATNState getATNState() 286 { 287 return atn.states[getState]; 288 } 289 290 protected void visitState(ATNState p) 291 { 292 // System.out.println("visitState "+p.stateNumber); 293 int predictedAlt = 1; 294 if (p.classinfo == DecisionState.classinfo) { 295 predictedAlt = visitDecisionState(cast(DecisionState) p); 296 } 297 298 Transition transition = p.transition(predictedAlt - 1); 299 switch (transition.getSerializationType()) { 300 case TransitionStates.EPSILON: 301 if ( p.getStateType()== StateNames.STAR_LOOP_ENTRY && 302 (cast(StarLoopEntryState)p).isPrecedenceDecision && 303 !(transition.target.classinfo == LoopEndState.classinfo)) 304 { 305 // We are at the start of a left recursive rule's (...)* loop 306 // and we're not taking the exit branch of loop. 307 InterpreterRuleContext localctx = 308 createInterpreterRuleContext(_parentContextStack.front.a, 309 _parentContextStack.front.b, 310 ctx_.getRuleIndex()); 311 pushNewRecursionContext(localctx, 312 atn.ruleToStartState[p.ruleIndex].stateNumber, 313 ctx_.getRuleIndex()); 314 } 315 break; 316 case TransitionStates.ATOM: 317 match((cast(AtomTransition)transition)._label); 318 break; 319 case TransitionStates.RANGE: 320 case TransitionStates.SET: 321 case TransitionStates.NOT_SET: 322 if (!transition.matches(_input.LA(1), TokenConstantDefinition.MIN_USER_TOKEN_TYPE, 65535)) { 323 recoverInline(); 324 } 325 matchWildcard(); 326 break; 327 328 case TransitionStates.WILDCARD: 329 matchWildcard(); 330 break; 331 332 case TransitionStates.RULE: 333 RuleStartState ruleStartState = cast(RuleStartState)transition.target; 334 int ruleIndex = ruleStartState.ruleIndex; 335 InterpreterRuleContext newctx = createInterpreterRuleContext(ctx_, p.stateNumber, ruleIndex); 336 if (ruleStartState.isLeftRecursiveRule) { 337 enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, (cast(RuleTransition)transition).precedence); 338 } 339 else { 340 enterRule(newctx, transition.target.stateNumber, ruleIndex); 341 } 342 break; 343 344 case TransitionStates.PREDICATE: 345 PredicateTransition predicateTransition = cast(PredicateTransition)transition; 346 if (!sempred(ctx_, predicateTransition.ruleIndex, predicateTransition.predIndex)) { 347 throw new FailedPredicateException(this); 348 } 349 350 break; 351 case TransitionStates.ACTION: 352 ActionTransition actionTransition = cast(ActionTransition)transition; 353 action(ctx_, actionTransition.ruleIndex, actionTransition.actionIndex); 354 break; 355 case TransitionStates.PRECEDENCE: 356 if (!precpred(ctx_, (cast(PrecedencePredicateTransition)transition).precedence)) { 357 throw new FailedPredicateException(this, format("precpred(ctx_, %d)", (cast(PrecedencePredicateTransition)transition).precedence)); 358 } 359 break; 360 default: 361 throw new UnsupportedOperationException("Unrecognized ATN transition type."); 362 } 363 setState(transition.target.stateNumber); 364 } 365 366 protected int visitDecisionState(DecisionState p) 367 { 368 int predictedAlt = 1; 369 if (p.getNumberOfTransitions() > 1) { 370 getErrorHandler.sync(this); 371 int decision = p.decision; 372 if (decision == overrideDecision && _input.index() == overrideDecisionInputIndex && 373 !overrideDecisionReached ) 374 { 375 predictedAlt = overrideDecisionAlt; 376 overrideDecisionReached = true; 377 } 378 else { 379 predictedAlt = getInterpreter().adaptivePredict(_input, decision, ctx_); 380 } 381 } 382 return predictedAlt; 383 } 384 385 /** 386 * @uml 387 * Provide simple "factory" for InterpreterRuleContext's. 388 */ 389 protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent, 390 int invokingStateNumber, int ruleIndex) 391 { 392 return new InterpreterRuleContext(parent, invokingStateNumber, ruleIndex); 393 } 394 395 protected void visitRuleStopState(ATNState p) 396 { 397 } 398 399 /** 400 * @uml 401 * Override this parser interpreters normal decision-making process 402 * at a particular decision and input token index. Instead of 403 * allowing the adaptive prediction mechanism to choose the 404 * first alternative within a block that leads to a successful parse, 405 * force it to take the alternative, 1..n for n alternatives. 406 * 407 * As an implementation limitation right now, you can only specify one 408 * override. This is sufficient to allow construction of different 409 * parse trees for ambiguous input. It means re-parsing the entire input 410 * in general because you're never sure where an ambiguous sequence would 411 * live in the various parse trees. For example, in one interpretation, 412 * an ambiguous input sequence would be matched completely in expression 413 * but in another it could match all the way back to the root. 414 * 415 * s : e '!'? ; 416 * e : ID 417 * | ID '!' 418 * ; 419 * 420 * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first 421 * case, the ambiguous sequence is fully contained only by the root. 422 * In the second case, the ambiguous sequences fully contained within just 423 * e, as in: (e ID !). 424 * 425 * Rather than trying to optimize this and make 426 * some intelligent decisions for optimization purposes, I settled on 427 * just re-parsing the whole input and then using 428 * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal 429 * subtree that contains the ambiguous sequence. I originally tried to 430 * record the call stack at the point the parser detected and ambiguity but 431 * left recursive rules create a parse tree stack that does not reflect 432 * the actual call stack. That impedance mismatch was enough to make 433 * it it challenging to restart the parser at a deeply nested rule 434 * invocation. 435 * 436 * Only parser interpreters can override decisions so as to avoid inserting 437 * override checking code in the critical ALL(*) prediction execution path. 438 */ 439 public void addDecisionOverride(int decision, int tokenIndex, int forcedAlt) 440 { 441 overrideDecision = decision; 442 overrideDecisionInputIndex = tokenIndex; 443 overrideDecisionAlt = forcedAlt; 444 } 445 446 public InterpreterRuleContext getOverrideDecisionRoot() 447 { 448 return overrideDecisionRoot; 449 } 450 451 /** 452 * @uml 453 * Rely on the error handler for this parser but, if no tokens are consumed 454 * to recover, add an error node. Otherwise, nothing is seen in the parse 455 * tree. 456 */ 457 protected void recover(RecognitionException e) 458 { 459 TokenFactorySourcePair tokenFactorySourcePair; 460 int i = _input.index(); 461 getErrorHandler.recover(this, e); 462 if ( _input.index()==i ) { 463 // no input consumed, better add an error node 464 if (e.classinfo == InputMismatchException.classinfo) { 465 InputMismatchException ime = cast(InputMismatchException)e; 466 Token tok = e.getOffendingToken(); 467 int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element 468 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream()); 469 auto errToken = 470 tokenFactory().create(tokenFactorySourcePair, 471 expectedTokenType, tok.getText(), 472 TokenConstantDefinition.DEFAULT_CHANNEL, 473 -1, -1, // invalid start/stop 474 tok.getLine(), tok.getCharPositionInLine()); 475 ctx_.addErrorNode(errToken); 476 } 477 else { // NoViableAlt 478 auto tok = e.getOffendingToken; 479 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream()); 480 auto errToken = 481 tokenFactory().create(tokenFactorySourcePair, 482 TokenConstantDefinition.INVALID_TYPE, tok.getText(), 483 TokenConstantDefinition.DEFAULT_CHANNEL, 484 -1, -1, // invalid start/stop 485 tok.getLine(), tok.getCharPositionInLine()); 486 ctx_.addErrorNode(errToken); 487 } 488 } 489 } 490 491 protected Token recoverInline() 492 { 493 return _errHandler.recoverInline(this); 494 } 495 496 /** 497 * @uml 498 * Return the root of the parse, which can be useful if the parser 499 * bails out. You still can access the top node. Note that, 500 * because of the way left recursive rules add children, it's possible 501 * that the root will not have any children if the start rule immediately 502 * called and left recursive rule that fails. 503 */ 504 public InterpreterRuleContext getRootContext() 505 { 506 return rootContext; 507 } 508 509 }