1 /* 2 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.ParserInterpreter; 8 9 import std.typecons; 10 import std.format; 11 import std.container : DList; 12 import antlr.v4.runtime.Parser; 13 import antlr.v4.runtime.CharStream; 14 import antlr.v4.runtime.InterpreterRuleContext; 15 import antlr.v4.runtime.ParserRuleContext; 16 import antlr.v4.runtime.RecognitionException; 17 import antlr.v4.runtime.FailedPredicateException; 18 import antlr.v4.runtime.UnsupportedOperationException; 19 import antlr.v4.runtime.InputMismatchException; 20 import antlr.v4.runtime.Token; 21 import antlr.v4.runtime.TokenConstantDefinition; 22 import antlr.v4.runtime.TokenStream; 23 import antlr.v4.runtime.TokenSource; 24 import antlr.v4.runtime.Vocabulary; 25 import antlr.v4.runtime.VocabularyImpl; 26 import antlr.v4.runtime.atn.ATN; 27 import antlr.v4.runtime.atn.ATNState; 28 import antlr.v4.runtime.atn.AtomTransition; 29 import antlr.v4.runtime.atn.StateNames; 30 import antlr.v4.runtime.atn.StarLoopEntryState; 31 import antlr.v4.runtime.atn.ActionTransition; 32 import antlr.v4.runtime.atn.RuleTransition; 33 import antlr.v4.runtime.atn.PredicateTransition; 34 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 35 import antlr.v4.runtime.atn.Transition; 36 import antlr.v4.runtime.atn.LoopEndState; 37 import antlr.v4.runtime.atn.ParserATNSimulator; 38 import antlr.v4.runtime.atn.RuleStartState; 39 import antlr.v4.runtime.atn.TransitionStates; 40 import antlr.v4.runtime.dfa.DFA; 41 import antlr.v4.runtime.atn.DecisionState; 42 import antlr.v4.runtime.atn.PredictionContextCache; 43 44 alias ParentContextPair = Tuple!(ParserRuleContext, "a", int, "b"); 45 alias TokenFactorySourcePair = Tuple!(TokenSource, "a", CharStream, "b"); 46 47 /** 48 * @uml 49 * A parser simulator that mimics what ANTLR's generated 50 * parser code does. A ParserATNSimulator is used to make 51 * predictions via adaptivePredict but this class moves a pointer through the 52 * ATN to simulate parsing. ParserATNSimulator just 53 * makes us efficient rather than having to backtrack, for example. 54 * 55 * This properly creates parse trees even for left recursive rules. 56 * 57 * We rely on the left recursive rule invocation and special predicate 58 * transitions to make left recursive rules work. 59 * 60 * See TestParserInterpreter for examples. 61 */ 62 class ParserInterpreter : Parser 63 { 64 65 protected string grammarFileName; 66 67 protected ATN atn; 68 69 /** 70 * @uml 71 * not shared like it is for generated parsers 72 */ 73 protected DFA[] decisionToDFA; 74 75 protected PredictionContextCache sharedContextCache ; 76 77 protected string[] tokenNames; 78 79 protected string[] ruleNames; 80 81 private Vocabulary vocabulary; 82 83 /** 84 * @uml 85 * This stack corresponds to the _parentctx, _parentState pair of locals 86 * that would exist on call stack frames with a recursive descent parser; 87 * in the generated function for a left-recursive rule you'd see: 88 * 89 * private EContext e(int _p) throws RecognitionException { 90 * ParserRuleContext _parentctx = _ctx; // Pair.a 91 * int _parentState = getState(); // Pair.b 92 * ... 93 * } 94 * 95 * Those values are used to create new recursive rule invocation contexts 96 * associated with left operand of an alt like "expr '*' expr". 97 */ 98 protected DList!ParentContextPair _parentContextStack; 99 100 /** 101 * @uml 102 * We need a map from (decision,inputIndex)->forced alt for computing ambiguous 103 * parse trees. For now, we allow exactly one override. 104 */ 105 protected int overrideDecision = -1; 106 107 protected int overrideDecisionInputIndex = -1; 108 109 protected int overrideDecisionAlt = -1; 110 111 /** 112 * @uml 113 * latch and only override once; error might trigger infinite loop 114 */ 115 protected bool overrideDecisionReached = false;; 116 117 /** 118 * @uml 119 * What is the current context when we override a decisions? 120 * This tellsus what the root of the parse tree is when using override 121 * for an ambiguity/lookahead check. 122 */ 123 protected InterpreterRuleContext overrideDecisionRoot = null; 124 125 protected InterpreterRuleContext rootContext; 126 127 /** 128 * @uml 129 * deprecated Use {@link #ParserInterpreter(String, Vocabulary, Collection, ATN, TokenStream)} instead. 130 */ 131 public this(string grammarFileName, string[] tokenNames, string[] ruleNames, ATN atn, 132 TokenStream input) 133 { 134 this(grammarFileName, 135 VocabularyImpl.fromTokenNames(tokenNames), 136 ruleNames, atn, input); 137 } 138 139 public this(string grammarFileName, Vocabulary vocabulary, string[] ruleNames, ATN atn, 140 TokenStream input) 141 { 142 super(input); 143 this.grammarFileName = grammarFileName; 144 this.atn = atn; 145 for (int i = 0; i < tokenNames.length; i++) { 146 tokenNames ~= vocabulary.getDisplayName(i); 147 } 148 149 this.ruleNames = ruleNames; 150 this.vocabulary = vocabulary; 151 152 // init decision DFA 153 int numberOfDecisions = atn.getNumberOfDecisions(); 154 for (int i = 0; i < numberOfDecisions; i++) { 155 DecisionState decisionState = atn.getDecisionState(i); 156 decisionToDFA ~= new DFA(decisionState, i); 157 } 158 159 // get atn simulator that knows how to do predictions 160 setInterpreter(new ParserATNSimulator(this, atn, 161 decisionToDFA, 162 sharedContextCache)); 163 } 164 165 /** 166 * @uml 167 * @override 168 */ 169 public override void reset() 170 { 171 super.reset(); 172 overrideDecisionReached = false; 173 overrideDecisionRoot = null; 174 } 175 176 /** 177 * @uml 178 * @override 179 */ 180 public override ATN getATN() 181 { 182 return atn; 183 } 184 185 /** 186 * @uml 187 * @override 188 */ 189 public override string[] getTokenNames() 190 { 191 return tokenNames; 192 } 193 194 /** 195 * @uml 196 * @override 197 */ 198 public override Vocabulary getVocabulary() 199 { 200 return vocabulary; 201 } 202 203 /** 204 * @uml 205 * @override 206 */ 207 public override string[] getRuleNames() 208 { 209 return ruleNames; 210 } 211 212 /** 213 * @uml 214 * @override 215 */ 216 public override string getGrammarFileName() 217 { 218 return grammarFileName; 219 } 220 221 /** 222 * @uml 223 * Begin parsing at startRuleIndex 224 */ 225 public ParserRuleContext parse(int startRuleIndex) 226 { 227 RuleStartState startRuleStartState = atn.ruleToStartState[startRuleIndex]; 228 rootContext = createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex); 229 if (startRuleStartState.isLeftRecursiveRule) { 230 enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0); 231 } 232 else { 233 enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex); 234 } 235 236 while ( true ) { 237 ATNState p = getATNState(); 238 switch ( p.getStateType() ) { 239 case StateNames.RULE_STOP : 240 // pop; return from rule 241 if (ctx_.isEmpty() ) { 242 if (startRuleStartState.isLeftRecursiveRule) { 243 ParserRuleContext result = ctx_; 244 ParentContextPair parentContext = _parentContextStack.back; 245 _parentContextStack.removeBack(); 246 unrollRecursionContexts(parentContext.a); 247 return result; 248 } 249 else { 250 exitRule(); 251 return rootContext; 252 } 253 } 254 visitRuleStopState(p); 255 break; 256 default : 257 try { 258 visitState(p); 259 } 260 catch (RecognitionException e) { 261 setState(atn.ruleToStopState[p.ruleIndex].stateNumber); 262 ctx_.exception = e; 263 getErrorHandler.reportError(this, e); 264 recover(e); 265 } 266 267 break; 268 } 269 } 270 } 271 272 /** 273 * @uml 274 * @override 275 */ 276 public override void enterRecursionRule(ParserRuleContext localctx, int state, int ruleIndex, 277 int precedence) 278 { 279 ParentContextPair pair = tuple(ctx_, localctx.invokingState); 280 _parentContextStack.insert(pair); 281 super.enterRecursionRule(localctx, state, ruleIndex, precedence); 282 } 283 284 protected ATNState getATNState() 285 { 286 return atn.states[getState]; 287 } 288 289 protected void visitState(ATNState p) 290 { 291 // System.out.println("visitState "+p.stateNumber); 292 int predictedAlt = 1; 293 if (cast(DecisionState)p) { 294 predictedAlt = visitDecisionState(cast(DecisionState)p); 295 } 296 297 Transition transition = p.transition(predictedAlt - 1); 298 switch (transition.getSerializationType()) { 299 case TransitionStates.EPSILON: 300 if ( p.getStateType()== StateNames.STAR_LOOP_ENTRY && 301 (cast(StarLoopEntryState)p).isPrecedenceDecision && 302 !cast(LoopEndState)transition.target) 303 { 304 // We are at the start of a left recursive rule's (...)* loop 305 // and we're not taking the exit branch of loop. 306 InterpreterRuleContext localctx = 307 createInterpreterRuleContext(_parentContextStack.front.a, 308 _parentContextStack.front.b, 309 ctx_.getRuleIndex()); 310 pushNewRecursionContext(localctx, 311 atn.ruleToStartState[p.ruleIndex].stateNumber, 312 ctx_.getRuleIndex()); 313 } 314 break; 315 case TransitionStates.ATOM: 316 match((cast(AtomTransition)transition)._label); 317 break; 318 case TransitionStates.RANGE: 319 case TransitionStates.SET: 320 case TransitionStates.NOT_SET: 321 if (!transition.matches(_input.LA(1), TokenConstantDefinition.MIN_USER_TOKEN_TYPE, 65535)) { 322 recoverInline(); 323 } 324 matchWildcard(); 325 break; 326 327 case TransitionStates.WILDCARD: 328 matchWildcard(); 329 break; 330 331 case TransitionStates.RULE: 332 RuleStartState ruleStartState = cast(RuleStartState)transition.target; 333 int ruleIndex = ruleStartState.ruleIndex; 334 InterpreterRuleContext newctx = createInterpreterRuleContext(ctx_, p.stateNumber, ruleIndex); 335 if (ruleStartState.isLeftRecursiveRule) { 336 enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, (cast(RuleTransition)transition).precedence); 337 } 338 else { 339 enterRule(newctx, transition.target.stateNumber, ruleIndex); 340 } 341 break; 342 343 case TransitionStates.PREDICATE: 344 PredicateTransition predicateTransition = cast(PredicateTransition)transition; 345 if (!sempred(ctx_, predicateTransition.ruleIndex, predicateTransition.predIndex)) { 346 throw new FailedPredicateException(this); 347 } 348 349 break; 350 case TransitionStates.ACTION: 351 ActionTransition actionTransition = cast(ActionTransition)transition; 352 action(ctx_, actionTransition.ruleIndex, actionTransition.actionIndex); 353 break; 354 case TransitionStates.PRECEDENCE: 355 if (!precpred(ctx_, (cast(PrecedencePredicateTransition)transition).precedence)) { 356 throw new FailedPredicateException(this, format("precpred(ctx_, %d)", (cast(PrecedencePredicateTransition)transition).precedence)); 357 } 358 break; 359 default: 360 throw new UnsupportedOperationException("Unrecognized ATN transition type."); 361 } 362 setState(transition.target.stateNumber); 363 } 364 365 protected int visitDecisionState(DecisionState p) 366 { 367 int predictedAlt = 1; 368 if (p.getNumberOfTransitions() > 1) { 369 getErrorHandler.sync(this); 370 int decision = p.decision; 371 if (decision == overrideDecision && _input.index() == overrideDecisionInputIndex && 372 !overrideDecisionReached ) 373 { 374 predictedAlt = overrideDecisionAlt; 375 overrideDecisionReached = true; 376 } 377 else { 378 predictedAlt = getInterpreter().adaptivePredict(_input, decision, ctx_); 379 } 380 } 381 return predictedAlt; 382 } 383 384 /** 385 * @uml 386 * Provide simple "factory" for InterpreterRuleContext's. 387 */ 388 protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent, 389 int invokingStateNumber, size_t ruleIndex) 390 { 391 return new InterpreterRuleContext(parent, invokingStateNumber, ruleIndex); 392 } 393 394 protected void visitRuleStopState(ATNState p) 395 { 396 RuleStartState ruleStartState = atn.ruleToStartState[p.ruleIndex]; 397 if (ruleStartState.isLeftRecursiveRule) { 398 ParentContextPair parentContext = _parentContextStack.back; 399 _parentContextStack.removeBack; 400 unrollRecursionContexts(parentContext.a); 401 setState(parentContext.b); 402 } 403 else { 404 exitRule; 405 } 406 407 RuleTransition ruleTransition = cast(RuleTransition)atn.states[getState].transition(0); 408 setState(ruleTransition.followState.stateNumber); 409 } 410 411 /** 412 * @uml 413 * Override this parser interpreters normal decision-making process 414 * at a particular decision and input token index. Instead of 415 * allowing the adaptive prediction mechanism to choose the 416 * first alternative within a block that leads to a successful parse, 417 * force it to take the alternative, 1..n for n alternatives. 418 * 419 * As an implementation limitation right now, you can only specify one 420 * override. This is sufficient to allow construction of different 421 * parse trees for ambiguous input. It means re-parsing the entire input 422 * in general because you're never sure where an ambiguous sequence would 423 * live in the various parse trees. For example, in one interpretation, 424 * an ambiguous input sequence would be matched completely in expression 425 * but in another it could match all the way back to the root. 426 * 427 * s : e '!'? ; 428 * e : ID 429 * | ID '!' 430 * ; 431 * 432 * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first 433 * case, the ambiguous sequence is fully contained only by the root. 434 * In the second case, the ambiguous sequences fully contained within just 435 * e, as in: (e ID !). 436 * 437 * Rather than trying to optimize this and make 438 * some intelligent decisions for optimization purposes, I settled on 439 * just re-parsing the whole input and then using 440 * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal 441 * subtree that contains the ambiguous sequence. I originally tried to 442 * record the call stack at the point the parser detected and ambiguity but 443 * left recursive rules create a parse tree stack that does not reflect 444 * the actual call stack. That impedance mismatch was enough to make 445 * it it challenging to restart the parser at a deeply nested rule 446 * invocation. 447 * 448 * Only parser interpreters can override decisions so as to avoid inserting 449 * override checking code in the critical ALL(*) prediction execution path. 450 */ 451 public void addDecisionOverride(int decision, int tokenIndex, int forcedAlt) 452 { 453 overrideDecision = decision; 454 overrideDecisionInputIndex = tokenIndex; 455 overrideDecisionAlt = forcedAlt; 456 } 457 458 public InterpreterRuleContext getOverrideDecisionRoot() 459 { 460 return overrideDecisionRoot; 461 } 462 463 /** 464 * @uml 465 * Rely on the error handler for this parser but, if no tokens are consumed 466 * to recover, add an error node. Otherwise, nothing is seen in the parse 467 * tree. 468 */ 469 protected void recover(RecognitionException e) 470 { 471 TokenFactorySourcePair tokenFactorySourcePair; 472 auto i = _input.index(); 473 getErrorHandler.recover(this, e); 474 if ( _input.index()==i ) { 475 // no input consumed, better add an error node 476 if (cast(InputMismatchException)e) { 477 InputMismatchException ime = cast(InputMismatchException)e; 478 Token tok = e.getOffendingToken(); 479 int expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element 480 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream()); 481 auto errToken = 482 tokenFactory().create(tokenFactorySourcePair, 483 expectedTokenType, tok.getText(), 484 TokenConstantDefinition.DEFAULT_CHANNEL, 485 -1, -1, // invalid start/stop 486 tok.getLine(), tok.getCharPositionInLine()); 487 ctx_.addErrorNode(errToken); 488 } 489 else { // NoViableAlt 490 auto tok = e.getOffendingToken; 491 tokenFactorySourcePair = tuple(tok.getTokenSource(), tok.getTokenSource().getInputStream()); 492 auto errToken = 493 tokenFactory().create(tokenFactorySourcePair, 494 TokenConstantDefinition.INVALID_TYPE, tok.getText(), 495 TokenConstantDefinition.DEFAULT_CHANNEL, 496 -1, -1, // invalid start/stop 497 tok.getLine(), tok.getCharPositionInLine()); 498 ctx_.addErrorNode(errToken); 499 } 500 } 501 } 502 503 protected Token recoverInline() 504 { 505 return _errHandler.recoverInline(this); 506 } 507 508 /** 509 * @uml 510 * Return the root of the parse, which can be useful if the parser 511 * bails out. You still can access the top node. Note that, 512 * because of the way left recursive rules add children, it's possible 513 * that the root will not have any children if the start rule immediately 514 * called and left recursive rule that fails. 515 */ 516 public InterpreterRuleContext getRootContext() 517 { 518 return rootContext; 519 } 520 521 }