1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ATNDeserializer; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.IllegalStateException; 11 import antlr.v4.runtime.Token; 12 import antlr.v4.runtime.TokenConstantDefinition; 13 import antlr.v4.runtime.UnsupportedOperationException; 14 import antlr.v4.runtime.atn.ATN; 15 import antlr.v4.runtime.atn.ATNDeserializationOptions; 16 import antlr.v4.runtime.atn.ATNState; 17 import antlr.v4.runtime.atn.ATNType; 18 import antlr.v4.runtime.atn.ActionTransition; 19 import antlr.v4.runtime.atn.AtomTransition; 20 import antlr.v4.runtime.atn.BasicBlockStartState; 21 import antlr.v4.runtime.atn.BasicState; 22 import antlr.v4.runtime.atn.BlockEndState; 23 import antlr.v4.runtime.atn.BlockStartState; 24 import antlr.v4.runtime.atn.DecisionState; 25 import antlr.v4.runtime.atn.EpsilonTransition; 26 import antlr.v4.runtime.atn.LexerAction; 27 import antlr.v4.runtime.atn.LexerActionType; 28 import antlr.v4.runtime.atn.LexerChannelAction; 29 import antlr.v4.runtime.atn.LexerCustomAction; 30 import antlr.v4.runtime.atn.LexerModeAction; 31 import antlr.v4.runtime.atn.LexerMoreAction; 32 import antlr.v4.runtime.atn.LexerPopModeAction; 33 import antlr.v4.runtime.atn.LexerPushModeAction; 34 import antlr.v4.runtime.atn.LexerSkipAction; 35 import antlr.v4.runtime.atn.LexerTypeAction; 36 import antlr.v4.runtime.atn.LoopEndState; 37 import antlr.v4.runtime.atn.NotSetTransition; 38 import antlr.v4.runtime.atn.PlusBlockStartState; 39 import antlr.v4.runtime.atn.PlusLoopbackState; 40 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 41 import antlr.v4.runtime.atn.PredicateTransition; 42 import antlr.v4.runtime.atn.RangeTransition; 43 import antlr.v4.runtime.atn.RuleStartState; 44 import antlr.v4.runtime.atn.RuleStopState; 45 import antlr.v4.runtime.atn.RuleTransition; 46 import antlr.v4.runtime.atn.SetTransition; 47 import antlr.v4.runtime.atn.StarBlockStartState; 48 import antlr.v4.runtime.atn.StarLoopEntryState; 49 import antlr.v4.runtime.atn.StarLoopbackState; 50 import antlr.v4.runtime.atn.StateNames; 51 import antlr.v4.runtime.atn.TokensStartState; 52 import antlr.v4.runtime.atn.Transition; 53 import antlr.v4.runtime.atn.TransitionStates; 54 import antlr.v4.runtime.atn.WildcardTransition; 55 import antlr.v4.runtime.misc; 56 import std.algorithm: canFind; 57 import std.algorithm: map; 58 import std.algorithm: reverse; 59 import std.array; 60 import std.conv; 61 import std.format; 62 import std.stdio; 63 import std.uuid; 64 65 // Class ATNDeserializer 66 /** 67 * TODO add class description 68 */ 69 class ATNDeserializer 70 { 71 72 public static immutable int SERIALIZED_VERSION = 3; 73 74 /** 75 * This is the earliest supported serialized UUID. 76 */ 77 private static UUID BASE_SERIALIZED_UUID; 78 79 /** 80 * This UUID indicates an extension of {@link BASE_SERIALIZED_UUID} for the 81 * addition of precedence predicates. 82 */ 83 private static UUID ADDED_PRECEDENCE_TRANSITIONS; 84 85 /** 86 * This UUID indicates an extension of {@link #ADDED_PRECEDENCE_TRANSITIONS} 87 * for the addition of lexer actions encoded as a sequence of 88 * {@link LexerAction} instances. 89 */ 90 private static UUID ADDED_LEXER_ACTIONS; 91 92 private static UUID ADDED_UNICODE_SMP; 93 94 /** 95 * This list contains all of the currently supported UUIDs, ordered by when 96 * the feature first appeared in this branch. 97 */ 98 private static UUID[] SUPPORTED_UUIDS; 99 100 /** 101 * This is the current serialized UUID. 102 */ 103 public static UUID SERIALIZED_UUID; 104 105 private ATNDeserializationOptions deserializationOptions; 106 107 private int[] data; 108 109 private int p; 110 111 public UUID uuid; 112 113 public static this() 114 { 115 /* WARNING: DO NOT MERGE THESE LINES. If UUIDs differ during a merge, 116 * resolve the conflict by generating a new ID! 117 */ 118 BASE_SERIALIZED_UUID = UUID("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3"); 119 ADDED_PRECEDENCE_TRANSITIONS = UUID("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61"); 120 ADDED_LEXER_ACTIONS = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E"); 121 ADDED_UNICODE_SMP = UUID("59627784-3BE5-417A-B9EB-8131A7286089"); 122 SUPPORTED_UUIDS ~= BASE_SERIALIZED_UUID; 123 SUPPORTED_UUIDS ~= ADDED_PRECEDENCE_TRANSITIONS; 124 SUPPORTED_UUIDS ~= ADDED_LEXER_ACTIONS; 125 SUPPORTED_UUIDS ~= ADDED_UNICODE_SMP; 126 127 SERIALIZED_UUID = ADDED_LEXER_ACTIONS; 128 } 129 130 public this() 131 { 132 this(ATNDeserializationOptions.defaultOptions); 133 } 134 135 public this(ATNDeserializationOptions deserializationOptions) 136 { 137 if (deserializationOptions is null) { 138 deserializationOptions = new ATNDeserializationOptions(); 139 } 140 this.deserializationOptions = deserializationOptions; 141 } 142 143 /** 144 * Determines if a particular serialized representation of an ATN supports 145 * a particular feature, identified by the {@link UUID} used for serializing 146 * the ATN at the time the feature was first introduced. 147 * 148 * @param feature The {@link UUID} marking the first time the feature was 149 * supported in the serialized ATN. 150 * @param actualUuid The {@link UUID} of the actual serialized ATN which is 151 * currently being deserialized. 152 * @return {@code true} if the {@code actualUuid} value represents a 153 * serialized ATN at or after the feature identified by {@code feature} was 154 * introduced; otherwise, {@code false}. 155 */ 156 protected bool isFeatureSupported(UUID feature, UUID actualUuid) 157 { 158 bool not_found = true; 159 foreach (uu; SUPPORTED_UUIDS) { 160 if (not_found && uu == feature) { 161 if (uu == actualUuid) 162 return true; 163 else 164 not_found = false; 165 } 166 else { 167 if (uu == actualUuid) { 168 return true; 169 } 170 } 171 } 172 return false; 173 } 174 175 public ATN deserialize(wstring input_data) 176 { 177 reset(input_data); 178 checkVersion; 179 checkUUID; 180 ATN atn = readATN; 181 readStates(atn); 182 readRules(atn); 183 readModes(atn); 184 IntervalSet[] sets; 185 readSets(sets, atn, &readInt); 186 if (isFeatureSupported(ADDED_UNICODE_SMP, uuid)) 187 { 188 readSets(sets, atn, &readInt32); 189 } 190 readEdges(atn, sets); 191 readDecisions(atn); 192 readLexerActions(atn); 193 markPrecedenceDecisions(atn); 194 if (deserializationOptions.verifyATN) { 195 verifyATN(atn); 196 } 197 if (deserializationOptions.generateRuleBypassTransitions && atn.grammarType == ATNType.PARSER) { 198 generateRuleBypassTransitions(atn); 199 } 200 if (deserializationOptions.optimize) { 201 optimizeATN(atn); 202 } 203 identifyTailCalls(atn); 204 return atn; 205 } 206 207 /** 208 * @uml 209 * verify assumptions 210 */ 211 protected void verifyATN(ATN atn) 212 { 213 // verify assumptions 214 foreach (ATNState state; atn.states) { 215 if (state is null) { 216 continue; 217 } 218 219 checkCondition(state.onlyHasEpsilonTransitions || state.getNumberOfTransitions <= 1); 220 221 if (cast(PlusBlockStartState)state) { 222 checkCondition((cast(PlusBlockStartState)state).loopBackState !is null); 223 } 224 225 if (cast(StarLoopEntryState)state) { 226 StarLoopEntryState starLoopEntryState = cast(StarLoopEntryState)state; 227 checkCondition(starLoopEntryState.loopBackState !is null); 228 checkCondition(starLoopEntryState.getNumberOfTransitions() == 2); 229 230 if (cast(StarBlockStartState)(starLoopEntryState.transition(0).target)) { 231 checkCondition(cast(LoopEndState)(starLoopEntryState.transition(1).target) !is null); 232 checkCondition(!starLoopEntryState.nonGreedy); 233 } 234 else if (cast(LoopEndState)(starLoopEntryState.transition(0).target)) { 235 checkCondition(cast(StarBlockStartState)(starLoopEntryState.transition(1).target) !is null); 236 checkCondition(starLoopEntryState.nonGreedy); 237 } 238 else { 239 throw new IllegalStateException(); 240 } 241 } 242 243 if (cast(StarLoopbackState)state) { 244 checkCondition(state.getNumberOfTransitions() == 1); 245 checkCondition(cast(StarLoopEntryState)(state.transition(0).target) !is null); 246 } 247 248 if (cast(LoopEndState)state) { 249 checkCondition((cast(LoopEndState)state).loopBackState !is null); 250 } 251 252 if (cast(RuleStartState)state) { 253 checkCondition((cast(RuleStartState)state).stopState !is null); 254 } 255 256 if (cast(BlockStartState)state) { 257 checkCondition((cast(BlockStartState)state).endState !is null); 258 } 259 260 if (cast(BlockEndState)state) { 261 checkCondition((cast(BlockEndState)state).startState !is null); 262 } 263 264 if (cast(DecisionState)state) { 265 DecisionState decisionState = cast(DecisionState)state; 266 checkCondition(decisionState.getNumberOfTransitions() <= 1 || decisionState.decision >= 0); 267 } 268 else { 269 checkCondition(state.getNumberOfTransitions() <= 1 || state.classinfo == RuleStopState.classinfo); 270 } 271 } 272 273 } 274 275 private void readSets(ref IntervalSet[] sets, ATN aTN, int delegate() readUnicode) 276 { 277 // SETS 278 int nsets = readInt; 279 for (int i=0; i<nsets; i++) { 280 IntervalSet set = new IntervalSet(); 281 sets ~= set; 282 int nintervals = readInt; 283 bool containsEof = readInt != 0; 284 if (containsEof) { 285 set.add(-1); 286 } 287 for (int j=0; j<nintervals; j++) { 288 set.add(readUnicode(), readUnicode()); 289 } 290 } 291 } 292 293 protected void markPrecedenceDecisions(ATN atn) 294 { 295 foreach (ATNState state; atn.states) { 296 if (!cast(StarLoopEntryState)state) { 297 continue; 298 } 299 300 /* We analyze the ATN to determine if this ATN decision state is the 301 * decision for the closure block that determines whether a 302 * precedence rule should continue or complete. 303 */ 304 if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) { 305 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 306 if (cast(LoopEndState)maybeLoopEndState) { 307 if (maybeLoopEndState.epsilonOnlyTransitions && 308 maybeLoopEndState.transitions[0].target.classinfo == 309 RuleStopState.classinfo) { 310 (cast(StarLoopEntryState)state).isPrecedenceDecision = true; 311 } 312 } 313 } 314 } 315 316 } 317 318 protected void checkCondition(bool condition) 319 { 320 checkCondition(condition, null); 321 } 322 323 protected void checkCondition(bool condition, string message) 324 { 325 if (!condition) { 326 throw new IllegalStateException(message); 327 } 328 } 329 330 protected Transition edgeFactory(ATN atn, int type, int src, int trg, int arg1, int arg2, 331 int arg3, IntervalSet[] sets) 332 { 333 ATNState target = atn.states[trg]; 334 with(TransitionStates) { 335 switch (type) { 336 case EPSILON : return new EpsilonTransition(target); 337 case RANGE : 338 if (arg3 != 0) { 339 return new RangeTransition(target, TokenConstantDefinition.EOF, arg2); 340 } 341 else { 342 return new RangeTransition(target, arg1, arg2); 343 } 344 case RULE : 345 RuleTransition rt = new RuleTransition(cast(RuleStartState)atn.states[arg1], arg2, arg3, target); 346 return rt; 347 case PREDICATE : 348 PredicateTransition pt = new PredicateTransition(target, arg1, arg2, arg3 != 0); 349 return pt; 350 case PRECEDENCE: 351 return new PrecedencePredicateTransition(target, arg1); 352 case ATOM : 353 if (arg3 != 0) { 354 return new AtomTransition(target, TokenConstantDefinition.EOF); 355 } 356 else { 357 return new AtomTransition(target, arg1); 358 } 359 case ACTION : 360 ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0); 361 return a; 362 case SET : return new SetTransition(target, sets[arg1]); 363 case NOT_SET : return new NotSetTransition(target, sets[arg1]); 364 case WILDCARD : return new WildcardTransition(target); 365 default: throw new IllegalArgumentException("The specified transition type is not valid."); 366 } 367 } 368 } 369 370 protected ATNState stateFactory(int type, int ruleIndex) 371 { 372 ATNState s; 373 with(StateNames) { 374 switch (type) { 375 case INVALID: return null; 376 case BASIC : s = new BasicState(); break; 377 case RULE_START : s = new RuleStartState(); break; 378 case BLOCK_START : s = new BasicBlockStartState(); break; 379 case PLUS_BLOCK_START : s = new PlusBlockStartState(); break; 380 case STAR_BLOCK_START : s = new StarBlockStartState(); break; 381 case TOKEN_START : s = new TokensStartState(); break; 382 case RULE_STOP : s = new RuleStopState(); break; 383 case BLOCK_END : s = new BlockEndState(); break; 384 case STAR_LOOP_BACK : s = new StarLoopbackState(); break; 385 case STAR_LOOP_ENTRY : s = new StarLoopEntryState(); break; 386 case PLUS_LOOP_BACK : s = new PlusLoopbackState(); break; 387 case LOOP_END : s = new LoopEndState(); break; 388 default : 389 string message = format("The specified state type %d is not valid.", type); 390 throw new IllegalArgumentException(message); 391 } 392 } 393 s.ruleIndex = ruleIndex; 394 return s; 395 } 396 397 protected LexerAction lexerActionFactory(LexerActionType type, int data1, int data2) 398 { 399 switch (type) { 400 case LexerActionType.CHANNEL: 401 return new LexerChannelAction(data1); 402 403 case LexerActionType.CUSTOM: 404 return new LexerCustomAction(data1, data2); 405 406 case LexerActionType.MODE: 407 return new LexerModeAction(data1); 408 409 case LexerActionType.MORE: 410 return cast(LexerAction)LexerMoreAction.instance; 411 412 case LexerActionType.POP_MODE: 413 return cast(LexerAction)LexerPopModeAction.instance; 414 415 case LexerActionType.PUSH_MODE: 416 return new LexerPushModeAction(data1); 417 418 case LexerActionType.SKIP: 419 return cast(LexerAction)LexerSkipAction.instance; 420 421 case LexerActionType.TYPE: 422 return new LexerTypeAction(data1); 423 424 default: 425 string message = format("The specified lexer action type %d is not valid.", type); 426 throw new IllegalArgumentException(message); 427 } 428 } 429 430 private void reset(const wstring data) 431 { 432 wchar el; 433 for(int i; i < data.length; i++) { 434 el = data[i]; 435 if (el == '[') { 436 if (i+7 >= data.length) { 437 this.data ~=to!int('[') - 2; 438 continue; 439 } 440 if (data[i+7] != ']') { 441 this.data ~=to!int('[') - 2; 442 continue; 443 } 444 this.data ~= parseOct(data, i) -2; 445 i += 7; 446 continue; 447 } 448 if (el < 2) 449 this.data ~= -1; 450 else 451 this.data ~= el - 2; 452 } 453 // don't adjust the first value since that's the version number 454 this.data[0] = data[0]; 455 p = 0; 456 } 457 458 private void checkVersion() 459 { 460 auto version_atn = readInt; 461 if (version_atn != SERIALIZED_VERSION) { 462 string reason = format("Could not deserialize ATN with version %d (expected %d).", version_atn, SERIALIZED_VERSION); 463 assert(false, reason); 464 } 465 } 466 467 private void checkUUID() 468 { 469 uuid = readUUID; 470 if (!SUPPORTED_UUIDS.canFind(uuid)) { 471 string reason = format("Could not deserialize ATN with UUID %s (expected %s or a legacy UUID).", uuid, SERIALIZED_UUID); 472 assert(false, reason); 473 } 474 } 475 476 private ATN readATN() 477 { 478 ATNType grammarType = cast(ATNType)readInt; 479 int maxTokenType = readInt; 480 return new ATN(grammarType, maxTokenType); 481 } 482 483 private void readStates(ATN atn) 484 { 485 // STATES 486 int[LoopEndState][] loopBackStateNumbers; 487 int[BlockStartState][] endStateNumbers; 488 int nstates = readInt; 489 debug(deserializer) 490 writefln("%s: Number of states %s", this.p-1 , nstates); 491 for (int i=0; i<nstates; i++) { 492 int stype = readInt; 493 // ignore bad type of states 494 if (stype == StateNames.INVALID) { 495 atn.addState(null); 496 continue; 497 } 498 499 int ruleIndex = readInt; 500 if (ruleIndex == char.max) { 501 ruleIndex = -1; 502 } 503 debug(deserializer) 504 writefln("%s: readState %s, ruleIndex %s, index %s", 505 this.p-1, 506 stype, 507 ruleIndex, 508 i); 509 ATNState s = stateFactory(stype, ruleIndex); 510 if (stype == StateNames.LOOP_END) { 511 // special case 512 int loopBackStateNumber = readInt; 513 int[LoopEndState] _a; 514 _a[cast(LoopEndState)s] = loopBackStateNumber; 515 loopBackStateNumbers ~= _a; 516 } 517 else 518 if (cast(BlockStartState)s) { 519 int endStateNumber = readInt; 520 int[BlockStartState] _a; 521 _a[cast(BlockStartState)s] = endStateNumber; 522 endStateNumbers ~= _a; 523 } 524 atn.addState(s); 525 } 526 527 // delay the assignment of loop back and end states until we know all the state instances have been initialized 528 foreach (ref pair; loopBackStateNumbers) { 529 pair.keys[0].loopBackState = atn.states[pair[pair.keys[0]]]; 530 } 531 532 foreach (ref pair; endStateNumbers) { 533 pair.keys[0].endState = cast(BlockEndState)atn.states[pair[pair.keys[0]]]; 534 } 535 536 int numNonGreedyStates = readInt; 537 for (int i = 0; i < numNonGreedyStates; i++) { 538 int stateNumber = readInt; 539 (cast(DecisionState)atn.states[stateNumber]).nonGreedy = true; 540 } 541 542 int numPrecedenceStates = readInt; 543 for (int i = 0; i < numPrecedenceStates; i++) { 544 int stateNumber = readInt; 545 (cast(RuleStartState)atn.states[stateNumber]).isLeftRecursiveRule = true; 546 } 547 548 } 549 550 private void readRules(ATN atn) 551 { 552 // // RULES 553 int nrules = readInt; 554 if ( atn.grammarType == ATNType.LEXER ) { 555 atn.ruleToTokenType = new int[nrules]; 556 } 557 debug(deserializer) 558 writefln("%s: Number of rules %s", this.p-1 , nrules); 559 atn.ruleToStartState = new RuleStartState[nrules]; 560 for (int i=0; i<nrules; i++) { 561 int s = readInt; 562 RuleStartState startState = cast(RuleStartState)atn.states[s]; 563 atn.ruleToStartState[i] = startState; 564 if ( atn.grammarType == ATNType.LEXER ) { 565 int tokenType = readInt; 566 if (tokenType == 0xFFFF) { 567 tokenType = TokenConstantDefinition.EOF; 568 } 569 570 atn.ruleToTokenType[i] = tokenType; 571 } 572 } 573 574 atn.ruleToStopState = new RuleStopState[nrules]; 575 foreach (ATNState state; atn.states) { 576 if (!cast(RuleStopState)state) { 577 continue; 578 } 579 RuleStopState stopState = cast(RuleStopState)state; 580 atn.ruleToStopState[state.ruleIndex] = stopState; 581 atn.ruleToStartState[state.ruleIndex].stopState = stopState; 582 } 583 584 } 585 586 public void readModes(ATN atn) 587 { 588 // MODES 589 int nmodes = readInt; 590 for (int i=0; i<nmodes; i++) { 591 int s = readInt; 592 atn.modeToStartState ~= cast(TokensStartState)atn.states[s]; 593 } 594 } 595 596 private void readEdges(ATN atn, IntervalSet[] sets) 597 { 598 // EDGES 599 600 int nedges = readInt; 601 for (int i=0; i<nedges; i++) { 602 int src = readInt; 603 int trg = readInt; 604 int ttype = readInt; 605 int arg1 = readInt; 606 int arg2 = readInt; 607 int arg3 = readInt; 608 Transition trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); 609 debug(deserializer) { 610 writefln("EDGES %1$s %2$s->%3$s %4$s %5$s %6$s %7$s", 611 trans.classinfo, src, 612 trg, Transition.serializationNames[ttype], 613 arg1, arg2, arg3); 614 } 615 ATNState srcState = atn.states[src]; 616 srcState.addTransition(trans); 617 } 618 619 // edges for rule stop states can be derived, so they aren't serialized 620 foreach (ATNState state; atn.states) { 621 for (int i = 0; i < state.getNumberOfTransitions(); i++) { 622 Transition t = state.transition(i); 623 if (!cast(RuleTransition)t) { 624 continue; 625 } 626 627 RuleTransition ruleTransition = cast(RuleTransition)t; 628 int outermostPrecedenceReturn = -1; 629 if (atn.ruleToStartState[ruleTransition.target.ruleIndex].isLeftRecursiveRule) { 630 if (ruleTransition.precedence == 0) { 631 outermostPrecedenceReturn = ruleTransition.target.ruleIndex; 632 } 633 } 634 EpsilonTransition returnTransition = new EpsilonTransition(ruleTransition.followState, outermostPrecedenceReturn); 635 atn.ruleToStopState[ruleTransition.target.ruleIndex].addTransition(returnTransition); 636 } 637 } 638 639 foreach (ATNState state; atn.states) { 640 if (cast(BlockStartState)state) { 641 // we need to know the end state to set its start state 642 if ((cast(BlockStartState)state).endState is null) { 643 throw new IllegalStateException(); 644 } 645 646 // block end states can only be associated to a single block start state 647 if ((cast(BlockStartState)state).endState.startState !is null) { 648 throw new IllegalStateException(); 649 } 650 651 (cast(BlockStartState)state).endState.startState = cast(BlockStartState)state; 652 } 653 654 if (cast(PlusLoopbackState)state) { 655 PlusLoopbackState loopbackState = cast(PlusLoopbackState)state; 656 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 657 ATNState target = loopbackState.transition(i).target; 658 if (cast(PlusBlockStartState)target) { 659 (cast(PlusBlockStartState)target).loopBackState = loopbackState; 660 } 661 } 662 } 663 else if (cast(StarLoopbackState)state) { 664 StarLoopbackState loopbackState = cast(StarLoopbackState)state; 665 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 666 ATNState target = loopbackState.transition(i).target; 667 if (cast(StarLoopEntryState)target) { 668 (cast(StarLoopEntryState)target).loopBackState = loopbackState; 669 } 670 } 671 } 672 } 673 674 } 675 676 private void readDecisions(ATN atn) 677 { 678 // DECISIONS 679 680 int ndecisions = readInt; 681 for (int i=1; i<=ndecisions; i++) { 682 int s = readInt; 683 DecisionState decState = cast(DecisionState)atn.states[s]; 684 atn.decisionToState ~= decState; 685 decState.decision = i-1; 686 } 687 } 688 689 private void readLexerActions(ATN atn) 690 { 691 // LEXER ACTIONS 692 693 if (atn.grammarType == ATNType.LEXER) { 694 atn.lexerActions = new LexerAction[readInt]; 695 for (int i = 0; i < atn.lexerActions.length; i++) { 696 LexerActionType actionType = cast(LexerActionType)readInt; 697 int data1 = readInt; 698 if (data1 == 0xFFFF) { 699 data1 = -1; 700 } 701 702 int data2 = readInt; 703 if (data2 == 0xFFFF) { 704 data2 = -1; 705 } 706 LexerAction lexerAction = lexerActionFactory(actionType, data1, data2); 707 atn.lexerActions[i] = lexerAction; 708 } 709 } 710 } 711 712 private void optimizeATN(ATN atn) 713 { 714 while (true) 715 { 716 int optimizationCount = 0; 717 optimizationCount += inlineSetRules(atn); 718 optimizationCount += combineChainedEpsilons(atn); 719 bool preserveOrder = atn.grammarType == ATNType.LEXER; 720 optimizationCount += optimizeSets(atn, preserveOrder); 721 if (optimizationCount == 0) 722 { 723 break; 724 } 725 } 726 if (deserializationOptions.verifyATN) 727 { 728 // reverify after modification 729 verifyATN(atn); 730 } 731 } 732 733 public int parseOct(wstring data, int p) 734 { 735 int res = 0; 736 for (auto i = p + 1; i < p + 7; i++) { 737 res = res<<3 | to!int(data[i] - 0x30); 738 } 739 return res; 740 } 741 742 private UUID readUUID() 743 { 744 ubyte[16] data; 745 long b = readLong; 746 for(int i = 0; i < 8; i++) { 747 data[i] = b & 0xff; 748 b = b >>> 8; 749 } 750 long a = readLong; 751 for(int i = 8; i < 16; i++) { 752 data[i] = a & 0xff; 753 a = a >>> 8; 754 } 755 data[].reverse(); 756 auto uuid = UUID(data); 757 return uuid; 758 } 759 760 private long readLong() 761 { 762 long lowOrder = to!long(readInt32) & 0x00000000FFFFFFFFL; 763 return lowOrder | (to!long(readInt32) << 32); 764 } 765 766 private int readInt32() 767 { 768 return data[p++] | data[p++] << 16; 769 } 770 771 private int readInt() 772 { 773 return data[p++]; 774 } 775 776 private static void identifyTailCalls(ATN atn) 777 { 778 foreach (ATNState state; atn.states) 779 { 780 foreach (Transition transition; state.transitions) 781 { 782 if (!(cast(RuleTransition)transition)) 783 { 784 continue; 785 } 786 RuleTransition ruleTransition = cast(RuleTransition)transition; 787 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 788 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 789 } 790 if (!state.isOptimized) 791 { 792 continue; 793 } 794 foreach (Transition transition; state.optimizedTransitions) 795 { 796 if (!(cast(RuleTransition)transition)) 797 { 798 continue; 799 } 800 RuleTransition ruleTransition = cast(RuleTransition)transition; 801 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 802 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 803 } 804 } 805 } 806 807 private static bool testTailCall(ATN atn, RuleTransition transition, bool optimizedPath) 808 { 809 if (optimizedPath && transition.optimizedTailCall) 810 { 811 return true; 812 } 813 BitSet *reachable = new BitSet(atn.states.length); 814 ATNState[] worklist; 815 worklist ~= transition.followState; 816 while (worklist.length > 0) 817 { 818 ATNState state = worklist[$-1]; 819 worklist.length -= 1; 820 if (reachable.get(state.stateNumber)) 821 { 822 continue; 823 } 824 if (cast(RuleStopState)state) 825 { 826 continue; 827 } 828 if (!state.onlyHasEpsilonTransitions) 829 { 830 return false; 831 } 832 Transition[] transitions = optimizedPath ? state.optimizedTransitions : state.transitions; 833 foreach (Transition t; transitions) 834 { 835 if (t.getSerializationType != TransitionStates.EPSILON) 836 { 837 return false; 838 } 839 worklist ~= t.target; 840 } 841 } 842 return true; 843 } 844 845 public void generateRuleBypassTransitions(ATN atn) 846 { 847 atn.ruleToTokenType = new int[atn.ruleToStartState.length]; 848 for (int i = 0; i < atn.ruleToStartState.length; i++) { 849 atn.ruleToTokenType[i] = atn.maxTokenType + i + 1; 850 } 851 852 for (int i = 0; i < atn.ruleToStartState.length; i++) { 853 BasicBlockStartState bypassStart = new BasicBlockStartState(); 854 bypassStart.ruleIndex = i; 855 atn.addState(bypassStart); 856 857 BlockEndState bypassStop = new BlockEndState(); 858 bypassStop.ruleIndex = i; 859 atn.addState(bypassStop); 860 861 bypassStart.endState = bypassStop; 862 atn.defineDecisionState(bypassStart); 863 864 bypassStop.startState = bypassStart; 865 866 ATNState endState; 867 Transition excludeTransition = null; 868 if (atn.ruleToStartState[i].isLeftRecursiveRule) { 869 // wrap from the beginning of the rule to the StarLoopEntryState 870 endState = null; 871 foreach (ATNState state; atn.states) { 872 if (state.ruleIndex != i) { 873 continue; 874 } 875 876 if (state.classinfo != StarLoopEntryState.classinfo) { 877 continue; 878 } 879 880 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 881 if (maybeLoopEndState.classinfo != LoopEndState.classinfo) { 882 continue; 883 } 884 885 if (maybeLoopEndState.epsilonOnlyTransitions && maybeLoopEndState.transition(0).target.classinfo == RuleStopState.classinfo) { 886 endState = state; 887 break; 888 } 889 } 890 891 if (endState is null) { 892 throw new UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); 893 } 894 895 excludeTransition = (cast(StarLoopEntryState)endState).loopBackState.transition(0); 896 } 897 else { 898 endState = atn.ruleToStopState[i]; 899 } 900 901 // all non-excluded transitions that currently target end state need to target blockEnd instead 902 foreach (ATNState state; atn.states) { 903 foreach (Transition transition; state.transitions) { 904 if (transition == excludeTransition) { 905 continue; 906 } 907 908 if (transition.target == endState) { 909 transition.target = bypassStop; 910 } 911 } 912 } 913 914 // all transitions leaving the rule start state need to leave blockStart instead 915 while (atn.ruleToStartState[i].getNumberOfTransitions() > 0) { 916 Transition transition = atn.ruleToStartState[i].removeTransition(atn.ruleToStartState[i].getNumberOfTransitions() - 1); 917 bypassStart.addTransition(transition); 918 } 919 920 // link the new states 921 atn.ruleToStartState[i].addTransition(new EpsilonTransition(bypassStart)); 922 bypassStop.addTransition(new EpsilonTransition(endState)); 923 924 ATNState matchState = new BasicState(); 925 atn.addState(matchState); 926 matchState.addTransition(new AtomTransition(bypassStop, atn.ruleToTokenType[i])); 927 bypassStart.addTransition(new EpsilonTransition(matchState)); 928 } 929 930 if (deserializationOptions.verifyATN()) { 931 // reverify after modification 932 verifyATN(atn); 933 } 934 } 935 936 private static int inlineSetRules(ATN atn) 937 { 938 int inlinedCalls = 0; 939 Transition[] ruleToInlineTransition; 940 for (int i = 0; i < atn.ruleToStartState.length; i++) 941 { 942 RuleStartState startState = atn.ruleToStartState[i]; 943 ATNState middleState = startState; 944 while (middleState.onlyHasEpsilonTransitions && middleState.numberOfOptimizedTransitions == 1 && middleState.getOptimizedTransition(0).getSerializationType == TransitionStates.EPSILON) 945 { 946 middleState = middleState.getOptimizedTransition(0).target; 947 } 948 if (middleState.numberOfOptimizedTransitions != 1) 949 { 950 continue; 951 } 952 Transition matchTransition = middleState.getOptimizedTransition(0); 953 ATNState matchTarget = matchTransition.target; 954 if (matchTransition.isEpsilon || !matchTarget.onlyHasEpsilonTransitions || matchTarget.numberOfOptimizedTransitions != 1 || !(cast(RuleStopState)matchTarget.getOptimizedTransition(0).target)) 955 { 956 continue; 957 } 958 with(TransitionStates) { 959 switch (matchTransition.getSerializationType) 960 { 961 case ATOM: 962 case RANGE: 963 case SET: 964 { 965 ruleToInlineTransition[i] = matchTransition; 966 break; 967 } 968 969 case NOT_SET: 970 case WILDCARD: 971 { 972 // not implemented yet 973 continue; 974 } 975 976 default: 977 { 978 continue; 979 } 980 } 981 } 982 } 983 for (int stateNumber = 0; stateNumber < atn.states.length; stateNumber++) 984 { 985 ATNState state = atn.states[stateNumber]; 986 if (state.ruleIndex < 0) 987 { 988 continue; 989 } 990 Transition[] optimizedTransitions = null; 991 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) { 992 Transition transition = state.getOptimizedTransition(i); 993 if (!(cast(RuleTransition)transition)) 994 { 995 if (optimizedTransitions != null) 996 { 997 optimizedTransitions ~= transition; 998 } 999 continue; 1000 } 1001 RuleTransition ruleTransition = cast(RuleTransition)transition; 1002 Transition effective = ruleToInlineTransition[ruleTransition.target.ruleIndex]; 1003 if (effective is null) 1004 { 1005 if (optimizedTransitions != null) 1006 { 1007 optimizedTransitions ~= transition; 1008 } 1009 continue; 1010 } 1011 if (optimizedTransitions == null) 1012 { 1013 optimizedTransitions = []; 1014 for (int j = 0; j < i; j++) 1015 { 1016 optimizedTransitions ~= state.getOptimizedTransition(i); 1017 } 1018 } 1019 inlinedCalls++; 1020 ATNState target = ruleTransition.followState; 1021 ATNState intermediateState = new BasicState(); 1022 intermediateState.setRuleIndex(target.ruleIndex); 1023 atn.addState(intermediateState); 1024 optimizedTransitions ~= new EpsilonTransition(intermediateState); 1025 with(TransitionStates) { 1026 switch (effective.getSerializationType) 1027 { 1028 case ATOM: 1029 { 1030 intermediateState.addTransition(new AtomTransition(target, (cast(AtomTransition)effective)._label)); 1031 break; 1032 } 1033 1034 case RANGE: 1035 { 1036 intermediateState.addTransition(new RangeTransition(target, (cast(RangeTransition)effective).from, (cast(RangeTransition)effective).to)); 1037 break; 1038 } 1039 1040 case SET: 1041 { 1042 intermediateState.addTransition(new SetTransition(target, effective.label)); 1043 break; 1044 } 1045 1046 default: 1047 { 1048 assert(false, "NotSupportedException"); 1049 } 1050 } 1051 } 1052 } 1053 if (optimizedTransitions != null) 1054 { 1055 if (state.isOptimized) 1056 { 1057 while (state.numberOfOptimizedTransitions > 0) 1058 { 1059 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1060 } 1061 } 1062 foreach (Transition transition; optimizedTransitions) 1063 { 1064 state.addOptimizedTransition(transition); 1065 } 1066 } 1067 } 1068 return inlinedCalls; 1069 } 1070 1071 private static int combineChainedEpsilons(ATN atn) 1072 { 1073 int removedEdges = 0; 1074 foreach (ATNState state; atn.states) 1075 { 1076 if (!state.onlyHasEpsilonTransitions || cast(RuleStopState)state) 1077 { 1078 continue; 1079 } 1080 Transition[] optimizedTransitions = null; 1081 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) 1082 { 1083 Transition transition = state.getOptimizedTransition(i); 1084 ATNState intermediate = transition.target; 1085 if (transition.getSerializationType != TransitionStates.EPSILON || 1086 (cast(EpsilonTransition)transition).outermostPrecedenceReturn != -1 || 1087 intermediate.getStateType != StateNames.BASIC || 1088 !intermediate.onlyHasEpsilonTransitions) 1089 { 1090 if (optimizedTransitions != null) 1091 { 1092 optimizedTransitions ~= transition; 1093 } 1094 goto nextTransition_continue; 1095 } 1096 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1097 { 1098 if (intermediate.getOptimizedTransition(j).getSerializationType != TransitionStates.EPSILON || 1099 1100 (cast(EpsilonTransition)intermediate.getOptimizedTransition(j)).outermostPrecedenceReturn != -1) 1101 { 1102 if (optimizedTransitions != null) 1103 { 1104 optimizedTransitions ~= transition; 1105 } 1106 goto nextTransition_continue; 1107 } 1108 } 1109 removedEdges++; 1110 if (optimizedTransitions == null) 1111 { 1112 optimizedTransitions = []; 1113 for (int j = 0; j < i; j++) 1114 { 1115 optimizedTransitions ~= state.getOptimizedTransition(j); 1116 } 1117 } 1118 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1119 { 1120 ATNState target = intermediate.getOptimizedTransition(j).target; 1121 optimizedTransitions ~= new EpsilonTransition(target); 1122 } 1123 nextTransition_continue: ; 1124 } 1125 1126 if (optimizedTransitions != null) 1127 { 1128 if (state.isOptimized) 1129 { 1130 while (state.numberOfOptimizedTransitions > 0) 1131 { 1132 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1133 } 1134 } 1135 foreach (Transition transition; optimizedTransitions) 1136 { 1137 state.addOptimizedTransition(transition); 1138 } 1139 } 1140 } 1141 1142 return removedEdges; 1143 } 1144 1145 private static int optimizeSets(ATN atn, bool preserveOrder) 1146 { 1147 if (preserveOrder) 1148 { 1149 // this optimization currently doesn't preserve edge order. 1150 return 0; 1151 } 1152 int removedPaths = 0; 1153 DecisionState[] decisions = atn.decisionToState; 1154 foreach (DecisionState decision; decisions) 1155 { 1156 IntervalSet setTransitions = new IntervalSet(); 1157 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1158 { 1159 Transition epsTransition = decision.optimizedTransitions[i]; 1160 if (!(cast(EpsilonTransition)epsTransition)) 1161 { 1162 continue; 1163 } 1164 if (epsTransition.target.optimizedTransitions.length != 1) 1165 { 1166 continue; 1167 } 1168 Transition transition = epsTransition.target.optimizedTransitions[0]; 1169 if (!(cast(BlockEndState)transition.target)) 1170 { 1171 continue; 1172 } 1173 if (cast(NotSetTransition)transition) 1174 { 1175 // TODO: not yet implemented 1176 continue; 1177 } 1178 if (cast(AtomTransition)transition || 1179 cast(RangeTransition)transition || 1180 cast(SetTransition)transition) 1181 { 1182 setTransitions.add(i); 1183 } 1184 } 1185 if (setTransitions.size <= 1) 1186 { 1187 continue; 1188 } 1189 Transition[] optimizedTransitions; 1190 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1191 { 1192 if (!setTransitions.contains(i)) 1193 { 1194 optimizedTransitions ~= decision.optimizedTransitions[i]; 1195 } 1196 } 1197 ATNState blockEndState = decision.optimizedTransitions[setTransitions.getMinElement].target.optimizedTransitions[0].target; 1198 IntervalSet matchSet = new IntervalSet; 1199 for (int i = 0; i < setTransitions.intervals.length; i++) 1200 { 1201 Interval interval = setTransitions.intervals[i]; 1202 for (int j = interval.a; j <= interval.b; j++) 1203 { 1204 Transition matchTransition = decision.optimizedTransitions[j].target.optimizedTransitions[0]; 1205 if (cast(NotSetTransition)matchTransition) 1206 { 1207 assert(false, "Not yet implemented."); 1208 } 1209 else 1210 { 1211 matchSet.addAll(matchTransition.label); 1212 } 1213 } 1214 } 1215 Transition newTransition; 1216 if (matchSet.intervals.length == 1) 1217 { 1218 if (matchSet.size == 1) 1219 { 1220 newTransition = new AtomTransition(blockEndState, matchSet.getMinElement); 1221 } 1222 else 1223 { 1224 Interval matchInterval = matchSet.intervals[0]; 1225 newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b); 1226 } 1227 } 1228 else 1229 { 1230 newTransition = new SetTransition(blockEndState, matchSet); 1231 } 1232 ATNState optimizedState = new BasicState(); 1233 optimizedState.setRuleIndex(decision.ruleIndex); 1234 atn.addState(optimizedState); 1235 optimizedState.addTransition(newTransition); 1236 optimizedTransitions ~= new EpsilonTransition(optimizedState); 1237 removedPaths += decision.numberOfOptimizedTransitions - optimizedTransitions.length; 1238 if (decision.isOptimized) 1239 { 1240 while (decision.numberOfOptimizedTransitions > 0) 1241 { 1242 decision.removeOptimizedTransition(decision.numberOfOptimizedTransitions - 1); 1243 } 1244 } 1245 foreach (Transition transition; optimizedTransitions) 1246 { 1247 decision.addOptimizedTransition(transition); 1248 } 1249 } 1250 return removedPaths; 1251 } 1252 1253 } 1254 1255 version(unittest) { 1256 import dshould : be, equal, not, should; 1257 import unit_threaded; 1258 @Tags("des11") 1259 @("testEncodingATNDeserialize") 1260 unittest { 1261 auto des = new ATNDeserializer; 1262 des.should.not.be(null); 1263 } 1264 }