1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ATNDeserializer; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.IllegalStateException; 11 import antlr.v4.runtime.Token; 12 import antlr.v4.runtime.TokenConstantDefinition; 13 import antlr.v4.runtime.UnsupportedOperationException; 14 import antlr.v4.runtime.atn.ATN; 15 import antlr.v4.runtime.atn.ATNDeserializationOptions; 16 import antlr.v4.runtime.atn.ATNState; 17 import antlr.v4.runtime.atn.ATNType; 18 import antlr.v4.runtime.atn.ActionTransition; 19 import antlr.v4.runtime.atn.AtomTransition; 20 import antlr.v4.runtime.atn.BasicBlockStartState; 21 import antlr.v4.runtime.atn.BasicState; 22 import antlr.v4.runtime.atn.BlockEndState; 23 import antlr.v4.runtime.atn.BlockStartState; 24 import antlr.v4.runtime.atn.DecisionState; 25 import antlr.v4.runtime.atn.EpsilonTransition; 26 import antlr.v4.runtime.atn.LexerAction; 27 import antlr.v4.runtime.atn.LexerActionType; 28 import antlr.v4.runtime.atn.LexerChannelAction; 29 import antlr.v4.runtime.atn.LexerCustomAction; 30 import antlr.v4.runtime.atn.LexerModeAction; 31 import antlr.v4.runtime.atn.LexerMoreAction; 32 import antlr.v4.runtime.atn.LexerPopModeAction; 33 import antlr.v4.runtime.atn.LexerPushModeAction; 34 import antlr.v4.runtime.atn.LexerSkipAction; 35 import antlr.v4.runtime.atn.LexerTypeAction; 36 import antlr.v4.runtime.atn.LoopEndState; 37 import antlr.v4.runtime.atn.NotSetTransition; 38 import antlr.v4.runtime.atn.PlusBlockStartState; 39 import antlr.v4.runtime.atn.PlusLoopbackState; 40 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 41 import antlr.v4.runtime.atn.PredicateTransition; 42 import antlr.v4.runtime.atn.RangeTransition; 43 import antlr.v4.runtime.atn.RuleStartState; 44 import antlr.v4.runtime.atn.RuleStopState; 45 import antlr.v4.runtime.atn.RuleTransition; 46 import antlr.v4.runtime.atn.SetTransition; 47 import antlr.v4.runtime.atn.StarBlockStartState; 48 import antlr.v4.runtime.atn.StarLoopEntryState; 49 import antlr.v4.runtime.atn.StarLoopbackState; 50 import antlr.v4.runtime.atn.StateNames; 51 import antlr.v4.runtime.atn.TokensStartState; 52 import antlr.v4.runtime.atn.Transition; 53 import antlr.v4.runtime.atn.TransitionStates; 54 import antlr.v4.runtime.atn.WildcardTransition; 55 import antlr.v4.runtime.misc; 56 import std.algorithm: canFind; 57 import std.algorithm: map; 58 import std.algorithm: reverse; 59 import std.array; 60 import std.conv; 61 import std.format; 62 import std.stdio; 63 import std.uuid; 64 65 /** 66 * TODO add class description 67 */ 68 class ATNDeserializer 69 { 70 71 public static immutable int SERIALIZED_VERSION = 3; 72 73 /** 74 * This is the earliest supported serialized UUID. 75 */ 76 private static UUID BASE_SERIALIZED_UUID; 77 78 /** 79 * This UUID indicates an extension of {@link BASE_SERIALIZED_UUID} for the 80 * addition of precedence predicates. 81 */ 82 private static UUID ADDED_PRECEDENCE_TRANSITIONS; 83 84 /** 85 * This UUID indicates an extension of {@link #ADDED_PRECEDENCE_TRANSITIONS} 86 * for the addition of lexer actions encoded as a sequence of 87 * {@link LexerAction} instances. 88 */ 89 private static UUID ADDED_LEXER_ACTIONS; 90 91 private static UUID ADDED_UNICODE_SMP; 92 93 /** 94 * This list contains all of the currently supported UUIDs, ordered by when 95 * the feature first appeared in this branch. 96 */ 97 private static UUID[] SUPPORTED_UUIDS; 98 99 /** 100 * This is the current serialized UUID. 101 */ 102 public static UUID SERIALIZED_UUID; 103 104 private ATNDeserializationOptions deserializationOptions; 105 106 private int[] data; 107 108 private int p; 109 110 public UUID uuid; 111 112 public static this() 113 { 114 /* WARNING: DO NOT MERGE THESE LINES. If UUIDs differ during a merge, 115 * resolve the conflict by generating a new ID! 116 */ 117 BASE_SERIALIZED_UUID = UUID("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3"); 118 ADDED_PRECEDENCE_TRANSITIONS = UUID("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61"); 119 ADDED_LEXER_ACTIONS = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E"); 120 ADDED_UNICODE_SMP = UUID("59627784-3BE5-417A-B9EB-8131A7286089"); 121 SUPPORTED_UUIDS ~= BASE_SERIALIZED_UUID; 122 SUPPORTED_UUIDS ~= ADDED_PRECEDENCE_TRANSITIONS; 123 SUPPORTED_UUIDS ~= ADDED_LEXER_ACTIONS; 124 SUPPORTED_UUIDS ~= ADDED_UNICODE_SMP; 125 126 SERIALIZED_UUID = ADDED_LEXER_ACTIONS; 127 } 128 129 public this() 130 { 131 this(ATNDeserializationOptions.defaultOptions); 132 } 133 134 public this(ATNDeserializationOptions deserializationOptions) 135 { 136 if (deserializationOptions is null) { 137 deserializationOptions = new ATNDeserializationOptions(); 138 } 139 this.deserializationOptions = deserializationOptions; 140 } 141 142 /** 143 * Determines if a particular serialized representation of an ATN supports 144 * a particular feature, identified by the {@link UUID} used for serializing 145 * the ATN at the time the feature was first introduced. 146 * 147 * @param feature The {@link UUID} marking the first time the feature was 148 * supported in the serialized ATN. 149 * @param actualUuid The {@link UUID} of the actual serialized ATN which is 150 * currently being deserialized. 151 * @return {@code true} if the {@code actualUuid} value represents a 152 * serialized ATN at or after the feature identified by {@code feature} was 153 * introduced; otherwise, {@code false}. 154 */ 155 protected bool isFeatureSupported(UUID feature, UUID actualUuid) 156 { 157 bool not_found = true; 158 foreach (uu; SUPPORTED_UUIDS) { 159 if (not_found && uu == feature) { 160 if (uu == actualUuid) 161 return true; 162 else 163 not_found = false; 164 } 165 else { 166 if (uu == actualUuid) { 167 return true; 168 } 169 } 170 } 171 return false; 172 } 173 174 public ATN deserialize(wstring input_data) 175 { 176 reset(input_data); 177 checkVersion; 178 checkUUID; 179 ATN atn = readATN; 180 readStates(atn); 181 readRules(atn); 182 readModes(atn); 183 IntervalSet[] sets; 184 readSets(sets, atn, &readInt); 185 if (isFeatureSupported(ADDED_UNICODE_SMP, uuid)) 186 { 187 readSets(sets, atn, &readInt32); 188 } 189 readEdges(atn, sets); 190 readDecisions(atn); 191 readLexerActions(atn); 192 markPrecedenceDecisions(atn); 193 if (deserializationOptions.verifyATN) { 194 verifyATN(atn); 195 } 196 if (deserializationOptions.generateRuleBypassTransitions && atn.grammarType == ATNType.PARSER) { 197 generateRuleBypassTransitions(atn); 198 } 199 if (deserializationOptions.optimize) { 200 optimizeATN(atn); 201 } 202 identifyTailCalls(atn); 203 return atn; 204 } 205 206 /** 207 * @uml 208 * verify assumptions 209 */ 210 protected void verifyATN(ATN atn) 211 { 212 // verify assumptions 213 foreach (ATNState state; atn.states) { 214 if (state is null) { 215 continue; 216 } 217 218 checkCondition(state.onlyHasEpsilonTransitions || state.getNumberOfTransitions <= 1); 219 220 if (cast(PlusBlockStartState)state) { 221 checkCondition((cast(PlusBlockStartState)state).loopBackState !is null); 222 } 223 224 if (cast(StarLoopEntryState)state) { 225 StarLoopEntryState starLoopEntryState = cast(StarLoopEntryState)state; 226 checkCondition(starLoopEntryState.loopBackState !is null); 227 checkCondition(starLoopEntryState.getNumberOfTransitions() == 2); 228 229 if (cast(StarBlockStartState)(starLoopEntryState.transition(0).target)) { 230 checkCondition(cast(LoopEndState)(starLoopEntryState.transition(1).target) !is null); 231 checkCondition(!starLoopEntryState.nonGreedy); 232 } 233 else if (cast(LoopEndState)(starLoopEntryState.transition(0).target)) { 234 checkCondition(cast(StarBlockStartState)(starLoopEntryState.transition(1).target) !is null); 235 checkCondition(starLoopEntryState.nonGreedy); 236 } 237 else { 238 throw new IllegalStateException(); 239 } 240 } 241 242 if (cast(StarLoopbackState)state) { 243 checkCondition(state.getNumberOfTransitions() == 1); 244 checkCondition(cast(StarLoopEntryState)(state.transition(0).target) !is null); 245 } 246 247 if (cast(LoopEndState)state) { 248 checkCondition((cast(LoopEndState)state).loopBackState !is null); 249 } 250 251 if (cast(RuleStartState)state) { 252 checkCondition((cast(RuleStartState)state).stopState !is null); 253 } 254 255 if (cast(BlockStartState)state) { 256 checkCondition((cast(BlockStartState)state).endState !is null); 257 } 258 259 if (cast(BlockEndState)state) { 260 checkCondition((cast(BlockEndState)state).startState !is null); 261 } 262 263 if (cast(DecisionState)state) { 264 DecisionState decisionState = cast(DecisionState)state; 265 checkCondition(decisionState.getNumberOfTransitions() <= 1 || decisionState.decision >= 0); 266 } 267 else { 268 checkCondition(state.getNumberOfTransitions() <= 1 || state.classinfo == RuleStopState.classinfo); 269 } 270 } 271 272 } 273 274 private void readSets(ref IntervalSet[] sets, ATN aTN, int delegate() readUnicode) 275 { 276 // SETS 277 int nsets = readInt; 278 for (int i=0; i<nsets; i++) { 279 IntervalSet set = new IntervalSet(); 280 sets ~= set; 281 int nintervals = readInt; 282 bool containsEof = readInt != 0; 283 if (containsEof) { 284 set.add(-1); 285 } 286 for (int j=0; j<nintervals; j++) { 287 set.add(readUnicode(), readUnicode()); 288 } 289 } 290 } 291 292 protected void markPrecedenceDecisions(ATN atn) 293 { 294 foreach (ATNState state; atn.states) { 295 if (!cast(StarLoopEntryState)state) { 296 continue; 297 } 298 299 /* We analyze the ATN to determine if this ATN decision state is the 300 * decision for the closure block that determines whether a 301 * precedence rule should continue or complete. 302 */ 303 if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) { 304 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 305 if (cast(LoopEndState)maybeLoopEndState) { 306 if (maybeLoopEndState.epsilonOnlyTransitions && 307 maybeLoopEndState.transitions[0].target.classinfo == 308 RuleStopState.classinfo) { 309 (cast(StarLoopEntryState)state).isPrecedenceDecision = true; 310 } 311 } 312 } 313 } 314 315 } 316 317 protected void checkCondition(bool condition) 318 { 319 checkCondition(condition, null); 320 } 321 322 protected void checkCondition(bool condition, string message) 323 { 324 if (!condition) { 325 throw new IllegalStateException(message); 326 } 327 } 328 329 protected Transition edgeFactory(ATN atn, int type, int src, int trg, int arg1, int arg2, 330 int arg3, IntervalSet[] sets) 331 { 332 ATNState target = atn.states[trg]; 333 with(TransitionStates) { 334 switch (type) { 335 case EPSILON : return new EpsilonTransition(target); 336 case RANGE : 337 if (arg3 != 0) { 338 return new RangeTransition(target, TokenConstantDefinition.EOF, arg2); 339 } 340 else { 341 return new RangeTransition(target, arg1, arg2); 342 } 343 case RULE : 344 RuleTransition rt = new RuleTransition(cast(RuleStartState)atn.states[arg1], arg2, arg3, target); 345 return rt; 346 case PREDICATE : 347 PredicateTransition pt = new PredicateTransition(target, arg1, arg2, arg3 != 0); 348 return pt; 349 case PRECEDENCE: 350 return new PrecedencePredicateTransition(target, arg1); 351 case ATOM : 352 if (arg3 != 0) { 353 return new AtomTransition(target, TokenConstantDefinition.EOF); 354 } 355 else { 356 return new AtomTransition(target, arg1); 357 } 358 case ACTION : 359 ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0); 360 return a; 361 case SET : return new SetTransition(target, sets[arg1]); 362 case NOT_SET : return new NotSetTransition(target, sets[arg1]); 363 case WILDCARD : return new WildcardTransition(target); 364 default: throw new IllegalArgumentException("The specified transition type is not valid."); 365 } 366 } 367 } 368 369 protected ATNState stateFactory(int type, int ruleIndex) 370 { 371 ATNState s; 372 with(StateNames) { 373 switch (type) { 374 case INVALID: return null; 375 case BASIC : s = new BasicState(); break; 376 case RULE_START : s = new RuleStartState(); break; 377 case BLOCK_START : s = new BasicBlockStartState(); break; 378 case PLUS_BLOCK_START : s = new PlusBlockStartState(); break; 379 case STAR_BLOCK_START : s = new StarBlockStartState(); break; 380 case TOKEN_START : s = new TokensStartState(); break; 381 case RULE_STOP : s = new RuleStopState(); break; 382 case BLOCK_END : s = new BlockEndState(); break; 383 case STAR_LOOP_BACK : s = new StarLoopbackState(); break; 384 case STAR_LOOP_ENTRY : s = new StarLoopEntryState(); break; 385 case PLUS_LOOP_BACK : s = new PlusLoopbackState(); break; 386 case LOOP_END : s = new LoopEndState(); break; 387 default : 388 string message = format("The specified state type %d is not valid.", type); 389 throw new IllegalArgumentException(message); 390 } 391 } 392 s.ruleIndex = ruleIndex; 393 return s; 394 } 395 396 protected LexerAction lexerActionFactory(LexerActionType type, int data1, int data2) 397 { 398 switch (type) { 399 case LexerActionType.CHANNEL: 400 return new LexerChannelAction(data1); 401 402 case LexerActionType.CUSTOM: 403 return new LexerCustomAction(data1, data2); 404 405 case LexerActionType.MODE: 406 return new LexerModeAction(data1); 407 408 case LexerActionType.MORE: 409 return cast(LexerAction)LexerMoreAction.instance; 410 411 case LexerActionType.POP_MODE: 412 return cast(LexerAction)LexerPopModeAction.instance; 413 414 case LexerActionType.PUSH_MODE: 415 return new LexerPushModeAction(data1); 416 417 case LexerActionType.SKIP: 418 return cast(LexerAction)LexerSkipAction.instance; 419 420 case LexerActionType.TYPE: 421 return new LexerTypeAction(data1); 422 423 default: 424 string message = format("The specified lexer action type %d is not valid.", type); 425 throw new IllegalArgumentException(message); 426 } 427 } 428 429 private void reset(const wstring data) 430 { 431 wchar el; 432 for(int i; i < data.length; i++) { 433 el = data[i]; 434 if (el == '[') { 435 if (i+7 >= data.length) { 436 this.data ~=to!int('[') - 2; 437 continue; 438 } 439 if (data[i+7] != ']') { 440 this.data ~=to!int('[') - 2; 441 continue; 442 } 443 this.data ~= parseOct(data, i) -2; 444 i += 7; 445 continue; 446 } 447 if (el < 2) 448 this.data ~= -1; 449 else 450 this.data ~= el - 2; 451 } 452 // don't adjust the first value since that's the version number 453 this.data[0] = data[0]; 454 p = 0; 455 } 456 457 private void checkVersion() 458 { 459 auto version_atn = readInt; 460 if (version_atn != SERIALIZED_VERSION) { 461 string reason = format("Could not deserialize ATN with version %d (expected %d).", version_atn, SERIALIZED_VERSION); 462 assert(false, reason); 463 } 464 } 465 466 private void checkUUID() 467 { 468 uuid = readUUID; 469 if (!SUPPORTED_UUIDS.canFind(uuid)) { 470 string reason = format("Could not deserialize ATN with UUID %s (expected %s or a legacy UUID).", uuid, SERIALIZED_UUID); 471 assert(false, reason); 472 } 473 } 474 475 private ATN readATN() 476 { 477 ATNType grammarType = cast(ATNType)readInt; 478 int maxTokenType = readInt; 479 return new ATN(grammarType, maxTokenType); 480 } 481 482 private void readStates(ATN atn) 483 { 484 // STATES 485 int[LoopEndState][] loopBackStateNumbers; 486 int[BlockStartState][] endStateNumbers; 487 int nstates = readInt; 488 debug(deserializer) 489 writefln("%s: Number of states %s", this.p-1 , nstates); 490 for (int i=0; i<nstates; i++) { 491 int stype = readInt; 492 // ignore bad type of states 493 if (stype == StateNames.INVALID) { 494 atn.addState(null); 495 continue; 496 } 497 498 int ruleIndex = readInt; 499 if (ruleIndex == char.max) { 500 ruleIndex = -1; 501 } 502 debug(deserializer) 503 writefln("%s: readState %s, ruleIndex %s, index %s", 504 this.p-1, 505 stype, 506 ruleIndex, 507 i); 508 ATNState s = stateFactory(stype, ruleIndex); 509 if (stype == StateNames.LOOP_END) { 510 // special case 511 int loopBackStateNumber = readInt; 512 int[LoopEndState] _a; 513 _a[cast(LoopEndState)s] = loopBackStateNumber; 514 loopBackStateNumbers ~= _a; 515 } 516 else 517 if (cast(BlockStartState)s) { 518 int endStateNumber = readInt; 519 int[BlockStartState] _a; 520 _a[cast(BlockStartState)s] = endStateNumber; 521 endStateNumbers ~= _a; 522 } 523 atn.addState(s); 524 } 525 526 // delay the assignment of loop back and end states until we know all the state instances have been initialized 527 foreach (ref pair; loopBackStateNumbers) { 528 pair.keys[0].loopBackState = atn.states[pair[pair.keys[0]]]; 529 } 530 531 foreach (ref pair; endStateNumbers) { 532 pair.keys[0].endState = cast(BlockEndState)atn.states[pair[pair.keys[0]]]; 533 } 534 535 int numNonGreedyStates = readInt; 536 for (int i = 0; i < numNonGreedyStates; i++) { 537 int stateNumber = readInt; 538 (cast(DecisionState)atn.states[stateNumber]).nonGreedy = true; 539 } 540 541 int numPrecedenceStates = readInt; 542 for (int i = 0; i < numPrecedenceStates; i++) { 543 int stateNumber = readInt; 544 (cast(RuleStartState)atn.states[stateNumber]).isLeftRecursiveRule = true; 545 } 546 547 } 548 549 private void readRules(ATN atn) 550 { 551 // // RULES 552 int nrules = readInt; 553 if ( atn.grammarType == ATNType.LEXER ) { 554 atn.ruleToTokenType = new int[nrules]; 555 } 556 debug(deserializer) 557 writefln("%s: Number of rules %s", this.p-1 , nrules); 558 atn.ruleToStartState = new RuleStartState[nrules]; 559 for (int i=0; i<nrules; i++) { 560 int s = readInt; 561 RuleStartState startState = cast(RuleStartState)atn.states[s]; 562 atn.ruleToStartState[i] = startState; 563 if ( atn.grammarType == ATNType.LEXER ) { 564 int tokenType = readInt; 565 if (tokenType == 0xFFFF) { 566 tokenType = TokenConstantDefinition.EOF; 567 } 568 569 atn.ruleToTokenType[i] = tokenType; 570 } 571 } 572 573 atn.ruleToStopState = new RuleStopState[nrules]; 574 foreach (ATNState state; atn.states) { 575 if (!cast(RuleStopState)state) { 576 continue; 577 } 578 RuleStopState stopState = cast(RuleStopState)state; 579 atn.ruleToStopState[state.ruleIndex] = stopState; 580 atn.ruleToStartState[state.ruleIndex].stopState = stopState; 581 } 582 583 } 584 585 public void readModes(ATN atn) 586 { 587 // MODES 588 int nmodes = readInt; 589 for (int i=0; i<nmodes; i++) { 590 int s = readInt; 591 atn.modeToStartState ~= cast(TokensStartState)atn.states[s]; 592 } 593 } 594 595 private void readEdges(ATN atn, IntervalSet[] sets) 596 { 597 // EDGES 598 599 int nedges = readInt; 600 for (int i=0; i<nedges; i++) { 601 int src = readInt; 602 int trg = readInt; 603 int ttype = readInt; 604 int arg1 = readInt; 605 int arg2 = readInt; 606 int arg3 = readInt; 607 Transition trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); 608 debug(deserializer) { 609 writefln("EDGES %1$s %2$s->%3$s %4$s %5$s %6$s %7$s", 610 trans.classinfo, src, 611 trg, Transition.serializationNames[ttype], 612 arg1, arg2, arg3); 613 } 614 ATNState srcState = atn.states[src]; 615 srcState.addTransition(trans); 616 } 617 618 // edges for rule stop states can be derived, so they aren't serialized 619 foreach (ATNState state; atn.states) { 620 for (int i = 0; i < state.getNumberOfTransitions(); i++) { 621 Transition t = state.transition(i); 622 if (!cast(RuleTransition)t) { 623 continue; 624 } 625 626 RuleTransition ruleTransition = cast(RuleTransition)t; 627 int outermostPrecedenceReturn = -1; 628 if (atn.ruleToStartState[ruleTransition.target.ruleIndex].isLeftRecursiveRule) { 629 if (ruleTransition.precedence == 0) { 630 outermostPrecedenceReturn = ruleTransition.target.ruleIndex; 631 } 632 } 633 EpsilonTransition returnTransition = new EpsilonTransition(ruleTransition.followState, outermostPrecedenceReturn); 634 atn.ruleToStopState[ruleTransition.target.ruleIndex].addTransition(returnTransition); 635 } 636 } 637 638 foreach (ATNState state; atn.states) { 639 if (cast(BlockStartState)state) { 640 // we need to know the end state to set its start state 641 if ((cast(BlockStartState)state).endState is null) { 642 throw new IllegalStateException(); 643 } 644 645 // block end states can only be associated to a single block start state 646 if ((cast(BlockStartState)state).endState.startState !is null) { 647 throw new IllegalStateException(); 648 } 649 650 (cast(BlockStartState)state).endState.startState = cast(BlockStartState)state; 651 } 652 653 if (cast(PlusLoopbackState)state) { 654 PlusLoopbackState loopbackState = cast(PlusLoopbackState)state; 655 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 656 ATNState target = loopbackState.transition(i).target; 657 if (cast(PlusBlockStartState)target) { 658 (cast(PlusBlockStartState)target).loopBackState = loopbackState; 659 } 660 } 661 } 662 else if (cast(StarLoopbackState)state) { 663 StarLoopbackState loopbackState = cast(StarLoopbackState)state; 664 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 665 ATNState target = loopbackState.transition(i).target; 666 if (cast(StarLoopEntryState)target) { 667 (cast(StarLoopEntryState)target).loopBackState = loopbackState; 668 } 669 } 670 } 671 } 672 673 } 674 675 private void readDecisions(ATN atn) 676 { 677 // DECISIONS 678 679 int ndecisions = readInt; 680 for (int i=1; i<=ndecisions; i++) { 681 int s = readInt; 682 DecisionState decState = cast(DecisionState)atn.states[s]; 683 atn.decisionToState ~= decState; 684 decState.decision = i-1; 685 } 686 } 687 688 private void readLexerActions(ATN atn) 689 { 690 // LEXER ACTIONS 691 692 if (atn.grammarType == ATNType.LEXER) { 693 atn.lexerActions = new LexerAction[readInt]; 694 for (int i = 0; i < atn.lexerActions.length; i++) { 695 LexerActionType actionType = cast(LexerActionType)readInt; 696 int data1 = readInt; 697 if (data1 == 0xFFFF) { 698 data1 = -1; 699 } 700 701 int data2 = readInt; 702 if (data2 == 0xFFFF) { 703 data2 = -1; 704 } 705 LexerAction lexerAction = lexerActionFactory(actionType, data1, data2); 706 atn.lexerActions[i] = lexerAction; 707 } 708 } 709 } 710 711 private void optimizeATN(ATN atn) 712 { 713 while (true) 714 { 715 int optimizationCount = 0; 716 optimizationCount += inlineSetRules(atn); 717 optimizationCount += combineChainedEpsilons(atn); 718 bool preserveOrder = atn.grammarType == ATNType.LEXER; 719 optimizationCount += optimizeSets(atn, preserveOrder); 720 if (optimizationCount == 0) 721 { 722 break; 723 } 724 } 725 if (deserializationOptions.verifyATN) 726 { 727 // reverify after modification 728 verifyATN(atn); 729 } 730 } 731 732 public int parseOct(wstring data, int p) 733 { 734 int res = 0; 735 for (auto i = p + 1; i < p + 7; i++) { 736 res = res<<3 | to!int(data[i] - 0x30); 737 } 738 return res; 739 } 740 741 private UUID readUUID() 742 { 743 ubyte[16] data; 744 long b = readLong; 745 for(int i = 0; i < 8; i++) { 746 data[i] = b & 0xff; 747 b = b >>> 8; 748 } 749 long a = readLong; 750 for(int i = 8; i < 16; i++) { 751 data[i] = a & 0xff; 752 a = a >>> 8; 753 } 754 data[].reverse(); 755 auto uuid = UUID(data); 756 return uuid; 757 } 758 759 private long readLong() 760 { 761 long lowOrder = to!long(readInt32) & 0x00000000FFFFFFFFL; 762 return lowOrder | (to!long(readInt32) << 32); 763 } 764 765 private int readInt32() 766 { 767 return data[p++] | data[p++] << 16; 768 } 769 770 private int readInt() 771 { 772 return data[p++]; 773 } 774 775 private static void identifyTailCalls(ATN atn) 776 { 777 foreach (ATNState state; atn.states) 778 { 779 foreach (Transition transition; state.transitions) 780 { 781 if (!(cast(RuleTransition)transition)) 782 { 783 continue; 784 } 785 RuleTransition ruleTransition = cast(RuleTransition)transition; 786 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 787 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 788 } 789 if (!state.isOptimized) 790 { 791 continue; 792 } 793 foreach (Transition transition; state.optimizedTransitions) 794 { 795 if (!(cast(RuleTransition)transition)) 796 { 797 continue; 798 } 799 RuleTransition ruleTransition = cast(RuleTransition)transition; 800 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 801 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 802 } 803 } 804 } 805 806 private static bool testTailCall(ATN atn, RuleTransition transition, bool optimizedPath) 807 { 808 if (optimizedPath && transition.optimizedTailCall) 809 { 810 return true; 811 } 812 BitSet *reachable = new BitSet(atn.states.length); 813 ATNState[] worklist; 814 worklist ~= transition.followState; 815 while (worklist.length > 0) 816 { 817 ATNState state = worklist[$-1]; 818 worklist.length -= 1; 819 if (reachable.get(state.stateNumber)) 820 { 821 continue; 822 } 823 if (cast(RuleStopState)state) 824 { 825 continue; 826 } 827 if (!state.onlyHasEpsilonTransitions) 828 { 829 return false; 830 } 831 Transition[] transitions = optimizedPath ? state.optimizedTransitions : state.transitions; 832 foreach (Transition t; transitions) 833 { 834 if (t.getSerializationType != TransitionStates.EPSILON) 835 { 836 return false; 837 } 838 worklist ~= t.target; 839 } 840 } 841 return true; 842 } 843 844 public void generateRuleBypassTransitions(ATN atn) 845 { 846 atn.ruleToTokenType = new int[atn.ruleToStartState.length]; 847 for (int i = 0; i < atn.ruleToStartState.length; i++) { 848 atn.ruleToTokenType[i] = atn.maxTokenType + i + 1; 849 } 850 851 for (int i = 0; i < atn.ruleToStartState.length; i++) { 852 BasicBlockStartState bypassStart = new BasicBlockStartState(); 853 bypassStart.ruleIndex = i; 854 atn.addState(bypassStart); 855 856 BlockEndState bypassStop = new BlockEndState(); 857 bypassStop.ruleIndex = i; 858 atn.addState(bypassStop); 859 860 bypassStart.endState = bypassStop; 861 atn.defineDecisionState(bypassStart); 862 863 bypassStop.startState = bypassStart; 864 865 ATNState endState; 866 Transition excludeTransition = null; 867 if (atn.ruleToStartState[i].isLeftRecursiveRule) { 868 // wrap from the beginning of the rule to the StarLoopEntryState 869 endState = null; 870 foreach (ATNState state; atn.states) { 871 if (state.ruleIndex != i) { 872 continue; 873 } 874 875 if (state.classinfo != StarLoopEntryState.classinfo) { 876 continue; 877 } 878 879 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 880 if (maybeLoopEndState.classinfo != LoopEndState.classinfo) { 881 continue; 882 } 883 884 if (maybeLoopEndState.epsilonOnlyTransitions && maybeLoopEndState.transition(0).target.classinfo == RuleStopState.classinfo) { 885 endState = state; 886 break; 887 } 888 } 889 890 if (endState is null) { 891 throw new UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); 892 } 893 894 excludeTransition = (cast(StarLoopEntryState)endState).loopBackState.transition(0); 895 } 896 else { 897 endState = atn.ruleToStopState[i]; 898 } 899 900 // all non-excluded transitions that currently target end state need to target blockEnd instead 901 foreach (ATNState state; atn.states) { 902 foreach (Transition transition; state.transitions) { 903 if (transition == excludeTransition) { 904 continue; 905 } 906 907 if (transition.target == endState) { 908 transition.target = bypassStop; 909 } 910 } 911 } 912 913 // all transitions leaving the rule start state need to leave blockStart instead 914 while (atn.ruleToStartState[i].getNumberOfTransitions() > 0) { 915 Transition transition = atn.ruleToStartState[i].removeTransition(atn.ruleToStartState[i].getNumberOfTransitions() - 1); 916 bypassStart.addTransition(transition); 917 } 918 919 // link the new states 920 atn.ruleToStartState[i].addTransition(new EpsilonTransition(bypassStart)); 921 bypassStop.addTransition(new EpsilonTransition(endState)); 922 923 ATNState matchState = new BasicState(); 924 atn.addState(matchState); 925 matchState.addTransition(new AtomTransition(bypassStop, atn.ruleToTokenType[i])); 926 bypassStart.addTransition(new EpsilonTransition(matchState)); 927 } 928 929 if (deserializationOptions.verifyATN()) { 930 // reverify after modification 931 verifyATN(atn); 932 } 933 } 934 935 private static int inlineSetRules(ATN atn) 936 { 937 int inlinedCalls = 0; 938 Transition[] ruleToInlineTransition; 939 for (int i = 0; i < atn.ruleToStartState.length; i++) 940 { 941 RuleStartState startState = atn.ruleToStartState[i]; 942 ATNState middleState = startState; 943 while (middleState.onlyHasEpsilonTransitions && middleState.numberOfOptimizedTransitions == 1 && middleState.getOptimizedTransition(0).getSerializationType == TransitionStates.EPSILON) 944 { 945 middleState = middleState.getOptimizedTransition(0).target; 946 } 947 if (middleState.numberOfOptimizedTransitions != 1) 948 { 949 continue; 950 } 951 Transition matchTransition = middleState.getOptimizedTransition(0); 952 ATNState matchTarget = matchTransition.target; 953 if (matchTransition.isEpsilon || !matchTarget.onlyHasEpsilonTransitions || matchTarget.numberOfOptimizedTransitions != 1 || !(cast(RuleStopState)matchTarget.getOptimizedTransition(0).target)) 954 { 955 continue; 956 } 957 with(TransitionStates) { 958 switch (matchTransition.getSerializationType) 959 { 960 case ATOM: 961 case RANGE: 962 case SET: 963 { 964 ruleToInlineTransition[i] = matchTransition; 965 break; 966 } 967 968 case NOT_SET: 969 case WILDCARD: 970 { 971 // not implemented yet 972 continue; 973 } 974 975 default: 976 { 977 continue; 978 } 979 } 980 } 981 } 982 for (int stateNumber = 0; stateNumber < atn.states.length; stateNumber++) 983 { 984 ATNState state = atn.states[stateNumber]; 985 if (state.ruleIndex < 0) 986 { 987 continue; 988 } 989 Transition[] optimizedTransitions = null; 990 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) { 991 Transition transition = state.getOptimizedTransition(i); 992 if (!(cast(RuleTransition)transition)) 993 { 994 if (optimizedTransitions != null) 995 { 996 optimizedTransitions ~= transition; 997 } 998 continue; 999 } 1000 RuleTransition ruleTransition = cast(RuleTransition)transition; 1001 Transition effective = ruleToInlineTransition[ruleTransition.target.ruleIndex]; 1002 if (effective is null) 1003 { 1004 if (optimizedTransitions != null) 1005 { 1006 optimizedTransitions ~= transition; 1007 } 1008 continue; 1009 } 1010 if (optimizedTransitions == null) 1011 { 1012 optimizedTransitions = []; 1013 for (int j = 0; j < i; j++) 1014 { 1015 optimizedTransitions ~= state.getOptimizedTransition(i); 1016 } 1017 } 1018 inlinedCalls++; 1019 ATNState target = ruleTransition.followState; 1020 ATNState intermediateState = new BasicState(); 1021 intermediateState.setRuleIndex(target.ruleIndex); 1022 atn.addState(intermediateState); 1023 optimizedTransitions ~= new EpsilonTransition(intermediateState); 1024 with(TransitionStates) { 1025 switch (effective.getSerializationType) 1026 { 1027 case ATOM: 1028 { 1029 intermediateState.addTransition(new AtomTransition(target, (cast(AtomTransition)effective)._label)); 1030 break; 1031 } 1032 1033 case RANGE: 1034 { 1035 intermediateState.addTransition(new RangeTransition(target, (cast(RangeTransition)effective).from, (cast(RangeTransition)effective).to)); 1036 break; 1037 } 1038 1039 case SET: 1040 { 1041 intermediateState.addTransition(new SetTransition(target, effective.label)); 1042 break; 1043 } 1044 1045 default: 1046 { 1047 assert(false, "NotSupportedException"); 1048 } 1049 } 1050 } 1051 } 1052 if (optimizedTransitions != null) 1053 { 1054 if (state.isOptimized) 1055 { 1056 while (state.numberOfOptimizedTransitions > 0) 1057 { 1058 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1059 } 1060 } 1061 foreach (Transition transition; optimizedTransitions) 1062 { 1063 state.addOptimizedTransition(transition); 1064 } 1065 } 1066 } 1067 return inlinedCalls; 1068 } 1069 1070 private static int combineChainedEpsilons(ATN atn) 1071 { 1072 int removedEdges = 0; 1073 foreach (ATNState state; atn.states) 1074 { 1075 if (!state.onlyHasEpsilonTransitions || cast(RuleStopState)state) 1076 { 1077 continue; 1078 } 1079 Transition[] optimizedTransitions = null; 1080 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) 1081 { 1082 Transition transition = state.getOptimizedTransition(i); 1083 ATNState intermediate = transition.target; 1084 if (transition.getSerializationType != TransitionStates.EPSILON || 1085 (cast(EpsilonTransition)transition).outermostPrecedenceReturn != -1 || 1086 intermediate.getStateType != StateNames.BASIC || 1087 !intermediate.onlyHasEpsilonTransitions) 1088 { 1089 if (optimizedTransitions != null) 1090 { 1091 optimizedTransitions ~= transition; 1092 } 1093 goto nextTransition_continue; 1094 } 1095 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1096 { 1097 if (intermediate.getOptimizedTransition(j).getSerializationType != TransitionStates.EPSILON || 1098 1099 (cast(EpsilonTransition)intermediate.getOptimizedTransition(j)).outermostPrecedenceReturn != -1) 1100 { 1101 if (optimizedTransitions != null) 1102 { 1103 optimizedTransitions ~= transition; 1104 } 1105 goto nextTransition_continue; 1106 } 1107 } 1108 removedEdges++; 1109 if (optimizedTransitions == null) 1110 { 1111 optimizedTransitions = []; 1112 for (int j = 0; j < i; j++) 1113 { 1114 optimizedTransitions ~= state.getOptimizedTransition(j); 1115 } 1116 } 1117 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1118 { 1119 ATNState target = intermediate.getOptimizedTransition(j).target; 1120 optimizedTransitions ~= new EpsilonTransition(target); 1121 } 1122 nextTransition_continue: ; 1123 } 1124 1125 if (optimizedTransitions != null) 1126 { 1127 if (state.isOptimized) 1128 { 1129 while (state.numberOfOptimizedTransitions > 0) 1130 { 1131 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1132 } 1133 } 1134 foreach (Transition transition; optimizedTransitions) 1135 { 1136 state.addOptimizedTransition(transition); 1137 } 1138 } 1139 } 1140 1141 return removedEdges; 1142 } 1143 1144 private static int optimizeSets(ATN atn, bool preserveOrder) 1145 { 1146 if (preserveOrder) 1147 { 1148 // this optimization currently doesn't preserve edge order. 1149 return 0; 1150 } 1151 int removedPaths = 0; 1152 DecisionState[] decisions = atn.decisionToState; 1153 foreach (DecisionState decision; decisions) 1154 { 1155 IntervalSet setTransitions = new IntervalSet(); 1156 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1157 { 1158 Transition epsTransition = decision.optimizedTransitions[i]; 1159 if (!(cast(EpsilonTransition)epsTransition)) 1160 { 1161 continue; 1162 } 1163 if (epsTransition.target.optimizedTransitions.length != 1) 1164 { 1165 continue; 1166 } 1167 Transition transition = epsTransition.target.optimizedTransitions[0]; 1168 if (!(cast(BlockEndState)transition.target)) 1169 { 1170 continue; 1171 } 1172 if (cast(NotSetTransition)transition) 1173 { 1174 // TODO: not yet implemented 1175 continue; 1176 } 1177 if (cast(AtomTransition)transition || 1178 cast(RangeTransition)transition || 1179 cast(SetTransition)transition) 1180 { 1181 setTransitions.add(i); 1182 } 1183 } 1184 if (setTransitions.size <= 1) 1185 { 1186 continue; 1187 } 1188 Transition[] optimizedTransitions; 1189 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1190 { 1191 if (!setTransitions.contains(i)) 1192 { 1193 optimizedTransitions ~= decision.optimizedTransitions[i]; 1194 } 1195 } 1196 ATNState blockEndState = decision.optimizedTransitions[setTransitions.getMinElement].target.optimizedTransitions[0].target; 1197 IntervalSet matchSet = new IntervalSet; 1198 for (int i = 0; i < setTransitions.intervals.length; i++) 1199 { 1200 Interval interval = setTransitions.intervals[i]; 1201 for (int j = interval.a; j <= interval.b; j++) 1202 { 1203 Transition matchTransition = decision.optimizedTransitions[j].target.optimizedTransitions[0]; 1204 if (cast(NotSetTransition)matchTransition) 1205 { 1206 assert(false, "Not yet implemented."); 1207 } 1208 else 1209 { 1210 matchSet.addAll(matchTransition.label); 1211 } 1212 } 1213 } 1214 Transition newTransition; 1215 if (matchSet.intervals.length == 1) 1216 { 1217 if (matchSet.size == 1) 1218 { 1219 newTransition = new AtomTransition(blockEndState, matchSet.getMinElement); 1220 } 1221 else 1222 { 1223 Interval matchInterval = matchSet.intervals[0]; 1224 newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b); 1225 } 1226 } 1227 else 1228 { 1229 newTransition = new SetTransition(blockEndState, matchSet); 1230 } 1231 ATNState optimizedState = new BasicState(); 1232 optimizedState.setRuleIndex(decision.ruleIndex); 1233 atn.addState(optimizedState); 1234 optimizedState.addTransition(newTransition); 1235 optimizedTransitions ~= new EpsilonTransition(optimizedState); 1236 removedPaths += decision.numberOfOptimizedTransitions - optimizedTransitions.length; 1237 if (decision.isOptimized) 1238 { 1239 while (decision.numberOfOptimizedTransitions > 0) 1240 { 1241 decision.removeOptimizedTransition(decision.numberOfOptimizedTransitions - 1); 1242 } 1243 } 1244 foreach (Transition transition; optimizedTransitions) 1245 { 1246 decision.addOptimizedTransition(transition); 1247 } 1248 } 1249 return removedPaths; 1250 } 1251 1252 } 1253 1254 version(unittest) { 1255 import dshould : be, equal, not, should; 1256 import std.typecons : tuple; 1257 import unit_threaded; 1258 1259 @Tags("des11") 1260 @("testEncodingATNDeserialize") 1261 unittest { 1262 auto des = new ATNDeserializer; 1263 des.should.not.be(null); 1264 } 1265 }