1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ATNDeserializer; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.IllegalStateException; 11 import antlr.v4.runtime.Token; 12 import antlr.v4.runtime.TokenConstantDefinition; 13 import antlr.v4.runtime.UnsupportedOperationException; 14 import antlr.v4.runtime.atn.ATN; 15 import antlr.v4.runtime.atn.ATNDeserializationOptions; 16 import antlr.v4.runtime.atn.ATNState; 17 import antlr.v4.runtime.atn.ATNType; 18 import antlr.v4.runtime.atn.ActionTransition; 19 import antlr.v4.runtime.atn.AtomTransition; 20 import antlr.v4.runtime.atn.BasicBlockStartState; 21 import antlr.v4.runtime.atn.BasicState; 22 import antlr.v4.runtime.atn.BlockEndState; 23 import antlr.v4.runtime.atn.BlockStartState; 24 import antlr.v4.runtime.atn.DecisionState; 25 import antlr.v4.runtime.atn.EpsilonTransition; 26 import antlr.v4.runtime.atn.LexerAction; 27 import antlr.v4.runtime.atn.LexerActionType; 28 import antlr.v4.runtime.atn.LexerChannelAction; 29 import antlr.v4.runtime.atn.LexerCustomAction; 30 import antlr.v4.runtime.atn.LexerModeAction; 31 import antlr.v4.runtime.atn.LexerMoreAction; 32 import antlr.v4.runtime.atn.LexerPopModeAction; 33 import antlr.v4.runtime.atn.LexerPushModeAction; 34 import antlr.v4.runtime.atn.LexerSkipAction; 35 import antlr.v4.runtime.atn.LexerTypeAction; 36 import antlr.v4.runtime.atn.LoopEndState; 37 import antlr.v4.runtime.atn.NotSetTransition; 38 import antlr.v4.runtime.atn.PlusBlockStartState; 39 import antlr.v4.runtime.atn.PlusLoopbackState; 40 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 41 import antlr.v4.runtime.atn.PredicateTransition; 42 import antlr.v4.runtime.atn.RangeTransition; 43 import antlr.v4.runtime.atn.RuleStartState; 44 import antlr.v4.runtime.atn.RuleStopState; 45 import antlr.v4.runtime.atn.RuleTransition; 46 import antlr.v4.runtime.atn.SetTransition; 47 import antlr.v4.runtime.atn.StarBlockStartState; 48 import antlr.v4.runtime.atn.StarLoopEntryState; 49 import antlr.v4.runtime.atn.StarLoopbackState; 50 import antlr.v4.runtime.atn.StateNames; 51 import antlr.v4.runtime.atn.TokensStartState; 52 import antlr.v4.runtime.atn.Transition; 53 import antlr.v4.runtime.atn.TransitionStates; 54 import antlr.v4.runtime.atn.WildcardTransition; 55 import antlr.v4.runtime.misc; 56 import std.algorithm: canFind; 57 import std.algorithm: map; 58 import std.algorithm: reverse; 59 import std.array; 60 import std.conv; 61 import std.format; 62 import std.stdio; 63 import std.uuid; 64 import std.utf; 65 66 /** 67 * TODO add class description 68 */ 69 class ATNDeserializer 70 { 71 72 public static immutable int SERIALIZED_VERSION = 3; 73 74 /** 75 * This is the earliest supported serialized UUID. 76 */ 77 private static UUID BASE_SERIALIZED_UUID; 78 79 /** 80 * This UUID indicates an extension of {@link BASE_SERIALIZED_UUID} for the 81 * addition of precedence predicates. 82 */ 83 private static UUID ADDED_PRECEDENCE_TRANSITIONS; 84 85 /** 86 * This UUID indicates an extension of {@link #ADDED_PRECEDENCE_TRANSITIONS} 87 * for the addition of lexer actions encoded as a sequence of 88 * {@link LexerAction} instances. 89 */ 90 private static UUID ADDED_LEXER_ACTIONS; 91 92 private static UUID ADDED_UNICODE_SMP; 93 94 /** 95 * This list contains all of the currently supported UUIDs, ordered by when 96 * the feature first appeared in this branch. 97 */ 98 private static UUID[] SUPPORTED_UUIDS; 99 100 /** 101 * This is the current serialized UUID. 102 */ 103 public static UUID SERIALIZED_UUID; 104 105 private ATNDeserializationOptions deserializationOptions; 106 107 private int[] data; 108 109 private int p; 110 111 public UUID uuid; 112 113 public static this() 114 { 115 /* WARNING: DO NOT MERGE THESE LINES. If UUIDs differ during a merge, 116 * resolve the conflict by generating a new ID! 117 */ 118 BASE_SERIALIZED_UUID = UUID("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3"); 119 ADDED_PRECEDENCE_TRANSITIONS = UUID("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61"); 120 ADDED_LEXER_ACTIONS = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E"); 121 ADDED_UNICODE_SMP = UUID("59627784-3BE5-417A-B9EB-8131A7286089"); 122 SUPPORTED_UUIDS ~= BASE_SERIALIZED_UUID; 123 SUPPORTED_UUIDS ~= ADDED_PRECEDENCE_TRANSITIONS; 124 SUPPORTED_UUIDS ~= ADDED_LEXER_ACTIONS; 125 SUPPORTED_UUIDS ~= ADDED_UNICODE_SMP; 126 127 SERIALIZED_UUID = ADDED_LEXER_ACTIONS; 128 } 129 130 public this() 131 { 132 this(ATNDeserializationOptions.defaultOptions); 133 } 134 135 public this(ATNDeserializationOptions deserializationOptions) 136 { 137 if (deserializationOptions is null) { 138 deserializationOptions = new ATNDeserializationOptions(); 139 } 140 this.deserializationOptions = deserializationOptions; 141 } 142 143 /** 144 * Determines if a particular serialized representation of an ATN supports 145 * a particular feature, identified by the {@link UUID} used for serializing 146 * the ATN at the time the feature was first introduced. 147 * 148 * @param feature The {@link UUID} marking the first time the feature was 149 * supported in the serialized ATN. 150 * @param actualUuid The {@link UUID} of the actual serialized ATN which is 151 * currently being deserialized. 152 * @return {@code true} if the {@code actualUuid} value represents a 153 * serialized ATN at or after the feature identified by {@code feature} was 154 * introduced; otherwise, {@code false}. 155 */ 156 protected bool isFeatureSupported(UUID feature, UUID actualUuid) 157 { 158 bool not_found = true; 159 foreach (uu; SUPPORTED_UUIDS) { 160 if (not_found && uu == feature) { 161 if (uu == actualUuid) 162 return true; 163 else 164 not_found = false; 165 } 166 else { 167 if (uu == actualUuid) { 168 return true; 169 } 170 } 171 } 172 return false; 173 } 174 175 public ATN deserialize(wstring input_data) 176 { 177 reset(input_data); 178 checkVersion; 179 checkUUID; 180 ATN atn = readATN; 181 readStates(atn); 182 readRules(atn); 183 readModes(atn); 184 IntervalSet[] sets; 185 readSets(sets, atn, &readInt); 186 if (isFeatureSupported(ADDED_UNICODE_SMP, uuid)) 187 { 188 readSets(sets, atn, &readInt32); 189 } 190 readEdges(atn, sets); 191 readDecisions(atn); 192 readLexerActions(atn); 193 markPrecedenceDecisions(atn); 194 if (deserializationOptions.verifyATN) { 195 verifyATN(atn); 196 } 197 if (deserializationOptions.generateRuleBypassTransitions && atn.grammarType == ATNType.PARSER) { 198 generateRuleBypassTransitions(atn); 199 } 200 if (deserializationOptions.optimize) { 201 optimizeATN(atn); 202 } 203 identifyTailCalls(atn); 204 return atn; 205 } 206 207 protected void verifyATN(ATN atn) 208 { 209 // verify assumptions 210 foreach (ATNState state; atn.states) { 211 if (state is null) { 212 continue; 213 } 214 215 checkCondition(state.onlyHasEpsilonTransitions || state.getNumberOfTransitions <= 1); 216 217 if (cast(PlusBlockStartState)state) { 218 checkCondition((cast(PlusBlockStartState)state).loopBackState !is null); 219 } 220 221 if (cast(StarLoopEntryState)state) { 222 StarLoopEntryState starLoopEntryState = cast(StarLoopEntryState)state; 223 checkCondition(starLoopEntryState.loopBackState !is null); 224 checkCondition(starLoopEntryState.getNumberOfTransitions() == 2); 225 226 if (cast(StarBlockStartState)(starLoopEntryState.transition(0).target)) { 227 checkCondition(cast(LoopEndState)(starLoopEntryState.transition(1).target) !is null); 228 checkCondition(!starLoopEntryState.nonGreedy); 229 } 230 else if (cast(LoopEndState)(starLoopEntryState.transition(0).target)) { 231 checkCondition(cast(StarBlockStartState)(starLoopEntryState.transition(1).target) !is null); 232 checkCondition(starLoopEntryState.nonGreedy); 233 } 234 else { 235 throw new IllegalStateException(); 236 } 237 } 238 239 if (cast(StarLoopbackState)state) { 240 checkCondition(state.getNumberOfTransitions() == 1); 241 checkCondition(cast(StarLoopEntryState)(state.transition(0).target) !is null); 242 } 243 244 if (cast(LoopEndState)state) { 245 checkCondition((cast(LoopEndState)state).loopBackState !is null); 246 } 247 248 if (cast(RuleStartState)state) { 249 checkCondition((cast(RuleStartState)state).stopState !is null); 250 } 251 252 if (cast(BlockStartState)state) { 253 checkCondition((cast(BlockStartState)state).endState !is null); 254 } 255 256 if (cast(BlockEndState)state) { 257 checkCondition((cast(BlockEndState)state).startState !is null); 258 } 259 260 if (cast(DecisionState)state) { 261 DecisionState decisionState = cast(DecisionState)state; 262 checkCondition(decisionState.getNumberOfTransitions() <= 1 || decisionState.decision >= 0); 263 } 264 else { 265 checkCondition(state.getNumberOfTransitions() <= 1 || state.classinfo == RuleStopState.classinfo); 266 } 267 } 268 269 } 270 271 private void readSets(ref IntervalSet[] sets, ATN aTN, int delegate() readUnicode) 272 { 273 // SETS 274 int nsets = readInt; 275 for (int i=0; i<nsets; i++) { 276 IntervalSet set = new IntervalSet(); 277 sets ~= set; 278 int nintervals = readInt; 279 bool containsEof = readInt != 0; 280 if (containsEof) { 281 set.add(-1); 282 } 283 for (int j=0; j<nintervals; j++) { 284 set.add(readUnicode(), readUnicode()); 285 } 286 } 287 } 288 289 protected void markPrecedenceDecisions(ATN atn) 290 { 291 foreach (ATNState state; atn.states) { 292 if (!cast(StarLoopEntryState)state) { 293 continue; 294 } 295 296 /* We analyze the ATN to determine if this ATN decision state is the 297 * decision for the closure block that determines whether a 298 * precedence rule should continue or complete. 299 */ 300 if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) { 301 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 302 if (cast(LoopEndState)maybeLoopEndState) { 303 if (maybeLoopEndState.epsilonOnlyTransitions && 304 maybeLoopEndState.transitions[0].target.classinfo == 305 RuleStopState.classinfo) { 306 (cast(StarLoopEntryState)state).isPrecedenceDecision = true; 307 } 308 } 309 } 310 } 311 312 } 313 314 protected void checkCondition(bool condition) 315 { 316 checkCondition(condition, null); 317 } 318 319 protected void checkCondition(bool condition, string message) 320 { 321 if (!condition) { 322 throw new IllegalStateException(message); 323 } 324 } 325 326 protected Transition edgeFactory(ATN atn, int type, int src, int trg, int arg1, int arg2, 327 int arg3, IntervalSet[] sets) 328 { 329 ATNState target = atn.states[trg]; 330 with(TransitionStates) { 331 switch (type) { 332 case EPSILON : return new EpsilonTransition(target); 333 case RANGE : 334 if (arg3 != 0) { 335 return new RangeTransition(target, TokenConstantDefinition.EOF, arg2); 336 } 337 else { 338 return new RangeTransition(target, arg1, arg2); 339 } 340 case RULE : 341 RuleTransition rt = new RuleTransition(cast(RuleStartState)atn.states[arg1], arg2, arg3, target); 342 return rt; 343 case PREDICATE : 344 PredicateTransition pt = new PredicateTransition(target, arg1, arg2, arg3 != 0); 345 return pt; 346 case PRECEDENCE: 347 return new PrecedencePredicateTransition(target, arg1); 348 case ATOM : 349 if (arg3 != 0) { 350 return new AtomTransition(target, TokenConstantDefinition.EOF); 351 } 352 else { 353 return new AtomTransition(target, arg1); 354 } 355 case ACTION : 356 ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0); 357 return a; 358 case SET : return new SetTransition(target, sets[arg1]); 359 case NOT_SET : return new NotSetTransition(target, sets[arg1]); 360 case WILDCARD : return new WildcardTransition(target); 361 default: throw new IllegalArgumentException("The specified transition type is not valid."); 362 } 363 } 364 } 365 366 protected ATNState stateFactory(int type, int ruleIndex) 367 { 368 ATNState s; 369 with(StateNames) { 370 switch (type) { 371 case INVALID: return null; 372 case BASIC : s = new BasicState(); break; 373 case RULE_START : s = new RuleStartState(); break; 374 case BLOCK_START : s = new BasicBlockStartState(); break; 375 case PLUS_BLOCK_START : s = new PlusBlockStartState(); break; 376 case STAR_BLOCK_START : s = new StarBlockStartState(); break; 377 case TOKEN_START : s = new TokensStartState(); break; 378 case RULE_STOP : s = new RuleStopState(); break; 379 case BLOCK_END : s = new BlockEndState(); break; 380 case STAR_LOOP_BACK : s = new StarLoopbackState(); break; 381 case STAR_LOOP_ENTRY : s = new StarLoopEntryState(); break; 382 case PLUS_LOOP_BACK : s = new PlusLoopbackState(); break; 383 case LOOP_END : s = new LoopEndState(); break; 384 default : 385 string message = format("The specified state type %d is not valid.", type); 386 throw new IllegalArgumentException(message); 387 } 388 } 389 s.ruleIndex = ruleIndex; 390 return s; 391 } 392 393 protected LexerAction lexerActionFactory(LexerActionType type, int data1, int data2) 394 { 395 switch (type) { 396 case LexerActionType.CHANNEL: 397 return new LexerChannelAction(data1); 398 399 case LexerActionType.CUSTOM: 400 return new LexerCustomAction(data1, data2); 401 402 case LexerActionType.MODE: 403 return new LexerModeAction(data1); 404 405 case LexerActionType.MORE: 406 return cast(LexerAction)LexerMoreAction.instance; 407 408 case LexerActionType.POP_MODE: 409 return cast(LexerAction)LexerPopModeAction.instance; 410 411 case LexerActionType.PUSH_MODE: 412 return new LexerPushModeAction(data1); 413 414 case LexerActionType.SKIP: 415 return cast(LexerAction)LexerSkipAction.instance; 416 417 case LexerActionType.TYPE: 418 return new LexerTypeAction(data1); 419 420 default: 421 string message = format("The specified lexer action type %d is not valid.", type); 422 throw new IllegalArgumentException(message); 423 } 424 } 425 426 private void reset(const wstring atn) 427 { 428 wchar el; 429 data.length = 0; 430 431 for(int i; i < atn.length; i++) { 432 el = atn[i]; 433 if (el == '[') { 434 if (i+7 >= atn.length) { 435 data ~=to!int('[') - 2; 436 continue; 437 } 438 if (atn[i+7] != ']') { 439 data ~=to!int('[') - 2; 440 continue; 441 } 442 data ~= parseOct(atn, i) -2; 443 i += 7; 444 continue; 445 } 446 if (el >1) 447 data ~= (el - 2) & 0xffff; 448 else 449 data ~= (el - 3) & 0xffff; 450 } 451 // don't adjust the first value since that's the version number 452 data[0] = atn[0]; 453 p = 0; 454 } 455 456 private void checkVersion() 457 { 458 auto version_atn = readInt; 459 if (version_atn != SERIALIZED_VERSION) { 460 string reason = format("Could not deserialize ATN with version %d (expected %d).", version_atn, SERIALIZED_VERSION); 461 assert(false, reason); 462 } 463 } 464 465 private void checkUUID() 466 { 467 uuid = readUUID; 468 if (!SUPPORTED_UUIDS.canFind(uuid)) { 469 string reason = format("Could not deserialize ATN with UUID %s (expected %s or a legacy UUID).", uuid, SERIALIZED_UUID); 470 assert(false, reason); 471 } 472 } 473 474 private ATN readATN() 475 { 476 ATNType grammarType = cast(ATNType)readInt; 477 int maxTokenType = readInt; 478 return new ATN(grammarType, maxTokenType); 479 } 480 481 private void readStates(ATN atn) 482 { 483 // STATES 484 int[LoopEndState][] loopBackStateNumbers; 485 int[BlockStartState][] endStateNumbers; 486 int nstates = readInt; 487 debug(deserializer) 488 writefln("%s: Number of states %s", this.p-1 , nstates); 489 for (int i=0; i<nstates; i++) { 490 int stype = readInt; 491 // ignore bad type of states 492 if (stype == StateNames.INVALID) { 493 atn.addState(null); 494 continue; 495 } 496 497 int ruleIndex = readInt; 498 if (ruleIndex == wchar.max) { 499 ruleIndex = -1; 500 } 501 debug(deserializer) 502 writefln("%s: readState %s, ruleIndex %s, index %s", 503 this.p-1, 504 stype, 505 ruleIndex, 506 i); 507 ATNState s = stateFactory(stype, ruleIndex); 508 if (stype == StateNames.LOOP_END) { 509 // special case 510 int loopBackStateNumber = readInt; 511 int[LoopEndState] _a; 512 _a[cast(LoopEndState)s] = loopBackStateNumber; 513 loopBackStateNumbers ~= _a; 514 } 515 else 516 if (cast(BlockStartState)s) { 517 int endStateNumber = readInt; 518 int[BlockStartState] _a; 519 _a[cast(BlockStartState)s] = endStateNumber; 520 endStateNumbers ~= _a; 521 } 522 atn.addState(s); 523 } 524 525 // delay the assignment of loop back and end states until we know all the state instances have been initialized 526 foreach (ref pair; loopBackStateNumbers) { 527 pair.keys[0].loopBackState = atn.states[pair[pair.keys[0]]]; 528 } 529 530 foreach (ref pair; endStateNumbers) { 531 pair.keys[0].endState = cast(BlockEndState)atn.states[pair[pair.keys[0]]]; 532 } 533 534 int numNonGreedyStates = readInt; 535 for (int i = 0; i < numNonGreedyStates; i++) { 536 int stateNumber = readInt; 537 (cast(DecisionState)atn.states[stateNumber]).nonGreedy = true; 538 } 539 540 int numPrecedenceStates = readInt; 541 for (int i = 0; i < numPrecedenceStates; i++) { 542 int stateNumber = readInt; 543 (cast(RuleStartState)atn.states[stateNumber]).isLeftRecursiveRule = true; 544 } 545 546 } 547 548 private void readRules(ATN atn) 549 { 550 // RULES 551 int nrules = readInt; 552 if ( atn.grammarType == ATNType.LEXER ) { 553 atn.ruleToTokenType = new int[nrules]; 554 } 555 debug(deserializer) 556 writefln("%s: Number of rules %s", this.p-1 , nrules); 557 atn.ruleToStartState = new RuleStartState[nrules]; 558 for (int i=0; i<nrules; i++) { 559 int s = readInt; 560 RuleStartState startState = cast(RuleStartState)atn.states[s]; 561 atn.ruleToStartState[i] = startState; 562 if ( atn.grammarType == ATNType.LEXER ) { 563 int tokenType = readInt; 564 if (tokenType == 0xFFFF) { 565 tokenType = TokenConstantDefinition.EOF; 566 } 567 568 atn.ruleToTokenType[i] = tokenType; 569 } 570 } 571 572 atn.ruleToStopState = new RuleStopState[nrules]; 573 foreach (ATNState state; atn.states) { 574 if (!cast(RuleStopState)state) { 575 continue; 576 } 577 RuleStopState stopState = cast(RuleStopState)state; 578 atn.ruleToStopState[state.ruleIndex] = stopState; 579 atn.ruleToStartState[state.ruleIndex].stopState = stopState; 580 } 581 582 } 583 584 public void readModes(ATN atn) 585 { 586 // MODES 587 int nmodes = readInt; 588 for (int i=0; i<nmodes; i++) { 589 int s = readInt; 590 atn.modeToStartState ~= cast(TokensStartState)atn.states[s]; 591 } 592 } 593 594 private void readEdges(ATN atn, IntervalSet[] sets) 595 { 596 // EDGES 597 598 int nedges = readInt; 599 for (int i=0; i<nedges; i++) { 600 int src = readInt; 601 int trg = readInt; 602 int ttype = readInt; 603 int arg1 = readInt; 604 int arg2 = readInt; 605 int arg3 = readInt; 606 Transition trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); 607 debug(deserializer) { 608 writefln("EDGES %1$s %2$s->%3$s %4$s %5$s %6$s %7$s", 609 trans.classinfo, src, 610 trg, Transition.serializationNames[ttype], 611 arg1, arg2, arg3); 612 } 613 ATNState srcState = atn.states[src]; 614 srcState.addTransition(trans); 615 } 616 617 // edges for rule stop states can be derived, so they aren't serialized 618 foreach (ATNState state; atn.states) { 619 for (int i = 0; i < state.getNumberOfTransitions(); i++) { 620 Transition t = state.transition(i); 621 if (!cast(RuleTransition)t) { 622 continue; 623 } 624 625 RuleTransition ruleTransition = cast(RuleTransition)t; 626 int outermostPrecedenceReturn = -1; 627 if (atn.ruleToStartState[ruleTransition.target.ruleIndex].isLeftRecursiveRule) { 628 if (ruleTransition.precedence == 0) { 629 outermostPrecedenceReturn = ruleTransition.target.ruleIndex; 630 } 631 } 632 EpsilonTransition returnTransition = new EpsilonTransition(ruleTransition.followState, outermostPrecedenceReturn); 633 atn.ruleToStopState[ruleTransition.target.ruleIndex].addTransition(returnTransition); 634 } 635 } 636 637 foreach (ATNState state; atn.states) { 638 if (cast(BlockStartState)state) { 639 // we need to know the end state to set its start state 640 if ((cast(BlockStartState)state).endState is null) { 641 throw new IllegalStateException(); 642 } 643 644 // block end states can only be associated to a single block start state 645 if ((cast(BlockStartState)state).endState.startState !is null) { 646 throw new IllegalStateException(); 647 } 648 649 (cast(BlockStartState)state).endState.startState = cast(BlockStartState)state; 650 } 651 652 if (cast(PlusLoopbackState)state) { 653 PlusLoopbackState loopbackState = cast(PlusLoopbackState)state; 654 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 655 ATNState target = loopbackState.transition(i).target; 656 if (cast(PlusBlockStartState)target) { 657 (cast(PlusBlockStartState)target).loopBackState = loopbackState; 658 } 659 } 660 } 661 else if (cast(StarLoopbackState)state) { 662 StarLoopbackState loopbackState = cast(StarLoopbackState)state; 663 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 664 ATNState target = loopbackState.transition(i).target; 665 if (cast(StarLoopEntryState)target) { 666 (cast(StarLoopEntryState)target).loopBackState = loopbackState; 667 } 668 } 669 } 670 } 671 672 } 673 674 private void readDecisions(ATN atn) 675 { 676 // DECISIONS 677 678 int ndecisions = readInt; 679 for (int i=1; i<=ndecisions; i++) { 680 int s = readInt; 681 DecisionState decState = cast(DecisionState)atn.states[s]; 682 atn.decisionToState ~= decState; 683 decState.decision = i-1; 684 } 685 } 686 687 private void readLexerActions(ATN atn) 688 { 689 // LEXER ACTIONS 690 691 if (atn.grammarType == ATNType.LEXER) { 692 atn.lexerActions = new LexerAction[readInt]; 693 for (int i = 0; i < atn.lexerActions.length; i++) { 694 LexerActionType actionType = cast(LexerActionType)readInt; 695 int data1 = readInt; 696 if (data1 == 0xFFFF) { 697 data1 = -1; 698 } 699 700 int data2 = readInt; 701 if (data2 == 0xFFFF) { 702 data2 = -1; 703 } 704 LexerAction lexerAction = lexerActionFactory(actionType, data1, data2); 705 atn.lexerActions[i] = lexerAction; 706 } 707 } 708 } 709 710 private void optimizeATN(ATN atn) 711 { 712 while (true) 713 { 714 int optimizationCount = 0; 715 optimizationCount += inlineSetRules(atn); 716 optimizationCount += combineChainedEpsilons(atn); 717 bool preserveOrder = atn.grammarType == ATNType.LEXER; 718 optimizationCount += optimizeSets(atn, preserveOrder); 719 if (optimizationCount == 0) 720 { 721 break; 722 } 723 } 724 if (deserializationOptions.verifyATN) 725 { 726 // reverify after modification 727 verifyATN(atn); 728 } 729 } 730 731 public int parseOct(wstring data, size_t p) 732 { 733 int res = 0; 734 for (auto i = p + 1; i < p + 7; i++) { 735 res = res<<3 | to!int(data[i] - 0x30); 736 } 737 return res; 738 } 739 740 private UUID readUUID() 741 { 742 ubyte[16] data; 743 long b = readLong; 744 for(int i = 0; i < 8; i++) { 745 data[i] = b & 0xff; 746 b = b >>> 8; 747 } 748 long a = readLong; 749 for(int i = 8; i < 16; i++) { 750 data[i] = a & 0xff; 751 a = a >>> 8; 752 } 753 data[].reverse(); 754 auto uuid = UUID(data); 755 return uuid; 756 } 757 758 private long readLong() 759 { 760 long lowOrder = to!long(readInt32) & 0x00000000FFFFFFFFL; 761 return lowOrder | (to!long(readInt32) << 32); 762 } 763 764 private int readInt32() 765 { 766 return data[p++] | data[p++] << 16; 767 } 768 769 private int readInt() 770 { 771 return data[p++]; 772 } 773 774 private static void identifyTailCalls(ATN atn) 775 { 776 foreach (ATNState state; atn.states) 777 { 778 foreach (Transition transition; state.transitions) 779 { 780 if (!(cast(RuleTransition)transition)) 781 { 782 continue; 783 } 784 RuleTransition ruleTransition = cast(RuleTransition)transition; 785 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 786 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 787 } 788 if (!state.isOptimized) 789 { 790 continue; 791 } 792 foreach (Transition transition; state.optimizedTransitions) 793 { 794 if (!(cast(RuleTransition)transition)) 795 { 796 continue; 797 } 798 RuleTransition ruleTransition = cast(RuleTransition)transition; 799 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 800 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 801 } 802 } 803 } 804 805 private static bool testTailCall(ATN atn, RuleTransition transition, bool optimizedPath) 806 { 807 if (optimizedPath && transition.optimizedTailCall) 808 { 809 return true; 810 } 811 BitSet *reachable = new BitSet(atn.states.length); 812 ATNState[] worklist; 813 worklist ~= transition.followState; 814 while (worklist.length > 0) 815 { 816 ATNState state = worklist[$-1]; 817 worklist.length -= 1; 818 if (reachable.get(state.stateNumber)) 819 { 820 continue; 821 } 822 if (cast(RuleStopState)state) 823 { 824 continue; 825 } 826 if (!state.onlyHasEpsilonTransitions) 827 { 828 return false; 829 } 830 Transition[] transitions = optimizedPath ? state.optimizedTransitions : state.transitions; 831 foreach (Transition t; transitions) 832 { 833 if (t.getSerializationType != TransitionStates.EPSILON) 834 { 835 return false; 836 } 837 worklist ~= t.target; 838 } 839 } 840 return true; 841 } 842 843 public void generateRuleBypassTransitions(ATN atn) 844 { 845 atn.ruleToTokenType = new int[atn.ruleToStartState.length]; 846 for (int i = 0; i < atn.ruleToStartState.length; i++) { 847 atn.ruleToTokenType[i] = atn.maxTokenType + i + 1; 848 } 849 850 for (int i = 0; i < atn.ruleToStartState.length; i++) { 851 BasicBlockStartState bypassStart = new BasicBlockStartState(); 852 bypassStart.ruleIndex = i; 853 atn.addState(bypassStart); 854 855 BlockEndState bypassStop = new BlockEndState(); 856 bypassStop.ruleIndex = i; 857 atn.addState(bypassStop); 858 859 bypassStart.endState = bypassStop; 860 atn.defineDecisionState(bypassStart); 861 862 bypassStop.startState = bypassStart; 863 864 ATNState endState; 865 Transition excludeTransition = null; 866 if (atn.ruleToStartState[i].isLeftRecursiveRule) { 867 // wrap from the beginning of the rule to the StarLoopEntryState 868 endState = null; 869 foreach (ATNState state; atn.states) { 870 if (state.ruleIndex != i) { 871 continue; 872 } 873 874 if (state.classinfo != StarLoopEntryState.classinfo) { 875 continue; 876 } 877 878 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 879 if (maybeLoopEndState.classinfo != LoopEndState.classinfo) { 880 continue; 881 } 882 883 if (maybeLoopEndState.epsilonOnlyTransitions && maybeLoopEndState.transition(0).target.classinfo == RuleStopState.classinfo) { 884 endState = state; 885 break; 886 } 887 } 888 889 if (endState is null) { 890 throw new UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); 891 } 892 893 excludeTransition = (cast(StarLoopEntryState)endState).loopBackState.transition(0); 894 } 895 else { 896 endState = atn.ruleToStopState[i]; 897 } 898 899 // all non-excluded transitions that currently target end state need to target blockEnd instead 900 foreach (ATNState state; atn.states) { 901 foreach (Transition transition; state.transitions) { 902 if (transition == excludeTransition) { 903 continue; 904 } 905 906 if (transition.target == endState) { 907 transition.target = bypassStop; 908 } 909 } 910 } 911 912 // all transitions leaving the rule start state need to leave blockStart instead 913 while (atn.ruleToStartState[i].getNumberOfTransitions() > 0) { 914 Transition transition = atn.ruleToStartState[i].removeTransition(atn.ruleToStartState[i].getNumberOfTransitions() - 1); 915 bypassStart.addTransition(transition); 916 } 917 918 // link the new states 919 atn.ruleToStartState[i].addTransition(new EpsilonTransition(bypassStart)); 920 bypassStop.addTransition(new EpsilonTransition(endState)); 921 922 ATNState matchState = new BasicState(); 923 atn.addState(matchState); 924 matchState.addTransition(new AtomTransition(bypassStop, atn.ruleToTokenType[i])); 925 bypassStart.addTransition(new EpsilonTransition(matchState)); 926 } 927 928 if (deserializationOptions.verifyATN()) { 929 // reverify after modification 930 verifyATN(atn); 931 } 932 } 933 934 private static int inlineSetRules(ATN atn) 935 { 936 int inlinedCalls = 0; 937 Transition[] ruleToInlineTransition; 938 for (int i = 0; i < atn.ruleToStartState.length; i++) 939 { 940 RuleStartState startState = atn.ruleToStartState[i]; 941 ATNState middleState = startState; 942 while (middleState.onlyHasEpsilonTransitions && middleState.numberOfOptimizedTransitions == 1 && middleState.getOptimizedTransition(0).getSerializationType == TransitionStates.EPSILON) 943 { 944 middleState = middleState.getOptimizedTransition(0).target; 945 } 946 if (middleState.numberOfOptimizedTransitions != 1) 947 { 948 continue; 949 } 950 Transition matchTransition = middleState.getOptimizedTransition(0); 951 ATNState matchTarget = matchTransition.target; 952 if (matchTransition.isEpsilon || !matchTarget.onlyHasEpsilonTransitions || matchTarget.numberOfOptimizedTransitions != 1 || !(cast(RuleStopState)matchTarget.getOptimizedTransition(0).target)) 953 { 954 continue; 955 } 956 with(TransitionStates) { 957 switch (matchTransition.getSerializationType) 958 { 959 case ATOM: 960 case RANGE: 961 case SET: 962 { 963 ruleToInlineTransition[i] = matchTransition; 964 break; 965 } 966 967 case NOT_SET: 968 case WILDCARD: 969 { 970 // not implemented yet 971 continue; 972 } 973 974 default: 975 { 976 continue; 977 } 978 } 979 } 980 } 981 for (int stateNumber = 0; stateNumber < atn.states.length; stateNumber++) 982 { 983 ATNState state = atn.states[stateNumber]; 984 if (state.ruleIndex < 0) 985 { 986 continue; 987 } 988 Transition[] optimizedTransitions = null; 989 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) { 990 Transition transition = state.getOptimizedTransition(i); 991 if (!(cast(RuleTransition)transition)) 992 { 993 if (optimizedTransitions != null) 994 { 995 optimizedTransitions ~= transition; 996 } 997 continue; 998 } 999 RuleTransition ruleTransition = cast(RuleTransition)transition; 1000 Transition effective = ruleToInlineTransition[ruleTransition.target.ruleIndex]; 1001 if (effective is null) 1002 { 1003 if (optimizedTransitions != null) 1004 { 1005 optimizedTransitions ~= transition; 1006 } 1007 continue; 1008 } 1009 if (optimizedTransitions == null) 1010 { 1011 optimizedTransitions = []; 1012 for (int j = 0; j < i; j++) 1013 { 1014 optimizedTransitions ~= state.getOptimizedTransition(i); 1015 } 1016 } 1017 inlinedCalls++; 1018 ATNState target = ruleTransition.followState; 1019 ATNState intermediateState = new BasicState(); 1020 intermediateState.setRuleIndex(target.ruleIndex); 1021 atn.addState(intermediateState); 1022 optimizedTransitions ~= new EpsilonTransition(intermediateState); 1023 with(TransitionStates) { 1024 switch (effective.getSerializationType) 1025 { 1026 case ATOM: 1027 { 1028 intermediateState.addTransition(new AtomTransition(target, (cast(AtomTransition)effective)._label)); 1029 break; 1030 } 1031 1032 case RANGE: 1033 { 1034 intermediateState.addTransition(new RangeTransition(target, (cast(RangeTransition)effective).from, (cast(RangeTransition)effective).to)); 1035 break; 1036 } 1037 1038 case SET: 1039 { 1040 intermediateState.addTransition(new SetTransition(target, effective.label)); 1041 break; 1042 } 1043 1044 default: 1045 { 1046 assert(false, "NotSupportedException"); 1047 } 1048 } 1049 } 1050 } 1051 if (optimizedTransitions != null) 1052 { 1053 if (state.isOptimized) 1054 { 1055 while (state.numberOfOptimizedTransitions > 0) 1056 { 1057 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1058 } 1059 } 1060 foreach (Transition transition; optimizedTransitions) 1061 { 1062 state.addOptimizedTransition(transition); 1063 } 1064 } 1065 } 1066 return inlinedCalls; 1067 } 1068 1069 private static int combineChainedEpsilons(ATN atn) 1070 { 1071 int removedEdges = 0; 1072 foreach (ATNState state; atn.states) 1073 { 1074 if (!state.onlyHasEpsilonTransitions || cast(RuleStopState)state) 1075 { 1076 continue; 1077 } 1078 Transition[] optimizedTransitions = null; 1079 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) 1080 { 1081 Transition transition = state.getOptimizedTransition(i); 1082 ATNState intermediate = transition.target; 1083 if (transition.getSerializationType != TransitionStates.EPSILON || 1084 (cast(EpsilonTransition)transition).outermostPrecedenceReturn != -1 || 1085 intermediate.getStateType != StateNames.BASIC || 1086 !intermediate.onlyHasEpsilonTransitions) 1087 { 1088 if (optimizedTransitions != null) 1089 { 1090 optimizedTransitions ~= transition; 1091 } 1092 goto nextTransition_continue; 1093 } 1094 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1095 { 1096 if (intermediate.getOptimizedTransition(j).getSerializationType != TransitionStates.EPSILON || 1097 1098 (cast(EpsilonTransition)intermediate.getOptimizedTransition(j)).outermostPrecedenceReturn != -1) 1099 { 1100 if (optimizedTransitions != null) 1101 { 1102 optimizedTransitions ~= transition; 1103 } 1104 goto nextTransition_continue; 1105 } 1106 } 1107 removedEdges++; 1108 if (optimizedTransitions == null) 1109 { 1110 optimizedTransitions = []; 1111 for (int j = 0; j < i; j++) 1112 { 1113 optimizedTransitions ~= state.getOptimizedTransition(j); 1114 } 1115 } 1116 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1117 { 1118 ATNState target = intermediate.getOptimizedTransition(j).target; 1119 optimizedTransitions ~= new EpsilonTransition(target); 1120 } 1121 nextTransition_continue: ; 1122 } 1123 1124 if (optimizedTransitions != null) 1125 { 1126 if (state.isOptimized) 1127 { 1128 while (state.numberOfOptimizedTransitions > 0) 1129 { 1130 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1131 } 1132 } 1133 foreach (Transition transition; optimizedTransitions) 1134 { 1135 state.addOptimizedTransition(transition); 1136 } 1137 } 1138 } 1139 1140 return removedEdges; 1141 } 1142 1143 private static int optimizeSets(ATN atn, bool preserveOrder) 1144 { 1145 if (preserveOrder) 1146 { 1147 // this optimization currently doesn't preserve edge order. 1148 return 0; 1149 } 1150 int removedPaths = 0; 1151 DecisionState[] decisions = atn.decisionToState; 1152 foreach (DecisionState decision; decisions) 1153 { 1154 IntervalSet setTransitions = new IntervalSet(); 1155 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1156 { 1157 Transition epsTransition = decision.optimizedTransitions[i]; 1158 if (!(cast(EpsilonTransition)epsTransition)) 1159 { 1160 continue; 1161 } 1162 if (epsTransition.target.optimizedTransitions.length != 1) 1163 { 1164 continue; 1165 } 1166 Transition transition = epsTransition.target.optimizedTransitions[0]; 1167 if (!(cast(BlockEndState)transition.target)) 1168 { 1169 continue; 1170 } 1171 if (cast(NotSetTransition)transition) 1172 { 1173 // TODO: not yet implemented 1174 continue; 1175 } 1176 if (cast(AtomTransition)transition || 1177 cast(RangeTransition)transition || 1178 cast(SetTransition)transition) 1179 { 1180 setTransitions.add(i); 1181 } 1182 } 1183 if (setTransitions.size <= 1) 1184 { 1185 continue; 1186 } 1187 Transition[] optimizedTransitions; 1188 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1189 { 1190 if (!setTransitions.contains(i)) 1191 { 1192 optimizedTransitions ~= decision.optimizedTransitions[i]; 1193 } 1194 } 1195 ATNState blockEndState = decision.optimizedTransitions[setTransitions.getMinElement].target.optimizedTransitions[0].target; 1196 IntervalSet matchSet = new IntervalSet; 1197 for (int i = 0; i < setTransitions.intervals.length; i++) 1198 { 1199 Interval interval = setTransitions.intervals[i]; 1200 for (int j = interval.a; j <= interval.b; j++) 1201 { 1202 Transition matchTransition = decision.optimizedTransitions[j].target.optimizedTransitions[0]; 1203 if (cast(NotSetTransition)matchTransition) 1204 { 1205 assert(false, "Not yet implemented."); 1206 } 1207 else 1208 { 1209 matchSet.addAll(matchTransition.label); 1210 } 1211 } 1212 } 1213 Transition newTransition; 1214 if (matchSet.intervals.length == 1) 1215 { 1216 if (matchSet.size == 1) 1217 { 1218 newTransition = new AtomTransition(blockEndState, matchSet.getMinElement); 1219 } 1220 else 1221 { 1222 Interval matchInterval = matchSet.intervals[0]; 1223 newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b); 1224 } 1225 } 1226 else 1227 { 1228 newTransition = new SetTransition(blockEndState, matchSet); 1229 } 1230 ATNState optimizedState = new BasicState(); 1231 optimizedState.setRuleIndex(decision.ruleIndex); 1232 atn.addState(optimizedState); 1233 optimizedState.addTransition(newTransition); 1234 optimizedTransitions ~= new EpsilonTransition(optimizedState); 1235 removedPaths += decision.numberOfOptimizedTransitions - optimizedTransitions.length; 1236 if (decision.isOptimized) 1237 { 1238 while (decision.numberOfOptimizedTransitions > 0) 1239 { 1240 decision.removeOptimizedTransition(decision.numberOfOptimizedTransitions - 1); 1241 } 1242 } 1243 foreach (Transition transition; optimizedTransitions) 1244 { 1245 decision.addOptimizedTransition(transition); 1246 } 1247 } 1248 return removedPaths; 1249 } 1250 1251 } 1252 1253 version(unittest) { 1254 import dshould : be, equal, not, should; 1255 import std.typecons : tuple; 1256 import unit_threaded; 1257 1258 @Tags("des11") 1259 @("testEncodingATNDeserialize") 1260 unittest { 1261 auto des = new ATNDeserializer; 1262 des.should.not.be(null); 1263 } 1264 }