1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.atn.ATNDeserializer; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.IllegalStateException; 11 import antlr.v4.runtime.Token; 12 import antlr.v4.runtime.TokenConstantDefinition; 13 import antlr.v4.runtime.UnsupportedOperationException; 14 import antlr.v4.runtime.atn.ATN; 15 import antlr.v4.runtime.atn.ATNDeserializationOptions; 16 import antlr.v4.runtime.atn.ATNState; 17 import antlr.v4.runtime.atn.ATNType; 18 import antlr.v4.runtime.atn.ActionTransition; 19 import antlr.v4.runtime.atn.AtomTransition; 20 import antlr.v4.runtime.atn.BasicBlockStartState; 21 import antlr.v4.runtime.atn.BasicState; 22 import antlr.v4.runtime.atn.BlockEndState; 23 import antlr.v4.runtime.atn.BlockStartState; 24 import antlr.v4.runtime.atn.DecisionState; 25 import antlr.v4.runtime.atn.EpsilonTransition; 26 import antlr.v4.runtime.atn.LexerAction; 27 import antlr.v4.runtime.atn.LexerActionType; 28 import antlr.v4.runtime.atn.LexerChannelAction; 29 import antlr.v4.runtime.atn.LexerCustomAction; 30 import antlr.v4.runtime.atn.LexerModeAction; 31 import antlr.v4.runtime.atn.LexerMoreAction; 32 import antlr.v4.runtime.atn.LexerPopModeAction; 33 import antlr.v4.runtime.atn.LexerPushModeAction; 34 import antlr.v4.runtime.atn.LexerSkipAction; 35 import antlr.v4.runtime.atn.LexerTypeAction; 36 import antlr.v4.runtime.atn.LoopEndState; 37 import antlr.v4.runtime.atn.NotSetTransition; 38 import antlr.v4.runtime.atn.PlusBlockStartState; 39 import antlr.v4.runtime.atn.PlusLoopbackState; 40 import antlr.v4.runtime.atn.PrecedencePredicateTransition; 41 import antlr.v4.runtime.atn.PredicateTransition; 42 import antlr.v4.runtime.atn.RangeTransition; 43 import antlr.v4.runtime.atn.RuleStartState; 44 import antlr.v4.runtime.atn.RuleStopState; 45 import antlr.v4.runtime.atn.RuleTransition; 46 import antlr.v4.runtime.atn.SetTransition; 47 import antlr.v4.runtime.atn.StarBlockStartState; 48 import antlr.v4.runtime.atn.StarLoopEntryState; 49 import antlr.v4.runtime.atn.StarLoopbackState; 50 import antlr.v4.runtime.atn.StateNames; 51 import antlr.v4.runtime.atn.TokensStartState; 52 import antlr.v4.runtime.atn.Transition; 53 import antlr.v4.runtime.atn.TransitionStates; 54 import antlr.v4.runtime.atn.WildcardTransition; 55 import antlr.v4.runtime.misc; 56 import std.algorithm: canFind; 57 import std.algorithm: map; 58 import std.algorithm: reverse; 59 import std.array; 60 import std.conv; 61 import std.format; 62 import std.stdio; 63 import std.uuid; 64 import std.utf; 65 66 /** 67 * TODO add class description 68 */ 69 class ATNDeserializer 70 { 71 72 enum int SERIALIZED_VERSION = 3; 73 74 /** 75 * This is the earliest supported serialized UUID. 76 */ 77 private static UUID BASE_SERIALIZED_UUID; 78 79 /** 80 * This UUID indicates an extension of {@link BASE_SERIALIZED_UUID} for the 81 * addition of precedence predicates. 82 */ 83 private static UUID ADDED_PRECEDENCE_TRANSITIONS; 84 85 /** 86 * This UUID indicates an extension of {@link #ADDED_PRECEDENCE_TRANSITIONS} 87 * for the addition of lexer actions encoded as a sequence of 88 * {@link LexerAction} instances. 89 */ 90 private static UUID ADDED_LEXER_ACTIONS; 91 92 private static UUID ADDED_UNICODE_SMP; 93 94 /** 95 * This list contains all of the currently supported UUIDs, ordered by when 96 * the feature first appeared in this branch. 97 */ 98 private static UUID[] SUPPORTED_UUIDS; 99 100 /** 101 * This is the current serialized UUID. 102 */ 103 public static UUID SERIALIZED_UUID; 104 105 private ATNDeserializationOptions deserializationOptions; 106 107 private int[] data; 108 109 private int p; 110 111 public UUID uuid; 112 113 public static this() 114 { 115 /* WARNING: DO NOT MERGE THESE LINES. If UUIDs differ during a merge, 116 * resolve the conflict by generating a new ID! 117 */ 118 BASE_SERIALIZED_UUID = UUID("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3"); 119 ADDED_PRECEDENCE_TRANSITIONS = UUID("1DA0C57D-6C06-438A-9B27-10BCB3CE0F61"); 120 ADDED_LEXER_ACTIONS = UUID("AADB8D7E-AEEF-4415-AD2B-8204D6CF042E"); 121 ADDED_UNICODE_SMP = UUID("59627784-3BE5-417A-B9EB-8131A7286089"); 122 SUPPORTED_UUIDS ~= BASE_SERIALIZED_UUID; 123 SUPPORTED_UUIDS ~= ADDED_PRECEDENCE_TRANSITIONS; 124 SUPPORTED_UUIDS ~= ADDED_LEXER_ACTIONS; 125 SUPPORTED_UUIDS ~= ADDED_UNICODE_SMP; 126 127 SERIALIZED_UUID = ADDED_LEXER_ACTIONS; 128 } 129 130 public this() 131 { 132 this(ATNDeserializationOptions.defaultOptions); 133 } 134 135 public this(ATNDeserializationOptions deserializationOptions) 136 { 137 if (deserializationOptions is null) { 138 deserializationOptions = new ATNDeserializationOptions(); 139 } 140 this.deserializationOptions = deserializationOptions; 141 } 142 143 /** 144 * Determines if a particular serialized representation of an ATN supports 145 * a particular feature, identified by the {@link UUID} used for serializing 146 * the ATN at the time the feature was first introduced. 147 * 148 * @param feature The {@link UUID} marking the first time the feature was 149 * supported in the serialized ATN. 150 * @param actualUuid The {@link UUID} of the actual serialized ATN which is 151 * currently being deserialized. 152 * @return {@code true} if the {@code actualUuid} value represents a 153 * serialized ATN at or after the feature identified by {@code feature} was 154 * introduced; otherwise, {@code false}. 155 */ 156 protected bool isFeatureSupported(UUID feature, UUID actualUuid) 157 { 158 bool not_found = true; 159 foreach (uu; SUPPORTED_UUIDS) { 160 if (not_found && uu == feature) { 161 if (uu == actualUuid) 162 return true; 163 else 164 not_found = false; 165 } 166 else { 167 if (uu == actualUuid) { 168 return true; 169 } 170 } 171 } 172 return false; 173 } 174 175 public ATN deserialize(wstring input_data) 176 { 177 reset(input_data); 178 checkVersion; 179 checkUUID; 180 ATN atn = readATN; 181 readStates(atn); 182 readRules(atn); 183 readModes(atn); 184 IntervalSet[] sets; 185 readSets(sets, atn, &readInt); 186 if (isFeatureSupported(ADDED_UNICODE_SMP, uuid)) 187 { 188 readSets(sets, atn, &readInt32); 189 } 190 readEdges(atn, sets); 191 readDecisions(atn); 192 readLexerActions(atn); 193 markPrecedenceDecisions(atn); 194 if (deserializationOptions.verifyATN) { 195 verifyATN(atn); 196 } 197 if (deserializationOptions.generateRuleBypassTransitions && atn.grammarType == ATNType.PARSER) { 198 generateRuleBypassTransitions(atn); 199 } 200 if (deserializationOptions.optimize) { 201 optimizeATN(atn); 202 } 203 identifyTailCalls(atn); 204 return atn; 205 } 206 207 protected void verifyATN(ATN atn) 208 { 209 // verify assumptions 210 foreach (ATNState state; atn.states) { 211 if (state is null) { 212 continue; 213 } 214 215 checkCondition(state.onlyHasEpsilonTransitions || state.getNumberOfTransitions <= 1); 216 217 if (cast(PlusBlockStartState)state) { 218 checkCondition((cast(PlusBlockStartState)state).loopBackState !is null); 219 } 220 221 if (cast(StarLoopEntryState)state) { 222 StarLoopEntryState starLoopEntryState = cast(StarLoopEntryState)state; 223 checkCondition(starLoopEntryState.loopBackState !is null); 224 checkCondition(starLoopEntryState.getNumberOfTransitions() == 2); 225 226 if (cast(StarBlockStartState)(starLoopEntryState.transition(0).target)) { 227 checkCondition(cast(LoopEndState)(starLoopEntryState.transition(1).target) !is null); 228 checkCondition(!starLoopEntryState.nonGreedy); 229 } 230 else if (cast(LoopEndState)(starLoopEntryState.transition(0).target)) { 231 checkCondition(cast(StarBlockStartState)(starLoopEntryState.transition(1).target) !is null); 232 checkCondition(starLoopEntryState.nonGreedy); 233 } 234 else { 235 throw new IllegalStateException(); 236 } 237 } 238 239 if (cast(StarLoopbackState)state) { 240 checkCondition(state.getNumberOfTransitions() == 1); 241 checkCondition(cast(StarLoopEntryState)(state.transition(0).target) !is null); 242 } 243 244 if (cast(LoopEndState)state) { 245 checkCondition((cast(LoopEndState)state).loopBackState !is null); 246 } 247 248 if (cast(RuleStartState)state) { 249 checkCondition((cast(RuleStartState)state).stopState !is null); 250 } 251 252 if (cast(BlockStartState)state) { 253 checkCondition((cast(BlockStartState)state).endState !is null); 254 } 255 256 if (cast(BlockEndState)state) { 257 checkCondition((cast(BlockEndState)state).startState !is null); 258 } 259 260 if (cast(DecisionState)state) { 261 DecisionState decisionState = cast(DecisionState)state; 262 checkCondition(decisionState.getNumberOfTransitions() <= 1 || decisionState.decision >= 0); 263 } 264 else { 265 checkCondition(state.getNumberOfTransitions() <= 1 || state.classinfo == RuleStopState.classinfo); 266 } 267 } 268 269 } 270 271 private void readSets(ref IntervalSet[] sets, ATN aTN, int delegate() readUnicode) 272 { 273 // SETS 274 int nsets = readInt; 275 for (int i=0; i<nsets; i++) { 276 IntervalSet set = new IntervalSet(); 277 sets ~= set; 278 int nintervals = readInt; 279 bool containsEof = readInt != 0; 280 if (containsEof) { 281 set.add(-1); 282 } 283 for (int j=0; j<nintervals; j++) { 284 set.add(readUnicode(), readUnicode()); 285 } 286 } 287 } 288 289 protected void markPrecedenceDecisions(ATN atn) 290 { 291 foreach (ATNState state; atn.states) { 292 if (!cast(StarLoopEntryState)state) { 293 continue; 294 } 295 296 /* We analyze the ATN to determine if this ATN decision state is the 297 * decision for the closure block that determines whether a 298 * precedence rule should continue or complete. 299 */ 300 if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) { 301 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 302 if (cast(LoopEndState)maybeLoopEndState) { 303 if (maybeLoopEndState.epsilonOnlyTransitions && 304 maybeLoopEndState.transitions[0].target.classinfo == 305 RuleStopState.classinfo) { 306 (cast(StarLoopEntryState)state).isPrecedenceDecision = true; 307 } 308 } 309 } 310 } 311 312 } 313 314 protected void checkCondition(bool condition) 315 { 316 checkCondition(condition, null); 317 } 318 319 protected void checkCondition(bool condition, string message) 320 { 321 if (!condition) { 322 throw new IllegalStateException(message); 323 } 324 } 325 326 protected Transition edgeFactory(ATN atn, int type, int src, int trg, int arg1, int arg2, 327 int arg3, IntervalSet[] sets) 328 { 329 ATNState target = atn.states[trg]; 330 with(TransitionStates) { 331 switch (type) { 332 case EPSILON : return new EpsilonTransition(target); 333 case RANGE : 334 if (arg3 != 0) { 335 return new RangeTransition(target, TokenConstantDefinition.EOF, arg2); 336 } 337 else { 338 return new RangeTransition(target, arg1, arg2); 339 } 340 case RULE : 341 RuleTransition rt = new RuleTransition(cast(RuleStartState)atn.states[arg1], arg2, arg3, target); 342 return rt; 343 case PREDICATE : 344 PredicateTransition pt = new PredicateTransition(target, arg1, arg2, arg3 != 0); 345 return pt; 346 case PRECEDENCE: 347 return new PrecedencePredicateTransition(target, arg1); 348 case ATOM : 349 if (arg3 != 0) { 350 return new AtomTransition(target, TokenConstantDefinition.EOF); 351 } 352 else { 353 return new AtomTransition(target, arg1); 354 } 355 case ACTION : 356 ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0); 357 return a; 358 case SET : return new SetTransition(target, sets[arg1]); 359 case NOT_SET : return new NotSetTransition(target, sets[arg1]); 360 case WILDCARD : return new WildcardTransition(target); 361 default: throw new IllegalArgumentException("The specified transition type is not valid."); 362 } 363 } 364 } 365 366 protected ATNState stateFactory(int type, int ruleIndex) 367 { 368 ATNState s; 369 with(StateNames) { 370 switch (type) { 371 case INVALID: return null; 372 case BASIC : s = new BasicState(); break; 373 case RULE_START : s = new RuleStartState(); break; 374 case BLOCK_START : s = new BasicBlockStartState(); break; 375 case PLUS_BLOCK_START : s = new PlusBlockStartState(); break; 376 case STAR_BLOCK_START : s = new StarBlockStartState(); break; 377 case TOKEN_START : s = new TokensStartState(); break; 378 case RULE_STOP : s = new RuleStopState(); break; 379 case BLOCK_END : s = new BlockEndState(); break; 380 case STAR_LOOP_BACK : s = new StarLoopbackState(); break; 381 case STAR_LOOP_ENTRY : s = new StarLoopEntryState(); break; 382 case PLUS_LOOP_BACK : s = new PlusLoopbackState(); break; 383 case LOOP_END : s = new LoopEndState(); break; 384 default : 385 string message = format("The specified state type %d is not valid.", type); 386 throw new IllegalArgumentException(message); 387 } 388 } 389 s.ruleIndex = ruleIndex; 390 return s; 391 } 392 393 protected LexerAction lexerActionFactory(LexerActionType type, int data1, int data2) 394 { 395 switch (type) { 396 case LexerActionType.CHANNEL: 397 return new LexerChannelAction(data1); 398 399 case LexerActionType.CUSTOM: 400 return new LexerCustomAction(data1, data2); 401 402 case LexerActionType.MODE: 403 return new LexerModeAction(data1); 404 405 case LexerActionType.MORE: 406 return cast(LexerAction)LexerMoreAction.instance; 407 408 case LexerActionType.POP_MODE: 409 return cast(LexerAction)LexerPopModeAction.instance; 410 411 case LexerActionType.PUSH_MODE: 412 return new LexerPushModeAction(data1); 413 414 case LexerActionType.SKIP: 415 return cast(LexerAction)LexerSkipAction.instance; 416 417 case LexerActionType.TYPE: 418 return new LexerTypeAction(data1); 419 420 default: 421 string message = format("The specified lexer action type %d is not valid.", type); 422 throw new IllegalArgumentException(message); 423 } 424 } 425 426 private void reset(const wstring atn) 427 { 428 wchar el; 429 data.length = 0; 430 431 for(int i; i < atn.length; i++) { 432 el = atn[i]; 433 if (el == '[') { 434 if (i+7 >= atn.length) { 435 data ~=to!int('[') - 2; 436 continue; 437 } 438 if (atn[i+7] != ']') { 439 data ~=to!int('[') - 2; 440 continue; 441 } 442 if (hasSixOct(atn, i)) 443 { 444 data ~= readSixOct(atn, i) -2; 445 i += 7; 446 continue; 447 } 448 } 449 if (el > 1) 450 data ~= (el - 2) & 0xffff; 451 else 452 data ~= (el - 3) & 0xffff; 453 } 454 // don't adjust the first value since that's the version number 455 data[0] = atn[0]; 456 p = 0; 457 } 458 459 private void checkVersion() 460 { 461 auto version_atn = readInt; 462 if (version_atn != SERIALIZED_VERSION) { 463 string reason = format("Could not deserialize ATN with version %d (expected %d).", version_atn, SERIALIZED_VERSION); 464 assert(false, reason); 465 } 466 } 467 468 private void checkUUID() 469 { 470 uuid = readUUID; 471 if (!SUPPORTED_UUIDS.canFind(uuid)) { 472 string reason = format("Could not deserialize ATN with UUID %s (expected %s or a legacy UUID).", uuid, SERIALIZED_UUID); 473 assert(false, reason); 474 } 475 } 476 477 private ATN readATN() 478 { 479 ATNType grammarType = cast(ATNType)readInt; 480 int maxTokenType = readInt; 481 return new ATN(grammarType, maxTokenType); 482 } 483 484 private void readStates(ATN atn) 485 { 486 // STATES 487 int[LoopEndState][] loopBackStateNumbers; 488 int[BlockStartState][] endStateNumbers; 489 int nstates = readInt; 490 debug(deserializer) 491 writefln("%s: Number of states %s", this.p-1 , nstates); 492 for (int i=0; i<nstates; i++) { 493 int stype = readInt; 494 // ignore bad type of states 495 if (stype == StateNames.INVALID) { 496 atn.addState(null); 497 continue; 498 } 499 500 int ruleIndex = readInt; 501 if (ruleIndex == wchar.max) { 502 ruleIndex = -1; 503 } 504 debug(deserializer) 505 writefln("%s: readState %s, ruleIndex %s, index %s", 506 this.p-1, 507 stype, 508 ruleIndex, 509 i); 510 ATNState s = stateFactory(stype, ruleIndex); 511 if (stype == StateNames.LOOP_END) { 512 // special case 513 int loopBackStateNumber = readInt; 514 int[LoopEndState] _a; 515 _a[cast(LoopEndState)s] = loopBackStateNumber; 516 loopBackStateNumbers ~= _a; 517 } 518 else 519 if (cast(BlockStartState)s) { 520 int endStateNumber = readInt; 521 int[BlockStartState] _a; 522 _a[cast(BlockStartState)s] = endStateNumber; 523 endStateNumbers ~= _a; 524 } 525 atn.addState(s); 526 } 527 528 // delay the assignment of loop back and end states until we know all the state instances have been initialized 529 foreach (ref pair; loopBackStateNumbers) { 530 pair.keys[0].loopBackState = atn.states[pair[pair.keys[0]]]; 531 } 532 533 foreach (ref pair; endStateNumbers) { 534 pair.keys[0].endState = cast(BlockEndState)atn.states[pair[pair.keys[0]]]; 535 } 536 537 int numNonGreedyStates = readInt; 538 for (int i = 0; i < numNonGreedyStates; i++) { 539 int stateNumber = readInt; 540 (cast(DecisionState)atn.states[stateNumber]).nonGreedy = true; 541 } 542 543 int numPrecedenceStates = readInt; 544 for (int i = 0; i < numPrecedenceStates; i++) { 545 int stateNumber = readInt; 546 (cast(RuleStartState)atn.states[stateNumber]).isLeftRecursiveRule = true; 547 } 548 549 } 550 551 private void readRules(ATN atn) 552 { 553 // RULES 554 int nrules = readInt; 555 556 if ( atn.grammarType == ATNType.LEXER ) { 557 atn.ruleToTokenType = new int[nrules]; 558 } 559 560 atn.ruleToStartState = new RuleStartState[nrules]; 561 for (int i=0; i<nrules; i++) { 562 int s = readInt; 563 RuleStartState startState = cast(RuleStartState)atn.states[s]; 564 atn.ruleToStartState[i] = startState; 565 if ( atn.grammarType == ATNType.LEXER ) { 566 int tokenType = readInt; 567 if (tokenType == 0xFFFF) { 568 tokenType = TokenConstantDefinition.EOF; 569 } 570 571 atn.ruleToTokenType[i] = tokenType; 572 } 573 } 574 575 atn.ruleToStopState = new RuleStopState[nrules]; 576 foreach (ATNState state; atn.states) { 577 if (!cast(RuleStopState)state) { 578 continue; 579 } 580 RuleStopState stopState = cast(RuleStopState)state; 581 atn.ruleToStopState[state.ruleIndex] = stopState; 582 atn.ruleToStartState[state.ruleIndex].stopState = stopState; 583 } 584 585 } 586 587 public void readModes(ATN atn) 588 { 589 // MODES 590 int nmodes = readInt; 591 for (int i=0; i<nmodes; i++) { 592 int s = readInt; 593 atn.modeToStartState ~= cast(TokensStartState)atn.states[s]; 594 } 595 } 596 597 private void readEdges(ATN atn, IntervalSet[] sets) 598 { 599 // EDGES 600 601 int nedges = readInt; 602 for (int i=0; i<nedges; i++) { 603 int src = readInt; 604 int trg = readInt; 605 int ttype = readInt; 606 int arg1 = readInt; 607 int arg2 = readInt; 608 int arg3 = readInt; 609 Transition trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); 610 debug(deserializer) { 611 writefln!"EDGES %1$s %2$s->%3$s %4$s %5$s %6$s %7$s" 612 ( 613 trans.classinfo, src, 614 trg, 615 Transition.serializationNames[ttype], 616 arg1, arg2, arg3 617 ); 618 } 619 ATNState srcState = atn.states[src]; 620 srcState.addTransition(trans); 621 } 622 623 // edges for rule stop states can be derived, so they aren't serialized 624 foreach (ATNState state; atn.states) { 625 for (int i = 0; i < state.getNumberOfTransitions(); i++) { 626 Transition t = state.transition(i); 627 if (!cast(RuleTransition)t) { 628 continue; 629 } 630 631 RuleTransition ruleTransition = cast(RuleTransition)t; 632 int outermostPrecedenceReturn = -1; 633 if (atn.ruleToStartState[ruleTransition.target.ruleIndex].isLeftRecursiveRule) { 634 if (ruleTransition.precedence == 0) { 635 outermostPrecedenceReturn = ruleTransition.target.ruleIndex; 636 } 637 } 638 EpsilonTransition returnTransition = new EpsilonTransition(ruleTransition.followState, outermostPrecedenceReturn); 639 atn.ruleToStopState[ruleTransition.target.ruleIndex].addTransition(returnTransition); 640 } 641 } 642 643 foreach (ATNState state; atn.states) { 644 if (cast(BlockStartState)state) { 645 // we need to know the end state to set its start state 646 if ((cast(BlockStartState)state).endState is null) { 647 throw new IllegalStateException(); 648 } 649 650 // block end states can only be associated to a single block start state 651 if ((cast(BlockStartState)state).endState.startState !is null) { 652 throw new IllegalStateException(); 653 } 654 655 (cast(BlockStartState)state).endState.startState = cast(BlockStartState)state; 656 } 657 658 if (cast(PlusLoopbackState)state) { 659 PlusLoopbackState loopbackState = cast(PlusLoopbackState)state; 660 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 661 ATNState target = loopbackState.transition(i).target; 662 if (cast(PlusBlockStartState)target) { 663 (cast(PlusBlockStartState)target).loopBackState = loopbackState; 664 } 665 } 666 } 667 else if (cast(StarLoopbackState)state) { 668 StarLoopbackState loopbackState = cast(StarLoopbackState)state; 669 for (int i = 0; i < loopbackState.getNumberOfTransitions(); i++) { 670 ATNState target = loopbackState.transition(i).target; 671 if (cast(StarLoopEntryState)target) { 672 (cast(StarLoopEntryState)target).loopBackState = loopbackState; 673 } 674 } 675 } 676 } 677 678 } 679 680 private void readDecisions(ATN atn) 681 { 682 // DECISIONS 683 684 int ndecisions = readInt; 685 for (int i=1; i<=ndecisions; i++) { 686 int s = readInt; 687 DecisionState decState = cast(DecisionState)atn.states[s]; 688 atn.decisionToState ~= decState; 689 decState.decision = i-1; 690 } 691 } 692 693 private void readLexerActions(ATN atn) 694 { 695 // LEXER ACTIONS 696 697 if (atn.grammarType == ATNType.LEXER) { 698 atn.lexerActions = new LexerAction[readInt]; 699 for (int i = 0; i < atn.lexerActions.length; i++) { 700 LexerActionType actionType = cast(LexerActionType)readInt; 701 int data1 = readInt; 702 if (data1 == 0xFFFF) { 703 data1 = -1; 704 } 705 706 int data2 = readInt; 707 if (data2 == 0xFFFF) { 708 data2 = -1; 709 } 710 LexerAction lexerAction = lexerActionFactory(actionType, data1, data2); 711 atn.lexerActions[i] = lexerAction; 712 } 713 } 714 } 715 716 private void optimizeATN(ATN atn) 717 { 718 while (true) 719 { 720 int optimizationCount = 0; 721 optimizationCount += inlineSetRules(atn); 722 optimizationCount += combineChainedEpsilons(atn); 723 bool preserveOrder = atn.grammarType == ATNType.LEXER; 724 optimizationCount += optimizeSets(atn, preserveOrder); 725 if (optimizationCount == 0) 726 { 727 break; 728 } 729 } 730 if (deserializationOptions.verifyATN) 731 { 732 // reverify after modification 733 verifyATN(atn); 734 } 735 } 736 737 public bool hasSixOct(wstring data, size_t p) 738 { 739 for (auto i = p + 1; i < p + 7; i++) { 740 auto c = to!int(data[i] - 0x30); 741 if (c > 7 || c < 0) 742 return false; 743 } 744 return true; 745 } 746 747 public int readSixOct(wstring data, size_t p) 748 { 749 int res = 0; 750 for (auto i = p + 1; i < p + 7; i++) { 751 res = res<<3 | to!int(data[i] - 0x30); 752 } 753 return res; 754 } 755 756 private UUID readUUID() 757 { 758 ubyte[16] data; 759 long b = readLong; 760 for(int i = 0; i < 8; i++) { 761 data[i] = b & 0xff; 762 b = b >>> 8; 763 } 764 long a = readLong; 765 for (int i = 8; i < 16; i++) { 766 data[i] = a & 0xff; 767 a = a >>> 8; 768 } 769 data[].reverse(); 770 auto uuid = UUID(data); 771 return uuid; 772 } 773 774 private long readLong() 775 { 776 long lowOrder = to!long(readInt32) & 0x00000000FFFFFFFFL; 777 return lowOrder | (to!long(readInt32) << 32); 778 } 779 780 private int readInt32() 781 { 782 return data[p++] | data[p++] << 16; 783 } 784 785 private int readInt() 786 { 787 return data[p++]; 788 } 789 790 private static void identifyTailCalls(ATN atn) 791 { 792 foreach (ATNState state; atn.states) 793 { 794 foreach (Transition transition; state.transitions) 795 { 796 if (!(cast(RuleTransition)transition)) 797 { 798 continue; 799 } 800 RuleTransition ruleTransition = cast(RuleTransition)transition; 801 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 802 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 803 } 804 if (!state.isOptimized) 805 { 806 continue; 807 } 808 foreach (Transition transition; state.optimizedTransitions) 809 { 810 if (!(cast(RuleTransition)transition)) 811 { 812 continue; 813 } 814 RuleTransition ruleTransition = cast(RuleTransition)transition; 815 ruleTransition.tailCall = testTailCall(atn, ruleTransition, false); 816 ruleTransition.optimizedTailCall = testTailCall(atn, ruleTransition, true); 817 } 818 } 819 } 820 821 private static bool testTailCall(ATN atn, RuleTransition transition, bool optimizedPath) 822 { 823 if (optimizedPath && transition.optimizedTailCall) 824 { 825 return true; 826 } 827 BitSet *reachable = new BitSet(atn.states.length); 828 ATNState[] worklist; 829 worklist ~= transition.followState; 830 while (worklist.length > 0) 831 { 832 ATNState state = worklist[$-1]; 833 worklist.length -= 1; 834 if (reachable.get(state.stateNumber)) 835 { 836 continue; 837 } 838 if (cast(RuleStopState)state) 839 { 840 continue; 841 } 842 if (!state.onlyHasEpsilonTransitions) 843 { 844 return false; 845 } 846 Transition[] transitions = optimizedPath ? state.optimizedTransitions : state.transitions; 847 foreach (Transition t; transitions) 848 { 849 if (t.getSerializationType != TransitionStates.EPSILON) 850 { 851 return false; 852 } 853 worklist ~= t.target; 854 } 855 } 856 return true; 857 } 858 859 public void generateRuleBypassTransitions(ATN atn) 860 { 861 atn.ruleToTokenType = new int[atn.ruleToStartState.length]; 862 for (int i = 0; i < atn.ruleToStartState.length; i++) { 863 atn.ruleToTokenType[i] = atn.maxTokenType + i + 1; 864 } 865 866 for (int i = 0; i < atn.ruleToStartState.length; i++) { 867 BasicBlockStartState bypassStart = new BasicBlockStartState(); 868 bypassStart.ruleIndex = i; 869 atn.addState(bypassStart); 870 871 BlockEndState bypassStop = new BlockEndState(); 872 bypassStop.ruleIndex = i; 873 atn.addState(bypassStop); 874 875 bypassStart.endState = bypassStop; 876 atn.defineDecisionState(bypassStart); 877 878 bypassStop.startState = bypassStart; 879 880 ATNState endState; 881 Transition excludeTransition = null; 882 if (atn.ruleToStartState[i].isLeftRecursiveRule) { 883 // wrap from the beginning of the rule to the StarLoopEntryState 884 endState = null; 885 foreach (ATNState state; atn.states) { 886 if (state.ruleIndex != i) { 887 continue; 888 } 889 890 if (state.classinfo != StarLoopEntryState.classinfo) { 891 continue; 892 } 893 894 ATNState maybeLoopEndState = state.transition(state.getNumberOfTransitions() - 1).target; 895 if (maybeLoopEndState.classinfo != LoopEndState.classinfo) { 896 continue; 897 } 898 899 if (maybeLoopEndState.epsilonOnlyTransitions && maybeLoopEndState.transition(0).target.classinfo == RuleStopState.classinfo) { 900 endState = state; 901 break; 902 } 903 } 904 905 if (endState is null) { 906 throw new UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); 907 } 908 909 excludeTransition = (cast(StarLoopEntryState)endState).loopBackState.transition(0); 910 } 911 else { 912 endState = atn.ruleToStopState[i]; 913 } 914 915 // all non-excluded transitions that currently target end state need to target blockEnd instead 916 foreach (ATNState state; atn.states) { 917 foreach (Transition transition; state.transitions) { 918 if (transition == excludeTransition) { 919 continue; 920 } 921 922 if (transition.target == endState) { 923 transition.target = bypassStop; 924 } 925 } 926 } 927 928 // all transitions leaving the rule start state need to leave blockStart instead 929 while (atn.ruleToStartState[i].getNumberOfTransitions() > 0) { 930 Transition transition = atn.ruleToStartState[i].removeTransition(atn.ruleToStartState[i].getNumberOfTransitions() - 1); 931 bypassStart.addTransition(transition); 932 } 933 934 // link the new states 935 atn.ruleToStartState[i].addTransition(new EpsilonTransition(bypassStart)); 936 bypassStop.addTransition(new EpsilonTransition(endState)); 937 938 ATNState matchState = new BasicState(); 939 atn.addState(matchState); 940 matchState.addTransition(new AtomTransition(bypassStop, atn.ruleToTokenType[i])); 941 bypassStart.addTransition(new EpsilonTransition(matchState)); 942 } 943 944 if (deserializationOptions.verifyATN()) { 945 // reverify after modification 946 verifyATN(atn); 947 } 948 } 949 950 private static int inlineSetRules(ATN atn) 951 { 952 int inlinedCalls = 0; 953 Transition[] ruleToInlineTransition; 954 for (int i = 0; i < atn.ruleToStartState.length; i++) 955 { 956 RuleStartState startState = atn.ruleToStartState[i]; 957 ATNState middleState = startState; 958 while (middleState.onlyHasEpsilonTransitions && middleState.numberOfOptimizedTransitions == 1 && middleState.getOptimizedTransition(0).getSerializationType == TransitionStates.EPSILON) 959 { 960 middleState = middleState.getOptimizedTransition(0).target; 961 } 962 if (middleState.numberOfOptimizedTransitions != 1) 963 { 964 continue; 965 } 966 Transition matchTransition = middleState.getOptimizedTransition(0); 967 ATNState matchTarget = matchTransition.target; 968 if (matchTransition.isEpsilon || !matchTarget.onlyHasEpsilonTransitions || matchTarget.numberOfOptimizedTransitions != 1 || !(cast(RuleStopState)matchTarget.getOptimizedTransition(0).target)) 969 { 970 continue; 971 } 972 with(TransitionStates) { 973 switch (matchTransition.getSerializationType) 974 { 975 case ATOM: 976 case RANGE: 977 case SET: 978 { 979 ruleToInlineTransition[i] = matchTransition; 980 break; 981 } 982 983 case NOT_SET: 984 case WILDCARD: 985 { 986 // not implemented yet 987 continue; 988 } 989 990 default: 991 { 992 continue; 993 } 994 } 995 } 996 } 997 for (int stateNumber = 0; stateNumber < atn.states.length; stateNumber++) 998 { 999 ATNState state = atn.states[stateNumber]; 1000 if (state.ruleIndex < 0) 1001 { 1002 continue; 1003 } 1004 Transition[] optimizedTransitions = null; 1005 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) { 1006 Transition transition = state.getOptimizedTransition(i); 1007 if (!(cast(RuleTransition)transition)) 1008 { 1009 if (optimizedTransitions != null) 1010 { 1011 optimizedTransitions ~= transition; 1012 } 1013 continue; 1014 } 1015 RuleTransition ruleTransition = cast(RuleTransition)transition; 1016 Transition effective = ruleToInlineTransition[ruleTransition.target.ruleIndex]; 1017 if (effective is null) 1018 { 1019 if (optimizedTransitions != null) 1020 { 1021 optimizedTransitions ~= transition; 1022 } 1023 continue; 1024 } 1025 if (optimizedTransitions == null) 1026 { 1027 optimizedTransitions = []; 1028 for (int j = 0; j < i; j++) 1029 { 1030 optimizedTransitions ~= state.getOptimizedTransition(i); 1031 } 1032 } 1033 inlinedCalls++; 1034 ATNState target = ruleTransition.followState; 1035 ATNState intermediateState = new BasicState(); 1036 intermediateState.setRuleIndex(target.ruleIndex); 1037 atn.addState(intermediateState); 1038 optimizedTransitions ~= new EpsilonTransition(intermediateState); 1039 with(TransitionStates) { 1040 switch (effective.getSerializationType) 1041 { 1042 case ATOM: 1043 { 1044 intermediateState.addTransition(new AtomTransition(target, (cast(AtomTransition)effective)._label)); 1045 break; 1046 } 1047 1048 case RANGE: 1049 { 1050 intermediateState.addTransition(new RangeTransition(target, (cast(RangeTransition)effective).from, (cast(RangeTransition)effective).to)); 1051 break; 1052 } 1053 1054 case SET: 1055 { 1056 intermediateState.addTransition(new SetTransition(target, effective.label)); 1057 break; 1058 } 1059 1060 default: 1061 { 1062 assert(false, "NotSupportedException"); 1063 } 1064 } 1065 } 1066 } 1067 if (optimizedTransitions != null) 1068 { 1069 if (state.isOptimized) 1070 { 1071 while (state.numberOfOptimizedTransitions > 0) 1072 { 1073 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1074 } 1075 } 1076 foreach (Transition transition; optimizedTransitions) 1077 { 1078 state.addOptimizedTransition(transition); 1079 } 1080 } 1081 } 1082 return inlinedCalls; 1083 } 1084 1085 private static int combineChainedEpsilons(ATN atn) 1086 { 1087 int removedEdges = 0; 1088 foreach (ATNState state; atn.states) 1089 { 1090 if (!state.onlyHasEpsilonTransitions || cast(RuleStopState)state) 1091 { 1092 continue; 1093 } 1094 Transition[] optimizedTransitions = null; 1095 for (int i = 0; i < state.numberOfOptimizedTransitions; i++) 1096 { 1097 Transition transition = state.getOptimizedTransition(i); 1098 ATNState intermediate = transition.target; 1099 if (transition.getSerializationType != TransitionStates.EPSILON || 1100 (cast(EpsilonTransition)transition).outermostPrecedenceReturn != -1 || 1101 intermediate.getStateType != StateNames.BASIC || 1102 !intermediate.onlyHasEpsilonTransitions) 1103 { 1104 if (optimizedTransitions != null) 1105 { 1106 optimizedTransitions ~= transition; 1107 } 1108 goto nextTransition_continue; 1109 } 1110 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1111 { 1112 if (intermediate.getOptimizedTransition(j).getSerializationType != TransitionStates.EPSILON || 1113 1114 (cast(EpsilonTransition)intermediate.getOptimizedTransition(j)).outermostPrecedenceReturn != -1) 1115 { 1116 if (optimizedTransitions != null) 1117 { 1118 optimizedTransitions ~= transition; 1119 } 1120 goto nextTransition_continue; 1121 } 1122 } 1123 removedEdges++; 1124 if (optimizedTransitions == null) 1125 { 1126 optimizedTransitions = []; 1127 for (int j = 0; j < i; j++) 1128 { 1129 optimizedTransitions ~= state.getOptimizedTransition(j); 1130 } 1131 } 1132 for (int j = 0; j < intermediate.numberOfOptimizedTransitions; j++) 1133 { 1134 ATNState target = intermediate.getOptimizedTransition(j).target; 1135 optimizedTransitions ~= new EpsilonTransition(target); 1136 } 1137 nextTransition_continue: ; 1138 } 1139 1140 if (optimizedTransitions != null) 1141 { 1142 if (state.isOptimized) 1143 { 1144 while (state.numberOfOptimizedTransitions > 0) 1145 { 1146 state.removeOptimizedTransition(state.numberOfOptimizedTransitions - 1); 1147 } 1148 } 1149 foreach (Transition transition; optimizedTransitions) 1150 { 1151 state.addOptimizedTransition(transition); 1152 } 1153 } 1154 } 1155 1156 return removedEdges; 1157 } 1158 1159 private static int optimizeSets(ATN atn, bool preserveOrder) 1160 { 1161 if (preserveOrder) 1162 { 1163 // this optimization currently doesn't preserve edge order. 1164 return 0; 1165 } 1166 int removedPaths = 0; 1167 DecisionState[] decisions = atn.decisionToState; 1168 foreach (DecisionState decision; decisions) 1169 { 1170 IntervalSet setTransitions = new IntervalSet(); 1171 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1172 { 1173 Transition epsTransition = decision.optimizedTransitions[i]; 1174 if (!(cast(EpsilonTransition)epsTransition)) 1175 { 1176 continue; 1177 } 1178 if (epsTransition.target.optimizedTransitions.length != 1) 1179 { 1180 continue; 1181 } 1182 Transition transition = epsTransition.target.optimizedTransitions[0]; 1183 if (!(cast(BlockEndState)transition.target)) 1184 { 1185 continue; 1186 } 1187 if (cast(NotSetTransition)transition) 1188 { 1189 // TODO: not yet implemented 1190 continue; 1191 } 1192 if (cast(AtomTransition)transition || 1193 cast(RangeTransition)transition || 1194 cast(SetTransition)transition) 1195 { 1196 setTransitions.add(i); 1197 } 1198 } 1199 if (setTransitions.size <= 1) 1200 { 1201 continue; 1202 } 1203 Transition[] optimizedTransitions; 1204 for (int i = 0; i < decision.optimizedTransitions.length; i++) 1205 { 1206 if (!setTransitions.contains(i)) 1207 { 1208 optimizedTransitions ~= decision.optimizedTransitions[i]; 1209 } 1210 } 1211 ATNState blockEndState = decision.optimizedTransitions[setTransitions.getMinElement].target.optimizedTransitions[0].target; 1212 IntervalSet matchSet = new IntervalSet; 1213 for (int i = 0; i < setTransitions.intervals.length; i++) 1214 { 1215 Interval interval = setTransitions.intervals[i]; 1216 for (int j = interval.a; j <= interval.b; j++) 1217 { 1218 Transition matchTransition = decision.optimizedTransitions[j].target.optimizedTransitions[0]; 1219 if (cast(NotSetTransition)matchTransition) 1220 { 1221 assert(false, "Not yet implemented."); 1222 } 1223 else 1224 { 1225 matchSet.addAll(matchTransition.label); 1226 } 1227 } 1228 } 1229 Transition newTransition; 1230 if (matchSet.intervals.length == 1) 1231 { 1232 if (matchSet.size == 1) 1233 { 1234 newTransition = new AtomTransition(blockEndState, matchSet.getMinElement); 1235 } 1236 else 1237 { 1238 Interval matchInterval = matchSet.intervals[0]; 1239 newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b); 1240 } 1241 } 1242 else 1243 { 1244 newTransition = new SetTransition(blockEndState, matchSet); 1245 } 1246 ATNState optimizedState = new BasicState(); 1247 optimizedState.setRuleIndex(decision.ruleIndex); 1248 atn.addState(optimizedState); 1249 optimizedState.addTransition(newTransition); 1250 optimizedTransitions ~= new EpsilonTransition(optimizedState); 1251 removedPaths += decision.numberOfOptimizedTransitions - optimizedTransitions.length; 1252 if (decision.isOptimized) 1253 { 1254 while (decision.numberOfOptimizedTransitions > 0) 1255 { 1256 decision.removeOptimizedTransition(decision.numberOfOptimizedTransitions - 1); 1257 } 1258 } 1259 foreach (Transition transition; optimizedTransitions) 1260 { 1261 decision.addOptimizedTransition(transition); 1262 } 1263 } 1264 return removedPaths; 1265 } 1266 1267 } 1268 1269 version(unittest) { 1270 import dshould : be, equal, not, should; 1271 import std.typecons : tuple; 1272 import unit_threaded; 1273 1274 @Tags("des11") 1275 @("testEncodingATNDeserialize") 1276 unittest { 1277 auto des = new ATNDeserializer; 1278 des.should.not.be(null); 1279 } 1280 }