1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.TokenStreamRewriter; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.InsertAfterOp; 11 import antlr.v4.runtime.InsertBeforeOp; 12 import antlr.v4.runtime.ReplaceOp; 13 import antlr.v4.runtime.RewriteOperation; 14 import antlr.v4.runtime.Token; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.misc.Interval; 18 import std.algorithm.comparison; 19 import std.format; 20 import std.conv; 21 import std.variant; 22 23 /** 24 * Useful for rewriting out a buffered input token stream after doing some 25 * augmentation or other manipulations on it. 26 * 27 * <p> 28 * You can insert stuff, replace, and delete chunks. Note that the operations 29 * are done lazily--only if you convert the buffer to a {@link String} with 30 * {@link TokenStream#getText()}. This is very efficient because you are not 31 * moving data around all the time. As the buffer of tokens is converted to 32 * strings, the {@link #getText()} method(s) scan the input token stream and 33 * check to see if there is an operation at the current index. If so, the 34 * operation is done and then normal {@link String} rendering continues on the 35 * buffer. This is like having multiple Turing machine instruction streams 36 * (programs) operating on a single input tape. :)</p> 37 * 38 * <p> 39 * This rewriter makes no modifications to the token stream. It does not ask the 40 * stream to fill itself up nor does it advance the input cursor. The token 41 * stream {@link TokenStream#index()} will return the same value before and 42 * after any {@link #getText()} call.</p> 43 * 44 * <p> 45 * The rewriter only works on tokens that you have in the buffer and ignores the 46 * current input cursor. If you are buffering tokens on-demand, calling 47 * {@link #getText()} halfway through the input will only do rewrites for those 48 * tokens in the first half of the file.</p> 49 * 50 * <p> 51 * Since the operations are done lazily at {@link #getText}-time, operations do 52 * not screw up the token index values. That is, an insert operation at token 53 * index {@code i} does not change the index values for tokens 54 * {@code i}+1..n-1.</p> 55 * 56 * <p> 57 * Because operations never actually alter the buffer, you may always get the 58 * original token stream back without undoing anything. Since the instructions 59 * are queued up, you can easily simulate transactions and roll back any changes 60 * if there is an error just by removing instructions. For example,</p> 61 * 62 * <pre> 63 * CharStream input = new ANTLRFileStream("input"); 64 * TLexer lex = new TLexer(input); 65 * CommonTokenStream tokens = new CommonTokenStream(lex); 66 * T parser = new T(tokens); 67 * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); 68 * parser.startRule(); 69 * </pre> 70 * 71 * <p> 72 * Then in the rules, you can execute (assuming rewriter is visible):</p> 73 * 74 * <pre> 75 * Token t,u; 76 * ... 77 * rewriter.insertAfter(t, "text to put after t");} 78 * rewriter.insertAfter(u, "text after u");} 79 * System.out.println(rewriter.getText()); 80 * </pre> 81 * 82 * <p> 83 * You can also have multiple "instruction streams" and get multiple rewrites 84 * from a single pass over the input. Just name the instruction streams and use 85 * that name again when printing the buffer. This could be useful for generating 86 * a C file and also its header file--all from the same buffer:</p> 87 * 88 * <pre> 89 * rewriter.insertAfter("pass1", t, "text to put after t");} 90 * rewriter.insertAfter("pass2", u, "text after u");} 91 * System.out.println(rewriter.getText("pass1")); 92 * System.out.println(rewriter.getText("pass2")); 93 * </pre> 94 * 95 * <p> 96 * If you don't use named rewrite streams, a "default" stream is used as the 97 * first example shows.</p> 98 */ 99 class TokenStreamRewriter 100 { 101 102 public static immutable string DEFAULT_PROGRAM_NAME = "default"; 103 104 public static immutable int MIN_TOKEN_INDEX = 0; 105 106 /** 107 * Our source stream 108 * @uml 109 * @read 110 */ 111 private static TokenStream tokens_; 112 113 protected RewriteOperation[][string] programs; 114 115 protected size_t[string] lastRewriteTokenIndexes; 116 117 private RewriteOperation[] rewriteOps; 118 119 public this() 120 { 121 } 122 123 public this(TokenStream tokens) 124 { 125 tokens_ = tokens; 126 programs[DEFAULT_PROGRAM_NAME] = rewriteOps; 127 } 128 129 public void rollback(int instructionIndex) 130 { 131 rollback(DEFAULT_PROGRAM_NAME, instructionIndex); 132 } 133 134 /** 135 * Rollback the instruction stream for a program so that 136 * the indicated instruction (via instructionIndex) is no 137 * longer in the stream. UNTESTED! 138 */ 139 public void rollback(string programName, int instructionIndex) 140 { 141 RewriteOperation[] ist = programs[programName]; 142 if (programName in programs) { 143 programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex]; 144 } 145 } 146 147 public void deleteProgram() 148 { 149 deleteProgram(DEFAULT_PROGRAM_NAME); 150 } 151 152 /** 153 * Reset the program so that no instructions exist 154 */ 155 public void deleteProgram(string programName) 156 { 157 rollback(programName, MIN_TOKEN_INDEX); 158 } 159 160 public void insertAfter(Token t, Variant text) 161 { 162 insertAfter(DEFAULT_PROGRAM_NAME, t, text); 163 } 164 165 public void insertAfter(int index, Variant text) 166 { 167 insertAfter(DEFAULT_PROGRAM_NAME, index, text); 168 } 169 170 public void insertAfter(string programName, Token t, Variant text) 171 { 172 insertAfter(programName, t.getTokenIndex, text); 173 } 174 175 public void insertAfter(string programName, int index, Variant text) 176 { 177 // to insert after, just insert before next index (even if past end) 178 RewriteOperation op = new InsertAfterOp(index, text); 179 op.instructionIndex = programs[programName].length; 180 programs[programName] ~= op; 181 } 182 183 public void insertBefore(Token t, Variant text) 184 { 185 insertBefore(DEFAULT_PROGRAM_NAME, t, text); 186 } 187 188 public void insertBefore(size_t index, Variant text) 189 { 190 insertBefore(DEFAULT_PROGRAM_NAME, index, text); 191 } 192 193 public void insertBefore(string programName, Token t, Variant text) 194 { 195 insertBefore(programName, t.getTokenIndex(), text); 196 } 197 198 public void insertBefore(string programName, size_t index, Variant text) 199 { 200 RewriteOperation op = new InsertBeforeOp(index, text); 201 op.instructionIndex = programs[programName].length; 202 programs[programName] ~= op; 203 } 204 205 public void replace(size_t index, Variant text) 206 { 207 replace(DEFAULT_PROGRAM_NAME, index, index, text); 208 } 209 210 public void replace(size_t from, size_t to, Variant text) 211 { 212 replace(DEFAULT_PROGRAM_NAME, from, to, text); 213 } 214 215 public void replace(Token indexT, Variant text) 216 { 217 replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); 218 } 219 220 public void replace(Token from, Token to, Variant text) 221 { 222 replace(DEFAULT_PROGRAM_NAME, from, to, text); 223 } 224 225 public void replace(string programName, size_t from, size_t to, Variant text) 226 { 227 if ( from > to || from<0 || to<0 || to >= tokens_.size ) { 228 throw 229 new 230 IllegalArgumentException( 231 format("replace: range invalid: %s..%s(size=%s)", 232 from, to, tokens_.size)); 233 } 234 RewriteOperation op = new ReplaceOp(from, to, text); 235 op.instructionIndex = programs[programName].length; 236 programs[programName] ~= op; 237 238 debug(TokenStreamRewriter) { 239 import std.stdio : writefln; 240 writefln("replace end: op = %s, programs = %s", op, programs); 241 } 242 } 243 244 public void replace(string programName, Token from, Token to, Variant text) 245 { 246 debug(TokenStreamRewriter) { 247 import std.stdio : writefln; 248 writefln("replace constructor2: from = %s, to = %s, text = %s", from, to, text); 249 } 250 replace(programName, 251 from.getTokenIndex, 252 to.getTokenIndex, 253 text); 254 } 255 256 /** 257 * Delete token (can not use delete as identifier) 258 */ 259 public void deleteT(size_t index) 260 { 261 deleteT(DEFAULT_PROGRAM_NAME, index, index); 262 } 263 264 public void deleteT(size_t from, size_t to) 265 { 266 deleteT(DEFAULT_PROGRAM_NAME, from, to); 267 } 268 269 public void deleteT(Token indexT) 270 { 271 deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT); 272 } 273 274 public void deleteT(Token from, Token to) 275 { 276 deleteT(DEFAULT_PROGRAM_NAME, from, to); 277 } 278 279 public void deleteT(string programName, size_t from, size_t to) 280 { 281 Variant Null; 282 replace(programName, from, to, Null); 283 } 284 285 public void deleteT(string programName, Token from, Token to) 286 { 287 Variant Null; 288 replace(programName, from, to, Null); 289 } 290 291 public size_t getLastRewriteTokenIndex() 292 { 293 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); 294 } 295 296 private size_t getLastRewriteTokenIndex(string programName) 297 { 298 if (programName in lastRewriteTokenIndexes) { 299 return lastRewriteTokenIndexes[programName]; 300 } 301 else { 302 return -1; 303 } 304 } 305 306 private void setLastRewriteTokenIndex(string programName, size_t i) 307 { 308 lastRewriteTokenIndexes[programName] = i; 309 } 310 311 private RewriteOperation[] getProgram(string name) 312 { 313 if (name in programs) { 314 return programs[name]; 315 } 316 else { 317 return initializeProgram(name); 318 } 319 } 320 321 private RewriteOperation[] initializeProgram(string name) 322 { 323 RewriteOperation[] iso; 324 programs[name] = iso; 325 return iso; 326 } 327 328 /** 329 * Return the text from the original tokens altered per the 330 * instructions given to this rewriter. 331 */ 332 public Variant getText() 333 { 334 return getText(DEFAULT_PROGRAM_NAME, Interval.of(0, tokens_.size - 1)); 335 } 336 337 /** 338 * Return the text from the original tokens altered per the 339 * instructions given to this rewriter in programName. 340 */ 341 public Variant getText(string programName) 342 { 343 return getText(programName, Interval.of(0, tokens_.size - 1)); 344 } 345 346 /** 347 * Return the text associated with the tokens in the interval from the 348 * original token stream but with the alterations given to this rewriter. 349 * The interval refers to the indexes in the original token stream. 350 * We do not alter the token stream in any way, so the indexes 351 * and intervals are still consistent. Includes any operations done 352 * to the first and last token in the interval. So, if you did an 353 * insertBefore on the first token, you would get that insertion. 354 * The same is true if you do an insertAfter the stop token. 355 */ 356 public Variant getText(Interval interval) 357 { 358 return getText(DEFAULT_PROGRAM_NAME, interval); 359 } 360 361 public Variant getText(string programName, Interval interval) 362 { 363 RewriteOperation[] rewrites; 364 365 if (programName in programs) 366 rewrites = programs[programName]; 367 368 int start = interval.a; 369 int stop = interval.b; 370 371 // ensure start/end are in range 372 if ( stop > tokens_.size-1 ) 373 stop = tokens_.size-1; 374 if ( start < 0 ) 375 start = 0; 376 377 if (!rewrites) { 378 return tokens_.getText(interval); // no instructions to execute 379 } 380 381 Variant buf; 382 383 // First, optimize instruction stream 384 RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites); 385 386 // Walk buffer, executing instructions and emitting tokens 387 int i = start; 388 389 debug(TokenStreamRewriter) { 390 import std.stdio : stderr, writefln; 391 writefln("tokens_.size = %s", tokens_.size); 392 } 393 394 while (i <= stop && i < tokens_.size) { 395 Token t = tokens_.get(i); 396 debug(TokenStreamRewriter) { 397 import std.stdio : stderr, writefln; 398 writefln("i = %s, token = %s", i, t); 399 } 400 RewriteOperation op; 401 if (i in indexToOp) 402 op = indexToOp[i]; 403 debug(TokenStreamRewriter) { 404 import std.stdio : stderr, writefln; 405 writefln("indexToOp = %s", indexToOp); 406 } 407 indexToOp.remove(i); // remove so any left have index size-1 408 debug(TokenStreamRewriter) { 409 import std.stdio : stderr, writefln; 410 writefln("indexToOp end = %s", indexToOp); 411 } 412 if (!op) { 413 // no operation at that index, just dump token 414 if (t.getType != TokenConstantDefinition.EOF) { 415 Variant Null; 416 if (buf is Null) 417 buf = t.getText; 418 else 419 buf ~= t.getText; 420 } 421 i++; // move to next token 422 } 423 else { 424 i = to!int(op.execute(buf)); // execute operation and skip 425 } 426 } 427 428 // include stuff after end if it's last index in buffer 429 // So, if they did an insertAfter(lastValidIndex, "foo"), include 430 // foo if end==lastValidIndex. 431 if (stop == tokens_.size()-1) { 432 // Scan any remaining operations after last token 433 // should be included (they will be inserts). 434 foreach (RewriteOperation op; indexToOp.values()) { 435 if (op.index >= tokens_.size-1) 436 buf ~= op.text; 437 } 438 } 439 return buf; 440 } 441 442 /** 443 * overlapping replaces that are not completed nested). Inserts to 444 * same index need to be combined etc... Here are the cases: 445 * 446 * I.i.u I.j.v leave alone, nonoverlapping 447 * I.i.u I.i.v combine: Iivu 448 * 449 * R.i-j.u R.x-y.v | i-j in x-y delete first R 450 * R.i-j.u R.i-j.v delete first R 451 * R.i-j.u R.x-y.v | x-y in i-j ERROR 452 * R.i-j.u R.x-y.v | boundaries overlap ERROR 453 * 454 * Delete special case of replace (text==null): 455 * D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 456 * 457 * I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before 458 * we're not deleting i) 459 * I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping 460 * R.x-y.v I.i.u | i in x-y ERROR 461 * R.x-y.v I.x.u R.x-y.uv (combine, delete I) 462 * R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 463 * 464 * I.i.u = insert u before op @ index i 465 * R.x-y.u = replace x-y indexed tokens with u 466 * 467 * First we need to examine replaces. For any replace op: 468 * 469 * 1. wipe out any insertions before op within that range. 470 * 2. Drop any replace op before that is contained completely within 471 * that range. 472 * 3. Throw exception upon boundary overlap with any previous replace. 473 * 474 * Then we can deal with inserts: 475 * 476 * 1. for any inserts to same index, combine even if not adjacent. 477 * 2. for any prior replace with same left boundary, combine this 478 * insert with replace and delete this replace. 479 * 3. throw exception if index in same range as previous replace 480 * 481 * Don't actually delete; make op null in list. Easier to walk list. 482 * Later we can throw as we add to index → op map. 483 * 484 * Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 485 * inserted stuff would be before the replace range. But, if you 486 * add tokens in front of a method body '{' and then delete the method 487 * body, I think the stuff before the '{' you added should disappear too. 488 * 489 * Return a map from token index to operation. 490 */ 491 protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites) 492 { 493 debug(TokenStreamRewriter) { 494 import std.stdio : writefln; 495 writefln("reduceToSingleOperationPerIndex"); 496 foreach (i, rew; rewrites) 497 writefln("\trewrites[%s] = %s", i, rew); 498 } 499 500 // WALK REPLACES 501 for (size_t i = 0; i < rewrites.length; i++) { 502 RewriteOperation op = rewrites[i]; 503 debug(TokenStreamRewriter) { 504 import std.stdio : writefln; 505 writefln("op0 = %s", op); 506 } 507 if (op is null) continue; 508 if (!(cast(ReplaceOp)op)) { 509 continue; 510 } 511 debug(TokenStreamRewriter) { 512 import std.stdio : writefln; 513 writefln("op = %s", op); 514 } 515 ReplaceOp rop = cast(ReplaceOp)rewrites[i]; 516 // Wipe prior inserts within range 517 InsertBeforeOp[] inserts = getKindOfOps!(InsertBeforeOp)(rewrites, i); 518 foreach (InsertBeforeOp iop; inserts) { 519 if ( iop.index == rop.index ) { 520 // E.g., insert before 2, delete 2..2; update replace 521 // text to include insert before, kill insert 522 rewrites[iop.instructionIndex] = null; 523 Variant Null; 524 rop.text = iop.text ~ (rop.text !is Null?rop.text:Null); 525 } 526 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) { 527 // delete insert as it's a no-op. 528 rewrites[iop.instructionIndex] = null; 529 } 530 } 531 // Drop any prior replaces contained within 532 ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i); 533 foreach (ReplaceOp prevRop; prevReplaces) { 534 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) { 535 // delete replace as it's a no-op. 536 rewrites[prevRop.instructionIndex] = null; 537 continue; 538 } 539 // throw exception unless disjoint or identical 540 bool disjoint = 541 prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex; 542 // Delete special case of replace (text==null): 543 // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 544 if ( prevRop.text==null && rop.text==null && !disjoint ) { 545 //System.out.println("overlapping deletes: "+prevRop+", "+rop); 546 rewrites[prevRop.instructionIndex] = null; // kill first delete 547 rop.index = min(prevRop.index, rop.index); 548 rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex); 549 debug { 550 import std.stdio : stderr, writefln; 551 stderr.writefln("new rop %s", rop); 552 } 553 } 554 else if ( !disjoint ) { 555 throw 556 new 557 IllegalArgumentException(format( 558 "replace op boundaries of %s overlap with previous %s", 559 rop, 560 prevRop)); 561 } 562 } 563 } 564 565 // WALK INSERTS 566 debug(TokenStreamRewriter) { 567 import std.stdio : stderr, writefln; 568 writefln("WALK INSERTS"); 569 } 570 for (int i = 0; i < rewrites.length; i++) { 571 RewriteOperation op = rewrites[i]; 572 if (op is null) continue; 573 if (!(cast(InsertBeforeOp)op)) continue; 574 InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i]; 575 // combine current insert with prior if any at same index 576 InsertBeforeOp[] prevInserts = getKindOfOps!(InsertBeforeOp)(rewrites, i); 577 foreach (InsertBeforeOp prevIop; prevInserts) { 578 debug(TokenStreamRewriter) { 579 import std.stdio : stderr, writefln; 580 writefln("prevIop = %s", prevIop); 581 } 582 if (prevIop.index == iop.index) { 583 if (cast(InsertAfterOp)prevIop) { 584 iop.text = catOpText(prevIop.text, iop.text); 585 rewrites[prevIop.instructionIndex] = null; 586 } 587 else if (cast(InsertBeforeOp)prevIop) { // combine objects 588 // convert to strings...we're in process of toString'ing 589 // whole token buffer so no lazy eval issue with any templates 590 iop.text = catOpText(iop.text, prevIop.text); 591 // delete redundant prior insert 592 rewrites[prevIop.instructionIndex] = null; 593 } 594 } 595 } 596 // look for replaces where iop.index is in range; error 597 debug(TokenStreamRewriter) { 598 import std.stdio : stderr, writefln; 599 writefln("look for replaces where iop.index is in range, i = %s", i); 600 } 601 ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i); 602 debug(TokenStreamRewriter) { 603 import std.stdio : stderr, writefln; 604 writefln("prevReplaces = %s", prevReplaces); 605 } 606 foreach (ReplaceOp rop; prevReplaces) { 607 if ( iop.index == rop.index ) { 608 rop.text = catOpText(iop.text, rop.text); 609 rewrites[i] = null; // delete current insert 610 continue; 611 } 612 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) { 613 throw 614 new 615 IllegalArgumentException( 616 format("insert op %s within boundaries of previous %s", 617 iop, rop)); 618 } 619 } 620 } 621 622 debug(TokenStreamRewriter) { 623 import std.stdio : stderr, writefln; 624 writefln("rewrites after = %s", rewrites); 625 } 626 RewriteOperation[size_t] m; 627 for (int i = 0; i < rewrites.length; i++) { 628 RewriteOperation op = rewrites[i]; 629 if (!op) continue; // ignore deleted ops 630 if (op.index in m) { 631 throw new Error("should only be one op per index"); 632 } 633 m[op.index] = op; 634 } 635 return m; 636 } 637 638 protected Variant catOpText(Variant a, Variant b) 639 { 640 Variant Null; 641 if (a !is Null && b !is Null) 642 return a ~ b; 643 if (a !is Null) 644 return a; 645 return b; 646 } 647 648 protected auto getKindOfOps(U)(RewriteOperation[] rewrites, size_t before) 649 { 650 U[] ops; 651 for (int i=0; i<before && i<rewrites.length; i++) { 652 RewriteOperation op = rewrites[i]; 653 if (op is null) continue; // ignore deleted 654 if (U.classinfo == op.classinfo) { 655 ops ~= cast(U)(op); 656 } 657 } 658 return ops; 659 } 660 661 public static TokenStream tokens() 662 { 663 return tokens_; 664 } 665 666 }