1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.TokenStreamRewriter; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.InsertAfterOp; 11 import antlr.v4.runtime.InsertBeforeOp; 12 import antlr.v4.runtime.ReplaceOp; 13 import antlr.v4.runtime.RewriteOperation; 14 import antlr.v4.runtime.Token; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.misc.Interval; 18 import std.algorithm.comparison; 19 import std.format; 20 import std.conv; 21 22 /** 23 * Useful for rewriting out a buffered input token stream after doing some 24 * augmentation or other manipulations on it. 25 * 26 * <p> 27 * You can insert stuff, replace, and delete chunks. Note that the operations 28 * are done lazily--only if you convert the buffer to a {@link String} with 29 * {@link TokenStream#getText()}. This is very efficient because you are not 30 * moving data around all the time. As the buffer of tokens is converted to 31 * strings, the {@link #getText()} method(s) scan the input token stream and 32 * check to see if there is an operation at the current index. If so, the 33 * operation is done and then normal {@link String} rendering continues on the 34 * buffer. This is like having multiple Turing machine instruction streams 35 * (programs) operating on a single input tape. :)</p> 36 * 37 * <p> 38 * This rewriter makes no modifications to the token stream. It does not ask the 39 * stream to fill itself up nor does it advance the input cursor. The token 40 * stream {@link TokenStream#index()} will return the same value before and 41 * after any {@link #getText()} call.</p> 42 * 43 * <p> 44 * The rewriter only works on tokens that you have in the buffer and ignores the 45 * current input cursor. If you are buffering tokens on-demand, calling 46 * {@link #getText()} halfway through the input will only do rewrites for those 47 * tokens in the first half of the file.</p> 48 * 49 * <p> 50 * Since the operations are done lazily at {@link #getText}-time, operations do 51 * not screw up the token index values. That is, an insert operation at token 52 * index {@code i} does not change the index values for tokens 53 * {@code i}+1..n-1.</p> 54 * 55 * <p> 56 * Because operations never actually alter the buffer, you may always get the 57 * original token stream back without undoing anything. Since the instructions 58 * are queued up, you can easily simulate transactions and roll back any changes 59 * if there is an error just by removing instructions. For example,</p> 60 * 61 * <pre> 62 * CharStream input = new ANTLRFileStream("input"); 63 * TLexer lex = new TLexer(input); 64 * CommonTokenStream tokens = new CommonTokenStream(lex); 65 * T parser = new T(tokens); 66 * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); 67 * parser.startRule(); 68 * </pre> 69 * 70 * <p> 71 * Then in the rules, you can execute (assuming rewriter is visible):</p> 72 * 73 * <pre> 74 * Token t,u; 75 * ... 76 * rewriter.insertAfter(t, "text to put after t");} 77 * rewriter.insertAfter(u, "text after u");} 78 * System.out.println(rewriter.getText()); 79 * </pre> 80 * 81 * <p> 82 * You can also have multiple "instruction streams" and get multiple rewrites 83 * from a single pass over the input. Just name the instruction streams and use 84 * that name again when printing the buffer. This could be useful for generating 85 * a C file and also its header file--all from the same buffer:</p> 86 * 87 * <pre> 88 * rewriter.insertAfter("pass1", t, "text to put after t");} 89 * rewriter.insertAfter("pass2", u, "text after u");} 90 * System.out.println(rewriter.getText("pass1")); 91 * System.out.println(rewriter.getText("pass2")); 92 * </pre> 93 * 94 * <p> 95 * If you don't use named rewrite streams, a "default" stream is used as the 96 * first example shows.</p> 97 */ 98 class TokenStreamRewriter(T) 99 { 100 101 public static immutable string DEFAULT_PROGRAM_NAME = "default"; 102 103 public static immutable int MIN_TOKEN_INDEX = 0; 104 105 /** 106 * Our source stream 107 * @uml 108 * @read 109 */ 110 private static TokenStream tokens_; 111 112 protected RewriteOperation!T[][string] programs; 113 114 protected size_t[string] lastRewriteTokenIndexes; 115 116 private RewriteOperation!T[] rewriteOps; 117 118 public this() 119 { 120 } 121 122 public this(TokenStream tokens) 123 { 124 tokens_ = tokens; 125 programs[DEFAULT_PROGRAM_NAME] = rewriteOps; 126 } 127 128 public void rollback(int instructionIndex) 129 { 130 rollback(DEFAULT_PROGRAM_NAME, instructionIndex); 131 } 132 133 /** 134 * Rollback the instruction stream for a program so that 135 * the indicated instruction (via instructionIndex) is no 136 * longer in the stream. UNTESTED! 137 */ 138 public void rollback(string programName, int instructionIndex) 139 { 140 RewriteOperation!T[] ist = programs[programName]; 141 if (programName in programs) { 142 programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex]; 143 } 144 } 145 146 public void deleteProgram() 147 { 148 deleteProgram(DEFAULT_PROGRAM_NAME); 149 } 150 151 /** 152 * Reset the program so that no instructions exist 153 */ 154 public void deleteProgram(string programName) 155 { 156 rollback(programName, MIN_TOKEN_INDEX); 157 } 158 159 public void insertAfter(Token t, T text) 160 { 161 insertAfter(DEFAULT_PROGRAM_NAME, t, text); 162 } 163 164 public void insertAfter(int index, T text) 165 { 166 insertAfter(DEFAULT_PROGRAM_NAME, index, text); 167 } 168 169 public void insertAfter(string programName, Token t, T text) 170 { 171 insertAfter(programName, t.getTokenIndex, text); 172 } 173 174 public void insertAfter(string programName, int index, T text) 175 { 176 // to insert after, just insert before next index (even if past end) 177 RewriteOperation!T op = new InsertAfterOp!T(index, text); 178 op.instructionIndex = programs[programName].length; 179 programs[programName] ~= op; 180 } 181 182 public void insertBefore(Token t, T text) 183 { 184 insertBefore(DEFAULT_PROGRAM_NAME, t, text); 185 } 186 187 public void insertBefore(size_t index, T text) 188 { 189 insertBefore(DEFAULT_PROGRAM_NAME, index, text); 190 } 191 192 public void insertBefore(string programName, Token t, T text) 193 { 194 insertBefore(programName, t.getTokenIndex(), text); 195 } 196 197 public void insertBefore(string programName, size_t index, T text) 198 { 199 RewriteOperation!T op = new InsertBeforeOp!T(index, text); 200 op.instructionIndex = programs[programName].length; 201 programs[programName] ~= op; 202 } 203 204 public void replace(size_t index, T text) 205 { 206 replace(DEFAULT_PROGRAM_NAME, index, index, text); 207 } 208 209 public void replace(size_t from, size_t to, T text) 210 { 211 replace(DEFAULT_PROGRAM_NAME, from, to, text); 212 } 213 214 public void replace(Token indexT, T text) 215 { 216 replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); 217 } 218 219 public void replace(Token from, Token to, T text) 220 { 221 replace(DEFAULT_PROGRAM_NAME, from, to, text); 222 } 223 224 public void replace(string programName, size_t from, size_t to, T text) 225 { 226 debug(TokenStreamRewriter) { 227 import std.stdio : writefln; 228 writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text); 229 } 230 if ( from > to || from<0 || to<0 || to >= tokens_.size ) { 231 throw 232 new 233 IllegalArgumentException( 234 format("replace: range invalid: %s..%s(size=%s)", 235 from, to, tokens_.size)); 236 } 237 RewriteOperation!T op = new ReplaceOp!T(from, to, text); 238 op.instructionIndex = programs[programName].length; 239 programs[programName] ~= op; 240 } 241 242 public void replace(string programName, Token from, Token to, T text) 243 { 244 debug(TokenStreamRewriter) { 245 import std.stdio : writefln; 246 writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text); 247 } 248 replace(programName, 249 from.getTokenIndex, 250 to.getTokenIndex, 251 text); 252 } 253 254 /** 255 * Delete token (can not use delete as identifier) 256 */ 257 public void deleteT(size_t index) 258 { 259 deleteT(DEFAULT_PROGRAM_NAME, index, index); 260 } 261 262 public void deleteT(size_t from, size_t to) 263 { 264 deleteT(DEFAULT_PROGRAM_NAME, from, to); 265 } 266 267 public void deleteT(Token indexT) 268 { 269 deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT); 270 } 271 272 public void deleteT(Token from, Token to) 273 { 274 deleteT(DEFAULT_PROGRAM_NAME, from, to); 275 } 276 277 public void deleteT(string programName, size_t from, size_t to) 278 { 279 replace(programName,from,to,null); 280 } 281 282 public void deleteT(string programName, Token from, Token to) 283 { 284 replace(programName,from,to,null); 285 } 286 287 public size_t getLastRewriteTokenIndex() 288 { 289 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); 290 } 291 292 private size_t getLastRewriteTokenIndex(string programName) 293 { 294 if (programName in lastRewriteTokenIndexes) { 295 return lastRewriteTokenIndexes[programName]; 296 } 297 else { 298 return -1; 299 } 300 } 301 302 private void setLastRewriteTokenIndex(string programName, size_t i) 303 { 304 lastRewriteTokenIndexes[programName] = i; 305 } 306 307 private RewriteOperation!T[] getProgram(string name) 308 { 309 if (name in programs) { 310 return programs[name]; 311 } 312 else { 313 return initializeProgram(name); 314 } 315 } 316 317 private RewriteOperation!T[] initializeProgram(string name) 318 { 319 RewriteOperation!T[] iso; 320 programs[name] = iso; 321 return iso; 322 } 323 324 /** 325 * Return the text from the original tokens altered per the 326 * instructions given to this rewriter. 327 */ 328 public T getText() 329 { 330 return getText(DEFAULT_PROGRAM_NAME, Interval.of(0,tokens_.size()-1)); 331 } 332 333 /** 334 * Return the text from the original tokens altered per the 335 * instructions given to this rewriter in programName. 336 */ 337 public T getText(string programName) 338 { 339 return getText(programName, Interval.of(0,tokens_.size-1)); 340 } 341 342 /** 343 * Return the text associated with the tokens in the interval from the 344 * original token stream but with the alterations given to this rewriter. 345 * The interval refers to the indexes in the original token stream. 346 * We do not alter the token stream in any way, so the indexes 347 * and intervals are still consistent. Includes any operations done 348 * to the first and last token in the interval. So, if you did an 349 * insertBefore on the first token, you would get that insertion. 350 * The same is true if you do an insertAfter the stop token. 351 */ 352 public T getText(Interval interval) 353 { 354 return getText(DEFAULT_PROGRAM_NAME, interval); 355 } 356 357 public T getText(string programName, Interval interval) 358 { 359 RewriteOperation!T[] rewrites; 360 361 if (programName in programs) 362 rewrites = programs[programName]; 363 364 int start = interval.a; 365 int stop = interval.b; 366 367 // ensure start/end are in range 368 if ( stop > tokens_.size-1 ) 369 stop = tokens_.size-1; 370 if ( start < 0 ) 371 start = 0; 372 373 if (rewrites.length == 0) { 374 static if (is(T == string)) { 375 return tokens_.getText(interval); // no instructions to execute 376 } 377 else { 378 return getPositionText(tokens_, interval); 379 } 380 } 381 382 T buf; 383 384 // First, optimize instruction stream 385 RewriteOperation!T[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites); 386 387 // Walk buffer, executing instructions and emitting tokens 388 int i = start; 389 390 while (i <= stop && i < tokens_.size) { 391 Token t = tokens_.get(i); 392 RewriteOperation!T op; 393 if (i in indexToOp) 394 op = indexToOp[i]; 395 indexToOp.remove(i); // remove so any left have index size-1 396 if (!op) { 397 // no operation at that index, just dump token 398 if (t.getType != TokenConstantDefinition.EOF) { 399 static if (is(T == string)) { 400 buf ~= t.getText; 401 } 402 else { 403 buf ~= getPositionText(t); 404 } 405 } 406 i++; // move to next token 407 } 408 else { 409 i = to!int(op.execute(buf)); // execute operation and skip 410 } 411 } 412 413 // include stuff after end if it's last index in buffer 414 // So, if they did an insertAfter(lastValidIndex, "foo"), include 415 // foo if end==lastValidIndex. 416 if (stop == tokens_.size()-1) { 417 // Scan any remaining operations after last token 418 // should be included (they will be inserts). 419 foreach (RewriteOperation!T op; indexToOp.values()) { 420 if (op.index >= tokens_.size-1) 421 buf ~= to!T(op.text); 422 } 423 } 424 return buf; 425 } 426 427 /** 428 * overlapping replaces that are not completed nested). Inserts to 429 * same index need to be combined etc... Here are the cases: 430 * 431 * I.i.u I.j.v leave alone, nonoverlapping 432 * I.i.u I.i.v combine: Iivu 433 * 434 * R.i-j.u R.x-y.v | i-j in x-y delete first R 435 * R.i-j.u R.i-j.v delete first R 436 * R.i-j.u R.x-y.v | x-y in i-j ERROR 437 * R.i-j.u R.x-y.v | boundaries overlap ERROR 438 * 439 * Delete special case of replace (text==null): 440 * D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 441 * 442 * I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before 443 * we're not deleting i) 444 * I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping 445 * R.x-y.v I.i.u | i in x-y ERROR 446 * R.x-y.v I.x.u R.x-y.uv (combine, delete I) 447 * R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 448 * 449 * I.i.u = insert u before op @ index i 450 * R.x-y.u = replace x-y indexed tokens with u 451 * 452 * First we need to examine replaces. For any replace op: 453 * 454 * 1. wipe out any insertions before op within that range. 455 * 2. Drop any replace op before that is contained completely within 456 * that range. 457 * 3. Throw exception upon boundary overlap with any previous replace. 458 * 459 * Then we can deal with inserts: 460 * 461 * 1. for any inserts to same index, combine even if not adjacent. 462 * 2. for any prior replace with same left boundary, combine this 463 * insert with replace and delete this replace. 464 * 3. throw exception if index in same range as previous replace 465 * 466 * Don't actually delete; make op null in list. Easier to walk list. 467 * Later we can throw as we add to index → op map. 468 * 469 * Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 470 * inserted stuff would be before the replace range. But, if you 471 * add tokens in front of a method body '{' and then delete the method 472 * body, I think the stuff before the '{' you added should disappear too. 473 * 474 * Return a map from token index to operation. 475 */ 476 protected RewriteOperation!T[size_t] reduceToSingleOperationPerIndex(RewriteOperation!T[] rewrites) 477 { 478 debug(TokenStreamRewriter) { 479 import std.stdio : writefln; 480 writefln("reduceToSingleOperationPerIndex"); 481 foreach (i, rew; rewrites) 482 writefln("\trewrites[%s] = %s", i, rew); 483 } 484 485 // WALK REPLACES 486 for (size_t i = 0; i < rewrites.length; i++) { 487 RewriteOperation!T op = rewrites[i]; 488 debug(TokenStreamRewriter) { 489 import std.stdio : writefln; 490 writefln("op0 = %s", op); 491 } 492 if (op is null) continue; 493 if (!(cast(ReplaceOp!T)op)) { 494 continue; 495 } 496 debug(TokenStreamRewriter) { 497 import std.stdio : writefln; 498 writefln("op = %s", op); 499 } 500 ReplaceOp!T rop = cast(ReplaceOp!T)rewrites[i]; 501 // Wipe prior inserts within range 502 InsertBeforeOp!T[] inserts = getKindOfOps!(InsertBeforeOp!T)(rewrites, i); 503 foreach (InsertBeforeOp!T iop; inserts) { 504 if ( iop.index == rop.index ) { 505 // E.g., insert before 2, delete 2..2; update replace 506 // text to include insert before, kill insert 507 rewrites[iop.instructionIndex] = null; 508 rop.text = iop.text ~ (rop.text !is null?rop.text:T.init); 509 } 510 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) { 511 // delete insert as it's a no-op. 512 rewrites[iop.instructionIndex] = null; 513 } 514 } 515 // Drop any prior replaces contained within 516 ReplaceOp!T[] prevReplaces = getKindOfOps!(ReplaceOp!T)(rewrites, i); 517 foreach (ReplaceOp!T prevRop; prevReplaces) { 518 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) { 519 // delete replace as it's a no-op. 520 rewrites[prevRop.instructionIndex] = null; 521 continue; 522 } 523 // throw exception unless disjoint or identical 524 bool disjoint = 525 prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex; 526 // Delete special case of replace (text==null): 527 // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 528 if ( prevRop.text==null && rop.text==null && !disjoint ) { 529 //System.out.println("overlapping deletes: "+prevRop+", "+rop); 530 rewrites[prevRop.instructionIndex] = null; // kill first delete 531 rop.index = min(prevRop.index, rop.index); 532 rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex); 533 debug { 534 import std.stdio : stderr, writefln; 535 stderr.writefln("new rop %s", rop); 536 } 537 } 538 else if ( !disjoint ) { 539 throw 540 new 541 IllegalArgumentException(format( 542 "replace op boundaries of %s overlap with previous %s", 543 rop, 544 prevRop)); 545 } 546 } 547 } 548 549 // WALK INSERTS 550 debug(TokenStreamRewriter) { 551 import std.stdio : stderr, writefln; 552 writefln("WALK INSERTS"); 553 } 554 for (int i = 0; i < rewrites.length; i++) { 555 RewriteOperation!T op = rewrites[i]; 556 if (op is null) continue; 557 if (!(cast(InsertBeforeOp!T)op)) continue; 558 InsertBeforeOp!T iop = cast(InsertBeforeOp!T)rewrites[i]; 559 // combine current insert with prior if any at same index 560 InsertBeforeOp!T[] prevInserts = getKindOfOps!(InsertBeforeOp!T)(rewrites, i); 561 foreach (InsertBeforeOp!T prevIop; prevInserts) { 562 debug(TokenStreamRewriter) { 563 import std.stdio : stderr, writefln; 564 writefln("prevIop = %s", prevIop); 565 } 566 if (prevIop.index == iop.index) { 567 if (cast(InsertAfterOp!T)prevIop) { 568 iop.text = catOpText(prevIop.text, iop.text); 569 rewrites[prevIop.instructionIndex] = null; 570 } 571 else if (cast(InsertBeforeOp!T)prevIop) { // combine objects 572 // convert to strings...we're in process of toString'ing 573 // whole token buffer so no lazy eval issue with any templates 574 iop.text = catOpText(iop.text, prevIop.text); 575 // delete redundant prior insert 576 rewrites[prevIop.instructionIndex] = null; 577 } 578 } 579 } 580 // look for replaces where iop.index is in range; error 581 debug(TokenStreamRewriter) { 582 import std.stdio : stderr, writefln; 583 writefln("look for replaces where iop.index is in range, i = %s", i); 584 } 585 ReplaceOp!T[] prevReplaces = getKindOfOps!(ReplaceOp!T)(rewrites, i); 586 debug(TokenStreamRewriter) { 587 import std.stdio : stderr, writefln; 588 writefln("prevReplaces = %s", prevReplaces); 589 } 590 foreach (ReplaceOp!T rop; prevReplaces) { 591 if ( iop.index == rop.index ) { 592 rop.text = catOpText(iop.text, rop.text); 593 rewrites[i] = null; // delete current insert 594 continue; 595 } 596 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) { 597 throw 598 new 599 IllegalArgumentException( 600 format("insert op %s within boundaries of previous %s", 601 iop, rop)); 602 } 603 } 604 } 605 606 debug(TokenStreamRewriter) { 607 import std.stdio : stderr, writefln; 608 writefln("rewrites after = %s", rewrites); 609 } 610 RewriteOperation!T[size_t] m; 611 for (int i = 0; i < rewrites.length; i++) { 612 RewriteOperation!T op = rewrites[i]; 613 if (op is null) continue; // ignore deleted ops 614 if (op.index in m) { 615 throw new Error("should only be one op per index"); 616 } 617 m[op.index] = op; 618 } 619 return m; 620 } 621 622 protected T catOpText(T a, T b) 623 { 624 if (a && b) 625 return a ~ b; 626 if (a) 627 return a; 628 return b; 629 } 630 631 protected auto getKindOfOps(U)(RewriteOperation!T[] rewrites, size_t before) 632 { 633 U[] ops; 634 for (int i=0; i<before && i<rewrites.length; i++) { 635 RewriteOperation!T op = rewrites[i]; 636 if (op is null) continue; // ignore deleted 637 if (U.classinfo == op.classinfo) { 638 ops ~= cast(U)(op); 639 } 640 } 641 return ops; 642 } 643 644 protected T getPositionText(Token token) 645 { 646 T buf; 647 return buf; 648 } 649 650 protected T getPositionText(TokenStream tokens, Interval interval) 651 { 652 T buf; 653 return buf; 654 } 655 656 public static TokenStream tokens() 657 { 658 return tokens_; 659 } 660 661 }