1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.TokenStreamRewriter; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.InsertAfterOp; 11 import antlr.v4.runtime.InsertBeforeOp; 12 import antlr.v4.runtime.ReplaceOp; 13 import antlr.v4.runtime.RewriteOperation; 14 import antlr.v4.runtime.Token; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.misc.Interval; 18 import std.algorithm.comparison; 19 import std.format; 20 import std.conv; 21 22 /** 23 * Useful for rewriting out a buffered input token stream after doing some 24 * augmentation or other manipulations on it. 25 * 26 * <p> 27 * You can insert stuff, replace, and delete chunks. Note that the operations 28 * are done lazily--only if you convert the buffer to a {@link String} with 29 * {@link TokenStream#getText()}. This is very efficient because you are not 30 * moving data around all the time. As the buffer of tokens is converted to 31 * strings, the {@link #getText()} method(s) scan the input token stream and 32 * check to see if there is an operation at the current index. If so, the 33 * operation is done and then normal {@link String} rendering continues on the 34 * buffer. This is like having multiple Turing machine instruction streams 35 * (programs) operating on a single input tape. :)</p> 36 * 37 * <p> 38 * This rewriter makes no modifications to the token stream. It does not ask the 39 * stream to fill itself up nor does it advance the input cursor. The token 40 * stream {@link TokenStream#index()} will return the same value before and 41 * after any {@link #getText()} call.</p> 42 * 43 * <p> 44 * The rewriter only works on tokens that you have in the buffer and ignores the 45 * current input cursor. If you are buffering tokens on-demand, calling 46 * {@link #getText()} halfway through the input will only do rewrites for those 47 * tokens in the first half of the file.</p> 48 * 49 * <p> 50 * Since the operations are done lazily at {@link #getText}-time, operations do 51 * not screw up the token index values. That is, an insert operation at token 52 * index {@code i} does not change the index values for tokens 53 * {@code i}+1..n-1.</p> 54 * 55 * <p> 56 * Because operations never actually alter the buffer, you may always get the 57 * original token stream back without undoing anything. Since the instructions 58 * are queued up, you can easily simulate transactions and roll back any changes 59 * if there is an error just by removing instructions. For example,</p> 60 * 61 * <pre> 62 * CharStream input = new ANTLRFileStream("input"); 63 * TLexer lex = new TLexer(input); 64 * CommonTokenStream tokens = new CommonTokenStream(lex); 65 * T parser = new T(tokens); 66 * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); 67 * parser.startRule(); 68 * </pre> 69 * 70 * <p> 71 * Then in the rules, you can execute (assuming rewriter is visible):</p> 72 * 73 * <pre> 74 * Token t,u; 75 * ... 76 * rewriter.insertAfter(t, "text to put after t");} 77 * rewriter.insertAfter(u, "text after u");} 78 * System.out.println(rewriter.getText()); 79 * </pre> 80 * 81 * <p> 82 * You can also have multiple "instruction streams" and get multiple rewrites 83 * from a single pass over the input. Just name the instruction streams and use 84 * that name again when printing the buffer. This could be useful for generating 85 * a C file and also its header file--all from the same buffer:</p> 86 * 87 * <pre> 88 * rewriter.insertAfter("pass1", t, "text to put after t");} 89 * rewriter.insertAfter("pass2", u, "text after u");} 90 * System.out.println(rewriter.getText("pass1")); 91 * System.out.println(rewriter.getText("pass2")); 92 * </pre> 93 * 94 * <p> 95 * If you don't use named rewrite streams, a "default" stream is used as the 96 * first example shows.</p> 97 */ 98 class TokenStreamRewriter 99 { 100 101 public static immutable string DEFAULT_PROGRAM_NAME = "default"; 102 103 public static immutable int MIN_TOKEN_INDEX = 0; 104 105 /** 106 * Our source stream 107 * @uml 108 * @read 109 */ 110 private static TokenStream tokens_; 111 112 protected RewriteOperation[][string] programs; 113 114 protected size_t[string] lastRewriteTokenIndexes; 115 116 private RewriteOperation[] rewriteOps; 117 118 public this() 119 { 120 } 121 122 public this(TokenStream tokens) 123 { 124 tokens_ = tokens; 125 programs[DEFAULT_PROGRAM_NAME] = rewriteOps; 126 } 127 128 public void rollback(int instructionIndex) 129 { 130 rollback(DEFAULT_PROGRAM_NAME, instructionIndex); 131 } 132 133 /** 134 * Rollback the instruction stream for a program so that 135 * the indicated instruction (via instructionIndex) is no 136 * longer in the stream. UNTESTED! 137 */ 138 public void rollback(string programName, int instructionIndex) 139 { 140 RewriteOperation[] ist = programs[programName]; 141 if (programName in programs) { 142 programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex]; 143 } 144 } 145 146 public void deleteProgram() 147 { 148 deleteProgram(DEFAULT_PROGRAM_NAME); 149 } 150 151 /** 152 * Reset the program so that no instructions exist 153 */ 154 public void deleteProgram(string programName) 155 { 156 rollback(programName, MIN_TOKEN_INDEX); 157 } 158 159 public void insertAfter(Token t, string text) 160 { 161 insertAfter(DEFAULT_PROGRAM_NAME, t, text); 162 } 163 164 public void insertAfter(int index, string text) 165 { 166 insertAfter(DEFAULT_PROGRAM_NAME, index, text); 167 } 168 169 public void insertAfter(string programName, Token t, string text) 170 { 171 insertAfter(programName, t.getTokenIndex, text); 172 } 173 174 public void insertAfter(string programName, int index, string text) 175 { 176 // to insert after, just insert before next index (even if past end) 177 RewriteOperation op = new InsertAfterOp(index, text); 178 op.instructionIndex = programs[programName].length; 179 programs[programName] ~= op; 180 } 181 182 public void insertBefore(Token t, string text) 183 { 184 insertBefore(DEFAULT_PROGRAM_NAME, t, text); 185 } 186 187 public void insertBefore(size_t index, string text) 188 { 189 insertBefore(DEFAULT_PROGRAM_NAME, index, text); 190 } 191 192 public void insertBefore(string programName, Token t, string text) 193 { 194 insertBefore(programName, t.getTokenIndex(), text); 195 } 196 197 public void insertBefore(string programName, size_t index, string text) 198 { 199 RewriteOperation op = new InsertBeforeOp(index, text); 200 op.instructionIndex = programs[programName].length; 201 programs[programName] ~= op; 202 } 203 204 public void replace(size_t index, string text) 205 { 206 replace(DEFAULT_PROGRAM_NAME, index, index, text); 207 } 208 209 public void replace(size_t from, size_t to, string text) 210 { 211 replace(DEFAULT_PROGRAM_NAME, from, to, text); 212 } 213 214 public void replace(Token indexT, string text) 215 { 216 replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); 217 } 218 219 public void replace(Token from, Token to, string text) 220 { 221 replace(DEFAULT_PROGRAM_NAME, from, to, text); 222 } 223 224 public void replace(string programName, size_t from, size_t to, string text) 225 { 226 debug(TokenStreamRewriter) { 227 import std.stdio : writefln; 228 writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text); 229 } 230 if ( from > to || from<0 || to<0 || to >= tokens_.size ) { 231 throw 232 new 233 IllegalArgumentException( 234 format("replace: range invalid: %s..%s(size=%s)", 235 from, to, tokens_.size)); 236 } 237 RewriteOperation op = new ReplaceOp(from, to, text); 238 op.instructionIndex = programs[programName].length; 239 programs[programName] ~= op; 240 } 241 242 public void replace(string programName, Token from, Token to, string text) 243 { 244 debug(TokenStreamRewriter) { 245 import std.stdio : writefln; 246 writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text); 247 } 248 replace(programName, 249 from.getTokenIndex, 250 to.getTokenIndex, 251 text); 252 } 253 254 /** 255 * Delete token (can not use delete as identifier) 256 */ 257 public void deleteT(size_t index) 258 { 259 deleteT(DEFAULT_PROGRAM_NAME, index, index); 260 } 261 262 public void deleteT(size_t from, size_t to) 263 { 264 deleteT(DEFAULT_PROGRAM_NAME, from, to); 265 } 266 267 public void deleteT(Token indexT) 268 { 269 deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT); 270 } 271 272 public void deleteT(Token from, Token to) 273 { 274 deleteT(DEFAULT_PROGRAM_NAME, from, to); 275 } 276 277 public void deleteT(string programName, size_t from, size_t to) 278 { 279 replace(programName,from,to,null); 280 } 281 282 public void deleteT(string programName, Token from, Token to) 283 { 284 replace(programName,from,to,null); 285 } 286 287 public size_t getLastRewriteTokenIndex() 288 { 289 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); 290 } 291 292 private size_t getLastRewriteTokenIndex(string programName) 293 { 294 if (programName in lastRewriteTokenIndexes) { 295 return lastRewriteTokenIndexes[programName]; 296 } 297 else { 298 return -1; 299 } 300 } 301 302 private void setLastRewriteTokenIndex(string programName, size_t i) 303 { 304 lastRewriteTokenIndexes[programName] = i; 305 } 306 307 private RewriteOperation[] getProgram(string name) 308 { 309 if (name in programs) { 310 return programs[name]; 311 } 312 else { 313 return initializeProgram(name); 314 } 315 } 316 317 private RewriteOperation[] initializeProgram(string name) 318 { 319 RewriteOperation[] iso; 320 programs[name] = iso; 321 return iso; 322 } 323 324 /** 325 * Return the text from the original tokens altered per the 326 * instructions given to this rewriter. 327 */ 328 public string getText() 329 { 330 return getText(DEFAULT_PROGRAM_NAME, Interval.of(0,tokens_.size()-1)); 331 } 332 333 /** 334 * Return the text from the original tokens altered per the 335 * instructions given to this rewriter in programName. 336 */ 337 public string getText(string programName) 338 { 339 return getText(programName, Interval.of(0,tokens_.size-1)); 340 } 341 342 /** 343 * Return the text associated with the tokens in the interval from the 344 * original token stream but with the alterations given to this rewriter. 345 * The interval refers to the indexes in the original token stream. 346 * We do not alter the token stream in any way, so the indexes 347 * and intervals are still consistent. Includes any operations done 348 * to the first and last token in the interval. So, if you did an 349 * insertBefore on the first token, you would get that insertion. 350 * The same is true if you do an insertAfter the stop token. 351 */ 352 public string getText(Interval interval) 353 { 354 return getText(DEFAULT_PROGRAM_NAME, interval); 355 } 356 357 public string getText(string programName, Interval interval) 358 { 359 RewriteOperation[] rewrites; 360 361 if (programName in programs) 362 rewrites = programs[programName]; 363 364 int start = interval.a; 365 int stop = interval.b; 366 367 // ensure start/end are in range 368 if ( stop > tokens_.size-1 ) 369 stop = tokens_.size-1; 370 if ( start < 0 ) 371 start = 0; 372 if (rewrites.length == 0) { 373 return tokens_.getText(interval); // no instructions to execute 374 } 375 376 string buf; 377 378 // First, optimize instruction stream 379 RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites); 380 381 // Walk buffer, executing instructions and emitting tokens 382 int i = start; 383 384 while (i <= stop && i < tokens_.size) { 385 Token t = tokens_.get(i); 386 RewriteOperation op; 387 if (i in indexToOp) 388 op = indexToOp[i]; 389 indexToOp.remove(i); // remove so any left have index size-1 390 if (!op) { 391 // no operation at that index, just dump token 392 if (t.getType != TokenConstantDefinition.EOF) 393 buf ~= t.getText; 394 i++; // move to next token 395 } 396 else { 397 i = to!int(op.execute(buf)); // execute operation and skip 398 } 399 } 400 401 // include stuff after end if it's last index in buffer 402 // So, if they did an insertAfter(lastValidIndex, "foo"), include 403 // foo if end==lastValidIndex. 404 if (stop == tokens_.size()-1) { 405 // Scan any remaining operations after last token 406 // should be included (they will be inserts). 407 foreach (RewriteOperation op; indexToOp.values()) { 408 if (op.index >= tokens_.size-1) 409 buf ~= to!string(op.text); 410 } 411 } 412 return buf; 413 } 414 415 /** 416 * overlapping replaces that are not completed nested). Inserts to 417 * same index need to be combined etc... Here are the cases: 418 * 419 * I.i.u I.j.v leave alone, nonoverlapping 420 * I.i.u I.i.v combine: Iivu 421 * 422 * R.i-j.u R.x-y.v | i-j in x-y delete first R 423 * R.i-j.u R.i-j.v delete first R 424 * R.i-j.u R.x-y.v | x-y in i-j ERROR 425 * R.i-j.u R.x-y.v | boundaries overlap ERROR 426 * 427 * Delete special case of replace (text==null): 428 * D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 429 * 430 * I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before 431 * we're not deleting i) 432 * I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping 433 * R.x-y.v I.i.u | i in x-y ERROR 434 * R.x-y.v I.x.u R.x-y.uv (combine, delete I) 435 * R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 436 * 437 * I.i.u = insert u before op @ index i 438 * R.x-y.u = replace x-y indexed tokens with u 439 * 440 * First we need to examine replaces. For any replace op: 441 * 442 * 1. wipe out any insertions before op within that range. 443 * 2. Drop any replace op before that is contained completely within 444 * that range. 445 * 3. Throw exception upon boundary overlap with any previous replace. 446 * 447 * Then we can deal with inserts: 448 * 449 * 1. for any inserts to same index, combine even if not adjacent. 450 * 2. for any prior replace with same left boundary, combine this 451 * insert with replace and delete this replace. 452 * 3. throw exception if index in same range as previous replace 453 * 454 * Don't actually delete; make op null in list. Easier to walk list. 455 * Later we can throw as we add to index → op map. 456 * 457 * Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 458 * inserted stuff would be before the replace range. But, if you 459 * add tokens in front of a method body '{' and then delete the method 460 * body, I think the stuff before the '{' you added should disappear too. 461 * 462 * Return a map from token index to operation. 463 */ 464 protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites) 465 { 466 debug(TokenStreamRewriter) { 467 import std.stdio : writefln; 468 writefln("reduceToSingleOperationPerIndex"); 469 foreach (i, rew; rewrites) 470 writefln("\trewrites[%s] = %s", i, rew); 471 } 472 473 // WALK REPLACES 474 for (size_t i = 0; i < rewrites.length; i++) { 475 RewriteOperation op = rewrites[i]; 476 debug(TokenStreamRewriter) { 477 import std.stdio : writefln; 478 writefln("op0 = %s", op); 479 } 480 if (op is null) continue; 481 if (!(cast(ReplaceOp)op)) { 482 continue; 483 } 484 debug(TokenStreamRewriter) { 485 import std.stdio : writefln; 486 writefln("op = %s", op); 487 } 488 ReplaceOp rop = cast(ReplaceOp)rewrites[i]; 489 // Wipe prior inserts within range 490 InsertBeforeOp[] inserts = getKindOfOps!InsertBeforeOp(rewrites, i); 491 foreach (InsertBeforeOp iop; inserts) { 492 if ( iop.index == rop.index ) { 493 // E.g., insert before 2, delete 2..2; update replace 494 // text to include insert before, kill insert 495 rewrites[iop.instructionIndex] = null; 496 rop.text = iop.text ~ (rop.text !is null?rop.text:""); 497 } 498 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) { 499 // delete insert as it's a no-op. 500 rewrites[iop.instructionIndex] = null; 501 } 502 } 503 // Drop any prior replaces contained within 504 ReplaceOp[] prevReplaces = getKindOfOps!ReplaceOp(rewrites, i); 505 foreach (ReplaceOp prevRop; prevReplaces) { 506 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) { 507 // delete replace as it's a no-op. 508 rewrites[prevRop.instructionIndex] = null; 509 continue; 510 } 511 // throw exception unless disjoint or identical 512 bool disjoint = 513 prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex; 514 // Delete special case of replace (text==null): 515 // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 516 if ( prevRop.text==null && rop.text==null && !disjoint ) { 517 //System.out.println("overlapping deletes: "+prevRop+", "+rop); 518 rewrites[prevRop.instructionIndex] = null; // kill first delete 519 rop.index = min(prevRop.index, rop.index); 520 rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex); 521 debug { 522 import std.stdio : stderr, writefln; 523 stderr.writefln("new rop %s", rop); 524 } 525 } 526 else if ( !disjoint ) { 527 throw 528 new 529 IllegalArgumentException(format( 530 "replace op boundaries of %s overlap with previous %s", 531 rop, 532 prevRop)); 533 } 534 } 535 } 536 537 // WALK INSERTS 538 debug(TokenStreamRewriter) { 539 import std.stdio : stderr, writefln; 540 writefln("WALK INSERTS"); 541 } 542 for (int i = 0; i < rewrites.length; i++) { 543 RewriteOperation op = rewrites[i]; 544 if (op is null) continue; 545 if (!(cast(InsertBeforeOp)op)) continue; 546 InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i]; 547 // combine current insert with prior if any at same index 548 InsertBeforeOp[] prevInserts = getKindOfOps!InsertBeforeOp(rewrites, i); 549 foreach (InsertBeforeOp prevIop; prevInserts) { 550 debug(TokenStreamRewriter) { 551 import std.stdio : stderr, writefln; 552 writefln("prevIop = %s", prevIop); 553 } 554 if (prevIop.index == iop.index) { 555 if (cast(InsertAfterOp)prevIop) { 556 iop.text = catOpText(prevIop.text, iop.text); 557 rewrites[prevIop.instructionIndex] = null; 558 } 559 else if (cast(InsertBeforeOp)prevIop) { // combine objects 560 // convert to strings...we're in process of toString'ing 561 // whole token buffer so no lazy eval issue with any templates 562 iop.text = catOpText(iop.text, prevIop.text); 563 // delete redundant prior insert 564 rewrites[prevIop.instructionIndex] = null; 565 } 566 } 567 } 568 // look for replaces where iop.index is in range; error 569 debug(TokenStreamRewriter) { 570 import std.stdio : stderr, writefln; 571 writefln("look for replaces where iop.index is in range, i = %s", i); 572 } 573 ReplaceOp[] prevReplaces = getKindOfOps!ReplaceOp(rewrites, i); 574 debug(TokenStreamRewriter) { 575 import std.stdio : stderr, writefln; 576 writefln("prevReplaces = %s", prevReplaces); 577 } 578 foreach (ReplaceOp rop; prevReplaces) { 579 if ( iop.index == rop.index ) { 580 rop.text = catOpText(iop.text, rop.text); 581 rewrites[i] = null; // delete current insert 582 continue; 583 } 584 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) { 585 throw 586 new 587 IllegalArgumentException( 588 format("insert op %s within boundaries of previous %s", 589 iop, rop)); 590 } 591 } 592 } 593 594 debug(TokenStreamRewriter) { 595 import std.stdio : stderr, writefln; 596 writefln("rewrites after = %s", rewrites); 597 } 598 RewriteOperation[size_t] m; 599 for (int i = 0; i < rewrites.length; i++) { 600 RewriteOperation op = rewrites[i]; 601 if (op is null) continue; // ignore deleted ops 602 if (op.index in m) { 603 throw new Error("should only be one op per index"); 604 } 605 m[op.index] = op; 606 } 607 return m; 608 } 609 610 protected string catOpText(string a, string b) 611 { 612 string x; 613 string y; 614 if (a !is null) x = a; 615 if (b !is null) y = b; 616 return x~y; 617 } 618 619 protected auto getKindOfOps(T)(RewriteOperation[] rewrites, size_t before) 620 { 621 T[] ops; 622 for (int i=0; i<before && i<rewrites.length; i++) { 623 RewriteOperation op = rewrites[i]; 624 if (op is null) continue; // ignore deleted 625 if (T.classinfo == op.classinfo) { 626 ops ~= cast(T)(op); 627 } 628 } 629 return ops; 630 } 631 632 public static TokenStream tokens() 633 { 634 return tokens_; 635 } 636 637 }