1 /* 2 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.TokenStreamRewriter; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.InsertAfterOp; 11 import antlr.v4.runtime.InsertBeforeOp; 12 import antlr.v4.runtime.ReplaceOp; 13 import antlr.v4.runtime.RewriteOperation; 14 import antlr.v4.runtime.Token; 15 import antlr.v4.runtime.TokenConstantDefinition; 16 import antlr.v4.runtime.TokenStream; 17 import antlr.v4.runtime.misc.Interval; 18 import std.algorithm.comparison; 19 import std.conv; 20 import std.format; 21 import std.variant; 22 23 /** 24 * Useful for rewriting out a buffered input token stream after doing some 25 * augmentation or other manipulations on it. 26 * 27 * You can insert stuff, replace, and delete chunks. Note that the operations 28 * are done lazily--only if you convert the buffer to a {@link String} with 29 * {@link TokenStream#getText()}. This is very efficient because you are not 30 * moving data around all the time. As the buffer of tokens is converted to 31 * strings, the {@link #getText()} method(s) scan the input token stream and 32 * check to see if there is an operation at the current index. If so, the 33 * operation is done and then normal {@link String} rendering continues on the 34 * buffer. This is like having multiple Turing machine instruction streams 35 * (programs) operating on a single input tape. 36 * 37 * This rewriter makes no modifications to the token stream. It does not ask the 38 * stream to fill itself up nor does it advance the input cursor. The token 39 * stream {@link TokenStream#index()} will return the same value before and 40 * after any {@link #getText()} call. 41 * 42 * The rewriter only works on tokens that you have in the buffer and ignores the 43 * current input cursor. If you are buffering tokens on-demand, calling 44 * {@link #getText()} halfway through the input will only do rewrites for those 45 * tokens in the first half of the file. 46 * 47 * Since the operations are done lazily at {@link #getText}-time, operations do 48 * not screw up the token index values. That is, an insert operation at token 49 * index {@code i} does not change the index values for tokens 50 * {@code i}+1..n-1. 51 * 52 * Because operations never actually alter the buffer, you may always get the 53 * original token stream back without undoing anything. Since the instructions 54 * are queued up, you can easily simulate transactions and roll back any changes 55 * if there is an error just by removing instructions. For example, 56 * 57 * Examples: 58 * --- 59 * CharStream input = new ANTLRFileStream("input"); 60 * TLexer lex = new TLexer(input); 61 * CommonTokenStream tokens = new CommonTokenStream(lex); 62 * T parser = new T(tokens); 63 * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); 64 * parser.startRule(); 65 * --- 66 * 67 * Then in the rules, you can execute (assuming rewriter is visible): 68 * 69 * Examples: 70 * --- 71 * Token t,u; 72 * ... 73 * rewriter.insertAfter(t, "text to put after t");} 74 * rewriter.insertAfter(u, "text after u");} 75 * System.out.println(rewriter.getText()); 76 * --- 77 * 78 * You can also have multiple "instruction streams" and get multiple rewrites 79 * from a single pass over the input. Just name the instruction streams and use 80 * that name again when printing the buffer. This could be useful for generating 81 * a C file and also its header file--all from the same buffer: 82 * 83 * Examples: 84 * --- 85 * rewriter.insertAfter("pass1", t, "text to put after t");} 86 * rewriter.insertAfter("pass2", u, "text after u");} 87 * System.out.println(rewriter.getText("pass1")); 88 * System.out.println(rewriter.getText("pass2")); 89 * --- 90 * 91 * If you don't use named rewrite streams, a "default" stream is used as the 92 * first example shows. 93 */ 94 class TokenStreamRewriter 95 { 96 97 enum string DEFAULT_PROGRAM_NAME = "default"; 98 99 enum int MIN_TOKEN_INDEX = 0; 100 101 /** 102 * Our source stream 103 * @uml 104 * @read 105 */ 106 private static TokenStream tokens_; 107 108 protected RewriteOperation[][string] programs; 109 110 protected size_t[string] lastRewriteTokenIndexes; 111 112 private RewriteOperation[] rewriteOps; 113 114 public this() 115 { 116 } 117 118 public this(TokenStream tokens) 119 { 120 tokens_ = tokens; 121 programs[DEFAULT_PROGRAM_NAME] = rewriteOps; 122 } 123 124 public void rollback(int instructionIndex) 125 { 126 rollback(DEFAULT_PROGRAM_NAME, instructionIndex); 127 } 128 129 /** 130 * Rollback the instruction stream for a program so that 131 * the indicated instruction (via instructionIndex) is no 132 * longer in the stream. UNTESTED! 133 */ 134 public void rollback(string programName, int instructionIndex) 135 { 136 RewriteOperation[] ist = programs[programName]; 137 if (programName in programs) { 138 programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex]; 139 } 140 } 141 142 public void deleteProgram() 143 { 144 deleteProgram(DEFAULT_PROGRAM_NAME); 145 } 146 147 /** 148 * Reset the program so that no instructions exist 149 */ 150 public void deleteProgram(string programName) 151 { 152 rollback(programName, MIN_TOKEN_INDEX); 153 } 154 155 public void insertAfter(Token t, Variant text) 156 { 157 insertAfter(DEFAULT_PROGRAM_NAME, t, text); 158 } 159 160 public void insertAfter(int index, Variant text) 161 { 162 insertAfter(DEFAULT_PROGRAM_NAME, index, text); 163 } 164 165 public void insertAfter(string programName, Token t, Variant text) 166 { 167 insertAfter(programName, t.getTokenIndex, text); 168 } 169 170 public void insertAfter(string programName, size_t index, Variant text) 171 { 172 // to insert after, just insert before next index (even if past end) 173 RewriteOperation op = new InsertAfterOp(index, text); 174 op.instructionIndex = programs[programName].length; 175 programs[programName] ~= op; 176 } 177 178 public void insertBefore(Token t, Variant text) 179 { 180 insertBefore(DEFAULT_PROGRAM_NAME, t, text); 181 } 182 183 public void insertBefore(size_t index, Variant text) 184 { 185 insertBefore(DEFAULT_PROGRAM_NAME, index, text); 186 } 187 188 public void insertBefore(string programName, Token t, Variant text) 189 { 190 insertBefore(programName, t.getTokenIndex(), text); 191 } 192 193 public void insertBefore(string programName, size_t index, Variant text) 194 { 195 RewriteOperation op = new InsertBeforeOp(index, text); 196 op.instructionIndex = programs[programName].length; 197 programs[programName] ~= op; 198 } 199 200 public void replace(size_t index, Variant text) 201 { 202 replace(DEFAULT_PROGRAM_NAME, index, index, text); 203 } 204 205 public void replace(size_t from, size_t to, Variant text) 206 { 207 replace(DEFAULT_PROGRAM_NAME, from, to, text); 208 } 209 210 public void replace(Token indexT, Variant text) 211 { 212 replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); 213 } 214 215 public void replace(Token from, Token to, Variant text) 216 { 217 replace(DEFAULT_PROGRAM_NAME, from, to, text); 218 } 219 220 public void replace(string programName, size_t from, size_t to, Variant text) 221 { 222 if ( from > to || from<0 || to<0 || to >= tokens_.size ) { 223 throw 224 new IllegalArgumentException( 225 format!"replace: range invalid: %s..%s(size=%s)" 226 (from, to, tokens_.size)); 227 } 228 RewriteOperation op = new ReplaceOp(from, to, text); 229 op.instructionIndex = programs[programName].length; 230 programs[programName] ~= op; 231 232 debug(TokenStreamRewriter) { 233 import std.stdio : writefln; 234 writefln("replace end: op = %s, programs = %s", op, programs); 235 } 236 } 237 238 public void replace(string programName, Token from, Token to, Variant text) 239 { 240 debug(TokenStreamRewriter) { 241 import std.stdio : writefln; 242 writefln("replace constructor2: from = %s, to = %s, text = %s", from, to, text); 243 } 244 replace(programName, 245 from.getTokenIndex, 246 to.getTokenIndex, 247 text); 248 } 249 250 /** 251 * Delete token (can not use delete as identifier) 252 */ 253 public void deleteT(size_t index) 254 { 255 deleteT(DEFAULT_PROGRAM_NAME, index, index); 256 } 257 258 public void deleteT(size_t from, size_t to) 259 { 260 deleteT(DEFAULT_PROGRAM_NAME, from, to); 261 } 262 263 public void deleteT(Token indexT) 264 { 265 deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT); 266 } 267 268 public void deleteT(Token from, Token to) 269 { 270 deleteT(DEFAULT_PROGRAM_NAME, from, to); 271 } 272 273 public void deleteT(string programName, size_t from, size_t to) 274 { 275 Variant Null; 276 replace(programName, from, to, Null); 277 } 278 279 public void deleteT(string programName, Token from, Token to) 280 { 281 Variant Null; 282 replace(programName, from, to, Null); 283 } 284 285 public size_t getLastRewriteTokenIndex() 286 { 287 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); 288 } 289 290 private size_t getLastRewriteTokenIndex(string programName) 291 { 292 if (programName in lastRewriteTokenIndexes) { 293 return lastRewriteTokenIndexes[programName]; 294 } 295 else { 296 return -1; 297 } 298 } 299 300 private void setLastRewriteTokenIndex(string programName, size_t i) 301 { 302 lastRewriteTokenIndexes[programName] = i; 303 } 304 305 private RewriteOperation[] getProgram(string name) 306 { 307 if (name in programs) { 308 return programs[name]; 309 } 310 else { 311 return initializeProgram(name); 312 } 313 } 314 315 private RewriteOperation[] initializeProgram(string name) 316 { 317 RewriteOperation[] iso; 318 programs[name] = iso; 319 return iso; 320 } 321 322 /** 323 * Return the text from the original tokens altered per the 324 * instructions given to this rewriter. 325 */ 326 public Variant getText() 327 { 328 return getText(DEFAULT_PROGRAM_NAME, Interval.of(0, to!int(tokens_.size) - 1)); 329 } 330 331 /** 332 * Return the text from the original tokens altered per the 333 * instructions given to this rewriter in programName. 334 */ 335 public Variant getText(string programName) 336 { 337 return getText(programName, Interval.of(0, to!int(tokens_.size) - 1)); 338 } 339 340 /** 341 * Return the text associated with the tokens in the interval from the 342 * original token stream but with the alterations given to this rewriter. 343 * The interval refers to the indexes in the original token stream. 344 * We do not alter the token stream in any way, so the indexes 345 * and intervals are still consistent. Includes any operations done 346 * to the first and last token in the interval. So, if you did an 347 * insertBefore on the first token, you would get that insertion. 348 * The same is true if you do an insertAfter the stop token. 349 */ 350 public Variant getText(Interval interval) 351 { 352 return getText(DEFAULT_PROGRAM_NAME, interval); 353 } 354 355 public Variant getText(string programName, Interval interval) 356 { 357 RewriteOperation[] rewrites; 358 359 if (programName in programs) 360 rewrites = programs[programName]; 361 362 int start = interval.a; 363 int stop = interval.b; 364 365 // ensure start/end are in range 366 if ( stop > to!int(tokens_.size) - 1 ) 367 stop = to!int(tokens_.size) - 1; 368 if ( start < 0 ) 369 start = 0; 370 371 if (!rewrites) { 372 return tokens_.getText(interval); // no instructions to execute 373 } 374 375 Variant buf; 376 377 // First, optimize instruction stream 378 RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites); 379 380 // Walk buffer, executing instructions and emitting tokens 381 int i = start; 382 383 debug(TokenStreamRewriter) { 384 import std.stdio : stderr, writefln; 385 writefln("tokens_.size = %s", tokens_.size); 386 } 387 388 while (i <= stop && i < tokens_.size) { 389 Token t = tokens_.get(i); 390 debug(TokenStreamRewriter) { 391 import std.stdio : stderr, writefln; 392 writefln("i = %s, token = %s", i, t); 393 } 394 RewriteOperation op; 395 if (i in indexToOp) 396 op = indexToOp[i]; 397 398 indexToOp.remove(i); // remove so any left have index size-1 399 400 if (!op) { 401 // no operation at that index, just dump token 402 if (t.getType != TokenConstantDefinition.EOF) { 403 Variant Null; 404 buf is Null ? buf = t.getText : (buf ~= t.getText); 405 } 406 i++; // move to next token 407 } 408 else { 409 i = to!int(op.execute(buf)); // execute operation and skip 410 } 411 } 412 413 // include stuff after end if it's last index in buffer 414 // So, if they did an insertAfter(lastValidIndex, "foo"), include 415 // foo if end==lastValidIndex. 416 if (stop == tokens_.size()-1) { 417 // Scan any remaining operations after last token 418 // should be included (they will be inserts). 419 foreach (RewriteOperation op; indexToOp.values()) { 420 if (op.index >= tokens_.size-1) 421 buf ~= op.text; 422 } 423 } 424 return buf; 425 } 426 427 /** 428 * We need to combine operations and report invalid operations (like 429 * overlapping replaces that are not completed nested). Inserts to 430 * same index need to be combined etc. 431 * 432 * Here are the cases: 433 * 434 * I.i.u I.j.v leave alone, nonoverlapping<br> 435 * I.i.u I.i.v combine: Iivu 436 * 437 * R.i-j.u R.x-y.v | i-j in x-y delete first R<br> 438 * R.i-j.u R.i-j.v delete first R<br> 439 * R.i-j.u R.x-y.v | x-y in i-j ERROR<br> 440 * R.i-j.u R.x-y.v | boundaries overlap ERROR 441 * 442 * Delete special case of replace (text==null):<br> 443 * D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 444 * 445 * I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before<br> 446 * we're not deleting i)<br> 447 * I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping<br> 448 * R.x-y.v I.i.u | i in x-y ERROR<br> 449 * R.x-y.v I.x.u R.x-y.uv (combine, delete I)<br> 450 * R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 451 * 452 * I.i.u = insert u before op @ index i<br> 453 * R.x-y.u = replace x-y indexed tokens with u 454 * 455 * First we need to examine replaces. For any replace op: 456 * 457 * 1. wipe out any insertions before op within that range.<br> 458 * 2. Drop any replace op before that is contained completely within 459 * that range.<br> 460 * 3. Throw exception upon boundary overlap with any previous replace. 461 * 462 * Then we can deal with inserts: 463 * 464 * 1. for any inserts to same index, combine even if not adjacent.<br> 465 * 2. for any prior replace with same left boundary, combine this 466 * insert with replace and delete this replace.<br> 467 * 3. throw exception if index in same range as previous replace 468 * 469 * Don't actually delete; make op null in list. Easier to walk list. 470 * Later we can throw as we add to index → op map. 471 * 472 * Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 473 * inserted stuff would be before the replace range. But, if you 474 * add tokens in front of a method body '{' and then delete the method 475 * body, I think the stuff before the '{' you added should disappear too. 476 * 477 * Return: 478 * a map from token index to operation. 479 */ 480 protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites) 481 { 482 debug(TokenStreamRewriter) { 483 import std.stdio : writefln; 484 writefln("reduceToSingleOperationPerIndex"); 485 foreach (i, rew; rewrites) 486 writefln("\trewrites[%s] = %s", i, rew); 487 } 488 489 // WALK REPLACES 490 for (size_t i = 0; i < rewrites.length; i++) { 491 RewriteOperation op = rewrites[i]; 492 debug(TokenStreamRewriter) { 493 import std.stdio : writefln; 494 writefln("op0 = %s", op); 495 } 496 if (op is null) continue; 497 if (!(cast(ReplaceOp)op)) { 498 continue; 499 } 500 debug(TokenStreamRewriter) { 501 import std.stdio : writefln; 502 writefln("op = %s", op); 503 } 504 ReplaceOp rop = cast(ReplaceOp)rewrites[i]; 505 // Wipe prior inserts within range 506 InsertBeforeOp[] inserts = getKindOfOps!(InsertBeforeOp)(rewrites, i); 507 foreach (InsertBeforeOp iop; inserts) { 508 if ( iop.index == rop.index ) { 509 // E.g., insert before 2, delete 2..2; update replace 510 // text to include insert before, kill insert 511 rewrites[iop.instructionIndex] = null; 512 Variant Null; 513 rop.text = iop.text ~ (rop.text !is Null?rop.text:Null); 514 } 515 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) { 516 // delete insert as it's a no-op. 517 rewrites[iop.instructionIndex] = null; 518 } 519 } 520 // Drop any prior replaces contained within 521 ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i); 522 foreach (ReplaceOp prevRop; prevReplaces) { 523 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) { 524 // delete replace as it's a no-op. 525 rewrites[prevRop.instructionIndex] = null; 526 continue; 527 } 528 // throw exception unless disjoint or identical 529 bool disjoint = 530 prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex; 531 // Delete special case of replace (text==null): 532 // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) 533 if ( prevRop.text==null && rop.text==null && !disjoint ) { 534 debug(TokenStreamRewriter) { 535 import std.stdio : writefln; 536 writefln("overlapping deletes: %s, %s", prevRop, rop); 537 } 538 rewrites[prevRop.instructionIndex] = null; // kill first delete 539 rop.index = min(prevRop.index, rop.index); 540 rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex); 541 debug { 542 import std.stdio : stderr, writefln; 543 stderr.writefln("new rop %s", rop); 544 } 545 } 546 else if ( !disjoint ) { 547 throw 548 new 549 IllegalArgumentException(format( 550 "replace op boundaries of %s overlap with previous %s", 551 rop, 552 prevRop)); 553 } 554 } 555 } 556 557 // WALK INSERTS 558 debug(TokenStreamRewriter) { 559 import std.stdio : stderr, writefln; 560 writefln("WALK INSERTS"); 561 } 562 for (int i = 0; i < rewrites.length; i++) { 563 RewriteOperation op = rewrites[i]; 564 if (op is null) continue; 565 if (!(cast(InsertBeforeOp)op)) continue; 566 InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i]; 567 // combine current insert with prior if any at same index 568 InsertBeforeOp[] prevInserts = getKindOfOps!(InsertBeforeOp)(rewrites, i); 569 foreach (InsertBeforeOp prevIop; prevInserts) { 570 debug(TokenStreamRewriter) { 571 import std.stdio : writefln; 572 writefln("prevIop = %s", prevIop); 573 } 574 if (prevIop.index == iop.index) { 575 if (cast(InsertAfterOp)prevIop) { 576 iop.text = catOpText(prevIop.text, iop.text); 577 rewrites[prevIop.instructionIndex] = null; 578 } 579 else if (cast(InsertBeforeOp)prevIop) { // combine objects 580 // convert to strings...we're in process of toString'ing 581 // whole token buffer so no lazy eval issue with any templates 582 iop.text = catOpText(iop.text, prevIop.text); 583 // delete redundant prior insert 584 rewrites[prevIop.instructionIndex] = null; 585 } 586 } 587 } 588 // look for replaces where iop.index is in range; error 589 debug(TokenStreamRewriter) { 590 import std.stdio : stderr, writefln; 591 writefln("look for replaces where iop.index is in range, i = %s", i); 592 } 593 ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i); 594 debug(TokenStreamRewriter) { 595 import std.stdio : stderr, writefln; 596 writefln("prevReplaces = %s", prevReplaces); 597 } 598 foreach (ReplaceOp rop; prevReplaces) { 599 if ( iop.index == rop.index ) { 600 rop.text = catOpText(iop.text, rop.text); 601 rewrites[i] = null; // delete current insert 602 continue; 603 } 604 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) { 605 throw 606 new 607 IllegalArgumentException( 608 format("insert op %s within boundaries of previous %s", 609 iop, rop)); 610 } 611 } 612 } 613 614 debug(TokenStreamRewriter) { 615 import std.stdio : stderr, writefln; 616 writefln("rewrites after = %s", rewrites); 617 } 618 RewriteOperation[size_t] m; 619 for (int i = 0; i < rewrites.length; i++) { 620 RewriteOperation op = rewrites[i]; 621 if (!op) continue; // ignore deleted ops 622 if (op.index in m) { 623 throw new Error("should only be one op per index"); 624 } 625 m[op.index] = op; 626 } 627 return m; 628 } 629 630 protected Variant catOpText(Variant a, Variant b) 631 { 632 Variant Null; 633 if (a !is Null && b !is Null) 634 return a ~ b; 635 if (a !is Null) 636 return a; 637 return b; 638 } 639 640 protected auto getKindOfOps(U)(RewriteOperation[] rewrites, size_t before) 641 { 642 U[] ops; 643 for (int i=0; i<before && i<rewrites.length; i++) { 644 RewriteOperation op = rewrites[i]; 645 if (op is null) continue; // ignore deleted 646 if (U.classinfo == op.classinfo) { 647 ops ~= cast(U)(op); 648 } 649 } 650 return ops; 651 } 652 653 public static TokenStream tokens() 654 { 655 return tokens_; 656 } 657 658 }