1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.TokenStreamRewriter;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.InsertAfterOp;
11 import antlr.v4.runtime.InsertBeforeOp;
12 import antlr.v4.runtime.ReplaceOp;
13 import antlr.v4.runtime.RewriteOperation;
14 import antlr.v4.runtime.Token;
15 import antlr.v4.runtime.TokenConstantDefinition;
16 import antlr.v4.runtime.TokenStream;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm.comparison;
19 import std.conv;
20 import std.format;
21 import std.variant;
22 
23 /**
24  * Useful for rewriting out a buffered input token stream after doing some
25  * augmentation or other manipulations on it.
26  *
27  * You can insert stuff, replace, and delete chunks. Note that the operations
28  * are done lazily--only if you convert the buffer to a {@link String} with
29  * {@link TokenStream#getText()}. This is very efficient because you are not
30  * moving data around all the time. As the buffer of tokens is converted to
31  * strings, the {@link #getText()} method(s) scan the input token stream and
32  * check to see if there is an operation at the current index. If so, the
33  * operation is done and then normal {@link String} rendering continues on the
34  * buffer. This is like having multiple Turing machine instruction streams
35  * (programs) operating on a single input tape.
36  *
37  * This rewriter makes no modifications to the token stream. It does not ask the
38  * stream to fill itself up nor does it advance the input cursor. The token
39  * stream {@link TokenStream#index()} will return the same value before and
40  * after any {@link #getText()} call.
41  *
42  * The rewriter only works on tokens that you have in the buffer and ignores the
43  * current input cursor. If you are buffering tokens on-demand, calling
44  * {@link #getText()} halfway through the input will only do rewrites for those
45  * tokens in the first half of the file.
46  *
47  * Since the operations are done lazily at {@link #getText}-time, operations do
48  * not screw up the token index values. That is, an insert operation at token
49  * index {@code i} does not change the index values for tokens
50  * {@code i}+1..n-1.
51  *
52  * Because operations never actually alter the buffer, you may always get the
53  * original token stream back without undoing anything. Since the instructions
54  * are queued up, you can easily simulate transactions and roll back any changes
55  * if there is an error just by removing instructions. For example,
56  *
57  * Examples:
58  * ---
59  * CharStream input = new ANTLRFileStream("input");
60  * TLexer lex = new TLexer(input);
61  * CommonTokenStream tokens = new CommonTokenStream(lex);
62  * T parser = new T(tokens);
63  * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
64  * parser.startRule();
65  * ---
66  *
67  * Then in the rules, you can execute (assuming rewriter is visible):
68  *
69  * Examples:
70  * ---
71  * Token t,u;
72  * ...
73  * rewriter.insertAfter(t, "text to put after t");}
74  * rewriter.insertAfter(u, "text after u");}
75  * System.out.println(rewriter.getText());
76  * ---
77  *
78  * You can also have multiple "instruction streams" and get multiple rewrites
79  * from a single pass over the input. Just name the instruction streams and use
80  * that name again when printing the buffer. This could be useful for generating
81  * a C file and also its header file--all from the same buffer:
82  *
83  * Examples:
84  * ---
85  * rewriter.insertAfter("pass1", t, "text to put after t");}
86  * rewriter.insertAfter("pass2", u, "text after u");}
87  * System.out.println(rewriter.getText("pass1"));
88  * System.out.println(rewriter.getText("pass2"));
89  * ---
90  *
91  * If you don't use named rewrite streams, a "default" stream is used as the
92  * first example shows.
93  */
94 class TokenStreamRewriter
95 {
96 
97     public static immutable string DEFAULT_PROGRAM_NAME = "default";
98 
99     public static immutable int MIN_TOKEN_INDEX = 0;
100 
101     /**
102      * Our source stream
103      * @uml
104      * @read
105      */
106     private static TokenStream tokens_;
107 
108     protected RewriteOperation[][string] programs;
109 
110     protected size_t[string] lastRewriteTokenIndexes;
111 
112     private RewriteOperation[] rewriteOps;
113 
114     public this()
115     {
116     }
117 
118     public this(TokenStream tokens)
119     {
120         tokens_ = tokens;
121         programs[DEFAULT_PROGRAM_NAME] = rewriteOps;
122     }
123 
124     public void rollback(int instructionIndex)
125     {
126         rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
127     }
128 
129     /**
130      * Rollback the instruction stream for a program so that
131      *  the indicated instruction (via instructionIndex) is no
132      *  longer in the stream. UNTESTED!
133      */
134     public void rollback(string programName, int instructionIndex)
135     {
136         RewriteOperation[] ist = programs[programName];
137         if (programName in programs) {
138             programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex];
139         }
140     }
141 
142     public void deleteProgram()
143     {
144         deleteProgram(DEFAULT_PROGRAM_NAME);
145     }
146 
147     /**
148      * Reset the program so that no instructions exist
149      */
150     public void deleteProgram(string programName)
151     {
152         rollback(programName, MIN_TOKEN_INDEX);
153     }
154 
155     public void insertAfter(Token t, Variant text)
156     {
157         insertAfter(DEFAULT_PROGRAM_NAME, t, text);
158     }
159 
160     public void insertAfter(int index, Variant text)
161     {
162         insertAfter(DEFAULT_PROGRAM_NAME, index, text);
163     }
164 
165     public void insertAfter(string programName, Token t, Variant text)
166     {
167         insertAfter(programName, t.getTokenIndex, text);
168     }
169 
170     public void insertAfter(string programName, int index, Variant text)
171     {
172         // to insert after, just insert before next index (even if past end)
173         RewriteOperation op = new InsertAfterOp(index, text);
174         op.instructionIndex = programs[programName].length;
175         programs[programName] ~= op;
176     }
177 
178     public void insertBefore(Token t, Variant text)
179     {
180         insertBefore(DEFAULT_PROGRAM_NAME, t, text);
181     }
182 
183     public void insertBefore(size_t index, Variant text)
184     {
185         insertBefore(DEFAULT_PROGRAM_NAME, index, text);
186     }
187 
188     public void insertBefore(string programName, Token t, Variant text)
189     {
190         insertBefore(programName, t.getTokenIndex(), text);
191     }
192 
193     public void insertBefore(string programName, size_t index, Variant text)
194     {
195         RewriteOperation op = new InsertBeforeOp(index, text);
196         op.instructionIndex = programs[programName].length;
197         programs[programName] ~= op;
198     }
199 
200     public void replace(size_t index, Variant text)
201     {
202         replace(DEFAULT_PROGRAM_NAME, index, index, text);
203     }
204 
205     public void replace(size_t from, size_t to, Variant text)
206     {
207         replace(DEFAULT_PROGRAM_NAME, from, to, text);
208     }
209 
210     public void replace(Token indexT, Variant text)
211     {
212         replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
213     }
214 
215     public void replace(Token from, Token to, Variant text)
216     {
217         replace(DEFAULT_PROGRAM_NAME, from, to, text);
218     }
219 
220     public void replace(string programName, size_t from, size_t to, Variant text)
221     {
222         if ( from > to || from<0 || to<0 || to >= tokens_.size ) {
223             throw
224                 new
225                 IllegalArgumentException(
226                                          format("replace: range invalid: %s..%s(size=%s)",
227                                                 from, to, tokens_.size));
228         }
229         RewriteOperation op = new ReplaceOp(from, to, text);
230         op.instructionIndex = programs[programName].length;
231         programs[programName] ~= op;
232 
233         debug(TokenStreamRewriter) {
234             import std.stdio : writefln;
235             writefln("replace end: op = %s, programs = %s", op, programs);
236         }
237     }
238 
239     public void replace(string programName, Token from, Token to, Variant text)
240     {
241         debug(TokenStreamRewriter) {
242             import std.stdio : writefln;
243             writefln("replace constructor2: from = %s, to = %s, text = %s", from, to, text);
244         }
245         replace(programName,
246                 from.getTokenIndex,
247                 to.getTokenIndex,
248                 text);
249     }
250 
251     /**
252      * Delete token (can not use delete as identifier)
253      */
254     public void deleteT(size_t index)
255     {
256         deleteT(DEFAULT_PROGRAM_NAME, index, index);
257     }
258 
259     public void deleteT(size_t from, size_t to)
260     {
261         deleteT(DEFAULT_PROGRAM_NAME, from, to);
262     }
263 
264     public void deleteT(Token indexT)
265     {
266         deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT);
267     }
268 
269     public void deleteT(Token from, Token to)
270     {
271         deleteT(DEFAULT_PROGRAM_NAME, from, to);
272     }
273 
274     public void deleteT(string programName, size_t from, size_t to)
275     {
276         Variant Null;
277         replace(programName, from, to, Null);
278     }
279 
280     public void deleteT(string programName, Token from, Token to)
281     {
282         Variant Null;
283         replace(programName, from, to, Null);
284     }
285 
286     public size_t getLastRewriteTokenIndex()
287     {
288         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
289     }
290 
291     private size_t getLastRewriteTokenIndex(string programName)
292     {
293         if (programName in lastRewriteTokenIndexes) {
294             return lastRewriteTokenIndexes[programName];
295         }
296         else {
297             return -1;
298         }
299     }
300 
301     private void setLastRewriteTokenIndex(string programName, size_t i)
302     {
303         lastRewriteTokenIndexes[programName] =  i;
304     }
305 
306     private RewriteOperation[] getProgram(string name)
307     {
308         if (name in programs) {
309             return programs[name];
310         }
311         else {
312             return initializeProgram(name);
313         }
314     }
315 
316     private RewriteOperation[] initializeProgram(string name)
317     {
318         RewriteOperation[] iso;
319         programs[name] = iso;
320         return iso;
321     }
322 
323     /**
324      * Return the text from the original tokens altered per the
325      *  instructions given to this rewriter.
326      */
327     public Variant getText()
328     {
329         return getText(DEFAULT_PROGRAM_NAME, Interval.of(0, tokens_.size - 1));
330     }
331 
332     /**
333      * Return the text from the original tokens altered per the
334      *  instructions given to this rewriter in programName.
335      */
336     public Variant getText(string programName)
337     {
338         return getText(programName, Interval.of(0, tokens_.size - 1));
339     }
340 
341     /**
342      * Return the text associated with the tokens in the interval from the
343      *  original token stream but with the alterations given to this rewriter.
344      *  The interval refers to the indexes in the original token stream.
345      *  We do not alter the token stream in any way, so the indexes
346      *  and intervals are still consistent. Includes any operations done
347      *  to the first and last token in the interval. So, if you did an
348      *  insertBefore on the first token, you would get that insertion.
349      *  The same is true if you do an insertAfter the stop token.
350      */
351     public Variant getText(Interval interval)
352     {
353         return getText(DEFAULT_PROGRAM_NAME, interval);
354     }
355 
356     public Variant getText(string programName, Interval interval)
357     {
358         RewriteOperation[] rewrites;
359 
360         if (programName in programs)
361             rewrites = programs[programName];
362 
363         int start = interval.a;
364         int stop = interval.b;
365 
366         // ensure start/end are in range
367         if ( stop > tokens_.size-1 )
368             stop = tokens_.size-1;
369         if ( start < 0 )
370             start = 0;
371 
372         if (!rewrites) {
373             return tokens_.getText(interval); // no instructions to execute
374         }
375 
376         Variant buf;
377 
378         // First, optimize instruction stream
379         RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites);
380 
381         // Walk buffer, executing instructions and emitting tokens
382         int i = start;
383 
384         debug(TokenStreamRewriter) {
385                     import std.stdio : stderr, writefln;
386                     writefln("tokens_.size = %s", tokens_.size);
387                 }
388 
389         while (i <= stop && i < tokens_.size) {
390             Token t = tokens_.get(i);
391             debug(TokenStreamRewriter) {
392                     import std.stdio : stderr, writefln;
393                     writefln("i = %s, token = %s", i, t);
394                 }
395             RewriteOperation op;
396             if (i in indexToOp)
397                 op = indexToOp[i];
398 
399             indexToOp.remove(i); // remove so any left have index size-1
400 
401             if (!op) {
402                 // no operation at that index, just dump token
403                 if (t.getType != TokenConstantDefinition.EOF) {
404                     Variant Null;
405                     buf is Null ? buf = t.getText : (buf ~= t.getText);
406                 }
407                 i++; // move to next token
408             }
409             else {
410                 i = to!int(op.execute(buf)); // execute operation and skip
411             }
412         }
413 
414         // include stuff after end if it's last index in buffer
415         // So, if they did an insertAfter(lastValidIndex, "foo"), include
416         // foo if end==lastValidIndex.
417         if (stop == tokens_.size()-1) {
418             // Scan any remaining operations after last token
419             // should be included (they will be inserts).
420             foreach (RewriteOperation op; indexToOp.values()) {
421                 if (op.index >= tokens_.size-1)
422                     buf ~= op.text;
423             }
424         }
425         return buf;
426     }
427 
428     /**
429      * We need to combine operations and report invalid operations (like
430      * overlapping replaces that are not completed nested). Inserts to
431      * same index need to be combined etc.
432      *
433      * Here are the cases:
434      *
435      *  I.i.u I.j.v                             leave alone, nonoverlapping<br>
436      *  I.i.u I.i.v                             combine: Iivu
437      *
438      *  R.i-j.u R.x-y.v | i-j in x-y            delete first R<br>
439      *  R.i-j.u R.i-j.v                         delete first R<br>
440      *  R.i-j.u R.x-y.v | x-y in i-j            ERROR<br>
441      *  R.i-j.u R.x-y.v | boundaries overlap    ERROR
442      *
443      *  Delete special case of replace (text==null):<br>
444      *  D.i-j.u D.x-y.v | boundaries overlap    combine to max(min)..max(right)
445      *
446      *  I.i.u R.x-y.v | i in (x+1)-y            delete I (since insert before<br>
447      *                                          we're not deleting i)<br>
448      *  I.i.u R.x-y.v | i not in (x+1)-y        leave alone, nonoverlapping<br>
449      *  R.x-y.v I.i.u | i in x-y                ERROR<br>
450      *  R.x-y.v I.x.u                           R.x-y.uv (combine, delete I)<br>
451      *  R.x-y.v I.i.u | i not in x-y            leave alone, nonoverlapping
452      *
453      *  I.i.u = insert u before op @ index i<br>
454      *  R.x-y.u = replace x-y indexed tokens with u
455      *
456      *  First we need to examine replaces. For any replace op:
457      *
458      *      1. wipe out any insertions before op within that range.<br>
459      *      2. Drop any replace op before that is contained completely within
460      *   that range.<br>
461      *      3. Throw exception upon boundary overlap with any previous replace.
462      *
463      *  Then we can deal with inserts:
464      *
465      *      1. for any inserts to same index, combine even if not adjacent.<br>
466      *      2. for any prior replace with same left boundary, combine this
467      *   insert with replace and delete this replace.<br>
468      *      3. throw exception if index in same range as previous replace
469      *
470      *  Don't actually delete; make op null in list. Easier to walk list.
471      *  Later we can throw as we add to index &rarr; op map.
472      *
473      *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
474      *  inserted stuff would be before the replace range. But, if you
475      *  add tokens in front of a method body '{' and then delete the method
476      *  body, I think the stuff before the '{' you added should disappear too.
477      *
478      *  Return:
479      *  a map from token index to operation.
480      */
481     protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites)
482     {
483         debug(TokenStreamRewriter) {
484             import std.stdio : writefln;
485             writefln("reduceToSingleOperationPerIndex");
486             foreach (i, rew; rewrites)
487                 writefln("\trewrites[%s] = %s", i, rew);
488         }
489 
490         // WALK REPLACES
491         for (size_t i = 0; i < rewrites.length; i++) {
492             RewriteOperation op = rewrites[i];
493             debug(TokenStreamRewriter) {
494                 import std.stdio : writefln;
495                 writefln("op0 = %s", op);
496             }
497             if (op is null) continue;
498             if (!(cast(ReplaceOp)op)) {
499                 continue;
500             }
501             debug(TokenStreamRewriter) {
502                 import std.stdio : writefln;
503                 writefln("op = %s", op);
504             }
505             ReplaceOp rop = cast(ReplaceOp)rewrites[i];
506             // Wipe prior inserts within range
507             InsertBeforeOp[] inserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
508             foreach (InsertBeforeOp iop; inserts) {
509                 if ( iop.index == rop.index ) {
510                     // E.g., insert before 2, delete 2..2; update replace
511                     // text to include insert before, kill insert
512                     rewrites[iop.instructionIndex] = null;
513                     Variant Null;
514                     rop.text = iop.text ~ (rop.text !is Null?rop.text:Null);
515                 }
516                 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) {
517                     // delete insert as it's a no-op.
518                     rewrites[iop.instructionIndex] =  null;
519                 }
520             }
521             // Drop any prior replaces contained within
522             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
523             foreach (ReplaceOp prevRop; prevReplaces) {
524                 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
525                     // delete replace as it's a no-op.
526                     rewrites[prevRop.instructionIndex] = null;
527                     continue;
528                 }
529                 // throw exception unless disjoint or identical
530                 bool disjoint =
531                     prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
532                 // Delete special case of replace (text==null):
533                 // D.i-j.u D.x-y.v  | boundaries overlap    combine to max(min)..max(right)
534                 if ( prevRop.text==null && rop.text==null && !disjoint ) {
535                     debug(TokenStreamRewriter) {
536                         import std.stdio : writefln;
537                         writefln("overlapping deletes: %s, %s", prevRop, rop);
538                     }
539                     rewrites[prevRop.instructionIndex] = null; // kill first delete
540                     rop.index = min(prevRop.index, rop.index);
541                     rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex);
542                     debug {
543                         import std.stdio : stderr, writefln;
544                         stderr.writefln("new rop %s", rop);
545                     }
546                 }
547                 else if ( !disjoint ) {
548                     throw
549                         new
550                         IllegalArgumentException(format(
551                                                         "replace op boundaries of %s overlap with previous %s",
552                                                         rop,
553                                                         prevRop));
554                 }
555             }
556         }
557 
558         // WALK INSERTS
559         debug(TokenStreamRewriter) {
560             import std.stdio : stderr, writefln;
561             writefln("WALK INSERTS");
562         }
563         for (int i = 0; i < rewrites.length; i++) {
564             RewriteOperation op = rewrites[i];
565             if (op is null) continue;
566             if (!(cast(InsertBeforeOp)op)) continue;
567             InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i];
568             // combine current insert with prior if any at same index
569             InsertBeforeOp[] prevInserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
570             foreach (InsertBeforeOp prevIop; prevInserts) {
571                 debug(TokenStreamRewriter) {
572                     import std.stdio : writefln;
573                     writefln("prevIop = %s", prevIop);
574                 }
575                 if (prevIop.index == iop.index) {
576                     if (cast(InsertAfterOp)prevIop) {
577                         iop.text = catOpText(prevIop.text, iop.text);
578                         rewrites[prevIop.instructionIndex] = null;
579                     }
580                     else if (cast(InsertBeforeOp)prevIop) { // combine objects
581                         // convert to strings...we're in process of toString'ing
582                         // whole token buffer so no lazy eval issue with any templates
583                         iop.text = catOpText(iop.text, prevIop.text);
584                         // delete redundant prior insert
585                         rewrites[prevIop.instructionIndex] = null;
586                     }
587                 }
588             }
589             // look for replaces where iop.index is in range; error
590             debug(TokenStreamRewriter) {
591                 import std.stdio : stderr, writefln;
592                 writefln("look for replaces where iop.index is in range, i = %s", i);
593             }
594             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
595             debug(TokenStreamRewriter) {
596                 import std.stdio : stderr, writefln;
597                 writefln("prevReplaces = %s", prevReplaces);
598             }
599             foreach (ReplaceOp rop; prevReplaces) {
600                 if ( iop.index == rop.index ) {
601                     rop.text = catOpText(iop.text, rop.text);
602                     rewrites[i] = null; // delete current insert
603                     continue;
604                 }
605                 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
606                     throw
607                         new
608                         IllegalArgumentException(
609                                                  format("insert op %s within boundaries of previous %s",
610                                                         iop, rop));
611                 }
612             }
613         }
614 
615         debug(TokenStreamRewriter) {
616             import std.stdio : stderr, writefln;
617             writefln("rewrites after = %s", rewrites);
618         }
619         RewriteOperation[size_t] m;
620         for (int i = 0; i < rewrites.length; i++) {
621             RewriteOperation op = rewrites[i];
622             if (!op) continue; // ignore deleted ops
623             if (op.index in m) {
624                 throw new Error("should only be one op per index");
625             }
626             m[op.index] = op;
627         }
628         return m;
629     }
630 
631     protected Variant catOpText(Variant a, Variant b)
632     {
633         Variant Null;
634         if (a !is Null && b !is Null)
635             return a ~ b;
636         if (a !is Null)
637             return a;
638         return b;
639     }
640 
641     protected auto getKindOfOps(U)(RewriteOperation[] rewrites, size_t before)
642     {
643         U[] ops;
644         for (int i=0; i<before && i<rewrites.length; i++) {
645             RewriteOperation op = rewrites[i];
646             if (op is null) continue; // ignore deleted
647             if (U.classinfo == op.classinfo) {
648                 ops ~= cast(U)(op);
649             }
650         }
651         return ops;
652     }
653 
654     public static TokenStream tokens()
655     {
656         return tokens_;
657     }
658 
659 }