1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.TokenStreamRewriter;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.InsertAfterOp;
11 import antlr.v4.runtime.InsertBeforeOp;
12 import antlr.v4.runtime.ReplaceOp;
13 import antlr.v4.runtime.RewriteOperation;
14 import antlr.v4.runtime.Token;
15 import antlr.v4.runtime.TokenConstantDefinition;
16 import antlr.v4.runtime.TokenStream;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm.comparison;
19 import std.format;
20 import std.conv;
21 import std.variant;
22 
23 /**
24  * Useful for rewriting out a buffered input token stream after doing some
25  * augmentation or other manipulations on it.
26  *
27  * <p>
28  * You can insert stuff, replace, and delete chunks. Note that the operations
29  * are done lazily--only if you convert the buffer to a {@link String} with
30  * {@link TokenStream#getText()}. This is very efficient because you are not
31  * moving data around all the time. As the buffer of tokens is converted to
32  * strings, the {@link #getText()} method(s) scan the input token stream and
33  * check to see if there is an operation at the current index. If so, the
34  * operation is done and then normal {@link String} rendering continues on the
35  * buffer. This is like having multiple Turing machine instruction streams
36  * (programs) operating on a single input tape. :)</p>
37  *
38  * <p>
39  * This rewriter makes no modifications to the token stream. It does not ask the
40  * stream to fill itself up nor does it advance the input cursor. The token
41  * stream {@link TokenStream#index()} will return the same value before and
42  * after any {@link #getText()} call.</p>
43  *
44  * <p>
45  * The rewriter only works on tokens that you have in the buffer and ignores the
46  * current input cursor. If you are buffering tokens on-demand, calling
47  * {@link #getText()} halfway through the input will only do rewrites for those
48  * tokens in the first half of the file.</p>
49  *
50  * <p>
51  * Since the operations are done lazily at {@link #getText}-time, operations do
52  * not screw up the token index values. That is, an insert operation at token
53  * index {@code i} does not change the index values for tokens
54  * {@code i}+1..n-1.</p>
55  *
56  * <p>
57  * Because operations never actually alter the buffer, you may always get the
58  * original token stream back without undoing anything. Since the instructions
59  * are queued up, you can easily simulate transactions and roll back any changes
60  * if there is an error just by removing instructions. For example,</p>
61  *
62  * <pre>
63  * CharStream input = new ANTLRFileStream("input");
64  * TLexer lex = new TLexer(input);
65  * CommonTokenStream tokens = new CommonTokenStream(lex);
66  * T parser = new T(tokens);
67  * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
68  * parser.startRule();
69  * </pre>
70  *
71  * <p>
72  * Then in the rules, you can execute (assuming rewriter is visible):</p>
73  *
74  * <pre>
75  * Token t,u;
76  * ...
77  * rewriter.insertAfter(t, "text to put after t");}
78  * rewriter.insertAfter(u, "text after u");}
79  * System.out.println(rewriter.getText());
80  * </pre>
81  *
82  * <p>
83  * You can also have multiple "instruction streams" and get multiple rewrites
84  * from a single pass over the input. Just name the instruction streams and use
85  * that name again when printing the buffer. This could be useful for generating
86  * a C file and also its header file--all from the same buffer:</p>
87  *
88  * <pre>
89  * rewriter.insertAfter("pass1", t, "text to put after t");}
90  * rewriter.insertAfter("pass2", u, "text after u");}
91  * System.out.println(rewriter.getText("pass1"));
92  * System.out.println(rewriter.getText("pass2"));
93  * </pre>
94  *
95  * <p>
96  * If you don't use named rewrite streams, a "default" stream is used as the
97  * first example shows.</p>
98  */
99 class TokenStreamRewriter
100 {
101 
102     public static immutable string DEFAULT_PROGRAM_NAME = "default";
103 
104     public static immutable int MIN_TOKEN_INDEX = 0;
105 
106     /**
107      * Our source stream
108      * @uml
109      * @read
110      */
111     private static TokenStream tokens_;
112 
113     protected RewriteOperation[][string] programs;
114 
115     protected size_t[string] lastRewriteTokenIndexes;
116 
117     private RewriteOperation[] rewriteOps;
118 
119     public this()
120     {
121     }
122 
123     public this(TokenStream tokens)
124     {
125         tokens_ = tokens;
126         programs[DEFAULT_PROGRAM_NAME] = rewriteOps;
127     }
128 
129     public void rollback(int instructionIndex)
130     {
131         rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
132     }
133 
134     /**
135      * Rollback the instruction stream for a program so that
136      *  the indicated instruction (via instructionIndex) is no
137      *  longer in the stream. UNTESTED!
138      */
139     public void rollback(string programName, int instructionIndex)
140     {
141         RewriteOperation[] ist = programs[programName];
142         if (programName in programs) {
143             programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex];
144         }
145     }
146 
147     public void deleteProgram()
148     {
149         deleteProgram(DEFAULT_PROGRAM_NAME);
150     }
151 
152     /**
153      * Reset the program so that no instructions exist
154      */
155     public void deleteProgram(string programName)
156     {
157         rollback(programName, MIN_TOKEN_INDEX);
158     }
159 
160     public void insertAfter(Token t, Variant text)
161     {
162         insertAfter(DEFAULT_PROGRAM_NAME, t, text);
163     }
164 
165     public void insertAfter(int index, Variant text)
166     {
167         insertAfter(DEFAULT_PROGRAM_NAME, index, text);
168     }
169 
170     public void insertAfter(string programName, Token t, Variant text)
171     {
172         insertAfter(programName, t.getTokenIndex, text);
173     }
174 
175     public void insertAfter(string programName, int index, Variant text)
176     {
177         // to insert after, just insert before next index (even if past end)
178         RewriteOperation op = new InsertAfterOp(index, text);
179         op.instructionIndex = programs[programName].length;
180         programs[programName] ~= op;
181     }
182 
183     public void insertBefore(Token t, Variant text)
184     {
185         insertBefore(DEFAULT_PROGRAM_NAME, t, text);
186     }
187 
188     public void insertBefore(size_t index, Variant text)
189     {
190         insertBefore(DEFAULT_PROGRAM_NAME, index, text);
191     }
192 
193     public void insertBefore(string programName, Token t, Variant text)
194     {
195         insertBefore(programName, t.getTokenIndex(), text);
196     }
197 
198     public void insertBefore(string programName, size_t index, Variant text)
199     {
200         RewriteOperation op = new InsertBeforeOp(index, text);
201         op.instructionIndex = programs[programName].length;
202         programs[programName] ~= op;
203     }
204 
205     public void replace(size_t index, Variant text)
206     {
207         replace(DEFAULT_PROGRAM_NAME, index, index, text);
208     }
209 
210     public void replace(size_t from, size_t to, Variant text)
211     {
212         replace(DEFAULT_PROGRAM_NAME, from, to, text);
213     }
214 
215     public void replace(Token indexT, Variant text)
216     {
217         replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
218     }
219 
220     public void replace(Token from, Token to, Variant text)
221     {
222         replace(DEFAULT_PROGRAM_NAME, from, to, text);
223     }
224 
225     public void replace(string programName, size_t from, size_t to, Variant text)
226     {
227         if ( from > to || from<0 || to<0 || to >= tokens_.size ) {
228             throw
229                 new
230                 IllegalArgumentException(
231                                          format("replace: range invalid: %s..%s(size=%s)",
232                                                 from, to, tokens_.size));
233         }
234         RewriteOperation op = new ReplaceOp(from, to, text);
235         op.instructionIndex = programs[programName].length;
236         programs[programName] ~= op;
237 
238         debug(TokenStreamRewriter) {
239             import std.stdio : writefln;
240             writefln("replace end: op = %s, programs = %s", op, programs);
241         }
242     }
243 
244     public void replace(string programName, Token from, Token to, Variant text)
245     {
246         debug(TokenStreamRewriter) {
247             import std.stdio : writefln;
248             writefln("replace constructor2: from = %s, to = %s, text = %s", from, to, text);
249         }
250         replace(programName,
251                 from.getTokenIndex,
252                 to.getTokenIndex,
253                 text);
254     }
255 
256     /**
257      * Delete token (can not use delete as identifier)
258      */
259     public void deleteT(size_t index)
260     {
261         deleteT(DEFAULT_PROGRAM_NAME, index, index);
262     }
263 
264     public void deleteT(size_t from, size_t to)
265     {
266         deleteT(DEFAULT_PROGRAM_NAME, from, to);
267     }
268 
269     public void deleteT(Token indexT)
270     {
271         deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT);
272     }
273 
274     public void deleteT(Token from, Token to)
275     {
276         deleteT(DEFAULT_PROGRAM_NAME, from, to);
277     }
278 
279     public void deleteT(string programName, size_t from, size_t to)
280     {
281         Variant Null;
282         replace(programName, from, to, Null);
283     }
284 
285     public void deleteT(string programName, Token from, Token to)
286     {
287         Variant Null;
288         replace(programName, from, to, Null);
289     }
290 
291     public size_t getLastRewriteTokenIndex()
292     {
293         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
294     }
295 
296     private size_t getLastRewriteTokenIndex(string programName)
297     {
298         if (programName in lastRewriteTokenIndexes) {
299             return lastRewriteTokenIndexes[programName];
300         }
301         else {
302             return -1;
303         }
304     }
305 
306     private void setLastRewriteTokenIndex(string programName, size_t i)
307     {
308         lastRewriteTokenIndexes[programName] =  i;
309     }
310 
311     private RewriteOperation[] getProgram(string name)
312     {
313         if (name in programs) {
314             return programs[name];
315         }
316         else {
317             return initializeProgram(name);
318         }
319     }
320 
321     private RewriteOperation[] initializeProgram(string name)
322     {
323         RewriteOperation[] iso;
324         programs[name] = iso;
325         return iso;
326     }
327 
328     /**
329      * Return the text from the original tokens altered per the
330      *  instructions given to this rewriter.
331      */
332     public Variant getText()
333     {
334         return getText(DEFAULT_PROGRAM_NAME, Interval.of(0, tokens_.size - 1));
335     }
336 
337     /**
338      * Return the text from the original tokens altered per the
339      *  instructions given to this rewriter in programName.
340      */
341     public Variant getText(string programName)
342     {
343         return getText(programName, Interval.of(0, tokens_.size - 1));
344     }
345 
346     /**
347      * Return the text associated with the tokens in the interval from the
348      *  original token stream but with the alterations given to this rewriter.
349      *  The interval refers to the indexes in the original token stream.
350      *  We do not alter the token stream in any way, so the indexes
351      *  and intervals are still consistent. Includes any operations done
352      *  to the first and last token in the interval. So, if you did an
353      *  insertBefore on the first token, you would get that insertion.
354      *  The same is true if you do an insertAfter the stop token.
355      */
356     public Variant getText(Interval interval)
357     {
358         return getText(DEFAULT_PROGRAM_NAME, interval);
359     }
360 
361     public Variant getText(string programName, Interval interval)
362     {
363         RewriteOperation[] rewrites;
364 
365         if (programName in programs)
366             rewrites = programs[programName];
367 
368         int start = interval.a;
369         int stop = interval.b;
370 
371         // ensure start/end are in range
372         if ( stop > tokens_.size-1 )
373             stop = tokens_.size-1;
374         if ( start < 0 )
375             start = 0;
376 
377         if (!rewrites) {
378             return tokens_.getText(interval); // no instructions to execute
379         }
380 
381         Variant buf;
382 
383         // First, optimize instruction stream
384         RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites);
385 
386         // Walk buffer, executing instructions and emitting tokens
387         int i = start;
388 
389         debug(TokenStreamRewriter) {
390                     import std.stdio : stderr, writefln;
391                     writefln("tokens_.size = %s", tokens_.size);
392                 }
393 
394         while (i <= stop && i < tokens_.size) {
395             Token t = tokens_.get(i);
396             debug(TokenStreamRewriter) {
397                     import std.stdio : stderr, writefln;
398                     writefln("i = %s, token = %s", i, t);
399                 }
400             RewriteOperation op;
401             if (i in indexToOp)
402                 op = indexToOp[i];
403             debug(TokenStreamRewriter) {
404                 import std.stdio : stderr, writefln;
405                 writefln("indexToOp = %s", indexToOp);
406             }
407             indexToOp.remove(i); // remove so any left have index size-1
408             debug(TokenStreamRewriter) {
409                 import std.stdio : stderr, writefln;
410                 writefln("indexToOp end = %s", indexToOp);
411             }
412             if (!op) {
413                 // no operation at that index, just dump token
414                 if (t.getType != TokenConstantDefinition.EOF) {
415                     Variant Null;
416                     if (buf is Null)
417                         buf = t.getText;
418                     else
419                         buf ~= t.getText;
420                 }
421                 i++; // move to next token
422             }
423             else {
424                 i = to!int(op.execute(buf)); // execute operation and skip
425             }
426         }
427 
428         // include stuff after end if it's last index in buffer
429         // So, if they did an insertAfter(lastValidIndex, "foo"), include
430         // foo if end==lastValidIndex.
431         if (stop == tokens_.size()-1) {
432             // Scan any remaining operations after last token
433             // should be included (they will be inserts).
434             foreach (RewriteOperation op; indexToOp.values()) {
435                 if (op.index >= tokens_.size-1)
436                     buf ~= op.text;
437             }
438         }
439         return buf;
440     }
441 
442     /**
443      *  overlapping replaces that are not completed nested). Inserts to
444      *  same index need to be combined etc...  Here are the cases:
445      *
446      *  I.i.u I.j.v                             leave alone, nonoverlapping
447      *  I.i.u I.i.v                             combine: Iivu
448      *
449      *  R.i-j.u R.x-y.v | i-j in x-y            delete first R
450      *  R.i-j.u R.i-j.v                         delete first R
451      *  R.i-j.u R.x-y.v | x-y in i-j            ERROR
452      *  R.i-j.u R.x-y.v | boundaries overlap    ERROR
453      *
454      *  Delete special case of replace (text==null):
455      *  D.i-j.u D.x-y.v | boundaries overlap    combine to max(min)..max(right)
456      *
457      *  I.i.u R.x-y.v | i in (x+1)-y            delete I (since insert before
458      *                                          we're not deleting i)
459      *  I.i.u R.x-y.v | i not in (x+1)-y        leave alone, nonoverlapping
460      *  R.x-y.v I.i.u | i in x-y                ERROR
461      *  R.x-y.v I.x.u                           R.x-y.uv (combine, delete I)
462      *  R.x-y.v I.i.u | i not in x-y            leave alone, nonoverlapping
463      *
464      *  I.i.u = insert u before op @ index i
465      *  R.x-y.u = replace x-y indexed tokens with u
466      *
467      *  First we need to examine replaces. For any replace op:
468      *
469      *      1. wipe out any insertions before op within that range.
470      *      2. Drop any replace op before that is contained completely within
471      *   that range.
472      *      3. Throw exception upon boundary overlap with any previous replace.
473      *
474      *  Then we can deal with inserts:
475      *
476      *      1. for any inserts to same index, combine even if not adjacent.
477      *      2. for any prior replace with same left boundary, combine this
478      *   insert with replace and delete this replace.
479      *      3. throw exception if index in same range as previous replace
480      *
481      *  Don't actually delete; make op null in list. Easier to walk list.
482      *  Later we can throw as we add to index &rarr; op map.
483      *
484      *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
485      *  inserted stuff would be before the replace range. But, if you
486      *  add tokens in front of a method body '{' and then delete the method
487      *  body, I think the stuff before the '{' you added should disappear too.
488      *
489      *  Return a map from token index to operation.
490      */
491     protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites)
492     {
493         debug(TokenStreamRewriter) {
494             import std.stdio : writefln;
495             writefln("reduceToSingleOperationPerIndex");
496             foreach (i, rew; rewrites)
497                 writefln("\trewrites[%s] = %s", i, rew);
498         }
499 
500         // WALK REPLACES
501         for (size_t i = 0; i < rewrites.length; i++) {
502             RewriteOperation op = rewrites[i];
503             debug(TokenStreamRewriter) {
504                 import std.stdio : writefln;
505                 writefln("op0 = %s", op);
506             }
507             if (op is null) continue;
508             if (!(cast(ReplaceOp)op)) {
509                 continue;
510             }
511             debug(TokenStreamRewriter) {
512                 import std.stdio : writefln;
513                 writefln("op = %s", op);
514             }
515             ReplaceOp rop = cast(ReplaceOp)rewrites[i];
516             // Wipe prior inserts within range
517             InsertBeforeOp[] inserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
518             foreach (InsertBeforeOp iop; inserts) {
519                 if ( iop.index == rop.index ) {
520                     // E.g., insert before 2, delete 2..2; update replace
521                     // text to include insert before, kill insert
522                     rewrites[iop.instructionIndex] = null;
523                     Variant Null;
524                     rop.text = iop.text ~ (rop.text !is Null?rop.text:Null);
525                 }
526                 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) {
527                     // delete insert as it's a no-op.
528                     rewrites[iop.instructionIndex] =  null;
529                 }
530             }
531             // Drop any prior replaces contained within
532             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
533             foreach (ReplaceOp prevRop; prevReplaces) {
534                 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
535                     // delete replace as it's a no-op.
536                     rewrites[prevRop.instructionIndex] = null;
537                     continue;
538                 }
539                 // throw exception unless disjoint or identical
540                 bool disjoint =
541                     prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
542                 // Delete special case of replace (text==null):
543                 // D.i-j.u D.x-y.v  | boundaries overlap    combine to max(min)..max(right)
544                 if ( prevRop.text==null && rop.text==null && !disjoint ) {
545                     //System.out.println("overlapping deletes: "+prevRop+", "+rop);
546                     rewrites[prevRop.instructionIndex] = null; // kill first delete
547                     rop.index = min(prevRop.index, rop.index);
548                     rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex);
549                     debug {
550                         import std.stdio : stderr, writefln;
551                         stderr.writefln("new rop %s", rop);
552                     }
553                 }
554                 else if ( !disjoint ) {
555                     throw
556                         new
557                         IllegalArgumentException(format(
558                                                         "replace op boundaries of %s overlap with previous %s",
559                                                         rop,
560                                                         prevRop));
561                 }
562             }
563         }
564 
565         // WALK INSERTS
566         debug(TokenStreamRewriter) {
567             import std.stdio : stderr, writefln;
568             writefln("WALK INSERTS");
569         }
570         for (int i = 0; i < rewrites.length; i++) {
571             RewriteOperation op = rewrites[i];
572             if (op is null) continue;
573             if (!(cast(InsertBeforeOp)op)) continue;
574             InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i];
575             // combine current insert with prior if any at same index
576             InsertBeforeOp[] prevInserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
577             foreach (InsertBeforeOp prevIop; prevInserts) {
578                 debug(TokenStreamRewriter) {
579                     import std.stdio : stderr, writefln;
580                     writefln("prevIop = %s", prevIop);
581                 }
582                 if (prevIop.index == iop.index) {
583                     if (cast(InsertAfterOp)prevIop) {
584                         iop.text = catOpText(prevIop.text, iop.text);
585                         rewrites[prevIop.instructionIndex] = null;
586                     }
587                     else if (cast(InsertBeforeOp)prevIop) { // combine objects
588                         // convert to strings...we're in process of toString'ing
589                         // whole token buffer so no lazy eval issue with any templates
590                         iop.text = catOpText(iop.text, prevIop.text);
591                         // delete redundant prior insert
592                         rewrites[prevIop.instructionIndex] = null;
593                     }
594                 }
595             }
596             // look for replaces where iop.index is in range; error
597             debug(TokenStreamRewriter) {
598                 import std.stdio : stderr, writefln;
599                 writefln("look for replaces where iop.index is in range, i = %s", i);
600             }
601             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
602             debug(TokenStreamRewriter) {
603                 import std.stdio : stderr, writefln;
604                 writefln("prevReplaces = %s", prevReplaces);
605             }
606             foreach (ReplaceOp rop; prevReplaces) {
607                 if ( iop.index == rop.index ) {
608                     rop.text = catOpText(iop.text, rop.text);
609                     rewrites[i] = null; // delete current insert
610                     continue;
611                 }
612                 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
613                     throw
614                         new
615                         IllegalArgumentException(
616                                                  format("insert op %s within boundaries of previous %s",
617                                                         iop, rop));
618                 }
619             }
620         }
621 
622         debug(TokenStreamRewriter) {
623             import std.stdio : stderr, writefln;
624             writefln("rewrites after = %s", rewrites);
625         }
626         RewriteOperation[size_t] m;
627         for (int i = 0; i < rewrites.length; i++) {
628             RewriteOperation op = rewrites[i];
629             if (!op) continue; // ignore deleted ops
630             if (op.index in m) {
631                 throw new Error("should only be one op per index");
632             }
633             m[op.index] = op;
634         }
635         return m;
636     }
637 
638     protected Variant catOpText(Variant a, Variant b)
639     {
640         Variant Null;
641         if (a !is Null && b !is Null)
642             return a ~ b;
643         if (a !is Null)
644             return a;
645         return b;
646     }
647 
648     protected auto getKindOfOps(U)(RewriteOperation[] rewrites, size_t before)
649     {
650         U[] ops;
651         for (int i=0; i<before && i<rewrites.length; i++) {
652             RewriteOperation op = rewrites[i];
653             if (op is null) continue; // ignore deleted
654             if (U.classinfo == op.classinfo) {
655                 ops ~= cast(U)(op);
656             }
657         }
658         return ops;
659     }
660 
661     public static TokenStream tokens()
662     {
663         return tokens_;
664     }
665 
666 }