1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.TokenStreamRewriter;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.InsertAfterOp;
11 import antlr.v4.runtime.InsertBeforeOp;
12 import antlr.v4.runtime.ReplaceOp;
13 import antlr.v4.runtime.RewriteOperation;
14 import antlr.v4.runtime.Token;
15 import antlr.v4.runtime.TokenConstantDefinition;
16 import antlr.v4.runtime.TokenStream;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm.comparison;
19 import std.format;
20 import std.conv;
21 import std.variant;
22 
23 /**
24  * Useful for rewriting out a buffered input token stream after doing some
25  * augmentation or other manipulations on it.
26  *
27  * You can insert stuff, replace, and delete chunks. Note that the operations
28  * are done lazily--only if you convert the buffer to a {@link String} with
29  * {@link TokenStream#getText()}. This is very efficient because you are not
30  * moving data around all the time. As the buffer of tokens is converted to
31  * strings, the {@link #getText()} method(s) scan the input token stream and
32  * check to see if there is an operation at the current index. If so, the
33  * operation is done and then normal {@link String} rendering continues on the
34  * buffer. This is like having multiple Turing machine instruction streams
35  * (programs) operating on a single input tape.
36  *
37  * This rewriter makes no modifications to the token stream. It does not ask the
38  * stream to fill itself up nor does it advance the input cursor. The token
39  * stream {@link TokenStream#index()} will return the same value before and
40  * after any {@link #getText()} call.
41  *
42  * The rewriter only works on tokens that you have in the buffer and ignores the
43  * current input cursor. If you are buffering tokens on-demand, calling
44  * {@link #getText()} halfway through the input will only do rewrites for those
45  * tokens in the first half of the file.
46  *
47  * Since the operations are done lazily at {@link #getText}-time, operations do
48  * not screw up the token index values. That is, an insert operation at token
49  * index {@code i} does not change the index values for tokens
50  * {@code i}+1..n-1.
51  *
52  * Because operations never actually alter the buffer, you may always get the
53  * original token stream back without undoing anything. Since the instructions
54  * are queued up, you can easily simulate transactions and roll back any changes
55  * if there is an error just by removing instructions. For example,
56  *
57  * Examples:
58  * ---
59  * CharStream input = new ANTLRFileStream("input");
60  * TLexer lex = new TLexer(input);
61  * CommonTokenStream tokens = new CommonTokenStream(lex);
62  * T parser = new T(tokens);
63  * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
64  * parser.startRule();
65  * ---
66  *
67  * Then in the rules, you can execute (assuming rewriter is visible):
68  *
69  * Examples:
70  * ---
71  * Token t,u;
72  * ...
73  * rewriter.insertAfter(t, "text to put after t");}
74  * rewriter.insertAfter(u, "text after u");}
75  * System.out.println(rewriter.getText());
76  * ---
77  *
78  * You can also have multiple "instruction streams" and get multiple rewrites
79  * from a single pass over the input. Just name the instruction streams and use
80  * that name again when printing the buffer. This could be useful for generating
81  * a C file and also its header file--all from the same buffer:
82  *
83  * Examples:
84  * ---
85  * rewriter.insertAfter("pass1", t, "text to put after t");}
86  * rewriter.insertAfter("pass2", u, "text after u");}
87  * System.out.println(rewriter.getText("pass1"));
88  * System.out.println(rewriter.getText("pass2"));
89  * ---
90  *
91  * If you don't use named rewrite streams, a "default" stream is used as the
92  * first example shows.
93  */
94 class TokenStreamRewriter
95 {
96 
97     public static immutable string DEFAULT_PROGRAM_NAME = "default";
98 
99     public static immutable int MIN_TOKEN_INDEX = 0;
100 
101     /**
102      * Our source stream
103      * @uml
104      * @read
105      */
106     private static TokenStream tokens_;
107 
108     protected RewriteOperation[][string] programs;
109 
110     protected size_t[string] lastRewriteTokenIndexes;
111 
112     private RewriteOperation[] rewriteOps;
113 
114     public this()
115     {
116     }
117 
118     public this(TokenStream tokens)
119     {
120         tokens_ = tokens;
121         programs[DEFAULT_PROGRAM_NAME] = rewriteOps;
122     }
123 
124     public void rollback(int instructionIndex)
125     {
126         rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
127     }
128 
129     /**
130      * Rollback the instruction stream for a program so that
131      *  the indicated instruction (via instructionIndex) is no
132      *  longer in the stream. UNTESTED!
133      */
134     public void rollback(string programName, int instructionIndex)
135     {
136         RewriteOperation[] ist = programs[programName];
137         if (programName in programs) {
138             programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex];
139         }
140     }
141 
142     public void deleteProgram()
143     {
144         deleteProgram(DEFAULT_PROGRAM_NAME);
145     }
146 
147     /**
148      * Reset the program so that no instructions exist
149      */
150     public void deleteProgram(string programName)
151     {
152         rollback(programName, MIN_TOKEN_INDEX);
153     }
154 
155     public void insertAfter(Token t, Variant text)
156     {
157         insertAfter(DEFAULT_PROGRAM_NAME, t, text);
158     }
159 
160     public void insertAfter(int index, Variant text)
161     {
162         insertAfter(DEFAULT_PROGRAM_NAME, index, text);
163     }
164 
165     public void insertAfter(string programName, Token t, Variant text)
166     {
167         insertAfter(programName, t.getTokenIndex, text);
168     }
169 
170     public void insertAfter(string programName, int index, Variant text)
171     {
172         // to insert after, just insert before next index (even if past end)
173         RewriteOperation op = new InsertAfterOp(index, text);
174         op.instructionIndex = programs[programName].length;
175         programs[programName] ~= op;
176     }
177 
178     public void insertBefore(Token t, Variant text)
179     {
180         insertBefore(DEFAULT_PROGRAM_NAME, t, text);
181     }
182 
183     public void insertBefore(size_t index, Variant text)
184     {
185         insertBefore(DEFAULT_PROGRAM_NAME, index, text);
186     }
187 
188     public void insertBefore(string programName, Token t, Variant text)
189     {
190         insertBefore(programName, t.getTokenIndex(), text);
191     }
192 
193     public void insertBefore(string programName, size_t index, Variant text)
194     {
195         RewriteOperation op = new InsertBeforeOp(index, text);
196         op.instructionIndex = programs[programName].length;
197         programs[programName] ~= op;
198     }
199 
200     public void replace(size_t index, Variant text)
201     {
202         replace(DEFAULT_PROGRAM_NAME, index, index, text);
203     }
204 
205     public void replace(size_t from, size_t to, Variant text)
206     {
207         replace(DEFAULT_PROGRAM_NAME, from, to, text);
208     }
209 
210     public void replace(Token indexT, Variant text)
211     {
212         replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
213     }
214 
215     public void replace(Token from, Token to, Variant text)
216     {
217         replace(DEFAULT_PROGRAM_NAME, from, to, text);
218     }
219 
220     public void replace(string programName, size_t from, size_t to, Variant text)
221     {
222         if ( from > to || from<0 || to<0 || to >= tokens_.size ) {
223             throw
224                 new
225                 IllegalArgumentException(
226                                          format("replace: range invalid: %s..%s(size=%s)",
227                                                 from, to, tokens_.size));
228         }
229         RewriteOperation op = new ReplaceOp(from, to, text);
230         op.instructionIndex = programs[programName].length;
231         programs[programName] ~= op;
232 
233         debug(TokenStreamRewriter) {
234             import std.stdio : writefln;
235             writefln("replace end: op = %s, programs = %s", op, programs);
236         }
237     }
238 
239     public void replace(string programName, Token from, Token to, Variant text)
240     {
241         debug(TokenStreamRewriter) {
242             import std.stdio : writefln;
243             writefln("replace constructor2: from = %s, to = %s, text = %s", from, to, text);
244         }
245         replace(programName,
246                 from.getTokenIndex,
247                 to.getTokenIndex,
248                 text);
249     }
250 
251     /**
252      * Delete token (can not use delete as identifier)
253      */
254     public void deleteT(size_t index)
255     {
256         deleteT(DEFAULT_PROGRAM_NAME, index, index);
257     }
258 
259     public void deleteT(size_t from, size_t to)
260     {
261         deleteT(DEFAULT_PROGRAM_NAME, from, to);
262     }
263 
264     public void deleteT(Token indexT)
265     {
266         deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT);
267     }
268 
269     public void deleteT(Token from, Token to)
270     {
271         deleteT(DEFAULT_PROGRAM_NAME, from, to);
272     }
273 
274     public void deleteT(string programName, size_t from, size_t to)
275     {
276         Variant Null;
277         replace(programName, from, to, Null);
278     }
279 
280     public void deleteT(string programName, Token from, Token to)
281     {
282         Variant Null;
283         replace(programName, from, to, Null);
284     }
285 
286     public size_t getLastRewriteTokenIndex()
287     {
288         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
289     }
290 
291     private size_t getLastRewriteTokenIndex(string programName)
292     {
293         if (programName in lastRewriteTokenIndexes) {
294             return lastRewriteTokenIndexes[programName];
295         }
296         else {
297             return -1;
298         }
299     }
300 
301     private void setLastRewriteTokenIndex(string programName, size_t i)
302     {
303         lastRewriteTokenIndexes[programName] =  i;
304     }
305 
306     private RewriteOperation[] getProgram(string name)
307     {
308         if (name in programs) {
309             return programs[name];
310         }
311         else {
312             return initializeProgram(name);
313         }
314     }
315 
316     private RewriteOperation[] initializeProgram(string name)
317     {
318         RewriteOperation[] iso;
319         programs[name] = iso;
320         return iso;
321     }
322 
323     /**
324      * Return the text from the original tokens altered per the
325      *  instructions given to this rewriter.
326      */
327     public Variant getText()
328     {
329         return getText(DEFAULT_PROGRAM_NAME, Interval.of(0, tokens_.size - 1));
330     }
331 
332     /**
333      * Return the text from the original tokens altered per the
334      *  instructions given to this rewriter in programName.
335      */
336     public Variant getText(string programName)
337     {
338         return getText(programName, Interval.of(0, tokens_.size - 1));
339     }
340 
341     /**
342      * Return the text associated with the tokens in the interval from the
343      *  original token stream but with the alterations given to this rewriter.
344      *  The interval refers to the indexes in the original token stream.
345      *  We do not alter the token stream in any way, so the indexes
346      *  and intervals are still consistent. Includes any operations done
347      *  to the first and last token in the interval. So, if you did an
348      *  insertBefore on the first token, you would get that insertion.
349      *  The same is true if you do an insertAfter the stop token.
350      */
351     public Variant getText(Interval interval)
352     {
353         return getText(DEFAULT_PROGRAM_NAME, interval);
354     }
355 
356     public Variant getText(string programName, Interval interval)
357     {
358         RewriteOperation[] rewrites;
359 
360         if (programName in programs)
361             rewrites = programs[programName];
362 
363         int start = interval.a;
364         int stop = interval.b;
365 
366         // ensure start/end are in range
367         if ( stop > tokens_.size-1 )
368             stop = tokens_.size-1;
369         if ( start < 0 )
370             start = 0;
371 
372         if (!rewrites) {
373             return tokens_.getText(interval); // no instructions to execute
374         }
375 
376         Variant buf;
377 
378         // First, optimize instruction stream
379         RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites);
380 
381         // Walk buffer, executing instructions and emitting tokens
382         int i = start;
383 
384         debug(TokenStreamRewriter) {
385                     import std.stdio : stderr, writefln;
386                     writefln("tokens_.size = %s", tokens_.size);
387                 }
388 
389         while (i <= stop && i < tokens_.size) {
390             Token t = tokens_.get(i);
391             debug(TokenStreamRewriter) {
392                     import std.stdio : stderr, writefln;
393                     writefln("i = %s, token = %s", i, t);
394                 }
395             RewriteOperation op;
396             if (i in indexToOp)
397                 op = indexToOp[i];
398             debug(TokenStreamRewriter) {
399                 import std.stdio : stderr, writefln;
400                 writefln("indexToOp = %s", indexToOp);
401             }
402             indexToOp.remove(i); // remove so any left have index size-1
403             debug(TokenStreamRewriter) {
404                 import std.stdio : stderr, writefln;
405                 writefln("indexToOp end = %s", indexToOp);
406             }
407             if (!op) {
408                 // no operation at that index, just dump token
409                 if (t.getType != TokenConstantDefinition.EOF) {
410                     Variant Null;
411                     if (buf is Null)
412                         buf = t.getText;
413                     else
414                         buf ~= t.getText;
415                 }
416                 i++; // move to next token
417             }
418             else {
419                 i = to!int(op.execute(buf)); // execute operation and skip
420             }
421         }
422 
423         // include stuff after end if it's last index in buffer
424         // So, if they did an insertAfter(lastValidIndex, "foo"), include
425         // foo if end==lastValidIndex.
426         if (stop == tokens_.size()-1) {
427             // Scan any remaining operations after last token
428             // should be included (they will be inserts).
429             foreach (RewriteOperation op; indexToOp.values()) {
430                 if (op.index >= tokens_.size-1)
431                     buf ~= op.text;
432             }
433         }
434         return buf;
435     }
436 
437     /**
438      * We need to combine operations and report invalid operations (like
439      * overlapping replaces that are not completed nested). Inserts to
440      * same index need to be combined etc.
441      *
442      * Here are the cases:
443      *
444      *  I.i.u I.j.v                             leave alone, nonoverlapping<br>
445      *  I.i.u I.i.v                             combine: Iivu
446      *
447      *  R.i-j.u R.x-y.v | i-j in x-y            delete first R<br>
448      *  R.i-j.u R.i-j.v                         delete first R<br>
449      *  R.i-j.u R.x-y.v | x-y in i-j            ERROR<br>
450      *  R.i-j.u R.x-y.v | boundaries overlap    ERROR
451      *
452      *  Delete special case of replace (text==null):<br>
453      *  D.i-j.u D.x-y.v | boundaries overlap    combine to max(min)..max(right)
454      *
455      *  I.i.u R.x-y.v | i in (x+1)-y            delete I (since insert before<br>
456      *                                          we're not deleting i)<br>
457      *  I.i.u R.x-y.v | i not in (x+1)-y        leave alone, nonoverlapping<br>
458      *  R.x-y.v I.i.u | i in x-y                ERROR<br>
459      *  R.x-y.v I.x.u                           R.x-y.uv (combine, delete I)<br>
460      *  R.x-y.v I.i.u | i not in x-y            leave alone, nonoverlapping
461      *
462      *  I.i.u = insert u before op @ index i<br>
463      *  R.x-y.u = replace x-y indexed tokens with u
464      *
465      *  First we need to examine replaces. For any replace op:
466      *
467      *      1. wipe out any insertions before op within that range.<br>
468      *      2. Drop any replace op before that is contained completely within
469      *   that range.<br>
470      *      3. Throw exception upon boundary overlap with any previous replace.
471      *
472      *  Then we can deal with inserts:
473      *
474      *      1. for any inserts to same index, combine even if not adjacent.<br>
475      *      2. for any prior replace with same left boundary, combine this
476      *   insert with replace and delete this replace.<br>
477      *      3. throw exception if index in same range as previous replace
478      *
479      *  Don't actually delete; make op null in list. Easier to walk list.
480      *  Later we can throw as we add to index &rarr; op map.
481      *
482      *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
483      *  inserted stuff would be before the replace range. But, if you
484      *  add tokens in front of a method body '{' and then delete the method
485      *  body, I think the stuff before the '{' you added should disappear too.
486      *
487      *  Return:
488      *  a map from token index to operation.
489      */
490     protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites)
491     {
492         debug(TokenStreamRewriter) {
493             import std.stdio : writefln;
494             writefln("reduceToSingleOperationPerIndex");
495             foreach (i, rew; rewrites)
496                 writefln("\trewrites[%s] = %s", i, rew);
497         }
498 
499         // WALK REPLACES
500         for (size_t i = 0; i < rewrites.length; i++) {
501             RewriteOperation op = rewrites[i];
502             debug(TokenStreamRewriter) {
503                 import std.stdio : writefln;
504                 writefln("op0 = %s", op);
505             }
506             if (op is null) continue;
507             if (!(cast(ReplaceOp)op)) {
508                 continue;
509             }
510             debug(TokenStreamRewriter) {
511                 import std.stdio : writefln;
512                 writefln("op = %s", op);
513             }
514             ReplaceOp rop = cast(ReplaceOp)rewrites[i];
515             // Wipe prior inserts within range
516             InsertBeforeOp[] inserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
517             foreach (InsertBeforeOp iop; inserts) {
518                 if ( iop.index == rop.index ) {
519                     // E.g., insert before 2, delete 2..2; update replace
520                     // text to include insert before, kill insert
521                     rewrites[iop.instructionIndex] = null;
522                     Variant Null;
523                     rop.text = iop.text ~ (rop.text !is Null?rop.text:Null);
524                 }
525                 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) {
526                     // delete insert as it's a no-op.
527                     rewrites[iop.instructionIndex] =  null;
528                 }
529             }
530             // Drop any prior replaces contained within
531             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
532             foreach (ReplaceOp prevRop; prevReplaces) {
533                 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
534                     // delete replace as it's a no-op.
535                     rewrites[prevRop.instructionIndex] = null;
536                     continue;
537                 }
538                 // throw exception unless disjoint or identical
539                 bool disjoint =
540                     prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
541                 // Delete special case of replace (text==null):
542                 // D.i-j.u D.x-y.v  | boundaries overlap    combine to max(min)..max(right)
543                 if ( prevRop.text==null && rop.text==null && !disjoint ) {
544                     //System.out.println("overlapping deletes: "+prevRop+", "+rop);
545                     rewrites[prevRop.instructionIndex] = null; // kill first delete
546                     rop.index = min(prevRop.index, rop.index);
547                     rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex);
548                     debug {
549                         import std.stdio : stderr, writefln;
550                         stderr.writefln("new rop %s", rop);
551                     }
552                 }
553                 else if ( !disjoint ) {
554                     throw
555                         new
556                         IllegalArgumentException(format(
557                                                         "replace op boundaries of %s overlap with previous %s",
558                                                         rop,
559                                                         prevRop));
560                 }
561             }
562         }
563 
564         // WALK INSERTS
565         debug(TokenStreamRewriter) {
566             import std.stdio : stderr, writefln;
567             writefln("WALK INSERTS");
568         }
569         for (int i = 0; i < rewrites.length; i++) {
570             RewriteOperation op = rewrites[i];
571             if (op is null) continue;
572             if (!(cast(InsertBeforeOp)op)) continue;
573             InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i];
574             // combine current insert with prior if any at same index
575             InsertBeforeOp[] prevInserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
576             foreach (InsertBeforeOp prevIop; prevInserts) {
577                 debug(TokenStreamRewriter) {
578                     import std.stdio : stderr, writefln;
579                     writefln("prevIop = %s", prevIop);
580                 }
581                 if (prevIop.index == iop.index) {
582                     if (cast(InsertAfterOp)prevIop) {
583                         iop.text = catOpText(prevIop.text, iop.text);
584                         rewrites[prevIop.instructionIndex] = null;
585                     }
586                     else if (cast(InsertBeforeOp)prevIop) { // combine objects
587                         // convert to strings...we're in process of toString'ing
588                         // whole token buffer so no lazy eval issue with any templates
589                         iop.text = catOpText(iop.text, prevIop.text);
590                         // delete redundant prior insert
591                         rewrites[prevIop.instructionIndex] = null;
592                     }
593                 }
594             }
595             // look for replaces where iop.index is in range; error
596             debug(TokenStreamRewriter) {
597                 import std.stdio : stderr, writefln;
598                 writefln("look for replaces where iop.index is in range, i = %s", i);
599             }
600             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
601             debug(TokenStreamRewriter) {
602                 import std.stdio : stderr, writefln;
603                 writefln("prevReplaces = %s", prevReplaces);
604             }
605             foreach (ReplaceOp rop; prevReplaces) {
606                 if ( iop.index == rop.index ) {
607                     rop.text = catOpText(iop.text, rop.text);
608                     rewrites[i] = null; // delete current insert
609                     continue;
610                 }
611                 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
612                     throw
613                         new
614                         IllegalArgumentException(
615                                                  format("insert op %s within boundaries of previous %s",
616                                                         iop, rop));
617                 }
618             }
619         }
620 
621         debug(TokenStreamRewriter) {
622             import std.stdio : stderr, writefln;
623             writefln("rewrites after = %s", rewrites);
624         }
625         RewriteOperation[size_t] m;
626         for (int i = 0; i < rewrites.length; i++) {
627             RewriteOperation op = rewrites[i];
628             if (!op) continue; // ignore deleted ops
629             if (op.index in m) {
630                 throw new Error("should only be one op per index");
631             }
632             m[op.index] = op;
633         }
634         return m;
635     }
636 
637     protected Variant catOpText(Variant a, Variant b)
638     {
639         Variant Null;
640         if (a !is Null && b !is Null)
641             return a ~ b;
642         if (a !is Null)
643             return a;
644         return b;
645     }
646 
647     protected auto getKindOfOps(U)(RewriteOperation[] rewrites, size_t before)
648     {
649         U[] ops;
650         for (int i=0; i<before && i<rewrites.length; i++) {
651             RewriteOperation op = rewrites[i];
652             if (op is null) continue; // ignore deleted
653             if (U.classinfo == op.classinfo) {
654                 ops ~= cast(U)(op);
655             }
656         }
657         return ops;
658     }
659 
660     public static TokenStream tokens()
661     {
662         return tokens_;
663     }
664 
665 }