1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.TokenStreamRewriter;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.InsertAfterOp;
11 import antlr.v4.runtime.InsertBeforeOp;
12 import antlr.v4.runtime.ReplaceOp;
13 import antlr.v4.runtime.RewriteOperation;
14 import antlr.v4.runtime.Token;
15 import antlr.v4.runtime.TokenConstantDefinition;
16 import antlr.v4.runtime.TokenStream;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm.comparison;
19 import std.conv;
20 import std.format;
21 import std.variant;
22 
23 /**
24  * Useful for rewriting out a buffered input token stream after doing some
25  * augmentation or other manipulations on it.
26  *
27  * You can insert stuff, replace, and delete chunks. Note that the operations
28  * are done lazily--only if you convert the buffer to a {@link String} with
29  * {@link TokenStream#getText()}. This is very efficient because you are not
30  * moving data around all the time. As the buffer of tokens is converted to
31  * strings, the {@link #getText()} method(s) scan the input token stream and
32  * check to see if there is an operation at the current index. If so, the
33  * operation is done and then normal {@link String} rendering continues on the
34  * buffer. This is like having multiple Turing machine instruction streams
35  * (programs) operating on a single input tape.
36  *
37  * This rewriter makes no modifications to the token stream. It does not ask the
38  * stream to fill itself up nor does it advance the input cursor. The token
39  * stream {@link TokenStream#index()} will return the same value before and
40  * after any {@link #getText()} call.
41  *
42  * The rewriter only works on tokens that you have in the buffer and ignores the
43  * current input cursor. If you are buffering tokens on-demand, calling
44  * {@link #getText()} halfway through the input will only do rewrites for those
45  * tokens in the first half of the file.
46  *
47  * Since the operations are done lazily at {@link #getText}-time, operations do
48  * not screw up the token index values. That is, an insert operation at token
49  * index {@code i} does not change the index values for tokens
50  * {@code i}+1..n-1.
51  *
52  * Because operations never actually alter the buffer, you may always get the
53  * original token stream back without undoing anything. Since the instructions
54  * are queued up, you can easily simulate transactions and roll back any changes
55  * if there is an error just by removing instructions. For example,
56  *
57  * Examples:
58  * ---
59  * CharStream input = new ANTLRFileStream("input");
60  * TLexer lex = new TLexer(input);
61  * CommonTokenStream tokens = new CommonTokenStream(lex);
62  * T parser = new T(tokens);
63  * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
64  * parser.startRule();
65  * ---
66  *
67  * Then in the rules, you can execute (assuming rewriter is visible):
68  *
69  * Examples:
70  * ---
71  * Token t,u;
72  * ...
73  * rewriter.insertAfter(t, "text to put after t");}
74  * rewriter.insertAfter(u, "text after u");}
75  * System.out.println(rewriter.getText());
76  * ---
77  *
78  * You can also have multiple "instruction streams" and get multiple rewrites
79  * from a single pass over the input. Just name the instruction streams and use
80  * that name again when printing the buffer. This could be useful for generating
81  * a C file and also its header file--all from the same buffer:
82  *
83  * Examples:
84  * ---
85  * rewriter.insertAfter("pass1", t, "text to put after t");}
86  * rewriter.insertAfter("pass2", u, "text after u");}
87  * System.out.println(rewriter.getText("pass1"));
88  * System.out.println(rewriter.getText("pass2"));
89  * ---
90  *
91  * If you don't use named rewrite streams, a "default" stream is used as the
92  * first example shows.
93  */
94 class TokenStreamRewriter
95 {
96 
97     enum string DEFAULT_PROGRAM_NAME = "default";
98 
99     enum int MIN_TOKEN_INDEX = 0;
100 
101     /**
102      * Our source stream
103      * @uml
104      * @read
105      */
106     private static TokenStream tokens_;
107 
108     protected RewriteOperation[][string] programs;
109 
110     protected size_t[string] lastRewriteTokenIndexes;
111 
112     private RewriteOperation[] rewriteOps;
113 
114     public this()
115     {
116     }
117 
118     public this(TokenStream tokens)
119     {
120         tokens_ = tokens;
121         programs[DEFAULT_PROGRAM_NAME] = rewriteOps;
122     }
123 
124     public void rollback(int instructionIndex)
125     {
126         rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
127     }
128 
129     /**
130      * Rollback the instruction stream for a program so that
131      *  the indicated instruction (via instructionIndex) is no
132      *  longer in the stream. UNTESTED!
133      */
134     public void rollback(string programName, int instructionIndex)
135     {
136         RewriteOperation[] ist = programs[programName];
137         if (programName in programs) {
138             programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex];
139         }
140     }
141 
142     public void deleteProgram()
143     {
144         deleteProgram(DEFAULT_PROGRAM_NAME);
145     }
146 
147     /**
148      * Reset the program so that no instructions exist
149      */
150     public void deleteProgram(string programName)
151     {
152         rollback(programName, MIN_TOKEN_INDEX);
153     }
154 
155     public void insertAfter(Token t, Variant text)
156     {
157         insertAfter(DEFAULT_PROGRAM_NAME, t, text);
158     }
159 
160     public void insertAfter(int index, Variant text)
161     {
162         insertAfter(DEFAULT_PROGRAM_NAME, index, text);
163     }
164 
165     public void insertAfter(string programName, Token t, Variant text)
166     {
167         insertAfter(programName, t.getTokenIndex, text);
168     }
169 
170     public void insertAfter(string programName, size_t index, Variant text)
171     {
172         // to insert after, just insert before next index (even if past end)
173         RewriteOperation op = new InsertAfterOp(index, text);
174         op.instructionIndex = programs[programName].length;
175         programs[programName] ~= op;
176     }
177 
178     public void insertBefore(Token t, Variant text)
179     {
180         insertBefore(DEFAULT_PROGRAM_NAME, t, text);
181     }
182 
183     public void insertBefore(size_t index, Variant text)
184     {
185         insertBefore(DEFAULT_PROGRAM_NAME, index, text);
186     }
187 
188     public void insertBefore(string programName, Token t, Variant text)
189     {
190         insertBefore(programName, t.getTokenIndex(), text);
191     }
192 
193     public void insertBefore(string programName, size_t index, Variant text)
194     {
195         RewriteOperation op = new InsertBeforeOp(index, text);
196         op.instructionIndex = programs[programName].length;
197         programs[programName] ~= op;
198     }
199 
200     public void replace(size_t index, Variant text)
201     {
202         replace(DEFAULT_PROGRAM_NAME, index, index, text);
203     }
204 
205     public void replace(size_t from, size_t to, Variant text)
206     {
207         replace(DEFAULT_PROGRAM_NAME, from, to, text);
208     }
209 
210     public void replace(Token indexT, Variant text)
211     {
212         replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
213     }
214 
215     public void replace(Token from, Token to, Variant text)
216     {
217         replace(DEFAULT_PROGRAM_NAME, from, to, text);
218     }
219 
220     public void replace(string programName, size_t from, size_t to, Variant text)
221     {
222         if ( from > to || from<0 || to<0 || to >= tokens_.size ) {
223             throw
224                 new IllegalArgumentException(
225                     format!"replace: range invalid: %s..%s(size=%s)"
226                            (from, to, tokens_.size));
227         }
228         RewriteOperation op = new ReplaceOp(from, to, text);
229         op.instructionIndex = programs[programName].length;
230         programs[programName] ~= op;
231 
232         debug(TokenStreamRewriter) {
233             import std.stdio : writefln;
234             writefln("replace end: op = %s, programs = %s", op, programs);
235         }
236     }
237 
238     public void replace(string programName, Token from, Token to, Variant text)
239     {
240         debug(TokenStreamRewriter) {
241             import std.stdio : writefln;
242             writefln("replace constructor2: from = %s, to = %s, text = %s", from, to, text);
243         }
244         replace(programName,
245                 from.getTokenIndex,
246                 to.getTokenIndex,
247                 text);
248     }
249 
250     /**
251      * Delete token (can not use delete as identifier)
252      */
253     public void deleteT(size_t index)
254     {
255         deleteT(DEFAULT_PROGRAM_NAME, index, index);
256     }
257 
258     public void deleteT(size_t from, size_t to)
259     {
260         deleteT(DEFAULT_PROGRAM_NAME, from, to);
261     }
262 
263     public void deleteT(Token indexT)
264     {
265         deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT);
266     }
267 
268     public void deleteT(Token from, Token to)
269     {
270         deleteT(DEFAULT_PROGRAM_NAME, from, to);
271     }
272 
273     public void deleteT(string programName, size_t from, size_t to)
274     {
275         Variant Null;
276         replace(programName, from, to, Null);
277     }
278 
279     public void deleteT(string programName, Token from, Token to)
280     {
281         Variant Null;
282         replace(programName, from, to, Null);
283     }
284 
285     public size_t getLastRewriteTokenIndex()
286     {
287         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
288     }
289 
290     private size_t getLastRewriteTokenIndex(string programName)
291     {
292         if (programName in lastRewriteTokenIndexes) {
293             return lastRewriteTokenIndexes[programName];
294         }
295         else {
296             return -1;
297         }
298     }
299 
300     private void setLastRewriteTokenIndex(string programName, size_t i)
301     {
302         lastRewriteTokenIndexes[programName] =  i;
303     }
304 
305     private RewriteOperation[] getProgram(string name)
306     {
307         if (name in programs) {
308             return programs[name];
309         }
310         else {
311             return initializeProgram(name);
312         }
313     }
314 
315     private RewriteOperation[] initializeProgram(string name)
316     {
317         RewriteOperation[] iso;
318         programs[name] = iso;
319         return iso;
320     }
321 
322     /**
323      * Return the text from the original tokens altered per the
324      *  instructions given to this rewriter.
325      */
326     public Variant getText()
327     {
328         return getText(DEFAULT_PROGRAM_NAME, Interval.of(0, to!int(tokens_.size) - 1));
329     }
330 
331     /**
332      * Return the text from the original tokens altered per the
333      *  instructions given to this rewriter in programName.
334      */
335     public Variant getText(string programName)
336     {
337         return getText(programName, Interval.of(0, to!int(tokens_.size) - 1));
338     }
339 
340     /**
341      * Return the text associated with the tokens in the interval from the
342      *  original token stream but with the alterations given to this rewriter.
343      *  The interval refers to the indexes in the original token stream.
344      *  We do not alter the token stream in any way, so the indexes
345      *  and intervals are still consistent. Includes any operations done
346      *  to the first and last token in the interval. So, if you did an
347      *  insertBefore on the first token, you would get that insertion.
348      *  The same is true if you do an insertAfter the stop token.
349      */
350     public Variant getText(Interval interval)
351     {
352         return getText(DEFAULT_PROGRAM_NAME, interval);
353     }
354 
355     public Variant getText(string programName, Interval interval)
356     {
357         RewriteOperation[] rewrites;
358 
359         if (programName in programs)
360             rewrites = programs[programName];
361 
362         int start = interval.a;
363         int stop = interval.b;
364 
365         // ensure start/end are in range
366         if ( stop > to!int(tokens_.size) - 1 )
367             stop = to!int(tokens_.size) - 1;
368         if ( start < 0 )
369             start = 0;
370 
371         if (!rewrites) {
372             return tokens_.getText(interval); // no instructions to execute
373         }
374 
375         Variant buf;
376 
377         // First, optimize instruction stream
378         RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites);
379 
380         // Walk buffer, executing instructions and emitting tokens
381         int i = start;
382 
383         debug(TokenStreamRewriter) {
384                     import std.stdio : stderr, writefln;
385                     writefln("tokens_.size = %s", tokens_.size);
386                 }
387 
388         while (i <= stop && i < tokens_.size) {
389             Token t = tokens_.get(i);
390             debug(TokenStreamRewriter) {
391                     import std.stdio : stderr, writefln;
392                     writefln("i = %s, token = %s", i, t);
393                 }
394             RewriteOperation op;
395             if (i in indexToOp)
396                 op = indexToOp[i];
397 
398             indexToOp.remove(i); // remove so any left have index size-1
399 
400             if (!op) {
401                 // no operation at that index, just dump token
402                 if (t.getType != TokenConstantDefinition.EOF) {
403                     Variant Null;
404                     buf is Null ? buf = t.getText : (buf ~= t.getText);
405                 }
406                 i++; // move to next token
407             }
408             else {
409                 i = to!int(op.execute(buf)); // execute operation and skip
410             }
411         }
412 
413         // include stuff after end if it's last index in buffer
414         // So, if they did an insertAfter(lastValidIndex, "foo"), include
415         // foo if end==lastValidIndex.
416         if (stop == tokens_.size()-1) {
417             // Scan any remaining operations after last token
418             // should be included (they will be inserts).
419             foreach (RewriteOperation op; indexToOp.values()) {
420                 if (op.index >= tokens_.size-1)
421                     buf ~= op.text;
422             }
423         }
424         return buf;
425     }
426 
427     /**
428      * We need to combine operations and report invalid operations (like
429      * overlapping replaces that are not completed nested). Inserts to
430      * same index need to be combined etc.
431      *
432      * Here are the cases:
433      *
434      *  I.i.u I.j.v                             leave alone, nonoverlapping<br>
435      *  I.i.u I.i.v                             combine: Iivu
436      *
437      *  R.i-j.u R.x-y.v | i-j in x-y            delete first R<br>
438      *  R.i-j.u R.i-j.v                         delete first R<br>
439      *  R.i-j.u R.x-y.v | x-y in i-j            ERROR<br>
440      *  R.i-j.u R.x-y.v | boundaries overlap    ERROR
441      *
442      *  Delete special case of replace (text==null):<br>
443      *  D.i-j.u D.x-y.v | boundaries overlap    combine to max(min)..max(right)
444      *
445      *  I.i.u R.x-y.v | i in (x+1)-y            delete I (since insert before<br>
446      *                                          we're not deleting i)<br>
447      *  I.i.u R.x-y.v | i not in (x+1)-y        leave alone, nonoverlapping<br>
448      *  R.x-y.v I.i.u | i in x-y                ERROR<br>
449      *  R.x-y.v I.x.u                           R.x-y.uv (combine, delete I)<br>
450      *  R.x-y.v I.i.u | i not in x-y            leave alone, nonoverlapping
451      *
452      *  I.i.u = insert u before op @ index i<br>
453      *  R.x-y.u = replace x-y indexed tokens with u
454      *
455      *  First we need to examine replaces. For any replace op:
456      *
457      *      1. wipe out any insertions before op within that range.<br>
458      *      2. Drop any replace op before that is contained completely within
459      *   that range.<br>
460      *      3. Throw exception upon boundary overlap with any previous replace.
461      *
462      *  Then we can deal with inserts:
463      *
464      *      1. for any inserts to same index, combine even if not adjacent.<br>
465      *      2. for any prior replace with same left boundary, combine this
466      *   insert with replace and delete this replace.<br>
467      *      3. throw exception if index in same range as previous replace
468      *
469      *  Don't actually delete; make op null in list. Easier to walk list.
470      *  Later we can throw as we add to index &rarr; op map.
471      *
472      *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
473      *  inserted stuff would be before the replace range. But, if you
474      *  add tokens in front of a method body '{' and then delete the method
475      *  body, I think the stuff before the '{' you added should disappear too.
476      *
477      *  Return:
478      *  a map from token index to operation.
479      */
480     protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites)
481     {
482         debug(TokenStreamRewriter) {
483             import std.stdio : writefln;
484             writefln("reduceToSingleOperationPerIndex");
485             foreach (i, rew; rewrites)
486                 writefln("\trewrites[%s] = %s", i, rew);
487         }
488 
489         // WALK REPLACES
490         for (size_t i = 0; i < rewrites.length; i++) {
491             RewriteOperation op = rewrites[i];
492             debug(TokenStreamRewriter) {
493                 import std.stdio : writefln;
494                 writefln("op0 = %s", op);
495             }
496             if (op is null) continue;
497             if (!(cast(ReplaceOp)op)) {
498                 continue;
499             }
500             debug(TokenStreamRewriter) {
501                 import std.stdio : writefln;
502                 writefln("op = %s", op);
503             }
504             ReplaceOp rop = cast(ReplaceOp)rewrites[i];
505             // Wipe prior inserts within range
506             InsertBeforeOp[] inserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
507             foreach (InsertBeforeOp iop; inserts) {
508                 if ( iop.index == rop.index ) {
509                     // E.g., insert before 2, delete 2..2; update replace
510                     // text to include insert before, kill insert
511                     rewrites[iop.instructionIndex] = null;
512                     Variant Null;
513                     rop.text = iop.text ~ (rop.text !is Null?rop.text:Null);
514                 }
515                 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) {
516                     // delete insert as it's a no-op.
517                     rewrites[iop.instructionIndex] =  null;
518                 }
519             }
520             // Drop any prior replaces contained within
521             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
522             foreach (ReplaceOp prevRop; prevReplaces) {
523                 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
524                     // delete replace as it's a no-op.
525                     rewrites[prevRop.instructionIndex] = null;
526                     continue;
527                 }
528                 // throw exception unless disjoint or identical
529                 bool disjoint =
530                     prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
531                 // Delete special case of replace (text==null):
532                 // D.i-j.u D.x-y.v  | boundaries overlap    combine to max(min)..max(right)
533                 if ( prevRop.text==null && rop.text==null && !disjoint ) {
534                     debug(TokenStreamRewriter) {
535                         import std.stdio : writefln;
536                         writefln("overlapping deletes: %s, %s", prevRop, rop);
537                     }
538                     rewrites[prevRop.instructionIndex] = null; // kill first delete
539                     rop.index = min(prevRop.index, rop.index);
540                     rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex);
541                     debug {
542                         import std.stdio : stderr, writefln;
543                         stderr.writefln("new rop %s", rop);
544                     }
545                 }
546                 else if ( !disjoint ) {
547                     throw
548                         new
549                         IllegalArgumentException(format(
550                                                         "replace op boundaries of %s overlap with previous %s",
551                                                         rop,
552                                                         prevRop));
553                 }
554             }
555         }
556 
557         // WALK INSERTS
558         debug(TokenStreamRewriter) {
559             import std.stdio : stderr, writefln;
560             writefln("WALK INSERTS");
561         }
562         for (int i = 0; i < rewrites.length; i++) {
563             RewriteOperation op = rewrites[i];
564             if (op is null) continue;
565             if (!(cast(InsertBeforeOp)op)) continue;
566             InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i];
567             // combine current insert with prior if any at same index
568             InsertBeforeOp[] prevInserts = getKindOfOps!(InsertBeforeOp)(rewrites, i);
569             foreach (InsertBeforeOp prevIop; prevInserts) {
570                 debug(TokenStreamRewriter) {
571                     import std.stdio : writefln;
572                     writefln("prevIop = %s", prevIop);
573                 }
574                 if (prevIop.index == iop.index) {
575                     if (cast(InsertAfterOp)prevIop) {
576                         iop.text = catOpText(prevIop.text, iop.text);
577                         rewrites[prevIop.instructionIndex] = null;
578                     }
579                     else if (cast(InsertBeforeOp)prevIop) { // combine objects
580                         // convert to strings...we're in process of toString'ing
581                         // whole token buffer so no lazy eval issue with any templates
582                         iop.text = catOpText(iop.text, prevIop.text);
583                         // delete redundant prior insert
584                         rewrites[prevIop.instructionIndex] = null;
585                     }
586                 }
587             }
588             // look for replaces where iop.index is in range; error
589             debug(TokenStreamRewriter) {
590                 import std.stdio : stderr, writefln;
591                 writefln("look for replaces where iop.index is in range, i = %s", i);
592             }
593             ReplaceOp[] prevReplaces = getKindOfOps!(ReplaceOp)(rewrites, i);
594             debug(TokenStreamRewriter) {
595                 import std.stdio : stderr, writefln;
596                 writefln("prevReplaces = %s", prevReplaces);
597             }
598             foreach (ReplaceOp rop; prevReplaces) {
599                 if ( iop.index == rop.index ) {
600                     rop.text = catOpText(iop.text, rop.text);
601                     rewrites[i] = null; // delete current insert
602                     continue;
603                 }
604                 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
605                     throw
606                         new
607                         IllegalArgumentException(
608                                                  format("insert op %s within boundaries of previous %s",
609                                                         iop, rop));
610                 }
611             }
612         }
613 
614         debug(TokenStreamRewriter) {
615             import std.stdio : stderr, writefln;
616             writefln("rewrites after = %s", rewrites);
617         }
618         RewriteOperation[size_t] m;
619         for (int i = 0; i < rewrites.length; i++) {
620             RewriteOperation op = rewrites[i];
621             if (!op) continue; // ignore deleted ops
622             if (op.index in m) {
623                 throw new Error("should only be one op per index");
624             }
625             m[op.index] = op;
626         }
627         return m;
628     }
629 
630     protected Variant catOpText(Variant a, Variant b)
631     {
632         Variant Null;
633         if (a !is Null && b !is Null)
634             return a ~ b;
635         if (a !is Null)
636             return a;
637         return b;
638     }
639 
640     protected auto getKindOfOps(U)(RewriteOperation[] rewrites, size_t before)
641     {
642         U[] ops;
643         for (int i=0; i<before && i<rewrites.length; i++) {
644             RewriteOperation op = rewrites[i];
645             if (op is null) continue; // ignore deleted
646             if (U.classinfo == op.classinfo) {
647                 ops ~= cast(U)(op);
648             }
649         }
650         return ops;
651     }
652 
653     public static TokenStream tokens()
654     {
655         return tokens_;
656     }
657 
658 }