1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.TokenStreamRewriter;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.InsertAfterOp;
11 import antlr.v4.runtime.InsertBeforeOp;
12 import antlr.v4.runtime.ReplaceOp;
13 import antlr.v4.runtime.RewriteOperation;
14 import antlr.v4.runtime.Token;
15 import antlr.v4.runtime.TokenConstantDefinition;
16 import antlr.v4.runtime.TokenStream;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm.comparison;
19 import std.format;
20 import std.conv;
21 
22 /**
23  * Useful for rewriting out a buffered input token stream after doing some
24  * augmentation or other manipulations on it.
25  *
26  * <p>
27  * You can insert stuff, replace, and delete chunks. Note that the operations
28  * are done lazily--only if you convert the buffer to a {@link String} with
29  * {@link TokenStream#getText()}. This is very efficient because you are not
30  * moving data around all the time. As the buffer of tokens is converted to
31  * strings, the {@link #getText()} method(s) scan the input token stream and
32  * check to see if there is an operation at the current index. If so, the
33  * operation is done and then normal {@link String} rendering continues on the
34  * buffer. This is like having multiple Turing machine instruction streams
35  * (programs) operating on a single input tape. :)</p>
36  *
37  * <p>
38  * This rewriter makes no modifications to the token stream. It does not ask the
39  * stream to fill itself up nor does it advance the input cursor. The token
40  * stream {@link TokenStream#index()} will return the same value before and
41  * after any {@link #getText()} call.</p>
42  *
43  * <p>
44  * The rewriter only works on tokens that you have in the buffer and ignores the
45  * current input cursor. If you are buffering tokens on-demand, calling
46  * {@link #getText()} halfway through the input will only do rewrites for those
47  * tokens in the first half of the file.</p>
48  *
49  * <p>
50  * Since the operations are done lazily at {@link #getText}-time, operations do
51  * not screw up the token index values. That is, an insert operation at token
52  * index {@code i} does not change the index values for tokens
53  * {@code i}+1..n-1.</p>
54  *
55  * <p>
56  * Because operations never actually alter the buffer, you may always get the
57  * original token stream back without undoing anything. Since the instructions
58  * are queued up, you can easily simulate transactions and roll back any changes
59  * if there is an error just by removing instructions. For example,</p>
60  *
61  * <pre>
62  * CharStream input = new ANTLRFileStream("input");
63  * TLexer lex = new TLexer(input);
64  * CommonTokenStream tokens = new CommonTokenStream(lex);
65  * T parser = new T(tokens);
66  * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
67  * parser.startRule();
68  * </pre>
69  *
70  * <p>
71  * Then in the rules, you can execute (assuming rewriter is visible):</p>
72  *
73  * <pre>
74  * Token t,u;
75  * ...
76  * rewriter.insertAfter(t, "text to put after t");}
77  * rewriter.insertAfter(u, "text after u");}
78  * System.out.println(rewriter.getText());
79  * </pre>
80  *
81  * <p>
82  * You can also have multiple "instruction streams" and get multiple rewrites
83  * from a single pass over the input. Just name the instruction streams and use
84  * that name again when printing the buffer. This could be useful for generating
85  * a C file and also its header file--all from the same buffer:</p>
86  *
87  * <pre>
88  * rewriter.insertAfter("pass1", t, "text to put after t");}
89  * rewriter.insertAfter("pass2", u, "text after u");}
90  * System.out.println(rewriter.getText("pass1"));
91  * System.out.println(rewriter.getText("pass2"));
92  * </pre>
93  *
94  * <p>
95  * If you don't use named rewrite streams, a "default" stream is used as the
96  * first example shows.</p>
97  */
98 class TokenStreamRewriter(T)
99 {
100 
101     public static immutable string DEFAULT_PROGRAM_NAME = "default";
102 
103     public static immutable int MIN_TOKEN_INDEX = 0;
104 
105     /**
106      * Our source stream
107      * @uml
108      * @read
109      */
110     private static TokenStream tokens_;
111 
112     protected RewriteOperation!T[][string] programs;
113 
114     protected size_t[string] lastRewriteTokenIndexes;
115 
116     private RewriteOperation!T[] rewriteOps;
117 
118     public this()
119     {
120     }
121 
122     public this(TokenStream tokens)
123     {
124         tokens_ = tokens;
125         programs[DEFAULT_PROGRAM_NAME] = rewriteOps;
126     }
127 
128     public void rollback(int instructionIndex)
129     {
130         rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
131     }
132 
133     /**
134      * Rollback the instruction stream for a program so that
135      *  the indicated instruction (via instructionIndex) is no
136      *  longer in the stream. UNTESTED!
137      */
138     public void rollback(string programName, int instructionIndex)
139     {
140         RewriteOperation!T[] ist = programs[programName];
141         if (programName in programs) {
142             programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex];
143         }
144     }
145 
146     public void deleteProgram()
147     {
148         deleteProgram(DEFAULT_PROGRAM_NAME);
149     }
150 
151     /**
152      * Reset the program so that no instructions exist
153      */
154     public void deleteProgram(string programName)
155     {
156         rollback(programName, MIN_TOKEN_INDEX);
157     }
158 
159     public void insertAfter(Token t, T text)
160     {
161         insertAfter(DEFAULT_PROGRAM_NAME, t, text);
162     }
163 
164     public void insertAfter(int index, T text)
165     {
166         insertAfter(DEFAULT_PROGRAM_NAME, index, text);
167     }
168 
169     public void insertAfter(string programName, Token t, T text)
170     {
171         insertAfter(programName, t.getTokenIndex, text);
172     }
173 
174     public void insertAfter(string programName, int index, T text)
175     {
176         // to insert after, just insert before next index (even if past end)
177         RewriteOperation!T op = new InsertAfterOp!T(index, text);
178         op.instructionIndex = programs[programName].length;
179         programs[programName] ~= op;
180     }
181 
182     public void insertBefore(Token t, T text)
183     {
184         insertBefore(DEFAULT_PROGRAM_NAME, t, text);
185     }
186 
187     public void insertBefore(size_t index, T text)
188     {
189         insertBefore(DEFAULT_PROGRAM_NAME, index, text);
190     }
191 
192     public void insertBefore(string programName, Token t, T text)
193     {
194         insertBefore(programName, t.getTokenIndex(), text);
195     }
196 
197     public void insertBefore(string programName, size_t index, T text)
198     {
199         RewriteOperation!T op = new InsertBeforeOp!T(index, text);
200         op.instructionIndex = programs[programName].length;
201         programs[programName] ~= op;
202     }
203 
204     public void replace(size_t index, T text)
205     {
206         replace(DEFAULT_PROGRAM_NAME, index, index, text);
207     }
208 
209     public void replace(size_t from, size_t to, T text)
210     {
211         replace(DEFAULT_PROGRAM_NAME, from, to, text);
212     }
213 
214     public void replace(Token indexT, T text)
215     {
216         replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
217     }
218 
219     public void replace(Token from, Token to, T text)
220     {
221         replace(DEFAULT_PROGRAM_NAME, from, to, text);
222     }
223 
224     public void replace(string programName, size_t from, size_t to, T text)
225     {
226         debug(TokenStreamRewriter) {
227             import std.stdio : writefln;
228             writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text);
229         }
230         if ( from > to || from<0 || to<0 || to >= tokens_.size ) {
231             throw
232                 new
233                 IllegalArgumentException(
234                                          format("replace: range invalid: %s..%s(size=%s)",
235                                                 from, to, tokens_.size));
236         }
237         RewriteOperation!T op = new ReplaceOp!T(from, to, text);
238         op.instructionIndex = programs[programName].length;
239         programs[programName] ~= op;
240     }
241 
242     public void replace(string programName, Token from, Token to, T text)
243     {
244         debug(TokenStreamRewriter) {
245             import std.stdio : writefln;
246             writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text);
247         }
248         replace(programName,
249                 from.getTokenIndex,
250                 to.getTokenIndex,
251                 text);
252     }
253 
254     /**
255      * Delete token (can not use delete as identifier)
256      */
257     public void deleteT(size_t index)
258     {
259         deleteT(DEFAULT_PROGRAM_NAME, index, index);
260     }
261 
262     public void deleteT(size_t from, size_t to)
263     {
264         deleteT(DEFAULT_PROGRAM_NAME, from, to);
265     }
266 
267     public void deleteT(Token indexT)
268     {
269         deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT);
270     }
271 
272     public void deleteT(Token from, Token to)
273     {
274         deleteT(DEFAULT_PROGRAM_NAME, from, to);
275     }
276 
277     public void deleteT(string programName, size_t from, size_t to)
278     {
279         replace(programName,from,to,null);
280     }
281 
282     public void deleteT(string programName, Token from, Token to)
283     {
284         replace(programName,from,to,null);
285     }
286 
287     public size_t getLastRewriteTokenIndex()
288     {
289         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
290     }
291 
292     private size_t getLastRewriteTokenIndex(string programName)
293     {
294         if (programName in lastRewriteTokenIndexes) {
295             return lastRewriteTokenIndexes[programName];
296         }
297         else {
298             return -1;
299         }
300     }
301 
302     private void setLastRewriteTokenIndex(string programName, size_t i)
303     {
304         lastRewriteTokenIndexes[programName] =  i;
305     }
306 
307     private RewriteOperation!T[] getProgram(string name)
308     {
309         if (name in programs) {
310             return programs[name];
311         }
312         else {
313             return initializeProgram(name);
314         }
315     }
316 
317     private RewriteOperation!T[] initializeProgram(string name)
318     {
319         RewriteOperation!T[] iso;
320         programs[name] = iso;
321         return iso;
322     }
323 
324     /**
325      * Return the text from the original tokens altered per the
326      *  instructions given to this rewriter.
327      */
328     public T getText()
329     {
330         return getText(DEFAULT_PROGRAM_NAME, Interval.of(0,tokens_.size()-1));
331     }
332 
333     /**
334      * Return the text from the original tokens altered per the
335      *  instructions given to this rewriter in programName.
336      */
337     public T getText(string programName)
338     {
339         return getText(programName, Interval.of(0,tokens_.size-1));
340     }
341 
342     /**
343      * Return the text associated with the tokens in the interval from the
344      *  original token stream but with the alterations given to this rewriter.
345      *  The interval refers to the indexes in the original token stream.
346      *  We do not alter the token stream in any way, so the indexes
347      *  and intervals are still consistent. Includes any operations done
348      *  to the first and last token in the interval. So, if you did an
349      *  insertBefore on the first token, you would get that insertion.
350      *  The same is true if you do an insertAfter the stop token.
351      */
352     public T getText(Interval interval)
353     {
354         return getText(DEFAULT_PROGRAM_NAME, interval);
355     }
356 
357     public T getText(string programName, Interval interval)
358     {
359         RewriteOperation!T[] rewrites;
360 
361         if (programName in programs)
362             rewrites = programs[programName];
363 
364         int start = interval.a;
365         int stop = interval.b;
366 
367         // ensure start/end are in range
368         if ( stop > tokens_.size-1 )
369             stop = tokens_.size-1;
370         if ( start < 0 )
371             start = 0;
372 
373         if (rewrites.length == 0) {
374             static if (is(T == string)) {
375                 return tokens_.getText(interval); // no instructions to execute
376             }
377             else {
378                 return getPositionText(tokens_, interval);
379             }
380         }
381 
382         T buf;
383 
384         // First, optimize instruction stream
385         RewriteOperation!T[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites);
386 
387         // Walk buffer, executing instructions and emitting tokens
388         int i = start;
389 
390         while (i <= stop && i < tokens_.size) {
391             Token t = tokens_.get(i);
392             RewriteOperation!T op;
393             if (i in indexToOp)
394                 op = indexToOp[i];
395             indexToOp.remove(i); // remove so any left have index size-1
396             if (!op) {
397                 // no operation at that index, just dump token
398                 if (t.getType != TokenConstantDefinition.EOF) {
399                     static if (is(T == string)) {
400                         buf ~= t.getText;
401                     }
402                     else {
403                         buf ~= getPositionText(t);
404                     }
405                 }
406                 i++; // move to next token
407             }
408             else {
409                 i = to!int(op.execute(buf)); // execute operation and skip
410             }
411         }
412 
413         // include stuff after end if it's last index in buffer
414         // So, if they did an insertAfter(lastValidIndex, "foo"), include
415         // foo if end==lastValidIndex.
416         if (stop == tokens_.size()-1) {
417             // Scan any remaining operations after last token
418             // should be included (they will be inserts).
419             foreach (RewriteOperation!T op; indexToOp.values()) {
420                 if (op.index >= tokens_.size-1)
421                     buf ~= to!T(op.text);
422             }
423         }
424         return buf;
425     }
426 
427     /**
428      *  overlapping replaces that are not completed nested). Inserts to
429      *  same index need to be combined etc...  Here are the cases:
430      *
431      *  I.i.u I.j.v								leave alone, nonoverlapping
432      *  I.i.u I.i.v								combine: Iivu
433      *
434      *  R.i-j.u R.x-y.v	| i-j in x-y			delete first R
435      *  R.i-j.u R.i-j.v							delete first R
436      *  R.i-j.u R.x-y.v	| x-y in i-j			ERROR
437      *  R.i-j.u R.x-y.v	| boundaries overlap	ERROR
438      *
439      *  Delete special case of replace (text==null):
440      *  D.i-j.u D.x-y.v	| boundaries overlap	combine to max(min)..max(right)
441      *
442      *  I.i.u R.x-y.v | i in (x+1)-y			delete I (since insert before
443      *											we're not deleting i)
444      *  I.i.u R.x-y.v | i not in (x+1)-y		leave alone, nonoverlapping
445      *  R.x-y.v I.i.u | i in x-y				ERROR
446      *  R.x-y.v I.x.u 							R.x-y.uv (combine, delete I)
447      *  R.x-y.v I.i.u | i not in x-y			leave alone, nonoverlapping
448      *
449      *  I.i.u = insert u before op @ index i
450      *  R.x-y.u = replace x-y indexed tokens with u
451      *
452      *  First we need to examine replaces. For any replace op:
453      *
454      * 		1. wipe out any insertions before op within that range.
455      *		2. Drop any replace op before that is contained completely within
456      *	 that range.
457      *		3. Throw exception upon boundary overlap with any previous replace.
458      *
459      *  Then we can deal with inserts:
460      *
461      * 		1. for any inserts to same index, combine even if not adjacent.
462      * 		2. for any prior replace with same left boundary, combine this
463      *	 insert with replace and delete this replace.
464      * 		3. throw exception if index in same range as previous replace
465      *
466      *  Don't actually delete; make op null in list. Easier to walk list.
467      *  Later we can throw as we add to index &rarr; op map.
468      *
469      *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
470      *  inserted stuff would be before the replace range. But, if you
471      *  add tokens in front of a method body '{' and then delete the method
472      *  body, I think the stuff before the '{' you added should disappear too.
473      *
474      *  Return a map from token index to operation.
475      */
476     protected RewriteOperation!T[size_t] reduceToSingleOperationPerIndex(RewriteOperation!T[] rewrites)
477     {
478         debug(TokenStreamRewriter) {
479             import std.stdio : writefln;
480             writefln("reduceToSingleOperationPerIndex");
481             foreach (i, rew; rewrites)
482                 writefln("\trewrites[%s] = %s", i, rew);
483         }
484 
485         // WALK REPLACES
486         for (size_t i = 0; i < rewrites.length; i++) {
487             RewriteOperation!T op = rewrites[i];
488             debug(TokenStreamRewriter) {
489                 import std.stdio : writefln;
490                 writefln("op0 = %s", op);
491             }
492             if (op is null) continue;
493             if (!(cast(ReplaceOp!T)op)) {
494                 continue;
495             }
496             debug(TokenStreamRewriter) {
497                 import std.stdio : writefln;
498                 writefln("op = %s", op);
499             }
500             ReplaceOp!T rop = cast(ReplaceOp!T)rewrites[i];
501             // Wipe prior inserts within range
502             InsertBeforeOp!T[] inserts = getKindOfOps!(InsertBeforeOp!T)(rewrites, i);
503             foreach (InsertBeforeOp!T iop; inserts) {
504                 if ( iop.index == rop.index ) {
505                     // E.g., insert before 2, delete 2..2; update replace
506                     // text to include insert before, kill insert
507                     rewrites[iop.instructionIndex] = null;
508                     rop.text = iop.text ~ (rop.text !is null?rop.text:T.init);
509                 }
510                 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) {
511                     // delete insert as it's a no-op.
512                     rewrites[iop.instructionIndex] =  null;
513                 }
514             }
515             // Drop any prior replaces contained within
516             ReplaceOp!T[] prevReplaces = getKindOfOps!(ReplaceOp!T)(rewrites, i);
517             foreach (ReplaceOp!T prevRop; prevReplaces) {
518                 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
519                     // delete replace as it's a no-op.
520                     rewrites[prevRop.instructionIndex] = null;
521                     continue;
522                 }
523                 // throw exception unless disjoint or identical
524                 bool disjoint =
525                     prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
526                 // Delete special case of replace (text==null):
527                 // D.i-j.u D.x-y.v	| boundaries overlap	combine to max(min)..max(right)
528                 if ( prevRop.text==null && rop.text==null && !disjoint ) {
529                     //System.out.println("overlapping deletes: "+prevRop+", "+rop);
530                     rewrites[prevRop.instructionIndex] = null; // kill first delete
531                     rop.index = min(prevRop.index, rop.index);
532                     rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex);
533                     debug {
534                         import std.stdio : stderr, writefln;
535                         stderr.writefln("new rop %s", rop);
536                     }
537                 }
538                 else if ( !disjoint ) {
539                     throw
540                         new
541                         IllegalArgumentException(format(
542                                                         "replace op boundaries of %s overlap with previous %s",
543                                                         rop,
544                                                         prevRop));
545                 }
546             }
547         }
548 
549         // WALK INSERTS
550         debug(TokenStreamRewriter) {
551             import std.stdio : stderr, writefln;
552             writefln("WALK INSERTS");
553         }
554         for (int i = 0; i < rewrites.length; i++) {
555             RewriteOperation!T op = rewrites[i];
556             if (op is null) continue;
557             if (!(cast(InsertBeforeOp!T)op)) continue;
558             InsertBeforeOp!T iop = cast(InsertBeforeOp!T)rewrites[i];
559             // combine current insert with prior if any at same index
560             InsertBeforeOp!T[] prevInserts = getKindOfOps!(InsertBeforeOp!T)(rewrites, i);
561             foreach (InsertBeforeOp!T prevIop; prevInserts) {
562                 debug(TokenStreamRewriter) {
563                     import std.stdio : stderr, writefln;
564                     writefln("prevIop = %s", prevIop);
565                 }
566                 if (prevIop.index == iop.index) {
567                     if (cast(InsertAfterOp!T)prevIop) {
568                         iop.text = catOpText(prevIop.text, iop.text);
569                         rewrites[prevIop.instructionIndex] = null;
570                     }
571                     else if (cast(InsertBeforeOp!T)prevIop) { // combine objects
572                         // convert to strings...we're in process of toString'ing
573                         // whole token buffer so no lazy eval issue with any templates
574                         iop.text = catOpText(iop.text, prevIop.text);
575                         // delete redundant prior insert
576                         rewrites[prevIop.instructionIndex] = null;
577                     }
578                 }
579             }
580             // look for replaces where iop.index is in range; error
581             debug(TokenStreamRewriter) {
582                 import std.stdio : stderr, writefln;
583                 writefln("look for replaces where iop.index is in range, i = %s", i);
584             }
585             ReplaceOp!T[] prevReplaces = getKindOfOps!(ReplaceOp!T)(rewrites, i);
586             debug(TokenStreamRewriter) {
587                 import std.stdio : stderr, writefln;
588                 writefln("prevReplaces = %s", prevReplaces);
589             }
590             foreach (ReplaceOp!T rop; prevReplaces) {
591                 if ( iop.index == rop.index ) {
592                     rop.text = catOpText(iop.text, rop.text);
593                     rewrites[i] = null;	// delete current insert
594                     continue;
595                 }
596                 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
597                     throw
598                         new
599                         IllegalArgumentException(
600                                                  format("insert op %s within boundaries of previous %s",
601                                                         iop, rop));
602                 }
603             }
604         }
605 
606         debug(TokenStreamRewriter) {
607             import std.stdio : stderr, writefln;
608             writefln("rewrites after = %s", rewrites);
609         }
610         RewriteOperation!T[size_t] m;
611         for (int i = 0; i < rewrites.length; i++) {
612             RewriteOperation!T op = rewrites[i];
613             if (op is null) continue; // ignore deleted ops
614             if (op.index in m) {
615                 throw new Error("should only be one op per index");
616             }
617             m[op.index] = op;
618         }
619         return m;
620     }
621 
622     protected T catOpText(T a, T b)
623     {
624         if (a && b)
625             return a ~ b;
626         if (a)
627             return a;
628         return b;
629     }
630 
631     protected auto getKindOfOps(U)(RewriteOperation!T[] rewrites, size_t before)
632     {
633         U[] ops;
634         for (int i=0; i<before && i<rewrites.length; i++) {
635             RewriteOperation!T op = rewrites[i];
636             if (op is null) continue; // ignore deleted
637             if (U.classinfo == op.classinfo) {
638                 ops ~= cast(U)(op);
639             }
640         }
641         return ops;
642     }
643 
644     protected T getPositionText(Token token)
645     {
646         T buf;
647         return buf;
648     }
649 
650     protected T getPositionText(TokenStream tokens, Interval interval)
651     {
652         T buf;
653         return buf;
654     }
655 
656     public static TokenStream tokens()
657     {
658         return tokens_;
659     }
660 
661 }