1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.TokenStreamRewriter;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.InsertAfterOp;
11 import antlr.v4.runtime.InsertBeforeOp;
12 import antlr.v4.runtime.ReplaceOp;
13 import antlr.v4.runtime.RewriteOperation;
14 import antlr.v4.runtime.Token;
15 import antlr.v4.runtime.TokenConstantDefinition;
16 import antlr.v4.runtime.TokenStream;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm.comparison;
19 import std.format;
20 import std.conv;
21 
22 /**
23  * Useful for rewriting out a buffered input token stream after doing some
24  * augmentation or other manipulations on it.
25  *
26  * <p>
27  * You can insert stuff, replace, and delete chunks. Note that the operations
28  * are done lazily--only if you convert the buffer to a {@link String} with
29  * {@link TokenStream#getText()}. This is very efficient because you are not
30  * moving data around all the time. As the buffer of tokens is converted to
31  * strings, the {@link #getText()} method(s) scan the input token stream and
32  * check to see if there is an operation at the current index. If so, the
33  * operation is done and then normal {@link String} rendering continues on the
34  * buffer. This is like having multiple Turing machine instruction streams
35  * (programs) operating on a single input tape. :)</p>
36  *
37  * <p>
38  * This rewriter makes no modifications to the token stream. It does not ask the
39  * stream to fill itself up nor does it advance the input cursor. The token
40  * stream {@link TokenStream#index()} will return the same value before and
41  * after any {@link #getText()} call.</p>
42  *
43  * <p>
44  * The rewriter only works on tokens that you have in the buffer and ignores the
45  * current input cursor. If you are buffering tokens on-demand, calling
46  * {@link #getText()} halfway through the input will only do rewrites for those
47  * tokens in the first half of the file.</p>
48  *
49  * <p>
50  * Since the operations are done lazily at {@link #getText}-time, operations do
51  * not screw up the token index values. That is, an insert operation at token
52  * index {@code i} does not change the index values for tokens
53  * {@code i}+1..n-1.</p>
54  *
55  * <p>
56  * Because operations never actually alter the buffer, you may always get the
57  * original token stream back without undoing anything. Since the instructions
58  * are queued up, you can easily simulate transactions and roll back any changes
59  * if there is an error just by removing instructions. For example,</p>
60  *
61  * <pre>
62  * CharStream input = new ANTLRFileStream("input");
63  * TLexer lex = new TLexer(input);
64  * CommonTokenStream tokens = new CommonTokenStream(lex);
65  * T parser = new T(tokens);
66  * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
67  * parser.startRule();
68  * </pre>
69  *
70  * <p>
71  * Then in the rules, you can execute (assuming rewriter is visible):</p>
72  *
73  * <pre>
74  * Token t,u;
75  * ...
76  * rewriter.insertAfter(t, "text to put after t");}
77  * rewriter.insertAfter(u, "text after u");}
78  * System.out.println(rewriter.getText());
79  * </pre>
80  *
81  * <p>
82  * You can also have multiple "instruction streams" and get multiple rewrites
83  * from a single pass over the input. Just name the instruction streams and use
84  * that name again when printing the buffer. This could be useful for generating
85  * a C file and also its header file--all from the same buffer:</p>
86  *
87  * <pre>
88  * rewriter.insertAfter("pass1", t, "text to put after t");}
89  * rewriter.insertAfter("pass2", u, "text after u");}
90  * System.out.println(rewriter.getText("pass1"));
91  * System.out.println(rewriter.getText("pass2"));
92  * </pre>
93  *
94  * <p>
95  * If you don't use named rewrite streams, a "default" stream is used as the
96  * first example shows.</p>
97  */
98 class TokenStreamRewriter
99 {
100 
101     public static immutable string DEFAULT_PROGRAM_NAME = "default";
102 
103     public static immutable int MIN_TOKEN_INDEX = 0;
104 
105     /**
106      * Our source stream
107      * @uml
108      * @read
109      */
110     private static TokenStream tokens_;
111 
112     protected RewriteOperation[][string] programs;
113 
114     protected size_t[string] lastRewriteTokenIndexes;
115 
116     private RewriteOperation[] rewriteOps;
117 
118     public this()
119     {
120     }
121 
122     public this(TokenStream tokens)
123     {
124         tokens_ = tokens;
125         programs[DEFAULT_PROGRAM_NAME] = rewriteOps;
126     }
127 
128     public void rollback(int instructionIndex)
129     {
130         rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
131     }
132 
133     /**
134      * Rollback the instruction stream for a program so that
135      *  the indicated instruction (via instructionIndex) is no
136      *  longer in the stream. UNTESTED!
137      */
138     public void rollback(string programName, int instructionIndex)
139     {
140         RewriteOperation[] ist = programs[programName];
141         if (programName in programs) {
142             programs[programName] = programs[programName][MIN_TOKEN_INDEX .. instructionIndex];
143         }
144     }
145 
146     public void deleteProgram()
147     {
148         deleteProgram(DEFAULT_PROGRAM_NAME);
149     }
150 
151     /**
152      * Reset the program so that no instructions exist
153      */
154     public void deleteProgram(string programName)
155     {
156         rollback(programName, MIN_TOKEN_INDEX);
157     }
158 
159     public void insertAfter(Token t, string text)
160     {
161         insertAfter(DEFAULT_PROGRAM_NAME, t, text);
162     }
163 
164     public void insertAfter(int index, string text)
165     {
166         insertAfter(DEFAULT_PROGRAM_NAME, index, text);
167     }
168 
169     public void insertAfter(string programName, Token t, string text)
170     {
171         insertAfter(programName, t.getTokenIndex, text);
172     }
173 
174     public void insertAfter(string programName, int index, string text)
175     {
176         // to insert after, just insert before next index (even if past end)
177         RewriteOperation op = new InsertAfterOp(index, text);
178         op.instructionIndex = programs[programName].length;
179         programs[programName] ~= op;
180     }
181 
182     public void insertBefore(Token t, string text)
183     {
184         insertBefore(DEFAULT_PROGRAM_NAME, t, text);
185     }
186 
187     public void insertBefore(size_t index, string text)
188     {
189         insertBefore(DEFAULT_PROGRAM_NAME, index, text);
190     }
191 
192     public void insertBefore(string programName, Token t, string text)
193     {
194         insertBefore(programName, t.getTokenIndex(), text);
195     }
196 
197     public void insertBefore(string programName, size_t index, string text)
198     {
199         RewriteOperation op = new InsertBeforeOp(index, text);
200         op.instructionIndex = programs[programName].length;
201         programs[programName] ~= op;
202     }
203 
204     public void replace(size_t index, string text)
205     {
206         replace(DEFAULT_PROGRAM_NAME, index, index, text);
207     }
208 
209     public void replace(size_t from, size_t to, string text)
210     {
211         replace(DEFAULT_PROGRAM_NAME, from, to, text);
212     }
213 
214     public void replace(Token indexT, string text)
215     {
216         replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
217     }
218 
219     public void replace(Token from, Token to, string text)
220     {
221         replace(DEFAULT_PROGRAM_NAME, from, to, text);
222     }
223 
224     public void replace(string programName, size_t from, size_t to, string text)
225     {
226         debug(TokenStreamRewriter) {
227             import std.stdio : writefln;
228             writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text);
229         }
230         if ( from > to || from<0 || to<0 || to >= tokens_.size ) {
231             throw
232                 new
233                 IllegalArgumentException(
234                                          format("replace: range invalid: %s..%s(size=%s)",
235                                                 from, to, tokens_.size));
236         }
237         RewriteOperation op = new ReplaceOp(from, to, text);
238         op.instructionIndex = programs[programName].length;
239         programs[programName] ~= op;
240     }
241 
242     public void replace(string programName, Token from, Token to, string text)
243     {		
244         debug(TokenStreamRewriter) {
245             import std.stdio : writefln;
246             writefln("replace constructor: from = %s, to = %s, text = %s", from, to, text);
247         }
248         replace(programName,
249                 from.getTokenIndex,
250                 to.getTokenIndex,
251                 text);
252     }
253 
254     /**
255      * Delete token (can not use delete as identifier)
256      */
257     public void deleteT(size_t index)
258     {
259         deleteT(DEFAULT_PROGRAM_NAME, index, index);
260     }
261 
262     public void deleteT(size_t from, size_t to)
263     {
264         deleteT(DEFAULT_PROGRAM_NAME, from, to);
265     }
266 
267     public void deleteT(Token indexT)
268     {
269         deleteT(DEFAULT_PROGRAM_NAME, indexT, indexT);
270     }
271 
272     public void deleteT(Token from, Token to)
273     {
274         deleteT(DEFAULT_PROGRAM_NAME, from, to);
275     }
276 
277     public void deleteT(string programName, size_t from, size_t to)
278     {
279         replace(programName,from,to,null);
280     }
281 
282     public void deleteT(string programName, Token from, Token to)
283     {
284         replace(programName,from,to,null);
285     }
286 
287     public size_t getLastRewriteTokenIndex()
288     {
289         return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
290     }
291 
292     private size_t getLastRewriteTokenIndex(string programName)
293     {
294         if (programName in lastRewriteTokenIndexes) {
295             return lastRewriteTokenIndexes[programName];
296         }
297         else {
298             return -1;
299         }
300     }
301 
302     private void setLastRewriteTokenIndex(string programName, size_t i)
303     {
304         lastRewriteTokenIndexes[programName] =  i;
305     }
306 
307     private RewriteOperation[] getProgram(string name)
308     {
309         if (name in programs) {
310             return programs[name];
311         }
312         else {
313             return initializeProgram(name);
314         }
315     }
316 
317     private RewriteOperation[] initializeProgram(string name)
318     {
319         RewriteOperation[] iso;
320         programs[name] = iso;
321         return iso;
322     }
323 
324     /**
325      * Return the text from the original tokens altered per the
326      *  instructions given to this rewriter.
327      */
328     public string getText()
329     {
330         return getText(DEFAULT_PROGRAM_NAME, Interval.of(0,tokens_.size()-1));
331     }
332 
333     /**
334      * Return the text from the original tokens altered per the
335      *  instructions given to this rewriter in programName.
336      */
337     public string getText(string programName)
338     {
339         return getText(programName, Interval.of(0,tokens_.size-1));
340     }
341 
342     /**
343      * Return the text associated with the tokens in the interval from the
344      *  original token stream but with the alterations given to this rewriter.
345      *  The interval refers to the indexes in the original token stream.
346      *  We do not alter the token stream in any way, so the indexes
347      *  and intervals are still consistent. Includes any operations done
348      *  to the first and last token in the interval. So, if you did an
349      *  insertBefore on the first token, you would get that insertion.
350      *  The same is true if you do an insertAfter the stop token.
351      */
352     public string getText(Interval interval)
353     {
354         return getText(DEFAULT_PROGRAM_NAME, interval);
355     }
356 
357     public string getText(string programName, Interval interval)
358     {
359         RewriteOperation[] rewrites;
360 
361         if (programName in programs)
362             rewrites = programs[programName];
363 
364         int start = interval.a;
365         int stop = interval.b;
366 
367         // ensure start/end are in range
368         if ( stop > tokens_.size-1 )
369             stop = tokens_.size-1;
370         if ( start < 0 )
371             start = 0;
372         if (rewrites.length == 0) {
373             return tokens_.getText(interval); // no instructions to execute
374         }
375 
376         string buf;
377 
378         // First, optimize instruction stream
379         RewriteOperation[size_t] indexToOp = reduceToSingleOperationPerIndex(rewrites);
380 
381         // Walk buffer, executing instructions and emitting tokens
382         int i = start;
383 
384         while (i <= stop && i < tokens_.size) {
385             Token t = tokens_.get(i);
386             RewriteOperation op;
387             if (i in indexToOp)
388                 op = indexToOp[i];
389             indexToOp.remove(i); // remove so any left have index size-1
390             if (!op) {
391                 // no operation at that index, just dump token
392                 if (t.getType != TokenConstantDefinition.EOF)
393                     buf ~= t.getText;
394                 i++; // move to next token
395             }
396             else {
397                 i = to!int(op.execute(buf)); // execute operation and skip
398             }
399         }
400 
401         // include stuff after end if it's last index in buffer
402         // So, if they did an insertAfter(lastValidIndex, "foo"), include
403         // foo if end==lastValidIndex.
404         if (stop == tokens_.size()-1) {
405             // Scan any remaining operations after last token
406             // should be included (they will be inserts).
407             foreach (RewriteOperation op; indexToOp.values()) {
408                 if (op.index >= tokens_.size-1)
409                     buf ~= to!string(op.text);
410             }
411         }
412         return buf;
413     }
414 
415     /**
416      *  overlapping replaces that are not completed nested). Inserts to
417      *  same index need to be combined etc...  Here are the cases:
418      *
419      *  I.i.u I.j.v								leave alone, nonoverlapping
420      *  I.i.u I.i.v								combine: Iivu
421      *
422      *  R.i-j.u R.x-y.v	| i-j in x-y			delete first R
423      *  R.i-j.u R.i-j.v							delete first R
424      *  R.i-j.u R.x-y.v	| x-y in i-j			ERROR
425      *  R.i-j.u R.x-y.v	| boundaries overlap	ERROR
426      *
427      *  Delete special case of replace (text==null):
428      *  D.i-j.u D.x-y.v	| boundaries overlap	combine to max(min)..max(right)
429      *
430      *  I.i.u R.x-y.v | i in (x+1)-y			delete I (since insert before
431      *											we're not deleting i)
432      *  I.i.u R.x-y.v | i not in (x+1)-y		leave alone, nonoverlapping
433      *  R.x-y.v I.i.u | i in x-y				ERROR
434      *  R.x-y.v I.x.u 							R.x-y.uv (combine, delete I)
435      *  R.x-y.v I.i.u | i not in x-y			leave alone, nonoverlapping
436      *
437      *  I.i.u = insert u before op @ index i
438      *  R.x-y.u = replace x-y indexed tokens with u
439      *
440      *  First we need to examine replaces. For any replace op:
441      *
442      * 		1. wipe out any insertions before op within that range.
443      *		2. Drop any replace op before that is contained completely within
444      *	 that range.
445      *		3. Throw exception upon boundary overlap with any previous replace.
446      *
447      *  Then we can deal with inserts:
448      *
449      * 		1. for any inserts to same index, combine even if not adjacent.
450      * 		2. for any prior replace with same left boundary, combine this
451      *	 insert with replace and delete this replace.
452      * 		3. throw exception if index in same range as previous replace
453      *
454      *  Don't actually delete; make op null in list. Easier to walk list.
455      *  Later we can throw as we add to index &rarr; op map.
456      *
457      *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
458      *  inserted stuff would be before the replace range. But, if you
459      *  add tokens in front of a method body '{' and then delete the method
460      *  body, I think the stuff before the '{' you added should disappear too.
461      *
462      *  Return a map from token index to operation.
463      */
464     protected RewriteOperation[size_t] reduceToSingleOperationPerIndex(RewriteOperation[] rewrites)
465     {
466         debug(TokenStreamRewriter) {
467             import std.stdio : writefln;
468             writefln("reduceToSingleOperationPerIndex");
469             foreach (i, rew; rewrites)
470                 writefln("\trewrites[%s] = %s", i, rew);
471         }
472 
473         // WALK REPLACES
474         for (size_t i = 0; i < rewrites.length; i++) {
475             RewriteOperation op = rewrites[i];
476             debug(TokenStreamRewriter) {
477                 import std.stdio : writefln;
478                 writefln("op0 = %s", op);
479             }
480             if (op is null) continue;
481             if (!(cast(ReplaceOp)op)) {
482                 continue;
483             }
484             debug(TokenStreamRewriter) {
485                 import std.stdio : writefln;
486                 writefln("op = %s", op);
487             }
488             ReplaceOp rop = cast(ReplaceOp)rewrites[i];
489             // Wipe prior inserts within range
490             InsertBeforeOp[] inserts = getKindOfOps!InsertBeforeOp(rewrites, i);
491             foreach (InsertBeforeOp iop; inserts) {
492                 if ( iop.index == rop.index ) {
493                     // E.g., insert before 2, delete 2..2; update replace
494                     // text to include insert before, kill insert
495                     rewrites[iop.instructionIndex] = null;
496                     rop.text = iop.text ~ (rop.text !is null?rop.text:"");
497                 }
498                 else if (iop.index > rop.index && iop.index <= rop.lastIndex ) {
499                     // delete insert as it's a no-op.
500                     rewrites[iop.instructionIndex] =  null;
501                 }
502             }
503             // Drop any prior replaces contained within
504             ReplaceOp[] prevReplaces = getKindOfOps!ReplaceOp(rewrites, i);
505             foreach (ReplaceOp prevRop; prevReplaces) {
506                 if (prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
507                     // delete replace as it's a no-op.
508                     rewrites[prevRop.instructionIndex] = null;
509                     continue;
510                 }
511                 // throw exception unless disjoint or identical
512                 bool disjoint =
513                     prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
514                 // Delete special case of replace (text==null):
515                 // D.i-j.u D.x-y.v	| boundaries overlap	combine to max(min)..max(right)
516                 if ( prevRop.text==null && rop.text==null && !disjoint ) {
517                     //System.out.println("overlapping deletes: "+prevRop+", "+rop);
518                     rewrites[prevRop.instructionIndex] = null; // kill first delete
519                     rop.index = min(prevRop.index, rop.index);
520                     rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex);
521                     debug {
522                         import std.stdio : stderr, writefln;
523                         stderr.writefln("new rop %s", rop);
524                     }
525                 }
526                 else if ( !disjoint ) {
527                     throw
528                         new
529                         IllegalArgumentException(format(
530                                                         "replace op boundaries of %s overlap with previous %s",
531                                                         rop,
532                                                         prevRop));
533                 }
534             }
535         }
536 
537         // WALK INSERTS
538         debug(TokenStreamRewriter) {
539             import std.stdio : stderr, writefln;
540             writefln("WALK INSERTS");
541         }
542         for (int i = 0; i < rewrites.length; i++) {
543             RewriteOperation op = rewrites[i];
544             if (op is null) continue;
545             if (!(cast(InsertBeforeOp)op)) continue;
546             InsertBeforeOp iop = cast(InsertBeforeOp)rewrites[i];
547             // combine current insert with prior if any at same index
548             InsertBeforeOp[] prevInserts = getKindOfOps!InsertBeforeOp(rewrites, i);
549             foreach (InsertBeforeOp prevIop; prevInserts) {
550                 debug(TokenStreamRewriter) {
551                     import std.stdio : stderr, writefln;
552                     writefln("prevIop = %s", prevIop);
553                 }
554                 if (prevIop.index == iop.index) {
555                     if (cast(InsertAfterOp)prevIop) {
556                         iop.text = catOpText(prevIop.text, iop.text);
557                         rewrites[prevIop.instructionIndex] = null;
558                     }
559                     else if (cast(InsertBeforeOp)prevIop) { // combine objects
560                         // convert to strings...we're in process of toString'ing
561                         // whole token buffer so no lazy eval issue with any templates
562                         iop.text = catOpText(iop.text, prevIop.text);
563                         // delete redundant prior insert
564                         rewrites[prevIop.instructionIndex] = null;
565                     }
566                 }
567             }
568             // look for replaces where iop.index is in range; error
569             debug(TokenStreamRewriter) {
570                 import std.stdio : stderr, writefln;
571                 writefln("look for replaces where iop.index is in range, i = %s", i);
572             }
573             ReplaceOp[] prevReplaces = getKindOfOps!ReplaceOp(rewrites, i);
574             debug(TokenStreamRewriter) {
575                 import std.stdio : stderr, writefln;
576                 writefln("prevReplaces = %s", prevReplaces);
577             }
578             foreach (ReplaceOp rop; prevReplaces) {
579                 if ( iop.index == rop.index ) {
580                     rop.text = catOpText(iop.text, rop.text);
581                     rewrites[i] = null;	// delete current insert
582                     continue;
583                 }
584                 if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
585                     throw
586                         new
587                         IllegalArgumentException(
588                                                  format("insert op %s within boundaries of previous %s",
589                                                         iop, rop));
590                 }
591             }
592         }
593 
594         debug(TokenStreamRewriter) {
595             import std.stdio : stderr, writefln;
596             writefln("rewrites after = %s", rewrites);
597         }
598         RewriteOperation[size_t] m;
599         for (int i = 0; i < rewrites.length; i++) {
600             RewriteOperation op = rewrites[i];
601             if (op is null) continue; // ignore deleted ops
602             if (op.index in m) {
603                 throw new Error("should only be one op per index");
604             }
605             m[op.index] = op;
606         }
607         return m;
608     }
609 
610     protected string catOpText(string a, string b)
611     {
612         string x;
613         string y;
614         if (a !is null) x = a;
615         if (b !is null) y = b;
616         return x~y;
617     }
618 
619     protected auto getKindOfOps(T)(RewriteOperation[] rewrites, size_t before)
620     {
621         T[] ops;
622         for (int i=0; i<before && i<rewrites.length; i++) {
623             RewriteOperation op = rewrites[i];
624             if (op is null) continue; // ignore deleted
625             if (T.classinfo == op.classinfo) {
626                 ops ~= cast(T)(op);
627             }
628         }
629         return ops;
630     }
631 
632     public static TokenStream tokens()
633     {
634         return tokens_;
635     }
636 
637 }