1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.BufferedTokenStream;
8 
9 import antlr.v4.runtime.IllegalStateException;
10 import antlr.v4.runtime.Lexer;
11 import antlr.v4.runtime.RuleContext;
12 import antlr.v4.runtime.Token;
13 import antlr.v4.runtime.TokenConstantDefinition;
14 import antlr.v4.runtime.TokenSource;
15 import antlr.v4.runtime.TokenStream;
16 import antlr.v4.runtime.WritableToken;
17 import antlr.v4.runtime.misc.Interval;
18 import std.algorithm: canFind;
19 import std.array;
20 import std.conv;
21 import std.format;
22 import std.variant;
23 
24 /**
25  * This implementation of {@link TokenStream} loads tokens from a
26  * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
27  * access to any previous token by index.
28  *
29  * <p>
 * This token stream ignores the value of {@link Token#getChannel}. If your
 * parser requires the token stream to filter tokens down to only those on a
 * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
 * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@link CommonTokenStream}.</p>
35  */
class BufferedTokenStream : TokenStream
{

    /**
     * The {@link TokenSource} from which tokens for this stream are fetched.
     */
    protected TokenSource tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once {@link #fetchedEOF} is set
     * to {@code true}.
     */
    protected Token[] tokens;

    /**
     * The index into {@link #tokens} of the current token (next token to
     * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
     * {@link #LT LT(1)}.
     *
     * <p>This field is set to -1 when the stream is first constructed or when
     * {@link #setTokenSource} is called, indicating that the first token has
     * not yet been fetched from the token source. For additional information,
     * see the documentation of {@link IntStream} for a description of
     * Initializing Methods.</p>
     * @uml
     * @read
     */
    private int index_ = -1;

    /**
     * Indicates whether the {@link Token#EOF} token has been fetched from
     * {@link #tokenSource} and added to {@link #tokens}. This field improves
     * performance for the following cases:
     *
     * <ul>
     * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
     * consuming the EOF symbol is optimized by checking the values of
     * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
     * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
     * {@link #tokens} is trivial with this field.</li>
     * </ul>
     */
    protected bool fetchedEOF;

    /**
     * Constructs a buffered stream that pulls its tokens on demand from the
     * given non-null source.
     */
    public this(TokenSource tokenSource)
    in
    {
        assert (tokenSource !is null, "tokenSource cannot be null");
    }
    do
    {
        this.tokenSource = tokenSource;
    }

    /**
     * @uml
     * @override
     */
    public override TokenSource getTokenSource()
    {
        return tokenSource;
    }

    /**
     * Buffered streams keep every fetched token, so marking is free; a
     * constant dummy marker is returned.
     */
    public int mark()
    {
        return 0;
    }

    public void release(int marker)
    {
        // no resources to release
    }

    /**
     * Rewind the stream to the first token.
     */
    public void reset()
    {
        seek(0);
    }

    /**
     * Set the current position to {@code index} (after letting
     * {@link #adjustSeekIndex} adjust it), initializing the buffer if needed.
     */
    public void seek(int index)
    {
        lazyInit();
        index_ = adjustSeekIndex(index);
    }

    /**
     * Number of tokens fetched so far. This is the total token count only
     * after {@link #fill} has run or EOF has been fetched.
     */
    public int size()
    {
        return to!int(tokens.length);
    }

    /**
     * Advance past the current token, fetching more tokens as needed.
     * Throws {@link IllegalStateException} if the current symbol is EOF.
     */
    public void consume()
    {
        bool skipEofCheck;
        if (index_ >= 0) {
            if (fetchedEOF) {
                // the last token in tokens is EOF. skip check if p indexes any
                // fetched token except the last.
                skipEofCheck = index_ < tokens.length - 1;
            }
            else {
                // no EOF token in tokens. skip check if p indexes a fetched token.
                skipEofCheck = index_ < tokens.length;
            }
        }
        else {
            // not yet initialized
            skipEofCheck = false;
        }

        if (!skipEofCheck && LA(1) == TokenConstantDefinition.EOF) {
            throw new IllegalStateException("cannot consume EOF");
        }

        if (sync(index_ + 1)) {
            index_ = adjustSeekIndex(index_ + 1);
        }
    }

    /**
     * Make sure index {@code i} in tokens has a token.
     *
     * @return {@code true} if a token is located at index {@code i}, otherwise
     *    {@code false}.
     * @see #get(int i)
     */
    protected bool sync(int i)
    in
    {
        assert (i >= 0);
    }
    do
    {
        int n = i - to!int(tokens.length) + 1; // how many more elements we need?
        if (n > 0) {
            int fetched = fetch(n);
            return fetched >= n;
        }
        return true;
    }

    /**
     * Add {@code n} elements to buffer.
     *
     *  @return The actual number of elements added to the buffer.
     */
    protected int fetch(int n)
    {
        if (fetchedEOF) {
            return 0;
        }
        for (int i = 0; i < n; i++) {
            Token t = tokenSource.nextToken();
            if (cast(WritableToken)t) {
                // stamp the token with its position in the buffer
                (cast(WritableToken)t).setTokenIndex(to!int(tokens.length));
            }
            tokens ~= t;
            if (t.getType == TokenConstantDefinition.EOF) {
                fetchedEOF = true;
                return i + 1;
            }
        }
        return n;
    }

    /**
     * @uml
     * @override
     */
    public override Token get(int i)
    in
    {
        assert( i >= 0 && i < tokens.length, format("token index %1$s out of range 0..%2$s", i, tokens.length-1));
    }
    do
    {
        return tokens[i];
    }

    /**
     * Get all tokens from start..stop inclusively. Stops early at EOF;
     * returns null for negative indexes.
     */
    public Token[] get(int start, int stop)
    {
        if (start < 0 || stop < 0 ) return null;
        lazyInit();
        Token[] subset;
        if (stop >= tokens.length) stop = to!int(tokens.length) - 1;
        for (int i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF)
                break;
            subset ~= t;
        }
        return subset;
    }

    /**
     * Type of the token {@code i} positions ahead of (or, negative, behind)
     * the current token.
     */
    public int LA(int i)
    {
        return LT(i).getType();
    }

    /**
     * Look back {@code k} tokens; null if that would run off the front of
     * the buffer.
     */
    public Token LB(int k)
    {
        if ((index_ - k) < 0)
            return null;
        return tokens[index_ - k];
    }

    /**
     * @uml
     * @override
     */
    public override Token LT(int k)
    {
        lazyInit();
        if (k == 0)
            return null;
        if (k < 0)
            return LB(-k);
        int i = index_ + k - 1;
        sync(i);
        if ( i >= tokens.length ) { // return EOF token
            // EOF must be last token
            return tokens[$-1];
        }
        return tokens[i];
    }

    /**
     * Allowed derived classes to modify the behavior of operations which change
     * the current stream position by adjusting the target token index of a seek
     * operation. The default implementation simply returns {@code i}. If an
     * exception is thrown in this method, the current stream index should not be
     * changed.
     *
     * <p>For example, {@link CommonTokenStream} overrides this method to ensure that
     * the seek target is always an on-channel token.</p>
     *
     *  @param i The target token index.
     *  @return The adjusted target token index.
     */
    protected int adjustSeekIndex(int i)
    {
        return i;
    }

    /**
     * Run {@link #setup} once: index_ == -1 means the first token has not
     * been fetched yet.
     */
    protected void lazyInit()
    {
        if (index_ == -1) {
            setup();
        }
    }

    protected void setup()
    {
        sync(0);
        index_ = adjustSeekIndex(0);
    }

    /**
     * Reset this token stream by setting its token source.
     */
    public void setTokenSource(TokenSource tokenSource)
    {
        this.tokenSource = tokenSource;
        tokens.length = 0;
        index_ = -1;
        // Fix: without clearing this flag, a stream reused with a new source
        // would still believe EOF had been fetched and fetch() would never
        // read a single token from the new source.
        fetchedEOF = false;
    }

    public Token[] getTokens()
    {
        return tokens;
    }

    public Token[] getTokens(int start, int stop)
    {
        return getTokens(start, stop, null);
    }

    /**
     * Given a start and stop index, return a List of all tokens in
     * the token type BitSet.  Return null if no tokens were found.  This
     * method looks at both on and off channel tokens.
     */
    public Token[] getTokens(int start, int stop, int[] types)
    in
    {
        lazyInit();
        // Fix: stop == 0 is a legal index (the reference implementation only
        // requires stop >= 0); the old `stop > 0` wrongly rejected it.
        assert(start >= 0 && stop < tokens.length &&
               stop >= 0 && start < tokens.length,
               format("start %1$s or stop %2$s not in 0..%3$s", start, stop, tokens.length - 1));
    }
    do
    {
        if (start > stop)
            return null;

        // list = tokens[start:stop]:{T t, t.getType() in types}
        Token[] filteredTokens;
        for (int i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (types is null || types.canFind(t.getType()) ) {
                filteredTokens ~= t;
            }
        }
        if (filteredTokens.length == 0) {
            filteredTokens = null;
        }
        return filteredTokens;
    }

    public Token[] getTokens(int start, int stop, int ttype)
    {
        int[] s;
        s ~= ttype;
        return getTokens(start, stop, s);
    }

    /**
     * Given a starting index, return the index of the next token on channel.
     * Return {@code i} if {@code tokens[i]} is on channel. Return the index of
     * the EOF token if there are no tokens on channel between {@code i} and
     * EOF.
     */
    protected int nextTokenOnChannel(int i, int channel)
    {
        sync(i);
        if (i >= size) {
            // the EOF token is on every channel
            return size - 1;
        }

        Token token = tokens[i];
        while (token.getChannel != channel) {
            if (token.getType == TokenConstantDefinition.EOF) {
                return i;
            }

            i++;
            sync(i);
            token = tokens[i];
        }

        return i;
    }

    /**
     * Given a starting index, return the index of the previous token on
     * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
     * if there are no tokens on channel between {@code i} and 0.
     *
     * <p>
     * If {@code i} specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.</p>
     */
    protected int previousTokenOnChannel(int i, int channel)
    {
        sync(i);
        if (i >= size) {
            // the EOF token is on every channel
            return size() - 1;
        }
        while (i >= 0) {
            Token token = tokens[i];
            if (token.getType() == TokenConstantDefinition.EOF || token.getChannel == channel) {
                return i;
            }
            i--;
        }
        return i;
    }

    /**
     * Collect all tokens on specified channel to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
     * EOF. If channel is -1, find any non default channel token.
     */
    public Token[] getHiddenTokensToRight(int tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1));
    }
    do
    {
        int nextOnChannel =
            nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        int to;
        int from = tokenIndex + 1;
        // if none onchannel to right, nextOnChannel=-1 so set to = last token
        // (defensive: nextTokenOnChannel currently returns size()-1, not -1)
        if ( nextOnChannel == -1 ) to = size() - 1;
        else to = nextOnChannel;
        return filterForChannel(from, to, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL
     * or EOF.
     */
    public Token[] getHiddenTokensToRight(int tokenIndex)
    {
        return getHiddenTokensToRight(tokenIndex, -1);
    }

    /**
     * Collect all tokens on specified channel to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     * If channel is -1, find any non default channel token.
     */
    public Token[] getHiddenTokensToLeft(int tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1));
    }
    do
    {
        if (tokenIndex == 0) {
            // obviously no tokens can appear before the first token
            return null;
        }
        int prevOnChannel =
            previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        if ( prevOnChannel == tokenIndex - 1 ) return null;
        // if none onchannel to left, prevOnChannel=-1 then from=0
        int from = prevOnChannel + 1;
        int to = tokenIndex - 1;
        return filterForChannel(from, to, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     */
    public Token[] getHiddenTokensToLeft(int tokenIndex)
    {
        return getHiddenTokensToLeft(tokenIndex, -1);
    }

    /**
     * Return the tokens in the inclusive index range from..to that are on
     * {@code channel}; with channel == -1, return those on any non-default
     * channel. Returns null when nothing matches.
     */
    public Token[] filterForChannel(int from, int to, int channel)
    {
        Token[] hidden;
        for (int i = from; i <= to; i++) {
            Token t = tokens[i];
            if (channel == -1) {
                if (t.getChannel != Lexer.DEFAULT_TOKEN_CHANNEL) hidden ~= t;
            }
            else {
                if (t.getChannel == channel)
                    hidden ~= t;
            }
        }
        if (hidden.length == 0) return null;
        return hidden;
    }

    public string getSourceName()
    {
        return tokenSource.getSourceName();
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText()
    {
        lazyInit();
        fill();
        return getText(Interval.of(0, size() - 1));
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText(Interval interval)
    {
        import std.array : join;
        int start = interval.a;
        int stop = interval.b;
        if (start < 0 || stop < 0) {
            Variant v;
            return v;
        }
        fill();
        if (stop >= tokens.length)
            stop = to!int(tokens.length) - 1;

        string[] buf;
        // Fix: iterate with explicit bounds instead of slicing
        // tokens[start..stop+1]; when start >= tokens.length the old slice
        // had inverted bounds and raised a RangeError, whereas an empty
        // result is the correct outcome.
        for (int i = start; i <= stop && i < tokens.length; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF)
                break;
            buf ~= t.getText.get!string;
        }
        // NOTE(review): the Java runtime concatenates token texts with no
        // separator; confirm the " " join is intentional in this port.
        return Variant(buf.join(" "));
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText(RuleContext ctx)
    {
        return getText(ctx.getSourceInterval());
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText(Token start, Token stop)
    {
        if (start !is null && stop !is null) {
            return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex()));
        }
        Variant v = "";
        return v;
    }

    /**
     * Get all tokens from lexer until EOF
     */
    public void fill()
    {
        lazyInit();
        const int blockSize = 1000;
        while (true) {
            int fetched = fetch(blockSize);
            if (fetched < blockSize) {
                return;
            }
        }
    }

    public final int index()
    {
        return this.index_;
    }

}