/*
 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.BufferedTokenStream;

import std.array;
import std.format;
import std.conv;
import std.algorithm: canFind;
import antlr.v4.runtime.IllegalStateException;
import antlr.v4.runtime.Lexer;
import antlr.v4.runtime.RuleContext;
import antlr.v4.runtime.Token;
import antlr.v4.runtime.TokenConstantDefinition;
import antlr.v4.runtime.TokenSource;
import antlr.v4.runtime.TokenStream;
import antlr.v4.runtime.WritableToken;
import antlr.v4.runtime.misc.Interval;

/**
 * This implementation of {@link TokenStream} loads tokens from a
 * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
 * access to any previous token by index.
 *
 * <p>
 * This token stream ignores the value of {@link Token#getChannel}. If your
 * parser requires the token stream to filter tokens to only those on a
 * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
 * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@link CommonTokenStream}.</p>
 */
class BufferedTokenStream : TokenStream
{

    /**
     * The {@link TokenSource} from which tokens for this stream are fetched.
     */
    protected TokenSource tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once {@link #fetchedEOF} is set
     * to {@code true}.
     */
    protected Token[] tokens;

    /**
     * The index into {@link #tokens} of the current token (next token to
     * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
     * {@link #LT LT(1)}.
     *
     * <p>This field is set to -1 when the stream is first constructed or when
     * {@link #setTokenSource} is called, indicating that the first token has
     * not yet been fetched from the token source. For additional information,
     * see the documentation of {@link IntStream} for a description of
     * Initializing Methods.</p>
     * @uml
     * @read
     */
    private int index_ = -1;

    /**
     * Indicates whether the {@link Token#EOF} token has been fetched from
     * {@link #tokenSource} and added to {@link #tokens}. This field improves
     * performance for the following cases:
     *
     * <ul>
     * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
     * consuming the EOF symbol is optimized by checking the values of
     * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
     * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
     * {@link #tokens} is trivial with this field.</li>
     * </ul>
     */
    protected bool fetchedEOF;

    public this(TokenSource tokenSource)
    in
    {
        assert (tokenSource !is null, "tokenSource cannot be null");
    }
    do
    {
        this.tokenSource = tokenSource;
    }

    /**
     * @uml
     * @override
     */
    public override TokenSource getTokenSource()
    {
        return tokenSource;
    }

    public int mark()
    {
        // this stream buffers everything, so no mark bookkeeping is needed
        return 0;
    }

    public void release(int marker)
    {
        // no resources to release
    }

    public void reset()
    {
        seek(0);
    }

    public void seek(int index)
    {
        lazyInit();
        index_ = adjustSeekIndex(index);
    }

    public int size()
    {
        return to!int(tokens.length);
    }

    public void consume()
    {
        bool skipEofCheck;
        if (index_ >= 0) {
            if (fetchedEOF) {
                // the last token in tokens is EOF. skip check if p indexes any
                // fetched token except the last.
                skipEofCheck = index_ < tokens.length - 1;
            }
            else {
                // no EOF token in tokens. skip check if p indexes a fetched token.
                skipEofCheck = index_ < tokens.length;
            }
        }
        else {
            // not yet initialized
            skipEofCheck = false;
        }

        if (!skipEofCheck && LA(1) == TokenConstantDefinition.EOF) {
            throw new IllegalStateException("cannot consume EOF");
        }

        if (sync(index_ + 1)) {
            index_ = adjustSeekIndex(index_ + 1);
        }
    }

    /**
     * Make sure index {@code i} in tokens has a token.
     *
     * @return {@code true} if a token is located at index {@code i}, otherwise
     * {@code false}.
     * @see #get(int i)
     */
    protected bool sync(int i)
    in
    {
        assert (i >= 0);
    }
    do
    {
        int n = i - to!int(tokens.length) + 1; // how many more elements we need?
        if ( n > 0 ) {
            int fetched = fetch(n);
            return fetched >= n;
        }
        return true;
    }

    /**
     * Add {@code n} elements to buffer.
     *
     * @return The actual number of elements added to the buffer.
     */
    protected int fetch(int n)
    {
        if (fetchedEOF) {
            return 0;
        }
        for (int i = 0; i < n; i++) {
            Token t = tokenSource.nextToken();
            if (cast(WritableToken)t) {
                // record the token's position in the buffer on the token itself
                (cast(WritableToken)t).setTokenIndex(to!int(tokens.length));
            }
            tokens ~= t;
            if (t.getType == TokenConstantDefinition.EOF) {
                fetchedEOF = true;
                return i + 1; // stop early: EOF must be the last buffered token
            }
        }
        return n;
    }

    /**
     * @uml
     * @override
     */
    public override Token get(int i)
    in
    {
        assert( i >= 0 && i < tokens.length, format("token index %1$s out of range 0..%2$s", i, tokens.length-1));
    }
    do
    {
        return tokens[i];
    }

    /**
     * Get all tokens from start..stop inclusively.
     *
     * <p>Stops early (excluding the EOF token) if EOF is encountered within
     * the range. Returns {@code null} if either bound is negative.</p>
     */
    public Token[] get(int start, int stop)
    {
        if (start < 0 || stop < 0 ) return null;
        lazyInit();
        Token[] subset;
        if (stop >= tokens.length) stop = to!int(tokens.length) - 1;
        for (int i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF)
                break;
            subset ~= t;
        }
        return subset;
    }

    public int LA(int i)
    {
        return LT(i).getType();
    }

    /**
     * Look backwards {@code k} tokens from the current position; returns
     * {@code null} if that would run off the front of the buffer.
     */
    public Token LB(int k)
    {
        if ((index_ - k) < 0)
            return null;
        return tokens[index_ - k];
    }

    /**
     * @uml
     * @override
     */
    public override Token LT(int k)
    {
        lazyInit();
        if (k == 0)
            return null;
        if (k < 0)
            return LB(-k);
        int i = index_ + k - 1;
        sync(i);
        if ( i >= tokens.length ) { // return EOF token
            // EOF must be last token
            return tokens[$-1];
        }
        return tokens[i];
    }

    /**
     * Allowed derived classes to modify the behavior of operations which change
     * the current stream position by adjusting the target token index of a seek
     * operation. The default implementation simply returns {@code i}. If an
     * exception is thrown in this method, the current stream index should not be
     * changed.
     *
     * <p>For example, {@link CommonTokenStream} overrides this method to ensure that
     * the seek target is always an on-channel token.</p>
     *
     * @param i The target token index.
     * @return The adjusted target token index.
     */
    protected int adjustSeekIndex(int i)
    {
        return i;
    }

    protected void lazyInit()
    {
        // index_ == -1 means no token has been fetched yet (see index_ docs)
        if (index_ == -1) {
            setup();
        }
    }

    protected void setup()
    {
        sync(0);
        index_ = adjustSeekIndex(0);
    }

    /**
     * Reset this token stream by setting its token source.
     */
    public void setTokenSource(TokenSource tokenSource)
    {
        this.tokenSource = tokenSource;
        tokens.length = 0;
        index_ = -1;
    }

    public Token[] getTokens()
    {
        return tokens;
    }

    public Token[] getTokens(int start, int stop)
    {
        return getTokens(start, stop, null);
    }

    /**
     * Given a start and stop index, return a List of all tokens in
     * the token type BitSet. Return null if no tokens were found. This
     * method looks at both on and off channel tokens.
     */
    public Token[] getTokens(int start, int stop, int[] types)
    in
    {
        lazyInit();
        // stop >= 0 (not > 0): stop == 0 is a valid index when token 0 exists
        assert(start >= 0 && stop < tokens.length &&
               stop >= 0 && start < tokens.length,
               format("start %1$s or stop %2$s not in 0..%3$s", start, stop, tokens.length - 1));
    }
    do
    {
        if (start > stop)
            return null;

        // list = tokens[start:stop]:{T t, t.getType() in types}
        Token[] filteredTokens;
        for (int i=start; i<=stop; i++) {
            Token t = tokens[i];
            if (types is null || types.canFind(t.getType()) ) {
                filteredTokens ~= t;
            }
        }
        if (filteredTokens.length == 0) {
            filteredTokens = null;
        }
        return filteredTokens;
    }

    public Token[] getTokens(int start, int stop, int ttype)
    {
        int[] s;
        s ~= ttype;
        return getTokens(start, stop, s);
    }

    /**
     * Given a starting index, return the index of the next token on channel.
     * Return {@code i} if {@code tokens[i]} is on channel. Return the index of
     * the EOF token if there are no tokens on channel between {@code i} and
     * EOF.
     */
    protected int nextTokenOnChannel(int i, int channel)
    {
        sync(i);
        if (i >= size) {
            // past the end: the EOF token is the answer
            return size - 1;
        }

        Token token = tokens[i];
        while (token.getChannel != channel) {
            if (token.getType == TokenConstantDefinition.EOF) {
                return i;
            }

            i++;
            sync(i);
            token = tokens[i];
        }

        return i;
    }

    /**
     * Given a starting index, return the index of the previous token on
     * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
     * if there are no tokens on channel between {@code i} and 0.
     *
     * <p>
     * If {@code i} specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.</p>
     */
    protected int previousTokenOnChannel(int i, int channel)
    {
        sync(i);
        if (i >= size) {
            // the EOF token is on every channel
            return size() - 1;
        }
        while (i >= 0) {
            Token token = tokens[i];
            if (token.getType() == TokenConstantDefinition.EOF || token.getChannel == channel) {
                return i;
            }
            i--;
        }
        return i; // -1: nothing on channel before index 0
    }

    /**
     * Collect all tokens on specified channel to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
     * EOF. If channel is -1, find any non default channel token.
     */
    public Token[] getHiddenTokensToRight(int tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1));
    }
    do
    {
        int nextOnChannel =
            nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        int to;
        int from = tokenIndex+1;
        // if none onchannel to right, nextOnChannel=-1 so set to = last token
        if ( nextOnChannel == -1 ) to = size()-1;
        else to = nextOnChannel;
        return filterForChannel(from, to, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL
     * of EOF.
     */
    public Token[] getHiddenTokensToRight(int tokenIndex)
    {
        return getHiddenTokensToRight(tokenIndex, -1);
    }

    /**
     * Collect all tokens on specified channel to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     * If channel is -1, find any non default channel token.
     */
    public Token[] getHiddenTokensToLeft(int tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1));
    }
    do
    {

        if (tokenIndex == 0) {
            // obviously no tokens can appear before the first token
            return null;
        }
        int prevOnChannel =
            previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        if ( prevOnChannel == tokenIndex - 1 ) return null;
        // if none onchannel to left, prevOnChannel=-1 then from=0
        int from = prevOnChannel+1;
        int to = tokenIndex-1;
        return filterForChannel(from, to, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     */
    public Token[] getHiddenTokensToLeft(int tokenIndex)
    {
        return getHiddenTokensToLeft(tokenIndex, -1);
    }

    /**
     * Return the tokens in the range from..to (inclusive) that are on the
     * given channel, or any off-default channel if {@code channel} is -1.
     * Returns {@code null} if no tokens match.
     */
    public Token[] filterForChannel(int from, int to, int channel)
    {
        Token[] hidden;
        for (int i=from; i<=to; i++) {
            Token t = tokens[i];
            if (channel == -1) {
                if (t.getChannel != Lexer.DEFAULT_TOKEN_CHANNEL) hidden ~= t;
            }
            else {
                if (t.getChannel == channel)
                    hidden ~= t;
            }
        }
        if (hidden.length == 0) return null;
        return hidden;
    }

    public string getSourceName()
    {
        return tokenSource.getSourceName();
    }

    /**
     * @uml
     * @override
     */
    public override string getText()
    {
        lazyInit();
        fill();
        return getText(Interval.of(0,size()-1));
    }

    /**
     * @uml
     * @override
     */
    public override string getText(Interval interval)
    {
        int start = interval.a;
        int stop = interval.b;
        if (start < 0 || stop < 0) return "";
        lazyInit();
        if (stop >= tokens.length) stop = to!int(tokens.length) - 1;

        auto buf = appender!(string);
        for (int i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF) break;
            buf.put(t.getText());
        }
        return buf.data;
    }

    /**
     * @uml
     * @override
     */
    public override string getText(RuleContext ctx)
    {
        return getText(ctx.getSourceInterval());
    }

    /**
     * @uml
     * @override
     */
    public override string getText(Token start, Token stop)
    {
        if (start !is null && stop !is null) {
            return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex()));
        }
        return "";
    }

    /**
     * Get all tokens from lexer until EOF
     */
    public void fill()
    {
        lazyInit();
        const int blockSize = 1000;
        while (true) {
            int fetched = fetch(blockSize);
            if (fetched < blockSize) {
                return;
            }
        }
    }

    public final int index()
    {
        return this.index_;
    }

}