1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.UnbufferedTokenStream;
8 
9 import antlr.v4.runtime.IllegalArgumentException;
10 import antlr.v4.runtime.IllegalStateException;
11 import antlr.v4.runtime.RuleContext;
12 import antlr.v4.runtime.Token;
13 import antlr.v4.runtime.TokenConstantDefinition;
14 import antlr.v4.runtime.TokenSource;
15 import antlr.v4.runtime.TokenStream;
16 import antlr.v4.runtime.UnsupportedOperationException;
17 import antlr.v4.runtime.WritableToken;
18 import antlr.v4.runtime.misc.Interval;
19 import std.algorithm;
20 import std.array;
21 import std.conv;
22 import std.format;
23 import std.variant;
24 
25 /**
26  * TODO add class description
27  */
28 class UnbufferedTokenStream : TokenStream
29 {
30 
31     protected TokenSource tokenSource;
32 
33     /**
34      * @uml
35      * A moving window buffer of the data being scanned. While there's a marker,
36      * we keep adding to buffer. Otherwise, {@link #consume consume()} resets so
37      * we start filling at index 0 again.
38      */
39     protected Token[] tokens;
40 
41     /**
42      * @uml
43      * The number of tokens currently in {@link #tokens tokens}.
44      *
45      *  <p>This is not the buffer capacity, that's {@code tokens.length}.</p>
46      */
47     protected size_t n;
48 
49     /**
50      * @uml
51      * 0..n-1 index into {@link #tokens tokens} of next token.
52      *
53      * <p>The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are
54      * out of buffered tokens.</p>
55      */
56     protected size_t p = 0;
57 
58     /**
59      * @uml
60      * Count up with {@link #mark mark()} and down with
61      * {@link #release release()}. When we {@code release()} the last mark,
62      * {@code numMarkers} reaches 0 and we reset the buffer. Copy
63      * {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}.
64      */
65     protected int numMarkers = 0;
66 
67     protected Token lastToken;
68 
69     protected Token lastTokenBufferStart;
70 
71     protected size_t currentTokenIndex = 0;
72 
73     public this(TokenSource tokenSource)
74     {
75         this(tokenSource, 256);
76     }
77 
78     public this(TokenSource tokenSource, int bufferSize)
79     {
80         this.tokenSource = tokenSource;
81         tokens = new Token[bufferSize];
82         n = 0;
83         fill(1); // prime the pump
84     }
85 
86     public Token get(size_t i)
87     {
88         size_t bufferStartIndex = getBufferStartIndex();
89         assert(i < bufferStartIndex || i >= bufferStartIndex + n,
90                format("get(%1$s) outside buffer: %2$s..%3$s", i, bufferStartIndex,
91                       bufferStartIndex+n));
92         return tokens[i - bufferStartIndex];
93     }
94 
95     public Token LT(int i)
96     {
97         if ( i==-1 ) {
98             return lastToken;
99         }
100 
101         sync(i);
102         auto index = p + i - 1;
103         assert(index >= 0,  format("LT(%s) gives negative index",i));
104         if ( index >= n ) {
105             assert (n > 0 && tokens[n-1].getType() == TokenConstantDefinition.EOF);
106             return tokens[n-1];
107         }
108         return tokens[to!size_t(index)];
109     }
110 
111     public dchar LA(int i)
112     {
113         return LT(i).getType();
114     }
115 
116     public TokenSource getTokenSource()
117     {
118         return tokenSource;
119     }
120 
121     public Variant getText()
122     {
123         Variant v = "";
124         return v;
125     }
126 
127     public Variant getText(RuleContext ctx)
128     {
129         return getText(ctx.getSourceInterval());
130     }
131 
132     public Variant getText(Token start, Token stop)
133     {
134         return getText(Interval.of(to!int(start.getTokenIndex), to!int(stop.getTokenIndex)));
135     }
136 
137     public void consume()
138     {
139         if (LA(1) == TokenConstantDefinition.EOF) {
140             throw new IllegalStateException("cannot consume EOF");
141         }
142         // buf always has at least tokens[p==0] in this method due to ctor
143         lastToken = tokens[p];   // track last token for LT(-1)
144         // if we're at last token and no markers, opportunity to flush buffer
145         if ( p == n-1 && numMarkers==0 ) {
146             n = 0;
147             p = -1; // p++ will leave this at 0
148             lastTokenBufferStart = lastToken;
149         }
150         p++;
151         currentTokenIndex++;
152         sync(1);
153     }
154 
155     /**
156      * Make sure we have 'need' elements from current position {@link #p p}. Last valid
157      *  {@code p} index is {@code tokens.length-1}.  {@code p+need-1} is the tokens index 'need' elements
158      *  ahead.  If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}.
159      */
160     protected void sync(int want)
161     {
162         auto need = (p+want-1) - n + 1; // how many more elements we need?
163         if ( need > 0 ) {
164             fill(to!int(need));
165         }
166     }
167 
168     /**
169      * Add {@code n} elements to the buffer. Returns the number of tokens
170      * actually added to the buffer. If the return value is less than {@code n},
171      * then EOF was reached before {@code n} tokens could be added.
172      */
173     protected int fill(int n)
174     {
175         for (int i=0; i<n; i++) {
176             if (this.n > 0 && tokens[this.n-1].getType() == TokenConstantDefinition.EOF) {
177                 return i;
178             }
179             Token t = tokenSource.nextToken();
180             add(t);
181         }
182         return n;
183     }
184 
185     protected void add(Token t)
186     {
187         if (n >= tokens.length) {
188             tokens.length = tokens.length * 2;
189         }
190         if (cast(WritableToken)t) {
191             (cast(WritableToken)t).setTokenIndex(getBufferStartIndex() + n);
192         }
193         tokens[n++] = t;
194     }
195 
196     /**
197      * Return a marker that we can release later.
198      *
199      * <p>The specific marker value used for this class allows for some level of
200      * protection against misuse where {@code seek()} is called on a mark or
201      * {@code release()} is called in the wrong order.</p>
202      */
203     public int mark()
204     {
205         if (numMarkers == 0) {
206             lastTokenBufferStart = lastToken;
207         }
208         int mark = -numMarkers - 1;
209         numMarkers++;
210         return mark;
211     }
212 
213     public void release(int marker)
214     {
215         int expectedMark = -numMarkers;
216         if ( marker!=expectedMark ) {
217             throw new IllegalStateException("release() called with an invalid marker.");
218         }
219 
220         numMarkers--;
221         if (numMarkers == 0) { // can we release buffer?
222             if (p > 0) {
223                 // Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs
224                 // p is last valid token; move nothing if p==n as we have no valid char
225                 // System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p tokens from p to 0
226                 tokens = tokens[p..(n-p)];
227                 n = n - p;
228                 p = 0;
229             }
230             lastTokenBufferStart = lastToken;
231         }
232     }
233 
234     /**
235      * @uml
236      * @override
237      */
238     public override size_t index()
239     {
240         return currentTokenIndex;
241     }
242 
243     public void seek(size_t index)
244     {
245         if (index == currentTokenIndex) {
246             return;
247         }
248 
249         if (index > currentTokenIndex) {
250             sync(to!int(index - currentTokenIndex));
251             index = min(index, getBufferStartIndex() + n - 1);
252         }
253 
254         auto bufferStartIndex = getBufferStartIndex();
255         auto i = index - bufferStartIndex;
256         if ( i < 0 ) {
257             throw new IllegalArgumentException("cannot seek to negative index " ~
258                                                to!string(index));
259         }
260         else if (i >= n) {
261             auto msg = format("seek to index outside buffer: %1$s not in %2$s..%3$s",
262                               index, bufferStartIndex, bufferStartIndex + n);
263             throw new UnsupportedOperationException(msg);
264         }
265 
266         p = i;
267         currentTokenIndex = index;
268         if (p == 0) {
269             lastToken = lastTokenBufferStart;
270         }
271         else {
272             lastToken = tokens[p-1];
273         }
274 
275     }
276 
277     public size_t size()
278     {
279         throw new UnsupportedOperationException("Unbuffered stream cannot know its size");
280     }
281 
282     public string getSourceName()
283     {
284         return tokenSource.getSourceName();
285     }
286 
287     public Variant getText(Interval interval)
288     {
289         auto bufferStartIndex = getBufferStartIndex();
290         auto bufferStopIndex = bufferStartIndex + to!int(tokens.length) - 1;
291 
292         int start = interval.a;
293         int stop = interval.b;
294         if (start < bufferStartIndex || stop > bufferStopIndex) {
295             auto msg = format("interval %1$s not in token buffer window: %2$s..%3$s",
296                               interval, bufferStartIndex, bufferStopIndex);
297             throw new UnsupportedOperationException(msg);
298         }
299 
300         auto a = start - bufferStartIndex;
301         auto b = stop - bufferStartIndex;
302 
303         auto buf = appender!string;
304         for (auto i = a; i <= b; i++) {
305             Token t = tokens[i];
306             buf.put(to!string(t.getText));
307         }
308         Variant v = buf.data;
309         return v;
310     }
311 
312     protected size_t getBufferStartIndex()
313     {
314         return currentTokenIndex - p;
315     }
316 
317 }