1 /* 2 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.UnbufferedTokenStream; 8 9 import antlr.v4.runtime.IllegalArgumentException; 10 import antlr.v4.runtime.IllegalStateException; 11 import antlr.v4.runtime.RuleContext; 12 import antlr.v4.runtime.Token; 13 import antlr.v4.runtime.TokenConstantDefinition; 14 import antlr.v4.runtime.TokenSource; 15 import antlr.v4.runtime.TokenStream; 16 import antlr.v4.runtime.UnsupportedOperationException; 17 import antlr.v4.runtime.WritableToken; 18 import antlr.v4.runtime.misc.Interval; 19 import std.algorithm; 20 import std.array; 21 import std.conv; 22 import std.format; 23 import std.variant; 24 25 /** 26 * TODO add class description 27 */ 28 class UnbufferedTokenStream : TokenStream 29 { 30 31 protected TokenSource tokenSource; 32 33 /** 34 * @uml 35 * A moving window buffer of the data being scanned. While there's a marker, 36 * we keep adding to buffer. Otherwise, {@link #consume consume()} resets so 37 * we start filling at index 0 again. 38 */ 39 protected Token[] tokens; 40 41 /** 42 * @uml 43 * The number of tokens currently in {@link #tokens tokens}. 44 * 45 * <p>This is not the buffer capacity, that's {@code tokens.length}.</p> 46 */ 47 protected size_t n; 48 49 /** 50 * @uml 51 * 0..n-1 index into {@link #tokens tokens} of next token. 52 * 53 * <p>The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are 54 * out of buffered tokens.</p> 55 */ 56 protected size_t p = 0; 57 58 /** 59 * @uml 60 * Count up with {@link #mark mark()} and down with 61 * {@link #release release()}. When we {@code release()} the last mark, 62 * {@code numMarkers} reaches 0 and we reset the buffer. Copy 63 * {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}. 64 */ 65 protected int numMarkers = 0; 66 67 protected Token lastToken; 68 69 protected Token lastTokenBufferStart; 70 71 protected size_t currentTokenIndex = 0; 72 73 public this(TokenSource tokenSource) 74 { 75 this(tokenSource, 256); 76 } 77 78 public this(TokenSource tokenSource, int bufferSize) 79 { 80 this.tokenSource = tokenSource; 81 tokens = new Token[bufferSize]; 82 n = 0; 83 fill(1); // prime the pump 84 } 85 86 public Token get(size_t i) 87 { 88 size_t bufferStartIndex = getBufferStartIndex(); 89 assert(i < bufferStartIndex || i >= bufferStartIndex + n, 90 format("get(%1$s) outside buffer: %2$s..%3$s", i, bufferStartIndex, 91 bufferStartIndex+n)); 92 return tokens[i - bufferStartIndex]; 93 } 94 95 public Token LT(int i) 96 { 97 if ( i==-1 ) { 98 return lastToken; 99 } 100 101 sync(i); 102 auto index = p + i - 1; 103 assert(index >= 0, format("LT(%s) gives negative index",i)); 104 if ( index >= n ) { 105 assert (n > 0 && tokens[n-1].getType() == TokenConstantDefinition.EOF); 106 return tokens[n-1]; 107 } 108 return tokens[to!size_t(index)]; 109 } 110 111 public dchar LA(int i) 112 { 113 return LT(i).getType(); 114 } 115 116 public TokenSource getTokenSource() 117 { 118 return tokenSource; 119 } 120 121 public Variant getText() 122 { 123 Variant v = ""; 124 return v; 125 } 126 127 public Variant getText(RuleContext ctx) 128 { 129 return getText(ctx.getSourceInterval()); 130 } 131 132 public Variant getText(Token start, Token stop) 133 { 134 return getText(Interval.of(to!int(start.getTokenIndex), to!int(stop.getTokenIndex))); 135 } 136 137 public void consume() 138 { 139 if (LA(1) == TokenConstantDefinition.EOF) { 140 throw new IllegalStateException("cannot consume EOF"); 141 } 142 // buf always has at least tokens[p==0] in this method due to ctor 143 lastToken = tokens[p]; // track last token for LT(-1) 144 // if we're at last token and no markers, opportunity to flush buffer 145 if ( p == n-1 && numMarkers==0 ) { 146 n = 0; 147 p = -1; // p++ will leave this at 0 148 lastTokenBufferStart = lastToken; 149 } 150 p++; 151 currentTokenIndex++; 152 sync(1); 153 } 154 155 /** 156 * Make sure we have 'need' elements from current position {@link #p p}. Last valid 157 * {@code p} index is {@code tokens.length-1}. {@code p+need-1} is the tokens index 'need' elements 158 * ahead. If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}. 159 */ 160 protected void sync(int want) 161 { 162 auto need = (p+want-1) - n + 1; // how many more elements we need? 163 if ( need > 0 ) { 164 fill(to!int(need)); 165 } 166 } 167 168 /** 169 * Add {@code n} elements to the buffer. Returns the number of tokens 170 * actually added to the buffer. If the return value is less than {@code n}, 171 * then EOF was reached before {@code n} tokens could be added. 172 */ 173 protected int fill(int n) 174 { 175 for (int i=0; i<n; i++) { 176 if (this.n > 0 && tokens[this.n-1].getType() == TokenConstantDefinition.EOF) { 177 return i; 178 } 179 Token t = tokenSource.nextToken(); 180 add(t); 181 } 182 return n; 183 } 184 185 protected void add(Token t) 186 { 187 if (n >= tokens.length) { 188 tokens.length = tokens.length * 2; 189 } 190 if (cast(WritableToken)t) { 191 (cast(WritableToken)t).setTokenIndex(getBufferStartIndex() + n); 192 } 193 tokens[n++] = t; 194 } 195 196 /** 197 * Return a marker that we can release later. 198 * 199 * <p>The specific marker value used for this class allows for some level of 200 * protection against misuse where {@code seek()} is called on a mark or 201 * {@code release()} is called in the wrong order.</p> 202 */ 203 public int mark() 204 { 205 if (numMarkers == 0) { 206 lastTokenBufferStart = lastToken; 207 } 208 int mark = -numMarkers - 1; 209 numMarkers++; 210 return mark; 211 } 212 213 public void release(int marker) 214 { 215 int expectedMark = -numMarkers; 216 if ( marker!=expectedMark ) { 217 throw new IllegalStateException("release() called with an invalid marker."); 218 } 219 220 numMarkers--; 221 if (numMarkers == 0) { // can we release buffer? 222 if (p > 0) { 223 // Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs 224 // p is last valid token; move nothing if p==n as we have no valid char 225 // System.arraycopy(tokens, p, tokens, 0, n - p); // shift n-p tokens from p to 0 226 tokens = tokens[p..(n-p)]; 227 n = n - p; 228 p = 0; 229 } 230 lastTokenBufferStart = lastToken; 231 } 232 } 233 234 /** 235 * @uml 236 * @override 237 */ 238 public override size_t index() 239 { 240 return currentTokenIndex; 241 } 242 243 public void seek(size_t index) 244 { 245 if (index == currentTokenIndex) { 246 return; 247 } 248 249 if (index > currentTokenIndex) { 250 sync(to!int(index - currentTokenIndex)); 251 index = min(index, getBufferStartIndex() + n - 1); 252 } 253 254 auto bufferStartIndex = getBufferStartIndex(); 255 auto i = index - bufferStartIndex; 256 if ( i < 0 ) { 257 throw new IllegalArgumentException("cannot seek to negative index " ~ 258 to!string(index)); 259 } 260 else if (i >= n) { 261 auto msg = format("seek to index outside buffer: %1$s not in %2$s..%3$s", 262 index, bufferStartIndex, bufferStartIndex + n); 263 throw new UnsupportedOperationException(msg); 264 } 265 266 p = i; 267 currentTokenIndex = index; 268 if (p == 0) { 269 lastToken = lastTokenBufferStart; 270 } 271 else { 272 lastToken = tokens[p-1]; 273 } 274 275 } 276 277 public size_t size() 278 { 279 throw new UnsupportedOperationException("Unbuffered stream cannot know its size"); 280 } 281 282 public string getSourceName() 283 { 284 return tokenSource.getSourceName(); 285 } 286 287 public Variant getText(Interval interval) 288 { 289 auto bufferStartIndex = getBufferStartIndex(); 290 auto bufferStopIndex = bufferStartIndex + to!int(tokens.length) - 1; 291 292 int start = interval.a; 293 int stop = interval.b; 294 if (start < bufferStartIndex || stop > bufferStopIndex) { 295 auto msg = format("interval %1$s not in token buffer window: %2$s..%3$s", 296 interval, bufferStartIndex, bufferStopIndex); 297 throw new UnsupportedOperationException(msg); 298 } 299 300 auto a = start - bufferStartIndex; 301 auto b = stop - bufferStartIndex; 302 303 auto buf = appender!string; 304 for (auto i = a; i <= b; i++) { 305 Token t = tokens[i]; 306 buf.put(to!string(t.getText)); 307 } 308 Variant v = buf.data; 309 return v; 310 } 311 312 protected size_t getBufferStartIndex() 313 { 314 return currentTokenIndex - p; 315 } 316 317 }