/*
 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.BufferedTokenStream;

import antlr.v4.runtime.IllegalStateException;
import antlr.v4.runtime.Lexer;
import antlr.v4.runtime.RuleContext;
import antlr.v4.runtime.Token;
import antlr.v4.runtime.TokenConstantDefinition;
import antlr.v4.runtime.TokenSource;
import antlr.v4.runtime.TokenStream;
import antlr.v4.runtime.WritableToken;
import antlr.v4.runtime.misc.Interval;
import std.algorithm : canFind;
import std.array;
import std.conv;
import std.format;
import std.variant;

/**
 * This implementation of {@link TokenStream} loads tokens from a
 * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
 * access to any previous token by index.
 *
 * <p>
 * This token stream ignores the value of {@link Token#getChannel}. If your
 * parser requires the token stream to filter tokens to only those on a
 * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
 * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@link CommonTokenStream}.</p>
 */
class BufferedTokenStream : TokenStream
{

    /**
     * The {@link TokenSource} from which tokens for this stream are fetched.
     */
    protected TokenSource tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once {@link #fetchedEOF} is set
     * to {@code true}.
     */
    protected Token[] tokens;

    /**
     * The index into {@link #tokens} of the current token (next token to
     * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
     * {@link #LT LT(1)}.
     *
     * <p>This field is set to {@code size_t.max} (the unsigned analogue of -1)
     * when the stream is first constructed or when {@link #setTokenSource} is
     * called, indicating that the first token has not yet been fetched from
     * the token source. For additional information, see the documentation of
     * {@link IntStream} for a description of Initializing Methods.</p>
     * @uml
     * @read
     */
    private size_t index_ = size_t.max;

    /**
     * Indicates whether the {@link Token#EOF} token has been fetched from
     * {@link #tokenSource} and added to {@link #tokens}. This field improves
     * performance for the following cases:
     *
     * <ul>
     * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
     * consuming the EOF symbol is optimized by checking the values of
     * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
     * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
     * {@link #tokens} is trivial with this field.</li>
     * </ul>
     */
    protected bool fetchedEOF;

    public this(TokenSource tokenSource)
    in
    {
        assert (tokenSource !is null, "tokenSource cannot be null");
    }
    do
    {
        this.tokenSource = tokenSource;
    }
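
    // A minimal usage sketch, kept out of the build with version(none). It
    // assumes a hypothetical generated lexer `MyLexer` and an input-stream
    // class named `ANTLRInputStream` (both names are assumptions; any
    // TokenSource works). Construction does not touch the token source: the
    // first fetch happens lazily on the first access.
    version(none) unittest
    {
        auto input = new ANTLRInputStream("a = b;");  // hypothetical input stream
        auto lexer = new MyLexer(input);              // hypothetical generated lexer
        auto stream = new BufferedTokenStream(lexer);
        Token first = stream.LT(1);                   // triggers the first fetch
        stream.consume();                             // advance to the next token
    }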

    /**
     * @uml
     * @override
     */
    public override TokenSource getTokenSource()
    {
        return tokenSource;
    }

    public int mark()
    {
        return 0;
    }

    public void release(int marker)
    {
        // no resources to release
    }

    public void reset()
    {
        seek(0);
    }

    public void seek(size_t index)
    {
        lazyInit;
        index_ = adjustSeekIndex(index);
    }

    public size_t size()
    {
        return tokens.length;
    }

    public void consume()
    {
        bool skipEofCheck;
        if (cast(int)index_ >= 0) {
            if (fetchedEOF) {
                // the last token in tokens is EOF. Skip the check if p indexes
                // any fetched token except the last.
                skipEofCheck = cast(int)index_ < cast(int)tokens.length - 1;
            }
            else {
                // no EOF token in tokens. Skip the check if p indexes a fetched token.
                skipEofCheck = index_ < tokens.length;
            }
        }
        else {
            // not yet initialized
            skipEofCheck = false;
        }

        if (!skipEofCheck && LA(1) == TokenConstantDefinition.EOF) {
            throw new IllegalStateException("cannot consume EOF");
        }

        if (sync(index_ + 1)) {
            index_ = adjustSeekIndex(index_ + 1);
        }
    }

    /**
     * Make sure index {@code i} in tokens has a token.
     *
     * @return {@code true} if a token is located at index {@code i}, otherwise
     * {@code false}.
     * @see #get(int i)
     */
    protected bool sync(size_t i)
    {
        auto n = cast(int)i - cast(int)tokens.length + 1; // how many more elements do we need?
        if (n > 0) {
            auto fetched = fetch(n);
            return fetched >= n;
        }
        return true;
    }

    /**
     * Add {@code n} elements to the buffer.
     *
     * @return The actual number of elements added to the buffer.
     */
    protected size_t fetch(size_t n)
    {
        if (fetchedEOF) {
            return 0;
        }
        for (size_t i = 0; i < n; i++) {
            Token t = tokenSource.nextToken();
            if (cast(WritableToken)t) {
                (cast(WritableToken)t).setTokenIndex(to!int(tokens.length));
            }
            tokens ~= t;
            if (t.getType == TokenConstantDefinition.EOF) {
                fetchedEOF = true;
                return i + 1;
            }
        }
        return n;
    }
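
    // A hedged sketch of the on-demand buffering implemented by sync/fetch
    // above, kept out of the build with version(none); `lexer` stands in for
    // any TokenSource. LT(k) syncs the buffer through index p + k - 1, so
    // lookahead, not construction, is what drives fetching.
    version(none) unittest
    {
        auto stream = new BufferedTokenStream(lexer);
        assert(stream.size == 0);  // nothing buffered yet
        stream.LT(10);             // fetches up to the first 10 tokens (or through EOF)
        assert(stream.size >= 1);  // at least one token, possibly just EOF, is buffered
    }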

    /**
     * @uml
     * @override
     */
    public override Token get(size_t i)
    in
    {
        assert(i < tokens.length,
               format("token index %1$s out of range 0..%2$s", i, tokens.length - 1));
    }
    do
    {
        return tokens[i];
    }

    /**
     * Get all tokens from start..stop inclusive.
     */
    public Token[] get(size_t start, size_t stop)
    {
        lazyInit;
        Token[] subset;
        if (stop >= tokens.length)
            stop = tokens.length - 1;
        for (auto i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF)
                break;
            subset ~= t;
        }
        return subset;
    }

    public dchar LA(int i)
    {
        return LT(i).getType;
    }

    public Token LB(int k)
    {
        if ((cast(int)index_ - k) < 0)
            return null;
        return tokens[index_ - k];
    }

    /**
     * @uml
     * @override
     */
    public override Token LT(int k)
    {
        lazyInit();
        if (k == 0)
            return null;
        if (k < 0)
            return LB(-k);
        auto i = cast(int)index_ + k - 1;
        sync(i);
        if (i >= tokens.length) {
            // EOF must be the last token, so return it
            return tokens[$ - 1];
        }
        return tokens[i];
    }

    /**
     * Allows derived classes to modify the behavior of operations which change
     * the current stream position by adjusting the target token index of a seek
     * operation. The default implementation simply returns {@code i}. If an
     * exception is thrown in this method, the current stream index should not be
     * changed.
     *
     * <p>For example, {@link CommonTokenStream} overrides this method to ensure that
     * the seek target is always an on-channel token.</p>
     *
     * @param i The target token index.
     * @return The adjusted target token index.
     */
    protected size_t adjustSeekIndex(size_t i)
    {
        return i;
    }

    protected void lazyInit()
    {
        if (index_ == size_t.max) {
            setup;
        }
    }

    protected void setup()
    {
        sync(0);
        index_ = adjustSeekIndex(0);
    }

    /**
     * Reset this token stream by setting its token source.
     */
    public void setTokenSource(TokenSource tokenSource)
    {
        this.tokenSource = tokenSource;
        tokens.length = 0;
        index_ = size_t.max;
        fetchedEOF = false;
    }

    public Token[] getTokens()
    {
        return tokens;
    }

    public Token[] getTokens(size_t start, size_t stop)
    {
        return getTokens(start, stop, null);
    }

    /**
     * Given a start and stop index, return a list of all tokens whose type is
     * in the given token-type set. Return null if no tokens were found. This
     * method looks at both on and off channel tokens.
     */
    public Token[] getTokens(size_t start, size_t stop, int[] types)
    in
    {
        lazyInit();
        assert(start < tokens.length && stop < tokens.length,
               format("start %1$s or stop %2$s not in 0..%3$s", start, stop, tokens.length - 1));
    }
    do
    {
        if (start > stop)
            return null;

        // list = tokens[start:stop]:{T t, t.getType() in types}
        Token[] filteredTokens;
        for (auto i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (types is null || types.canFind(t.getType())) {
                filteredTokens ~= t;
            }
        }
        if (filteredTokens.length == 0) {
            filteredTokens = null;
        }
        return filteredTokens;
    }

    public Token[] getTokens(size_t start, size_t stop, int ttype)
    {
        int[] s;
        s ~= ttype;
        return getTokens(start, stop, s);
    }
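
    // A hedged sketch of the type-filtered getTokens overloads above, kept out
    // of the build with version(none). `MyLexer.ID` is a hypothetical token
    // type from a generated lexer, and the input is assumed to have at least
    // ten tokens; any int token type behaves the same way.
    version(none) unittest
    {
        auto stream = new BufferedTokenStream(lexer);
        stream.fill();                                    // buffer everything through EOF
        Token[] ids = stream.getTokens(0, 9, MyLexer.ID); // only ID tokens in 0..9
        Token[] all = stream.getTokens(0, 9);             // every token in 0..9
    }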

    /**
     * Given a starting index, return the index of the next token on channel.
     * Return {@code i} if {@code tokens[i]} is on channel. Return the index of
     * the EOF token if there are no tokens on channel between {@code i} and
     * EOF.
     */
    protected size_t nextTokenOnChannel(size_t i, int channel)
    {
        sync(i);
        if (i >= size) {
            return size - 1;
        }

        Token token = tokens[i];
        while (token.getChannel != channel) {
            if (token.getType == TokenConstantDefinition.EOF) {
                return i;
            }

            i++;
            sync(i);
            token = tokens[i];
        }

        return i;
    }

    /**
     * Given a starting index, return the index of the previous token on
     * channel. Return {@code i} if {@code tokens[i]} is on channel. Return
     * {@code size_t.max} (the unsigned analogue of -1) if there are no tokens
     * on channel between {@code i} and 0.
     *
     * <p>
     * If {@code i} specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.</p>
     */
    protected size_t previousTokenOnChannel(size_t i, int channel)
    {
        sync(i);
        if (i >= size) {
            // the EOF token is on every channel
            return size() - 1;
        }
        while (true) {
            Token token = tokens[i];
            if (token.getType() == TokenConstantDefinition.EOF || token.getChannel == channel) {
                return i;
            }
            if (i == 0)
                break; // searched down to index 0 without finding a match
            i--;
        }
        // no on-channel token found; size_t.max wraps to 0 when the caller adds 1
        return size_t.max;
    }

    /**
     * Collect all tokens on the specified channel to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
     * EOF. If channel is -1, find any non-default channel token.
     */
    public Token[] getHiddenTokensToRight(size_t tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex < tokens.length,
               format("%1$s not in 0..%2$s", tokenIndex, tokens.length - 1));
    }
    do
    {
        auto nextOnChannel =
            nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        auto from = tokenIndex + 1;
        // nextTokenOnChannel returns the EOF index when no on-channel token
        // follows, so the range is bounded by the next on-channel token or EOF
        auto toIdx = nextOnChannel;
        return filterForChannel(from, toIdx, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
     * EOF.
     */
    public Token[] getHiddenTokensToRight(size_t tokenIndex)
    {
        return getHiddenTokensToRight(tokenIndex, -1);
    }

    /**
     * Collect all tokens on the specified channel to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     * If channel is -1, find any non-default channel token.
     */
    public Token[] getHiddenTokensToLeft(size_t tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex < tokens.length,
               format("%1$s not in 0..%2$s", tokenIndex, tokens.length - 1));
    }
    do
    {
        if (tokenIndex == 0) {
            // obviously no tokens can appear before the first token
            return null;
        }
        auto prevOnChannel =
            previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        if (prevOnChannel == tokenIndex - 1)
            return null;
        // if none on channel to the left, prevOnChannel is size_t.max and from wraps to 0
        auto from = prevOnChannel + 1;
        auto toIdx = tokenIndex - 1;
        return filterForChannel(from, toIdx, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     */
    public Token[] getHiddenTokensToLeft(size_t tokenIndex)
    {
        return getHiddenTokensToLeft(tokenIndex, -1);
    }

    /**
     * Collect the tokens in the range {@code from}..{@code to} (inclusive)
     * that are on {@code channel}, or on any non-default channel if
     * {@code channel} is -1. Return null if no such tokens were found.
     */
    public Token[] filterForChannel(size_t from, size_t to, int channel)
    {
        Token[] hidden;
        for (auto i = from; i <= to; i++) {
            Token t = tokens[i];
            if (channel == -1) {
                if (t.getChannel != Lexer.DEFAULT_TOKEN_CHANNEL)
                    hidden ~= t;
            }
            else {
                if (t.getChannel == channel)
                    hidden ~= t;
            }
        }
        if (hidden.length == 0)
            return null;
        return hidden;
    }
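
    // A hedged sketch of recovering hidden tokens, kept out of the build with
    // version(none). Comments and whitespace routed to a non-default channel
    // by the lexer stay in the buffer and can be retrieved next to any token
    // index; `COMMENTS` is a hypothetical channel constant from a grammar.
    version(none) unittest
    {
        auto stream = new BufferedTokenStream(lexer);
        stream.fill();
        Token[] hiddenBefore = stream.getHiddenTokensToLeft(5);              // any off-channel token
        Token[] commentsAfter = stream.getHiddenTokensToRight(5, COMMENTS);  // one channel only
    }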

    public string getSourceName()
    {
        return tokenSource.getSourceName();
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText()
    {
        lazyInit;
        fill;
        return getText(Interval.of(0, to!int(size) - 1));
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText(Interval interval)
    {
        int start = interval.a;
        int stop = interval.b;
        if (start < 0 || stop < 0) {
            Variant v;
            return v;
        }
        fill;
        if (stop >= tokens.length)
            stop = to!int(tokens.length) - 1;

        string buf;
        foreach (t; tokens[start .. stop + 1]) {
            if (t.getType == TokenConstantDefinition.EOF)
                break;
            buf ~= t.getText.get!string;
        }
        return Variant(buf);
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText(RuleContext ctx)
    {
        return getText(ctx.getSourceInterval());
    }

    /**
     * @uml
     * @override
     */
    public override Variant getText(Token start, Token stop)
    {
        if (start !is null && stop !is null) {
            return getText(Interval.of(to!int(start.getTokenIndex()), to!int(stop.getTokenIndex())));
        }
        Variant v = "";
        return v;
    }

    /**
     * Get all tokens from the lexer until EOF.
     */
    public void fill()
    {
        lazyInit;
        const int blockSize = 1000;
        while (true) {
            auto fetched = fetch(blockSize);
            if (fetched < blockSize) {
                return;
            }
        }
    }

    public final size_t index()
    {
        return this.index_;
    }

}
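
// A hedged end-to-end sketch, kept out of the build with version(none): fill
// the buffer and recover source text through the stream. `lexer` again stands
// in for any TokenSource; Interval and Variant follow the overloads defined
// above.
version(none) unittest
{
    auto stream = new BufferedTokenStream(lexer);
    stream.fill();                                     // force all tokens into the buffer
    Variant whole = stream.getText();                  // text of the whole stream
    Variant slice = stream.getText(Interval.of(0, 3)); // text of tokens 0..3 inclusive
}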