1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.BufferedTokenStream; 8 9 import antlr.v4.runtime.IllegalStateException; 10 import antlr.v4.runtime.Lexer; 11 import antlr.v4.runtime.RuleContext; 12 import antlr.v4.runtime.Token; 13 import antlr.v4.runtime.TokenConstantDefinition; 14 import antlr.v4.runtime.TokenSource; 15 import antlr.v4.runtime.TokenStream; 16 import antlr.v4.runtime.WritableToken; 17 import antlr.v4.runtime.misc.Interval; 18 import std.algorithm: canFind; 19 import std.array; 20 import std.conv; 21 import std.format; 22 import std.variant; 23 24 /** 25 * This implementation of {@link TokenStream} loads tokens from a 26 * {@link TokenSource} on-demand, and places the tokens in a buffer to provide 27 * access to any previous token by index. 28 * 29 * <p> 30 * This token stream ignores the value of {@link Token#getChannel}. If your 31 * parser requires the token stream filter tokens to only those on a particular 32 * channel, such as {@link Token#DEFAULT_CHANNEL} or 33 * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a 34 * {@link CommonTokenStream}.</p> 35 */ 36 class BufferedTokenStream : TokenStream 37 { 38 39 /** 40 * The {@link TokenSource} from which tokens for this stream are fetched. 41 */ 42 protected TokenSource tokenSource; 43 44 /** 45 * A collection of all tokens fetched from the token source. The list is 46 * considered a complete view of the input once {@link #fetchedEOF} is set 47 * to {@code true}. 48 */ 49 protected Token[] tokens; 50 51 /** 52 * The index into {@link #tokens} of the current token (next token to 53 * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be 54 * {@link #LT LT(1)}. 55 * 56 * <p>This field is set to -1 when the stream is first constructed or when 57 * {@link #setTokenSource} is called, indicating that the first token has 58 * not yet been fetched from the token source. For additional information, 59 * see the documentation of {@link IntStream} for a description of 60 * Initializing Methods.</p> 61 * @uml 62 * @read 63 */ 64 private int index_ = -1; 65 66 /** 67 * Indicates whether the {@link Token#EOF} token has been fetched from 68 * {@link #tokenSource} and added to {@link #tokens}. This field improves 69 * performance for the following cases: 70 * 71 * <ul> 72 * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent 73 * consuming the EOF symbol is optimized by checking the values of 74 * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li> 75 * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into 76 * {@link #tokens} is trivial with this field.</li> 77 * <ul> 78 */ 79 protected bool fetchedEOF; 80 81 public this(TokenSource tokenSource) 82 in 83 { 84 assert (tokenSource !is null, "tokenSource cannot be null"); 85 } 86 do 87 { 88 this.tokenSource = tokenSource; 89 } 90 91 /** 92 * @uml 93 * @override 94 */ 95 public override TokenSource getTokenSource() 96 { 97 return tokenSource; 98 } 99 100 public int mark() 101 { 102 return 0; 103 } 104 105 public void release(int marker) 106 { 107 // no resources to release 108 } 109 110 public void reset() 111 { 112 seek(0); 113 } 114 115 public void seek(int index) 116 { 117 lazyInit; 118 index_ = adjustSeekIndex(index); 119 } 120 121 public int size() 122 { 123 return to!int(tokens.length); 124 } 125 126 public void consume() 127 { 128 bool skipEofCheck; 129 if (index_ >= 0) { 130 if (fetchedEOF) { 131 // the last token in tokens is EOF. skip check if p indexes any 132 // fetched token except the last. 133 skipEofCheck = index_ < tokens.length - 1; 134 } 135 else { 136 // no EOF token in tokens. skip check if p indexes a fetched token. 137 skipEofCheck = index_ < tokens.length; 138 } 139 } 140 else { 141 // not yet initialized 142 skipEofCheck = false; 143 } 144 145 if (!skipEofCheck && LA(1) == TokenConstantDefinition.EOF) { 146 throw new IllegalStateException("cannot consume EOF"); 147 } 148 149 if (sync(index_ + 1)) { 150 index_ = adjustSeekIndex(index_ + 1); 151 } 152 } 153 154 /** 155 * Make sure index {@code i} in tokens has a token. 156 * 157 * @return {@code true} if a token is located at index {@code i}, otherwise 158 * {@code false}. 159 * @see #get(int i) 160 */ 161 protected bool sync(int i) 162 in 163 { 164 assert (i >= 0); 165 } 166 do 167 { 168 int n = i - to!int(tokens.length) + 1; // how many more elements we need? 169 if ( n > 0 ) { 170 int fetched = fetch(n); 171 return fetched >= n; 172 } 173 return true; 174 } 175 176 /** 177 * Add {@code n} elements to buffer. 178 * 179 * @return The actual number of elements added to the buffer. 180 */ 181 protected int fetch(int n) 182 { 183 if (fetchedEOF) { 184 return 0; 185 } 186 for (int i = 0; i < n; i++) { 187 Token t = tokenSource.nextToken(); 188 if (cast(WritableToken)t) { 189 (cast(WritableToken)t).setTokenIndex(to!int(tokens.length)); 190 } 191 tokens ~= t; 192 if (t.getType == TokenConstantDefinition.EOF) { 193 fetchedEOF = true; 194 return i + 1; 195 } 196 } 197 return n; 198 } 199 200 /** 201 * @uml 202 * @override 203 */ 204 public override Token get(int i) 205 in 206 { 207 assert( i >= 0 && i < tokens.length, format("token index %1$s out of range 0..%2$s", i, tokens.length-1)); 208 } 209 do 210 { 211 return tokens[i]; 212 } 213 214 /** 215 * Get all tokens from start..stop inclusively 216 */ 217 public Token[] get(int start, int stop) 218 { 219 if (start < 0 || stop < 0 ) return null; 220 lazyInit; 221 Token[] subset; 222 if (stop >= tokens.length) stop = to!int(tokens.length) - 1; 223 for (int i = start; i <= stop; i++) { 224 Token t = tokens[i]; 225 if (t.getType == TokenConstantDefinition.EOF) 226 break; 227 subset ~= t; 228 } 229 return subset; 230 } 231 232 public int LA(int i) 233 { 234 return LT(i).getType(); 235 } 236 237 public Token LB(int k) 238 { 239 if ((index_ - k) < 0) 240 return null; 241 return tokens[index_ - k]; 242 } 243 244 /** 245 * @uml 246 * @override 247 */ 248 public override Token LT(int k) 249 { 250 lazyInit(); 251 if (k == 0) 252 return null; 253 if (k < 0) 254 return LB(-k); 255 int i = index_ + k - 1; 256 sync(i); 257 if ( i >= tokens.length ) { // return EOF token 258 // EOF must be last token 259 return tokens[$-1]; 260 } 261 return tokens[i]; 262 } 263 264 /** 265 * Allowed derived classes to modify the behavior of operations which change 266 * the current stream position by adjusting the target token index of a seek 267 * operation. The default implementation simply returns {@code i}. If an 268 * exception is thrown in this method, the current stream index should not be 269 * changed. 270 * 271 * <p>For example, {@link CommonTokenStream} overrides this method to ensure that 272 * the seek target is always an on-channel token.</p> 273 * 274 * @param i The target token index. 275 * @return The adjusted target token index. 276 */ 277 protected int adjustSeekIndex(int i) 278 { 279 return i; 280 } 281 282 protected void lazyInit() 283 { 284 if (index_ == -1) { 285 setup; 286 } 287 } 288 289 protected void setup() 290 { 291 sync(0); 292 index_ = adjustSeekIndex(0); 293 } 294 295 /** 296 * Reset this token stream by setting its token source. 297 */ 298 public void setTokenSource(TokenSource tokenSource) 299 { 300 this.tokenSource = tokenSource; 301 tokens.length = 0; 302 index_ = -1; 303 } 304 305 public Token[] getTokens() 306 { 307 return tokens; 308 } 309 310 public Token[] getTokens(int start, int stop) 311 { 312 return getTokens(start, stop, null); 313 } 314 315 /** 316 * Given a start and stop index, return a List of all tokens in 317 * the token type BitSet. Return null if no tokens were found. This 318 * method looks at both on and off channel tokens. 319 */ 320 public Token[] getTokens(int start, int stop, int[] types) 321 in 322 { 323 lazyInit(); 324 assert(start >= 0 && stop < tokens.length && 325 stop > 0 && start < tokens.length, 326 format("start %1$s or stop %2$s not in 0..%3$s", start, stop, tokens.length - 1)); 327 } 328 do 329 { 330 if (start > stop) 331 return null; 332 333 // list = tokens[start:stop]:{T t, t.getType() in types} 334 Token[] filteredTokens; 335 for (int i=start; i<=stop; i++) { 336 Token t = tokens[i]; 337 if (types is null || types.canFind(t.getType()) ) { 338 filteredTokens ~= t; 339 } 340 } 341 if (filteredTokens.length == 0) { 342 filteredTokens = null; 343 } 344 return filteredTokens; 345 } 346 347 public Token[] getTokens(int start, int stop, int ttype) 348 { 349 int[] s; 350 s ~= ttype; 351 return getTokens(start,stop, s); 352 } 353 354 /** 355 * Given a starting index, return the index of the next token on channel. 356 * Return {@code i} if {@code tokens[i]} is on channel. Return the index of 357 * the EOF token if there are no tokens on channel between {@code i} and 358 * EOF. 359 */ 360 protected int nextTokenOnChannel(int i, int channel) 361 { 362 sync(i); 363 if (i >= size) { 364 return size - 1; 365 } 366 367 Token token = tokens[i]; 368 while (token.getChannel != channel) { 369 if (token.getType == TokenConstantDefinition.EOF) { 370 return i; 371 } 372 373 i++; 374 sync(i); 375 token = tokens[i]; 376 } 377 378 return i; 379 } 380 381 /** 382 * Given a starting index, return the index of the previous token on 383 * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1 384 * if there are no tokens on channel between {@code i} and 0. 385 * 386 * <p> 387 * If {@code i} specifies an index at or after the EOF token, the EOF token 388 * index is returned. This is due to the fact that the EOF token is treated 389 * as though it were on every channel.</p> 390 */ 391 protected int previousTokenOnChannel(int i, int channel) 392 { 393 sync(i); 394 if (i >= size) { 395 // the EOF token is on every channel 396 return size() - 1; 397 } 398 while (i >= 0) { 399 Token token = tokens[i]; 400 if (token.getType() == TokenConstantDefinition.EOF || token.getChannel == channel) { 401 return i; 402 } 403 i--; 404 } 405 return i; 406 } 407 408 /** 409 * Collect all tokens on specified channel to the right of 410 * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or 411 * EOF. If channel is -1, find any non default channel token. 412 */ 413 public Token[] getHiddenTokensToRight(int tokenIndex, int channel) 414 in 415 { 416 lazyInit(); 417 assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1)); 418 } 419 do 420 { 421 int nextOnChannel = 422 nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL); 423 int to; 424 int from = tokenIndex+1; 425 // if none onchannel to right, nextOnChannel=-1 so set to = last token 426 if ( nextOnChannel == -1 ) to = size()-1; 427 else to = nextOnChannel; 428 return filterForChannel(from, to, channel); 429 } 430 431 /** 432 * Collect all hidden tokens (any off-default channel) to the right of 433 * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL 434 * of EOF. 435 */ 436 public Token[] getHiddenTokensToRight(int tokenIndex) 437 { 438 return getHiddenTokensToRight(tokenIndex, -1); 439 } 440 441 /** 442 * Collect all tokens on specified channel to the left of 443 * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. 444 * If channel is -1, find any non default channel token. 445 */ 446 public Token[] getHiddenTokensToLeft(int tokenIndex, int channel) 447 in 448 { 449 lazyInit(); 450 assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1)); 451 } 452 do 453 { 454 455 if (tokenIndex == 0) { 456 // obviously no tokens can appear before the first token 457 return null; 458 } 459 int prevOnChannel = 460 previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL); 461 if ( prevOnChannel == tokenIndex - 1 ) return null; 462 // if none onchannel to left, prevOnChannel=-1 then from=0 463 int from = prevOnChannel+1; 464 int to = tokenIndex-1; 465 return filterForChannel(from, to, channel); 466 } 467 468 /** 469 * Collect all hidden tokens (any off-default channel) to the left of 470 * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. 471 */ 472 public Token[] getHiddenTokensToLeft(int tokenIndex) 473 { 474 return getHiddenTokensToLeft(tokenIndex, -1); 475 } 476 477 /** 478 * Collect all hidden tokens (any off-default channel) to the left of 479 * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. 480 */ 481 public Token[] filterForChannel(int from, int to, int channel) 482 { 483 Token[] hidden; 484 for (int i=from; i<=to; i++) { 485 Token t = tokens[i]; 486 if (channel == -1) { 487 if (t.getChannel != Lexer.DEFAULT_TOKEN_CHANNEL) hidden ~= t; 488 } 489 else { 490 if (t.getChannel == channel) 491 hidden ~= t; 492 } 493 } 494 if (hidden.length == 0) return null; 495 return hidden; 496 } 497 498 public string getSourceName() 499 { 500 return tokenSource.getSourceName(); 501 } 502 503 /** 504 * @uml 505 * @override 506 */ 507 public override Variant getText() 508 { 509 lazyInit(); 510 fill(); 511 return getText(Interval.of(0,size()-1)); 512 } 513 514 /** 515 * @uml 516 * @override 517 */ 518 public override Variant getText(Interval interval) 519 { 520 import std.array : join; 521 int start = interval.a; 522 int stop = interval.b; 523 if (start < 0 || stop < 0) { 524 Variant v; 525 return v; 526 } 527 fill(); 528 if (stop >= tokens.length) 529 stop = to!int(tokens.length) - 1; 530 531 string[] buf; 532 foreach (t; tokens[start..stop+1]) { 533 if (t.getType == TokenConstantDefinition.EOF) 534 break; 535 buf ~= t.getText.get!string; 536 } 537 return Variant(buf.join(" ")); 538 } 539 540 /** 541 * @uml 542 * @override 543 */ 544 public override Variant getText(RuleContext ctx) 545 { 546 return getText(ctx.getSourceInterval()); 547 } 548 549 /** 550 * @uml 551 * @override 552 */ 553 public override Variant getText(Token start, Token stop) 554 { 555 if (start !is null && stop !is null) { 556 return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex())); 557 } 558 Variant v = ""; 559 return v; 560 } 561 562 /** 563 * Get all tokens from lexer until EOF 564 */ 565 public void fill() 566 { 567 lazyInit(); 568 const int blockSize = 1000; 569 while (true) { 570 int fetched = fetch(blockSize); 571 if (fetched < blockSize) { 572 return; 573 } 574 } 575 } 576 577 public final int index() 578 { 579 return this.index_; 580 } 581 582 }