/*
 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.BufferedTokenStream;

import std.array;
import std.format;
import std.conv;
import std.algorithm: canFind;
import antlr.v4.runtime.IllegalStateException;
import antlr.v4.runtime.Lexer;
import antlr.v4.runtime.RuleContext;
import antlr.v4.runtime.Token;
import antlr.v4.runtime.TokenConstantDefinition;
import antlr.v4.runtime.TokenSource;
import antlr.v4.runtime.TokenStream;
import antlr.v4.runtime.WritableToken;
import antlr.v4.runtime.misc.Interval;

/**
 * This implementation of {@link TokenStream} loads tokens from a
 * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
 * access to any previous token by index.
 *
 * <p>
 * This token stream ignores the value of {@link Token#getChannel}. If your
 * parser requires the token stream to filter tokens to only those on a
 * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
 * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@link CommonTokenStream}.</p>
 */
class BufferedTokenStream : TokenStream
{

    /**
     * The {@link TokenSource} from which tokens for this stream are fetched.
     */
    protected TokenSource tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once {@link #fetchedEOF} is set
     * to {@code true}.
     */
    protected Token[] tokens;

    /**
     * The index into {@link #tokens} of the current token (next token to
     * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
     * {@link #LT LT(1)}.
     *
     * <p>This field is set to -1 when the stream is first constructed or when
     * {@link #setTokenSource} is called, indicating that the first token has
     * not yet been fetched from the token source. For additional information,
     * see the documentation of {@link IntStream} for a description of
     * Initializing Methods.</p>
     * @uml
     * @read
     */
    private int index_ = -1;

    /**
     * Indicates whether the {@link Token#EOF} token has been fetched from
     * {@link #tokenSource} and added to {@link #tokens}. This field improves
     * performance for the following cases:
     *
     * <ul>
     * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
     * consuming the EOF symbol is optimized by checking the values of
     * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
     * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
     * {@link #tokens} is trivial with this field.</li>
     * </ul>
     */
    protected bool fetchedEOF;

    public this(TokenSource tokenSource)
    in
    {
        assert (tokenSource !is null, "tokenSource cannot be null");
    }
    do
    {
        this.tokenSource = tokenSource;
    }

    /**
     * @uml
     * @override
     */
    public override TokenSource getTokenSource()
    {
        return tokenSource;
    }

    public int mark()
    {
        // this stream buffers everything, so no mark bookkeeping is needed
        return 0;
    }

    public void release(int marker)
    {
        // no resources to release
    }

    public void reset()
    {
        seek(0);
    }

    public void seek(int index)
    {
        lazyInit();
        index_ = adjustSeekIndex(index);
    }

    public int size()
    {
        return to!int(tokens.length);
    }

    public void consume()
    {
        bool skipEofCheck;
        if (index_ >= 0) {
            if (fetchedEOF) {
                // the last token in tokens is EOF. skip check if p indexes any
                // fetched token except the last.
                skipEofCheck = index_ < tokens.length - 1;
            }
            else {
                // no EOF token in tokens. skip check if p indexes a fetched token.
                skipEofCheck = index_ < tokens.length;
            }
        }
        else {
            // not yet initialized
            skipEofCheck = false;
        }

        if (!skipEofCheck && LA(1) == TokenConstantDefinition.EOF) {
            throw new IllegalStateException("cannot consume EOF");
        }

        if (sync(index_ + 1)) {
            index_ = adjustSeekIndex(index_ + 1);
        }
    }

    /**
     * Make sure index {@code i} in tokens has a token.
     *
     * @return {@code true} if a token is located at index {@code i}, otherwise
     * {@code false}.
     * @see #get(int i)
     */
    protected bool sync(int i)
    in
    {
        assert (i >= 0);
    }
    do
    {
        int n = i - to!int(tokens.length) + 1; // how many more elements we need?
        if ( n > 0 ) {
            int fetched = fetch(n);
            return fetched >= n;
        }
        return true;
    }

    /**
     * Add {@code n} elements to buffer.
     *
     * @return The actual number of elements added to the buffer.
     */
    protected int fetch(int n)
    {
        if (fetchedEOF) {
            return 0;
        }
        for (int i = 0; i < n; i++) {
            Token t = tokenSource.nextToken();
            if (cast(WritableToken)t) {
                // record the token's position in the buffer on the token itself
                (cast(WritableToken)t).setTokenIndex(to!int(tokens.length));
            }
            tokens ~= t;
            if (t.getType == TokenConstantDefinition.EOF) {
                fetchedEOF = true;
                return i + 1; // stop early: EOF must be the last buffered token
            }
        }
        return n;
    }

    /**
     * @uml
     * @override
     */
    public override Token get(int i)
    in
    {
        assert( i >= 0 && i < tokens.length, format("token index %1$s out of range 0..%2$s", i, tokens.length-1));
    }
    do
    {
        return tokens[i];
    }

    /**
     * Get all tokens from start..stop inclusively.
     *
     * <p>Stops early (excluding the EOF token) if EOF is encountered within
     * the range. Returns {@code null} if either bound is negative.</p>
     */
    public Token[] get(int start, int stop)
    {
        if (start < 0 || stop < 0 ) return null;
        lazyInit();
        Token[] subset;
        if (stop >= tokens.length) stop = to!int(tokens.length) - 1;
        for (int i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF)
                break;
            subset ~= t;
        }
        return subset;
    }

    public int LA(int i)
    {
        return LT(i).getType();
    }

    /**
     * Look backwards {@code k} tokens from the current position; returns
     * {@code null} if that would run off the front of the buffer.
     */
    public Token LB(int k)
    {
        if ((index_ - k) < 0)
            return null;
        return tokens[index_ - k];
    }

    /**
     * @uml
     * @override
     */
    public override Token LT(int k)
    {
        lazyInit();
        if (k == 0)
            return null;
        if (k < 0)
            return LB(-k);
        int i = index_ + k - 1;
        sync(i);
        if ( i >= tokens.length ) { // return EOF token
            // EOF must be last token
            return tokens[$-1];
        }
        return tokens[i];
    }

    /**
     * Allowed derived classes to modify the behavior of operations which change
     * the current stream position by adjusting the target token index of a seek
     * operation. The default implementation simply returns {@code i}. If an
     * exception is thrown in this method, the current stream index should not be
     * changed.
     *
     * <p>For example, {@link CommonTokenStream} overrides this method to ensure that
     * the seek target is always an on-channel token.</p>
     *
     * @param i The target token index.
     * @return The adjusted target token index.
     */
    protected int adjustSeekIndex(int i)
    {
        return i;
    }

    protected void lazyInit()
    {
        // index_ == -1 means no token has been fetched yet (see index_ docs)
        if (index_ == -1) {
            setup();
        }
    }

    protected void setup()
    {
        sync(0);
        index_ = adjustSeekIndex(0);
    }

    /**
     * Reset this token stream by setting its token source.
     */
    public void setTokenSource(TokenSource tokenSource)
    {
        this.tokenSource = tokenSource;
        tokens.length = 0;
        index_ = -1;
    }

    public Token[] getTokens()
    {
        return tokens;
    }

    public Token[] getTokens(int start, int stop)
    {
        return getTokens(start, stop, null);
    }

    /**
     * Given a start and stop index, return a List of all tokens in
     * the token type BitSet. Return null if no tokens were found. This
     * method looks at both on and off channel tokens.
     */
    public Token[] getTokens(int start, int stop, int[] types)
    in
    {
        lazyInit();
        // stop >= 0 (not > 0): stop == 0 is a valid index when token 0 exists
        assert(start >= 0 && stop < tokens.length &&
               stop >= 0 && start < tokens.length,
               format("start %1$s or stop %2$s not in 0..%3$s", start, stop, tokens.length - 1));
    }
    do
    {
        if (start > stop)
            return null;

        // list = tokens[start:stop]:{T t, t.getType() in types}
        Token[] filteredTokens;
        for (int i=start; i<=stop; i++) {
            Token t = tokens[i];
            if (types is null || types.canFind(t.getType()) ) {
                filteredTokens ~= t;
            }
        }
        if (filteredTokens.length == 0) {
            filteredTokens = null;
        }
        return filteredTokens;
    }

    public Token[] getTokens(int start, int stop, int ttype)
    {
        int[] s;
        s ~= ttype;
        return getTokens(start, stop, s);
    }

    /**
     * Given a starting index, return the index of the next token on channel.
     * Return {@code i} if {@code tokens[i]} is on channel. Return the index of
     * the EOF token if there are no tokens on channel between {@code i} and
     * EOF.
     */
    protected int nextTokenOnChannel(int i, int channel)
    {
        sync(i);
        if (i >= size) {
            // past the end: the EOF token is the answer
            return size - 1;
        }

        Token token = tokens[i];
        while (token.getChannel != channel) {
            if (token.getType == TokenConstantDefinition.EOF) {
                return i;
            }

            i++;
            sync(i);
            token = tokens[i];
        }

        return i;
    }

    /**
     * Given a starting index, return the index of the previous token on
     * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
     * if there are no tokens on channel between {@code i} and 0.
     *
     * <p>
     * If {@code i} specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.</p>
     */
    protected int previousTokenOnChannel(int i, int channel)
    {
        sync(i);
        if (i >= size) {
            // the EOF token is on every channel
            return size() - 1;
        }
        while (i >= 0) {
            Token token = tokens[i];
            if (token.getType() == TokenConstantDefinition.EOF || token.getChannel == channel) {
                return i;
            }
            i--;
        }
        return i; // -1: nothing on channel before index 0
    }

    /**
     * Collect all tokens on specified channel to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
     * EOF. If channel is -1, find any non default channel token.
     */
    public Token[] getHiddenTokensToRight(int tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1));
    }
    do
    {
        int nextOnChannel =
            nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        int to;
        int from = tokenIndex+1;
        // if none onchannel to right, nextOnChannel=-1 so set to = last token
        if ( nextOnChannel == -1 ) to = size()-1;
        else to = nextOnChannel;
        return filterForChannel(from, to, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the right of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL
     * of EOF.
     */
    public Token[] getHiddenTokensToRight(int tokenIndex)
    {
        return getHiddenTokensToRight(tokenIndex, -1);
    }

    /**
     * Collect all tokens on specified channel to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     * If channel is -1, find any non default channel token.
     */
    public Token[] getHiddenTokensToLeft(int tokenIndex, int channel)
    in
    {
        lazyInit();
        assert(tokenIndex >= 0 && tokenIndex < tokens.length, format("%1$s not in 0..%2$s", tokenIndex, tokens.length-1));
    }
    do
    {

        if (tokenIndex == 0) {
            // obviously no tokens can appear before the first token
            return null;
        }
        int prevOnChannel =
            previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL);
        if ( prevOnChannel == tokenIndex - 1 ) return null;
        // if none onchannel to left, prevOnChannel=-1 then from=0
        int from = prevOnChannel+1;
        int to = tokenIndex-1;
        return filterForChannel(from, to, channel);
    }

    /**
     * Collect all hidden tokens (any off-default channel) to the left of
     * the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
     */
    public Token[] getHiddenTokensToLeft(int tokenIndex)
    {
        return getHiddenTokensToLeft(tokenIndex, -1);
    }

    /**
     * Return the tokens in the range from..to (inclusive) that are on the
     * given channel, or any off-default channel if {@code channel} is -1.
     * Returns {@code null} if no tokens match.
     */
    public Token[] filterForChannel(int from, int to, int channel)
    {
        Token[] hidden;
        for (int i=from; i<=to; i++) {
            Token t = tokens[i];
            if (channel == -1) {
                if (t.getChannel != Lexer.DEFAULT_TOKEN_CHANNEL) hidden ~= t;
            }
            else {
                if (t.getChannel == channel)
                    hidden ~= t;
            }
        }
        if (hidden.length == 0) return null;
        return hidden;
    }

    public string getSourceName()
    {
        return tokenSource.getSourceName();
    }

    /**
     * @uml
     * @override
     */
    public override string getText()
    {
        lazyInit();
        fill();
        return getText(Interval.of(0,size()-1));
    }

    /**
     * @uml
     * @override
     */
    public override string getText(Interval interval)
    {
        int start = interval.a;
        int stop = interval.b;
        if (start < 0 || stop < 0) return "";
        lazyInit();
        if (stop >= tokens.length) stop = to!int(tokens.length) - 1;

        auto buf = appender!(string);
        for (int i = start; i <= stop; i++) {
            Token t = tokens[i];
            if (t.getType == TokenConstantDefinition.EOF) break;
            buf.put(t.getText());
        }
        return buf.data;
    }

    /**
     * @uml
     * @override
     */
    public override string getText(RuleContext ctx)
    {
        return getText(ctx.getSourceInterval());
    }

    /**
     * @uml
     * @override
     */
    public override string getText(Token start, Token stop)
    {
        if (start !is null && stop !is null) {
            return getText(Interval.of(start.getTokenIndex(), stop.getTokenIndex()));
        }
        return "";
    }

    /**
     * Get all tokens from lexer until EOF
     */
    public void fill()
    {
        lazyInit();
        const int blockSize = 1000;
        while (true) {
            int fetched = fetch(blockSize);
            if (fetched < blockSize) {
                return;
            }
        }
    }

    public final int index()
    {
        return this.index_;
    }

}