/*
 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.ANTLRInputStream;

import antlr.v4.runtime.CharStream;
import antlr.v4.runtime.IntStream;
import antlr.v4.runtime.IntStreamConstant;
import antlr.v4.runtime.misc.Interval;
import std.algorithm;
import std.conv : to;
import std.file;
import std.format;
import std.range;
import std.stdio;
import std.utf;

/**
 * A character stream that holds its entire input in memory as a UTF-8
 * buffer and exposes it code point by code point.
 *
 * The input can be supplied as a {@code string}, as a {@code char[]} or
 * as a {@code File} whose complete content is read up front.
 *
 * <p>All positions handed out or accepted by this class ({@link #index},
 * {@link #seek}, {@link #size}, {@link #getText}) are code point indices,
 * not byte offsets into the UTF-8 buffer.</p>
 */
class ANTLRInputStream : CharStream
{

    /**
     * The UTF-8 data being scanned
     */
    protected char[] data;

    /**
     * How many UCS code points are actually in the buffer
     */
    protected size_t cp_in_buffer;

    /**
     * Code point index (not byte offset) of the next character to be read
     */
    protected size_t index_of_next_char = 0;

    /**
     * What is name or source of this char stream?
     */
    public string name;

    /**
     * Create an empty stream; no data is attached.
     */
    public this()
    {
    }

    /**
     * Copy data in string to a local char array
     *
     * Params:
     *     input = the text to scan
     */
    public this(string input)
    {
        data = input.to!(char []);
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * This is the preferred constructor for char arrays as no data is copied;
     * the stream keeps referring to the caller's array.
     *
     * Params:
     *     data = the UTF-8 buffer to scan
     *     numberOfActualCharsInArray = currently not consulted; the code
     *         point count is always derived from the whole of `data`
     */
    public this(char[] data, size_t numberOfActualCharsInArray)
    {
        this.data = data;
        cp_in_buffer = this.data.toUCSindex(this.data.length);
    }

    /**
     * Create a stream from the complete content of a file.
     *
     * Params:
     *     r = the file to read; see {@link #load}
     */
    public this(File r)
    {
        load(r);
    }

    /**
     * Read the whole file into the buffer and remember the file name as
     * the name of this stream.
     *
     * The current read position is not modified by this method.
     *
     * Params:
     *     r = the file to read, consumed in chunks of 4096 bytes
     */
    public void load(File r)
    {
        import std.array : array;

        name = r.name;
        data = cast(char[]) r.byChunk(4096).joiner.array;
        // set the actual size of the data available;
        cp_in_buffer = data.toUCSindex(data.length);
        debug (ANTLRInputStream)
            writefln!"name = %s; cp_in_buffer = %s"(name, cp_in_buffer);
    }

    /**
     * Reset the stream so that it's in the same state it was
     * when the object was created *except* the data array is not
     * touched.
     */
    public void reset()
    {
        index_of_next_char = 0;
    }

    /**
     * Advance the read position by one code point.
     *
     * At the end of the buffer the position is left unchanged.
     * @uml
     * @override
     */
    public override void consume()
    {
        if (index_of_next_char >= cp_in_buffer)
        {
            assert (LA(1) == IntStreamConstant.EOF, "cannot consume EOF");
            // Nothing left to consume; returning here also keeps the debug
            // trace below from decoding an empty slice.
            return;
        }

        debug (ANTLRInputStream)
        {
            import std.stdio;
            writefln!"consume; prev index_of_next_char= %s, data[index_of_next_character] = %s"(
                index_of_next_char,
                front(data[data.toUTFindex(index_of_next_char) .. $]));
        }

        index_of_next_char++;

        debug (ANTLRInputStream)
        {
            import std.stdio;
            // index_of_next_char counts code points, so the character has to
            // be decoded from its UTF-8 offset instead of indexing data with it.
            if (index_of_next_char < cp_in_buffer)
            {
                writefln!"p moves to %s (c='%s')"(
                    index_of_next_char,
                    front(data[data.toUTFindex(index_of_next_char) .. $]));
            }
            else
            {
                writefln!"p moves to %s (EOF)"(index_of_next_char);
            }
        }
    }

    /**
     * UTF-8 coded character mapped to UTF-32
     *
     * Params:
     *     i = look-ahead distance: 1 is the next character to be read,
     *         -1 the character before it; 0 is undefined
     * Returns: the decoded code point, `to!dchar(0)` for `i == 0`, or
     *     `to!dchar(IntStreamConstant.EOF)` when the requested position lies
     *     before the first or after the last code point
     * @uml
     * @override
     */
    public override dchar LA(int i)
    {
        if (i == 0)
        {
            return to!dchar(0); // undefined
        }
        if (i < 0)
        {
            i++; // e.g., translate LA(-1) to use offset i=0; then position = index_of_next_char+0-1
        }
        // index_of_next_char is unsigned, so the position is computed with
        // signed arithmetic; otherwise a look-behind past the start would
        // wrap around instead of becoming negative.
        immutable long position = cast(long) index_of_next_char + i - 1;
        if (position < 0 || position >= cast(long) cp_in_buffer)
        {
            return to!dchar(IntStreamConstant.EOF); // no char before first / after last char
        }
        return front(data[data.toUTFindex(cast(size_t) position) .. $]);
    }

    /**
     * Same as {@link #LA}.
     */
    public dchar LT(int i)
    {
        return LA(i);
    }

    /**
     * Returns: the code point index of the next character to be read
     * @uml
     * @override
     */
    public override size_t index()
    {
        return index_of_next_char;
    }

    /**
     * Returns: the number of code points in the buffer
     * @uml
     * @override
     */
    public override size_t size()
    {
        return cp_in_buffer;
    }

    /**
     * mark/release do nothing; we have entire buffer
     *
     * Returns: always -1
     * @uml
     * @override
     */
    public override int mark()
    {
        return -1;
    }

    /**
     * Does nothing; see {@link #mark}.
     * @uml
     * @override
     */
    public override void release(int marker)
    {
    }

    /**
     * Move the read position to the given code point index.
     *
     * Seeking forward goes through consume() one code point at a time and
     * stops at the end of the buffer; seeking backwards (or to the current
     * position) just sets the position.
     *
     * Params:
     *     index = the code point index to move to
     * @uml
     * @override
     */
    public override void seek(size_t index)
    {
        if (index <= index_of_next_char)
        {
            index_of_next_char = index; // just jump; don't update stream state (line, ...)
            return;
        }
        // seek forward, consume until next code point hits index or cp_in_buffer
        // (whichever comes first)
        index = min(index, cp_in_buffer);
        while (index_of_next_char < index)
        {
            consume();
        }
    }

    /**
     * Get the text between two code point indices.
     *
     * Params:
     *     interval = first (`a`) and last (`b`) code point index, both
     *         inclusive; `b` is clamped to the last code point in the buffer
     * Returns: a copy of the selected text, or "" when the interval starts
     *     at or behind the end of the buffer, starts before index 0 or is
     *     empty
     * @uml
     * @override
     */
    public override string getText(Interval interval)
    {
        immutable int available = to!int(cp_in_buffer);
        int start = interval.a;
        int stop = interval.b;
        if (stop >= available)
            stop = available - 1;
        if (start >= available)
            return "";
        // A negative start or an inverted interval would wrap around when
        // converted to an unsigned index; there is no text to return.
        if (start < 0 || stop < start)
            return "";

        // Locate the end offset relative to the start offset so that the
        // buffer prefix is not decoded twice.
        immutable size_t first = data.toUTFindex(start);
        immutable size_t last = first + data[first .. $].toUTFindex(stop - start + 1);

        debug (ANTLRInputStream)
        {
            writefln!"data: start=%s, stop=%s, string = %s"(
                start, stop,
                data[first .. last]);
        }

        return to!string(data[first .. last]);
    }

    /**
     * Returns: the name of this stream, or
     *     `IntStreamConstant.UNKNOWN_SOURCE_NAME` when no name (or an empty
     *     name) is set
     * @uml
     * @override
     */
    public override string getSourceName()
    {
        // length covers both a null and an empty-but-allocated name
        if (name.length == 0)
        {
            return IntStreamConstant.UNKNOWN_SOURCE_NAME;
        }
        return name;
    }

    /**
     * Returns: a copy of the complete buffer as a string
     * @uml
     * @override
     */
    public override string toString()
    {
        return to!string(data);
    }

}