/*
 * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.ANTLRInputStream;

import antlr.v4.runtime.CharStream;
import antlr.v4.runtime.IntStream;
import antlr.v4.runtime.IntStreamConstant;
import antlr.v4.runtime.misc.Interval;
import std.algorithm;
import std.conv : to;
import std.file;
import std.format;
import std.range;
import std.stdio;
import std.utf;

/**
 * A character stream that keeps its whole input in memory as a UTF-8
 * `char[]` buffer.
 *
 * The input can be supplied as a `string`, as a `char[]`, or loaded from a
 * file. All positions exposed by this class (`index`, `size`, `seek`, `LA`,
 * `getText`) are counted in code points, not in UTF-8 code units; they are
 * translated to buffer offsets with `std.utf.toUTFindex` on each access.
 */
class ANTLRInputStream : CharStream
{

    /**
     * The UTF-8 data being scanned.
     */
    protected char[] data;

    /**
     * Number of code points contained in `data`.
     */
    protected size_t cp_in_buffer;

    /**
     * Code-point index of the next character to be consumed.
     */
    protected size_t index_of_next_char = 0;

    /**
     * Name or source of this character stream; see `getSourceName`.
     */
    public string name;

    /**
     * Create an empty stream with no data and no name.
     */
    public this()
    {
    }

    /**
     * Create a stream over a mutable copy of `input`.
     *
     * Params:
     *     input = the text to scan
     */
    public this(string input)
    {
        data = input.to!(char[]);
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * Create a stream that uses `data` directly as its buffer.
     *
     * Params:
     *     data = the UTF-8 buffer to scan; it is stored as given, not copied
     *     numberOfActualCharsInArray = accepted for interface compatibility
     *         but not used: the code-point count is always recomputed from
     *         the full length of `data`
     */
    public this(char[] data, size_t numberOfActualCharsInArray)
    {
        this.data = data;
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * Create a stream from the file referred to by `r`; see `load`.
     */
    public this(File r)
    {
        load(r);
    }

    /**
     * Replace the buffer with the text of the file named by `r.name`.
     *
     * The file is read by name through `std.file.readText`; the handle `r`
     * itself is only used to obtain that name, which also becomes the
     * stream's `name`.
     *
     * Params:
     *     r = file whose name identifies the text to read
     */
    public void load(File r)
    {
        name = r.name;
        data = to!(char[])(name.readText);
        // Record how many code points are available in the buffer.
        cp_in_buffer = data.toUCSindex(data.length);
        debug (ANTLRInputStream)
            writefln!"name = %s; cp_in_buffer = %s"(
                name, cp_in_buffer);
    }

    /**
     * Rewind to the first character. The data buffer is left untouched.
     */
    public void reset()
    {
        index_of_next_char = 0;
    }

    /**
     * Advance by one code point.
     *
     * At end of input the position is left unchanged; the only check made
     * there is an assertion that `LA(1)` reports EOF.
     * @uml
     * @override
     */
    public override void consume()
    {
        if (index_of_next_char >= cp_in_buffer)
        {
            assert (LA(1) == IntStreamConstant.EOF, "cannot consume EOF");
            return;
        }

        debug (ANTLRInputStream)
        {
            // Not at EOF here, so LA(1) is the character about to be consumed.
            writefln!"consume; prev index_of_next_char= %s, data[index_of_next_character] = %s"(
                index_of_next_char,
                LA(1));
        }

        index_of_next_char++;

        debug (ANTLRInputStream)
        {
            // After the increment the position may equal cp_in_buffer, in
            // which case there is no character left to print.
            if (index_of_next_char < cp_in_buffer)
                writefln!"p moves to %s (c='%s')"(
                    index_of_next_char,
                    LA(1));
            else
                writefln!"p moves to %s (EOF)"(index_of_next_char);
        }
    }

    /**
     * Look at a character relative to the current position without
     * consuming it. The UTF-8 encoded character is decoded to a `dchar`.
     *
     * Params:
     *     i = relative offset: 1 is the next character, -1 the previously
     *         consumed one; 0 is undefined
     * Returns: the character at that position, `to!dchar(0)` for `i == 0`,
     *     or `to!dchar(IntStreamConstant.EOF)` when the position lies before
     *     the first or after the last character
     * @uml
     * @override
     */
    public override dchar LA(int i)
    {
        if (i == 0)
        {
            return to!dchar(0); // undefined
        }
        if (i < 0)
        {
            i++; // e.g. LA(-1) becomes offset 0, i.e. position index_of_next_char - 1
        }
        // Signed arithmetic: with size_t a position before the first
        // character would wrap around instead of becoming negative.
        immutable long target = cast(long) index_of_next_char + i - 1;
        if (target < 0 || target >= cast(long) cp_in_buffer)
        {
            return to!dchar(IntStreamConstant.EOF);
        }
        return front(data[data.toUTFindex(cast(size_t) target) .. $]);
    }

    /**
     * Same as `LA(i)`.
     */
    public dchar LT(int i)
    {
        return LA(i);
    }

    /**
     * Returns: the code-point index of the next character to be consumed
     * @uml
     * @override
     */
    public override size_t index()
    {
        return index_of_next_char;
    }

    /**
     * Returns: the number of code points in the buffer
     * @uml
     * @override
     */
    public override size_t size()
    {
        return cp_in_buffer;
    }

    /**
     * mark/release do nothing; the entire input is already buffered.
     *
     * Returns: always -1
     * @uml
     * @override
     */
    public override int mark()
    {
        return -1;
    }

    /**
     * Does nothing; see `mark`.
     * @uml
     * @override
     */
    public override void release(int marker)
    {
    }

    /**
     * Move the current position to `index`.
     *
     * Seeking backwards (or to the current position) just sets the position.
     * Seeking forwards calls `consume()` repeatedly and stops at `index` or
     * at the end of the buffer, whichever comes first.
     *
     * Params:
     *     index = target code-point index
     * @uml
     * @override
     */
    public override void seek(size_t index)
    {
        if (index <= index_of_next_char)
        {
            index_of_next_char = index; // just jump backwards
            return;
        }
        // Seek forward: consume until the position reaches index or
        // cp_in_buffer, whichever comes first.
        index = min(index, cp_in_buffer);
        while (index_of_next_char < index)
        {
            consume();
        }
    }

    /**
     * Return the text covered by `interval`.
     *
     * Params:
     *     interval = inclusive code-point range `a .. b`
     * Returns: the covered text as a new string; `b` is clamped to the last
     *     code point, and an empty string is returned when `a` lies at or
     *     beyond the end of the buffer or when `b < a`
     * @uml
     * @override
     */
    public override string getText(Interval interval)
    {
        int start = interval.a;
        int stop = interval.b;
        if (stop >= to!int(cp_in_buffer))
            stop = to!int(cp_in_buffer) - 1;
        if (start >= to!int(cp_in_buffer))
            return "";
        // An empty or inverted interval selects nothing; slicing it would be
        // a range violation.
        if (stop < start)
            return "";

        // Translate the code-point bounds to code-unit offsets once; the
        // upper bound is located by continuing from the lower one.
        immutable size_t lo = data.toUTFindex(start);
        immutable size_t hi = lo + data[lo .. $].toUTFindex(stop + 1 - start);

        debug (ANTLRInputStream)
        {
            writefln!"data: start=%s, stop=%s, string = %s"(
                start, stop,
                data[lo .. hi]);
        }

        return to!string(data[lo .. hi]);
    }

    /**
     * Returns: `name`, or `IntStreamConstant.UNKNOWN_SOURCE_NAME` when no
     *     name has been set or the name is empty
     * @uml
     * @override
     */
    public override string getSourceName()
    {
        // Test the length rather than the pointer so that an empty but
        // non-null name is also treated as unknown.
        if (name.length == 0)
        {
            return IntStreamConstant.UNKNOWN_SOURCE_NAME;
        }
        return name;
    }

    /**
     * Returns: the entire buffer as a string
     * @uml
     * @override
     */
    public override string toString()
    {
        return to!string(data);
    }

}