1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.ANTLRInputStream;
8 
9 import antlr.v4.runtime.CharStream;
10 import antlr.v4.runtime.IntStream;
11 import antlr.v4.runtime.IntStreamConstant;
12 import antlr.v4.runtime.misc.Interval;
13 import std.algorithm;
14 import std.conv : to;
15 import std.file;
16 import std.format;
17 import std.range;
18 import std.stdio;
19 import std.utf;
20 
/**
 * Read all input from a {@code File} up front and then treat it
 * like a {@code char[]} buffer. Can also pass in a {@code string} or
 * {@code char[]} to use.
 *
 * <p>The buffer is scanned as UTF-8 and no transcoding is performed here;
 * if your input uses another encoding, convert it to UTF-8 before passing
 * it in.</p>
 */
class ANTLRInputStream : CharStream
{

    /**
     * The UTF-8 data being scanned
     */
    protected char[] data;

    /**
     * How many UCS code points are actually in the buffer
     */
    protected size_t cp_in_buffer;

    /**
     * Code point index (not byte offset) of the next character to be read
     */
    protected size_t index_of_next_char = 0;

    /**
     * What is name or source of this char stream?
     */
    public string name;

    /**
     * Create an empty stream; use load() to fill it.
     */
    public this()
    {
    }

    /**
     * Copy data in string to a local char array
     *
     * Params:
     *   input = UTF-8 text to scan
     */
    public this(string input)
    {
        data = input.to!(char []);
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * This is the preferred constructor for strings as no data is copied
     *
     * Params:
     *   data = UTF-8 buffer to scan
     *   numberOfActualCharsInArray = currently ignored; the number of code
     *       points is always computed from the whole of `data`
     */
    public this(char[] data, size_t numberOfActualCharsInArray)
    {
        this.data = data.to!(char []);
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * Create a stream from the complete content of a file, see load().
     */
    public this(File r)
    {
        load(r);
    }

    /**
     * Read the complete content of `r` into the buffer and remember the
     * file name as the source name. The bytes are taken as UTF-8 without
     * any conversion. The current read position is not changed.
     *
     * Params:
     *   r = file to read from
     */
    public void load(File r)
    {
        import std.array : array;

        name = r.name;
        data = cast(char[]) r.byChunk(4096).joiner.array;
        // set the actual size of the data available;
        cp_in_buffer = data.toUCSindex(data.length);
        debug (ANTLRInputStream)
            writefln!"name = %s; cp_in_buffer = %s"(name, cp_in_buffer);
    }

    /**
     * Reset the stream so that it's in the same state it was
     * when the object was created *except* the data array is not
     * touched.
     */
    public void reset()
    {
        index_of_next_char = 0;
    }

    /**
     * Advance the read position by one code point. At the end of the
     * input the position is left unchanged.
     * @uml
     * @override
     */
    public override void consume()
    {
        if (index_of_next_char >= cp_in_buffer)
        {
            // Nothing left to consume. The assert only documents the
            // invariant that LA(1) reports EOF here; the call is a no-op.
            assert (LA(1) == IntStreamConstant.EOF, "cannot consume EOF");
            return;
        }

        debug (ANTLRInputStream)
        {
            import std.stdio;
            writefln!"consume; prev index_of_next_char= %s, data[index_of_next_character] = %s"(
                     index_of_next_char,
                     front(data[data.toUTFindex(index_of_next_char) .. $]));
        }

        index_of_next_char++;

        debug (ANTLRInputStream)
        {
            import std.stdio;
            // index_of_next_char counts code points, so it has to be mapped
            // to a byte offset before the buffer is touched; after the last
            // code point there is nothing to decode.
            if (index_of_next_char < cp_in_buffer)
                writefln!"p moves to %s (c='%s')"(
                         index_of_next_char,
                         front(data[data.toUTFindex(index_of_next_char) .. $]));
            else
                writefln!"p moves to %s (EOF)"(index_of_next_char);
        }
    }

    /**
     * UTF-8 coded character mapped to UTF-32
     *
     * Params:
     *   i = look-ahead offset: 1 is the next character, -1 the character
     *       just before the read position; 0 is undefined
     * Returns: the decoded code point, `IntStreamConstant.EOF` (converted to
     *     dchar) when the requested position is outside of the buffer, or
     *     dchar 0 for i == 0
     * @uml
     * @override
     */
    public override dchar LA(int i)
    {
        if (i == 0)
        {
            return to!dchar(0); // undefined
        }
        if (i < 0)
        {
            i++; // e.g., translate LA(-1) to use offset i=0; then data[index_of_next_character+0-1]
        }
        // Signed arithmetic on purpose: in size_t a position before the
        // first character would wrap around instead of becoming negative.
        immutable long target = cast(long) index_of_next_char + i - 1;
        if (target < 0 || target >= cast(long) cp_in_buffer)
        {
            return to!dchar(IntStreamConstant.EOF); // no char before first / after last char
        }
        return front(data[data.toUTFindex(cast(size_t) target) .. $]);
    }

    /**
     * Same as LA(i).
     */
    public dchar LT(int i)
    {
        return LA(i);
    }

    /**
     * Returns: code point index of the next character to be read
     * @uml
     * @override
     */
    public override size_t index()
    {
        return index_of_next_char;
    }

    /**
     * Returns: number of code points in the buffer
     * @uml
     * @override
     */
    public override size_t size()
    {
        return cp_in_buffer;
    }

    /**
     * mark/release do nothing; we have entire buffer
     * @uml
     * @override
     */
    public override int mark()
    {
        return -1;
    }

    /**
     * mark/release do nothing; we have entire buffer
     * @uml
     * @override
     */
    public override void release(int marker)
    {
    }

    /**
     * Seeking forward calls consume() until index_of_next_char reaches
     * index or the end of the buffer (whichever comes first).
     * If we seek backwards, just set index_of_next_char.
     * @uml
     * @override
     */
    public override void seek(size_t index)
    {
        if (index <= index_of_next_char)
        {
            index_of_next_char= index; // just jump; don't update stream state (line, ...)
            return;
        }
        // seek forward, consume until next code point hits index or cp_in_buffer
        // (whichever comes first)
        index = min(index, cp_in_buffer);
        while (index_of_next_char < index)
        {
            consume();
        }
    }

    /**
     * Text of the code points interval.a .. interval.b (both inclusive).
     *
     * Params:
     *   interval = code point range; the upper bound is clamped to the last
     *       code point in the buffer
     * Returns: the selected text, or "" when the range is empty, starts
     *     before the first or behind the last code point
     * @uml
     * @override
     */
    public override string getText(Interval interval)
    {
        immutable int available = to!int(cp_in_buffer);
        int start = interval.a;
        int stop = interval.b;
        if (stop >= available)
            stop = available - 1;
        // A negative start or an inverted range cannot be mapped to a slice.
        if (start < 0 || start >= available || stop < start)
            return "";

        // Translate code point positions to byte offsets; the end offset is
        // computed relative to the start so the prefix is scanned only once.
        immutable size_t first = data.toUTFindex(cast(size_t) start);
        immutable size_t last = first
            + data[first .. $].toUTFindex(cast(size_t) (stop - start + 1));

        debug (ANTLRInputStream)
        {
            writefln!"data: start=%s, stop=%s, string = %s"(
                     start, stop,
                     data[first .. last]);
        }

        return to!string(data[first .. last]);
    }

    /**
     * Returns: the stream name, or `IntStreamConstant.UNKNOWN_SOURCE_NAME`
     *     when no (or an empty) name is set
     * @uml
     * @override
     */
    public override string getSourceName()
    {
        // Test the length: a string can be empty without being null.
        if (name.length == 0)
        {
            return IntStreamConstant.UNKNOWN_SOURCE_NAME;
        }
        return name;
    }

    /**
     * Returns: the complete buffer content as string
     * @uml
     * @override
     */
    public override string toString()
    {
        return to!string(data);
    }

}