1 /*
2  * Copyright (c) 2012-2020 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.ANTLRInputStream;
8 
9 import antlr.v4.runtime.CharStream;
10 import antlr.v4.runtime.IntStream;
11 import antlr.v4.runtime.IntStreamConstant;
12 import antlr.v4.runtime.misc.Interval;
13 import std.algorithm;
14 import std.conv : to;
15 import std.file;
16 import std.format;
17 import std.range;
18 import std.stdio;
19 import std.utf;
20 
/**
 * A character stream that holds its complete input in memory as a UTF-8
 * encoded {@code char[]} buffer. The input is either copied from a string,
 * taken over from a caller-supplied array, or read from a file.
 *
 * <p>All positions exposed through this class ({@link #index}, {@link #size},
 * {@link #seek}, {@link #LA} and the intervals passed to {@link #getText})
 * are counted in code points, not in UTF-8 code units. They are translated
 * to byte offsets into the buffer on demand.</p>
 */
class ANTLRInputStream : CharStream
{

    /**
     * The UTF-8 data being scanned
     */
    protected char[] data;

    /**
     * How many code points are actually in the buffer
     */
    protected size_t cp_in_buffer;

    /**
     * Code point index of the next character to be consumed
     */
    protected size_t index_of_next_char = 0;

    /**
     * What is name or source of this char stream?
     */
    public string name;

    /**
     * Create an empty stream; the buffer stays empty until load() is called.
     */
    public this()
    {
    }

    /**
     * Copy the data in the string to a local char array
     */
    public this(string input)
    {
        data = input.to!(char []);
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * This is the preferred constructor for character arrays as no data is
     * copied; the stream keeps a reference to the caller's array.
     *
     * Note: numberOfActualCharsInArray is currently ignored. The number of
     * code points is always derived from the complete array.
     */
    public this(char[] data, size_t numberOfActualCharsInArray)
    {
        this.data = data;
        cp_in_buffer = data.toUCSindex(data.length);
    }

    /**
     * Create a stream from the content of the file r (see load()).
     */
    public this(File r)
    {
        load(r);
    }

    /**
     * Replace the buffer with the complete text of the file r. The file is
     * read through its name, which also becomes the name of this stream.
     * The current read position is not changed.
     */
    public void load(File r)
    {
        name = r.name;
        data = to!(char[])(name.readText);
        // set the actual size of the data available;
        cp_in_buffer = data.toUCSindex(data.length);
        debug (ANTLRInputStream)
            writefln!"name = %s; cp_in_buffer = %s"(
                     name, cp_in_buffer);
    }

    /**
     * Reset the stream so that it's in the same state it was
     * when the object was created *except* the data array is not
     * touched.
     */
    public void reset()
    {
        index_of_next_char = 0;
    }

    /**
     * Advance the read position by one code point. At the end of the
     * buffer the position is left unchanged.
     * @uml
     * @override
     */
    public override void consume()
    {
        if (index_of_next_char >= cp_in_buffer)
        {
            // nothing left to consume; only sanity-check the lookahead
            assert (LA(1) == IntStreamConstant.EOF, "cannot consume EOF");
            return;
        }

        debug (ANTLRInputStream)
        {
            import std.stdio;
            writefln!"consume; prev index_of_next_char= %s, data[index_of_next_character] = %s"(
                     index_of_next_char,
                     LA(1));
        }

        index_of_next_char++;

        debug (ANTLRInputStream)
        {
            import std.stdio;
            // the position is a code point index, so the character has to be
            // decoded via LA(); after the last character there is nothing to show
            if (index_of_next_char < cp_in_buffer)
            {
                writefln!"p moves to %s (c='%s')"(
                         index_of_next_char,
                         LA(1));
            }
            else
            {
                writefln!"p moves to %s (EOF)"(index_of_next_char);
            }
        }
    }

    /**
     * UTF-8 coded character mapped to UTF-32.
     *
     * LA(1) is the next character to be consumed, LA(-1) the character
     * consumed last. LA(0) is undefined and yields the character 0. Any
     * position outside of the buffer yields the EOF value.
     * @uml
     * @override
     */
    public override dchar LA(int i)
    {
        if (i == 0)
        {
            return to!dchar(0); // undefined
        }
        // LA(1) refers to offset 0, LA(-1) to offset -1. The arithmetic is
        // done signed so that positions in front of the buffer are detected.
        immutable long offset = (i < 0) ? i : i - 1;
        immutable long target = cast(long) index_of_next_char + offset;
        if (target < 0 || target >= cast(long) cp_in_buffer)
        {
            return to!dchar(IntStreamConstant.EOF);
        }
        return front(data[data.toUTFindex(cast(size_t) target) .. $]);
    }

    /**
     * Same as LA(i).
     */
    public dchar LT(int i)
    {
        return LA(i);
    }

    /**
     * Code point index of the next character to be consumed.
     * @uml
     * @override
     */
    public override size_t index()
    {
        return index_of_next_char;
    }

    /**
     * Number of code points in the buffer.
     * @uml
     * @override
     */
    public override size_t size()
    {
        return cp_in_buffer;
    }

    /**
     * mark/release do nothing; we have entire buffer
     * @uml
     * @override
     */
    public override int mark()
    {
        return -1;
    }

    /**
     * Nothing to release, see mark().
     * @uml
     * @override
     */
    public override void release(int marker)
    {
    }

    /**
     * Move the read position to the code point index. Seeking backwards
     * (or to the current position) just sets the position. Seeking forward
     * consumes characters one by one and stops at the end of the buffer.
     * @uml
     * @override
     */
    public override void seek(size_t index)
    {
        if (index <= index_of_next_char)
        {
            index_of_next_char = index; // just jump; don't update stream state (line, ...)
            return;
        }
        // seek forward, consume until next code point hits index or cp_in_buffer
        // (whichever comes first)
        index = min(index, cp_in_buffer);
        while (index_of_next_char < index)
        {
            consume();
        }
    }

    /**
     * Text of the code points interval.a .. interval.b (both inclusive).
     * The upper bound is clipped to the end of the buffer. An interval that
     * starts behind the buffer or that is empty (b < a) yields "".
     * @uml
     * @override
     */
    public override string getText(Interval interval)
    {
        immutable int limit = to!int(cp_in_buffer);
        int start = interval.a;
        int stop = interval.b;
        if (stop >= limit)
            stop = limit - 1;
        if (start >= limit || stop < start)
            return "";

        // translate the code point positions to byte offsets; the end offset
        // is searched from the start offset, so the buffer is scanned only once
        immutable size_t first = data.toUTFindex(start);
        immutable size_t last = first + data[first .. $].toUTFindex(stop - start + 1);

        debug (ANTLRInputStream)
        {
            writefln!"data: start=%s, stop=%s, string = %s"(
                     start, stop,
                     data[first .. last]);
        }

        return to!string(data[first .. last]);
    }

    /**
     * Name of the stream, or the placeholder for an unknown source when no
     * name (null or empty) has been set.
     * @uml
     * @override
     */
    public override string getSourceName()
    {
        if (name.length == 0)
        {
            return IntStreamConstant.UNKNOWN_SOURCE_NAME;
        }
        return name;
    }

    /**
     * The complete buffer as a string.
     * @uml
     * @override
     */
    public override string toString()
    {
        return to!string(data);
    }

}