antlr.v4.runtime.Recognizer source code

1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2012 Terence Parr
4  *  Copyright (c) 2012 Sam Harwell
5  *  Copyright (c) 2017 Egbert Voigt
6  *  All rights reserved.
7  *
8  *  Redistribution and use in source and binary forms, with or without
9  *  modification, are permitted provided that the following conditions
10  *  are met:
11  *
12  *  1. Redistributions of source code must retain the above copyright
13  *     notice, this list of conditions and the following disclaimer.
14  *  2. Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  *  3. The name of the author may not be used to endorse or promote products
18  *     derived from this software without specific prior written permission.
19  *
20  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 module antlr.v4.runtime.Recognizer;
33 
34 import std.stdio;
35 import std.algorithm;
36 import std.array;
37 import std.conv;
38 import antlr.v4.runtime.ANTLRErrorListener;
39 import antlr.v4.runtime.ConsoleErrorListener;
40 import antlr.v4.runtime.InterfaceRuleContext;
41 import antlr.v4.runtime.Token;
42 import antlr.v4.runtime.TokenConstantDefinition;
43 import antlr.v4.runtime.IntStream;
44 import antlr.v4.runtime.InterfaceRecognizer;
45 import antlr.v4.runtime.UnsupportedOperationException;
46 import antlr.v4.runtime.RecognitionException;
47 import antlr.v4.runtime.TokenFactory;
48 import antlr.v4.runtime.CommonToken;
49 import antlr.v4.runtime.ProxyErrorListener;
50 import antlr.v4.runtime.Vocabulary;
51 import antlr.v4.runtime.VocabularyImpl;
52 import antlr.v4.runtime.atn.ATN;
53 import antlr.v4.runtime.atn.ParseInfo;
54 
// Class Template Recognizer
/**
 * Abstract base class template for ANTLR recognizers.
 *
 * It keeps the list of registered error listeners, the ATN interpreter
 * instance, the current ATN state number, and per-instance caches for the
 * token-name -> token-type and rule-name -> rule-index maps.
 *
 * Params:
 *  U = first argument forwarded to the error listener templates
 *      (ANTLRErrorListener, ConsoleErrorListener, ProxyErrorListener);
 *      presumably the symbol type - TODO confirm against those templates
 *  V = type of the ATN interpreter held by this recognizer
 */
abstract class Recognizer(U, V) : InterfaceRecognizer
{

    /// Token type value used for end of file.
    public static immutable int EOF = -1;

    /// Cache of token-name -> token-type maps, keyed by vocabulary.
    public int[string][Vocabulary] tokenTypeMapCache;

    /// Cache of rule-name -> rule-index maps, keyed by the comma-joined rule names.
    public int[string][string] ruleIndexMapCache;

    /// Registered error listeners; initialised with the console listener.
    public ANTLRErrorListener!(U,V)[] _listeners;

    /// ATN interpreter, see getInterpreter / setInterpreter.
    protected V _interp;

    /// Current ATN state number; -1 until setState is called.
    private int _stateNumber = -1;

    /**
     * Create a recognizer whose only error listener is the shared
     * ConsoleErrorListener instance.
     */
    public this()
    {
        _listeners = [ConsoleErrorListener!(U,V).instance];
    }

    /**
     * @uml
     * Used to print out token names like ID during debugging and
     * error reporting.  The generated parsers implement a method
     * that overrides this to point to their string[] tokenNames.
     *
     *  @deprecated Use {@link #getVocabulary()} instead.
     */
    abstract public string[] getTokenNames();

    /**
     * Get the rule names of the grammar; used by getRuleIndexMap to build
     * the name -> index map.
     */
    abstract public string[] getRuleNames();

    /**
     * @uml
     * Get the vocabulary used by the recognizer.
     *
     *  @return A {@link Vocabulary} instance providing information about the
     *  vocabulary used by the grammar. The default implementation derives it
     *  from getTokenNames().
     */
    public Vocabulary getVocabulary()
    {
        return VocabularyImpl.fromTokenNames(getTokenNames());
    }

    /**
     * @uml
     * Get a map from token names to token types.
     * <p>Used for XPath and tree pattern compilation.</p>
     *
     *  @return A map containing every literal and symbolic name of the token
     *  types 0 .. maxTokenType (inclusive) plus the entry "EOF". The map is
     *  cached per vocabulary in tokenTypeMapCache.
     */
    public int[string] getTokenTypeMap()
    {
        Vocabulary vocabulary = getVocabulary();

        // Look the cache up with `in`: indexing an associative array with a
        // key that is not present throws a RangeError instead of yielding null.
        if (auto cached = vocabulary in tokenTypeMapCache) {
            return *cached;
        }

        int[string] result;
        auto maxTokenType = getATN().maxTokenType;
        // maxTokenType is itself a valid token type, so the bound is inclusive.
        for (int i = 0; i <= maxTokenType; i++) {
            string literalName = vocabulary.getLiteralName(i);
            if (literalName !is null) {
                result[literalName] = i;
            }

            string symbolicName = vocabulary.getSymbolicName(i);
            if (symbolicName != null) {
                result[symbolicName] = i;
            }
        }

        result["EOF"] = TokenConstantDefinition.EOF;
        result.rehash; // for faster lookups
        tokenTypeMapCache[vocabulary] = result;
        return result;
    }

    /**
     * @uml
     * Get a map from rule names to rule indexes.
     *
     * <p>Used for XPath and tree pattern compilation.</p>
     *
     *  @return A map from each rule name to its position in getRuleNames().
     *  The map is cached in ruleIndexMapCache under the comma-joined rule names.
     *  @throws UnsupportedOperationException if getRuleNames() returns null.
     */
    public int[string] getRuleIndexMap()
    {
        string[] ruleNames = getRuleNames();
        if (!ruleNames) {
            throw new UnsupportedOperationException("The current recognizer does not provide a list of rule names.");
        }

        // Build the cache key once; it is needed for both lookup and store.
        immutable string cacheKey = to!string(joiner(ruleNames, ","));
        if (auto cached = cacheKey in ruleIndexMapCache) {
            return *cached;
        }

        int[string] result;
        foreach (i, rn; ruleNames) {
            // The array index is a size_t; the map stores int rule indexes.
            result[rn] = cast(int) i;
        }
        ruleIndexMapCache[cacheKey] = result;
        return result;
    }

    /**
     * Look up the token type for a token name.
     *
     *  @return The token type registered for tokenName in getTokenTypeMap(),
     *  or TokenConstantDefinition.INVALID_TYPE if the name is unknown.
     */
    public int getTokenType(string tokenName)
    {
        // `in` yields null for an unknown name; plain indexing would throw
        // a RangeError and never reach the INVALID_TYPE fallback.
        if (auto ttype = tokenName in getTokenTypeMap()) {
            return *ttype;
        }
        return TokenConstantDefinition.INVALID_TYPE;
    }

    /**
     * @uml
     * If this recognizer was generated, it will have a serialized ATN
     * representation of the grammar.
     *
     * <p>For interpreters, we don't know their serialized ATN despite having
     * created the interpreter from it.</p>
     *
     *  @throws UnsupportedOperationException always, in this base implementation.
     */
    public wstring getSerializedATN()
    {
        throw new UnsupportedOperationException("there is no serialized ATN");
    }

    /**
     * @uml
     * For debugging and other purposes, might want the grammar name.
     * Have ANTLR generate an implementation for this method.
     */
    abstract public string getGrammarFileName();

    /**
     * @uml
     * Get the {@link ATN} used by the recognizer for prediction.
     *
     *  @return The {@link ATN} used by the recognizer for prediction.
     */
    abstract public ATN getATN();

    /**
     * @uml
     * Get the ATN interpreter used by the recognizer for prediction.
     *
     *  @return The ATN interpreter used by the recognizer for prediction.
     */
    public V getInterpreter()
    {
        return _interp;
    }

    /**
     * @uml
     * If profiling during the parse/lex, this will return DecisionInfo records
     * for each decision in recognizer in a ParseInfo object.
     *
     *  @return null in this base implementation.
     */
    public ParseInfo getParseInfo()
    {
        return null;
    }

    /**
     * Set the ATN interpreter used by the recognizer for prediction.
     */
    public void setInterpreter(V interpreter)
    {
        _interp = interpreter;
    }

    /**
     * @uml
     * What is the error header, normally line/character position information?
     *
     *  @return "line L:C" where L and C are the line and the character
     *  position in line of the offending token of e.
     */
    public string getErrorHeader(RecognitionException e)
    {
        int line = e.getOffendingToken().getLine();
        int charPositionInLine = e.getOffendingToken().getCharPositionInLine();
        return "line " ~ to!string(line) ~ ":" ~ to!string(charPositionInLine);
    }

    /**
     * @uml
     * How should a token be displayed in an error message? The default
     * is to display just the text, but during development you might
     * want to have a lot of information spit out.  Override in that case
     * to use t.toString() (which, for CommonToken, dumps everything about
     * the token). This is better than forcing you to override a method in
     * your token objects because you don't have to go modify your lexer
     * so that it creates a new token type.
     *
     *  @return "<no token>" for a null token; otherwise the token text
     *  (or "<EOF>" / "<type>" when the text is null) with newline, carriage
     *  return and tab escaped, wrapped in single quotes.
     *
     *  @deprecated This method is not called by the ANTLR 4 Runtime. Specific
     * implementations of {@link ANTLRErrorStrategy} may provide a similar
     * feature when necessary. For example, see
     * {@link DefaultErrorStrategy#getTokenErrorDisplay}.
     */
    public string getTokenErrorDisplay(Token t)
    {
        if (t is null) return "<no token>";
        string s = t.getText();
        if (s is null) {
            if (t.getType() == TokenConstantDefinition.EOF) {
                s = "<EOF>";
            }
            else {
                s = "<" ~ to!string(t.getType) ~ ">";
            }
        }
        s = s.replace("\n","\\n");
        s = s.replace("\r","\\r");
        s = s.replace("\t","\\t");
        return "'" ~ s ~ "'";
    }

    /**
     * Append listener to the list of error listeners.
     * A null listener is a programming error and trips an assertion.
     */
    public void addErrorListener(ANTLRErrorListener!(U, V) listener)
    {
        if (listener is null) {
            assert(0, "listener cannot be null.");
        }

        _listeners ~= listener;
    }

    /**
     * Remove the first registered occurrence of listener (compared by
     * identity). Does nothing if the listener is not registered.
     */
    public void removeErrorListener(ANTLRErrorListener!(U, V) listener)
    {
        foreach (i, el; _listeners) {
            if (listener is el) {
                // std.algorithm.remove returns the shortened range; the
                // result must be assigned back or the array keeps its length.
                _listeners = _listeners.remove(i);
                break;
            }
        }
    }

    /**
     * Remove all registered error listeners.
     */
    public void removeErrorListeners()
    {
        _listeners.length = 0;
    }

    /**
     * Get the currently registered error listeners.
     */
    public ANTLRErrorListener!(U,V)[] getErrorListeners()
    {
        return _listeners;
    }

    /**
     * Get a new ProxyErrorListener constructed from the currently
     * registered error listeners.
     */
    public ANTLRErrorListener!(U, V) getErrorListenerDispatch()
    {
        return new ProxyErrorListener!(U, V)(getErrorListeners());
    }

    /**
     * @uml
     * subclass needs to override these if there are sempreds or actions
     * that the ATN interp needs to execute
     *
     *  @return true in this base implementation.
     */
    public bool sempred(InterfaceRuleContext localctx, int ruleIndex, int actionIndex)
    {
        return true;
    }

    /**
     * Precedence predicate hook.
     *
     *  @return true in this base implementation.
     */
    public bool precpred(InterfaceRuleContext localctx, int precedence)
    {
        return true;
    }

    /**
     * Action hook; does nothing in this base implementation.
     */
    public void action(InterfaceRuleContext localctx, int ruleIndex, int actionIndex)
    {
    }

    /**
     * @uml
     * @final
     * Get the ATN state number last passed to setState (-1 initially).
     */
    public final int getState()
    {
        return _stateNumber;
    }

    /**
     * @uml
     * @final
     * Indicate that the recognizer has changed internal state that is
     * consistent with the ATN state passed in.  This way we always know
     * where we are in the ATN as the parser goes along. The rule
     * context objects form a stack that lets us see the stack of
     * invoking rules. Combine this and we have complete ATN
     * configuration information.
     */
    public final void setState(int atnState)
    {
        //writeln("setState "+atnState);
        _stateNumber = atnState;
        // if ( traceATNStates ) _ctx.trace(atnState);
    }

    /// Get the input stream of this recognizer.
    abstract public IntStream getInputStream();

    /// Set the input stream of this recognizer.
    abstract public void setInputStream(IntStream input);

    /// Get the token factory of this recognizer.
    abstract public TokenFactory!CommonToken tokenFactory();

    /// Set the token factory of this recognizer.
    abstract public void tokenFactory(TokenFactory!CommonToken input);

}