antlr.v4.runtime.Recognizer source code

1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.Recognizer;
8 
9 import std.stdio;
10 import std.algorithm;
11 import std.array;
12 import std.conv;
13 import antlr.v4.runtime.ANTLRErrorListener;
14 import antlr.v4.runtime.ConsoleErrorListener;
15 import antlr.v4.runtime.InterfaceRuleContext;
16 import antlr.v4.runtime.Token;
17 import antlr.v4.runtime.TokenConstantDefinition;
18 import antlr.v4.runtime.IntStream;
19 import antlr.v4.runtime.InterfaceRecognizer;
20 import antlr.v4.runtime.UnsupportedOperationException;
21 import antlr.v4.runtime.RecognitionException;
22 import antlr.v4.runtime.TokenFactory;
23 import antlr.v4.runtime.CommonToken;
24 import antlr.v4.runtime.ProxyErrorListener;
25 import antlr.v4.runtime.Vocabulary;
26 import antlr.v4.runtime.VocabularyImpl;
27 import antlr.v4.runtime.atn.ATN;
28 import antlr.v4.runtime.atn.ParseInfo;
29 
/**
 * Base for Lexer and Parser.
 *
 * Holds the pieces every recognizer shares: the registered error
 * listeners, the ATN interpreter, the current ATN state number and the
 * caches for the token-type and rule-index lookup maps.
 *
 * Template parameters:
 *  U = not referenced inside this class body
 *  V = type of the ATN interpreter stored in `_interp`
 */
abstract class Recognizer(U, V) : InterfaceRecognizer
{

    /// End-of-file marker value.
    enum int EOF = -1;

    /// Cache of token-name to token-type maps, keyed by vocabulary.
    public int[string][Vocabulary] tokenTypeMapCache;

    /// Cache of rule-name to rule-index maps, keyed by the comma-joined rule names.
    public int[string][string] ruleIndexMapCache;

    /// Registered error listeners; the constructor installs the console listener.
    public ANTLRErrorListener[] _listeners;

    /// ATN interpreter used by this recognizer.
    protected V _interp;

    /// Current ATN state number; stays at -1 until setState is called.
    private int _stateNumber = -1;

    /**
     * Create a recognizer whose only error listener is
     * `ConsoleErrorListener.instance`.
     */
    public this()
    {
        _listeners = [ConsoleErrorListener.instance];
    }

    /**
     * Used to print out token names like ID during debugging and
     * error reporting.  The generated parsers implement a method
     * that overrides this to point to their string[] tokenNames.
     *
     *  @deprecated Use {@link #getVocabulary()} instead.
     */
    abstract public string[] getTokenNames();

    /**
     * Get the names of the rules of this recognizer.
     */
    abstract public string[] getRuleNames();

    /**
     * Get the vocabulary used by the recognizer.
     *
     *  @return A {@link Vocabulary} instance providing information about the
     *  vocabulary used by the grammar.
     */
    public Vocabulary getVocabulary()
    {
        return VocabularyImpl.fromTokenNames(getTokenNames());
    }

    /**
     * Get a map from token names to token types.
     * <p>Used for XPath and tree pattern compilation.</p>
     *
     * The map holds the literal and symbolic name of every token type from
     * 0 up to and including the ATN's maxTokenType, plus an "EOF" entry.
     * It is built once per vocabulary and then served from
     * `tokenTypeMapCache`.
     *
     *  @return The (cached) name to token type map.
     */
    public int[string] getTokenTypeMap()
    {
        Vocabulary vocabulary = getVocabulary;

        // Probe the cache with `in`: indexing an associative array with a
        // key that is not present raises a RangeError instead of yielding
        // null, so a plain index would fail on the very first call.
        if (auto cached = vocabulary in tokenTypeMapCache) {
            return *cached;
        }

        int[string] result;
        immutable maxTokenType = getATN.maxTokenType;
        // maxTokenType is itself a valid token type, hence the inclusive bound.
        for (int i = 0; i <= maxTokenType; i++) {
            string literalName = vocabulary.getLiteralName(i);
            if (literalName !is null) {
                result[literalName] = i;
            }

            string symbolicName = vocabulary.getSymbolicName(i);
            if (symbolicName != null) {
                result[symbolicName] = i;
            }
        }

        result["EOF"] = TokenConstantDefinition.EOF;
        result.rehash; // for faster lookups
        tokenTypeMapCache[vocabulary] = result;
        return result;
    }

    /**
     * Get a map from rule names to rule indexes.
     *
     * <p>Used for XPath and tree pattern compilation.</p>
     *
     * The map is cached in `ruleIndexMapCache` under the comma-joined
     * list of rule names.
     *
     *  @return The (cached) rule name to rule index map.
     *  @throws UnsupportedOperationException if getRuleNames() yields no array.
     */
    public int[string] getRuleIndexMap()
    {
        string[] ruleNames = getRuleNames();
        if (!ruleNames) {
            throw new UnsupportedOperationException("The current recognizer does not provide a list of rule names.");
        }

        // Build the cache key once; it is needed for both lookup and store.
        string cacheKey = to!string(joiner(ruleNames, ","));
        if (auto cached = cacheKey in ruleIndexMapCache) {
            return *cached;
        }

        int[string] result;
        foreach (i, rn; ruleNames) {
            result[rn] = to!int(i);
        }
        ruleIndexMapCache[cacheKey] = result;
        return result;
    }

    /**
     * Look up the token type that belongs to a token name.
     *
     *  @param tokenName literal or symbolic token name.
     *  @return The token type, or TokenConstantDefinition.INVALID_TYPE when
     *  the name is not part of the token type map.
     */
    public int getTokenType(string tokenName)
    {
        // `in` yields null for an unknown name; a plain index would raise
        // a RangeError and the INVALID_TYPE fallback could never be reached.
        if (auto ttype = tokenName in getTokenTypeMap()) {
            return *ttype;
        }
        return TokenConstantDefinition.INVALID_TYPE;
    }

    /**
     * If this recognizer was generated, it will have a serialized ATN
     * representation of the grammar.
     *
     * <p>For interpreters, we don't know their serialized ATN despite having
     * created the interpreter from it.</p>
     *
     *  @throws UnsupportedOperationException always, in this base implementation.
     */
    public wstring getSerializedATN()
    {
        throw new UnsupportedOperationException("there is no serialized ATN");
    }

    /**
     * For debugging and other purposes, might want the grammar name.
     * Have ANTLR generate an implementation for this method.
     */
    abstract public string getGrammarFileName();

    /**
     * Get the {@link ATN} used by the recognizer for prediction.
     *
     *  @return The {@link ATN} used by the recognizer for prediction.
     */
    abstract public ATN getATN();

    /**
     * Get the ATN interpreter used by the recognizer for prediction.
     *
     *  @return The ATN interpreter used by the recognizer for prediction.
     */
    public V getInterpreter()
    {
        return _interp;
    }

    /**
     * If profiling during the parse/lex, this will return DecisionInfo records
     * for each decision in recognizer in a ParseInfo object.
     *
     *  @return null in this base implementation.
     */
    public ParseInfo getParseInfo()
    {
        return null;
    }

    /**
     * Set the ATN interpreter used by the recognizer for prediction.
     *
     *  @param interpreter The ATN interpreter to store.
     */
    public void setInterpreter(V interpreter)
    {
        _interp = interpreter;
    }

    /**
     * What is the error header, normally line/character position information?
     *
     *  @param e exception whose offending token supplies the position.
     *  @return A string of the form "line <line>:<charPositionInLine>".
     */
    public string getErrorHeader(RecognitionException e)
    {
        auto offendingToken = e.getOffendingToken();
        int line = offendingToken.getLine();
        int charPositionInLine = offendingToken.getCharPositionInLine();
        return "line " ~ to!string(line) ~ ":" ~ to!string(charPositionInLine);
    }

    /**
     * How should a token be displayed in an error message? The default
     * is to display just the text, but during development you might
     * want to have a lot of information spit out.  Override in that case
     * to use t.toString() (which, for CommonToken, dumps everything about
     * the token). This is better than forcing you to override a method in
     * your token objects because you don't have to go modify your lexer
     * so that it creates a new token type.
     *
     * Newline, carriage return and tab characters in the text are shown
     * in escaped form and the result is wrapped in single quotes.
     *
     *  @deprecated This method is not called by the ANTLR 4 Runtime. Specific
     * implementations of {@link ANTLRErrorStrategy} may provide a similar
     * feature when necessary. For example, see
     * {@link DefaultErrorStrategy#getTokenErrorDisplay}.
     */
    public string getTokenErrorDisplay(Token t)
    {
        if (t is null) return "<no token>";
        string s = to!string(t.getText);
        if (s is null) {
            if (t.getType() == TokenConstantDefinition.EOF) {
                s = "<EOF>";
            }
            else {
                s = "<" ~ to!string(t.getType) ~ ">";
            }
        }
        s = s.replace("\n","\\n");
        s = s.replace("\r","\\r");
        s = s.replace("\t","\\t");
        return "'" ~ s ~ "'";
    }

    /**
     * Append a listener to the list of error listeners.
     *
     *  @param listener The listener to add; must not be null.
     */
    public void addErrorListener(ANTLRErrorListener listener)
    {
        if (listener is null) {
            assert(0, "listener cannot be null.");
        }

        _listeners ~= listener;
    }

    /**
     * Remove the first registered listener that is identical (`is`) to
     * the given one. Nothing happens when it is not registered.
     *
     *  @param listener The listener to remove.
     */
    public void removeErrorListener(ANTLRErrorListener listener)
    {
        foreach (elementRemoveIndex, el; _listeners) {
            if (listener is el) {
                // remove() only returns the shortened range and leaves the
                // length of the original slice untouched, so the result has
                // to be assigned back for the listener to really disappear.
                _listeners = _listeners.remove(elementRemoveIndex);
                break;
            }
        }
    }

    /**
     * Remove all registered error listeners.
     */
    public void removeErrorListeners()
    {
        _listeners.length = 0;
    }

    /**
     * Get the registered error listeners.
     *
     *  @return The internal listener array (not a copy).
     */
    public ANTLRErrorListener[] getErrorListeners()
    {
        return _listeners;
    }

    /**
     * Get a single listener that wraps all registered error listeners.
     *
     *  @return A new {@link ProxyErrorListener} built from getErrorListeners().
     */
    public ANTLRErrorListener getErrorListenerDispatch()
    {
        return new ProxyErrorListener(getErrorListeners());
    }

    /**
     * subclass needs to override these if there are sempreds or actions
     * that the ATN interp needs to execute
     *
     *  @return true in this base implementation.
     */
    public bool sempred(InterfaceRuleContext localctx, int ruleIndex, int actionIndex)
    {
        return true;
    }

    /**
     * Precedence predicate hook.
     *
     *  @return true in this base implementation.
     */
    public bool precpred(InterfaceRuleContext localctx, int precedence)
    {
        return true;
    }

    /**
     * Action hook; does nothing in this base implementation.
     */
    public void action(InterfaceRuleContext localctx, int ruleIndex, int actionIndex)
    {
    }

    /**
     * Get the ATN state number last passed to setState (-1 initially).
     * @uml
     * @final
     */
    public final int getState()
    {
        return _stateNumber;
    }

    /**
     * Indicate that the recognizer has changed internal state that is
     * consistent with the ATN state passed in.  This way we always know
     * where we are in the ATN as the parser goes along. The rule
     * context objects form a stack that lets us see the stack of
     * invoking rules. Combine this and we have complete ATN
     * configuration information.
     * @uml
     * @final
     */
    public final void setState(int atnState)
    {
        _stateNumber = atnState;
    }

    /// Get the input stream of this recognizer.
    abstract public IntStream getInputStream();

    /// Set the input stream of this recognizer.
    abstract public void setInputStream(IntStream input);

    /// Get the token factory of this recognizer.
    abstract public TokenFactory!CommonToken tokenFactory();

    /// Set the token factory of this recognizer.
    abstract public void tokenFactory(TokenFactory!CommonToken input);

}