/*
 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.Recognizer;

import std.stdio;
import std.algorithm;
import std.array;
import std.conv;
import antlr.v4.runtime.ANTLRErrorListener;
import antlr.v4.runtime.ConsoleErrorListener;
import antlr.v4.runtime.InterfaceRuleContext;
import antlr.v4.runtime.Token;
import antlr.v4.runtime.TokenConstantDefinition;
import antlr.v4.runtime.IntStream;
import antlr.v4.runtime.InterfaceRecognizer;
import antlr.v4.runtime.UnsupportedOperationException;
import antlr.v4.runtime.RecognitionException;
import antlr.v4.runtime.TokenFactory;
import antlr.v4.runtime.CommonToken;
import antlr.v4.runtime.ProxyErrorListener;
import antlr.v4.runtime.Vocabulary;
import antlr.v4.runtime.VocabularyImpl;
import antlr.v4.runtime.atn.ATN;
import antlr.v4.runtime.atn.ParseInfo;

/**
 * Base for Lexer and Parser
 */
abstract class Recognizer(U, V) : InterfaceRecognizer
{

    enum int EOF = -1;

    /// Cache of token-name -> token-type maps, keyed by vocabulary.
    public int[string][Vocabulary] tokenTypeMapCache;

    /// Cache of rule-name -> rule-index maps, keyed by the comma-joined rule names.
    public int[string][string] ruleIndexMapCache;

    /// Registered error listeners; starts out with the console listener only.
    public ANTLRErrorListener[] _listeners;

    protected V _interp;

    private int _stateNumber = -1;

    public this()
    {
        _listeners = [ConsoleErrorListener.instance];
    }

    /**
     * Used to print out token names like ID during debugging and
     * error reporting. The generated parsers implement a method
     * that overrides this to point to their string[] tokenNames.
     *
     * @deprecated Use {@link #getVocabulary()} instead.
     */
    abstract public string[] getTokenNames();

    abstract public string[] getRuleNames();

    /**
     * Get the vocabulary used by the recognizer.
     *
     * @return A {@link Vocabulary} instance providing information about the
     * vocabulary used by the grammar.
     */
    public Vocabulary getVocabulary()
    {
        return VocabularyImpl.fromTokenNames(getTokenNames());
    }

    /**
     * Get a map from token names to token types.
     * <p>Used for XPath and tree pattern compilation.</p>
     *
     * The map is built on first use for a given vocabulary and stored in
     * {@link #tokenTypeMapCache}.
     *
     * @return map from literal/symbolic token names (plus "EOF") to token types
     */
    public int[string] getTokenTypeMap()
    {
        Vocabulary vocabulary = getVocabulary;

        // Look the key up with `in`: indexing an associative array with a
        // missing key throws a RangeError instead of yielding null.
        if (auto cached = vocabulary in tokenTypeMapCache) {
            return *cached;
        }

        int[string] result;
        immutable maxTokenType = getATN.maxTokenType;
        // maxTokenType is itself a valid token type, so the bound is inclusive.
        for (int i = 0; i <= maxTokenType; i++) {
            string literalName = vocabulary.getLiteralName(i);
            if (literalName !is null) {
                result[literalName] = i;
            }

            string symbolicName = vocabulary.getSymbolicName(i);
            if (symbolicName != null) {
                result[symbolicName] = i;
            }
        }

        result["EOF"] = TokenConstantDefinition.EOF;
        result.rehash; // for faster lookups
        tokenTypeMapCache[vocabulary] = result;
        return result;
    }

    /**
     * Get a map from rule names to rule indexes.
     *
     * <p>Used for XPath and tree pattern compilation.</p>
     *
     * @return map from rule name to its index in {@link #getRuleNames()}
     * @throws UnsupportedOperationException if the recognizer provides no rule names
     */
    public int[string] getRuleIndexMap()
    {
        string[] ruleNames = getRuleNames();
        if (!ruleNames) {
            throw new UnsupportedOperationException("The current recognizer does not provide a list of rule names.");
        }

        // The cache is keyed by the comma-joined rule names; build the key once.
        string cacheKey = to!string(joiner(ruleNames, ","));
        if (auto cached = cacheKey in ruleIndexMapCache) {
            return *cached;
        }

        int[string] result;
        foreach (i, rn; ruleNames) {
            result[rn] = to!int(i);
        }
        ruleIndexMapCache[cacheKey] = result;
        return result;
    }

    /**
     * Look up the token type for a token name.
     *
     * @return the token type, or TokenConstantDefinition.INVALID_TYPE when
     * the name is not present in the token type map
     */
    public int getTokenType(string tokenName)
    {
        // `in` avoids the RangeError that plain indexing raises for unknown names.
        if (auto ttype = tokenName in getTokenTypeMap()) {
            return *ttype;
        }
        return TokenConstantDefinition.INVALID_TYPE;
    }

    /**
     * If this recognizer was generated, it will have a serialized ATN
     * representation of the grammar.
     *
     * <p>For interpreters, we don't know their serialized ATN despite having
     * created the interpreter from it.</p>
     *
     * @throws UnsupportedOperationException always, in this base implementation
     */
    public wstring getSerializedATN()
    {
        throw new UnsupportedOperationException("there is no serialized ATN");
    }

    /**
     * For debugging and other purposes, might want the grammar name.
     * Have ANTLR generate an implementation for this method.
     */
    abstract public string getGrammarFileName();

    /**
     * Get the {@link ATN} used by the recognizer for prediction.
     *
     * @return The {@link ATN} used by the recognizer for prediction.
     */
    abstract public ATN getATN();

    /**
     * Get the ATN interpreter used by the recognizer for prediction.
     *
     * @return The ATN interpreter used by the recognizer for prediction.
     */
    public V getInterpreter()
    {
        return _interp;
    }

    /**
     * If profiling during the parse/lex, this will return DecisionInfo records
     * for each decision in recognizer in a ParseInfo object.
     *
     * This base implementation returns null.
     */
    public ParseInfo getParseInfo()
    {
        return null;
    }

    /**
     * Set the ATN interpreter used by the recognizer for prediction.
     */
    public void setInterpreter(V interpreter)
    {
        _interp = interpreter;
    }

    /**
     * What is the error header, normally line/character position information?
     *
     * @return a string of the form "line <line>:<charPositionInLine>" taken
     * from the offending token of the exception
     */
    public string getErrorHeader(RecognitionException e)
    {
        int line = e.getOffendingToken().getLine();
        int charPositionInLine = e.getOffendingToken().getCharPositionInLine();
        return "line " ~ to!string(line) ~ ":" ~ to!string(charPositionInLine);
    }

    /**
     * How should a token be displayed in an error message? The default
     * is to display just the text, but during development you might
     * want to have a lot of information spit out. Override in that case
     * to use t.toString() (which, for CommonToken, dumps everything about
     * the token). This is better than forcing you to override a method in
     * your token objects because you don't have to go modify your lexer
     * so that it creates a new token type.
     *
     * @deprecated This method is not called by the ANTLR 4 Runtime. Specific
     * implementations of {@link ANTLRErrorStrategy} may provide a similar
     * feature when necessary. For example, see
     * {@link DefaultErrorStrategy#getTokenErrorDisplay}.
     */
    public string getTokenErrorDisplay(Token t)
    {
        if (t is null) return "<no token>";
        string s = to!string(t.getText);
        if (s is null) {
            if (t.getType() == TokenConstantDefinition.EOF) {
                s = "<EOF>";
            }
            else {
                s = "<" ~ to!string(t.getType) ~ ">";
            }
        }
        // Make control characters visible in the message.
        s = s.replace("\n","\\n");
        s = s.replace("\r","\\r");
        s = s.replace("\t","\\t");
        return "'" ~ s ~ "'";
    }

    /**
     * Register an additional error listener. The listener must not be null.
     */
    public void addErrorListener(ANTLRErrorListener listener)
    {
        if (listener is null) {
            assert(0, "listener cannot be null.");
        }

        _listeners ~= listener;
    }

    /**
     * Remove the first registered occurrence of the given listener
     * (compared by identity). Does nothing if it is not registered.
     */
    public void removeErrorListener(ANTLRErrorListener listener)
    {
        foreach (elementRemoveIndex, el; _listeners) {
            if (listener is el) {
                // std.algorithm's remove returns the shortened range and leaves
                // the original length untouched, so the result must be assigned.
                _listeners = _listeners.remove(elementRemoveIndex);
                break;
            }
        }
    }

    /**
     * Remove all registered error listeners.
     */
    public void removeErrorListeners()
    {
        _listeners.length = 0;
    }

    public ANTLRErrorListener[] getErrorListeners()
    {
        return _listeners;
    }

    /**
     * Get a single listener that wraps the currently registered listeners
     * in a {@link ProxyErrorListener}.
     */
    public ANTLRErrorListener getErrorListenerDispatch()
    {
        return new ProxyErrorListener(getErrorListeners());
    }

    /**
     * subclass needs to override these if there are sempreds or actions
     * that the ATN interp needs to execute
     */
    public bool sempred(InterfaceRuleContext localctx, int ruleIndex, int actionIndex)
    {
        return true;
    }

    public bool precpred(InterfaceRuleContext localctx, int precedence)
    {
        return true;
    }

    public void action(InterfaceRuleContext localctx, int ruleIndex, int actionIndex)
    {
    }

    /**
     * @uml
     * @final
     */
    public final int getState()
    {
        return _stateNumber;
    }

    /**
     * Indicate that the recognizer has changed internal state that is
     * consistent with the ATN state passed in. This way we always know
     * where we are in the ATN as the parser goes along. The rule
     * context objects form a stack that lets us see the stack of
     * invoking rules. Combine this and we have complete ATN
     * configuration information.
     * @uml
     * @final
     */
    public final void setState(int atnState)
    {
        _stateNumber = atnState;
    }

    abstract public IntStream getInputStream();

    abstract public void setInputStream(IntStream input);

    abstract public TokenFactory!CommonToken tokenFactory();

    abstract public void tokenFactory(TokenFactory!CommonToken input);

}