/*
 * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.atn.LexerActionExecutor;

import std.conv;
import antlr.v4.runtime.InterfaceLexer;
import antlr.v4.runtime.CharStream;
import antlr.v4.runtime.atn.LexerAction;
import antlr.v4.runtime.atn.LexerIndexedCustomAction;
import antlr.v4.runtime.misc.MurmurHash;

// Class LexerActionExecutor
/**
 * @uml
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * <p>The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@link DFA} created for the lexer.</p>
 *
 * @author Sam Harwell
 * @since 4.2
 */
class LexerActionExecutor
{

    /**
     * The actions run by {@link #execute}, in execution order.
     */
    private LexerAction[] lexerActions;

    /**
     * @uml
     * Caches the result of {@link #toHash} since the hash code is an element
     * of the performance-critical {@link LexerATNConfig#hashCode} operation.
     * It is computed once in the constructor.
     */
    private size_t hashCode_;

    /**
     * @uml
     * Constructs an executor for a sequence of {@link LexerAction} actions.
     * The array is stored as given (it is not copied) and the hash over all
     * actions is computed immediately.
     *
     * @param lexerActions The lexer actions to execute.
     */
    public this(LexerAction[] lexerActions)
    {
        this.lexerActions = lexerActions;

        size_t hash = MurmurHash.initialize();
        foreach (LexerAction lexerAction; lexerActions) {
            hash = MurmurHash.update(hash, lexerAction);
        }
        this.hashCode_ = MurmurHash.finish(hash, lexerActions.length);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which executes the actions for
     * the input {@code lexerActionExecutor} followed by a specified
     * {@code lexerAction}.
     *
     * @param lexerActionExecutor The executor for actions already traversed by
     * the lexer while matching a token within a particular
     * {@link LexerATNConfig}. If this is {@code null}, the method behaves as
     * though it were an empty executor.
     * @param lexerAction The lexer action to execute after the actions
     * specified in {@code lexerActionExecutor}.
     *
     * @return A {@link LexerActionExecutor} for executing the combined actions
     * of {@code lexerActionExecutor} and {@code lexerAction}.
     */
    public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)
    {
        if (lexerActionExecutor is null) {
            return new LexerActionExecutor([lexerAction]);
        }

        // Concatenation (~) always builds a new array, so the array owned by
        // the source executor is never extended or shared with the result.
        return new LexerActionExecutor(lexerActionExecutor.lexerActions ~ lexerAction);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * <p>Normally, when the executor encounters lexer actions where
     * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
     * {@link IntStream#seek} on the input {@link CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.</p>
     *
     * <p>Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.</p>
     *
     * <p>If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns {@code this}.</p>
     *
     * @param offset The current offset to assign to all position-dependent
     * lexer actions which do not already have offsets assigned.
     *
     * @return A {@link LexerActionExecutor} which stores input stream offsets
     * for all position-dependent lexer actions.
     */
    public LexerActionExecutor fixOffsetBeforeMatch(int offset)
    {
        LexerAction[] updatedLexerActions = null;
        foreach (i, lexerAction; lexerActions) {
            // "Already has an offset" is decided with a dynamic cast, the same
            // test execute() uses, so it inspects the runtime type of the
            // action regardless of the static type of the reference.
            immutable bool alreadyIndexed =
                (cast(LexerIndexedCustomAction) lexerAction) !is null;

            if (lexerAction.isPositionDependent() && !alreadyIndexed) {
                // Copy lazily: only the first action that needs wrapping
                // triggers the duplication of the array.
                if (updatedLexerActions is null) {
                    updatedLexerActions = lexerActions.dup;
                }

                updatedLexerActions[i] = new LexerIndexedCustomAction(offset, lexerAction);
            }
        }

        if (updatedLexerActions is null) {
            return this;
        }

        return new LexerActionExecutor(updatedLexerActions);
    }

    /**
     * Gets the lexer actions held by this executor.
     *
     * @return The internal action array itself (not a copy).
     */
    public LexerAction[] getLexerActions()
    {
        return lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@link Lexer}.
     *
     * <p>This method calls {@link IntStream#seek} to set the position of the
     * {@code input} {@link CharStream} prior to calling
     * {@link LexerAction#execute} on a position-dependent action. Before the
     * method returns, the input position will be restored to the same position
     * it was in when the method was invoked.</p>
     *
     * @param lexer The lexer instance.
     * @param input The input stream which is the source for the current token.
     * When this method is called, the current {@link IntStream#index} for
     * {@code input} should be the start of the following token, i.e. 1
     * character past the end of the current token.
     * @param startIndex The token start index. This value may be passed to
     * {@link IntStream#seek} to set the {@code input} position to the beginning
     * of the token.
     */
    public void execute(InterfaceLexer lexer, CharStream input, int startIndex)
    {
        bool requiresSeek = false;
        int stopIndex = input.index;
        try {
            foreach (LexerAction lexerAction; lexerActions) {
                LexerAction actionToRun = lexerAction;

                if (auto indexedAction = cast(LexerIndexedCustomAction) lexerAction) {
                    // Fixed-offset action: position the stream relative to the
                    // token start and run the wrapped action.
                    int offset = indexedAction.getOffset;
                    input.seek(startIndex + offset);
                    actionToRun = indexedAction.getAction;
                    requiresSeek = (startIndex + offset) != stopIndex;
                }
                else if (lexerAction.isPositionDependent) {
                    // Position-dependent action without a fixed offset runs at
                    // the position the stream had on entry.
                    input.seek(stopIndex);
                    requiresSeek = false;
                }

                actionToRun.execute(lexer);
            }
        }
        finally {
            // Restore the entry position even if an action throws.
            if (requiresSeek) {
                input.seek(stopIndex);
            }
        }

    }

    /**
     * @uml
     * Returns the hash computed in the constructor.
     * @safe
     * @nothrow
     * @override
     */
    public override size_t toHash() @safe nothrow
    {
        return this.hashCode_;
    }

    /**
     * @uml
     * Two executors are equal when they are of exactly this class, have the
     * same cached hash and hold pairwise-equal actions in the same order.
     *
     * @param obj The object to compare with; {@code null} compares unequal.
     * @return {@code true} if {@code obj} is equal to this executor.
     * @override
     */
    public override bool opEquals(Object obj)
    {
        if (obj is this) {
            return true;
        }
        if (obj is null || obj.classinfo != LexerActionExecutor.classinfo) {
            return false;
        }

        LexerActionExecutor other = cast(LexerActionExecutor) obj;

        // Cheap rejections first. The length check also guarantees that the
        // indexed access below stays in bounds.
        if (this.hashCode_ != other.hashCode_
            || this.lexerActions.length != other.lexerActions.length) {
            return false;
        }

        foreach (i, lexerA; lexerActions) {
            if (lexerA != other.lexerActions[i]) {
                return false;
            }
        }
        return true;
    }

}