/*
 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.atn.LexerActionExecutor;

import std.conv;
import antlr.v4.runtime.InterfaceLexer;
import antlr.v4.runtime.CharStream;
import antlr.v4.runtime.atn.LexerAction;
import antlr.v4.runtime.atn.LexerIndexedCustomAction;
import antlr.v4.runtime.misc.MurmurHash;

/**
 * @uml
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * <p>The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@link DFA} created for the lexer.</p>
 *
 * @author Sam Harwell
 * @since 4.2
 */
class LexerActionExecutor
{

    /// The actions run by {@link #execute}, in execution order.
    private LexerAction[] lexerActions;

    /**
     * @uml
     * Caches the result of {@link #hashCode} since the hash code is an element
     * of the performance-critical {@link LexerATNConfig#hashCode} operation.
     */
    private size_t hashCode_;

    /**
     * @uml
     * Constructs an executor for a sequence of {@link LexerAction} actions.
     *
     * <p>The hash over all actions is computed once here and cached, so
     * {@link #toHash} is a plain field read.</p>
     *
     * @param lexerActions The lexer actions to execute.
     */
    public this(LexerAction[] lexerActions)
    {
        this.lexerActions = lexerActions;

        size_t hash = MurmurHash.initialize();
        foreach (LexerAction lexerAction; lexerActions) {
            hash = MurmurHash.update(hash, lexerAction);
        }
        this.hashCode_ = MurmurHash.finish(hash, lexerActions.length);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which executes the actions for
     * the input {@code lexerActionExecutor} followed by a specified
     * {@code lexerAction}.
     *
     * @param lexerActionExecutor The executor for actions already traversed by
     * the lexer while matching a token within a particular
     * {@link LexerATNConfig}. If this is {@code null}, the method behaves as
     * though it were an empty executor.
     * @param lexerAction The lexer action to execute after the actions
     * specified in {@code lexerActionExecutor}.
     *
     * @return A {@link LexerActionExecutor} for executing the combined actions
     * of {@code lexerActionExecutor} and {@code lexerAction}.
     */
    public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)
    {
        if (lexerActionExecutor is null) {
            LexerAction[] single = [lexerAction];
            return new LexerActionExecutor(single);
        }

        // Binary `~` always allocates a fresh array, so the new executor never
        // shares (or extends in place) the storage owned by the source executor.
        LexerAction[] combined = lexerActionExecutor.lexerActions ~ lexerAction;
        return new LexerActionExecutor(combined);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * <p>Normally, when the executor encounters lexer actions where
     * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
     * {@link IntStream#seek} on the input {@link CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.</p>
     *
     * <p>Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.</p>
     *
     * <p>If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns {@code this}.</p>
     *
     * @param offset The current offset to assign to all position-dependent
     * lexer actions which do not already have offsets assigned.
     *
     * @return A {@link LexerActionExecutor} which stores input stream offsets
     * for all position-dependent lexer actions.
     */
    public LexerActionExecutor fixOffsetBeforeMatch(int offset)
    {
        LexerAction[] updatedLexerActions = null;
        foreach (i, action; lexerActions) {
            // A dynamic cast is used (as in execute) rather than a classinfo
            // comparison, so the check reflects the runtime type of the action
            // regardless of the static type of the reference.
            immutable bool alreadyIndexed = cast(LexerIndexedCustomAction) action !is null;
            if (action.isPositionDependent() && !alreadyIndexed) {
                // Copy lazily: only the first action that needs wrapping pays
                // for the duplicate, and `this` stays untouched.
                if (updatedLexerActions is null) {
                    updatedLexerActions = lexerActions.dup;
                }

                updatedLexerActions[i] = new LexerIndexedCustomAction(offset, action);
            }
        }

        if (updatedLexerActions is null) {
            return this;
        }

        return new LexerActionExecutor(updatedLexerActions);
    }

    /**
     * Gets the lexer actions held by this executor.
     *
     * @return The internal action array itself (no copy is made), in
     * execution order.
     */
    public LexerAction[] getLexerActions()
    {
        return lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@link Lexer}.
     *
     * <p>This method calls {@link IntStream#seek} to set the position of the
     * {@code input} {@link CharStream} prior to calling
     * {@link LexerAction#execute} on a position-dependent action. Before the
     * method returns, the input position will be restored to the same position
     * it was in when the method was invoked.</p>
     *
     * @param lexer The lexer instance.
     * @param input The input stream which is the source for the current token.
     * When this method is called, the current {@link IntStream#index} for
     * {@code input} should be the start of the following token, i.e. 1
     * character past the end of the current token.
     * @param startIndex The token start index. This value may be passed to
     * {@link IntStream#seek} to set the {@code input} position to the beginning
     * of the token.
     */
    public void execute(InterfaceLexer lexer, CharStream input, int startIndex)
    {
        bool requiresSeek = false;
        int stopIndex = input.index;
        try {
            foreach (LexerAction lexerAction; lexerActions) {
                if (auto indexedAction = cast(LexerIndexedCustomAction) lexerAction) {
                    // Indexed action: position the stream at the recorded
                    // offset and run the wrapped action instead.
                    int offset = indexedAction.getOffset;
                    input.seek(startIndex + offset);
                    lexerAction = indexedAction.getAction;
                    requiresSeek = (startIndex + offset) != stopIndex;
                }
                else if (lexerAction.isPositionDependent) {
                    // Un-indexed position-dependent action runs at token end.
                    input.seek(stopIndex);
                    requiresSeek = false;
                }

                lexerAction.execute(lexer);
            }
        }
        finally {
            // Restore the caller's position even if an action throws.
            if (requiresSeek) {
                input.seek(stopIndex);
            }
        }

    }

    /**
     * @uml
     * Returns the hash computed over the action sequence at construction time.
     * @safe
     * @nothrow
     * @override
     */
    public override size_t toHash() @safe nothrow
    {
        return this.hashCode_;
    }

    /**
     * @uml
     * Two executors are equal when they hold the same number of actions and
     * the actions compare equal pairwise, in order.
     *
     * @param obj The object to compare with; {@code null} or an object of a
     * different type compares unequal.
     * @return {@code true} if {@code obj} is an equal executor.
     * @override
     */
    public override bool opEquals(Object obj)
    {
        if (obj is this) {
            return true;
        }

        // The checked downcast yields null for both a null argument and an
        // object of an unrelated type.
        LexerActionExecutor other = cast(LexerActionExecutor) obj;
        if (other is null) {
            return false;
        }

        // Cheap rejections first; the length check also guarantees that the
        // element loop below never indexes past the end of other.lexerActions.
        if (this.hashCode_ != other.hashCode_
            || lexerActions.length != other.lexerActions.length) {
            return false;
        }

        foreach (i, lexerA; lexerActions) {
            if (lexerA != other.lexerActions[i]) {
                return false;
            }
        }
        return true;
    }

}