/*
 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

module antlr.v4.runtime.atn.LexerActionExecutor;

import std.conv;
import antlr.v4.runtime.InterfaceLexer;
import antlr.v4.runtime.CharStream;
import antlr.v4.runtime.atn.LexerAction;
import antlr.v4.runtime.atn.LexerIndexedCustomAction;
import antlr.v4.runtime.misc.MurmurHash;

/**
 * @uml
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * <p>The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@link DFA} created for the lexer.</p>
 *
 * @author Sam Harwell
 * @since 4.2
 */
class LexerActionExecutor
{

    private LexerAction[] lexerActions;

    /**
     * @uml
     * Caches the result of {@link #hashCode} since the hash code is an element
     * of the performance-critical {@link LexerATNConfig#hashCode} operation.
     */
    private size_t hashCode_;

    /**
     * @uml
     * Constructs an executor for a sequence of {@link LexerAction} actions.
     *
     * <p>The hash over all actions is computed once here and cached; it is
     * what {@link #toHash} returns afterwards.</p>
     *
     * @param lexerActions The lexer actions to execute.
     */
    public this(LexerAction[] lexerActions)
    {
        this.lexerActions = lexerActions;

        size_t hash = MurmurHash.initialize();
        foreach (LexerAction lexerAction; lexerActions) {
            hash = MurmurHash.update(hash, lexerAction);
        }
        this.hashCode_ = MurmurHash.finish(hash, lexerActions.length);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which executes the actions for
     * the input {@code lexerActionExecutor} followed by a specified
     * {@code lexerAction}.
     *
     * @param lexerActionExecutor The executor for actions already traversed by
     * the lexer while matching a token within a particular
     * {@link LexerATNConfig}. If this is {@code null}, the method behaves as
     * though it were an empty executor.
     * @param lexerAction The lexer action to execute after the actions
     * specified in {@code lexerActionExecutor}.
     *
     * @return A {@link LexerActionExecutor} for executing the combined actions
     * of {@code lexerActionExecutor} and {@code lexerAction}.
     */
    public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)
    {
        if (lexerActionExecutor is null) {
            LexerAction[] single = [lexerAction];
            return new LexerActionExecutor(single);
        }

        // Binary `~` always allocates a new array, so the new executor never
        // shares storage with the executor it was derived from.
        LexerAction[] combined = lexerActionExecutor.lexerActions ~ lexerAction;
        return new LexerActionExecutor(combined);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * <p>Normally, when the executor encounters lexer actions where
     * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
     * {@link IntStream#seek} on the input {@link CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.</p>
     *
     * <p>Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.</p>
     *
     * <p>If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns {@code this}.</p>
     *
     * @param offset The current offset to assign to all position-dependent
     * lexer actions which do not already have offsets assigned.
     *
     * @return A {@link LexerActionExecutor} which stores input stream offsets
     * for all position-dependent lexer actions.
     */
    public LexerActionExecutor fixOffsetBeforeMatch(size_t offset)
    {
        // Stays null until the first action needs wrapping, so the common
        // "nothing to fix" case allocates nothing.
        LexerAction[] updatedLexerActions;

        for (size_t i = 0; i < lexerActions.length; i++)
        {
            if (lexerActions[i].isPositionDependent && !(cast(LexerIndexedCustomAction)lexerActions[i])) {
                if (updatedLexerActions is null) {
                    updatedLexerActions = lexerActions.dup;
                }

                updatedLexerActions[i] = new LexerIndexedCustomAction(offset, lexerActions[i]);
            }
        }

        if (updatedLexerActions is null) {
            return this;
        }

        return new LexerActionExecutor(updatedLexerActions);
    }

    /**
     * Returns the lexer actions held by this executor.
     *
     * <p>The returned slice is the executor's own array, not a copy.</p>
     */
    public LexerAction[] getLexerActions()
    {
        return lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@link Lexer}.
     *
     * <p>This method calls {@link IntStream#seek} to set the position of the
     * {@code input} {@link CharStream} prior to calling
     * {@link LexerAction#execute} on a position-dependent action. Before the
     * method returns, the input position will be restored to the same position
     * it was in when the method was invoked.</p>
     *
     * @param lexer The lexer instance.
     * @param input The input stream which is the source for the current token.
     * When this method is called, the current {@link IntStream#index} for
     * {@code input} should be the start of the following token, i.e. 1
     * character past the end of the current token.
     * @param startIndex The token start index. This value may be passed to
     * {@link IntStream#seek} to set the {@code input} position to the beginning
     * of the token.
     */
    public void execute(InterfaceLexer lexer, CharStream input, size_t startIndex)
    {
        // True while the stream is parked somewhere other than stopIndex.
        bool requiresSeek = false;
        auto stopIndex = input.index;
        try {
            foreach (LexerAction lexerAction; lexerActions) {
                LexerAction action = lexerAction;
                if (auto indexed = cast(LexerIndexedCustomAction)lexerAction) {
                    // Indexed action: run the wrapped action at its recorded
                    // offset from the token start.
                    auto position = startIndex + indexed.getOffset;
                    input.seek(position);
                    action = indexed.getAction;
                    requiresSeek = position != stopIndex;
                }
                else if (lexerAction.isPositionDependent) {
                    // Position-dependent action without an offset runs at the
                    // position the stream had on entry.
                    input.seek(stopIndex);
                    requiresSeek = false;
                }

                action.execute(lexer);
            }
        }
        finally {
            // Restore the entry position even if an action threw.
            if (requiresSeek) {
                input.seek(stopIndex);
            }
        }

    }

    /**
     * @uml
     * Returns the hash code cached by the constructor.
     * @safe
     * @nothrow
     * @override
     */
    public override size_t toHash() @safe nothrow
    {
        return this.hashCode_;
    }

    /**
     * @uml
     * Two executors are equal when they are of exactly this class, have the
     * same cached hash, and hold pairwise-equal actions in the same order.
     *
     * @param obj The object to compare with; {@code null} compares unequal.
     * @override
     */
    public override bool opEquals(Object obj)
    {
        if (obj is this) {
            return true;
        }
        if (obj is null || obj.classinfo != LexerActionExecutor.classinfo) {
            return false;
        }

        LexerActionExecutor other = cast(LexerActionExecutor)obj;

        // Cheap rejections first. The length check also keeps the indexed
        // comparison below within the bounds of other.lexerActions.
        if (this.hashCode_ != other.hashCode_
            || lexerActions.length != other.lexerActions.length) {
            return false;
        }

        foreach (i, lexerA; lexerActions) {
            if (lexerA != other.lexerActions[i]) {
                return false;
            }
        }
        return true;
    }

}