antlr.v4.runtime.atn.LexerActionExecutor source code

1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.atn.LexerActionExecutor;
8 
9 import std.conv;
10 import antlr.v4.runtime.InterfaceLexer;
11 import antlr.v4.runtime.CharStream;
12 import antlr.v4.runtime.atn.LexerAction;
13 import antlr.v4.runtime.atn.LexerIndexedCustomAction;
14 import antlr.v4.runtime.misc.MurmurHash;
15 
16 /**
17  * @uml
18  * Represents an executor for a sequence of lexer actions which are traversed during
19  * the matching operation of a lexer rule (token).
20  *
21  * <p>The executor tracks position information for position-dependent lexer actions
22  * efficiently, ensuring that actions appearing only at the end of the rule do
23  * not cause bloating of the {@link DFA} created for the lexer.</p>
24  *
25  *  @author Sam Harwell
26  *  @since 4.2
27  */
class LexerActionExecutor
{

    /// The lexer actions run by this executor, in execution order.
    private LexerAction[] lexerActions;

    /**
     * @uml
     * Caches the result of {@link #hashCode} since the hash code is an element
     * of the performance-critical {@link LexerATNConfig#hashCode} operation.
     * It is computed once in the constructor and returned by {@link #toHash}.
     */
    private size_t hashCode_;

    /**
     * @uml
     * Constructs an executor for a sequence of {@link LexerAction} actions.
     * The array is stored as given (it is not copied) and the hash over all
     * actions is computed eagerly.
     *  @param lexerActions The lexer actions to execute.
     */
    public this(LexerAction[] lexerActions)
    {
        this.lexerActions = lexerActions;

        size_t hash = MurmurHash.initialize();
        foreach (LexerAction lexerAction; lexerActions) {
            hash = MurmurHash.update(hash, lexerAction);
        }
        this.hashCode_ = MurmurHash.finish(hash, lexerActions.length);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which executes the actions for
     * the input {@code lexerActionExecutor} followed by a specified
     * {@code lexerAction}.
     *
     *  @param lexerActionExecutor The executor for actions already traversed by
     *  the lexer while matching a token within a particular
     *  {@link LexerATNConfig}. If this is {@code null}, the method behaves as
     *  though it were an empty executor.
     *  @param lexerAction The lexer action to execute after the actions
     *  specified in {@code lexerActionExecutor}.
     *
     *  @return A {@link LexerActionExecutor} for executing the combined actions
     *  of {@code lexerActionExecutor} and {@code lexerAction}.
     */
    public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)
    {
        if (lexerActionExecutor is null) {
            return new LexerActionExecutor([lexerAction]);
        }

        // Binary `~` always allocates a new array, so the resulting executor
        // never shares storage with the executor it was derived from
        // (`~=` on a copied slice may extend the source's memory block in place).
        LexerAction[] combined = lexerActionExecutor.lexerActions ~ lexerAction;
        return new LexerActionExecutor(combined);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * <p>Normally, when the executor encounters lexer actions where
     * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
     * {@link IntStream#seek} on the input {@link CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.</p>
     *
     * <p>Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.</p>
     *
     * <p>If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns {@code this}.</p>
     *
     *  @param offset The current offset to assign to all position-dependent
     * lexer actions which do not already have offsets assigned.
     *
     *  @return A {@link LexerActionExecutor} which stores input stream offsets
     * for all position-dependent lexer actions.
     */
    public LexerActionExecutor fixOffsetBeforeMatch(int offset)
    {
        // Allocated lazily: stays null unless at least one action is wrapped.
        LexerAction[] updatedLexerActions = null;
        foreach (i, action; lexerActions) {
            // Same dynamic-cast test as execute(); unlike a `.classinfo`
            // comparison it inspects the runtime object even when the static
            // type of the reference is an interface.
            immutable bool alreadyIndexed = (cast(LexerIndexedCustomAction) action) !is null;
            if (action.isPositionDependent() && !alreadyIndexed) {
                if (updatedLexerActions is null) {
                    // Copy on first modification so this executor stays untouched.
                    updatedLexerActions = lexerActions.dup;
                }

                updatedLexerActions[i] = new LexerIndexedCustomAction(offset, action);
            }
        }

        if (updatedLexerActions is null) {
            return this;
        }

        return new LexerActionExecutor(updatedLexerActions);
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return The internal action array itself (not a copy).
     */
    public LexerAction[] getLexerActions()
    {
        return lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@link Lexer}.
     *
     * <p>This method calls {@link IntStream#seek} to set the position of the
     * {@code input} {@link CharStream} prior to calling
     * {@link LexerAction#execute} on a position-dependent action. Before the
     * method returns, the input position will be restored to the same position
     * it was in when the method was invoked.</p>
     *
     * @param lexer The lexer instance.
     * @param input The input stream which is the source for the current token.
     * When this method is called, the current {@link IntStream#index} for
     * {@code input} should be the start of the following token, i.e. 1
     * character past the end of the current token.
     * @param startIndex The token start index. This value may be passed to
     * {@link IntStream#seek} to set the {@code input} position to the beginning
     * of the token.
     */
    public void execute(InterfaceLexer lexer, CharStream input, int startIndex)
    {
        // True while the stream is parked somewhere other than stopIndex.
        bool requiresSeek = false;
        int stopIndex = input.index;
        try {
            foreach (LexerAction lexerAction; lexerActions) {
                LexerAction action = lexerAction;
                if (auto indexed = cast(LexerIndexedCustomAction) lexerAction) {
                    // Indexed action: move to its recorded offset and run the
                    // wrapped action there.
                    int offset = indexed.getOffset;
                    input.seek(startIndex + offset);
                    action = indexed.getAction;
                    requiresSeek = (startIndex + offset) != stopIndex;
                }
                else if (lexerAction.isPositionDependent) {
                    // Position-dependent action without a fixed offset runs at
                    // the end of the token.
                    input.seek(stopIndex);
                    requiresSeek = false;
                }

                action.execute(lexer);
            }
        }
        finally {
            // Restore the original position even if an action threw.
            if (requiresSeek) {
                input.seek(stopIndex);
            }
        }

    }

    /**
     * @uml
     * Returns the hash code cached by the constructor.
     * @safe
     * @nothrow
     * @override
     */
    public override size_t toHash() @safe nothrow
    {
        return this.hashCode_;
    }

    /**
     * @uml
     * Compares this executor with another object. Two executors are equal when
     * they have exactly the same class, the same cached hash, the same number
     * of actions, and pairwise equal actions in the same order.
     *
     * @param obj The object to compare with; {@code null} compares unequal.
     * @return {@code true} if {@code obj} is an equal executor.
     * @override
     */
    public override bool opEquals(Object obj)
    {
        if (obj is this) {
            return true;
        }
        if (obj is null || obj.classinfo != LexerActionExecutor.classinfo) {
            return false;
        }

        LexerActionExecutor other = cast(LexerActionExecutor) obj;

        // Cheap rejections first. The length check also guarantees that the
        // indexing below stays in bounds.
        if (this.hashCode_ != other.hashCode_
            || lexerActions.length != other.lexerActions.length) {
            return false;
        }

        foreach (i, lexerA; lexerActions) {
            if (lexerA != other.lexerActions[i]) {
                return false;
            }
        }
        return true;
    }

}