antlr.v4.runtime.atn.LexerActionExecutor source code

1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.atn.LexerActionExecutor;
8 
9 import std.conv;
10 import antlr.v4.runtime.InterfaceLexer;
11 import antlr.v4.runtime.CharStream;
12 import antlr.v4.runtime.atn.LexerAction;
13 import antlr.v4.runtime.atn.LexerIndexedCustomAction;
14 import antlr.v4.runtime.misc.MurmurHash;
15 
16 /**
17  * @uml
18  * Represents an executor for a sequence of lexer actions which are traversed during
19  * the matching operation of a lexer rule (token).
20  *
21  * <p>The executor tracks position information for position-dependent lexer actions
22  * efficiently, ensuring that actions appearing only at the end of the rule do
23  * not cause bloating of the {@link DFA} created for the lexer.</p>
24  *
25  *  @author Sam Harwell
26  *  @since 4.2
27  */
class LexerActionExecutor
{

    /// The lexer actions, in the order in which they are executed.
    private LexerAction[] lexerActions;

    /**
     * @uml
     * Caches the result of {@link #hashCode} since the hash code is an element
     * of the performance-critical {@link LexerATNConfig#hashCode} operation.
     * It is computed once in the constructor from the action sequence.
     */
    private size_t hashCode_;

    /**
     * @uml
     * Constructs an executor for a sequence of {@link LexerAction} actions.
     *
     * The array is stored as passed (it is not copied) and the hash over all
     * actions is computed eagerly.
     *
     *  @param lexerActions The lexer actions to execute.
     */
    public this(LexerAction[] lexerActions)
    {
        this.lexerActions = lexerActions;

        size_t hash = MurmurHash.initialize();
        foreach (LexerAction lexerAction; lexerActions) {
            hash = MurmurHash.update(hash, lexerAction);
        }
        this.hashCode_ = MurmurHash.finish(hash, lexerActions.length);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which executes the actions for
     * the input {@code lexerActionExecutor} followed by a specified
     * {@code lexerAction}.
     *
     *  @param lexerActionExecutor The executor for actions already traversed by
     *  the lexer while matching a token within a particular
     *  {@link LexerATNConfig}. If this is {@code null}, the method behaves as
     *  though it were an empty executor.
     *  @param lexerAction The lexer action to execute after the actions
     *  specified in {@code lexerActionExecutor}.
     *
     *  @return A {@link LexerActionExecutor} for executing the combined actions
     *  of {@code lexerActionExecutor} and {@code lexerAction}.
     */
    public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)
    {
        if (lexerActionExecutor is null) {
            LexerAction[] single = [lexerAction];
            return new LexerActionExecutor(single);
        }

        // Binary `~` always allocates a new array, so the new executor never
        // shares (or extends in place) the storage of the source executor.
        LexerAction[] combined = lexerActionExecutor.lexerActions ~ lexerAction;
        return new LexerActionExecutor(combined);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * <p>Normally, when the executor encounters lexer actions where
     * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
     * {@link IntStream#seek} on the input {@link CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.</p>
     *
     * <p>Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.</p>
     *
     * <p>If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns {@code this}.</p>
     *
     *  @param offset The current offset to assign to all position-dependent
     *  lexer actions which do not already have offsets assigned.
     *
     *  @return A {@link LexerActionExecutor} which stores input stream offsets
     *  for all position-dependent lexer actions.
     */
    public LexerActionExecutor fixOffsetBeforeMatch(size_t offset)
    {
        // Stays null until the first action needs wrapping, so the common
        // "nothing to fix" case allocates nothing.
        LexerAction[] updatedLexerActions;

        foreach (i, action; lexerActions) {
            immutable alreadyIndexed = (cast(LexerIndexedCustomAction) action) !is null;
            if (action.isPositionDependent && !alreadyIndexed) {
                if (updatedLexerActions is null) {
                    // Copy on first change; this executor's array is left untouched.
                    updatedLexerActions = lexerActions.dup;
                }

                updatedLexerActions[i] = new LexerIndexedCustomAction(offset, action);
            }
        }

        if (updatedLexerActions is null) {
            return this;
        }

        return new LexerActionExecutor(updatedLexerActions);
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return The internal action array. It is returned without copying, so
     * callers must not modify it.
     */
    public LexerAction[] getLexerActions()
    {
        return lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@link Lexer}.
     *
     * <p>This method calls {@link IntStream#seek} to set the position of the
     * {@code input} {@link CharStream} prior to calling
     * {@link LexerAction#execute} on a position-dependent action. Before the
     * method returns, the input position will be restored to the same position
     * it was in when the method was invoked.</p>
     *
     * @param lexer The lexer instance.
     * @param input The input stream which is the source for the current token.
     * When this method is called, the current {@link IntStream#index} for
     * {@code input} should be the start of the following token, i.e. 1
     * character past the end of the current token.
     * @param startIndex The token start index. This value may be passed to
     * {@link IntStream#seek} to set the {@code input} position to the beginning
     * of the token.
     */
    public void execute(InterfaceLexer lexer, CharStream input, size_t startIndex)
    {
        // True while the stream is parked somewhere other than stopIndex.
        bool requiresSeek = false;
        auto stopIndex = input.index;
        try {
            foreach (LexerAction lexerAction; lexerActions) {
                if (auto indexed = cast(LexerIndexedCustomAction) lexerAction) {
                    // Indexed action: move to the recorded offset inside the
                    // token, then run the wrapped action.
                    auto position = startIndex + indexed.getOffset;
                    input.seek(position);
                    lexerAction = indexed.getAction;
                    requiresSeek = position != stopIndex;
                }
                else if (lexerAction.isPositionDependent) {
                    // Position-dependent action without an offset runs at the
                    // end of the token.
                    input.seek(stopIndex);
                    requiresSeek = false;
                }

                lexerAction.execute(lexer);
            }
        }
        finally {
            // Restore the caller's position even if an action throws.
            if (requiresSeek) {
                input.seek(stopIndex);
            }
        }
    }

    /**
     * @uml
     * Returns the hash code cached at construction time.
     * @safe
     * @nothrow
     * @override
     */
    public override size_t toHash() @safe nothrow
    {
        return this.hashCode_;
    }

    /**
     * @uml
     * Compares this executor with another object.
     *
     * Two executors are equal when they are of exactly this class, have the
     * same cached hash, hold the same number of actions and every pair of
     * actions at the same index compares equal.
     *
     * @param obj The object to compare with; {@code null} compares unequal.
     * @return {@code true} if {@code obj} is an equal executor.
     * @override
     */
    public override bool opEquals(Object obj)
    {
        if (obj is this) {
            return true;
        }
        // Guard against null before touching classinfo.
        if (obj is null || obj.classinfo != LexerActionExecutor.classinfo) {
            return false;
        }

        LexerActionExecutor other = cast(LexerActionExecutor) obj;

        // Cheap rejections first; the length check also keeps the indexed
        // access below in bounds.
        if (this.hashCode_ != other.hashCode_
            || this.lexerActions.length != other.lexerActions.length) {
            return false;
        }

        foreach (i, action; lexerActions) {
            if (action != other.lexerActions[i]) {
                return false;
            }
        }
        return true;
    }

}