antlr.v4.runtime.atn.LexerActionExecutor source code

1 /*
2  * Copyright (c) 2012-2018 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.atn.LexerActionExecutor;
8 
9 import std.conv;
10 import antlr.v4.runtime.InterfaceLexer;
11 import antlr.v4.runtime.CharStream;
12 import antlr.v4.runtime.atn.LexerAction;
13 import antlr.v4.runtime.atn.LexerIndexedCustomAction;
14 import antlr.v4.runtime.misc.MurmurHash;
15 
16 // Class LexerActionExecutor
/**
 * @uml
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * <p>The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@link DFA} created for the lexer.</p>
 *
 * <p>The hash code is computed once in the constructor from the supplied
 * actions, so the action array must not be modified after construction.</p>
 *
 *  @author Sam Harwell
 *  @since 4.2
 */
class LexerActionExecutor
{

    /** The actions run by {@link #execute}, in execution order. */
    private LexerAction[] lexerActions;

    /**
     * @uml
     * Caches the result of {@link #hashCode} since the hash code is an element
     * of the performance-critical {@link LexerATNConfig#hashCode} operation.
     */
    private size_t hashCode_;

    /**
     * @uml
     * Constructs an executor for a sequence of {@link LexerAction} actions.
     *
     * <p>The array is stored as given (no copy is made) and the hash over all
     * actions is computed eagerly.</p>
     *
     *  @param lexerActions The lexer actions to execute.
     */
    public this(LexerAction[] lexerActions)
    {
        this.lexerActions = lexerActions;

        size_t hash = MurmurHash.initialize();
        foreach (LexerAction lexerAction; lexerActions) {
            hash = MurmurHash.update(hash, lexerAction);
        }
        this.hashCode_ = MurmurHash.finish(hash, lexerActions.length);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which executes the actions for
     * the input {@code lexerActionExecutor} followed by a specified
     * {@code lexerAction}.
     *
     *  @param lexerActionExecutor The executor for actions already traversed by
     *  the lexer while matching a token within a particular
     *  {@link LexerATNConfig}. If this is {@code null}, the method behaves as
     *  though it were an empty executor.
     *  @param lexerAction The lexer action to execute after the actions
     *  specified in {@code lexerActionExecutor}.
     *
     *  @return A {@link LexerActionExecutor} for executing the combined actions
     *  of {@code lexerActionExecutor} and {@code lexerAction}.
     */
    public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)
    {
        if (lexerActionExecutor is null) {
            LexerAction[] single = [lexerAction];
            return new LexerActionExecutor(single);
        }

        // `~` always allocates a new array, so the source executor's action
        // array is never shared with (or extended in place for) the result.
        LexerAction[] combined = lexerActionExecutor.lexerActions ~ lexerAction;
        return new LexerActionExecutor(combined);
    }

    /**
     * @uml
     * Creates a {@link LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * <p>Normally, when the executor encounters lexer actions where
     * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
     * {@link IntStream#seek} on the input {@link CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.</p>
     *
     * <p>Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.</p>
     *
     * <p>If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns {@code this}.</p>
     *
     *  @param offset The current offset to assign to all position-dependent
     * lexer actions which do not already have offsets assigned.
     *
     *  @return A {@link LexerActionExecutor} which stores input stream offsets
     * for all position-dependent lexer actions.
     */
    public LexerActionExecutor fixOffsetBeforeMatch(int offset)
    {
        // Allocated lazily: stays null unless at least one action is wrapped.
        LexerAction[] updatedLexerActions = null;
        foreach (i, action; lexerActions) {
            // Use the same dynamic-type test as execute() so that an action
            // which execute() treats as indexed is never wrapped a second time.
            immutable alreadyIndexed = cast(LexerIndexedCustomAction) action !is null;
            if (action.isPositionDependent() && !alreadyIndexed) {
                if (updatedLexerActions is null) {
                    updatedLexerActions = lexerActions.dup;
                }

                updatedLexerActions[i] = new LexerIndexedCustomAction(offset, action);
            }
        }

        if (updatedLexerActions is null) {
            return this;
        }

        return new LexerActionExecutor(updatedLexerActions);
    }

    /**
     * Gets the lexer actions executed by this executor.
     *
     * <p>The internal array is returned without copying. Callers must treat it
     * as read-only: the cached hash code is not recomputed if it changes.</p>
     *
     * @return The lexer actions, in execution order.
     */
    public LexerAction[] getLexerActions()
    {
        return lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@link Lexer}.
     *
     * <p>This method calls {@link IntStream#seek} to set the position of the
     * {@code input} {@link CharStream} prior to calling
     * {@link LexerAction#execute} on a position-dependent action. Before the
     * method returns, the input position will be restored to the same position
     * it was in when the method was invoked.</p>
     *
     * @param lexer The lexer instance.
     * @param input The input stream which is the source for the current token.
     * When this method is called, the current {@link IntStream#index} for
     * {@code input} should be the start of the following token, i.e. 1
     * character past the end of the current token.
     * @param startIndex The token start index. This value may be passed to
     * {@link IntStream#seek} to set the {@code input} position to the beginning
     * of the token.
     */
    public void execute(InterfaceLexer lexer, CharStream input, int startIndex)
    {
        // True while the stream is parked somewhere other than stopIndex.
        bool requiresSeek = false;
        int stopIndex = input.index;
        try {
            foreach (LexerAction lexerAction; lexerActions) {
                LexerAction toRun = lexerAction;
                if (auto indexed = cast(LexerIndexedCustomAction) lexerAction) {
                    // Offset-carrying action: seek to the recorded position
                    // and run the wrapped action there.
                    int offset = indexed.getOffset;
                    input.seek(startIndex + offset);
                    toRun = indexed.getAction;
                    requiresSeek = (startIndex + offset) != stopIndex;
                }
                else if (lexerAction.isPositionDependent) {
                    // No recorded offset: the action runs at the token end.
                    input.seek(stopIndex);
                    requiresSeek = false;
                }

                toRun.execute(lexer);
            }
        }
        finally {
            // Restore the caller's position even if an action throws.
            if (requiresSeek) {
                input.seek(stopIndex);
            }
        }

    }

    /**
     * @uml
     * Returns the hash code computed at construction time.
     * @safe
     * @nothrow
     * @override
     */
    public override size_t toHash() @safe nothrow
    {
        return this.hashCode_;
    }

    /**
     * @uml
     * Two executors are equal when they are of exactly this class and hold
     * the same number of actions, with every pair of actions at the same
     * position comparing equal.
     *
     * @param obj The object to compare with; {@code null} is never equal.
     * @return {@code true} if {@code obj} is an equal executor.
     * @override
     */
    public override bool opEquals(Object obj)
    {
        if (obj is this) {
            return true;
        }
        if (obj is null || obj.classinfo != LexerActionExecutor.classinfo) {
            return false;
        }

        LexerActionExecutor other = cast(LexerActionExecutor)obj;
        // Cheap rejections first. The length check also keeps the indexed
        // access below in bounds when the two executors differ in size.
        if (this.hashCode_ != other.hashCode_
            || lexerActions.length != other.lexerActions.length) {
            return false;
        }

        foreach (i, lexerA; lexerActions) {
            if (lexerA != other.lexerActions[i]) {
                return false;
            }
        }
        return true;
    }

}