antlr.v4.runtime.tree.xpath.XPath source code

1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2012 Terence Parr
4  *  Copyright (c) 2012 Sam Harwell
5  *  Copyright (c) 2017 Egbert Voigt
6  *  All rights reserved.
7  *
8  *  Redistribution and use in source and binary forms, with or without
9  *  modification, are permitted provided that the following conditions
10  *  are met:
11  *
12  *  1. Redistributions of source code must retain the above copyright
13  *     notice, this list of conditions and the following disclaimer.
14  *  2. Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  *  3. The name of the author may not be used to endorse or promote products
18  *     derived from this software without specific prior written permission.
19  *
20  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 module antlr.v4.runtime.tree.xpath.XPath;
33 
34 import std.array;
35 import std.stdio;
36 import std.file;
37 import std.format;
38 import std.conv;
39 import std.container : DList;
40 import antlr.v4.runtime.Parser;
41 import antlr.v4.runtime.Token;
42 import antlr.v4.runtime.TokenConstantDefinition;
43 import antlr.v4.runtime.CommonTokenStream;
44 import antlr.v4.runtime.ANTLRInputStream;
45 import antlr.v4.runtime.IllegalArgumentException;
46 import antlr.v4.runtime.LexerNoViableAltException;
47 import antlr.v4.runtime.ParserRuleContext;
48 import antlr.v4.runtime.tree.ParseTree;
49 import antlr.v4.runtime.tree.xpath.XPathElement;
50 import antlr.v4.runtime.tree.xpath.XPathLexerErrorListener;
51 import antlr.v4.runtime.tree.xpath.XPathLexer;
52 import antlr.v4.runtime.tree.xpath.XPathWildcardAnywhereElement;
53 import antlr.v4.runtime.tree.xpath.XPathWildcardElement;
54 import antlr.v4.runtime.tree.xpath.XPathRuleElement;
55 import antlr.v4.runtime.tree.xpath.XPathRuleAnywhereElement;
56 import antlr.v4.runtime.tree.xpath.XPathTokenElement;
57 import antlr.v4.runtime.tree.xpath.XPathTokenAnywhereElement;
58 
59 // Class XPath
60 /**
61  * @uml
62  * Represent a subset of XPath XML path syntax for use in identifying nodes in
63  * parse trees.
64  *
65  * <p>
66  * Split path into words and separators {@code /} and {@code //} via ANTLR
67  * itself then walk path elements from left to right. At each separator-word
68  * pair, find set of nodes. Next stage uses those as work list.</p>
69  *
70  * <p>
71  * The basic interface is
72  * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
73  * But that is just shorthand for:</p>
74  *
75  * <pre>
76  * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
77  * return p.{@link #evaluate evaluate}(tree);
78  * </pre>
79  *
80  * <p>
81  * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
82  * allows operators:</p>
83  *
84  * <dl>
85  * <dt>/</dt> <dd>root</dd>
86  * <dt>//</dt> <dd>anywhere</dd>
87  * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
88  * operator</dd>
89  * </dl>
90  *
91  * <p>
92  * and path elements:</p>
93  *
94  * <dl>
95  * <dt>ID</dt> <dd>token name</dd>
96  * <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
97  * <dt>expr</dt> <dd>rule name</dd>
98  * <dt>*</dt> <dd>wildcard matching any node</dd>
99  * </dl>
100  *
101  * <p>
102  * Whitespace is not allowed.</p>
103  */
104 class XPath
105 {
106 
107     /**
108      * @uml
109      * word not operator/separator
110      */
111     public static immutable string WILDCARD = "*";
112 
113     /**
114      * @uml
115      * word for invert operator
116      */
117     public static immutable string NOT = "!";
118 
119     protected string path;
120 
121     protected XPathElement[] elements;
122 
123     protected Parser parser;
124 
125     public this(Parser parser, string path)
126     {
127         this.parser = parser;
128         this.path = path;
129         elements = split(path);
130         debug writefln("%s", elements);
131     }
132 
133     /**
134      * @uml
135      * TODO: check for invalid token/rule names, bad syntax
136      */
137     public XPathElement[] split(string path)
138     {
139         ANTLRInputStream ins;
140         try {
141             ins = new ANTLRInputStream(readText(path));
142         }
143         catch (Exception ioe) {
144             throw new IllegalArgumentException("Could not read path: " ~ path, ioe);
145         }
146         XPathLexer lexer = new XPathLexer(ins);
147         lexer.removeErrorListeners();
148         lexer.addErrorListener(new XPathLexerErrorListener());
149         CommonTokenStream tokenStream = new CommonTokenStream(lexer);
150         try {
151             tokenStream.fill();
152         }
153         catch (LexerNoViableAltException e) {
154             int pos = lexer.getCharPositionInLine();
155             string msg = format("Invalid tokens or characters at index %1$s in path '%2$s'", pos, path);
156             throw new IllegalArgumentException(msg);
157         }
158 
159         Token[] tokens = tokenStream.getTokens();
160         //		System.out.println("path="+path+"=>"+tokens);
161         XPathElement[] elements;
162         int n = to!int(tokens.length);
163         int i=0;
164     loop:
165         while ( i<n ) {
166             Token el = tokens[i];
167             Token next = null;
168             switch ( el.getType() ) {
169             case XPathLexer.ROOT :
170             case XPathLexer.ANYWHERE :
171                 bool anywhere = el.getType() == XPathLexer.ANYWHERE;
172                 i++;
173                 next = tokens[i];
174                 bool invert = next.getType() == XPathLexer.BANG;
175                 if ( invert ) {
176                     i++;
177                     next = tokens[i];
178                 }
179                 XPathElement pathElement = getXPathElement(next, anywhere);
180                 pathElement.invert = invert;
181                 elements ~= pathElement;
182                 i++;
183                 break;
184 
185             case XPathLexer.TOKEN_REF :
186             case XPathLexer.RULE_REF :
187             case XPathLexer.WILDCARD :
188                 elements ~= getXPathElement(el, false);
189                 i++;
190                 break;
191 
192             case TokenConstantDefinition.EOF :
193                 break loop;
194 
195             default :
196                 throw new IllegalArgumentException("Unknowth path element " ~ to!string(el));
197             }
198         }
199         XPathElement[] nullArray;
200         return elements = nullArray;
201     }
202 
203     /**
204      * @uml
205      * Convert word like {@code *} or {@code ID} or {@code expr} to a path
206      * element. {@code anywhere} is {@code true} if {@code //} precedes the
207      * word.
208      */
209     public XPathElement getXPathElement(Token wordToken, bool anywhere)
210     {
211 	if (wordToken.getType() == TokenConstantDefinition.EOF) {
212             throw new IllegalArgumentException("Missing path element at end of path");
213         }
214         string word = wordToken.getText();
215         int ttype = parser.getTokenType(word);
216         int ruleIndex = parser.getRuleIndex(word);
217         switch (wordToken.getType()) {
218         case XPathLexer.WILDCARD :
219             return anywhere ?
220                 new XPathWildcardAnywhereElement() :
221             new XPathWildcardElement();
222         case XPathLexer.TOKEN_REF :
223         case XPathLexer.STRING :
224             if (ttype == TokenConstantDefinition.INVALID_TYPE ) {
225                 throw new IllegalArgumentException(word~
226                                                    " at index "~
227                                                    to!string(wordToken.getStartIndex) ~
228                                                    " isn't a valid token name");
229             }
230             return anywhere ?
231                 new XPathTokenAnywhereElement(word, ttype) :
232                 new XPathTokenElement(word, ttype);
233         default :
234             if ( ruleIndex==-1 ) {
235                 throw new IllegalArgumentException(word ~
236                                                    " at index "~
237                                                    to!string(wordToken.getStartIndex) ~
238                                                    " isn't a valid rule name");
239             }
240             return anywhere ?
241                 new XPathRuleAnywhereElement(word, ruleIndex) :
242                 new XPathRuleElement(word, ruleIndex);
243         }
244     }
245 
246     public static ParseTree[] findAll(ParseTree tree, string xpath, Parser parser)
247     {
248         XPath p = new XPath(parser, xpath);
249         return p.evaluate(tree);
250     }
251 
252     /**
253      * @uml
254      * Return a list of all nodes starting at {@code t} as root that satisfy the
255      * path. The root {@code /} is relative to the node passed to
256      * {@link #evaluate}.
257      */
258     public ParseTree[] evaluate(ParseTree t)
259     {
260 	ParserRuleContext dummyRoot = new ParserRuleContext();
261         ParseTree[1] pt;
262         pt[0] = t;
263         dummyRoot.children = pt; // don't set t's parent.
264 
265         ParseTree[] work = [dummyRoot];
266 
267         int i = 0;
268         while (i < elements.length) {
269             DList!ParseTree next;
270             foreach (ParseTree node; work) {
271                 if (node.getChildCount() > 0) {
272                     // only try to match next element if it has children
273                     // e.g., //func/*/stat might have a token node for which
274                     // we can't go looking for stat nodes.
275                     ParseTree[] matching = elements[i].evaluate(node);
276                     next.insert(matching);
277                 }
278             }
279             i++;
280             work ~= array(next[]);
281         }
282         return work;
283     }
284 
285 }