antlr.v4.runtime.tree.xpath.XPath source code

1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2012 Terence Parr
4  *  Copyright (c) 2012 Sam Harwell
5  *  Copyright (c) 2017 Egbert Voigt
6  *  All rights reserved.
7  *
8  *  Redistribution and use in source and binary forms, with or without
9  *  modification, are permitted provided that the following conditions
10  *  are met:
11  *
12  *  1. Redistributions of source code must retain the above copyright
13  *     notice, this list of conditions and the following disclaimer.
14  *  2. Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  *  3. The name of the author may not be used to endorse or promote products
18  *     derived from this software without specific prior written permission.
19  *
20  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 module antlr.v4.runtime.tree.xpath.XPath;
33 
34 import std.array;
35 import std.stdio;
36 import std.file;
37 import std.format;
38 import std.conv;
39 import std.container : DList;
40 import antlr.v4.runtime.Parser;
41 import antlr.v4.runtime.Token;
42 import antlr.v4.runtime.TokenConstantDefinition;
43 import antlr.v4.runtime.CommonTokenStream;
44 import antlr.v4.runtime.ANTLRInputStream;
45 import antlr.v4.runtime.IllegalArgumentException;
46 import antlr.v4.runtime.LexerNoViableAltException;
47 import antlr.v4.runtime.ParserRuleContext;
48 import antlr.v4.runtime.tree.ParseTree;
49 import antlr.v4.runtime.tree.xpath.XPathElement;
50 import antlr.v4.runtime.tree.xpath.XPathLexerErrorListener;
51 import antlr.v4.runtime.tree.xpath.XPathLexer;
52 import antlr.v4.runtime.tree.xpath.XPathWildcardAnywhereElement;
53 import antlr.v4.runtime.tree.xpath.XPathWildcardElement;
54 import antlr.v4.runtime.tree.xpath.XPathRuleElement;
55 import antlr.v4.runtime.tree.xpath.XPathRuleAnywhereElement;
56 import antlr.v4.runtime.tree.xpath.XPathTokenElement;
57 import antlr.v4.runtime.tree.xpath.XPathTokenAnywhereElement;
58 
59 /**
60  * @uml
61  * Represent a subset of XPath XML path syntax for use in identifying nodes in
62  * parse trees.
63  *
64  * <p>
65  * Split path into words and separators {@code /} and {@code //} via ANTLR
66  * itself then walk path elements from left to right. At each separator-word
67  * pair, find set of nodes. Next stage uses those as work list.</p>
68  *
69  * <p>
70  * The basic interface is
71  * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
72  * But that is just shorthand for:</p>
73  *
74  * <pre>
75  * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
76  * return p.{@link #evaluate evaluate}(tree);
77  * </pre>
78  *
79  * <p>
80  * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
81  * allows operators:</p>
82  *
83  * <dl>
84  * <dt>/</dt> <dd>root</dd>
85  * <dt>//</dt> <dd>anywhere</dd>
86  * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
87  * operator</dd>
88  * </dl>
89  *
90  * <p>
91  * and path elements:</p>
92  *
93  * <dl>
94  * <dt>ID</dt> <dd>token name</dd>
95  * <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
96  * <dt>expr</dt> <dd>rule name</dd>
97  * <dt>*</dt> <dd>wildcard matching any node</dd>
98  * </dl>
99  *
100  * <p>
101  * Whitespace is not allowed.</p>
102  */
103 class XPath
104 {
105 
106     /**
107      * @uml
108      * word not operator/separator
109      */
110     public static immutable string WILDCARD = "*";
111 
112     /**
113      * @uml
114      * word for invert operator
115      */
116     public static immutable string NOT = "!";
117 
118     protected string path;
119 
120     protected XPathElement[] elements;
121 
122     protected Parser parser;
123 
124     public this(Parser parser, string path)
125     {
126         this.parser = parser;
127         this.path = path;
128         elements = split(path);
129         debug writefln("%s", elements);
130     }
131 
132     /**
133      * @uml
134      * TODO: check for invalid token/rule names, bad syntax
135      */
136     public XPathElement[] split(string path)
137     {
138         ANTLRInputStream ins;
139         try {
140             ins = new ANTLRInputStream(readText(path));
141         }
142         catch (Exception ioe) {
143             throw new IllegalArgumentException("Could not read path: " ~ path, ioe);
144         }
145         XPathLexer lexer = new XPathLexer(ins);
146         lexer.removeErrorListeners();
147         lexer.addErrorListener(new XPathLexerErrorListener());
148         CommonTokenStream tokenStream = new CommonTokenStream(lexer);
149         try {
150             tokenStream.fill();
151         }
152         catch (LexerNoViableAltException e) {
153             int pos = lexer.getCharPositionInLine();
154             string msg = format("Invalid tokens or characters at index %1$s in path '%2$s'", pos, path);
155             throw new IllegalArgumentException(msg);
156         }
157 
158         Token[] tokens = tokenStream.getTokens();
159         //		System.out.println("path="+path+"=>"+tokens);
160         XPathElement[] elements;
161         int n = to!int(tokens.length);
162         int i=0;
163     loop:
164         while ( i<n ) {
165             Token el = tokens[i];
166             Token next = null;
167             switch ( el.getType() ) {
168             case XPathLexer.ROOT :
169             case XPathLexer.ANYWHERE :
170                 bool anywhere = el.getType() == XPathLexer.ANYWHERE;
171                 i++;
172                 next = tokens[i];
173                 bool invert = next.getType() == XPathLexer.BANG;
174                 if ( invert ) {
175                     i++;
176                     next = tokens[i];
177                 }
178                 XPathElement pathElement = getXPathElement(next, anywhere);
179                 pathElement.invert = invert;
180                 elements ~= pathElement;
181                 i++;
182                 break;
183 
184             case XPathLexer.TOKEN_REF :
185             case XPathLexer.RULE_REF :
186             case XPathLexer.WILDCARD :
187                 elements ~= getXPathElement(el, false);
188                 i++;
189                 break;
190 
191             case TokenConstantDefinition.EOF :
192                 break loop;
193 
194             default :
195                 throw new IllegalArgumentException("Unknowth path element " ~ to!string(el));
196             }
197         }
198         XPathElement[] nullArray;
199         return elements = nullArray;
200     }
201 
202     /**
203      * @uml
204      * Convert word like {@code *} or {@code ID} or {@code expr} to a path
205      * element. {@code anywhere} is {@code true} if {@code //} precedes the
206      * word.
207      */
208     public XPathElement getXPathElement(Token wordToken, bool anywhere)
209     {
210 	if (wordToken.getType() == TokenConstantDefinition.EOF) {
211             throw new IllegalArgumentException("Missing path element at end of path");
212         }
213         string word = wordToken.getText();
214         int ttype = parser.getTokenType(word);
215         int ruleIndex = parser.getRuleIndex(word);
216         switch (wordToken.getType()) {
217         case XPathLexer.WILDCARD :
218             return anywhere ?
219                 new XPathWildcardAnywhereElement() :
220             new XPathWildcardElement();
221         case XPathLexer.TOKEN_REF :
222         case XPathLexer.STRING :
223             if (ttype == TokenConstantDefinition.INVALID_TYPE ) {
224                 throw new IllegalArgumentException(word~
225                                                    " at index "~
226                                                    to!string(wordToken.getStartIndex) ~
227                                                    " isn't a valid token name");
228             }
229             return anywhere ?
230                 new XPathTokenAnywhereElement(word, ttype) :
231                 new XPathTokenElement(word, ttype);
232         default :
233             if ( ruleIndex==-1 ) {
234                 throw new IllegalArgumentException(word ~
235                                                    " at index "~
236                                                    to!string(wordToken.getStartIndex) ~
237                                                    " isn't a valid rule name");
238             }
239             return anywhere ?
240                 new XPathRuleAnywhereElement(word, ruleIndex) :
241                 new XPathRuleElement(word, ruleIndex);
242         }
243     }
244 
245     public static ParseTree[] findAll(ParseTree tree, string xpath, Parser parser)
246     {
247         XPath p = new XPath(parser, xpath);
248         return p.evaluate(tree);
249     }
250 
251     /**
252      * @uml
253      * Return a list of all nodes starting at {@code t} as root that satisfy the
254      * path. The root {@code /} is relative to the node passed to
255      * {@link #evaluate}.
256      */
257     public ParseTree[] evaluate(ParseTree t)
258     {
259 	ParserRuleContext dummyRoot = new ParserRuleContext();
260         ParseTree[1] pt;
261         pt[0] = t;
262         dummyRoot.children = pt; // don't set t's parent.
263 
264         ParseTree[] work = [dummyRoot];
265 
266         int i = 0;
267         while (i < elements.length) {
268             DList!ParseTree next;
269             foreach (ParseTree node; work) {
270                 if (node.getChildCount() > 0) {
271                     // only try to match next element if it has children
272                     // e.g., //func/*/stat might have a token node for which
273                     // we can't go looking for stat nodes.
274                     ParseTree[] matching = elements[i].evaluate(node);
275                     next.insert(matching);
276                 }
277             }
278             i++;
279             work ~= array(next[]);
280         }
281         return work;
282     }
283 
284 }