antlr.v4.runtime.tree.xpath.XPath source code

1 /*
2  * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 module antlr.v4.runtime.tree.xpath.XPath;
8 
9 import std.array;
10 import std.stdio;
11 import std.file;
12 import std.format;
13 import std.conv;
14 import std.container : DList;
15 import antlr.v4.runtime.Parser;
16 import antlr.v4.runtime.Token;
17 import antlr.v4.runtime.TokenConstantDefinition;
18 import antlr.v4.runtime.CommonTokenStream;
19 import antlr.v4.runtime.ANTLRInputStream;
20 import antlr.v4.runtime.IllegalArgumentException;
21 import antlr.v4.runtime.LexerNoViableAltException;
22 import antlr.v4.runtime.ParserRuleContext;
23 import antlr.v4.runtime.tree.ParseTree;
24 import antlr.v4.runtime.tree.xpath.XPathElement;
25 import antlr.v4.runtime.tree.xpath.XPathLexerErrorListener;
26 import antlr.v4.runtime.tree.xpath.XPathLexer;
27 import antlr.v4.runtime.tree.xpath.XPathWildcardAnywhereElement;
28 import antlr.v4.runtime.tree.xpath.XPathWildcardElement;
29 import antlr.v4.runtime.tree.xpath.XPathRuleElement;
30 import antlr.v4.runtime.tree.xpath.XPathRuleAnywhereElement;
31 import antlr.v4.runtime.tree.xpath.XPathTokenElement;
32 import antlr.v4.runtime.tree.xpath.XPathTokenAnywhereElement;
33 
34 /**
35  * @uml
36  * Represent a subset of XPath XML path syntax for use in identifying nodes in
37  * parse trees.
38  *
39  * <p>
40  * Split path into words and separators {@code /} and {@code //} via ANTLR
41  * itself then walk path elements from left to right. At each separator-word
42  * pair, find set of nodes. Next stage uses those as work list.</p>
43  *
44  * <p>
45  * The basic interface is
46  * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
47  * But that is just shorthand for:</p>
48  *
49  * <pre>
50  * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
51  * return p.{@link #evaluate evaluate}(tree);
52  * </pre>
53  *
54  * <p>
55  * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
56  * allows operators:</p>
57  *
58  * <dl>
59  * <dt>/</dt> <dd>root</dd>
60  * <dt>//</dt> <dd>anywhere</dd>
61  * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
62  * operator</dd>
63  * </dl>
64  *
65  * <p>
66  * and path elements:</p>
67  *
68  * <dl>
69  * <dt>ID</dt> <dd>token name</dd>
70  * <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
71  * <dt>expr</dt> <dd>rule name</dd>
72  * <dt>*</dt> <dd>wildcard matching any node</dd>
73  * </dl>
74  *
75  * <p>
76  * Whitespace is not allowed.</p>
77  */
78 class XPath
79 {
80 
81     /**
82      * @uml
83      * word not operator/separator
84      */
85     public static immutable string WILDCARD = "*";
86 
87     /**
88      * @uml
89      * word for invert operator
90      */
91     public static immutable string NOT = "!";
92 
93     protected string path;
94 
95     protected XPathElement[] elements;
96 
97     protected Parser parser;
98 
99     public this(Parser parser, string path)
100     {
101         this.parser = parser;
102         this.path = path;
103         elements = split(path);
104         debug writefln("%s", elements);
105     }
106 
107     /**
108      * @uml
109      * TODO: check for invalid token/rule names, bad syntax
110      */
111     public XPathElement[] split(string path)
112     {
113         ANTLRInputStream ins;
114         try {
115             ins = new ANTLRInputStream(readText(path));
116         }
117         catch (Exception ioe) {
118             throw new IllegalArgumentException("Could not read path: " ~ path, ioe);
119         }
120         XPathLexer lexer = new XPathLexer(ins);
121         lexer.removeErrorListeners();
122         lexer.addErrorListener(new XPathLexerErrorListener());
123         CommonTokenStream tokenStream = new CommonTokenStream(lexer);
124         try {
125             tokenStream.fill();
126         }
127         catch (LexerNoViableAltException e) {
128             int pos = lexer.getCharPositionInLine();
129             string msg = format("Invalid tokens or characters at index %1$s in path '%2$s'", pos, path);
130             throw new IllegalArgumentException(msg);
131         }
132 
133         Token[] tokens = tokenStream.getTokens();
134         //		System.out.println("path="+path+"=>"+tokens);
135         XPathElement[] elements;
136         int n = to!int(tokens.length);
137         int i=0;
138     loop:
139         while ( i<n ) {
140             Token el = tokens[i];
141             Token next = null;
142             switch ( el.getType() ) {
143             case XPathLexer.ROOT :
144             case XPathLexer.ANYWHERE :
145                 bool anywhere = el.getType() == XPathLexer.ANYWHERE;
146                 i++;
147                 next = tokens[i];
148                 bool invert = next.getType() == XPathLexer.BANG;
149                 if ( invert ) {
150                     i++;
151                     next = tokens[i];
152                 }
153                 XPathElement pathElement = getXPathElement(next, anywhere);
154                 pathElement.invert = invert;
155                 elements ~= pathElement;
156                 i++;
157                 break;
158 
159             case XPathLexer.TOKEN_REF :
160             case XPathLexer.RULE_REF :
161             case XPathLexer.WILDCARD :
162                 elements ~= getXPathElement(el, false);
163                 i++;
164                 break;
165 
166             case TokenConstantDefinition.EOF :
167                 break loop;
168 
169             default :
170                 throw new IllegalArgumentException("Unknowth path element " ~ to!string(el));
171             }
172         }
173         XPathElement[] nullArray;
174         return elements = nullArray;
175     }
176 
177     /**
178      * @uml
179      * Convert word like {@code *} or {@code ID} or {@code expr} to a path
180      * element. {@code anywhere} is {@code true} if {@code //} precedes the
181      * word.
182      */
183     public XPathElement getXPathElement(Token wordToken, bool anywhere)
184     {
185 	if (wordToken.getType() == TokenConstantDefinition.EOF) {
186             throw new IllegalArgumentException("Missing path element at end of path");
187         }
188         string word = to!string(wordToken.getText);
189         int ttype = parser.getTokenType(word);
190         int ruleIndex = parser.getRuleIndex(word);
191         switch (wordToken.getType()) {
192         case XPathLexer.WILDCARD :
193             return anywhere ?
194                 new XPathWildcardAnywhereElement() :
195             new XPathWildcardElement();
196         case XPathLexer.TOKEN_REF :
197         case XPathLexer.STRING :
198             if (ttype == TokenConstantDefinition.INVALID_TYPE ) {
199                 throw new IllegalArgumentException(word~
200                                                    " at index "~
201                                                    to!string(wordToken.startIndex) ~
202                                                    " isn't a valid token name");
203             }
204             return anywhere ?
205                 new XPathTokenAnywhereElement(word, ttype) :
206                 new XPathTokenElement(word, ttype);
207         default :
208             if ( ruleIndex==-1 ) {
209                 throw new IllegalArgumentException(word ~
210                                                    " at index "~
211                                                    to!string(wordToken.startIndex) ~
212                                                    " isn't a valid rule name");
213             }
214             return anywhere ?
215                 new XPathRuleAnywhereElement(word, ruleIndex) :
216                 new XPathRuleElement(word, ruleIndex);
217         }
218     }
219 
220     public static ParseTree[] findAll(ParseTree tree, string xpath, Parser parser)
221     {
222         XPath p = new XPath(parser, xpath);
223         return p.evaluate(tree);
224     }
225 
226     /**
227      * @uml
228      * Return a list of all nodes starting at {@code t} as root that satisfy the
229      * path. The root {@code /} is relative to the node passed to
230      * {@link #evaluate}.
231      */
232     public ParseTree[] evaluate(ParseTree t)
233     {
234 	ParserRuleContext dummyRoot = new ParserRuleContext();
235         ParseTree[1] pt;
236         pt[0] = t;
237         dummyRoot.children = pt; // don't set t's parent.
238 
239         ParseTree[] work = [dummyRoot];
240 
241         int i = 0;
242         while (i < elements.length) {
243             DList!ParseTree next;
244             foreach (ParseTree node; work) {
245                 if (node.getChildCount() > 0) {
246                     // only try to match next element if it has children
247                     // e.g., //func/*/stat might have a token node for which
248                     // we can't go looking for stat nodes.
249                     ParseTree[] matching = elements[i].evaluate(node);
250                     next.insert(matching);
251                 }
252             }
253             i++;
254             work ~= array(next[]);
255         }
256         return work;
257     }
258 
259 }