1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2012 Terence Parr 4 * Copyright (c) 2012 Sam Harwell 5 * Copyright (c) 2017 Egbert Voigt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 module antlr.v4.runtime.tree.xpath.XPath; 33 34 import std.array; 35 import std.stdio; 36 import std.file; 37 import std.format; 38 import std.conv; 39 import std.container : DList; 40 import antlr.v4.runtime.Parser; 41 import antlr.v4.runtime.Token; 42 import antlr.v4.runtime.TokenConstantDefinition; 43 import antlr.v4.runtime.CommonTokenStream; 44 import antlr.v4.runtime.ANTLRInputStream; 45 import antlr.v4.runtime.IllegalArgumentException; 46 import antlr.v4.runtime.LexerNoViableAltException; 47 import antlr.v4.runtime.ParserRuleContext; 48 import antlr.v4.runtime.tree.ParseTree; 49 import antlr.v4.runtime.tree.xpath.XPathElement; 50 import antlr.v4.runtime.tree.xpath.XPathLexerErrorListener; 51 import antlr.v4.runtime.tree.xpath.XPathLexer; 52 import antlr.v4.runtime.tree.xpath.XPathWildcardAnywhereElement; 53 import antlr.v4.runtime.tree.xpath.XPathWildcardElement; 54 import antlr.v4.runtime.tree.xpath.XPathRuleElement; 55 import antlr.v4.runtime.tree.xpath.XPathRuleAnywhereElement; 56 import antlr.v4.runtime.tree.xpath.XPathTokenElement; 57 import antlr.v4.runtime.tree.xpath.XPathTokenAnywhereElement; 58 59 /** 60 * @uml 61 * Represent a subset of XPath XML path syntax for use in identifying nodes in 62 * parse trees. 63 * 64 * <p> 65 * Split path into words and separators {@code /} and {@code //} via ANTLR 66 * itself then walk path elements from left to right. At each separator-word 67 * pair, find set of nodes. Next stage uses those as work list.</p> 68 * 69 * <p> 70 * The basic interface is 71 * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}. 72 * But that is just shorthand for:</p> 73 * 74 * <pre> 75 * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString); 76 * return p.{@link #evaluate evaluate}(tree); 77 * </pre> 78 * 79 * <p> 80 * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this 81 * allows operators:</p> 82 * 83 * <dl> 84 * <dt>/</dt> <dd>root</dd> 85 * <dt>//</dt> <dd>anywhere</dd> 86 * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere 87 * operator</dd> 88 * </dl> 89 * 90 * <p> 91 * and path elements:</p> 92 * 93 * <dl> 94 * <dt>ID</dt> <dd>token name</dd> 95 * <dt>'string'</dt> <dd>any string literal token from the grammar</dd> 96 * <dt>expr</dt> <dd>rule name</dd> 97 * <dt>*</dt> <dd>wildcard matching any node</dd> 98 * </dl> 99 * 100 * <p> 101 * Whitespace is not allowed.</p> 102 */ 103 class XPath 104 { 105 106 /** 107 * @uml 108 * word not operator/separator 109 */ 110 public static immutable string WILDCARD = "*"; 111 112 /** 113 * @uml 114 * word for invert operator 115 */ 116 public static immutable string NOT = "!"; 117 118 protected string path; 119 120 protected XPathElement[] elements; 121 122 protected Parser parser; 123 124 public this(Parser parser, string path) 125 { 126 this.parser = parser; 127 this.path = path; 128 elements = split(path); 129 debug writefln("%s", elements); 130 } 131 132 /** 133 * @uml 134 * TODO: check for invalid token/rule names, bad syntax 135 */ 136 public XPathElement[] split(string path) 137 { 138 ANTLRInputStream ins; 139 try { 140 ins = new ANTLRInputStream(readText(path)); 141 } 142 catch (Exception ioe) { 143 throw new IllegalArgumentException("Could not read path: " ~ path, ioe); 144 } 145 XPathLexer lexer = new XPathLexer(ins); 146 lexer.removeErrorListeners(); 147 lexer.addErrorListener(new XPathLexerErrorListener()); 148 CommonTokenStream tokenStream = new CommonTokenStream(lexer); 149 try { 150 tokenStream.fill(); 151 } 152 catch (LexerNoViableAltException e) { 153 int pos = lexer.getCharPositionInLine(); 154 string msg = format("Invalid tokens or characters at index %1$s in path '%2$s'", pos, path); 155 throw new IllegalArgumentException(msg); 156 } 157 158 Token[] tokens = tokenStream.getTokens(); 159 // System.out.println("path="+path+"=>"+tokens); 160 XPathElement[] elements; 161 int n = to!int(tokens.length); 162 int i=0; 163 loop: 164 while ( i<n ) { 165 Token el = tokens[i]; 166 Token next = null; 167 switch ( el.getType() ) { 168 case XPathLexer.ROOT : 169 case XPathLexer.ANYWHERE : 170 bool anywhere = el.getType() == XPathLexer.ANYWHERE; 171 i++; 172 next = tokens[i]; 173 bool invert = next.getType() == XPathLexer.BANG; 174 if ( invert ) { 175 i++; 176 next = tokens[i]; 177 } 178 XPathElement pathElement = getXPathElement(next, anywhere); 179 pathElement.invert = invert; 180 elements ~= pathElement; 181 i++; 182 break; 183 184 case XPathLexer.TOKEN_REF : 185 case XPathLexer.RULE_REF : 186 case XPathLexer.WILDCARD : 187 elements ~= getXPathElement(el, false); 188 i++; 189 break; 190 191 case TokenConstantDefinition.EOF : 192 break loop; 193 194 default : 195 throw new IllegalArgumentException("Unknowth path element " ~ to!string(el)); 196 } 197 } 198 XPathElement[] nullArray; 199 return elements = nullArray; 200 } 201 202 /** 203 * @uml 204 * Convert word like {@code *} or {@code ID} or {@code expr} to a path 205 * element. {@code anywhere} is {@code true} if {@code //} precedes the 206 * word. 207 */ 208 public XPathElement getXPathElement(Token wordToken, bool anywhere) 209 { 210 if (wordToken.getType() == TokenConstantDefinition.EOF) { 211 throw new IllegalArgumentException("Missing path element at end of path"); 212 } 213 string word = wordToken.getText(); 214 int ttype = parser.getTokenType(word); 215 int ruleIndex = parser.getRuleIndex(word); 216 switch (wordToken.getType()) { 217 case XPathLexer.WILDCARD : 218 return anywhere ? 219 new XPathWildcardAnywhereElement() : 220 new XPathWildcardElement(); 221 case XPathLexer.TOKEN_REF : 222 case XPathLexer.STRING : 223 if (ttype == TokenConstantDefinition.INVALID_TYPE ) { 224 throw new IllegalArgumentException(word~ 225 " at index "~ 226 to!string(wordToken.getStartIndex) ~ 227 " isn't a valid token name"); 228 } 229 return anywhere ? 230 new XPathTokenAnywhereElement(word, ttype) : 231 new XPathTokenElement(word, ttype); 232 default : 233 if ( ruleIndex==-1 ) { 234 throw new IllegalArgumentException(word ~ 235 " at index "~ 236 to!string(wordToken.getStartIndex) ~ 237 " isn't a valid rule name"); 238 } 239 return anywhere ? 240 new XPathRuleAnywhereElement(word, ruleIndex) : 241 new XPathRuleElement(word, ruleIndex); 242 } 243 } 244 245 public static ParseTree[] findAll(ParseTree tree, string xpath, Parser parser) 246 { 247 XPath p = new XPath(parser, xpath); 248 return p.evaluate(tree); 249 } 250 251 /** 252 * @uml 253 * Return a list of all nodes starting at {@code t} as root that satisfy the 254 * path. The root {@code /} is relative to the node passed to 255 * {@link #evaluate}. 256 */ 257 public ParseTree[] evaluate(ParseTree t) 258 { 259 ParserRuleContext dummyRoot = new ParserRuleContext(); 260 ParseTree[1] pt; 261 pt[0] = t; 262 dummyRoot.children = pt; // don't set t's parent. 263 264 ParseTree[] work = [dummyRoot]; 265 266 int i = 0; 267 while (i < elements.length) { 268 DList!ParseTree next; 269 foreach (ParseTree node; work) { 270 if (node.getChildCount() > 0) { 271 // only try to match next element if it has children 272 // e.g., //func/*/stat might have a token node for which 273 // we can't go looking for stat nodes. 274 ParseTree[] matching = elements[i].evaluate(node); 275 next.insert(matching); 276 } 277 } 278 i++; 279 work ~= array(next[]); 280 } 281 return work; 282 } 283 284 }