1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2012 Terence Parr 4 * Copyright (c) 2012 Sam Harwell 5 * Copyright (c) 2017 Egbert Voigt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 module antlr.v4.runtime.tree.xpath.XPath; 33 34 import std.array; 35 import std.stdio; 36 import std.file; 37 import std.format; 38 import std.conv; 39 import std.container : DList; 40 import antlr.v4.runtime.Parser; 41 import antlr.v4.runtime.Token; 42 import antlr.v4.runtime.TokenConstantDefinition; 43 import antlr.v4.runtime.CommonTokenStream; 44 import antlr.v4.runtime.ANTLRInputStream; 45 import antlr.v4.runtime.IllegalArgumentException; 46 import antlr.v4.runtime.LexerNoViableAltException; 47 import antlr.v4.runtime.ParserRuleContext; 48 import antlr.v4.runtime.tree.ParseTree; 49 import antlr.v4.runtime.tree.xpath.XPathElement; 50 import antlr.v4.runtime.tree.xpath.XPathLexerErrorListener; 51 import antlr.v4.runtime.tree.xpath.XPathLexer; 52 import antlr.v4.runtime.tree.xpath.XPathWildcardAnywhereElement; 53 import antlr.v4.runtime.tree.xpath.XPathWildcardElement; 54 import antlr.v4.runtime.tree.xpath.XPathRuleElement; 55 import antlr.v4.runtime.tree.xpath.XPathRuleAnywhereElement; 56 import antlr.v4.runtime.tree.xpath.XPathTokenElement; 57 import antlr.v4.runtime.tree.xpath.XPathTokenAnywhereElement; 58 59 // Class XPath 60 /** 61 * @uml 62 * Represent a subset of XPath XML path syntax for use in identifying nodes in 63 * parse trees. 64 * 65 * <p> 66 * Split path into words and separators {@code /} and {@code //} via ANTLR 67 * itself then walk path elements from left to right. At each separator-word 68 * pair, find set of nodes. Next stage uses those as work list.</p> 69 * 70 * <p> 71 * The basic interface is 72 * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}. 73 * But that is just shorthand for:</p> 74 * 75 * <pre> 76 * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString); 77 * return p.{@link #evaluate evaluate}(tree); 78 * </pre> 79 * 80 * <p> 81 * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this 82 * allows operators:</p> 83 * 84 * <dl> 85 * <dt>/</dt> <dd>root</dd> 86 * <dt>//</dt> <dd>anywhere</dd> 87 * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere 88 * operator</dd> 89 * </dl> 90 * 91 * <p> 92 * and path elements:</p> 93 * 94 * <dl> 95 * <dt>ID</dt> <dd>token name</dd> 96 * <dt>'string'</dt> <dd>any string literal token from the grammar</dd> 97 * <dt>expr</dt> <dd>rule name</dd> 98 * <dt>*</dt> <dd>wildcard matching any node</dd> 99 * </dl> 100 * 101 * <p> 102 * Whitespace is not allowed.</p> 103 */ 104 class XPath 105 { 106 107 /** 108 * @uml 109 * word not operator/separator 110 */ 111 public static immutable string WILDCARD = "*"; 112 113 /** 114 * @uml 115 * word for invert operator 116 */ 117 public static immutable string NOT = "!"; 118 119 protected string path; 120 121 protected XPathElement[] elements; 122 123 protected Parser parser; 124 125 public this(Parser parser, string path) 126 { 127 this.parser = parser; 128 this.path = path; 129 elements = split(path); 130 debug writefln("%s", elements); 131 } 132 133 /** 134 * @uml 135 * TODO: check for invalid token/rule names, bad syntax 136 */ 137 public XPathElement[] split(string path) 138 { 139 ANTLRInputStream ins; 140 try { 141 ins = new ANTLRInputStream(readText(path)); 142 } 143 catch (Exception ioe) { 144 throw new IllegalArgumentException("Could not read path: " ~ path, ioe); 145 } 146 XPathLexer lexer = new XPathLexer(ins); 147 lexer.removeErrorListeners(); 148 lexer.addErrorListener(new XPathLexerErrorListener()); 149 CommonTokenStream tokenStream = new CommonTokenStream(lexer); 150 try { 151 tokenStream.fill(); 152 } 153 catch (LexerNoViableAltException e) { 154 int pos = lexer.getCharPositionInLine(); 155 string msg = format("Invalid tokens or characters at index %1$s in path '%2$s'", pos, path); 156 throw new IllegalArgumentException(msg); 157 } 158 159 Token[] tokens = tokenStream.getTokens(); 160 // System.out.println("path="+path+"=>"+tokens); 161 XPathElement[] elements; 162 int n = to!int(tokens.length); 163 int i=0; 164 loop: 165 while ( i<n ) { 166 Token el = tokens[i]; 167 Token next = null; 168 switch ( el.getType() ) { 169 case XPathLexer.ROOT : 170 case XPathLexer.ANYWHERE : 171 bool anywhere = el.getType() == XPathLexer.ANYWHERE; 172 i++; 173 next = tokens[i]; 174 bool invert = next.getType() == XPathLexer.BANG; 175 if ( invert ) { 176 i++; 177 next = tokens[i]; 178 } 179 XPathElement pathElement = getXPathElement(next, anywhere); 180 pathElement.invert = invert; 181 elements ~= pathElement; 182 i++; 183 break; 184 185 case XPathLexer.TOKEN_REF : 186 case XPathLexer.RULE_REF : 187 case XPathLexer.WILDCARD : 188 elements ~= getXPathElement(el, false); 189 i++; 190 break; 191 192 case TokenConstantDefinition.EOF : 193 break loop; 194 195 default : 196 throw new IllegalArgumentException("Unknowth path element " ~ to!string(el)); 197 } 198 } 199 XPathElement[] nullArray; 200 return elements = nullArray; 201 } 202 203 /** 204 * @uml 205 * Convert word like {@code *} or {@code ID} or {@code expr} to a path 206 * element. {@code anywhere} is {@code true} if {@code //} precedes the 207 * word. 208 */ 209 public XPathElement getXPathElement(Token wordToken, bool anywhere) 210 { 211 if (wordToken.getType() == TokenConstantDefinition.EOF) { 212 throw new IllegalArgumentException("Missing path element at end of path"); 213 } 214 string word = wordToken.getText(); 215 int ttype = parser.getTokenType(word); 216 int ruleIndex = parser.getRuleIndex(word); 217 switch (wordToken.getType()) { 218 case XPathLexer.WILDCARD : 219 return anywhere ? 220 new XPathWildcardAnywhereElement() : 221 new XPathWildcardElement(); 222 case XPathLexer.TOKEN_REF : 223 case XPathLexer.STRING : 224 if (ttype == TokenConstantDefinition.INVALID_TYPE ) { 225 throw new IllegalArgumentException(word~ 226 " at index "~ 227 to!string(wordToken.getStartIndex) ~ 228 " isn't a valid token name"); 229 } 230 return anywhere ? 231 new XPathTokenAnywhereElement(word, ttype) : 232 new XPathTokenElement(word, ttype); 233 default : 234 if ( ruleIndex==-1 ) { 235 throw new IllegalArgumentException(word ~ 236 " at index "~ 237 to!string(wordToken.getStartIndex) ~ 238 " isn't a valid rule name"); 239 } 240 return anywhere ? 241 new XPathRuleAnywhereElement(word, ruleIndex) : 242 new XPathRuleElement(word, ruleIndex); 243 } 244 } 245 246 public static ParseTree[] findAll(ParseTree tree, string xpath, Parser parser) 247 { 248 XPath p = new XPath(parser, xpath); 249 return p.evaluate(tree); 250 } 251 252 /** 253 * @uml 254 * Return a list of all nodes starting at {@code t} as root that satisfy the 255 * path. The root {@code /} is relative to the node passed to 256 * {@link #evaluate}. 257 */ 258 public ParseTree[] evaluate(ParseTree t) 259 { 260 ParserRuleContext dummyRoot = new ParserRuleContext(); 261 ParseTree[1] pt; 262 pt[0] = t; 263 dummyRoot.children = pt; // don't set t's parent. 264 265 ParseTree[] work = [dummyRoot]; 266 267 int i = 0; 268 while (i < elements.length) { 269 DList!ParseTree next; 270 foreach (ParseTree node; work) { 271 if (node.getChildCount() > 0) { 272 // only try to match next element if it has children 273 // e.g., //func/*/stat might have a token node for which 274 // we can't go looking for stat nodes. 275 ParseTree[] matching = elements[i].evaluate(node); 276 next.insert(matching); 277 } 278 } 279 i++; 280 work ~= array(next[]); 281 } 282 return work; 283 } 284 285 }