1 /* 2 * Copyright (c) 2012-2019 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 module antlr.v4.runtime.tree.xpath.XPath; 8 9 import std.array; 10 import std.stdio; 11 import std.file; 12 import std.format; 13 import std.conv; 14 import std.container : DList; 15 import antlr.v4.runtime.Parser; 16 import antlr.v4.runtime.Token; 17 import antlr.v4.runtime.TokenConstantDefinition; 18 import antlr.v4.runtime.CommonTokenStream; 19 import antlr.v4.runtime.ANTLRInputStream; 20 import antlr.v4.runtime.IllegalArgumentException; 21 import antlr.v4.runtime.LexerNoViableAltException; 22 import antlr.v4.runtime.ParserRuleContext; 23 import antlr.v4.runtime.tree.ParseTree; 24 import antlr.v4.runtime.tree.xpath.XPathElement; 25 import antlr.v4.runtime.tree.xpath.XPathLexerErrorListener; 26 import antlr.v4.runtime.tree.xpath.XPathLexer; 27 import antlr.v4.runtime.tree.xpath.XPathWildcardAnywhereElement; 28 import antlr.v4.runtime.tree.xpath.XPathWildcardElement; 29 import antlr.v4.runtime.tree.xpath.XPathRuleElement; 30 import antlr.v4.runtime.tree.xpath.XPathRuleAnywhereElement; 31 import antlr.v4.runtime.tree.xpath.XPathTokenElement; 32 import antlr.v4.runtime.tree.xpath.XPathTokenAnywhereElement; 33 34 /** 35 * @uml 36 * Represent a subset of XPath XML path syntax for use in identifying nodes in 37 * parse trees. 38 * 39 * <p> 40 * Split path into words and separators {@code /} and {@code //} via ANTLR 41 * itself then walk path elements from left to right. At each separator-word 42 * pair, find set of nodes. Next stage uses those as work list.</p> 43 * 44 * <p> 45 * The basic interface is 46 * {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}. 47 * But that is just shorthand for:</p> 48 * 49 * <pre> 50 * {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString); 51 * return p.{@link #evaluate evaluate}(tree); 52 * </pre> 53 * 54 * <p> 55 * See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this 56 * allows operators:</p> 57 * 58 * <dl> 59 * <dt>/</dt> <dd>root</dd> 60 * <dt>//</dt> <dd>anywhere</dd> 61 * <dt>!</dt> <dd>invert; this must appear directly after root or anywhere 62 * operator</dd> 63 * </dl> 64 * 65 * <p> 66 * and path elements:</p> 67 * 68 * <dl> 69 * <dt>ID</dt> <dd>token name</dd> 70 * <dt>'string'</dt> <dd>any string literal token from the grammar</dd> 71 * <dt>expr</dt> <dd>rule name</dd> 72 * <dt>*</dt> <dd>wildcard matching any node</dd> 73 * </dl> 74 * 75 * <p> 76 * Whitespace is not allowed.</p> 77 */ 78 class XPath 79 { 80 81 /** 82 * @uml 83 * word not operator/separator 84 */ 85 public static immutable string WILDCARD = "*"; 86 87 /** 88 * @uml 89 * word for invert operator 90 */ 91 public static immutable string NOT = "!"; 92 93 protected string path; 94 95 protected XPathElement[] elements; 96 97 protected Parser parser; 98 99 public this(Parser parser, string path) 100 { 101 this.parser = parser; 102 this.path = path; 103 elements = split(path); 104 debug writefln("%s", elements); 105 } 106 107 /** 108 * @uml 109 * TODO: check for invalid token/rule names, bad syntax 110 */ 111 public XPathElement[] split(string path) 112 { 113 ANTLRInputStream ins; 114 try { 115 ins = new ANTLRInputStream(readText(path)); 116 } 117 catch (Exception ioe) { 118 throw new IllegalArgumentException("Could not read path: " ~ path, ioe); 119 } 120 XPathLexer lexer = new XPathLexer(ins); 121 lexer.removeErrorListeners(); 122 lexer.addErrorListener(new XPathLexerErrorListener()); 123 CommonTokenStream tokenStream = new CommonTokenStream(lexer); 124 try { 125 tokenStream.fill(); 126 } 127 catch (LexerNoViableAltException e) { 128 int pos = lexer.getCharPositionInLine(); 129 string msg = format("Invalid tokens or characters at index %1$s in path '%2$s'", pos, path); 130 throw new IllegalArgumentException(msg); 131 } 132 133 Token[] tokens = tokenStream.getTokens(); 134 // System.out.println("path="+path+"=>"+tokens); 135 XPathElement[] elements; 136 int n = to!int(tokens.length); 137 int i=0; 138 loop: 139 while ( i<n ) { 140 Token el = tokens[i]; 141 Token next = null; 142 switch ( el.getType() ) { 143 case XPathLexer.ROOT : 144 case XPathLexer.ANYWHERE : 145 bool anywhere = el.getType() == XPathLexer.ANYWHERE; 146 i++; 147 next = tokens[i]; 148 bool invert = next.getType() == XPathLexer.BANG; 149 if ( invert ) { 150 i++; 151 next = tokens[i]; 152 } 153 XPathElement pathElement = getXPathElement(next, anywhere); 154 pathElement.invert = invert; 155 elements ~= pathElement; 156 i++; 157 break; 158 159 case XPathLexer.TOKEN_REF : 160 case XPathLexer.RULE_REF : 161 case XPathLexer.WILDCARD : 162 elements ~= getXPathElement(el, false); 163 i++; 164 break; 165 166 case TokenConstantDefinition.EOF : 167 break loop; 168 169 default : 170 throw new IllegalArgumentException("Unknowth path element " ~ to!string(el)); 171 } 172 } 173 XPathElement[] nullArray; 174 return elements = nullArray; 175 } 176 177 /** 178 * @uml 179 * Convert word like {@code *} or {@code ID} or {@code expr} to a path 180 * element. {@code anywhere} is {@code true} if {@code //} precedes the 181 * word. 182 */ 183 public XPathElement getXPathElement(Token wordToken, bool anywhere) 184 { 185 if (wordToken.getType() == TokenConstantDefinition.EOF) { 186 throw new IllegalArgumentException("Missing path element at end of path"); 187 } 188 string word = to!string(wordToken.getText); 189 int ttype = parser.getTokenType(word); 190 int ruleIndex = parser.getRuleIndex(word); 191 switch (wordToken.getType()) { 192 case XPathLexer.WILDCARD : 193 return anywhere ? 194 new XPathWildcardAnywhereElement() : 195 new XPathWildcardElement(); 196 case XPathLexer.TOKEN_REF : 197 case XPathLexer.STRING : 198 if (ttype == TokenConstantDefinition.INVALID_TYPE ) { 199 throw new IllegalArgumentException(word~ 200 " at index "~ 201 to!string(wordToken.startIndex) ~ 202 " isn't a valid token name"); 203 } 204 return anywhere ? 205 new XPathTokenAnywhereElement(word, ttype) : 206 new XPathTokenElement(word, ttype); 207 default : 208 if ( ruleIndex==-1 ) { 209 throw new IllegalArgumentException(word ~ 210 " at index "~ 211 to!string(wordToken.startIndex) ~ 212 " isn't a valid rule name"); 213 } 214 return anywhere ? 215 new XPathRuleAnywhereElement(word, ruleIndex) : 216 new XPathRuleElement(word, ruleIndex); 217 } 218 } 219 220 public static ParseTree[] findAll(ParseTree tree, string xpath, Parser parser) 221 { 222 XPath p = new XPath(parser, xpath); 223 return p.evaluate(tree); 224 } 225 226 /** 227 * @uml 228 * Return a list of all nodes starting at {@code t} as root that satisfy the 229 * path. The root {@code /} is relative to the node passed to 230 * {@link #evaluate}. 231 */ 232 public ParseTree[] evaluate(ParseTree t) 233 { 234 ParserRuleContext dummyRoot = new ParserRuleContext(); 235 ParseTree[1] pt; 236 pt[0] = t; 237 dummyRoot.children = pt; // don't set t's parent. 238 239 ParseTree[] work = [dummyRoot]; 240 241 int i = 0; 242 while (i < elements.length) { 243 DList!ParseTree next; 244 foreach (ParseTree node; work) { 245 if (node.getChildCount() > 0) { 246 // only try to match next element if it has children 247 // e.g., //func/*/stat might have a token node for which 248 // we can't go looking for stat nodes. 249 ParseTree[] matching = elements[i].evaluate(node); 250 next.insert(matching); 251 } 252 } 253 i++; 254 work ~= array(next[]); 255 } 256 return work; 257 } 258 259 }