From b4d3f93e8fa8be468212bcc5dcc7f51963a96f75 Mon Sep 17 00:00:00 2001 From: Evie Lau <689163+evie-lau@users.noreply.github.com> Date: Mon, 8 Jul 2024 07:31:24 -0500 Subject: [PATCH] XPath condition enhancements - multiple conditions and and/or operators (#4305) * Support chained conditions * Add support for and/or XPath operators * Autoformat * Simplify pattern for matching XPath conditions * Update javadoc, add more complex condition tests * Enable the namespace match functions test separately --------- Co-authored-by: Tim te Beek --- .../org/openrewrite/xml/XPathMatcher.java | 107 ++++++++++++------ .../org/openrewrite/xml/XPathMatcherTest.java | 101 +++++++++++++++-- 2 files changed, 162 insertions(+), 46 deletions(-) diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java b/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java index bae47c1b672..74905d9281e 100644 --- a/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java +++ b/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java @@ -29,6 +29,7 @@ /** * Supports a limited set of XPath expressions, specifically those documented on this page. + * Additionally, supports `local-name()` and `namespace-uri()` conditions, `and`/`or` operators, and chained conditions. *

* Used for checking whether a visitor's cursor meets a certain XPath expression. *

@@ -37,9 +38,11 @@ */ public class XPathMatcher { - private static final Pattern XPATH_ELEMENT_SPLITTER = Pattern.compile("((?<=/)(?=/)|[^/\\[]|\\[[^]]*\\])+"); + private static final Pattern XPATH_ELEMENT_SPLITTER = Pattern.compile("((?<=/)(?=/)|[^/\\[]|\\[[^]]*])+"); // Regular expression to support conditional tags like `plugin[artifactId='maven-compiler-plugin']` or foo[@bar='baz'] - private static final Pattern PATTERN = Pattern.compile("(@)?([-:\\w]+|\\*)\\[((local-name|namespace-uri)\\(\\)|(@)?([-\\w]+|\\*))='(.*)']"); + private static final Pattern ELEMENT_WITH_CONDITION_PATTERN = Pattern.compile("(@)?([-:\\w]+|\\*)(\\[.+])"); + private static final Pattern CONDITION_PATTERN = Pattern.compile("(\\[.*?])+?"); + private static final Pattern CONDITION_CONJUNCTION_PATTERN = Pattern.compile("(((local-name|namespace-uri)\\(\\)|(@)?([-\\w:]+|\\*))='(.*?)'(\\h?(or|and)\\h?)?)+?"); private final String expression; private final boolean startsWithSlash; @@ -120,8 +123,8 @@ public boolean matches(Cursor cursor) { boolean matchedCondition = false; Matcher matcher; - if (tagForCondition != null && partWithCondition.endsWith("]") && (matcher = PATTERN.matcher( - partWithCondition)).matches()) { + if (tagForCondition != null && partWithCondition.endsWith("]") + && (matcher = ELEMENT_WITH_CONDITION_PATTERN.matcher(partWithCondition)).matches()) { String optionalPartName = matchesElementWithConditionFunction(matcher, tagForCondition, cursor); if (optionalPartName == null) { return false; @@ -147,16 +150,16 @@ public boolean matches(Cursor cursor) { continue; } - boolean conditionNotFulfilled = - tagForCondition == null || (!part.equals(partName) && !tagForCondition.getName() - .equals(partName)); + boolean conditionNotFulfilled = tagForCondition == null + || (!part.equals(partName) && !tagForCondition.getName().equals(partName)); int idx = part.indexOf("["); if (idx > 0) { part = part.substring(0, idx); } - if (path.size() < i + 1 || ( - !(path.get(pathIndex).getName().equals(part)) && !"*".equals(part)) || conditionIsBefore && conditionNotFulfilled) { + if (path.size() < i + 1 + || (!(path.get(pathIndex).getName().equals(part)) && !"*".equals(part)) + || conditionIsBefore && conditionNotFulfilled) { return false; } } @@ -203,7 +206,7 @@ public boolean matches(Cursor cursor) { boolean matchedCondition = false; Matcher matcher; - if (tag != null && part.endsWith("]") && (matcher = PATTERN.matcher(part)).matches()) { + if (tag != null && part.endsWith("]") && (matcher = ELEMENT_WITH_CONDITION_PATTERN.matcher(part)).matches()) { String optionalPartName = matchesElementWithConditionFunction(matcher, tag, cursor); if (optionalPartName == null) { return false; @@ -236,40 +239,74 @@ public boolean matches(Cursor cursor) { private String matchesElementWithConditionFunction(Matcher matcher, Xml.Tag tag, Cursor cursor) { boolean isAttributeElement = matcher.group(1) != null; String element = matcher.group(2); - boolean isAttributeCondition = matcher.group(5) != null; // either group4 != null, or group 2 startsWith @ - String selector = isAttributeCondition ? matcher.group(6) : matcher.group(3); - boolean isFunctionCondition = selector.endsWith("()"); - String value = matcher.group(7); - - boolean matchCondition = false; - if (isAttributeCondition) { - for (Xml.Attribute a : tag.getAttributes()) { - if ((a.getKeyAsString().equals(selector) || "*".equals(selector)) && a.getValueAsString().equals(value)) { - matchCondition = true; + String allConditions = matcher.group(3); + + // Fail quickly if element name doesn't match + if (!isAttributeElement && !tag.getName().equals(element) && !"*".equals(element)) { + return null; + } + + // check that all conditions match on current element + Matcher conditions = CONDITION_PATTERN.matcher(allConditions); + boolean stillMatchesConditions = true; + while (conditions.find() && stillMatchesConditions) { + String conditionGroup = conditions.group(1); + Matcher condition = CONDITION_CONJUNCTION_PATTERN.matcher(conditionGroup); + boolean orCondition = false; + + while (condition.find() && (stillMatchesConditions || orCondition)) { + boolean matchCurrentCondition = false; + + boolean isAttributeCondition = condition.group(4) != null; + String selector = isAttributeCondition ? condition.group(5) : condition.group(2); + boolean isFunctionCondition = selector.endsWith("()"); + String value = condition.group(6); + String conjunction = condition.group(8); + orCondition = conjunction != null && conjunction.equals("or"); + + // invalid conjunction if not 'or' or 'and' + if (!orCondition && conjunction != null && !conjunction.equals("and")) { + // TODO: throw exception for invalid or unsupported XPath conjunction? + stillMatchesConditions = false; break; } - } - } else if (isFunctionCondition) { - if (isAttributeElement) { - for (Xml.Attribute a : tag.getAttributes()) { - if (matchesElementAndFunction(a, cursor, element, selector, value)) { - matchCondition = true; - break; + + if (isAttributeCondition) { // [@attr='value'] pattern + for (Xml.Attribute a : tag.getAttributes()) { + if ((a.getKeyAsString().equals(selector) || "*".equals(selector)) && a.getValueAsString().equals(value)) { + matchCurrentCondition = true; + break; + } + } + } else if (isFunctionCondition) { // [local-name()='name'] pattern + if (isAttributeElement) { + for (Xml.Attribute a : tag.getAttributes()) { + if (matchesElementAndFunction(a, cursor, element, selector, value)) { + matchCurrentCondition = true; + break; + } + } + } else { + matchCurrentCondition = matchesElementAndFunction(tag, cursor, element, selector, value); + } + } else { // other [] conditions + for (Xml.Tag t : FindTags.find(tag, selector)) { + if (t.getValue().map(v -> v.equals(value)).orElse(false)) { + matchCurrentCondition = true; + break; + } } } - } else { - matchCondition = matchesElementAndFunction(tag, cursor, element, selector, value); - } - } else { // other [] conditions - for (Xml.Tag t : FindTags.find(tag, selector)) { - if (t.getValue().map(v -> v.equals(value)).orElse(false)) { - matchCondition = true; + // break condition early if first OR condition is fulfilled + if (matchCurrentCondition && orCondition) { break; } + + stillMatchesConditions = matchCurrentCondition; } } - return matchCondition ? element : null; + return stillMatchesConditions ? element : null; } private static boolean matchesElementAndFunction(Namespaced tagOrAttribute, Cursor cursor, String element, String selector, String value) { diff --git a/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java b/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java index 04bee0758b2..f369bfda2c2 100755 --- a/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java +++ b/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java @@ -107,6 +107,7 @@ class XPathMatcherTest { content2 content3 + content4 """ @@ -228,9 +229,8 @@ void relativePathsWithConditions() { } @Test - @Disabled @Issue("https://github.com/openrewrite/rewrite/issues/3919") - void matchFunctions() { + void namespaceMatchFunctions() { assertThat(match("/root/element1", namespacedXml)).isTrue(); assertThat(match("/root/ns2:element2", namespacedXml)).isTrue(); assertThat(match("/root/dne", namespacedXml)).isFalse(); @@ -243,17 +243,21 @@ void matchFunctions() { assertThat(match("/*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); + } + @Test + @Disabled + void otherUncoveredXpathFunctions() { // Other common XPath functions - assertThat(match("contains(/root/element1, 'content1')", namespacedXml)).isTrue(); - assertThat(match("not(contains(/root/element1, 'content1'))", namespacedXml)).isFalse(); - assertThat(match("string-length(/root/element1) > 2", namespacedXml)).isTrue(); - assertThat(match("starts-with(/root/element1, 'content1')", namespacedXml)).isTrue(); - assertThat(match("ends-with(/root/element1, 'content1')", namespacedXml)).isTrue(); - assertThat(match("substring-before(/root/element1, '1') = 'content'", namespacedXml)).isTrue(); - assertThat(match("substring-after(/root/element1, 'content') = '1'", namespacedXml)).isTrue(); - assertThat(match("/root/element1/text()", namespacedXml)).isTrue(); - assertThat(match("count(/root/*)", namespacedXml)).isTrue(); + assertThat(match("contains(/root/element1, 'content1')", namespacedXml)).isTrue(); + assertThat(match("not(contains(/root/element1, 'content1'))", namespacedXml)).isFalse(); + assertThat(match("string-length(/root/element1) > 2", namespacedXml)).isTrue(); + assertThat(match("starts-with(/root/element1, 'content1')", namespacedXml)).isTrue(); + assertThat(match("ends-with(/root/element1, 'content1')", namespacedXml)).isTrue(); + assertThat(match("substring-before(/root/element1, '1') = 'content'", namespacedXml)).isTrue(); + assertThat(match("substring-after(/root/element1, 'content') = '1'", namespacedXml)).isTrue(); + assertThat(match("/root/element1/text()", namespacedXml)).isTrue(); + assertThat(match("count(/root/*)", namespacedXml)).isTrue(); } @Test @@ -328,6 +332,81 @@ void matchAttributeElement() { // assertThat(match("/root//element1/@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); } + @Test + void matchMultipleConditions() { + assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2'][local-name()='element2']", namespacedXml)).isTrue(); + assertThat(match("//*[local-name()='element2'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + + assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2'][local-name()='dne']", namespacedXml)).isFalse(); + assertThat(match("//*[local-name()='dne'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + + assertThat(match("//*[local-name()='element1'][@ns3:attribute1='content3']", namespacedXml)).isTrue(); + assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3'][local-name()='attribute1']", namespacedXml)).isTrue(); + assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3'][local-name()='dne']", namespacedXml)).isFalse(); + + assertThat(match("//*[@ns3:attr='test'][local-name()='element3']", namespacedXml)).isTrue(); + assertThat(match("//*[@ns3:attr='test'][local-name()='elementX']", namespacedXml)).isFalse(); + + assertThat(match("//*[@ns3:attr='test2'][local-name()='element4'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + assertThat(match("//*[@ns3:attr='testX'][local-name()='element4'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + assertThat(match("//*[@ns3:attr='test2'][local-name()='elementX'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + assertThat(match("//*[@ns3:attr='test2'][local-name()='element4'][namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse(); + } + + @Test + void matchConditionsWithConjunctions() { + // T&T, T&F, F&T, F&F + assertThat(match("//*[local-name()='element3' and @ns3:attr='test']", namespacedXml)).isTrue(); + assertThat(match("//*[local-name()='element3' and @ns3:attr='dne']", namespacedXml)).isFalse(); + assertThat(match("//*[local-name()='dne' and @ns3:attr='test']", namespacedXml)).isFalse(); + assertThat(match("//*[local-name()='dne' and @ns3:attr='dne']", namespacedXml)).isFalse(); + + // T|T, T|F, F|T, F|F + assertThat(match("//*[local-name()='element2' or namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + assertThat(match("//*[local-name()='element2' or namespace-uri()='dne']", namespacedXml)).isTrue(); + assertThat(match("//*[local-name()='dne' or local-name()='element2']", namespacedXml)).isTrue(); + assertThat(match("//*[local-name()='dne' or local-name()='dne2']", namespacedXml)).isFalse(); + + assertThat(match("//@*[namespace-uri()='dne' or namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); + + // T&T&T = T + assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2' and @ns3:attr='test2']", namespacedXml)).isTrue(); + // T&T&F = F + assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2' and @ns3:attr='dne']", namespacedXml)).isFalse(); + // T&T|F = T + assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2' or @ns3:attr='dne']", namespacedXml)).isTrue(); + // T&F|T = T + assertThat(match("//*[local-name()='element4' and @ns3:attr='dne' or namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + // T&F|F = F + assertThat(match("//*[local-name()='element4' and @ns3:attr='dne' or namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse(); + + // F|F|T = T + assertThat(match("//*[local-name()='dne' or local-name()='dne2' or local-name()='element2']", namespacedXml)).isTrue(); + + // [T&T][T] = T + assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2'][@ns3:attr='test2']", namespacedXml)).isTrue(); + // [T&T][F] = F + assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2'][@ns3:attr='dne']", namespacedXml)).isFalse(); + // [F&T][T] = F + assertThat(match("//*[local-name()='dne' and namespace-uri()='http://www.example.com/namespace2'][@ns3:attr='test2']", namespacedXml)).isFalse(); + // [F|T][T] = T + assertThat(match("//*[local-name()='dne' or local-name()='element4'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + // [F|T][F] = F + assertThat(match("//*[local-name()='dne' or local-name()='element4'][namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse(); + + // F|T&T = T + assertThat(match("//*[local-name()='dne' or local-name()='element4' and namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + // F|T&F = F + assertThat(match("//*[local-name()='dne' or local-name()='element4' and namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse(); + // F|F&T = F + assertThat(match("//*[local-name()='dne' or namespace-uri()='http://www.example.com/namespaceX' and local-name()='element4']", namespacedXml)).isFalse(); + + // T|F & T = T + assertThat(match("//*[local-name()='element4' or local-name()='dne' and namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + // T|F & F = T + assertThat(match("//*[local-name()='element4' or local-name()='dne' and namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isTrue(); + } + private boolean match(String xpath, SourceFile x) { XPathMatcher matcher = new XPathMatcher(xpath); return !TreeVisitor.collect(new XmlVisitor<>() {