bpo-40624: Add support for the XPath != operator in xml.etree (GH-22147)

This commit is contained in:
Ammar Askar 2020-11-09 02:02:39 -05:00 committed by GitHub
parent 4eb41d055e
commit 97e8b1eaea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 78 additions and 7 deletions

View File

@ -455,6 +455,12 @@ Supported XPath syntax
| | has the given value. The value cannot contain | | | has the given value. The value cannot contain |
| | quotes. | | | quotes. |
+-----------------------+------------------------------------------------------+ +-----------------------+------------------------------------------------------+
| ``[@attrib!='value']``| Selects all elements that have the given attribute |
| | with a value other than the given value. The value |
| | cannot contain quotes. |
| | |
| | .. versionadded:: 3.10 |
+-----------------------+------------------------------------------------------+
| ``[tag]`` | Selects all elements that have a child named | | ``[tag]`` | Selects all elements that have a child named |
| | ``tag``. Only immediate children are supported. | | | ``tag``. Only immediate children are supported. |
+-----------------------+------------------------------------------------------+ +-----------------------+------------------------------------------------------+
@ -463,10 +469,22 @@ Supported XPath syntax
| | | | | |
| | .. versionadded:: 3.7 | | | .. versionadded:: 3.7 |
+-----------------------+------------------------------------------------------+ +-----------------------+------------------------------------------------------+
| ``[.!='text']`` | Selects all elements whose complete text content, |
| | including descendants, does not equal the given |
| | ``text``. |
| | |
| | .. versionadded:: 3.10 |
+-----------------------+------------------------------------------------------+
| ``[tag='text']`` | Selects all elements that have a child named | | ``[tag='text']`` | Selects all elements that have a child named |
| | ``tag`` whose complete text content, including | | | ``tag`` whose complete text content, including |
| | descendants, equals the given ``text``. | | | descendants, equals the given ``text``. |
+-----------------------+------------------------------------------------------+ +-----------------------+------------------------------------------------------+
| ``[tag!='text']`` | Selects all elements that have a child named |
| | ``tag`` whose complete text content, including |
| | descendants, does not equal the given ``text``. |
| | |
| | .. versionadded:: 3.10 |
+-----------------------+------------------------------------------------------+
| ``[position]`` | Selects all elements that are located at the given | | ``[position]`` | Selects all elements that are located at the given |
| | position. The position can be either an integer | | | position. The position can be either an integer |
| | (1 is the first position), the expression ``last()`` | | | (1 is the first position), the expression ``last()`` |

View File

@ -2852,8 +2852,12 @@ class ElementFindTest(unittest.TestCase):
['tag'] * 3) ['tag'] * 3)
self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')), self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
['tag']) ['tag'])
self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')),
['tag'] * 2)
self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')), self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
['tag'] * 2) ['tag'] * 2)
self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')),
['tag'])
self.assertEqual(summarize_list(e.findall('.//tag[@id]')), self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
['tag']) ['tag'])
self.assertEqual(summarize_list(e.findall('.//section[tag]')), self.assertEqual(summarize_list(e.findall('.//section[tag]')),
@ -2875,6 +2879,19 @@ class ElementFindTest(unittest.TestCase):
self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")), self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
['section']) ['section'])
# Negations of above tests. They match nothing because the sole section
# tag has subtext.
self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")),
[])
self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")),
[])
self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")),
[])
self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")),
[])
self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")),
[])
self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")), self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
['tag']) ['tag'])
self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")), self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
@ -2890,6 +2907,24 @@ class ElementFindTest(unittest.TestCase):
self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")), self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
[]) [])
# Negations of above tests.
# Matches everything but the tag containing subtext
self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")),
['tag'] * 3)
self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")),
['tag'] * 3)
self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')),
['tag'] * 3)
self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')),
['tag'] * 3)
self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")),
['tag'] * 3)
# Matches all tags.
self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext ']")),
['tag'] * 4)
self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")),
['tag'] * 4)
# duplicate section => 2x tag matches # duplicate section => 2x tag matches
e[1] = e[2] e[1] = e[2]
self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),

View File

@ -65,8 +65,9 @@ xpath_tokenizer_re = re.compile(
r"//?|" r"//?|"
r"\.\.|" r"\.\.|"
r"\(\)|" r"\(\)|"
r"!=|"
r"[/.*:\[\]\(\)@=])|" r"[/.*:\[\]\(\)@=])|"
r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" r"((?:\{[^}]+\})?[^/\[\]\(\)@!=\s]+)|"
r"\s+" r"\s+"
) )
@ -253,15 +254,19 @@ def prepare_predicate(next, token):
if elem.get(key) is not None: if elem.get(key) is not None:
yield elem yield elem
return select return select
if signature == "@-='": if signature == "@-='" or signature == "@-!='":
# [@attribute='value'] # [@attribute='value'] or [@attribute!='value']
key = predicate[1] key = predicate[1]
value = predicate[-1] value = predicate[-1]
def select(context, result): def select(context, result):
for elem in result: for elem in result:
if elem.get(key) == value: if elem.get(key) == value:
yield elem yield elem
return select def select_negated(context, result):
for elem in result:
if (attr_value := elem.get(key)) is not None and attr_value != value:
yield elem
return select_negated if '!=' in signature else select
if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
# [tag] # [tag]
tag = predicate[0] tag = predicate[0]
@ -270,8 +275,10 @@ def prepare_predicate(next, token):
if elem.find(tag) is not None: if elem.find(tag) is not None:
yield elem yield elem
return select return select
if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])): if signature == ".='" or signature == ".!='" or (
# [.='value'] or [tag='value'] (signature == "-='" or signature == "-!='")
and not re.match(r"\-?\d+$", predicate[0])):
# [.='value'] or [tag='value'] or [.!='value'] or [tag!='value']
tag = predicate[0] tag = predicate[0]
value = predicate[-1] value = predicate[-1]
if tag: if tag:
@ -281,12 +288,22 @@ def prepare_predicate(next, token):
if "".join(e.itertext()) == value: if "".join(e.itertext()) == value:
yield elem yield elem
break break
def select_negated(context, result):
for elem in result:
for e in elem.iterfind(tag):
if "".join(e.itertext()) != value:
yield elem
break
else: else:
def select(context, result): def select(context, result):
for elem in result: for elem in result:
if "".join(elem.itertext()) == value: if "".join(elem.itertext()) == value:
yield elem yield elem
return select def select_negated(context, result):
for elem in result:
if "".join(elem.itertext()) != value:
yield elem
return select_negated if '!=' in signature else select
if signature == "-" or signature == "-()" or signature == "-()-": if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index] # [index] or [last()] or [last()-index]
if signature == "-": if signature == "-":

View File

@ -0,0 +1 @@
Added support for the XPath ``!=`` operator in xml.etree.