bpo-31648: Improve ElementPath (#3835)

* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".
This commit is contained in:
scoder 2017-09-30 15:35:21 +02:00 committed by Serhiy Storchaka
parent 9811e80fd0
commit 101a5e84ac
5 changed files with 68 additions and 7 deletions

View File

@ -437,6 +437,11 @@ Supported XPath syntax
| ``[tag]`` | Selects all elements that have a child named |
| | ``tag``. Only immediate children are supported. |
+-----------------------+------------------------------------------------------+
| ``[.='text']`` | Selects all elements whose complete text content, |
| | including descendants, equals the given ``text``. |
| | |
| | .. versionadded:: 3.7 |
+-----------------------+------------------------------------------------------+
| ``[tag='text']`` | Selects all elements that have a child named |
| | ``tag`` whose complete text content, including |
| | descendants, equals the given ``text``. |

View File

@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick*
keyword argument. When it's true, zeros are represented by ``'`'``
instead of spaces. (Contributed by Xiang Zhang in :issue:`30103`.)
xml.etree
---------
:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
methods can now compare the text of the current node with ``[. = "text"]``,
not only the text of its children. Whitespace is now also allowed inside
predicates for better readability. (Contributed by Stefan Behnel in :issue:`31648`.)
zipapp
------

View File

@ -2237,6 +2237,39 @@ class ElementFindTest(unittest.TestCase):
['tag'] * 2)
self.assertEqual(e.findall('section//'), e.findall('section//*'))
self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
['section'])
self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
['section'])
self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
['section'])
self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
['section'])
self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
['section'])
self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
['tag'])
self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
['tag'])
self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
['tag'])
self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
['tag'])
self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
['tag'])
self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
[])
self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
[])
# duplicate section => 2x tag matches
e[1] = e[2]
self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
['section', 'section'])
self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
['tag', 'tag'])
def test_test_find_with_ns(self):
e = ET.XML(SAMPLE_XML_NS)
self.assertEqual(summarize_list(e.findall('tag')), [])

View File

@ -157,6 +157,9 @@ def prepare_predicate(next, token):
return
if token[0] == "]":
break
if token == ('', ''):
# ignore whitespace
continue
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
@ -188,16 +191,22 @@ def prepare_predicate(next, token):
if elem.find(tag) is not None:
yield elem
return select
if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
# [tag='value']
if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
# [.='value'] or [tag='value']
tag = predicate[0]
value = predicate[-1]
if tag:
def select(context, result):
for elem in result:
for e in elem.findall(tag):
if "".join(e.itertext()) == value:
yield elem
break
else:
def select(context, result):
for elem in result:
if "".join(elem.itertext()) == value:
yield elem
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]

View File

@ -0,0 +1,6 @@
Improvements to path predicates in ElementTree:
* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
* Add support for text comparison of the current node, like "[.='text']".
Patch by Stefan Behnel.