bpo-30485: support a default prefix mapping in ElementPath by passing None as prefix (#1823)

This commit is contained in:
Stefan Behnel 2019-04-14 10:09:09 +02:00 committed by GitHub
parent ffca16e25a
commit e9927e1820
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 12 deletions

View File

@ -764,7 +764,8 @@ Element Objects
Finds the first subelement matching *match*. *match* may be a tag name Finds the first subelement matching *match*. *match* may be a tag name
or a :ref:`path <elementtree-xpath>`. Returns an element instance or a :ref:`path <elementtree-xpath>`. Returns an element instance
or ``None``. *namespaces* is an optional mapping from namespace prefix or ``None``. *namespaces* is an optional mapping from namespace prefix
to full name. to full name. Use ``None`` as a prefix key in the mapping to move all
unprefixed tag names in the expression into the namespace it maps to.
.. method:: findall(match, namespaces=None) .. method:: findall(match, namespaces=None)
@ -772,7 +773,8 @@ Element Objects
Finds all matching subelements, by tag name or Finds all matching subelements, by tag name or
:ref:`path <elementtree-xpath>`. Returns a list containing all matching :ref:`path <elementtree-xpath>`. Returns a list containing all matching
elements in document order. *namespaces* is an optional mapping from elements in document order. *namespaces* is an optional mapping from
namespace prefix to full name. namespace prefix to full name. Use ``None`` as a prefix key in the
mapping to move all unprefixed tag names in the expression into the namespace it maps to.
.. method:: findtext(match, default=None, namespaces=None) .. method:: findtext(match, default=None, namespaces=None)
@ -782,7 +784,8 @@ Element Objects
of the first matching element, or *default* if no element was found. of the first matching element, or *default* if no element was found.
Note that if the matching element has no text content an empty string Note that if the matching element has no text content an empty string
is returned. *namespaces* is an optional mapping from namespace prefix is returned. *namespaces* is an optional mapping from namespace prefix
to full name. to full name. Use ``None`` as a prefix key in the mapping to move all
unprefixed tag names in the expression into the namespace it maps to.
.. method:: getchildren() .. method:: getchildren()

View File

@ -2463,6 +2463,12 @@ class ElementFindTest(unittest.TestCase):
nsmap = {'xx': 'Y'} nsmap = {'xx': 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1) self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
nsmap = {'xx': 'X', None: 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
nsmap = {'xx': 'X', '': 'Y'}
with self.assertRaisesRegex(ValueError, 'namespace prefix'):
root.findall(".//xx:b", namespaces=nsmap)
def test_bad_find(self): def test_bad_find(self):
e = ET.XML(SAMPLE_XML) e = ET.XML(SAMPLE_XML)

View File

@ -71,16 +71,22 @@ xpath_tokenizer_re = re.compile(
) )
def xpath_tokenizer(pattern, namespaces=None): def xpath_tokenizer(pattern, namespaces=None):
default_namespace = namespaces.get(None) if namespaces else None
for token in xpath_tokenizer_re.findall(pattern): for token in xpath_tokenizer_re.findall(pattern):
tag = token[1] tag = token[1]
if tag and tag[0] != "{" and ":" in tag: if tag and tag[0] != "{":
try: if ":" in tag:
prefix, uri = tag.split(":", 1) prefix, uri = tag.split(":", 1)
if not namespaces: try:
raise KeyError if not namespaces:
yield token[0], "{%s}%s" % (namespaces[prefix], uri) raise KeyError
except KeyError: yield token[0], "{%s}%s" % (namespaces[prefix], uri)
raise SyntaxError("prefix %r not found in prefix map" % prefix) from None except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
elif default_namespace:
yield token[0], "{%s}%s" % (default_namespace, tag)
else:
yield token
else: else:
yield token yield token
@ -264,10 +270,19 @@ class _SelectorContext:
def iterfind(elem, path, namespaces=None): def iterfind(elem, path, namespaces=None):
# compile selector pattern # compile selector pattern
cache_key = (path, None if namespaces is None
else tuple(sorted(namespaces.items())))
if path[-1:] == "/": if path[-1:] == "/":
path = path + "*" # implicit all (FIXME: keep this?) path = path + "*" # implicit all (FIXME: keep this?)
cache_key = (path,)
if namespaces:
if '' in namespaces:
raise ValueError("empty namespace prefix must be passed as None, not the empty string")
if None in namespaces:
cache_key += (namespaces[None],) + tuple(sorted(
item for item in namespaces.items() if item[0] is not None))
else:
cache_key += tuple(sorted(namespaces.items()))
try: try:
selector = _cache[cache_key] selector = _cache[cache_key]
except KeyError: except KeyError:

View File

@ -0,0 +1,3 @@
Path expressions in xml.etree.ElementTree can now avoid explicit namespace
prefixes for tags (or the "{namespace}tag" notation) by mapping the ``None``
prefix to a default namespace in the *namespaces* argument.