bpo-36831: Do not apply default namespace to unprefixed attributes in ElementPath. (#13201)

Also provide better grouping of the tokenizer tests.
This commit is contained in:
Stefan Behnel 2019-05-09 07:22:47 +02:00 committed by GitHub
parent 3aca40d3cb
commit 88db8bd064
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 11 deletions

View File

@ -1144,14 +1144,9 @@ class ElementTreeTest(unittest.TestCase):
# tests from the xml specification # tests from the xml specification
check("*", ['*']) check("*", ['*'])
check("{ns}*", ['{ns}*'])
check("{}*", ['{}*'])
check("{*}tag", ['{*}tag'])
check("{*}*", ['{*}*'])
check("text()", ['text', '()']) check("text()", ['text', '()'])
check("@name", ['@', 'name']) check("@name", ['@', 'name'])
check("@*", ['@', '*']) check("@*", ['@', '*'])
check("@{ns}attr", ['@', '{ns}attr'])
check("para[1]", ['para', '[', '1', ']']) check("para[1]", ['para', '[', '1', ']'])
check("para[last()]", ['para', '[', 'last', '()', ']']) check("para[last()]", ['para', '[', 'last', '()', ']'])
check("*/para", ['*', '/', 'para']) check("*/para", ['*', '/', 'para'])
@ -1163,7 +1158,6 @@ class ElementTreeTest(unittest.TestCase):
check("//olist/item", ['//', 'olist', '/', 'item']) check("//olist/item", ['//', 'olist', '/', 'item'])
check(".", ['.']) check(".", ['.'])
check(".//para", ['.', '//', 'para']) check(".//para", ['.', '//', 'para'])
check(".//{*}tag", ['.', '//', '{*}tag'])
check("..", ['..']) check("..", ['..'])
check("../@lang", ['..', '/', '@', 'lang']) check("../@lang", ['..', '/', '@', 'lang'])
check("chapter[title]", ['chapter', '[', 'title', ']']) check("chapter[title]", ['chapter', '[', 'title', ']'])
@ -1171,11 +1165,32 @@ class ElementTreeTest(unittest.TestCase):
'[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
# additional tests # additional tests
check("@{ns}attr", ['@', '{ns}attr'])
check("{http://spam}egg", ['{http://spam}egg']) check("{http://spam}egg", ['{http://spam}egg'])
check("./spam.egg", ['.', '/', 'spam.egg']) check("./spam.egg", ['.', '/', 'spam.egg'])
check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
# wildcard tags
check("{ns}*", ['{ns}*'])
check("{}*", ['{}*'])
check("{*}tag", ['{*}tag'])
check("{*}*", ['{*}*'])
check(".//{*}tag", ['.', '//', '{*}tag'])
# namespace prefix resolution
check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'], check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
{'xsd': 'http://www.w3.org/2001/XMLSchema'}) {'xsd': 'http://www.w3.org/2001/XMLSchema'})
check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
{'': 'http://www.w3.org/2001/XMLSchema'})
check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
{'xsd': 'http://www.w3.org/2001/XMLSchema'})
check("@type", ['@', 'type'],
{'': 'http://www.w3.org/2001/XMLSchema'})
check("@{*}type", ['@', '{*}type'],
{'': 'http://www.w3.org/2001/XMLSchema'})
check("@{ns}attr", ['@', '{ns}attr'],
{'': 'http://www.w3.org/2001/XMLSchema',
'ns': 'http://www.w3.org/2001/XMLSchema'})
def test_processinginstruction(self): def test_processinginstruction(self):
# Test ProcessingInstruction directly # Test ProcessingInstruction directly

View File

@ -72,23 +72,27 @@ xpath_tokenizer_re = re.compile(
def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(type, tag)`` token pairs for the path expression *pattern*.

    *namespaces* is an optional mapping from prefix to namespace URI.  A
    ``prefix:name`` tag is rewritten to ``{uri}name``.  The URI stored under
    the empty-string key is used as the default namespace for unprefixed
    tags, but not for a name that directly follows an ``@`` token:
    unprefixed attribute names are passed through unchanged.

    Raises SyntaxError when a prefix used in *pattern* cannot be looked up
    in *namespaces*.
    """
    fallback_uri = namespaces.get('') if namespaces else None
    # True only while the previously yielded token was the "@" operator.
    follows_at_sign = False
    for token in xpath_tokenizer_re.findall(pattern):
        kind, name = token
        if not name or name[0] == "{":
            # Operator token, or a tag that already carries "{...}":
            # pass it through and remember whether it opens an attribute.
            yield token
            follows_at_sign = kind == '@'
            continue

        if ":" in name:
            prefix, local_part = name.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                yield kind, "{%s}%s" % (namespaces[prefix], local_part)
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
        elif fallback_uri and not follows_at_sign:
            yield kind, "{%s}%s" % (fallback_uri, name)
        else:
            yield token
        # A name token has been consumed, so any pending "@" is used up.
        follows_at_sign = False
def get_parent_map(context): def get_parent_map(context):
parent_map = context.parent_map parent_map = context.parent_map
@ -100,7 +104,6 @@ def get_parent_map(context):
return parent_map return parent_map
def _is_wildcard_tag(tag): def _is_wildcard_tag(tag):
return tag[:3] == '{*}' or tag[-2:] == '}*' return tag[:3] == '{*}' or tag[-2:] == '}*'