mirror of https://github.com/python/cpython
#7311: fix html.parser to accept non-ASCII attribute values.
This commit is contained in:
parent
9b5ac3efa6
commit
2e3607c1e7
|
@@ -28,7 +28,7 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
|
||||||
# make it correctly strict without breaking backward compatibility.
|
# make it correctly strict without breaking backward compatibility.
|
||||||
attrfind = re.compile(
|
attrfind = re.compile(
|
||||||
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
||||||
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
|
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
|
||||||
attrfind_tolerant = re.compile(
|
attrfind_tolerant = re.compile(
|
||||||
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
||||||
r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
|
r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
|
||||||
|
|
|
@@ -217,6 +217,23 @@ DOCTYPE html [
|
||||||
("starttag", "a", [("href", "mailto:xyz@example.com")]),
|
("starttag", "a", [("href", "mailto:xyz@example.com")]),
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def test_attr_nonascii(self):
|
||||||
|
# see issue 7311
|
||||||
|
self._run_check("<img src=/foo/bar.png alt=\u4e2d\u6587>", [
|
||||||
|
("starttag", "img", [("src", "/foo/bar.png"),
|
||||||
|
("alt", "\u4e2d\u6587")]),
|
||||||
|
])
|
||||||
|
self._run_check("<a title='\u30c6\u30b9\u30c8' "
|
||||||
|
"href='\u30c6\u30b9\u30c8.html'>", [
|
||||||
|
("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
|
||||||
|
("href", "\u30c6\u30b9\u30c8.html")]),
|
||||||
|
])
|
||||||
|
self._run_check('<a title="\u30c6\u30b9\u30c8" '
|
||||||
|
'href="\u30c6\u30b9\u30c8.html">', [
|
||||||
|
("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
|
||||||
|
("href", "\u30c6\u30b9\u30c8.html")]),
|
||||||
|
])
|
||||||
|
|
||||||
def test_attr_entity_replacement(self):
|
def test_attr_entity_replacement(self):
|
||||||
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
|
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
|
||||||
("starttag", "a", [("b", "&><\"'")]),
|
("starttag", "a", [("b", "&><\"'")]),
|
||||||
|
|
|
@@ -49,6 +49,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #7311: fix html.parser to accept non-ASCII attribute values.
|
||||||
|
|
||||||
- Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
|
- Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
|
||||||
subparts with an 8bit CTE into unicode instead of preserving the bytes.
|
subparts with an 8bit CTE into unicode instead of preserving the bytes.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue