[Bug #921657] Allow '@' in unquoted HTML attribute values. Not strictly legal according to the HTML Recommendation (REC), but HTMLParser is already a pretty loose parser. Reported by Bernd Zimmermann.
This commit is contained in:
parent
9021c95595
commit
b7d8ce0275
|
@@ -26,7 +26,7 @@ commentclose = re.compile(r'--\s*>')
|
||||||
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
|
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
|
||||||
attrfind = re.compile(
|
attrfind = re.compile(
|
||||||
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
||||||
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~]*))?')
|
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
|
||||||
|
|
||||||
locatestarttagend = re.compile(r"""
|
locatestarttagend = re.compile(r"""
|
||||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||||
|
|
|
@@ -204,6 +204,10 @@ DOCTYPE html [
|
||||||
self._run_check("<e a=rgb(1,2,3)>", [
|
self._run_check("<e a=rgb(1,2,3)>", [
|
||||||
("starttag", "e", [("a", "rgb(1,2,3)")]),
|
("starttag", "e", [("a", "rgb(1,2,3)")]),
|
||||||
])
|
])
|
||||||
|
# Regression test for SF bug #921657.
|
||||||
|
self._run_check("<a href=mailto:xyz@example.com>", [
|
||||||
|
("starttag", "a", [("href", "mailto:xyz@example.com")]),
|
||||||
|
])
|
||||||
|
|
||||||
def test_attr_entity_replacement(self):
|
def test_attr_entity_replacement(self):
|
||||||
self._run_check("""<a b='&><"''>""", [
|
self._run_check("""<a b='&><"''>""", [
|
||||||
|
|
Loading…
Reference in New Issue