mirror of https://github.com/python/cpython
- SF bug #853506: IPv6 address parsing in sgmllib
  ('[' and ']' were not accepted in unquoted attribute values)
- cleaned up tests of character and entity reference decoding so the tests cover the documented relationships among handle_charref, handle_entityref, convert_charref, convert_codepoint, and convert_entityref, without bringing up Unicode issues that sgmllib cannot be involved in
This commit is contained in:
parent
b114984225
commit
2f99da636b
|
@ -33,7 +33,7 @@ endbracket = re.compile('[<>]')
|
|||
tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
|
||||
attrfind = re.compile(
|
||||
r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
|
||||
r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
|
||||
|
||||
|
||||
class SGMLParseError(RuntimeError):
|
||||
|
@ -400,11 +400,11 @@ class SGMLParser(markupbase.ParserBase):
|
|||
|
||||
def handle_charref(self, name):
|
||||
"""Handle character reference, no need to override."""
|
||||
replacement = convert_charref(name)
|
||||
replacement = self.convert_charref(name)
|
||||
if replacement is None:
|
||||
self.unknown_charref(name)
|
||||
else:
|
||||
self.handle_data(convert_charref(name))
|
||||
self.handle_data(replacement)
|
||||
|
||||
# Definition of entities -- derived classes may override
|
||||
entitydefs = \
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
import htmlentitydefs
|
||||
import pprint
|
||||
import re
|
||||
import sgmllib
|
||||
import unittest
|
||||
from test import test_support
|
||||
|
@ -65,20 +67,34 @@ class CDATAEventCollector(EventCollector):
|
|||
|
||||
|
||||
class HTMLEntityCollector(EventCollector):
|
||||
import re, htmlentitydefs
|
||||
|
||||
entity_or_charref = re.compile('(?:&([a-zA-Z][-.a-zA-Z0-9]*)'
|
||||
'|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)')
|
||||
|
||||
def convert_charref(self, name):
|
||||
self.append(("charref", "convert", name))
|
||||
if name.startswith('x'):
|
||||
return unichr(int(name[1:],16))
|
||||
else:
|
||||
return unichr(int(name))
|
||||
if name[0] != "x":
|
||||
return EventCollector.convert_charref(self, name)
|
||||
|
||||
def convert_codepoint(self, codepoint):
|
||||
self.append(("codepoint", "convert", codepoint))
|
||||
EventCollector.convert_codepoint(self, codepoint)
|
||||
|
||||
def convert_entityref(self, name):
|
||||
self.append(("entityref", "convert", name))
|
||||
return unichr(self.htmlentitydefs.name2codepoint[name])
|
||||
return EventCollector.convert_entityref(self, name)
|
||||
|
||||
# These record that they were called, then pass the call along
|
||||
# to the default implementation so that its actions can be
|
||||
# recorded.
|
||||
|
||||
def handle_charref(self, data):
|
||||
self.append(("charref", data))
|
||||
sgmllib.SGMLParser.handle_charref(self, data)
|
||||
|
||||
def handle_entityref(self, data):
|
||||
self.append(("entityref", data))
|
||||
sgmllib.SGMLParser.handle_entityref(self, data)
|
||||
|
||||
|
||||
class SGMLParserTestCase(unittest.TestCase):
|
||||
|
@ -251,13 +267,23 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
|
|||
])])
|
||||
|
||||
def test_convert_overrides(self):
|
||||
# This checks that the character and entity reference
|
||||
# conversion helpers are called at the documented times. No
|
||||
# attempt is made to really change what the parser accepts.
|
||||
#
|
||||
self.collector = HTMLEntityCollector
|
||||
self.check_events('<a title="“test”">foo</a>', [
|
||||
self.check_events(('<a title="“test”">foo</a>'
|
||||
'&foobar;*'), [
|
||||
('entityref', 'convert', 'ldquo'),
|
||||
('charref', 'convert', 'x201d'),
|
||||
('starttag', 'a', [('title', u'\u201ctest\u201d')]),
|
||||
('starttag', 'a', [('title', '“test”')]),
|
||||
('data', 'foo'),
|
||||
('endtag', 'a'),
|
||||
('entityref', 'foobar'),
|
||||
('entityref', 'convert', 'foobar'),
|
||||
('charref', '42'),
|
||||
('charref', 'convert', '42'),
|
||||
('codepoint', 'convert', 42),
|
||||
])
|
||||
|
||||
def test_attr_funky_names(self):
|
||||
|
@ -265,6 +291,14 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
|
|||
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
|
||||
])
|
||||
|
||||
def test_attr_value_ip6_url(self):
|
||||
# http://www.python.org/sf/853506
|
||||
self.check_events(("<a href='http://[1080::8:800:200C:417A]/'>"
|
||||
"<a href=http://[1080::8:800:200C:417A]/>"), [
|
||||
("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
|
||||
("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
|
||||
])
|
||||
|
||||
def test_illegal_declarations(self):
|
||||
s = 'abc<!spacer type="block" height="25">def'
|
||||
self.check_events(s, [
|
||||
|
|
Loading…
Reference in New Issue