Merged revisions 87542 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r87542 | senthil.kumaran | 2010-12-28 23:55:16 +0800 (Tue, 28 Dec 2010) | 3 lines Fix Issue10759 - html.parser.unescape() fails on HTML entities with incorrect syntax ........
This commit is contained in:
parent
18f6b1987f
commit
6c85838489
|
@ -367,13 +367,16 @@ class HTMLParser(_markupbase.ParserBase):
|
|||
return s
|
||||
def replaceEntities(s):
|
||||
s = s.groups()[0]
|
||||
if s[0] == "#":
|
||||
s = s[1:]
|
||||
if s[0] in ['x','X']:
|
||||
c = int(s[1:], 16)
|
||||
else:
|
||||
c = int(s)
|
||||
return chr(c)
|
||||
try:
|
||||
if s[0] == "#":
|
||||
s = s[1:]
|
||||
if s[0] in ['x','X']:
|
||||
c = int(s[1:], 16)
|
||||
else:
|
||||
c = int(s)
|
||||
return chr(c)
|
||||
except ValueError:
|
||||
return '&#'+ s +';'
|
||||
else:
|
||||
# Cannot use name2codepoint directly, because HTMLParser
|
||||
# supports apos, which is not part of HTML 4
|
||||
|
|
|
@ -319,6 +319,10 @@ DOCTYPE html [
|
|||
self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
|
||||
("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])
|
||||
])
|
||||
def test_unescape_function(self):
|
||||
p = html.parser.HTMLParser()
|
||||
self.assertEqual(p.unescape('&#bad;'),'&#bad;')
|
||||
self.assertEqual(p.unescape('&#0038;'),'&')
|
||||
|
||||
|
||||
def test_main():
|
||||
|
|
Loading…
Reference in New Issue