mirror of https://github.com/python/cpython
Fix Issue10759 - HTMLParser.unescape() to handle malformed charrefs.
This commit is contained in:
parent
06fdbedf81
commit
3f60f09eb2
|
@ -367,13 +367,16 @@ class HTMLParser(markupbase.ParserBase):
|
||||||
return s
|
return s
|
||||||
def replaceEntities(s):
|
def replaceEntities(s):
|
||||||
s = s.groups()[0]
|
s = s.groups()[0]
|
||||||
if s[0] == "#":
|
try:
|
||||||
s = s[1:]
|
if s[0] == "#":
|
||||||
if s[0] in ['x','X']:
|
s = s[1:]
|
||||||
c = int(s[1:], 16)
|
if s[0] in ['x','X']:
|
||||||
else:
|
c = int(s[1:], 16)
|
||||||
c = int(s)
|
else:
|
||||||
return unichr(c)
|
c = int(s)
|
||||||
|
return unichr(c)
|
||||||
|
except ValueError:
|
||||||
|
return '&#'+s+';'
|
||||||
else:
|
else:
|
||||||
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
# Cannot use name2codepoint directly, because HTMLParser supports apos,
|
||||||
# which is not part of HTML 4
|
# which is not part of HTML 4
|
||||||
|
|
|
@ -320,6 +320,11 @@ DOCTYPE html [
|
||||||
("endtag", "p"),
|
("endtag", "p"),
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def test_unescape_function(self):
|
||||||
|
parser = HTMLParser.HTMLParser()
|
||||||
|
self.assertEqual(parser.unescape('&#bad;'),'&#bad;')
|
||||||
|
self.assertEqual(parser.unescape('&#0038;'),'&')
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(HTMLParserTestCase)
|
test_support.run_unittest(HTMLParserTestCase)
|
||||||
|
|
Loading…
Reference in New Issue