#1651995: fix _convert_ref for non-ASCII characters.

This commit is contained in:
Georg Brandl 2009-03-31 22:11:53 +00:00
parent 95fafec732
commit 0c7b2c9c19
2 changed files with 10 additions and 1 deletion

View File

@@ -396,7 +396,7 @@ class SGMLParser(markupbase.ParserBase):
n = int(name)
except ValueError:
return
if not 0 <= n <= 255:
if not 0 <= n <= 127:
return
return self.convert_codepoint(n)

View File

@@ -373,6 +373,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
if len(data) != CHUNK:
break
def test_only_decode_ascii(self):
    # Regression test for SF bug #1651995: a numeric character reference
    # in an attribute value is decoded only when it names an ASCII code
    # point; references above that range must come through verbatim.
    markup = '<signs exclamation="&#33" copyright="&#169" quoteleft="&#8216;">'
    attributes = [
        # &#33 is ASCII, so it is expected to be decoded to '!'.
        ('exclamation', '!'),
        # &#169 and &#8216; are outside ASCII and are expected unchanged.
        ('copyright', '&#169'),
        ('quoteleft', '&#8216;'),
    ]
    expected_events = [('starttag', 'signs', attributes)]
    self.check_events(markup, expected_events)
# XXX These tests have been disabled by prefixing their names with
# an underscore. The first two exercise outstanding bugs in the
# sgmllib module, and the third exhibits questionable behavior