diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py index e5fbd32fcc8..104b25f2a07 100644 --- a/Lib/sgmllib.py +++ b/Lib/sgmllib.py @@ -396,7 +396,7 @@ class SGMLParser(markupbase.ParserBase): n = int(name) except ValueError: return - if not 0 <= n <= 255: + if not 0 <= n <= 127: return return self.convert_codepoint(n) diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py index 34fd7f00ba0..081e0e17fc9 100644 --- a/Lib/test/test_sgmllib.py +++ b/Lib/test/test_sgmllib.py @@ -373,6 +373,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN' if len(data) != CHUNK: break + def test_only_decode_ascii(self): + # SF bug #1651995, make sure non-ascii character references are not decoded + s = '<signs exclamation="&#33;" copyright="&#169;" quoteleft="&#8216;">' + self.check_events(s, [ + ('starttag', 'signs', + [('exclamation', '!'), ('copyright', '&#169;'), + ('quoteleft', '&#8216;')]), + ]) + # XXX These tests have been disabled by prefixing their names with # an underscore. The first two exercise outstanding bugs in the # sgmllib module, and the third exhibits questionable behavior