#15114: The html.parser module now raises a DeprecationWarning when the strict argument of HTMLParser or the HTMLParser.error method is used.

This commit is contained in:
Ezio Melotti 2013-11-02 17:08:24 +02:00
parent 28f0beaff6
commit 88ebfb129b
4 changed files with 29 additions and 9 deletions

View File

@ -74,7 +74,7 @@ as they are encountered::
def handle_data(self, data):
print("Encountered some data :", data)
parser = MyHTMLParser(strict=False)
parser = MyHTMLParser()
parser.feed('<html><head><title>Test</title></head>'
'<body><h1>Parse me!</h1></body></html>')
@ -272,7 +272,7 @@ examples::
def handle_decl(self, data):
print("Decl :", data)
parser = MyHTMLParser(strict=False)
parser = MyHTMLParser()
Parsing a doctype::

View File

@ -94,6 +94,8 @@ class HTMLParseError(Exception):
return result
_strict_sentinel = object()
class HTMLParser(_markupbase.ParserBase):
"""Find tags and other markup and call handler functions.
@ -116,16 +118,18 @@ class HTMLParser(_markupbase.ParserBase):
CDATA_CONTENT_ELEMENTS = ("script", "style")
def __init__(self, strict=False):
def __init__(self, strict=_strict_sentinel):
"""Initialize and reset this instance.
If strict is set to False (the default) the parser will parse invalid
markup, otherwise it will raise an error. Note that the strict mode
is deprecated.
and argument are deprecated.
"""
if strict:
warnings.warn("The strict mode is deprecated.",
if strict is not _strict_sentinel:
warnings.warn("The strict argument and mode are deprecated.",
DeprecationWarning, stacklevel=2)
else:
strict = False # default
self.strict = strict
self.reset()
@ -151,6 +155,8 @@ class HTMLParser(_markupbase.ParserBase):
self.goahead(1)
def error(self, message):
warnings.warn("The 'error' method is deprecated.",
DeprecationWarning, stacklevel=2)
raise HTMLParseError(message, self.getpos())
__starttag_text = None

View File

@ -96,7 +96,9 @@ class TestCaseBase(unittest.TestCase):
parser = self.get_collector()
parser.feed(source)
parser.close()
self.assertRaises(html.parser.HTMLParseError, parse)
with self.assertRaises(html.parser.HTMLParseError):
with self.assertWarns(DeprecationWarning):
parse()
class HTMLParserStrictTestCase(TestCaseBase):
@ -360,7 +362,16 @@ text
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
def get_collector(self):
return EventCollector(strict=False)
return EventCollector()
def test_deprecation_warnings(self):
with self.assertWarns(DeprecationWarning):
EventCollector(strict=True)
with self.assertWarns(DeprecationWarning):
EventCollector(strict=False)
with self.assertRaises(html.parser.HTMLParseError):
with self.assertWarns(DeprecationWarning):
EventCollector().error('test')
def test_tolerant_parsing(self):
self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'
@ -676,7 +687,7 @@ class AttributesStrictTestCase(TestCaseBase):
class AttributesTolerantTestCase(AttributesStrictTestCase):
def get_collector(self):
return EventCollector(strict=False)
return EventCollector()
def test_attr_funky_names2(self):
self._run_check(

View File

@ -31,6 +31,9 @@ Core and Builtins
Library
-------
- Issue #15114: The html.parser module now raises a DeprecationWarning when the
strict argument of HTMLParser or the HTMLParser.error method is used.
- Issue #19410: Undo the special-casing removal of '' for
importlib.machinery.FileFinder.