#23144: Make sure that HTMLParser.feed() returns all the data, even when convert_charrefs is True.
This commit is contained in:
parent
527ef0792f
commit
6f2bb98966
|
@ -198,7 +198,15 @@ class HTMLParser(_markupbase.ParserBase):
|
|||
if self.convert_charrefs and not self.cdata_elem:
|
||||
j = rawdata.find('<', i)
|
||||
if j < 0:
|
||||
if not end:
|
||||
# if we can't find the next <, either we are at the end
|
||||
# or there's more text incoming. If the latter is True,
|
||||
# we can't pass the text to handle_data in case we have
|
||||
# a charref cut in half at end. Try to determine if
|
||||
# this is the case before proceeding by looking for an
|
||||
# & near the end and see if it's followed by a space or ;.
|
||||
amppos = rawdata.rfind('&', max(i, n-34))
|
||||
if (amppos >= 0 and
|
||||
not re.compile(r'[\s;]').search(rawdata, amppos)):
|
||||
break # wait till we get all the text
|
||||
j = n
|
||||
else:
|
||||
|
|
|
@ -72,9 +72,6 @@ class EventCollectorExtra(EventCollector):
|
|||
|
||||
class EventCollectorCharrefs(EventCollector):
|
||||
|
||||
def get_events(self):
|
||||
return self.events
|
||||
|
||||
def handle_charref(self, data):
|
||||
self.fail('This should never be called with convert_charrefs=True')
|
||||
|
||||
|
@ -685,6 +682,18 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
|||
]
|
||||
self._run_check(html, expected)
|
||||
|
||||
def test_convert_charrefs_dropped_text(self):
    """Check that feed() alone triggers every event with convert_charrefs.

    Regression test for #23144: with convert_charrefs=True, all events
    must be reported even when close() is never called.  Before the fix
    the trailing " bar & baz" text was missing from the collected events.
    """
    expected_events = [
        ('data', 'foo '),
        ('starttag', 'a', []),
        ('data', 'link'),
        ('endtag', 'a'),
        ('data', ' bar & baz'),
    ]
    collector = EventCollector(convert_charrefs=True)
    # Deliberately no close() call: the events must come from feed() alone.
    collector.feed("foo <a>link</a> bar & baz")
    self.assertEqual(collector.get_events(), expected_events)
|
||||
|
||||
|
||||
class AttributesStrictTestCase(TestCaseBase):
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
+++++++++++
|
||||
+++++++++++
|
||||
Python News
|
||||
+++++++++++
|
||||
|
||||
|
@ -81,6 +81,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #23144: Make sure that HTMLParser.feed() returns all the data, even
|
||||
when convert_charrefs is True.
|
||||
|
||||
- Issue #16180: Exit pdb if file has syntax error, instead of trapping user
|
||||
in an infinite loop. Patch by Xavier de Gaye.
|
||||
|
||||
|
|
Loading…
Reference in New Issue