bpo-30264: ExpatParser closes the source on error (#1451) (#1474)

ExpatParser.parse() of xml.sax.xmlreader now always closes the
source: close the file object or the urllib object if source is a
string (not an open file-like object). The change fixes a
ResourceWarning on parsing error.

Add test_parse_close_source() unit test.
(cherry picked from commit ef9c0e732f)
This commit is contained in:
Victor Stinner 2017-05-05 10:04:57 +02:00 committed by GitHub
parent 39b73dd513
commit 0fe870f3f9
2 changed files with 40 additions and 17 deletions

View File

@ -4,6 +4,7 @@
from xml.sax import make_parser, ContentHandler, \ from xml.sax import make_parser, ContentHandler, \
SAXException, SAXReaderNotAvailable, SAXParseException SAXException, SAXReaderNotAvailable, SAXParseException
import unittest import unittest
from unittest import mock
try: try:
make_parser() make_parser()
except SAXReaderNotAvailable: except SAXReaderNotAvailable:
@ -175,12 +176,8 @@ class ParseTest(unittest.TestCase):
with self.assertRaises(SAXException): with self.assertRaises(SAXException):
self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
make_xml_file(self.data, 'iso-8859-1', None) make_xml_file(self.data, 'iso-8859-1', None)
with support.check_warnings(('unclosed file', ResourceWarning)):
# XXX Failed parser leaks an opened file.
with self.assertRaises(SAXException): with self.assertRaises(SAXException):
self.check_parse(TESTFN) self.check_parse(TESTFN)
# Collect leaked file.
gc.collect()
with open(TESTFN, 'rb') as f: with open(TESTFN, 'rb') as f:
with self.assertRaises(SAXException): with self.assertRaises(SAXException):
self.check_parse(f) self.check_parse(f)
@ -194,6 +191,21 @@ class ParseTest(unittest.TestCase):
input.setEncoding('iso-8859-1') input.setEncoding('iso-8859-1')
self.check_parse(input) self.check_parse(input)
def test_parse_close_source(self):
builtin_open = open
fileobj = None
def mock_open(*args):
nonlocal fileobj
fileobj = builtin_open(*args)
return fileobj
with mock.patch('xml.sax.saxutils.open', side_effect=mock_open):
make_xml_file(self.data, 'iso-8859-1', None)
with self.assertRaises(SAXException):
self.check_parse(TESTFN)
self.assertTrue(fileobj.closed)
def check_parseString(self, s): def check_parseString(self, s):
from xml.sax import parseString from xml.sax import parseString
result = StringIO() result = StringIO()

View File

@ -105,9 +105,16 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
source = saxutils.prepare_input_source(source) source = saxutils.prepare_input_source(source)
self._source = source self._source = source
try:
self.reset() self.reset()
self._cont_handler.setDocumentLocator(ExpatLocator(self)) self._cont_handler.setDocumentLocator(ExpatLocator(self))
xmlreader.IncrementalParser.parse(self, source) xmlreader.IncrementalParser.parse(self, source)
except:
# bpo-30264: Close the source on error to not leak resources:
# xml.sax.parse() doesn't give access to the underlying parser
# to the caller
self._close_source()
raise
def prepareParser(self, source): def prepareParser(self, source):
if source.getSystemId() is not None: if source.getSystemId() is not None:
@ -213,6 +220,17 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
# FIXME: when to invoke error()? # FIXME: when to invoke error()?
self._err_handler.fatalError(exc) self._err_handler.fatalError(exc)
def _close_source(self):
source = self._source
try:
file = source.getCharacterStream()
if file is not None:
file.close()
finally:
file = source.getByteStream()
if file is not None:
file.close()
def close(self): def close(self):
if (self._entity_stack or self._parser is None or if (self._entity_stack or self._parser is None or
isinstance(self._parser, _ClosedParser)): isinstance(self._parser, _ClosedParser)):
@ -232,14 +250,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
parser.ErrorColumnNumber = self._parser.ErrorColumnNumber parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
parser.ErrorLineNumber = self._parser.ErrorLineNumber parser.ErrorLineNumber = self._parser.ErrorLineNumber
self._parser = parser self._parser = parser
try: self._close_source()
file = self._source.getCharacterStream()
if file is not None:
file.close()
finally:
file = self._source.getByteStream()
if file is not None:
file.close()
def _reset_cont_handler(self): def _reset_cont_handler(self):
self._parser.ProcessingInstructionHandler = \ self._parser.ProcessingInstructionHandler = \