Issue #2175: SAX parsers now support the character stream of an InputSource object.
This commit is contained in:
parent
278ba2690c
commit
61de087f0f
|
@ -100,8 +100,10 @@ The :class:`XMLReader` interface supports the following methods:
|
||||||
system identifier (a string identifying the input source -- typically a file
|
system identifier (a string identifying the input source -- typically a file
|
||||||
name or a URL), a file-like object, or an :class:`InputSource` object. When
|
name or a URL), a file-like object, or an :class:`InputSource` object. When
|
||||||
:meth:`parse` returns, the input is completely processed, and the parser object
|
:meth:`parse` returns, the input is completely processed, and the parser object
|
||||||
can be discarded or reset. As a limitation, the current implementation only
|
can be discarded or reset.
|
||||||
accepts byte streams; processing of character streams is for further study.
|
|
||||||
|
.. versionchanged:: 3.5
|
||||||
|
Added support for character streams.
|
||||||
|
|
||||||
|
|
||||||
.. method:: XMLReader.getContentHandler()
|
.. method:: XMLReader.getContentHandler()
|
||||||
|
@ -288,8 +290,7 @@ InputSource Objects
|
||||||
|
|
||||||
.. method:: InputSource.setByteStream(bytefile)
|
.. method:: InputSource.setByteStream(bytefile)
|
||||||
|
|
||||||
Set the byte stream (a Python file-like object which does not perform
|
Set the byte stream (a :term:`binary file`) for this input source.
|
||||||
byte-to-character conversion) for this input source.
|
|
||||||
|
|
||||||
The SAX parser will ignore this if there is also a character stream specified,
|
The SAX parser will ignore this if there is also a character stream specified,
|
||||||
but it will use a byte stream in preference to opening a URI connection itself.
|
but it will use a byte stream in preference to opening a URI connection itself.
|
||||||
|
@ -308,8 +309,7 @@ InputSource Objects
|
||||||
|
|
||||||
.. method:: InputSource.setCharacterStream(charfile)
|
.. method:: InputSource.setCharacterStream(charfile)
|
||||||
|
|
||||||
Set the character stream for this input source. (The stream must be a Python 1.6
|
Set the character stream (a :term:`text file`) for this input source.
|
||||||
Unicode-wrapped file-like that performs conversion to strings.)
|
|
||||||
|
|
||||||
If there is a character stream specified, the SAX parser will ignore any byte
|
If there is a character stream specified, the SAX parser will ignore any byte
|
||||||
stream and will not attempt to open a URI connection to the system identifier.
|
stream and will not attempt to open a URI connection to the system identifier.
|
||||||
|
|
|
@ -499,6 +499,13 @@ xmlrpc
|
||||||
* :class:`xmlrpc.client.ServerProxy` is now a :term:`context manager`.
|
* :class:`xmlrpc.client.ServerProxy` is now a :term:`context manager`.
|
||||||
(Contributed by Claudiu Popa in :issue:`20627`.)
|
(Contributed by Claudiu Popa in :issue:`20627`.)
|
||||||
|
|
||||||
|
xml.sax
|
||||||
|
-------
|
||||||
|
|
||||||
|
* SAX parsers now support the character stream of an
|
||||||
|
:class:`~xml.sax.xmlreader.InputSource` object.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`2175`.)
|
||||||
|
|
||||||
faulthandler
|
faulthandler
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
|
|
@ -185,12 +185,24 @@ class PrepareInputSourceTest(unittest.TestCase):
|
||||||
def make_byte_stream(self):
|
def make_byte_stream(self):
|
||||||
return BytesIO(b"This is a byte stream.")
|
return BytesIO(b"This is a byte stream.")
|
||||||
|
|
||||||
|
def make_character_stream(self):
|
||||||
|
return StringIO("This is a character stream.")
|
||||||
|
|
||||||
def checkContent(self, stream, content):
|
def checkContent(self, stream, content):
|
||||||
self.assertIsNotNone(stream)
|
self.assertIsNotNone(stream)
|
||||||
self.assertEqual(stream.read(), content)
|
self.assertEqual(stream.read(), content)
|
||||||
stream.close()
|
stream.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_character_stream(self):
|
||||||
|
# If the source is an InputSource with a character stream, use it.
|
||||||
|
src = InputSource(self.file)
|
||||||
|
src.setCharacterStream(self.make_character_stream())
|
||||||
|
prep = prepare_input_source(src)
|
||||||
|
self.assertIsNone(prep.getByteStream())
|
||||||
|
self.checkContent(prep.getCharacterStream(),
|
||||||
|
"This is a character stream.")
|
||||||
|
|
||||||
def test_byte_stream(self):
|
def test_byte_stream(self):
|
||||||
# If the source is an InputSource that does not have a character
|
# If the source is an InputSource that does not have a character
|
||||||
# stream but does have a byte stream, use the byte stream.
|
# stream but does have a byte stream, use the byte stream.
|
||||||
|
@ -225,6 +237,14 @@ class PrepareInputSourceTest(unittest.TestCase):
|
||||||
self.checkContent(prep.getByteStream(),
|
self.checkContent(prep.getByteStream(),
|
||||||
b"This is a byte stream.")
|
b"This is a byte stream.")
|
||||||
|
|
||||||
|
def test_text_file(self):
|
||||||
|
# If the source is a text file-like object, use it as a character
|
||||||
|
# stream.
|
||||||
|
prep = prepare_input_source(self.make_character_stream())
|
||||||
|
self.assertIsNone(prep.getByteStream())
|
||||||
|
self.checkContent(prep.getCharacterStream(),
|
||||||
|
"This is a character stream.")
|
||||||
|
|
||||||
|
|
||||||
# ===== XMLGenerator
|
# ===== XMLGenerator
|
||||||
|
|
||||||
|
@ -904,6 +924,19 @@ class ExpatReaderTest(XmlTestBase):
|
||||||
|
|
||||||
self.assertEqual(result.getvalue(), xml_test_out)
|
self.assertEqual(result.getvalue(), xml_test_out)
|
||||||
|
|
||||||
|
def test_expat_inpsource_character_stream(self):
|
||||||
|
parser = create_parser()
|
||||||
|
result = BytesIO()
|
||||||
|
xmlgen = XMLGenerator(result)
|
||||||
|
|
||||||
|
parser.setContentHandler(xmlgen)
|
||||||
|
inpsrc = InputSource()
|
||||||
|
with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
|
||||||
|
inpsrc.setCharacterStream(f)
|
||||||
|
parser.parse(inpsrc)
|
||||||
|
|
||||||
|
self.assertEqual(result.getvalue(), xml_test_out)
|
||||||
|
|
||||||
# ===== IncrementalParser support
|
# ===== IncrementalParser support
|
||||||
|
|
||||||
def test_expat_incremental(self):
|
def test_expat_incremental(self):
|
||||||
|
|
|
@ -219,9 +219,14 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
|
||||||
self._parsing = 0
|
self._parsing = 0
|
||||||
# break cycle created by expat handlers pointing to our methods
|
# break cycle created by expat handlers pointing to our methods
|
||||||
self._parser = None
|
self._parser = None
|
||||||
bs = self._source.getByteStream()
|
try:
|
||||||
if bs is not None:
|
file = self._source.getCharacterStream()
|
||||||
bs.close()
|
if file is not None:
|
||||||
|
file.close()
|
||||||
|
finally:
|
||||||
|
file = self._source.getByteStream()
|
||||||
|
if file is not None:
|
||||||
|
file.close()
|
||||||
|
|
||||||
def _reset_cont_handler(self):
|
def _reset_cont_handler(self):
|
||||||
self._parser.ProcessingInstructionHandler = \
|
self._parser.ProcessingInstructionHandler = \
|
||||||
|
|
|
@ -345,11 +345,14 @@ def prepare_input_source(source, base=""):
|
||||||
elif hasattr(source, "read"):
|
elif hasattr(source, "read"):
|
||||||
f = source
|
f = source
|
||||||
source = xmlreader.InputSource()
|
source = xmlreader.InputSource()
|
||||||
source.setByteStream(f)
|
if isinstance(f.read(0), str):
|
||||||
|
source.setCharacterStream(f)
|
||||||
|
else:
|
||||||
|
source.setByteStream(f)
|
||||||
if hasattr(f, "name") and isinstance(f.name, str):
|
if hasattr(f, "name") and isinstance(f.name, str):
|
||||||
source.setSystemId(f.name)
|
source.setSystemId(f.name)
|
||||||
|
|
||||||
if source.getByteStream() is None:
|
if source.getCharacterStream() is None and source.getByteStream() is None:
|
||||||
sysid = source.getSystemId()
|
sysid = source.getSystemId()
|
||||||
basehead = os.path.dirname(os.path.normpath(base))
|
basehead = os.path.dirname(os.path.normpath(base))
|
||||||
sysidfilename = os.path.join(basehead, sysid)
|
sysidfilename = os.path.join(basehead, sysid)
|
||||||
|
|
|
@ -117,7 +117,9 @@ class IncrementalParser(XMLReader):
|
||||||
source = saxutils.prepare_input_source(source)
|
source = saxutils.prepare_input_source(source)
|
||||||
|
|
||||||
self.prepareParser(source)
|
self.prepareParser(source)
|
||||||
file = source.getByteStream()
|
file = source.getCharacterStream()
|
||||||
|
if file is None:
|
||||||
|
file = source.getByteStream()
|
||||||
buffer = file.read(self._bufsize)
|
buffer = file.read(self._bufsize)
|
||||||
while buffer:
|
while buffer:
|
||||||
self.feed(buffer)
|
self.feed(buffer)
|
||||||
|
|
|
@ -16,6 +16,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #2175: SAX parsers now support the character stream of an InputSource object.
|
||||||
|
|
||||||
- Issue #16840: Tkinter now supports 64-bit integers added in Tcl 8.4 and
|
- Issue #16840: Tkinter now supports 64-bit integers added in Tcl 8.4 and
|
||||||
arbitrary precision integers added in Tcl 8.5.
|
arbitrary precision integers added in Tcl 8.5.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue