From 929b70473829f04dedb8e802abcbd506926886e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Schoentgen?= Date: Sun, 14 Apr 2019 09:16:54 +0000 Subject: [PATCH] bpo-31658: Make xml.sax.parse accepting Path objects (GH-8564) --- Doc/library/xml.sax.reader.rst | 6 +++++- Lib/test/test_sax.py | 13 ++++++++++++- Lib/xml/sax/saxutils.py | 2 ++ .../2018-07-30-12-00-15.bpo-31658._bx7a_.rst | 2 ++ 4 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-07-30-12-00-15.bpo-31658._bx7a_.rst diff --git a/Doc/library/xml.sax.reader.rst b/Doc/library/xml.sax.reader.rst index 1b6e43145b9..113e9e93fb0 100644 --- a/Doc/library/xml.sax.reader.rst +++ b/Doc/library/xml.sax.reader.rst @@ -102,13 +102,17 @@ The :class:`XMLReader` interface supports the following methods: Process an input source, producing SAX events. The *source* object can be a system identifier (a string identifying the input source -- typically a file - name or a URL), a file-like object, or an :class:`InputSource` object. When + name or a URL), a file-like object, a :class:`pathlib.Path` or :term:`path-like <path-like object>` + object, or an :class:`InputSource` object. When :meth:`parse` returns, the input is completely processed, and the parser object can be discarded or reset. .. versionchanged:: 3.5 Added support of character streams. + .. versionchanged:: 3.8 + Added support of path-like objects. + .. 
method:: XMLReader.getContentHandler() diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 9addc06f20d..da4eb1da3c6 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -21,7 +21,7 @@ import os.path import shutil from urllib.error import URLError from test import support -from test.support import findfile, run_unittest, TESTFN +from test.support import findfile, run_unittest, FakePath, TESTFN TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") @@ -182,6 +182,10 @@ class ParseTest(unittest.TestCase): with self.assertRaises(SAXException): self.check_parse(f) + def test_parse_path_object(self): + make_xml_file(self.data, 'utf-8', None) + self.check_parse(FakePath(TESTFN)) + def test_parse_InputSource(self): # accept data without declared but with explicitly specified encoding make_xml_file(self.data, 'iso-8859-1', None) @@ -397,6 +401,13 @@ class PrepareInputSourceTest(unittest.TestCase): self.checkContent(prep.getByteStream(), b"This was read from a file.") + def test_path_objects(self): + # If the source is a Path object, use it as a system ID and open it. + prep = prepare_input_source(FakePath(self.file)) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + def test_binary_file(self): # If the source is a binary file-like object, use it as a byte # stream. 
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py index b4fc2da7640..c1612ea1ceb 100644 --- a/Lib/xml/sax/saxutils.py +++ b/Lib/xml/sax/saxutils.py @@ -339,6 +339,8 @@ def prepare_input_source(source, base=""): """This function takes an InputSource and an optional base URL and returns a fully resolved InputSource object ready for reading.""" + if isinstance(source, os.PathLike): + source = os.fspath(source) if isinstance(source, str): source = xmlreader.InputSource(source) elif hasattr(source, "read"): diff --git a/Misc/NEWS.d/next/Library/2018-07-30-12-00-15.bpo-31658._bx7a_.rst b/Misc/NEWS.d/next/Library/2018-07-30-12-00-15.bpo-31658._bx7a_.rst new file mode 100644 index 00000000000..8b35060fd73 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-07-30-12-00-15.bpo-31658._bx7a_.rst @@ -0,0 +1,2 @@ +:func:`xml.sax.parse` now supports :term:`path-like <path-like object>`. +Patch by Mickaël Schoentgen.