diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 56f545c0e6d..eb2b16bd6c0 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -25,6 +25,20 @@ events until either processing is finished or an error condition occurs. maliciously constructed data. If you need to parse untrusted or unauthenticated data see :ref:`xml-vulnerabilities`. +.. versionchanged:: 3.8 + + The SAX parser no longer processes general external entities by default to + increase security by default. To enable processing of external entities, + pass a custom parser instance in:: + + from xml.dom.pulldom import parse + from xml.sax import make_parser + from xml.sax.handler import feature_external_ges + + parser = make_parser() + parser.setFeature(feature_external_ges, True) + parse(filename, parser=parser) + Example:: diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index 63c24f80ac8..9b8ba6b17c8 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -65,8 +65,8 @@ kind sax etree minidom p ========================= ============== =============== ============== ============== ============== billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** -external entity expansion **Vulnerable** Safe (1) Safe (2) **Vulnerable** Safe (3) -`DTD`_ retrieval **Vulnerable** Safe Safe **Vulnerable** Safe +external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3) +`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe decompression bomb Safe Safe Safe Safe **Vulnerable** ========================= ============== =============== ============== ============== ============== @@ -75,6 +75,8 @@ decompression bomb Safe Safe Safe S 2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns the unexpanded entity verbatim. 3. :mod:`xmlrpclib` doesn't expand external entities and omits them. +4. Since Python 3.8.0, external general entities are no longer processed by + default since Python. billion laughs / exponential entity expansion diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst index 78d6633e098..aa3ea9bfc55 100644 --- a/Doc/library/xml.sax.rst +++ b/Doc/library/xml.sax.rst @@ -24,6 +24,14 @@ the SAX API. constructed data. If you need to parse untrusted or unauthenticated data see :ref:`xml-vulnerabilities`. +.. versionchanged:: 3.8 + + The SAX parser no longer processes general external entities by default + to increase security. Before, the parser created network connections + to fetch remote files or loaded local files from the file + system for DTD and entities. The feature can be enabled again with method + :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object + and argument :data:`~xml.sax.handler.feature_external_ges`. The convenience functions are: diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 9aaaa761cc8..e37a70f32d9 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -155,6 +155,15 @@ venv activating virtual environments under PowerShell Core 6.1. (Contributed by Brett Cannon in :issue:`32718`.) +xml +--- + +* As mitigation against DTD and external entity retrieval, the + :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process + external entities by default. + (Contributed by Christian Heimes in :issue:`17239`.) + + Optimizations ============= @@ -333,6 +342,9 @@ Changes in the Python API * :class:`uuid.UUID` now uses ``__slots__``, therefore instances can no longer be weak-referenced and attributes can no longer be added. +* :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process + external entities by default. + (Contributed by Christian Heimes in :issue:`17239`.) CPython bytecode changes ------------------------ diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index f454098c65b..4a1bad3442b 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -3,6 +3,7 @@ import unittest import xml.sax from xml.sax.xmlreader import AttributesImpl +from xml.sax.handler import feature_external_ges from xml.dom import pulldom from test.support import findfile @@ -166,6 +167,12 @@ class PullDOMTestCase(unittest.TestCase): # This should have returned 'END_ELEMENT'. self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT) + def test_external_ges_default(self): + parser = pulldom.parseString(SMALL_SAMPLE) + saxparser = parser.parser + ges = saxparser.getFeature(feature_external_ges) + self.assertEqual(ges, False) + class ThoroughTestCase(unittest.TestCase): """Test the hard-to-reach parts of pulldom.""" diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 2eb62905ffa..3044960a0ed 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -13,13 +13,14 @@ except SAXReaderNotAvailable: from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser -from xml.sax.handler import feature_namespaces +from xml.sax.handler import feature_namespaces, feature_external_ges from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from io import BytesIO, StringIO import codecs import gc import os.path import shutil +from urllib.error import URLError from test import support from test.support import findfile, run_unittest, TESTFN @@ -911,6 +912,18 @@ class ExpatReaderTest(XmlTestBase): def unparsedEntityDecl(self, name, publicId, systemId, ndata): self._entities.append((name, publicId, systemId, ndata)) + + class TestEntityRecorder: + def __init__(self): + self.entities = [] + + def resolveEntity(self, publicId, systemId): + self.entities.append((publicId, systemId)) + source = InputSource() + source.setPublicId(publicId) + source.setSystemId(systemId) + return source + def test_expat_dtdhandler(self): parser = create_parser() handler = self.TestDTDHandler() @@ -927,6 +940,32 @@ class ExpatReaderTest(XmlTestBase): [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]) self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")]) + def test_expat_external_dtd_enabled(self): + parser = create_parser() + parser.setFeature(feature_external_ges, True) + resolver = self.TestEntityRecorder() + parser.setEntityResolver(resolver) + + with self.assertRaises(URLError): + parser.feed( + '\n' + ) + self.assertEqual( + resolver.entities, [(None, 'unsupported://non-existing')] + ) + + def test_expat_external_dtd_default(self): + parser = create_parser() + resolver = self.TestEntityRecorder() + parser.setEntityResolver(resolver) + + parser.feed( + '\n' + ) + parser.feed('') + parser.close() + self.assertEqual(resolver.entities, []) + # ===== EntityResolver support class TestEntityResolver: @@ -936,8 +975,9 @@ class ExpatReaderTest(XmlTestBase): inpsrc.setByteStream(BytesIO(b"")) return inpsrc - def test_expat_entityresolver(self): + def test_expat_entityresolver_enabled(self): parser = create_parser() + parser.setFeature(feature_external_ges, True) parser.setEntityResolver(self.TestEntityResolver()) result = BytesIO() parser.setContentHandler(XMLGenerator(result)) @@ -951,6 +991,22 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), start + b"") + def test_expat_entityresolver_default(self): + parser = create_parser() + self.assertEqual(parser.getFeature(feature_external_ges), False) + parser.setEntityResolver(self.TestEntityResolver()) + result = BytesIO() + parser.setContentHandler(XMLGenerator(result)) + + parser.feed('\n') + parser.feed(']>\n') + parser.feed('&test;') + parser.close() + + self.assertEqual(result.getvalue(), start + + b"") + # ===== Attributes support class AttrGatherer(ContentHandler): diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index a52529051bf..ecb910f04f5 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -91,6 +91,12 @@ ENTITY_XML = """\ &entity; """ +EXTERNAL_ENTITY_XML = """\ + +]> +&entity; +""" def checkwarnings(*filters, quiet=False): def decorator(test): @@ -861,6 +867,13 @@ class ElementTreeTest(unittest.TestCase): root = parser.close() self.serialize_check(root, 'text') + # 4) external (SYSTEM) entity + + with self.assertRaises(ET.ParseError) as cm: + ET.XML(EXTERNAL_ENTITY_XML) + self.assertEqual(str(cm.exception), + 'undefined entity &entity;: line 4, column 10') + def test_namespace(self): # Test namespace issues. diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index 421358fa5bc..5066ffc2fa5 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -95,7 +95,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): self._lex_handler_prop = None self._parsing = 0 self._entity_stack = [] - self._external_ges = 1 + self._external_ges = 0 self._interning = None # XMLReader methods diff --git a/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst new file mode 100644 index 00000000000..8dd0fe8c1b5 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst @@ -0,0 +1,3 @@ +The xml.sax and xml.dom.minidom parsers no longer processes external +entities by default. External DTD and ENTITY declarations no longer +load files or create network connections.