diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst
index 56f545c0e6d..eb2b16bd6c0 100644
--- a/Doc/library/xml.dom.pulldom.rst
+++ b/Doc/library/xml.dom.pulldom.rst
@@ -25,6 +25,20 @@ events until either processing is finished or an error condition occurs.
maliciously constructed data. If you need to parse untrusted or
unauthenticated data see :ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.8
+
+ The SAX parser no longer processes general external entities by default to
+ increase security by default. To enable processing of external entities,
+ pass a custom parser instance in::
+
+ from xml.dom.pulldom import parse
+ from xml.sax import make_parser
+ from xml.sax.handler import feature_external_ges
+
+ parser = make_parser()
+ parser.setFeature(feature_external_ges, True)
+ parse(filename, parser=parser)
+
Example::
diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst
index 63c24f80ac8..9b8ba6b17c8 100644
--- a/Doc/library/xml.rst
+++ b/Doc/library/xml.rst
@@ -65,8 +65,8 @@ kind sax etree minidom p
========================= ============== =============== ============== ============== ==============
billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
-external entity expansion **Vulnerable** Safe (1) Safe (2) **Vulnerable** Safe (3)
-`DTD`_ retrieval **Vulnerable** Safe Safe **Vulnerable** Safe
+external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3)
+`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe
decompression bomb Safe Safe Safe Safe **Vulnerable**
========================= ============== =============== ============== ============== ==============
@@ -75,6 +75,8 @@ decompression bomb Safe Safe Safe S
2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
the unexpanded entity verbatim.
3. :mod:`xmlrpclib` doesn't expand external entities and omits them.
+4. Since Python 3.8.0, external general entities are no longer processed by
+ default since Python.
billion laughs / exponential entity expansion
diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst
index 78d6633e098..aa3ea9bfc55 100644
--- a/Doc/library/xml.sax.rst
+++ b/Doc/library/xml.sax.rst
@@ -24,6 +24,14 @@ the SAX API.
constructed data. If you need to parse untrusted or unauthenticated data see
:ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.8
+
+ The SAX parser no longer processes general external entities by default
+ to increase security. Before, the parser created network connections
+ to fetch remote files or loaded local files from the file
+ system for DTD and entities. The feature can be enabled again with method
+ :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object
+ and argument :data:`~xml.sax.handler.feature_external_ges`.
The convenience functions are:
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index 9aaaa761cc8..e37a70f32d9 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -155,6 +155,15 @@ venv
activating virtual environments under PowerShell Core 6.1.
(Contributed by Brett Cannon in :issue:`32718`.)
+xml
+---
+
+* As mitigation against DTD and external entity retrieval, the
+ :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
+
+
Optimizations
=============
@@ -333,6 +342,9 @@ Changes in the Python API
* :class:`uuid.UUID` now uses ``__slots__``, therefore instances can no longer
be weak-referenced and attributes can no longer be added.
+* :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
CPython bytecode changes
------------------------
diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py
index f454098c65b..4a1bad3442b 100644
--- a/Lib/test/test_pulldom.py
+++ b/Lib/test/test_pulldom.py
@@ -3,6 +3,7 @@ import unittest
import xml.sax
from xml.sax.xmlreader import AttributesImpl
+from xml.sax.handler import feature_external_ges
from xml.dom import pulldom
from test.support import findfile
@@ -166,6 +167,12 @@ class PullDOMTestCase(unittest.TestCase):
# This should have returned 'END_ELEMENT'.
self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)
+ def test_external_ges_default(self):
+ parser = pulldom.parseString(SMALL_SAMPLE)
+ saxparser = parser.parser
+ ges = saxparser.getFeature(feature_external_ges)
+ self.assertEqual(ges, False)
+
class ThoroughTestCase(unittest.TestCase):
"""Test the hard-to-reach parts of pulldom."""
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 2eb62905ffa..3044960a0ed 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -13,13 +13,14 @@ except SAXReaderNotAvailable:
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import feature_namespaces, feature_external_ges
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
import gc
import os.path
import shutil
+from urllib.error import URLError
from test import support
from test.support import findfile, run_unittest, TESTFN
@@ -911,6 +912,18 @@ class ExpatReaderTest(XmlTestBase):
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._entities.append((name, publicId, systemId, ndata))
+
+ class TestEntityRecorder:
+ def __init__(self):
+ self.entities = []
+
+ def resolveEntity(self, publicId, systemId):
+ self.entities.append((publicId, systemId))
+ source = InputSource()
+ source.setPublicId(publicId)
+ source.setSystemId(systemId)
+ return source
+
def test_expat_dtdhandler(self):
parser = create_parser()
handler = self.TestDTDHandler()
@@ -927,6 +940,32 @@ class ExpatReaderTest(XmlTestBase):
[("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
+ def test_expat_external_dtd_enabled(self):
+ parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ with self.assertRaises(URLError):
+ parser.feed(
+ '\n'
+ )
+ self.assertEqual(
+ resolver.entities, [(None, 'unsupported://non-existing')]
+ )
+
+ def test_expat_external_dtd_default(self):
+ parser = create_parser()
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ parser.feed(
+ '\n'
+ )
+ parser.feed('')
+ parser.close()
+ self.assertEqual(resolver.entities, [])
+
# ===== EntityResolver support
class TestEntityResolver:
@@ -936,8 +975,9 @@ class ExpatReaderTest(XmlTestBase):
inpsrc.setByteStream(BytesIO(b""))
return inpsrc
- def test_expat_entityresolver(self):
+ def test_expat_entityresolver_enabled(self):
parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
parser.setEntityResolver(self.TestEntityResolver())
result = BytesIO()
parser.setContentHandler(XMLGenerator(result))
@@ -951,6 +991,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), start +
b"")
+ def test_expat_entityresolver_default(self):
+ parser = create_parser()
+ self.assertEqual(parser.getFeature(feature_external_ges), False)
+ parser.setEntityResolver(self.TestEntityResolver())
+ result = BytesIO()
+ parser.setContentHandler(XMLGenerator(result))
+
+ parser.feed('\n')
+ parser.feed(']>\n')
+ parser.feed('&test;')
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start +
+ b"")
+
# ===== Attributes support
class AttrGatherer(ContentHandler):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index a52529051bf..ecb910f04f5 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -91,6 +91,12 @@ ENTITY_XML = """\
&entity;
"""
+EXTERNAL_ENTITY_XML = """\
+
+]>
+&entity;
+"""
def checkwarnings(*filters, quiet=False):
def decorator(test):
@@ -861,6 +867,13 @@ class ElementTreeTest(unittest.TestCase):
root = parser.close()
self.serialize_check(root, 'text')
+ # 4) external (SYSTEM) entity
+
+ with self.assertRaises(ET.ParseError) as cm:
+ ET.XML(EXTERNAL_ENTITY_XML)
+ self.assertEqual(str(cm.exception),
+ 'undefined entity &entity;: line 4, column 10')
+
def test_namespace(self):
# Test namespace issues.
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 421358fa5bc..5066ffc2fa5 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -95,7 +95,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
- self._external_ges = 1
+ self._external_ges = 0
self._interning = None
# XMLReader methods
diff --git a/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst
new file mode 100644
index 00000000000..8dd0fe8c1b5
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst
@@ -0,0 +1,3 @@
+The xml.sax and xml.dom.minidom parsers no longer processes external
+entities by default. External DTD and ENTITY declarations no longer
+load files or create network connections.