From c1f1ddf30a595c2bfa3c06e54fb03fa212cd28b5 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 17 May 2020 17:57:42 -0700 Subject: [PATCH] bpo-40597: email: Use CTE if lines are longer than max_line_length consistently (gh-20038) (gh-20084) raw_data_manager (default for EmailPolicy, EmailMessage) does correct wrapping of 'text' parts as long as the message contains characters outside of 7bit US-ASCII set: base64 or qp Content-Transfer-Encoding is applied if the lines would be too long without it. It did not, however, do this for ascii-only text, which could result in lines that were longer than policy.max_line_length or even the rfc 998 maximum. This changeset fixes the heuristic so that if lines are longer than policy.max_line_length, it will always apply a content-transfer-encoding so that the lines are wrapped correctly. (cherry picked from commit 6f2f475d5a2cd7675dce844f3af436ba919ef92b) Co-authored-by: Arkadiusz Hiler --- Lib/email/contentmanager.py | 14 +++++++------- Lib/test/test_email/test_contentmanager.py | 15 +++++++++++++++ .../2020-05-11-19-17-23.bpo-40597.4SGfgm.rst | 1 + 3 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index b904ded94c9..2b4b8757f46 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -146,13 +146,13 @@ def _encode_text(string, charset, cte, policy): def normal_body(lines): return b'\n'.join(lines) + b'\n' if cte==None: # Use heuristics to decide on the "best" encoding. - try: - return '7bit', normal_body(lines).decode('ascii') - except UnicodeDecodeError: - pass - if (policy.cte_type == '8bit' and - max(len(x) for x in lines) <= policy.max_line_length): - return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') + if max(len(x) for x in lines) <= policy.max_line_length: + try: + return '7bit', normal_body(lines).decode('ascii') + except UnicodeDecodeError: + pass + if policy.cte_type == '8bit': + return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), policy.max_line_length) diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py index 169058eac83..64dca2d017e 100644 --- a/Lib/test/test_email/test_contentmanager.py +++ b/Lib/test/test_email/test_contentmanager.py @@ -329,6 +329,21 @@ class TestRawDataManager(TestEmailBase): self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content) self.assertEqual(m.get_content(), content) + def test_set_text_plain_long_line_heuristics(self): + m = self._make_message() + content = ("Simple but long message that is over 78 characters" + " long to force transfer encoding.\n") + raw_data_manager.set_content(m, content) + self.assertEqual(str(m), textwrap.dedent("""\ + Content-Type: text/plain; charset="utf-8" + Content-Transfer-Encoding: quoted-printable + + Simple but long message that is over 78 characters long to = + force transfer encoding. + """)) + self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content) + self.assertEqual(m.get_content(), content) + def test_set_text_short_line_minimal_non_ascii_heuristics(self): m = self._make_message() content = "et là il est monté sur moi et il commence à m'éto.\n" diff --git a/Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst b/Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst new file mode 100644 index 00000000000..1b9fe609c25 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-11-19-17-23.bpo-40597.4SGfgm.rst @@ -0,0 +1 @@ +If text content lines are longer than policy.max_line_length, always use a content-encoding to make sure they are wrapped.