From 36eea7af48ca0a1c96b78c82bf95bbd29d2332da Mon Sep 17 00:00:00 2001
From: Abhilash Raj <maxking@users.noreply.github.com>
Date: Tue, 11 Jun 2019 19:28:10 -0400
Subject: [PATCH] [3.8] bpo-36520: Email header folded incorrectly (GH-13608)
 (GH-13909)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [bpo-36520](https://bugs.python.org/issue36520): reset the encoded word offset when starting a new
line during an email header folding operation

* 📜🤖 Added by blurb_it.

* [bpo-36520](https://bugs.python.org/issue36520): add an additional test case, and provide descriptive
comments for the test_folding_with_utf8_encoding_* tests

* [bpo-36520](https://bugs.python.org/issue36520): fix whitespace issue

* [bpo-36520](https://bugs.python.org/issue36520): changes per reviewer request -- remove extraneous
backslashes; add whitespace between terminating quotes and
line-continuation backslashes; use "bpo-" instead of
"issue GH-" in comments
(cherry picked from commit f6713e84afc5addcfa8477dbdf2c027787f711c0)

Co-authored-by: websurfer5 <49998481+websurfer5@users.noreply.github.com>


https://bugs.python.org/issue36520
---
 Lib/email/_header_value_parser.py             |   1 +
 Lib/test/test_email/test_message.py           | 131 ++++++++++++++++++
 .../2019-05-28-02-37-00.bpo-36520.W4tday.rst  |   1 +
 3 files changed, 133 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 34969ab5915..308db4d9105 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -2768,6 +2768,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             newline = _steal_trailing_WSP_if_exists(lines)
             if newline or part.startswith_fws():
                 lines.append(newline + tstr)
+                last_ew = None
                 continue
         if not hasattr(part, 'encode'):
             # It's not a terminal, try folding the subparts.
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py
index f3a57df9e9c..5dc46e1b812 100644
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -784,6 +784,137 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase):
         m['Subject'] = 'unicöde'
         self.assertEqual(str(m), 'Subject: unicöde\n\n')
 
+    def test_folding_with_utf8_encoding_1(self):
+        # bpo-36520
+        #
+        # The subject has UTF-8 words on both sides of the
+        # whitespace fold point, and the line length limit
+        # falls inside an ASCII word, so that word must
+        # start the continuation line unencoded.
+
+        m = EmailMessage()
+        m['Subject'] = ('Hello Wörld! Hello Wörld! '
+                        'Hello Wörld! Hello Wörld!Hello Wörld!')
+        expected = (b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
+                    b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
+                    b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+        self.assertEqual(bytes(m), expected)
+
+
+    def test_folding_with_utf8_encoding_2(self):
+        # bpo-36520
+        #
+        # The subject has UTF-8 words on both sides of the
+        # whitespace fold point; here the line length limit
+        # is reached at the end of an encoded word rather
+        # than inside an ASCII word.
+
+        m = EmailMessage()
+        m['Subject'] = ('Hello Wörld! Hello Wörld! '
+                        'Hello Wörlds123! Hello Wörld!Hello Wörld!')
+        expected = (b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
+                    b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
+                    b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+        self.assertEqual(bytes(m), expected)
+
+    def test_folding_with_utf8_encoding_3(self):
+        # bpo-36520
+        #
+        # The subject has UTF-8 words on both sides of the
+        # whitespace fold point, and the line length limit
+        # is reached at the end of the first
+        # whitespace-delimited word.
+
+        m = EmailMessage()
+        m['Subject'] = ('Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! '
+                        'Hello Wörld!Hello Wörld!')
+        expected = (b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
+                    b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
+                    b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+        self.assertEqual(bytes(m), expected)
+
+    def test_folding_with_utf8_encoding_4(self):
+        # bpo-36520
+        #
+        # The first word of the subject contains UTF-8 and
+        # the fold point lies inside that word, so the
+        # word is split across two encoded words on
+        # consecutive lines.
+
+        m = EmailMessage()
+        m['Subject'] = ('Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello'
+                        ' Wörld!Hello Wörld!')
+        expected = (b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
+                    b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
+                    b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
+        self.assertEqual(bytes(m), expected)
+
+    def test_folding_with_utf8_encoding_5(self):
+        # bpo-36520
+        #
+        # Only ASCII words precede the fold point; the one
+        # UTF-8 word comes after it.
+
+        m = EmailMessage()
+        m['Subject'] = ('123456789 123456789 123456789 123456789 123456789'
+                        ' 123456789 123456789 Hello Wörld!')
+        expected = (b'Subject: 123456789 123456789 123456789 123456789'
+                    b' 123456789 123456789 123456789\n'
+                    b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n')
+        self.assertEqual(bytes(m), expected)
+
+    def test_folding_with_utf8_encoding_6(self):
+        # bpo-36520
+        #
+        # The one UTF-8 word precedes the fold point and
+        # only ASCII words follow it.
+
+        m = EmailMessage()
+        m['Subject'] = ('123456789 123456789 123456789 123456789 Hello Wörld!'
+                        ' 123456789 123456789 123456789 123456789 123456789'
+                        ' 123456789')
+        expected = (b'Subject: 123456789 123456789 123456789 123456789'
+                    b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
+                    b'123456789 123456789 123456789 123456789 '
+                    b'123456789\n\n')
+        self.assertEqual(bytes(m), expected)
+
+    def test_folding_with_utf8_encoding_7(self):
+        # bpo-36520
+        #
+        # The header is folded twice: UTF-8 words appear
+        # before and after the first fold point, and only
+        # ASCII words follow the second fold point.
+
+        m = EmailMessage()
+        m['Subject'] = ('123456789 123456789 Hello Wörld! Hello Wörld! '
+                        '123456789-123456789 123456789 Hello Wörld! 123456789'
+                        ' 123456789')
+        expected = (b'Subject: 123456789 123456789 Hello =?utf-8?q?'
+                    b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
+                    b' 123456789-123456789 123456789 Hello '
+                    b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n')
+        self.assertEqual(bytes(m), expected)
+
+    def test_folding_with_utf8_encoding_8(self):
+        # bpo-36520
+        #
+        # The header is folded twice: UTF-8 words appear
+        # before the first fold point, only ASCII words
+        # lie between the two fold points, and a UTF-8
+        # word follows the second fold point.
+
+        m = EmailMessage()
+        m['Subject'] = ('123456789 123456789 Hello Wörld! Hello Wörld! '
+                        '123456789 123456789 123456789 123456789 123456789 '
+                        '123456789-123456789 123456789 Hello Wörld! 123456789'
+                        ' 123456789')
+        expected = (b'Subject: 123456789 123456789 Hello '
+                    b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
+                    b'123456789 123456789 123456789 123456789 '
+                    b'123456789-123456789\n 123456789 Hello '
+                    b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
+        self.assertEqual(bytes(m), expected)
 
 class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
     # Doing the full test run here may seem a bit redundant, since the two
diff --git a/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst
new file mode 100644
index 00000000000..8171bfe9e2d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-28-02-37-00.bpo-36520.W4tday.rst
@@ -0,0 +1 @@
+Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.