mirror of https://github.com/python/cpython
bpo-36520: Email header folded incorrectly (#13608)
* bpo-36520: reset the encoded word offset when starting a new line during an email header folding operation * 📜🤖 Added by blurb_it. * bpo-36520: add an additional test case, and provide descriptive comments for the test_folding_with_utf8_encoding_* tests * bpo-36520: fix whitespace issue * bpo-36520: changes per reviewer request -- remove extraneous backslashes; add whitespace between terminating quotes and line-continuation backslashes; use "bpo-" instead of "issue #" in comments
This commit is contained in:
parent
e0c0c7e8c9
commit
f6713e84af
|
@ -2789,6 +2789,7 @@ def _refold_parse_tree(parse_tree, *, policy):
|
|||
newline = _steal_trailing_WSP_if_exists(lines)
|
||||
if newline or part.startswith_fws():
|
||||
lines.append(newline + tstr)
|
||||
last_ew = None
|
||||
continue
|
||||
if not hasattr(part, 'encode'):
|
||||
# It's not a terminal, try folding the subparts.
|
||||
|
|
|
@ -784,6 +784,137 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase):
|
|||
m['Subject'] = 'unicöde'
|
||||
self.assertEqual(str(m), 'Subject: unicöde\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_1(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line that contains UTF-8 words before
|
||||
# and after the whitespace fold point, where the
|
||||
# line length limit is reached within an ASCII
|
||||
# word.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
|
||||
'Hello Wörld! Hello Wörld!Hello Wörld!'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
|
||||
b'=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
|
||||
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
|
||||
|
||||
|
||||
def test_folding_with_utf8_encoding_2(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line that contains UTF-8 words before
|
||||
# and after the whitespace fold point, where the
|
||||
# line length limit is reached at the end of an
|
||||
# encoded word.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = 'Hello Wörld! Hello Wörld! ' \
|
||||
'Hello Wörlds123! Hello Wörld!Hello Wörld!'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: Hello =?utf-8?q?W=C3=B6rld!_Hello_W'
|
||||
b'=C3=B6rld!_Hello_W=C3=B6rlds123!?=\n'
|
||||
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_3(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line that contains UTF-8 words before
|
||||
# and after the whitespace fold point, where the
|
||||
# line length limit is reached at the end of the
|
||||
# first word.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123! ' \
|
||||
'Hello Wörld!Hello Wörld!'
|
||||
self.assertEqual(bytes(m), \
|
||||
b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
|
||||
b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
|
||||
b' Hello =?utf-8?q?W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_4(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line that contains UTF-8 words before
|
||||
# and after the fold point, where the first
|
||||
# word is UTF-8 and the fold point is within
|
||||
# the word.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = 'Hello-Wörld!-Hello-Wörld!-Hello-Wörlds123!-Hello' \
|
||||
' Wörld!Hello Wörld!'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: =?utf-8?q?Hello-W=C3=B6rld!-Hello-W'
|
||||
b'=C3=B6rld!-Hello-W=C3=B6rlds123!?=\n'
|
||||
b' =?utf-8?q?-Hello_W=C3=B6rld!Hello_W=C3=B6rld!?=\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_5(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line that contains a UTF-8 word after
|
||||
# the fold point.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = '123456789 123456789 123456789 123456789 123456789' \
|
||||
' 123456789 123456789 Hello Wörld!'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: 123456789 123456789 123456789 123456789'
|
||||
b' 123456789 123456789 123456789\n'
|
||||
b' Hello =?utf-8?q?W=C3=B6rld!?=\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_6(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line that contains a UTF-8 word before
|
||||
# the fold point and ASCII words after
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = '123456789 123456789 123456789 123456789 Hello Wörld!' \
|
||||
' 123456789 123456789 123456789 123456789 123456789' \
|
||||
' 123456789'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: 123456789 123456789 123456789 123456789'
|
||||
b' Hello =?utf-8?q?W=C3=B6rld!?=\n 123456789 '
|
||||
b'123456789 123456789 123456789 123456789 '
|
||||
b'123456789\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_7(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line twice that contains UTF-8 words before
|
||||
# and after the first fold point, and ASCII words
|
||||
# after the second fold point.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
|
||||
'123456789-123456789 123456789 Hello Wörld! 123456789' \
|
||||
' 123456789'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: 123456789 123456789 Hello =?utf-8?q?'
|
||||
b'W=C3=B6rld!_Hello_W=C3=B6rld!?=\n'
|
||||
b' 123456789-123456789 123456789 Hello '
|
||||
b'=?utf-8?q?W=C3=B6rld!?= 123456789\n 123456789\n\n')
|
||||
|
||||
def test_folding_with_utf8_encoding_8(self):
|
||||
# bpo-36520
|
||||
#
|
||||
# Fold a line twice that contains UTF-8 words before
|
||||
# the first fold point, and ASCII words after the
|
||||
# first fold point, and UTF-8 words after the second
|
||||
# fold point.
|
||||
|
||||
m = EmailMessage()
|
||||
m['Subject'] = '123456789 123456789 Hello Wörld! Hello Wörld! ' \
|
||||
'123456789 123456789 123456789 123456789 123456789 ' \
|
||||
'123456789-123456789 123456789 Hello Wörld! 123456789' \
|
||||
' 123456789'
|
||||
self.assertEqual(bytes(m),
|
||||
b'Subject: 123456789 123456789 Hello '
|
||||
b'=?utf-8?q?W=C3=B6rld!_Hello_W=C3=B6rld!?=\n 123456789 '
|
||||
b'123456789 123456789 123456789 123456789 '
|
||||
b'123456789-123456789\n 123456789 Hello '
|
||||
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
|
||||
|
||||
class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
|
||||
# Doing the full test run here may seem a bit redundant, since the two
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Lengthy email headers with UTF-8 characters are now properly encoded when they are folded. Patch by Jeffrey Kintscher.
|
Loading…
Reference in New Issue