mirror of https://github.com/python/cpython
gh-117313: Fix re-folding email messages containing non-standard line separators (GH-117369)
Only treat '\n', '\r' and '\r\n' as line separators when re-folding email messages. Preserve the control characters '\v', '\f', '\x1c', '\x1d' and '\x1e' and the Unicode line separators '\x85', '\u2028' and '\u2029' as is.
This commit is contained in:
parent
4e502a4997
commit
aec1dac4ef
|
@ -21,7 +21,7 @@ __all__ = [
|
|||
'HTTP',
|
||||
]
|
||||
|
||||
linesep_splitter = re.compile(r'\n|\r')
|
||||
linesep_splitter = re.compile(r'\n|\r\n?')
|
||||
|
||||
@_extend_docstrings
|
||||
class EmailPolicy(Policy):
|
||||
|
@ -205,7 +205,8 @@ class EmailPolicy(Policy):
|
|||
if hasattr(value, 'name'):
|
||||
return value.fold(policy=self)
|
||||
maxlen = self.max_line_length if self.max_line_length else sys.maxsize
|
||||
lines = value.splitlines()
|
||||
# We can't use splitlines here because it splits on more than \r and \n.
|
||||
lines = linesep_splitter.split(value)
|
||||
refold = (self.refold_source == 'all' or
|
||||
self.refold_source == 'long' and
|
||||
(lines and len(lines[0])+len(name)+2 > maxlen or
|
||||
|
|
|
@ -140,6 +140,39 @@ class TestGeneratorBase:
|
|||
g.flatten(msg, linesep='\n')
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
def test_flatten_linesep(self):
|
||||
source = 'Subject: one\n two\r three\r\n four\r\n\r\ntest body\r\n'
|
||||
msg = self.msgmaker(self.typ(source))
|
||||
self.assertEqual(msg['Subject'], 'one two three four')
|
||||
|
||||
expected = 'Subject: one\n two\n three\n four\n\ntest body\n'
|
||||
s = self.ioclass()
|
||||
g = self.genclass(s)
|
||||
g.flatten(msg)
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
expected = 'Subject: one two three four\n\ntest body\n'
|
||||
s = self.ioclass()
|
||||
g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
|
||||
g.flatten(msg)
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
def test_flatten_control_linesep(self):
|
||||
source = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\r\n\r\ntest body\r\n'
|
||||
msg = self.msgmaker(self.typ(source))
|
||||
self.assertEqual(msg['Subject'], 'one\v two\f three\x1c four\x1d five\x1e six')
|
||||
|
||||
expected = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\n\ntest body\n'
|
||||
s = self.ioclass()
|
||||
g = self.genclass(s)
|
||||
g.flatten(msg)
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
s = self.ioclass()
|
||||
g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
|
||||
g.flatten(msg)
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
def test_set_mangle_from_via_policy(self):
|
||||
source = textwrap.dedent("""\
|
||||
Subject: test that
|
||||
|
@ -224,6 +257,22 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
|
|||
ioclass = io.StringIO
|
||||
typ = str
|
||||
|
||||
def test_flatten_unicode_linesep(self):
|
||||
source = 'Subject: one\x85 two\u2028 three\u2029 four\r\n\r\ntest body\r\n'
|
||||
msg = self.msgmaker(self.typ(source))
|
||||
self.assertEqual(msg['Subject'], 'one\x85 two\u2028 three\u2029 four')
|
||||
|
||||
expected = 'Subject: =?utf-8?b?b25lwoUgdHdv4oCoIHRocmVl4oCp?= four\n\ntest body\n'
|
||||
s = self.ioclass()
|
||||
g = self.genclass(s)
|
||||
g.flatten(msg)
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
s = self.ioclass()
|
||||
g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
|
||||
g.flatten(msg)
|
||||
self.assertEqual(s.getvalue(), self.typ(expected))
|
||||
|
||||
|
||||
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
|
||||
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
Only treat ``'\n'``, ``'\r'`` and ``'\r\n'`` as line separators in
|
||||
re-folding the :mod:`email` messages. Preserve control characters ``'\v'``,
|
||||
``'\f'``, ``'\x1c'``, ``'\x1d'`` and ``'\x1e'`` and Unicode line separators
|
||||
``'\x85'``, ``'\u2028'`` and ``'\u2029'`` as is.
|
Loading…
Reference in New Issue