#11243: tests and fixes for handling of 'dirty data' in additional methods
This commit is contained in:
parent
4e4326829f
commit
a215023b78
|
@ -48,9 +48,9 @@ def _sanitize_header(name, value):
|
|||
def _splitparam(param):
|
||||
# Split header parameters. BAW: this may be too simple. It isn't
|
||||
# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
|
||||
# found in the wild. We may eventually need a full fledged parser
|
||||
# eventually.
|
||||
a, sep, b = param.partition(';')
|
||||
# found in the wild. We may eventually need a full fledged parser.
|
||||
# RDM: we might have a Header here; for now just stringify it.
|
||||
a, sep, b = str(param).partition(';')
|
||||
if not sep:
|
||||
return a.strip(), None
|
||||
return a.strip(), b.strip()
|
||||
|
@ -90,6 +90,8 @@ def _formatparam(param, value=None, quote=True):
|
|||
return param
|
||||
|
||||
def _parseparam(s):
|
||||
# RDM: This might be a Header, so for now stringify it.
|
||||
s = ';' + str(s)
|
||||
plist = []
|
||||
while s[:1] == ';':
|
||||
s = s[1:]
|
||||
|
@ -240,7 +242,8 @@ class Message:
|
|||
if i is not None and not isinstance(self._payload, list):
|
||||
raise TypeError('Expected list, got %s' % type(self._payload))
|
||||
payload = self._payload
|
||||
cte = self.get('content-transfer-encoding', '').lower()
|
||||
# cte might be a Header, so for now stringify it.
|
||||
cte = str(self.get('content-transfer-encoding', '')).lower()
|
||||
# payload may be bytes here.
|
||||
if isinstance(payload, str):
|
||||
if _has_surrogates(payload):
|
||||
|
@ -561,7 +564,7 @@ class Message:
|
|||
if value is missing:
|
||||
return failobj
|
||||
params = []
|
||||
for p in _parseparam(';' + value):
|
||||
for p in _parseparam(value):
|
||||
try:
|
||||
name, val = p.split('=', 1)
|
||||
name = name.strip()
|
||||
|
|
|
@ -2995,6 +2995,58 @@ class Test8BitBytesHandling(unittest.TestCase):
|
|||
['foo@bar.com',
|
||||
'g\uFFFD\uFFFDst'])
|
||||
|
||||
def test_get_content_type_with_8bit(self):
|
||||
msg = email.message_from_bytes(textwrap.dedent("""\
|
||||
Content-Type: text/pl\xA7in; charset=utf-8
|
||||
""").encode('latin-1'))
|
||||
self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
|
||||
self.assertEqual(msg.get_content_maintype(), "text")
|
||||
self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
|
||||
|
||||
def test_get_params_with_8bit(self):
|
||||
msg = email.message_from_bytes(
|
||||
'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
|
||||
self.assertEqual(msg.get_params(header='x-header'),
|
||||
[('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
|
||||
self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
|
||||
# XXX: someday you might be able to get 'b\xa7r'; for now you can't.
|
||||
self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
|
||||
|
||||
def test_get_rfc2231_params_with_8bit(self):
|
||||
msg = email.message_from_bytes(textwrap.dedent("""\
|
||||
Content-Type: text/plain; charset=us-ascii;
|
||||
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
|
||||
).encode('latin-1'))
|
||||
self.assertEqual(msg.get_param('title'),
|
||||
('us-ascii', 'en', 'This is not f\uFFFDn'))
|
||||
|
||||
def test_set_rfc2231_params_with_8bit(self):
|
||||
msg = email.message_from_bytes(textwrap.dedent("""\
|
||||
Content-Type: text/plain; charset=us-ascii;
|
||||
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
|
||||
).encode('latin-1'))
|
||||
msg.set_param('title', 'test')
|
||||
self.assertEqual(msg.get_param('title'), 'test')
|
||||
|
||||
def test_del_rfc2231_params_with_8bit(self):
|
||||
msg = email.message_from_bytes(textwrap.dedent("""\
|
||||
Content-Type: text/plain; charset=us-ascii;
|
||||
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
|
||||
).encode('latin-1'))
|
||||
msg.del_param('title')
|
||||
self.assertEqual(msg.get_param('title'), None)
|
||||
self.assertEqual(msg.get_content_maintype(), 'text')
|
||||
|
||||
def test_get_payload_with_8bit_cte_header(self):
|
||||
msg = email.message_from_bytes(textwrap.dedent("""\
|
||||
Content-Transfer-Encoding: b\xa7se64
|
||||
Content-Type: text/plain; charset=latin-1
|
||||
|
||||
payload
|
||||
""").encode('latin-1'))
|
||||
self.assertEqual(msg.get_payload(), 'payload\n')
|
||||
self.assertEqual(msg.get_payload(decode=True), b'payload\n')
|
||||
|
||||
non_latin_bin_msg = textwrap.dedent("""\
|
||||
From: foo@bar.com
|
||||
To: báz
|
||||
|
|
|
@ -40,6 +40,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #11243: fix the parameter-querying methods of Message to work if
|
||||
the headers contain un-encoded non-ASCII data.
|
||||
|
||||
- Issue #11401: fix handling of headers with no value; this fixes a regression
|
||||
relative to Python 2; the result is now the same as it was in Python 2.
|
||||
|
||||
|
|
Loading…
Reference in New Issue