From a215023b784eb1e23b2e91a007bc9a19750ed3c0 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Wed, 16 Mar 2011 21:11:23 -0400 Subject: [PATCH] #11243: tests and fixes for handling of 'dirty data' in additional methods --- Lib/email/message.py | 13 +++++---- Lib/email/test/test_email.py | 52 ++++++++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index 2713bc5d355..922617adbbb 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -48,9 +48,9 @@ def _sanitize_header(name, value): def _splitparam(param): # Split header parameters. BAW: this may be too simple. It isn't # strictly RFC 2045 (section 5.1) compliant, but it catches most headers - # found in the wild. We may eventually need a full fledged parser - # eventually. - a, sep, b = param.partition(';') + # found in the wild. We may eventually need a full fledged parser. + # RDM: we might have a Header here; for now just stringify it. + a, sep, b = str(param).partition(';') if not sep: return a.strip(), None return a.strip(), b.strip() @@ -90,6 +90,8 @@ def _formatparam(param, value=None, quote=True): return param def _parseparam(s): + # RDM This might be a Header, so for now stringify it. + s = ';' + str(s) plist = [] while s[:1] == ';': s = s[1:] @@ -240,7 +242,8 @@ class Message: if i is not None and not isinstance(self._payload, list): raise TypeError('Expected list, got %s' % type(self._payload)) payload = self._payload - cte = self.get('content-transfer-encoding', '').lower() + # cte might be a Header, so for now stringify it. + cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. if isinstance(payload, str): if _has_surrogates(payload): @@ -561,7 +564,7 @@ class Message: if value is missing: return failobj params = [] - for p in _parseparam(';' + value): + for p in _parseparam(value): try: name, val = p.split('=', 1) name = name.strip() diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index dcb2e95493d..9f64e091a20 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2995,6 +2995,58 @@ class Test8BitBytesHandling(unittest.TestCase): ['foo@bar.com', 'g\uFFFD\uFFFDst']) + def test_get_content_type_with_8bit(self): + msg = email.message_from_bytes(textwrap.dedent("""\ + Content-Type: text/pl\xA7in; charset=utf-8 + """).encode('latin-1')) + self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") + self.assertEqual(msg.get_content_maintype(), "text") + self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") + + def test_get_params_with_8bit(self): + msg = email.message_from_bytes( + 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) + self.assertEqual(msg.get_params(header='x-header'), + [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) + self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') + # XXX: someday you might be able to get 'b\xa7r', for now you can't. + self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) + + def test_get_rfc2231_params_with_8bit(self): + msg = email.message_from_bytes(textwrap.dedent("""\ + Content-Type: text/plain; charset=us-ascii; + title*=us-ascii'en'This%20is%20not%20f\xa7n""" + ).encode('latin-1')) + self.assertEqual(msg.get_param('title'), + ('us-ascii', 'en', 'This is not f\uFFFDn')) + + def test_set_rfc2231_params_with_8bit(self): + msg = email.message_from_bytes(textwrap.dedent("""\ + Content-Type: text/plain; charset=us-ascii; + title*=us-ascii'en'This%20is%20not%20f\xa7n""" + ).encode('latin-1')) + msg.set_param('title', 'test') + self.assertEqual(msg.get_param('title'), 'test') + + def test_del_rfc2231_params_with_8bit(self): + msg = email.message_from_bytes(textwrap.dedent("""\ + Content-Type: text/plain; charset=us-ascii; + title*=us-ascii'en'This%20is%20not%20f\xa7n""" + ).encode('latin-1')) + msg.del_param('title') + self.assertEqual(msg.get_param('title'), None) + self.assertEqual(msg.get_content_maintype(), 'text') + + def test_get_payload_with_8bit_cte_header(self): + msg = email.message_from_bytes(textwrap.dedent("""\ + Content-Transfer-Encoding: b\xa7se64 + Content-Type: text/plain; charset=latin-1 + + payload + """).encode('latin-1')) + self.assertEqual(msg.get_payload(), 'payload\n') + self.assertEqual(msg.get_payload(decode=True), b'payload\n') + non_latin_bin_msg = textwrap.dedent("""\ From: foo@bar.com To: báz diff --git a/Misc/NEWS b/Misc/NEWS index c787ece2a8f..c787fedbaba 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -40,6 +40,9 @@ Core and Builtins Library ------- +- Issue #11243: fix the parameter querying methods of Message to work if + the headers contain un-encoded non-ASCII data. + - Issue #11401: fix handling of headers with no value; this fixes a regression relative to Python2 and the result is now the same as it was in Python2.