From 554bcbf1b925399ef06dd9fd6b87a20f5d028c1e Mon Sep 17 00:00:00 2001 From: R David Murray Date: Wed, 11 Jun 2014 11:18:08 -0400 Subject: [PATCH] #19662: add decode_data to smtpd so you can get at DATA in bytes form. Otherwise smtpd is restricted to 7bit clean data, since even if the incoming data is actually utf-8, it will often break things to decode it before parsing the message. Patch by Maciej Szulik, with some adjustments (mostly the warning support). --- Doc/library/smtpd.rst | 24 ++++++++- Doc/whatsnew/3.5.rst | 16 ++++++ Lib/smtpd.py | 45 ++++++++++++---- Lib/test/test_smtpd.py | 118 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 185 insertions(+), 18 deletions(-) diff --git a/Doc/library/smtpd.rst b/Doc/library/smtpd.rst index 3ebed062609..e6625df74fd 100644 --- a/Doc/library/smtpd.rst +++ b/Doc/library/smtpd.rst @@ -28,7 +28,7 @@ SMTPServer Objects .. class:: SMTPServer(localaddr, remoteaddr, data_size_limit=33554432,\ - map=None) + map=None, decode_data=True) Create a new :class:`SMTPServer` object, which binds to local address *localaddr*. It will treat *remoteaddr* as an upstream SMTP relayer. It @@ -41,6 +41,11 @@ SMTPServer Objects A dictionary can be specified in *map* to avoid using a global socket map. + *decode_data* specifies whether the data portion of the SMTP transaction + should be decoded using UTF-8. The default is ``True`` for backward + compatibility reasons, but will change to ``False`` in Python 3.6. Specify + the keyword value explicitly to avoid the :exc:`DeprecationWarning`. + .. method:: process_message(peer, mailfrom, rcpttos, data) Raise :exc:`NotImplementedError` exception. Override this in subclasses to @@ -51,6 +56,10 @@ SMTPServer Objects containing the contents of the e-mail (which should be in :rfc:`2822` format). + If the *decode_data* constructor keyword is set to ``True``, the *data* + argument will be a unicode string. If it is set to ``False``, it + will be a bytes object. + .. attribute:: channel_class Override this in subclasses to use a custom :class:`SMTPChannel` for @@ -59,6 +68,9 @@ SMTPServer Objects .. versionchanged:: 3.4 The *map* argument was added. + .. versionchanged:: 3.5 + the *decode_data* argument was added. + DebuggingServer Objects ----------------------- @@ -97,7 +109,7 @@ SMTPChannel Objects ------------------- .. class:: SMTPChannel(server, conn, addr, data_size_limit=33554432,\ - map=None)) + map=None, decode_data=True) Create a new :class:`SMTPChannel` object which manages the communication between the server and a single SMTP client. @@ -110,9 +122,17 @@ SMTPChannel Objects A dictionary can be specified in *map* to avoid using a global socket map. + *decode_data* specifies whether the data portion of the SMTP transaction + should be decoded using UTF-8. The default is ``True`` for backward + compatibility reasons, but will change to ``False`` in Python 3.6. Specify + the keyword value explicitly to avoid the :exc:`DeprecationWarning`. + To use a custom SMTPChannel implementation you need to override the :attr:`SMTPServer.channel_class` of your :class:`SMTPServer`. + .. versionchanged:: 3.5 + the *decode_data* argument was added. + The :class:`SMTPChannel` has the following instance variables: .. attribute:: smtp_server diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index af734c828af..68106c160b8 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -184,6 +184,16 @@ signal debugging, instead of integer “magic numbers”. (contributed by Giampaolo Rodola' in :issue:`21076`) +smtpd +----- + +* Both :class:`~smtpd.SMTPServer` and :class:`smtpd.SMTPChannel` now accept a + *decode_data* keyword to determine if the DATA portion of the SMTP + transaction is decoded using the ``utf-8`` codec or is instead provided to + :meth:`~smtpd.SMTPServer.process_message` as a byte string. The default + is ``True`` for backward compatibility reasons, but will change to ``False`` + in Python 3.6. (Contributed by Maciej Szulik in :issue:`19662`.) + socket ------ @@ -245,6 +255,12 @@ Deprecated Python modules, functions and methods * The :mod:`formatter` module has now graduated to full deprecation and is still slated for removal in Python 3.6. +* :mod:`smtpd` has in the past always decoded the DATA portion of email + messages using the ``utf-8`` codec. This can now be controlled by the new + *decode_data* keyword to :class:`~smtpd.SMTPServer`. The default value is + ``True``, but this default is deprecated. Specify the *decode_data* keyword + with an appropriate value to avoid the deprecation warning. + Deprecated functions and types of the C API ------------------------------------------- diff --git a/Lib/smtpd.py b/Lib/smtpd.py index 1fa157ade7c..569b42e2228 100755 --- a/Lib/smtpd.py +++ b/Lib/smtpd.py @@ -98,7 +98,6 @@ class Devnull: DEBUGSTREAM = Devnull() NEWLINE = '\n' -EMPTYSTRING = '' COMMASPACE = ', ' DATA_SIZE_DEFAULT = 33554432 @@ -122,12 +121,28 @@ class SMTPChannel(asynchat.async_chat): max_command_size_limit = max(command_size_limits.values()) def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT, - map=None): + map=None, decode_data=None): asynchat.async_chat.__init__(self, conn, map=map) self.smtp_server = server self.conn = conn self.addr = addr self.data_size_limit = data_size_limit + if decode_data is None: + warn("The decode_data default of True will change to False in 3.6;" + " specify an explicit value for this keyword", + DeprecationWarning, 2) + decode_data = True + self._decode_data = decode_data + if decode_data: + self._emptystring = '' + self._linesep = '\r\n' + self._dotsep = '.' + self._newline = NEWLINE + else: + self._emptystring = b'' + self._linesep = b'\r\n' + self._dotsep = b'.' + self._newline = b'\n' self.received_lines = [] self.smtp_state = self.COMMAND self.seen_greeting = '' @@ -287,11 +302,14 @@ class SMTPChannel(asynchat.async_chat): return elif limit: self.num_bytes += len(data) - self.received_lines.append(str(data, "utf-8")) + if self._decode_data: + self.received_lines.append(str(data, 'utf-8')) + else: + self.received_lines.append(data) # Implementation of base class abstract method def found_terminator(self): - line = EMPTYSTRING.join(self.received_lines) + line = self._emptystring.join(self.received_lines) print('Data:', repr(line), file=DEBUGSTREAM) self.received_lines = [] if self.smtp_state == self.COMMAND: @@ -300,6 +318,8 @@ class SMTPChannel(asynchat.async_chat): self.push('500 Error: bad syntax') return method = None + if not self._decode_data: + line = str(line, 'utf-8') i = line.find(' ') if i < 0: command = line.upper() @@ -330,12 +350,12 @@ class SMTPChannel(asynchat.async_chat): # Remove extraneous carriage returns and de-transparency according # to RFC 5321, Section 4.5.2. data = [] - for text in line.split('\r\n'): - if text and text[0] == '.': + for text in line.split(self._linesep): + if text and text[0] == self._dotsep: data.append(text[1:]) else: data.append(text) - self.received_data = NEWLINE.join(data) + self.received_data = self._newline.join(data) status = self.smtp_server.process_message(self.peer, self.mailfrom, self.rcpttos, @@ -577,10 +597,17 @@ class SMTPServer(asyncore.dispatcher): channel_class = SMTPChannel def __init__(self, localaddr, remoteaddr, - data_size_limit=DATA_SIZE_DEFAULT, map=None): + data_size_limit=DATA_SIZE_DEFAULT, map=None, + decode_data=None): self._localaddr = localaddr self._remoteaddr = remoteaddr self.data_size_limit = data_size_limit + if decode_data is None: + warn("The decode_data default of True will change to False in 3.6;" + " specify an explicit value for this keyword", + DeprecationWarning, 2) + decode_data = True + self._decode_data = decode_data asyncore.dispatcher.__init__(self, map=map) try: self.create_socket(socket.AF_INET, socket.SOCK_STREAM) @@ -599,7 +626,7 @@ class SMTPServer(asyncore.dispatcher): def handle_accepted(self, conn, addr): print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM) channel = self.channel_class(self, conn, addr, self.data_size_limit, - self._map) + self._map, self._decode_data) # API for "doing something useful with the message" def process_message(self, peer, mailfrom, rcpttos, data): diff --git a/Lib/test/test_smtpd.py b/Lib/test/test_smtpd.py index 93f14c4562b..db1f52b7b4a 100644 --- a/Lib/test/test_smtpd.py +++ b/Lib/test/test_smtpd.py @@ -7,13 +7,18 @@ import asyncore class DummyServer(smtpd.SMTPServer): - def __init__(self, localaddr, remoteaddr): - smtpd.SMTPServer.__init__(self, localaddr, remoteaddr) + def __init__(self, localaddr, remoteaddr, decode_data=True): + smtpd.SMTPServer.__init__(self, localaddr, remoteaddr, + decode_data=decode_data) self.messages = [] + if decode_data: + self.return_status = 'return status' + else: + self.return_status = b'return status' def process_message(self, peer, mailfrom, rcpttos, data): self.messages.append((peer, mailfrom, rcpttos, data)) - if data == 'return status': + if data == self.return_status: return '250 Okish' @@ -31,9 +36,9 @@ class SMTPDServerTest(unittest.TestCase): smtpd.socket = asyncore.socket = mock_socket def test_process_message_unimplemented(self): - server = smtpd.SMTPServer('a', 'b') + server = smtpd.SMTPServer('a', 'b', decode_data=True) conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) + channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) def write_line(line): channel.socket.queue_recv(line) @@ -45,6 +50,10 @@ class SMTPDServerTest(unittest.TestCase): write_line(b'DATA') self.assertRaises(NotImplementedError, write_line, b'spam\r\n.\r\n') + def test_decode_data_default_warns(self): + with self.assertWarns(DeprecationWarning): + smtpd.SMTPServer('a', 'b') + def tearDown(self): asyncore.close_all() asyncore.socket = smtpd.socket = socket @@ -57,7 +66,8 @@ class SMTPDChannelTest(unittest.TestCase): self.debug = smtpd.DEBUGSTREAM = io.StringIO() self.server = DummyServer('a', 'b') conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr) + self.channel = smtpd.SMTPChannel(self.server, conn, addr, + decode_data=True) def tearDown(self): asyncore.close_all() @@ -502,6 +512,12 @@ class SMTPDChannelTest(unittest.TestCase): with support.check_warnings(('', DeprecationWarning)): self.channel._SMTPChannel__addr = 'spam' + def test_decode_data_default_warning(self): + server = DummyServer('a', 'b') + conn, addr = self.server.accept() + with self.assertWarns(DeprecationWarning): + smtpd.SMTPChannel(server, conn, addr) + class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase): @@ -512,7 +528,8 @@ class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase): self.server = DummyServer('a', 'b') conn, addr = self.server.accept() # Set DATA size limit to 32 bytes for easy testing - self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32) + self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32, + decode_data=True) def tearDown(self): asyncore.close_all() @@ -553,5 +570,92 @@ class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase): b'552 Error: Too much mail data\r\n') +class SMTPDChannelWithDecodeDataFalse(unittest.TestCase): + + def setUp(self): + smtpd.socket = asyncore.socket = mock_socket + self.old_debugstream = smtpd.DEBUGSTREAM + self.debug = smtpd.DEBUGSTREAM = io.StringIO() + self.server = DummyServer('a', 'b', decode_data=False) + conn, addr = self.server.accept() + # Set decode_data to False + self.channel = smtpd.SMTPChannel(self.server, conn, addr, + decode_data=False) + + def tearDown(self): + asyncore.close_all() + asyncore.socket = smtpd.socket = socket + smtpd.DEBUGSTREAM = self.old_debugstream + + def write_line(self, line): + self.channel.socket.queue_recv(line) + self.channel.handle_read() + + def test_ascii_data(self): + self.write_line(b'HELO example') + self.write_line(b'MAIL From:eggs@example') + self.write_line(b'RCPT To:spam@example') + self.write_line(b'DATA') + self.write_line(b'plain ascii text') + self.write_line(b'.') + self.assertEqual(self.channel.received_data, b'plain ascii text') + + def test_utf8_data(self): + self.write_line(b'HELO example') + self.write_line(b'MAIL From:eggs@example') + self.write_line(b'RCPT To:spam@example') + self.write_line(b'DATA') + self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') + self.write_line(b'and some plain ascii') + self.write_line(b'.') + self.assertEqual( + self.channel.received_data, + b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87\n' + b'and some plain ascii') + + +class SMTPDChannelWithDecodeDataTrue(unittest.TestCase): + + def setUp(self): + smtpd.socket = asyncore.socket = mock_socket + self.old_debugstream = smtpd.DEBUGSTREAM + self.debug = smtpd.DEBUGSTREAM = io.StringIO() + self.server = DummyServer('a', 'b') + conn, addr = self.server.accept() + # Set decode_data to True + self.channel = smtpd.SMTPChannel(self.server, conn, addr, + decode_data=True) + + def tearDown(self): + asyncore.close_all() + asyncore.socket = smtpd.socket = socket + smtpd.DEBUGSTREAM = self.old_debugstream + + def write_line(self, line): + self.channel.socket.queue_recv(line) + self.channel.handle_read() + + def test_ascii_data(self): + self.write_line(b'HELO example') + self.write_line(b'MAIL From:eggs@example') + self.write_line(b'RCPT To:spam@example') + self.write_line(b'DATA') + self.write_line(b'plain ascii text') + self.write_line(b'.') + self.assertEqual(self.channel.received_data, 'plain ascii text') + + def test_utf8_data(self): + self.write_line(b'HELO example') + self.write_line(b'MAIL From:eggs@example') + self.write_line(b'RCPT To:spam@example') + self.write_line(b'DATA') + self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') + self.write_line(b'and some plain ascii') + self.write_line(b'.') + self.assertEqual( + self.channel.received_data, + 'utf8 enriched text: żźć\nand some plain ascii') + + if __name__ == "__main__": unittest.main()