#19662: add decode_data to smtpd so you can get at DATA in bytes form.

Otherwise smtpd is restricted to 7bit clean data, since even if the
incoming data is actually utf-8, it will often break things to decode
it before parsing the message.

Patch by Maciej Szulik, with some adjustments (mostly the warning
support).
This commit is contained in:
R David Murray 2014-06-11 11:18:08 -04:00
parent 38ee9afb34
commit 554bcbf1b9
4 changed files with 185 additions and 18 deletions

View File

@ -28,7 +28,7 @@ SMTPServer Objects
.. class:: SMTPServer(localaddr, remoteaddr, data_size_limit=33554432,\
map=None)
map=None, decode_data=True)
Create a new :class:`SMTPServer` object, which binds to local address
*localaddr*. It will treat *remoteaddr* as an upstream SMTP relayer. It
@ -41,6 +41,11 @@ SMTPServer Objects
A dictionary can be specified in *map* to avoid using a global socket map.
*decode_data* specifies whether the data portion of the SMTP transaction
should be decoded using UTF-8. The default is ``True`` for backward
compatibility reasons, but will change to ``False`` in Python 3.6. Specify
the keyword value explicitly to avoid the :exc:`DeprecationWarning`.
.. method:: process_message(peer, mailfrom, rcpttos, data)
Raise :exc:`NotImplementedError` exception. Override this in subclasses to
@ -51,6 +56,10 @@ SMTPServer Objects
containing the contents of the e-mail (which should be in :rfc:`2822`
format).
If the *decode_data* constructor keyword is set to ``True``, the *data*
argument will be a unicode string. If it is set to ``False``, it
will be a bytes object.
.. attribute:: channel_class
Override this in subclasses to use a custom :class:`SMTPChannel` for
@ -59,6 +68,9 @@ SMTPServer Objects
.. versionchanged:: 3.4
The *map* argument was added.
.. versionchanged:: 3.5
The *decode_data* argument was added.
DebuggingServer Objects
-----------------------
@ -97,7 +109,7 @@ SMTPChannel Objects
-------------------
.. class:: SMTPChannel(server, conn, addr, data_size_limit=33554432,\
map=None))
map=None, decode_data=True)
Create a new :class:`SMTPChannel` object which manages the communication
between the server and a single SMTP client.
@ -110,9 +122,17 @@ SMTPChannel Objects
A dictionary can be specified in *map* to avoid using a global socket map.
*decode_data* specifies whether the data portion of the SMTP transaction
should be decoded using UTF-8. The default is ``True`` for backward
compatibility reasons, but will change to ``False`` in Python 3.6. Specify
the keyword value explicitly to avoid the :exc:`DeprecationWarning`.
To use a custom SMTPChannel implementation you need to override the
:attr:`SMTPServer.channel_class` of your :class:`SMTPServer`.
.. versionchanged:: 3.5
The *decode_data* argument was added.
The :class:`SMTPChannel` has the following instance variables:
.. attribute:: smtp_server

View File

@ -184,6 +184,16 @@ signal
debugging, instead of integer “magic numbers”. (Contributed by Giampaolo
Rodola' in :issue:`21076`.)
smtpd
-----
* Both :class:`~smtpd.SMTPServer` and :class:`~smtpd.SMTPChannel` now accept a
*decode_data* keyword to determine if the DATA portion of the SMTP
transaction is decoded using the ``utf-8`` codec or is instead provided to
:meth:`~smtpd.SMTPServer.process_message` as a byte string. The default
is ``True`` for backward compatibility reasons, but will change to ``False``
in Python 3.6. (Contributed by Maciej Szulik in :issue:`19662`.)
socket
------
@ -245,6 +255,12 @@ Deprecated Python modules, functions and methods
* The :mod:`formatter` module has now graduated to full deprecation and is still
slated for removal in Python 3.6.
* :mod:`smtpd` has in the past always decoded the DATA portion of email
messages using the ``utf-8`` codec. This can now be controlled by the new
*decode_data* keyword to :class:`~smtpd.SMTPServer`. The default value is
``True``, but this default is deprecated. Specify the *decode_data* keyword
with an appropriate value to avoid the deprecation warning.
Deprecated functions and types of the C API
-------------------------------------------

View File

@ -98,7 +98,6 @@ class Devnull:
DEBUGSTREAM = Devnull()
NEWLINE = '\n'
EMPTYSTRING = ''
COMMASPACE = ', '
DATA_SIZE_DEFAULT = 33554432
@ -122,12 +121,28 @@ class SMTPChannel(asynchat.async_chat):
max_command_size_limit = max(command_size_limits.values())
def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT,
map=None):
map=None, decode_data=None):
asynchat.async_chat.__init__(self, conn, map=map)
self.smtp_server = server
self.conn = conn
self.addr = addr
self.data_size_limit = data_size_limit
if decode_data is None:
warn("The decode_data default of True will change to False in 3.6;"
" specify an explicit value for this keyword",
DeprecationWarning, 2)
decode_data = True
self._decode_data = decode_data
if decode_data:
self._emptystring = ''
self._linesep = '\r\n'
self._dotsep = '.'
self._newline = NEWLINE
else:
self._emptystring = b''
self._linesep = b'\r\n'
self._dotsep = b'.'
self._newline = b'\n'
self.received_lines = []
self.smtp_state = self.COMMAND
self.seen_greeting = ''
@ -287,11 +302,14 @@ class SMTPChannel(asynchat.async_chat):
return
elif limit:
self.num_bytes += len(data)
self.received_lines.append(str(data, "utf-8"))
if self._decode_data:
self.received_lines.append(str(data, 'utf-8'))
else:
self.received_lines.append(data)
# Implementation of base class abstract method
def found_terminator(self):
line = EMPTYSTRING.join(self.received_lines)
line = self._emptystring.join(self.received_lines)
print('Data:', repr(line), file=DEBUGSTREAM)
self.received_lines = []
if self.smtp_state == self.COMMAND:
@ -300,6 +318,8 @@ class SMTPChannel(asynchat.async_chat):
self.push('500 Error: bad syntax')
return
method = None
if not self._decode_data:
line = str(line, 'utf-8')
i = line.find(' ')
if i < 0:
command = line.upper()
@ -330,12 +350,12 @@ class SMTPChannel(asynchat.async_chat):
# Remove extraneous carriage returns and de-transparency according
# to RFC 5321, Section 4.5.2.
data = []
for text in line.split('\r\n'):
if text and text[0] == '.':
for text in line.split(self._linesep):
if text and text[0] == self._dotsep:
data.append(text[1:])
else:
data.append(text)
self.received_data = NEWLINE.join(data)
self.received_data = self._newline.join(data)
status = self.smtp_server.process_message(self.peer,
self.mailfrom,
self.rcpttos,
@ -577,10 +597,17 @@ class SMTPServer(asyncore.dispatcher):
channel_class = SMTPChannel
def __init__(self, localaddr, remoteaddr,
data_size_limit=DATA_SIZE_DEFAULT, map=None):
data_size_limit=DATA_SIZE_DEFAULT, map=None,
decode_data=None):
self._localaddr = localaddr
self._remoteaddr = remoteaddr
self.data_size_limit = data_size_limit
if decode_data is None:
warn("The decode_data default of True will change to False in 3.6;"
" specify an explicit value for this keyword",
DeprecationWarning, 2)
decode_data = True
self._decode_data = decode_data
asyncore.dispatcher.__init__(self, map=map)
try:
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
@ -599,7 +626,7 @@ class SMTPServer(asyncore.dispatcher):
def handle_accepted(self, conn, addr):
print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM)
channel = self.channel_class(self, conn, addr, self.data_size_limit,
self._map)
self._map, self._decode_data)
# API for "doing something useful with the message"
def process_message(self, peer, mailfrom, rcpttos, data):

View File

@ -7,13 +7,18 @@ import asyncore
class DummyServer(smtpd.SMTPServer):
def __init__(self, localaddr, remoteaddr):
smtpd.SMTPServer.__init__(self, localaddr, remoteaddr)
def __init__(self, localaddr, remoteaddr, decode_data=True):
smtpd.SMTPServer.__init__(self, localaddr, remoteaddr,
decode_data=decode_data)
self.messages = []
if decode_data:
self.return_status = 'return status'
else:
self.return_status = b'return status'
def process_message(self, peer, mailfrom, rcpttos, data):
self.messages.append((peer, mailfrom, rcpttos, data))
if data == 'return status':
if data == self.return_status:
return '250 Okish'
@ -31,9 +36,9 @@ class SMTPDServerTest(unittest.TestCase):
smtpd.socket = asyncore.socket = mock_socket
def test_process_message_unimplemented(self):
server = smtpd.SMTPServer('a', 'b')
server = smtpd.SMTPServer('a', 'b', decode_data=True)
conn, addr = server.accept()
channel = smtpd.SMTPChannel(server, conn, addr)
channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True)
def write_line(line):
channel.socket.queue_recv(line)
@ -45,6 +50,10 @@ class SMTPDServerTest(unittest.TestCase):
write_line(b'DATA')
self.assertRaises(NotImplementedError, write_line, b'spam\r\n.\r\n')
def test_decode_data_default_warns(self):
    # Building an SMTPServer without an explicit decode_data value must
    # emit a DeprecationWarning about the default.
    self.assertWarns(DeprecationWarning, smtpd.SMTPServer, 'a', 'b')
def tearDown(self):
asyncore.close_all()
asyncore.socket = smtpd.socket = socket
@ -57,7 +66,8 @@ class SMTPDChannelTest(unittest.TestCase):
self.debug = smtpd.DEBUGSTREAM = io.StringIO()
self.server = DummyServer('a', 'b')
conn, addr = self.server.accept()
self.channel = smtpd.SMTPChannel(self.server, conn, addr)
self.channel = smtpd.SMTPChannel(self.server, conn, addr,
decode_data=True)
def tearDown(self):
asyncore.close_all()
@ -502,6 +512,12 @@ class SMTPDChannelTest(unittest.TestCase):
with support.check_warnings(('', DeprecationWarning)):
self.channel._SMTPChannel__addr = 'spam'
def test_decode_data_default_warning(self):
    # DummyServer passes decode_data explicitly to SMTPServer, so creating
    # it does not warn; only the SMTPChannel construction below should.
    server = DummyServer('a', 'b')
    # Accept on the server created here (not the setUp fixture) so the
    # channel receives a server and a connection that belong together.
    conn, addr = server.accept()
    with self.assertWarns(DeprecationWarning):
        smtpd.SMTPChannel(server, conn, addr)
class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase):
@ -512,7 +528,8 @@ class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase):
self.server = DummyServer('a', 'b')
conn, addr = self.server.accept()
# Set DATA size limit to 32 bytes for easy testing
self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32)
self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32,
decode_data=True)
def tearDown(self):
asyncore.close_all()
@ -553,5 +570,92 @@ class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase):
b'552 Error: Too much mail data\r\n')
class SMTPDChannelWithDecodeDataFalse(unittest.TestCase):
    """Checks on an SMTPChannel constructed with decode_data=False.

    With decoding disabled, the channel's received_data is expected to be
    a bytes object.
    """

    def setUp(self):
        # Route both modules through the mock socket module for the test.
        asyncore.socket = mock_socket
        smtpd.socket = mock_socket
        # Capture debug output, remembering the stream to restore later.
        self.old_debugstream = smtpd.DEBUGSTREAM
        debug_buffer = io.StringIO()
        self.debug = debug_buffer
        smtpd.DEBUGSTREAM = debug_buffer
        self.server = DummyServer('a', 'b', decode_data=False)
        connection, address = self.server.accept()
        # Set decode_data to False
        self.channel = smtpd.SMTPChannel(
            self.server, connection, address, decode_data=False)

    def tearDown(self):
        asyncore.close_all()
        # Put the real socket module and the original debug stream back.
        smtpd.socket = socket
        asyncore.socket = socket
        smtpd.DEBUGSTREAM = self.old_debugstream

    def write_line(self, line):
        # Queue one line on the mock socket and let the channel read it.
        self.channel.socket.queue_recv(line)
        self.channel.handle_read()

    def _write_lines(self, *lines):
        # Feed several lines to the channel, one at a time and in order.
        for line in lines:
            self.write_line(line)

    def test_ascii_data(self):
        self._write_lines(
            b'HELO example',
            b'MAIL From:eggs@example',
            b'RCPT To:spam@example',
            b'DATA',
            b'plain ascii text',
            b'.',
        )
        self.assertEqual(self.channel.received_data, b'plain ascii text')

    def test_utf8_data(self):
        self._write_lines(
            b'HELO example',
            b'MAIL From:eggs@example',
            b'RCPT To:spam@example',
            b'DATA',
            b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87',
            b'and some plain ascii',
            b'.',
        )
        # The UTF-8 bytes must come through untouched, joined by b'\n'.
        expected = (b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87\n'
                    b'and some plain ascii')
        self.assertEqual(self.channel.received_data, expected)
class SMTPDChannelWithDecodeDataTrue(unittest.TestCase):
    """Checks on an SMTPChannel constructed with decode_data=True.

    With decoding enabled, the channel's received_data is expected to be
    a str.
    """

    def setUp(self):
        # Route both modules through the mock socket module for the test.
        asyncore.socket = mock_socket
        smtpd.socket = mock_socket
        # Capture debug output, remembering the stream to restore later.
        self.old_debugstream = smtpd.DEBUGSTREAM
        debug_buffer = io.StringIO()
        self.debug = debug_buffer
        smtpd.DEBUGSTREAM = debug_buffer
        self.server = DummyServer('a', 'b')
        connection, address = self.server.accept()
        # Set decode_data to True
        self.channel = smtpd.SMTPChannel(
            self.server, connection, address, decode_data=True)

    def tearDown(self):
        asyncore.close_all()
        # Put the real socket module and the original debug stream back.
        smtpd.socket = socket
        asyncore.socket = socket
        smtpd.DEBUGSTREAM = self.old_debugstream

    def write_line(self, line):
        # Queue one line on the mock socket and let the channel read it.
        self.channel.socket.queue_recv(line)
        self.channel.handle_read()

    def _write_lines(self, *lines):
        # Feed several lines to the channel, one at a time and in order.
        for line in lines:
            self.write_line(line)

    def test_ascii_data(self):
        self._write_lines(
            b'HELO example',
            b'MAIL From:eggs@example',
            b'RCPT To:spam@example',
            b'DATA',
            b'plain ascii text',
            b'.',
        )
        self.assertEqual(self.channel.received_data, 'plain ascii text')

    def test_utf8_data(self):
        self._write_lines(
            b'HELO example',
            b'MAIL From:eggs@example',
            b'RCPT To:spam@example',
            b'DATA',
            b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87',
            b'and some plain ascii',
            b'.',
        )
        # The UTF-8 bytes must arrive decoded, with lines joined by '\n'.
        expected = 'utf8 enriched text: żźć\nand some plain ascii'
        self.assertEqual(self.channel.received_data, expected)
if __name__ == "__main__":
unittest.main()