#20476: add a message_factory policy attribute to email.

This commit is contained in:
R David Murray 2016-09-09 18:39:18 -04:00
parent 37df068e86
commit 06ed218ed0
9 changed files with 128 additions and 66 deletions

View File

@ -73,8 +73,9 @@ Here is the API for the :class:`BytesFeedParser`:
.. class:: BytesFeedParser(_factory=None, *, policy=policy.compat32)
Create a :class:`BytesFeedParser` instance. Optional *_factory* is a
no-argument callable; if not specified determine the default based on the
*policy*. Call *_factory* whenever a new message object is needed.
no-argument callable; if not specified use the
:attr:`~email.policy.Policy.message_factory` from the *policy*. Call
*_factory* whenever a new message object is needed.
If *policy* is specified use the rules it specifies to update the
representation of the message. If *policy* is not set, use the
@ -91,6 +92,7 @@ Here is the API for the :class:`BytesFeedParser`:
.. versionadded:: 3.2
.. versionchanged:: 3.3 Added the *policy* keyword.
.. versionchanged:: 3.6 _factory defaults to the policy ``message_factory``.
.. method:: feed(data)
@ -146,6 +148,7 @@ message body, instead setting the payload to the raw body.
.. versionchanged:: 3.3
Removed the *strict* argument that was deprecated in 2.4. Added the
*policy* keyword.
.. versionchanged:: 3.6 _class defaults to the policy ``message_factory``.
.. method:: parse(fp, headersonly=False)
@ -194,6 +197,7 @@ message body, instead setting the payload to the raw body.
.. versionchanged:: 3.3
Removed the *strict* argument. Added the *policy* keyword.
.. versionchanged:: 3.6 _class defaults to the policy ``message_factory``.
.. method:: parse(fp, headersonly=False)
@ -230,8 +234,7 @@ in the top-level :mod:`email` package namespace.
.. currentmodule:: email
.. function:: message_from_bytes(s, _class=None, *, \
policy=policy.compat32)
.. function:: message_from_bytes(s, _class=None, *, policy=policy.compat32)
Return a message object structure from a :term:`bytes-like object`. This is
equivalent to ``BytesParser().parsebytes(s)``. Optional *_class* and
@ -243,7 +246,7 @@ in the top-level :mod:`email` package namespace.
Removed the *strict* argument. Added the *policy* keyword.
.. function:: message_from_binary_file(fp, _class=None, *, \
.. function:: message_from_binary_file(fp, _class=None, *,
policy=policy.compat32)
Return a message object structure tree from an open binary :term:`file
@ -256,8 +259,7 @@ in the top-level :mod:`email` package namespace.
Removed the *strict* argument. Added the *policy* keyword.
.. function:: message_from_string(s, _class=None, *, \
policy=policy.compat32)
.. function:: message_from_string(s, _class=None, *, policy=policy.compat32)
Return a message object structure from a string. This is equivalent to
``Parser().parsestr(s)``. *_class* and *policy* are interpreted as
@ -267,8 +269,7 @@ in the top-level :mod:`email` package namespace.
Removed the *strict* argument. Added the *policy* keyword.
.. function:: message_from_file(fp, _class=None, *, \
policy=policy.compat32)
.. function:: message_from_file(fp, _class=None, *, policy=policy.compat32)
Return a message object structure tree from an open :term:`file object`.
This is equivalent to ``Parser().parse(fp)``. *_class* and *policy* are
@ -276,6 +277,7 @@ in the top-level :mod:`email` package namespace.
.. versionchanged:: 3.3
Removed the *strict* argument. Added the *policy* keyword.
.. versionchanged:: 3.6 _class defaults to the policy ``message_factory``.
Here's an example of how you might use :func:`message_from_bytes` at an

View File

@ -221,6 +221,14 @@ added matters. To illustrate::
The *mangle_from_* parameter.
.. attribute:: message_factory
A factory function for constructing a new empty message object. Used
by the parser when building messages. Defaults to
:class:`~email.message.Message`.
.. versionadded:: 3.6
The following :class:`Policy` method is intended to be called by code using
the email library to create policy instances with custom settings:
@ -368,6 +376,9 @@ added matters. To illustrate::
on the type of the field. The parsing and folding algorithm fully implement
:rfc:`2047` and :rfc:`5322`.
The default value for the :attr:`~email.policy.Policy.message_factory`
attribute is :class:`~email.message.EmailMessage`.
In addition to the settable attributes listed above that apply to all
policies, this policy adds the following additional attributes:

View File

@ -598,6 +598,13 @@ The :mod:`email.mime` classes now all accept an optional *policy* keyword.
The :class:`~email.generator.DecodedGenerator` now supports the *policy*
keyword.
There is a new :mod:`~email.policy` attribute,
:attr:`~email.policy.Policy.message_factory`, that controls what class is used
by default when the parser creates new message objects. For the
:attr:`email.policy.compat32` policy this is :class:`~email.message.Message`,
for the new policies it is :class:`~email.message.EmailMessage`.
(Contributed by R. David Murray in :issue:`20476`.)
encodings
---------

View File

@ -154,6 +154,8 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
them. This is used when the message is being
serialized by a generator. Default: True.
message_factory -- the class to use to create new message objects.
"""
raise_on_defect = False
@ -161,6 +163,8 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
cte_type = '8bit'
max_line_length = 78
mangle_from_ = False
# XXX To avoid circular imports, this is set in email.message.
message_factory = None
def handle_defect(self, obj, defect):
"""Based on policy, either raise defect or call register_defect.

View File

@ -24,7 +24,6 @@ __all__ = ['FeedParser', 'BytesFeedParser']
import re
from email import errors
from email import message
from email._policybase import compat32
from collections import deque
from io import StringIO
@ -148,13 +147,7 @@ class FeedParser:
self.policy = policy
self._old_style_factory = False
if _factory is None:
# What this should be:
#self._factory = policy.default_message_factory
# but, because we are post 3.4 feature freeze, fix with temp hack:
if self.policy is compat32:
self._factory = message.Message
else:
self._factory = message.EmailMessage
self._factory = policy.message_factory
else:
self._factory = _factory
try:

View File

@ -4,18 +4,17 @@
"""Basic message object for the email package object model."""
__all__ = ['Message']
__all__ = ['Message', 'EmailMessage']
import re
import uu
import quopri
import warnings
from io import BytesIO, StringIO
# Intrapackage imports
from email import utils
from email import errors
from email._policybase import compat32
from email._policybase import Policy, compat32
from email import charset as _charset
from email._encoded_words import decode_b
Charset = _charset.Charset
@ -1163,3 +1162,6 @@ class EmailMessage(MIMEPart):
super().set_content(*args, **kw)
if 'MIME-Version' not in self:
self['MIME-Version'] = '1.0'
# Set message_factory on Policy here to avoid a circular import.
Policy.message_factory = Message

View File

@ -7,6 +7,7 @@ from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager
from email.message import EmailMessage
__all__ = [
'Compat32',
@ -82,6 +83,7 @@ class EmailPolicy(Policy):
"""
message_factory = EmailMessage
utf8 = False
refold_source = 'long'
header_factory = HeaderRegistry()

View File

@ -1,7 +1,7 @@
import io
import email
import unittest
from email.message import Message
from email.message import Message, EmailMessage
from email.policy import default
from test.test_email import TestEmailBase
@ -39,38 +39,71 @@ class TestParserBase:
# The unicode line splitter splits on unicode linebreaks, which are
# more numerous than allowed by the email RFCs; make sure we are only
# splitting on those two.
msg = self.parser(
"Next-Line: not\x85broken\r\n"
"Null: not\x00broken\r\n"
"Vertical-Tab: not\vbroken\r\n"
"Form-Feed: not\fbroken\r\n"
"File-Separator: not\x1Cbroken\r\n"
"Group-Separator: not\x1Dbroken\r\n"
"Record-Separator: not\x1Ebroken\r\n"
"Line-Separator: not\u2028broken\r\n"
"Paragraph-Separator: not\u2029broken\r\n"
"\r\n",
policy=default,
)
self.assertEqual(msg.items(), [
("Next-Line", "not\x85broken"),
("Null", "not\x00broken"),
("Vertical-Tab", "not\vbroken"),
("Form-Feed", "not\fbroken"),
("File-Separator", "not\x1Cbroken"),
("Group-Separator", "not\x1Dbroken"),
("Record-Separator", "not\x1Ebroken"),
("Line-Separator", "not\u2028broken"),
("Paragraph-Separator", "not\u2029broken"),
])
self.assertEqual(msg.get_payload(), "")
for parser in self.parsers:
with self.subTest(parser=parser.__name__):
msg = parser(
"Next-Line: not\x85broken\r\n"
"Null: not\x00broken\r\n"
"Vertical-Tab: not\vbroken\r\n"
"Form-Feed: not\fbroken\r\n"
"File-Separator: not\x1Cbroken\r\n"
"Group-Separator: not\x1Dbroken\r\n"
"Record-Separator: not\x1Ebroken\r\n"
"Line-Separator: not\u2028broken\r\n"
"Paragraph-Separator: not\u2029broken\r\n"
"\r\n",
policy=default,
)
self.assertEqual(msg.items(), [
("Next-Line", "not\x85broken"),
("Null", "not\x00broken"),
("Vertical-Tab", "not\vbroken"),
("Form-Feed", "not\fbroken"),
("File-Separator", "not\x1Cbroken"),
("Group-Separator", "not\x1Dbroken"),
("Record-Separator", "not\x1Ebroken"),
("Line-Separator", "not\u2028broken"),
("Paragraph-Separator", "not\u2029broken"),
])
self.assertEqual(msg.get_payload(), "")
class MyMessage(EmailMessage):
pass
def test_custom_message_factory_on_policy(self):
for parser in self.parsers:
with self.subTest(parser=parser.__name__):
MyPolicy = default.clone(message_factory=self.MyMessage)
msg = parser("To: foo\n\ntest", policy=MyPolicy)
self.assertIsInstance(msg, self.MyMessage)
def test_factory_arg_overrides_policy(self):
for parser in self.parsers:
with self.subTest(parser=parser.__name__):
MyPolicy = default.clone(message_factory=self.MyMessage)
msg = parser("To: foo\n\ntest", Message, policy=MyPolicy)
self.assertNotIsInstance(msg, self.MyMessage)
self.assertIsInstance(msg, Message)
# Play some games to get nice output in subTest. This code could be clearer
# if staticmethod supported __name__.
def message_from_file(s, *args, **kw):
f = io.StringIO(s)
return email.message_from_file(f, *args, **kw)
class TestParser(TestParserBase, TestEmailBase):
parser = staticmethod(email.message_from_string)
parsers = (email.message_from_string, message_from_file)
def message_from_bytes(s, *args, **kw):
return email.message_from_bytes(s.encode(), *args, **kw)
def message_from_binary_file(s, *args, **kw):
f = io.BytesIO(s.encode())
return email.message_from_binary_file(f, *args, **kw)
class TestBytesParser(TestParserBase, TestEmailBase):
def parser(self, s, *args, **kw):
return email.message_from_bytes(s.encode(), *args, **kw)
parsers = (message_from_bytes, message_from_binary_file)
if __name__ == '__main__':

View File

@ -5,6 +5,7 @@ import unittest
import email.policy
import email.parser
import email.generator
import email.message
from email import headerregistry
def make_defaults(base_defaults, differences):
@ -23,6 +24,7 @@ class PolicyAPITests(unittest.TestCase):
'cte_type': '8bit',
'raise_on_defect': False,
'mangle_from_': True,
'message_factory': email.message.Message,
}
# These default values are the ones set on email.policy.default.
# If any of these defaults change, the docs must be updated.
@ -34,6 +36,7 @@ class PolicyAPITests(unittest.TestCase):
'refold_source': 'long',
'content_manager': email.policy.EmailPolicy.content_manager,
'mangle_from_': False,
'message_factory': email.message.EmailMessage,
})
# For each policy under test, we give here what we expect the defaults to
@ -62,20 +65,22 @@ class PolicyAPITests(unittest.TestCase):
def test_defaults(self):
for policy, expected in self.policies.items():
for attr, value in expected.items():
self.assertEqual(getattr(policy, attr), value,
("change {} docs/docstrings if defaults have "
"changed").format(policy))
with self.subTest(policy=policy, attr=attr):
self.assertEqual(getattr(policy, attr), value,
("change {} docs/docstrings if defaults have "
"changed").format(policy))
def test_all_attributes_covered(self):
for policy, expected in self.policies.items():
for attr in dir(policy):
if (attr.startswith('_') or
isinstance(getattr(email.policy.EmailPolicy, attr),
types.FunctionType)):
continue
else:
self.assertIn(attr, expected,
"{} is not fully tested".format(attr))
with self.subTest(policy=policy, attr=attr):
if (attr.startswith('_') or
isinstance(getattr(email.policy.EmailPolicy, attr),
types.FunctionType)):
continue
else:
self.assertIn(attr, expected,
"{} is not fully tested".format(attr))
def test_abc(self):
with self.assertRaises(TypeError) as cm:
@ -237,6 +242,9 @@ class PolicyAPITests(unittest.TestCase):
# wins), but that the order still works (right overrides left).
class TestException(Exception):
pass
class TestPolicyPropagation(unittest.TestCase):
# The abstract methods are used by the parser but not by the wrapper
@ -244,40 +252,40 @@ class TestPolicyPropagation(unittest.TestCase):
# policy was actually propagated all the way to feedparser.
class MyPolicy(email.policy.Policy):
def badmethod(self, *args, **kw):
raise Exception("test")
raise TestException("test")
fold = fold_binary = header_fetch_parser = badmethod
header_source_parse = header_store_parse = badmethod
def test_message_from_string(self):
with self.assertRaisesRegex(Exception, "^test$"):
with self.assertRaisesRegex(TestException, "^test$"):
email.message_from_string("Subject: test\n\n",
policy=self.MyPolicy)
def test_message_from_bytes(self):
with self.assertRaisesRegex(Exception, "^test$"):
with self.assertRaisesRegex(TestException, "^test$"):
email.message_from_bytes(b"Subject: test\n\n",
policy=self.MyPolicy)
def test_message_from_file(self):
f = io.StringIO('Subject: test\n\n')
with self.assertRaisesRegex(Exception, "^test$"):
with self.assertRaisesRegex(TestException, "^test$"):
email.message_from_file(f, policy=self.MyPolicy)
def test_message_from_binary_file(self):
f = io.BytesIO(b'Subject: test\n\n')
with self.assertRaisesRegex(Exception, "^test$"):
with self.assertRaisesRegex(TestException, "^test$"):
email.message_from_binary_file(f, policy=self.MyPolicy)
# These are redundant, but we need them for black-box completeness.
def test_parser(self):
p = email.parser.Parser(policy=self.MyPolicy)
with self.assertRaisesRegex(Exception, "^test$"):
with self.assertRaisesRegex(TestException, "^test$"):
p.parsestr('Subject: test\n\n')
def test_bytes_parser(self):
p = email.parser.BytesParser(policy=self.MyPolicy)
with self.assertRaisesRegex(Exception, "^test$"):
with self.assertRaisesRegex(TestException, "^test$"):
p.parsebytes(b'Subject: test\n\n')
# Now that we've established that all the parse methods get the