# cpython/Lib/test/test_email/test_generator.py

import io
import textwrap
import unittest
from email import message_from_string, message_from_bytes
from email.message import EmailMessage
from email.generator import Generator, BytesGenerator
from email.headerregistry import Address
from email import policy
from test.test_email import TestEmailBase, parameterize


@parameterize
class TestGeneratorBase:
    """Generator tests shared by the str and bytes generator test classes.

    This class is a mixin; it does not define the following attributes, which
    the concrete subclasses in this module supply:

    * ``msgfunc``  -- callable that parses the test source into a message.
    * ``genclass`` -- the generator class under test.
    * ``ioclass``  -- the in-memory buffer class the generator writes to.
    * ``typ``      -- converts a str literal into the type under test.
    """

    # Default policy used both for parsing (see msgmaker) and as the base
    # policy that individual tests clone with modified settings.
    policy = policy.default

    def msgmaker(self, msg, policy=None):
        # Parse 'msg' with self.msgfunc, falling back to the class-level
        # policy when the caller does not pass one explicitly.
        policy = self.policy if policy is None else policy
        return self.msgfunc(msg, policy=policy)

    # Expected flattened output keyed by maximum line length.  The entry for
    # key 0 doubles as the source message that every length_as_* test parses.
    refold_long_expected = {
        0: textwrap.dedent("""\
            To: whom_it_may_concern@example.com
            From: nobody_you_want_to_know@example.com
            Subject: We the willing led by the unknowing are doing the
             impossible for the ungrateful. We have done so much for so long with so little
             we are now qualified to do anything with nothing.

            None
            """),
        40: textwrap.dedent("""\
            To: whom_it_may_concern@example.com
            From:
             nobody_you_want_to_know@example.com
            Subject: We the willing led by the
             unknowing are doing the impossible for
             the ungrateful. We have done so much
             for so long with so little we are now
             qualified to do anything with nothing.

            None
            """),
        20: textwrap.dedent("""\
            To:
             whom_it_may_concern@example.com
            From:
             nobody_you_want_to_know@example.com
            Subject: We the
             willing led by the
             unknowing are doing
             the impossible for
             the ungrateful. We
             have done so much
             for so long with so
             little we are now
             qualified to do
             anything with
             nothing.

            None
            """),
        }
    # At a limit of 100 the expected output is the same text as the source.
    refold_long_expected[100] = refold_long_expected[0]

    # Expected output when the policy uses refold_source='all'.  Starts as a
    # copy of the table above; only the 0 and 100 entries differ.
    refold_all_expected = refold_long_expected.copy()
    # Limit 0 with refold_source='all': the Subject ends up on a single line.
    refold_all_expected[0] = (
            "To: whom_it_may_concern@example.com\n"
            "From: nobody_you_want_to_know@example.com\n"
            "Subject: We the willing led by the unknowing are doing the "
              "impossible for the ungrateful. We have done so much for "
              "so long with so little we are now qualified to do anything "
              "with nothing.\n"
              "\n"
              "None\n")
    # Limit 100 with refold_source='all': the Subject is folded once.
    refold_all_expected[100] = (
            "To: whom_it_may_concern@example.com\n"
            "From: nobody_you_want_to_know@example.com\n"
            "Subject: We the willing led by the unknowing are doing the "
                "impossible for the ungrateful. We have\n"
              " done so much for so long with so little we are now qualified "
                "to do anything with nothing.\n"
              "\n"
              "None\n")

    # The line lengths (keys of refold_long_expected) to exercise.
    # NOTE(review): presumably @parameterize pairs 'length_params' with the
    # 'length_as_*' methods below to generate one test per length -- confirm
    # against test.test_email.parameterize.
    length_params = [n for n in refold_long_expected]

    def length_as_maxheaderlen_parameter(self, n):
        # The limit is passed through the generator's maxheaderlen argument.
        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
        s = self.ioclass()
        g = self.genclass(s, maxheaderlen=n, policy=self.policy)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[n]))

    def length_as_max_line_length_policy(self, n):
        # The limit is passed through the policy's max_line_length setting.
        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(max_line_length=n))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[n]))

    def length_as_maxheaderlen_parm_overrides_policy(self, n):
        # Both are given; the expected output is the one for maxheaderlen=n,
        # not for the policy's max_line_length of 10.
        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
        s = self.ioclass()
        g = self.genclass(s, maxheaderlen=n,
                          policy=self.policy.clone(max_line_length=10))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[n]))

    def length_as_max_line_length_with_refold_none_does_not_fold(self, n):
        # With refold_source='none' the output must equal the source (entry
        # 0) regardless of the requested line length.
        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(refold_source='none',
                                                      max_line_length=n))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[0]))

    def length_as_max_line_length_with_refold_all_folds(self, n):
        # With refold_source='all' the output is checked against the
        # refold_all_expected table rather than refold_long_expected.
        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(refold_source='all',
                                                      max_line_length=n))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(self.refold_all_expected[n]))

    def test_crlf_control_via_policy(self):
        # A CRLF source flattened with policy.SMTP must come out unchanged.
        source = "Subject: test\r\n\r\ntest body\r\n"
        expected = source
        msg = self.msgmaker(self.typ(source))
        s = self.ioclass()
        g = self.genclass(s, policy=policy.SMTP)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))

    def test_flatten_linesep_overrides_policy(self):
        # An explicit linesep passed to flatten() is expected to win over the
        # generator's policy (policy.SMTP here): the output keeps '\n'.
        source = "Subject: test\n\ntest body\n"
        expected = source
        msg = self.msgmaker(self.typ(source))
        s = self.ioclass()
        g = self.genclass(s, policy=policy.SMTP)
        g.flatten(msg, linesep='\n')
        self.assertEqual(s.getvalue(), self.typ(expected))

    def test_flatten_linesep(self):
        # The source header is folded with a mix of '\n', '\r' and '\r\n'.
        source = 'Subject: one\n two\r three\r\n four\r\n\r\ntest body\r\n'
        msg = self.msgmaker(self.typ(source))
        self.assertEqual(msg['Subject'], 'one two three four')

        # Default generator: fold points are kept, each written as '\n'.
        expected = 'Subject: one\n two\n three\n four\n\ntest body\n'
        s = self.ioclass()
        g = self.genclass(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))

        # refold_source='all': the header is emitted on a single line.
        expected = 'Subject: one two three four\n\ntest body\n'
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))

    def test_flatten_control_linesep(self):
        # The control characters \v, \f, \x1c, \x1d and \x1e inside a header
        # value must be preserved verbatim, both when parsing and flattening.
        source = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\r\n\r\ntest body\r\n'
        msg = self.msgmaker(self.typ(source))
        self.assertEqual(msg['Subject'], 'one\v two\f three\x1c four\x1d five\x1e six')

        expected = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\n\ntest body\n'
        s = self.ioclass()
        g = self.genclass(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))

        # The same output is expected when every header is refolded.
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))

    def test_set_mangle_from_via_policy(self):
        # The body contains a line starting with "From "; whether it is
        # rewritten to ">From " depends on the generator's policy.
        source = textwrap.dedent("""\
            Subject: test that
             from is mangled in the body!

            From time to time I write a rhyme.
            """)
        # Pairs of (policy passed to the generator, mangling expected?).
        variants = (
            (None, True),
            (policy.compat32, True),
            (policy.default, False),
            (policy.default.clone(mangle_from_=True), True),
            )
        for p, mangle in variants:
            expected = source.replace('From ', '>From ') if mangle else source
            with self.subTest(policy=p, mangle_from_=mangle):
                msg = self.msgmaker(self.typ(source))
                s = self.ioclass()
                g = self.genclass(s, policy=p)
                g.flatten(msg)
                self.assertEqual(s.getvalue(), self.typ(expected))

    def test_compat32_max_line_length_does_not_fold_when_none(self):
        # compat32 with max_line_length=None must reproduce the source as is.
        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
        s = self.ioclass()
        g = self.genclass(s, policy=policy.compat32.clone(max_line_length=None))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[0]))

    def test_rfc2231_wrapping(self):
        # This is pretty much just to make sure we don't have an infinite
        # loop; I don't expect anyone to hit this in the field.
        msg = self.msgmaker(self.typ(textwrap.dedent("""\
            To: nobody
            Content-Disposition: attachment;
             filename="afilenamelongenoghtowraphere"

            None
            """)))
        # With a 33 character limit the filename parameter is expected to be
        # split into two RFC 2231 continuation sections.
        expected = textwrap.dedent("""\
            To: nobody
            Content-Disposition: attachment;
             filename*0*=us-ascii''afilename;
             filename*1*=longenoghtowraphere

            None
            """)
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(max_line_length=33))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))

    def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self):
        # This is just to make sure we don't have an infinite loop; I don't
        # expect anyone to hit this in the field, so I'm not bothering to make
        # the result optimal (the encoding isn't needed).
        msg = self.msgmaker(self.typ(textwrap.dedent("""\
            To: nobody
            Content-Disposition: attachment;
             filename="afilenamelongenoghtowraphere"

            None
            """)))
        # With a 20 character limit the filename parameter is expected on a
        # single line that exceeds the limit instead of being split.
        expected = textwrap.dedent("""\
            To: nobody
            Content-Disposition:
             attachment;
             filename*0*=us-ascii''afilenamelongenoghtowraphere

            None
            """)
        s = self.ioclass()
        g = self.genclass(s, policy=self.policy.clone(max_line_length=20))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), self.typ(expected))


class TestGenerator(TestGeneratorBase, TestEmailBase):
    # Runs the shared generator tests against the str-based Generator,
    # parsing with message_from_string and writing to an io.StringIO.

    typ = str
    ioclass = io.StringIO
    genclass = Generator
    msgfunc = staticmethod(message_from_string)

    def test_flatten_unicode_linesep(self):
        # \x85, \u2028 and \u2029 inside a header value must survive parsing
        # and come out of the generator inside an RFC 2047 encoded word.
        raw = 'Subject: one\x85 two\u2028 three\u2029 four\r\n\r\ntest body\r\n'
        parsed = self.msgmaker(self.typ(raw))
        self.assertEqual(parsed['Subject'], 'one\x85 two\u2028 three\u2029 four')

        wanted = self.typ(
            'Subject: =?utf-8?b?b25lwoUgdHdv4oCoIHRocmVl4oCp?= four\n\ntest body\n')
        # The same output is expected from a default generator and from one
        # whose policy refolds every header.
        generator_options = (
            {},
            {'policy': self.policy.clone(refold_source='all')},
        )
        for options in generator_options:
            buf = self.ioclass()
            self.genclass(buf, **options).flatten(parsed)
            self.assertEqual(buf.getvalue(), wanted)


class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
    # Runs the shared generator tests against BytesGenerator (parsing with
    # message_from_bytes, writing to an io.BytesIO) and adds tests that only
    # make sense for binary output.

    msgfunc = staticmethod(message_from_bytes)
    genclass = BytesGenerator
    ioclass = io.BytesIO

    def typ(self, x):
        # Convert a str test literal into the bytes form used by this class.
        return x.encode('ascii')

    def test_defaults_handle_spaces_between_encoded_words_when_folded(self):
        # A long non-ASCII subject is folded into several encoded words; the
        # spaces between the original words must be kept inside them.
        source = ("Уведомление о принятии в работу обращения для"
                  " подключения услуги")
        expected = ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'
                    ' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'
                    ' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')
        msg = EmailMessage()
        msg['Subject'] = source
        s = io.BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_defaults_handle_spaces_when_encoded_words_is_folded_in_middle(self):
        # Only the final, non-ASCII word needs encoding; it is split across
        # two encoded words at the fold point.
        source = ('A very long long long long long long long long long long long long '
                  'long long long long long long long long long long long súmmäry')
        expected = ('Subject: A very long long long long long long long long long long long long\n'
                    ' long long long long long long long long long long long =?utf-8?q?s=C3=BAmm?=\n'
                    ' =?utf-8?q?=C3=A4ry?=\n\n').encode('ascii')
        msg = EmailMessage()
        msg['Subject'] = source
        s = io.BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_defaults_handle_spaces_at_start_of_subject(self):
        # A leading space in the value is emitted before the encoded word.
        source = " Уведомление"
        expected = b"Subject:  =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtQ==?=\n\n"
        msg = EmailMessage()
        msg['Subject'] = source
        s = io.BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_defaults_handle_spaces_at_start_of_continuation_line(self):
        # The space that precedes the fold point is carried inside the
        # encoded word on the continuation line.
        source = " ф ффффффффффффффффффф ф ф"
        expected = (b"Subject:  "
                    b"=?utf-8?b?0YQg0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YQ=?=\n"
                    b" =?utf-8?b?INGEINGE?=\n\n")
        msg = EmailMessage()
        msg['Subject'] = source
        s = io.BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_cte_type_7bit_handles_unknown_8bit(self):
        # Raw 8-bit bytes in a parsed header have no declared charset; with
        # cte_type='7bit' they are emitted as an unknown-8bit encoded word.
        source = ("Subject: Maintenant je vous présente mon "
                 "collègue\n\n").encode('utf-8')
        expected = ('Subject: Maintenant je vous =?unknown-8bit?q?'
                    'pr=C3=A9sente_mon_coll=C3=A8gue?=\n\n').encode('ascii')
        msg = message_from_bytes(source)
        s = io.BytesIO()
        g = BytesGenerator(s, policy=self.policy.clone(cte_type='7bit'))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_cte_type_7bit_transforms_8bit_cte(self):
        # An 8bit latin-1 body flattened with cte_type='7bit' is expected to
        # be re-encoded as quoted-printable with the headers updated to match.
        source = textwrap.dedent("""\
            From: foo@bar.com
            To: Dinsdale
            Subject: Nudge nudge, wink, wink
            Mime-Version: 1.0
            Content-Type: text/plain; charset="latin-1"
            Content-Transfer-Encoding: 8bit

            oh là là, know what I mean, know what I mean?
            """).encode('latin1')
        msg = message_from_bytes(source)
        expected = textwrap.dedent("""\
            From: foo@bar.com
            To: Dinsdale
            Subject: Nudge nudge, wink, wink
            Mime-Version: 1.0
            Content-Type: text/plain; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            oh l=E0 l=E0, know what I mean, know what I mean?
            """).encode('ascii')
        s = io.BytesIO()
        g = BytesGenerator(s, policy=self.policy.clone(cte_type='7bit',
                                                       linesep='\n'))
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_smtputf8_policy(self):
        # With policy.SMTPUTF8 non-ASCII header text is written as raw UTF-8
        # (no encoded words), the body uses 8bit and lines end in CRLF.
        msg = EmailMessage()
        msg['From'] = "Páolo <főo@bar.com>"
        msg['To'] = 'Dinsdale'
        # U+1F609 WINKING FACE lies outside the BMP, so it needs the 8-digit
        # \U escape; a 4-digit \u escape would stop after '1F60' and leave a
        # literal '9' behind.
        msg['Subject'] = 'Nudge nudge, wink, wink \U0001F609'
        msg.set_content("oh là là, know what I mean, know what I mean?")
        expected = textwrap.dedent("""\
            From: Páolo <főo@bar.com>
            To: Dinsdale
            Subject: Nudge nudge, wink, wink \U0001F609
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit
            MIME-Version: 1.0

            oh là là, know what I mean, know what I mean?
            """).encode('utf-8').replace(b'\n', b'\r\n')
        s = io.BytesIO()
        g = BytesGenerator(s, policy=policy.SMTPUTF8)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)

    def test_smtp_policy(self):
        # With policy.SMTP a non-ASCII display name is written as an encoded
        # word, the ASCII body uses 7bit and lines end in CRLF.
        msg = EmailMessage()
        msg["From"] = Address(addr_spec="foo@bar.com", display_name="Páolo")
        msg["To"] = Address(addr_spec="bar@foo.com", display_name="Dinsdale")
        msg["Subject"] = "Nudge nudge, wink, wink"
        msg.set_content("oh boy, know what I mean, know what I mean?")
        expected = textwrap.dedent("""\
            From: =?utf-8?q?P=C3=A1olo?= <foo@bar.com>
            To: Dinsdale <bar@foo.com>
            Subject: Nudge nudge, wink, wink
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 7bit
            MIME-Version: 1.0

            oh boy, know what I mean, know what I mean?
            """).encode().replace(b"\n", b"\r\n")
        s = io.BytesIO()
        g = BytesGenerator(s, policy=policy.SMTP)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), expected)


# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								import io
 								import textwrap
 								import unittest
 								from email import message_from_string, message_from_bytes
-												#24211: Add RFC6532 support to the email library.

This could use more edge case tests, but the basic functionality is tested.
(Note that this changeset does not add tailored support for the RFC 6532
message/global MIME type, but the email package generic facilities will handle
it.)

Reviewed by Maciej Szulik.

											
										
										
											2015-05-17 12:29:21 -03:00
+								from email.message import EmailMessage
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								from email.generator import Generator, BytesGenerator
-												bpo-34424: Handle different policy.linesep lengths correctly. (#8803)



											
										
										
											2019-05-13 22:07:39 -03:00
+								from email.headerregistry import Address
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								from email import policy
-												Don't use metaclasses when class decorators can do the job.

Thanks to Nick Coghlan for pointing out that I'd forgotten about class
decorators.

											
										
										
											2012-05-31 19:00:45 -03:00
+								from test.test_email import TestEmailBase, parameterize
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												Don't use metaclasses when class decorators can do the job.

Thanks to Nick Coghlan for pointing out that I'd forgotten about class
decorators.

											
										
										
											2012-05-31 19:00:45 -03:00
+								@parameterize
 								class TestGeneratorBase:
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								    policy = policy.default
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								    def msgmaker(self, msg, policy=None):
 								        policy = self.policy if policy is None else policy
 								        return self.msgfunc(msg, policy=policy)
-												#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file is some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.

											
										
										
											2012-05-25 16:01:48 -03:00
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								    refold_long_expected = {
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+: textwrap.dedent("""\
 								            To: whom_it_may_concern@example.com
 								            From: nobody_you_want_to_know@example.com
 								            Subject: We the willing led by the unknowing are doing the
 								             impossible for the ungrateful. We have done so much for so long with so little
 								             we are now qualified to do anything with nothing.
 								            None
 								            """),
 : textwrap.dedent("""\
 								            To: whom_it_may_concern@example.com
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								            From:
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								             nobody_you_want_to_know@example.com
 								            Subject: We the willing led by the
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								             unknowing are doing the impossible for
 								             the ungrateful. We have done so much
 								             for so long with so little we are now
 								             qualified to do anything with nothing.
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
 								            None
 								            """),
 : textwrap.dedent("""\
-												bpo-27240 Rewrite the email header folding algorithm. (#3488)

The original algorithm tried to delegate the folding to the tokens so
that those tokens whose folding rules differed could specify the
differences.  However, this resulted in a lot of duplicated code because
most of the rules were the same.

The new algorithm moves all folding logic into a set of functions
external to the token classes, but puts the information about which
tokens can be folded in which ways on the tokens...with the exception of
mime-parameters, which are a special case (which was not even
implemented in the old folder).

This algorithm can still probably be improved and hopefully simplified
somewhat.

Note that some of the test expectations are changed.  I believe the
changes are toward more desirable and consistent behavior: in general
when (re) folding a line the canonical version of the tokens is
generated, rather than preserving errors or extra whitespace.

											
										
										
											2017-12-03 19:51:41 -04:00
+								            To:
 								             whom_it_may_concern@example.com
 								            From:
 								             nobody_you_want_to_know@example.com
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								            Subject: We the
 								             willing led by the
 								             unknowing are doing
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								             the impossible for
 								             the ungrateful. We
 								             have done so much
 								             for so long with so
 								             little we are now
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								             qualified to do
 								             anything with
 								             nothing.
 								            None
 								            """),
 								        }
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								    refold_long_expected[100] = refold_long_expected[0]
 								    refold_all_expected = refold_long_expected.copy()
 								    refold_all_expected[0] = (
 								            "To: whom_it_may_concern@example.com\n"
 								            "From: nobody_you_want_to_know@example.com\n"
 								            "Subject: We the willing led by the unknowing are doing the "
 								              "impossible for the ungrateful. We have done so much for "
 								              "so long with so little we are now qualified to do anything "
 								              "with nothing.\n"
 								              "\n"
 								              "None\n")
 								    refold_all_expected[100] = (
 								            "To: whom_it_may_concern@example.com\n"
 								            "From: nobody_you_want_to_know@example.com\n"
 								            "Subject: We the willing led by the unknowing are doing the "
 								                "impossible for the ungrateful. We have\n"
 								              " done so much for so long with so little we are now qualified "
 								                "to do anything with nothing.\n"
 								              "\n"
 								              "None\n")
-												Make parameterized tests in email less hackish.

Or perhaps more hackish, depending on your perspective.  But at least this
way it is now possible to run the individual tests using the unittest CLI.

											
										
										
											2012-05-30 22:53:40 -03:00
+								    length_params = [n for n in refold_long_expected]
 								    def length_as_maxheaderlen_parameter(self, n):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								        s = self.ioclass()
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        g = self.genclass(s, maxheaderlen=n, policy=self.policy)
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								        g.flatten(msg)
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[n]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												Make parameterized tests in email less hackish.

Or perhaps more hackish, depending on your perspective.  But at least this
way it is now possible to run the individual tests using the unittest CLI.

											
										
										
											2012-05-30 22:53:40 -03:00
+								    def length_as_max_line_length_policy(self, n):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								        s = self.ioclass()
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        g = self.genclass(s, policy=self.policy.clone(max_line_length=n))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								        g.flatten(msg)
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[n]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												Make parameterized tests in email less hackish.

Or perhaps more hackish, depending on your perspective.  But at least this
way it is now possible to run the individual tests using the unittest CLI.

											
										
										
											2012-05-30 22:53:40 -03:00
+								    def length_as_maxheaderlen_parm_overrides_policy(self, n):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								        s = self.ioclass()
 								        g = self.genclass(s, maxheaderlen=n,
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								                          policy=self.policy.clone(max_line_length=10))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								        g.flatten(msg)
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[n]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												Make parameterized tests in email less hackish.

Or perhaps more hackish, depending on your perspective.  But at least this
way it is now possible to run the individual tests using the unittest CLI.

											
										
										
											2012-05-30 22:53:40 -03:00
+								    def length_as_max_line_length_with_refold_none_does_not_fold(self, n):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(refold_source='none',
 								                                                      max_line_length=n))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[0]))
-												Make parameterized tests in email less hackish.

Or perhaps more hackish, depending on your perspective.  But at least this
way it is now possible to run the individual tests using the unittest CLI.

											
										
										
											2012-05-30 22:53:40 -03:00
+								    def length_as_max_line_length_with_refold_all_folds(self, n):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(refold_source='all',
 								                                                      max_line_length=n))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(self.refold_all_expected[n]))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file are some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.

											
										
										
											2012-05-25 16:01:48 -03:00
+								    def test_crlf_control_via_policy(self):
 								        source = "Subject: test\r\n\r\ntest body\r\n"
 								        expected = source
 								        msg = self.msgmaker(self.typ(source))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=policy.SMTP)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
 								    def test_flatten_linesep_overrides_policy(self):
 								        source = "Subject: test\n\ntest body\n"
 								        expected = source
 								        msg = self.msgmaker(self.typ(source))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=policy.SMTP)
 								        g.flatten(msg, linesep='\n')
 								        self.assertEqual(s.getvalue(), self.typ(expected))
-												gh-117313: Fix re-folding email messages containing non-standard line separators (GH-117369)

Only treat '\n', '\r' and '\r\n' as line separators in re-folding the email
messages.  Preserve control characters '\v', '\f', '\x1c', '\x1d' and '\x1e'
and Unicode line separators '\x85', '\u2028' and '\u2029' as is.
											
										
										
											2024-04-17 07:00:25 -03:00
+								    def test_flatten_linesep(self):
 								        source = 'Subject: one\n two\r three\r\n four\r\n\r\ntest body\r\n'
 								        msg = self.msgmaker(self.typ(source))
 								        self.assertEqual(msg['Subject'], 'one two three four')
 								        expected = 'Subject: one\n two\n three\n four\n\ntest body\n'
 								        s = self.ioclass()
 								        g = self.genclass(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
 								        expected = 'Subject: one two three four\n\ntest body\n'
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
 								    def test_flatten_control_linesep(self):
 								        source = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\r\n\r\ntest body\r\n'
 								        msg = self.msgmaker(self.typ(source))
 								        self.assertEqual(msg['Subject'], 'one\v two\f three\x1c four\x1d five\x1e six')
 								        expected = 'Subject: one\v two\f three\x1c four\x1d five\x1e six\n\ntest body\n'
 								        s = self.ioclass()
 								        g = self.genclass(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
-												#20098: add mangle_from_ policy option.

This defaults to True in the compat32 policy for backward compatibility,
but to False for all new policies.

Patch by Milan Oberkirch, with a few tweaks.

											
										
										
											2015-05-17 15:24:33 -03:00
+								    def test_set_mangle_from_via_policy(self):
 								        source = textwrap.dedent("""\
 								            Subject: test that
-												Issue #27895:  Spelling fixes (Contributed by Ville Skyttä).

											
										
										
											2016-08-30 14:47:49 -03:00
+								             from is mangled in the body!
-												#20098: add mangle_from_ policy option.

This defaults to True in the compat32 policy for backward compatibility,
but to False for all new policies.

Patch by Milan Oberkirch, with a few tweaks.

											
										
										
											2015-05-17 15:24:33 -03:00
 								            From time to time I write a rhyme.
 								            """)
 								        variants = (
 								            (None, True),
 								            (policy.compat32, True),
 								            (policy.default, False),
 								            (policy.default.clone(mangle_from_=True), True),
 								            )
 								        for p, mangle in variants:
 								            expected = source.replace('From ', '>From ') if mangle else source
 								            with self.subTest(policy=p, mangle_from_=mangle):
 								                msg = self.msgmaker(self.typ(source))
 								                s = self.ioclass()
 								                g = self.genclass(s, policy=p)
 								                g.flatten(msg)
 								                self.assertEqual(s.getvalue(), self.typ(expected))
-												[email] bpo-29478: Fix passing max_line_length=None from Compat32 policy (GH-595)

If max_line_length=None is specified while using the Compat32 policy,
it is no longer ignored.
											
										
										
											2017-06-12 03:43:41 -03:00
+								    def test_compat32_max_line_length_does_not_fold_when_none(self):
 								        msg = self.msgmaker(self.typ(self.refold_long_expected[0]))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=policy.compat32.clone(max_line_length=None))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(self.refold_long_expected[0]))
-												bpo-27240 Rewrite the email header folding algorithm. (#3488)

The original algorithm tried to delegate the folding to the tokens so
that those tokens whose folding rules differed could specify the
differences.  However, this resulted in a lot of duplicated code because
most of the rules were the same.

The new algorithm moves all folding logic into a set of functions
external to the token classes, but puts the information about which
tokens can be folded in which ways on the tokens...with the exception of
mime-parameters, which are a special case (which was not even
implemented in the old folder).

This algorithm can still probably be improved and hopefully simplified
somewhat.

Note that some of the test expectations are changed.  I believe the
changes are toward more desirable and consistent behavior: in general
when (re) folding a line the canonical version of the tokens is
generated, rather than preserving errors or extra whitespace.

											
										
										
											2017-12-03 19:51:41 -04:00
+								    def test_rfc2231_wrapping(self):
 								        # This is pretty much just to make sure we don't have an infinite
 								        # loop; I don't expect anyone to hit this in the field.
 								        msg = self.msgmaker(self.typ(textwrap.dedent("""\
 								            To: nobody
 								            Content-Disposition: attachment;
 								             filename="afilenamelongenoghtowraphere"
 								            None
 								            """)))
 								        expected = textwrap.dedent("""\
 								            To: nobody
 								            Content-Disposition: attachment;
 								             filename*0*=us-ascii''afilename;
 								             filename*1*=longenoghtowraphere
 								            None
 								            """)
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(max_line_length=33))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
 								    def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self):
 								        # This is just to make sure we don't have an infinite loop; I don't
 								        # expect anyone to hit this in the field, so I'm not bothering to make
 								        # the result optimal (the encoding isn't needed).
 								        msg = self.msgmaker(self.typ(textwrap.dedent("""\
 								            To: nobody
 								            Content-Disposition: attachment;
 								             filename="afilenamelongenoghtowraphere"
 								            None
 								            """)))
 								        expected = textwrap.dedent("""\
 								            To: nobody
 								            Content-Disposition:
 								             attachment;
 								             filename*0*=us-ascii''afilenamelongenoghtowraphere
 								            None
 								            """)
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(max_line_length=20))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
 								class TestGenerator(TestGeneratorBase, TestEmailBase):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								    msgfunc = staticmethod(message_from_string)
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								    genclass = Generator
 								    ioclass = io.StringIO
-												#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file are some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.

											
										
										
											2012-05-25 16:01:48 -03:00
+								    typ = str
-												gh-117313: Fix re-folding email messages containing non-standard line separators (GH-117369)

Only treat '\n', '\r' and '\r\n' as line separators in re-folding the email
messages.  Preserve control characters '\v', '\f', '\x1c', '\x1d' and '\x1e'
and Unicode line separators '\x85', '\u2028' and '\u2029' as is.
											
										
										
											2024-04-17 07:00:25 -03:00
+								    def test_flatten_unicode_linesep(self):
 								        source = 'Subject: one\x85 two\u2028 three\u2029 four\r\n\r\ntest body\r\n'
 								        msg = self.msgmaker(self.typ(source))
 								        self.assertEqual(msg['Subject'], 'one\x85 two\u2028 three\u2029 four')
 								        expected = 'Subject: =?utf-8?b?b25lwoUgdHdv4oCoIHRocmVl4oCp?= four\n\ntest body\n'
 								        s = self.ioclass()
 								        g = self.genclass(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
 								        s = self.ioclass()
 								        g = self.genclass(s, policy=self.policy.clone(refold_source='all'))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), self.typ(expected))
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
 								class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								    msgfunc = staticmethod(message_from_bytes)
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
+								    genclass = BytesGenerator
 								    ioclass = io.BytesIO
-												#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file is some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.

											
										
										
											2012-05-25 16:01:48 -03:00
+								    typ = lambda self, x: x.encode('ascii')
-												gh-92081: Fix for email.generator.Generator with whitespace between encoded words. (#92281)

* Fix for email.generator.Generator with whitespace between encoded words.

email.generator.Generator currently does not handle whitespace between
encoded words correctly when the encoded words span multiple lines.  The
current generator will create an encoded word for each line.  If the end
of the line happens to correspond with the end real word in the
plaintext, the generator will place an unencoded space at the start of
the subsequent lines to represent the whitespace between the plaintext
words.

A compliant decoder will strip all the whitespace from between two
encoded words which leads to missing spaces in the round-tripped
output.

The fix for this is to make sure that whitespace between two encoded
words ends up inside of one or the other of the encoded words.  This
fix places the space inside of the second encoded word.

A second problem happens with continuation lines.  A continuation line that
starts with whitespace and is followed by a non-encoded word is fine because
the newline between such continuation lines is defined as condensing to
a single space character.  When the continuation line starts with whitespace
followed by an encoded word, however, the RFCs specify that the word is run
together with the encoded word on the previous line.  This is because normal
words are folded on syntactic breaks but encoded words are not.

The solution to this is to add the whitespace to the start of the encoded word
on the continuation line.

Test cases are from #92081

* Rename a variable so it's not confused with the final variable.
											
										
										
											2024-05-20 16:10:47 -03:00
+								    def test_defaults_handle_spaces_between_encoded_words_when_folded(self):
 								        source = ("Уведомление о принятии в работу обращения для"
 								                  " подключения услуги")
 								        expected = ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'
 								                    ' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'
 								                    ' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')
 								        msg = EmailMessage()
 								        msg['Subject'] = source
 								        s = io.BytesIO()
 								        g = BytesGenerator(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
-												gh-120930: Remove extra blank occurring in wrapped encoded words in email headers (GH-121747)


											
										
										
											2024-07-18 09:48:05 -03:00
+								    def test_defaults_handle_spaces_when_encoded_words_is_folded_in_middle(self):
 								        source = ('A very long long long long long long long long long long long long '
 								                  'long long long long long long long long long long long súmmäry')
 								        expected = ('Subject: A very long long long long long long long long long long long long\n'
 								                    ' long long long long long long long long long long long =?utf-8?q?s=C3=BAmm?=\n'
 								                    ' =?utf-8?q?=C3=A4ry?=\n\n').encode('ascii')
 								        msg = EmailMessage()
 								        msg['Subject'] = source
 								        s = io.BytesIO()
 								        g = BytesGenerator(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
-												gh-92081: Fix for email.generator.Generator with whitespace between encoded words. (#92281)

* Fix for email.generator.Generator with whitespace between encoded words.

email.generator.Generator currently does not handle whitespace between
encoded words correctly when the encoded words span multiple lines.  The
current generator will create an encoded word for each line.  If the end
of the line happens to correspond with the end real word in the
plaintext, the generator will place an unencoded space at the start of
the subsequent lines to represent the whitespace between the plaintext
words.

A compliant decoder will strip all the whitespace from between two
encoded words which leads to missing spaces in the round-tripped
output.

The fix for this is to make sure that whitespace between two encoded
words ends up inside of one or the other of the encoded words.  This
fix places the space inside of the second encoded word.

A second problem happens with continuation lines.  A continuation line that
starts with whitespace and is followed by a non-encoded word is fine because
the newline between such continuation lines is defined as condensing to
a single space character.  When the continuation line starts with whitespace
followed by an encoded word, however, the RFCs specify that the word is run
together with the encoded word on the previous line.  This is because normal
words are folded on syntactic breaks but encoded words are not.

The solution to this is to add the whitespace to the start of the encoded word
on the continuation line.

Test cases are from #92081

* Rename a variable so it's not confused with the final variable.
											
										
										
											2024-05-20 16:10:47 -03:00
+								    def test_defaults_handle_spaces_at_start_of_subject(self):
 								        source = " Уведомление"
 								        expected = b"Subject:  =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtQ==?=\n\n"
 								        msg = EmailMessage()
 								        msg['Subject'] = source
 								        s = io.BytesIO()
 								        g = BytesGenerator(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
 								    def test_defaults_handle_spaces_at_start_of_continuation_line(self):
 								        source = " ф ффффффффффффффффффф ф ф"
 								        expected = (b"Subject:  "
 								                    b"=?utf-8?b?0YQg0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YTRhNGE0YQ=?=\n"
 								                    b" =?utf-8?b?INGEINGE?=\n\n")
 								        msg = EmailMessage()
 								        msg['Subject'] = source
 								        s = io.BytesIO()
 								        g = BytesGenerator(s)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
-												#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file is some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.

											
										
										
											2012-05-25 16:01:48 -03:00
+								    def test_cte_type_7bit_handles_unknown_8bit(self):
 								        source = ("Subject: Maintenant je vous présente mon "
 								                 "collègue\n\n").encode('utf-8')
-												#12586: add provisional email policy with new header parsing and folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.

											
										
										
											2012-05-25 19:42:14 -03:00
+								        expected = ('Subject: Maintenant je vous =?unknown-8bit?q?'
 								                    'pr=C3=A9sente_mon_coll=C3=A8gue?=\n\n').encode('ascii')
-												#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file is some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.

											
										
										
											2012-05-25 16:01:48 -03:00
+								        msg = message_from_bytes(source)
 								        s = io.BytesIO()
 								        g = BytesGenerator(s, policy=self.policy.clone(cte_type='7bit'))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
 								    def test_cte_type_7bit_transforms_8bit_cte(self):
 								        source = textwrap.dedent("""\
 								            From: foo@bar.com
 								            To: Dinsdale
 								            Subject: Nudge nudge, wink, wink
 								            Mime-Version: 1.0
 								            Content-Type: text/plain; charset="latin-1"
 								            Content-Transfer-Encoding: 8bit
 								            oh là là, know what I mean, know what I mean?
 								            """).encode('latin1')
 								        msg = message_from_bytes(source)
 								        expected =  textwrap.dedent("""\
 								            From: foo@bar.com
 								            To: Dinsdale
 								            Subject: Nudge nudge, wink, wink
 								            Mime-Version: 1.0
 								            Content-Type: text/plain; charset="iso-8859-1"
 								            Content-Transfer-Encoding: quoted-printable
 								            oh l=E0 l=E0, know what I mean, know what I mean?
 								            """).encode('ascii')
 								        s = io.BytesIO()
 								        g = BytesGenerator(s, policy=self.policy.clone(cte_type='7bit',
 								                                                       linesep='\n'))
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
-												#24211: Add RFC6532 support to the email library.

This could use more edge case tests, but the basic functionality is tested.
(Note that this changeset does not add tailored support for the RFC 6532
message/global MIME type, but the email package generic facilities will handle
it.)

Reviewed by Maciej Szulik.

											
										
										
											2015-05-17 12:29:21 -03:00
+								    def test_smtputf8_policy(self):
 								        msg = EmailMessage()
 								        msg['From'] = "Páolo <főo@bar.com>"
 								        msg['To'] = 'Dinsdale'
 								        msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
 								        msg.set_content("oh là là, know what I mean, know what I mean?")
 								        expected = textwrap.dedent("""\
 								            From: Páolo <főo@bar.com>
 								            To: Dinsdale
 								            Subject: Nudge nudge, wink, wink \u1F609
 								            Content-Type: text/plain; charset="utf-8"
 								            Content-Transfer-Encoding: 8bit
 								            MIME-Version: 1.0
 								            oh là là, know what I mean, know what I mean?
 								            """).encode('utf-8').replace(b'\n', b'\r\n')
 								        s = io.BytesIO()
 								        g = BytesGenerator(s, policy=policy.SMTPUTF8)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
-												bpo-34424: Handle different policy.linesep lengths correctly. (#8803)



											
										
										
											2019-05-13 22:07:39 -03:00
+								    def test_smtp_policy(self):
 								        msg = EmailMessage()
 								        msg["From"] = Address(addr_spec="foo@bar.com", display_name="Páolo")
 								        msg["To"] = Address(addr_spec="bar@foo.com", display_name="Dinsdale")
 								        msg["Subject"] = "Nudge nudge, wink, wink"
 								        msg.set_content("oh boy, know what I mean, know what I mean?")
 								        expected = textwrap.dedent("""\
 								            From: =?utf-8?q?P=C3=A1olo?= <foo@bar.com>
 								            To: Dinsdale <bar@foo.com>
 								            Subject: Nudge nudge, wink, wink
 								            Content-Type: text/plain; charset="utf-8"
 								            Content-Transfer-Encoding: 7bit
 								            MIME-Version: 1.0
 								            oh boy, know what I mean, know what I mean?
 								            """).encode().replace(b"\n", b"\r\n")
 								        s = io.BytesIO()
 								        g = BytesGenerator(s, policy=policy.SMTP)
 								        g.flatten(msg)
 								        self.assertEqual(s.getvalue(), expected)
-												#11731: simplify/enhance parser/generator API by introducing policy objects.

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.

											
										
										
											2011-04-18 14:59:37 -03:00
 								if __name__ == '__main__':
 								    unittest.main()