bpo-30681: Support invalid date format or value in email Date header (GH-22090)

I am re-submitting an older PR which was abandoned but is still relevant, #10783 by @timb07.

The issue being solved (bpo-30681) is still relevant. The original PR #10783 was closed because
the final requested changes were not applied, and it has since been abandoned.

In this new PR I have re-used the original patch and applied both review comments, from @maxking and @pganssle.


For reference, here is the original PR description:
In email.utils.parsedate_to_datetime(), a failure to parse the date, or invalid date components (such as hour outside 0..23) raises an exception. Document this behaviour, and add tests to test_email/test_utils.py to confirm this behaviour.

In email.headerregistry.DateHeader.parse(), check when parsedate_to_datetime() raises an exception and add a new defect InvalidDateDefect; preserve the invalid value as the string value of the header, but set the datetime attribute to None.

Add tests to test_email/test_headerregistry.py to confirm this behaviour; also add a test to test_email/test_inversion.py to confirm that emails with such defective date headers round-trip successfully.

This pull request incorporates feedback gratefully received from @bitdancer, @brettcannon, @Mariatta and @warsaw, and replaces the earlier PR #2254.

Automerge-Triggered-By: GH:warsaw
This commit is contained in:
Georges Toth 2020-10-27 01:31:06 +01:00 committed by GitHub
parent 8e3b9f9283
commit 303aac8c56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 59 additions and 5 deletions

View File

@ -112,3 +112,6 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded * :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
bytes, the number of non-padding base64 characters was invalid (1 more than bytes, the number of non-padding base64 characters was invalid (1 more than
a multiple of 4). The encoded block was kept as-is. a multiple of 4). The encoded block was kept as-is.
* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field.
The original value is kept as-is.

View File

@ -124,8 +124,10 @@ of the new API.
.. function:: parsedate_to_datetime(date) .. function:: parsedate_to_datetime(date)
The inverse of :func:`format_datetime`. Performs the same function as The inverse of :func:`format_datetime`. Performs the same function as
:func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`;
the input date has a timezone of ``-0000``, the ``datetime`` will be a naive otherwise ``ValueError`` is raised if *date* contains an invalid value such
as an hour greater than 23 or a timezone offset not between -24 and 24 hours.
If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
``datetime``, and if the date is conforming to the RFCs it will represent a ``datetime``, and if the date is conforming to the RFCs it will represent a
time in UTC but with no indication of the actual source timezone of the time in UTC but with no indication of the actual source timezone of the
message the date comes from. If the input date has any other valid timezone message the date comes from. If the input date has any other valid timezone

View File

@ -65,7 +65,7 @@ def _parsedate_tz(data):
""" """
if not data: if not data:
return return None
data = data.split() data = data.split()
# The FWS after the comma after the day-of-week is optional, so search and # The FWS after the comma after the day-of-week is optional, so search and
# adjust for this. # adjust for this.

View File

@ -108,3 +108,6 @@ class NonASCIILocalPartDefect(HeaderDefect):
"""local_part contains non-ASCII characters""" """local_part contains non-ASCII characters"""
# This defect only occurs during unicode parsing, not when # This defect only occurs during unicode parsing, not when
# parsing messages decoded from binary. # parsing messages decoded from binary.
class InvalidDateDefect(HeaderDefect):
"""Header has unparseable or invalid date"""

View File

@ -302,7 +302,14 @@ class DateHeader:
kwds['parse_tree'] = parser.TokenList() kwds['parse_tree'] = parser.TokenList()
return return
if isinstance(value, str): if isinstance(value, str):
kwds['decoded'] = value
try:
value = utils.parsedate_to_datetime(value) value = utils.parsedate_to_datetime(value)
except ValueError:
kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
kwds['datetime'] = None
kwds['parse_tree'] = parser.TokenList()
return
kwds['datetime'] = value kwds['datetime'] = value
kwds['decoded'] = utils.format_datetime(kwds['datetime']) kwds['decoded'] = utils.format_datetime(kwds['datetime'])
kwds['parse_tree'] = cls.value_parser(kwds['decoded']) kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

View File

@ -195,7 +195,10 @@ def make_msgid(idstring=None, domain=None):
def parsedate_to_datetime(data): def parsedate_to_datetime(data):
*dtuple, tz = _parsedate_tz(data) parsed_date_tz = _parsedate_tz(data)
if parsed_date_tz is None:
raise ValueError('Invalid date value or format "%s"' % str(data))
*dtuple, tz = parsed_date_tz
if tz is None: if tz is None:
return datetime.datetime(*dtuple[:6]) return datetime.datetime(*dtuple[:6])
return datetime.datetime(*dtuple[:6], return datetime.datetime(*dtuple[:6],

View File

@ -204,6 +204,22 @@ class TestDateHeader(TestHeaderBase):
self.assertEqual(len(h.defects), 1) self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue) self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue)
def test_invalid_date_format(self):
s = 'Not a date header'
h = self.make_header('date', s)
self.assertEqual(h, s)
self.assertIsNone(h.datetime)
self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
def test_invalid_date_value(self):
s = 'Tue, 06 Jun 2017 27:39:33 +0600'
h = self.make_header('date', s)
self.assertEqual(h, s)
self.assertIsNone(h.datetime)
self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
def test_datetime_read_only(self): def test_datetime_read_only(self):
h = self.make_header('date', self.datestring) h = self.make_header('date', self.datestring)
with self.assertRaises(AttributeError): with self.assertRaises(AttributeError):

View File

@ -46,6 +46,14 @@ class TestInversion(TestEmailBase):
foo foo
"""),), """),),
'header_with_invalid_date': (dedent(b"""\
Date: Tue, 06 Jun 2017 27:39:33 +0600
From: abc@xyz.com
Subject: timezones
How do they work even?
"""),),
} }
payload_params = { payload_params = {

View File

@ -48,6 +48,16 @@ class DateTimeTests(unittest.TestCase):
utils.parsedate_to_datetime(self.datestring + ' -0000'), utils.parsedate_to_datetime(self.datestring + ' -0000'),
self.naive_dt) self.naive_dt)
def test_parsedate_to_datetime_with_invalid_raises_valueerror(self):
invalid_dates = ['',
'0',
'A Complete Waste of Time',
'Tue, 06 Jun 2017 27:39:33 +0600',
'Tue, 06 Jun 2017 07:39:33 +2600',
'Tue, 06 Jun 2017 27:39:33']
for dtstr in invalid_dates:
with self.subTest(dtstr=dtstr):
self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr)
class LocaltimeTests(unittest.TestCase): class LocaltimeTests(unittest.TestCase):

View File

@ -0,0 +1,2 @@
Handle exceptions caused by unparseable date headers when using the email
"default" policy. Patch by Tim Bell and Georges Toth.