bpo-30681: Support invalid date format or value in email Date header (GH-22090)
I am re-submitting an older PR which was abandoned but is still relevant, #10783 by @timb07. The issue being solved (bpo-30681) is still relevant. The original PR #10783 was closed because the final requested changes were not applied, and it has since been abandoned. In this new PR I have re-used the original patch and applied both comments from the review, by @maxking and @pganssle. For reference, here is the original PR description: In email.utils.parsedate_to_datetime(), a failure to parse the date, or invalid date components (such as hour outside 0..23) raises an exception. Document this behaviour, and add tests to test_email/test_utils.py to confirm this behaviour. In email.headerregistry.DateHeader.parse(), check when parsedate_to_datetime() raises an exception and add a new defect InvalidDateDefect; preserve the invalid value as the string value of the header, but set the datetime attribute to None. Add tests to test_email/test_headerregistry.py to confirm this behaviour; also add a test to test_email/test_inversion.py to confirm that emails with such defective date headers round-trip successfully. This pull request incorporates feedback gratefully received from @bitdancer, @brettcannon, @Mariatta and @warsaw, and replaces the earlier PR #2254. Automerge-Triggered-By: GH:warsaw
This commit is contained in:
parent
8e3b9f9283
commit
303aac8c56
|
@ -112,3 +112,6 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
|
||||||
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
|
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
|
||||||
bytes, the number of non-padding base64 characters was invalid (1 more than
|
bytes, the number of non-padding base64 characters was invalid (1 more than
|
||||||
a multiple of 4). The encoded block was kept as-is.
|
a multiple of 4). The encoded block was kept as-is.
|
||||||
|
|
||||||
|
* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field.
|
||||||
|
The original value is kept as-is.
|
|
@ -124,8 +124,10 @@ of the new API.
|
||||||
.. function:: parsedate_to_datetime(date)
|
.. function:: parsedate_to_datetime(date)
|
||||||
|
|
||||||
The inverse of :func:`format_datetime`. Performs the same function as
|
The inverse of :func:`format_datetime`. Performs the same function as
|
||||||
:func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If
|
:func:`parsedate`, but on success returns a :mod:`~datetime.datetime`;
|
||||||
the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
|
otherwise ``ValueError`` is raised if *date* contains an invalid value such
|
||||||
|
as an hour greater than 23 or a timezone offset not between -24 and 24 hours.
|
||||||
|
If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
|
||||||
``datetime``, and if the date is conforming to the RFCs it will represent a
|
``datetime``, and if the date is conforming to the RFCs it will represent a
|
||||||
time in UTC but with no indication of the actual source timezone of the
|
time in UTC but with no indication of the actual source timezone of the
|
||||||
message the date comes from. If the input date has any other valid timezone
|
message the date comes from. If the input date has any other valid timezone
|
||||||
|
|
|
@ -65,7 +65,7 @@ def _parsedate_tz(data):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not data:
|
if not data:
|
||||||
return
|
return None
|
||||||
data = data.split()
|
data = data.split()
|
||||||
# The FWS after the comma after the day-of-week is optional, so search and
|
# The FWS after the comma after the day-of-week is optional, so search and
|
||||||
# adjust for this.
|
# adjust for this.
|
||||||
|
|
|
@ -108,3 +108,6 @@ class NonASCIILocalPartDefect(HeaderDefect):
|
||||||
"""local_part contains non-ASCII characters"""
|
"""local_part contains non-ASCII characters"""
|
||||||
# This defect only occurs during unicode parsing, not when
|
# This defect only occurs during unicode parsing, not when
|
||||||
# parsing messages decoded from binary.
|
# parsing messages decoded from binary.
|
||||||
|
|
||||||
|
class InvalidDateDefect(HeaderDefect):
|
||||||
|
"""Header has unparseable or invalid date"""
|
||||||
|
|
|
@ -302,7 +302,14 @@ class DateHeader:
|
||||||
kwds['parse_tree'] = parser.TokenList()
|
kwds['parse_tree'] = parser.TokenList()
|
||||||
return
|
return
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
|
kwds['decoded'] = value
|
||||||
|
try:
|
||||||
value = utils.parsedate_to_datetime(value)
|
value = utils.parsedate_to_datetime(value)
|
||||||
|
except ValueError:
|
||||||
|
kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
|
||||||
|
kwds['datetime'] = None
|
||||||
|
kwds['parse_tree'] = parser.TokenList()
|
||||||
|
return
|
||||||
kwds['datetime'] = value
|
kwds['datetime'] = value
|
||||||
kwds['decoded'] = utils.format_datetime(kwds['datetime'])
|
kwds['decoded'] = utils.format_datetime(kwds['datetime'])
|
||||||
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
|
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
|
||||||
|
|
|
@ -195,7 +195,10 @@ def make_msgid(idstring=None, domain=None):
|
||||||
|
|
||||||
|
|
||||||
def parsedate_to_datetime(data):
|
def parsedate_to_datetime(data):
|
||||||
*dtuple, tz = _parsedate_tz(data)
|
parsed_date_tz = _parsedate_tz(data)
|
||||||
|
if parsed_date_tz is None:
|
||||||
|
raise ValueError('Invalid date value or format "%s"' % str(data))
|
||||||
|
*dtuple, tz = parsed_date_tz
|
||||||
if tz is None:
|
if tz is None:
|
||||||
return datetime.datetime(*dtuple[:6])
|
return datetime.datetime(*dtuple[:6])
|
||||||
return datetime.datetime(*dtuple[:6],
|
return datetime.datetime(*dtuple[:6],
|
||||||
|
|
|
@ -204,6 +204,22 @@ class TestDateHeader(TestHeaderBase):
|
||||||
self.assertEqual(len(h.defects), 1)
|
self.assertEqual(len(h.defects), 1)
|
||||||
self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue)
|
self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue)
|
||||||
|
|
||||||
|
def test_invalid_date_format(self):
|
||||||
|
s = 'Not a date header'
|
||||||
|
h = self.make_header('date', s)
|
||||||
|
self.assertEqual(h, s)
|
||||||
|
self.assertIsNone(h.datetime)
|
||||||
|
self.assertEqual(len(h.defects), 1)
|
||||||
|
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
|
||||||
|
|
||||||
|
def test_invalid_date_value(self):
|
||||||
|
s = 'Tue, 06 Jun 2017 27:39:33 +0600'
|
||||||
|
h = self.make_header('date', s)
|
||||||
|
self.assertEqual(h, s)
|
||||||
|
self.assertIsNone(h.datetime)
|
||||||
|
self.assertEqual(len(h.defects), 1)
|
||||||
|
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)
|
||||||
|
|
||||||
def test_datetime_read_only(self):
|
def test_datetime_read_only(self):
|
||||||
h = self.make_header('date', self.datestring)
|
h = self.make_header('date', self.datestring)
|
||||||
with self.assertRaises(AttributeError):
|
with self.assertRaises(AttributeError):
|
||||||
|
|
|
@ -46,6 +46,14 @@ class TestInversion(TestEmailBase):
|
||||||
foo
|
foo
|
||||||
"""),),
|
"""),),
|
||||||
|
|
||||||
|
'header_with_invalid_date': (dedent(b"""\
|
||||||
|
Date: Tue, 06 Jun 2017 27:39:33 +0600
|
||||||
|
From: abc@xyz.com
|
||||||
|
Subject: timezones
|
||||||
|
|
||||||
|
How do they work even?
|
||||||
|
"""),),
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
payload_params = {
|
payload_params = {
|
||||||
|
|
|
@ -48,6 +48,16 @@ class DateTimeTests(unittest.TestCase):
|
||||||
utils.parsedate_to_datetime(self.datestring + ' -0000'),
|
utils.parsedate_to_datetime(self.datestring + ' -0000'),
|
||||||
self.naive_dt)
|
self.naive_dt)
|
||||||
|
|
||||||
|
def test_parsedate_to_datetime_with_invalid_raises_valueerror(self):
|
||||||
|
invalid_dates = ['',
|
||||||
|
'0',
|
||||||
|
'A Complete Waste of Time'
|
||||||
|
'Tue, 06 Jun 2017 27:39:33 +0600',
|
||||||
|
'Tue, 06 Jun 2017 07:39:33 +2600',
|
||||||
|
'Tue, 06 Jun 2017 27:39:33']
|
||||||
|
for dtstr in invalid_dates:
|
||||||
|
with self.subTest(dtstr=dtstr):
|
||||||
|
self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr)
|
||||||
|
|
||||||
class LocaltimeTests(unittest.TestCase):
|
class LocaltimeTests(unittest.TestCase):
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Handle exceptions caused by unparseable date headers when using the email
|
||||||
|
"default" policy. Patch by Tim Bell and Georges Toth.
|
Loading…
Reference in New Issue