bpo-40944: Fix IndexError when parsing emails with truncated Message-ID, address, routes, etc. (GH-20790)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Ivan Savin 2024-04-17 14:14:22 +04:00 committed by GitHub
parent 147cd0581e
commit 1aa8bbe62f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 51 additions and 5 deletions

View File

@ -1213,7 +1213,7 @@ def get_bare_quoted_string(value):
value is the text between the quote marks, with whitespace value is the text between the quote marks, with whitespace
preserved and quoted pairs decoded. preserved and quoted pairs decoded.
""" """
if value[0] != '"': if not value or value[0] != '"':
raise errors.HeaderParseError( raise errors.HeaderParseError(
"expected '\"' but found '{}'".format(value)) "expected '\"' but found '{}'".format(value))
bare_quoted_string = BareQuotedString() bare_quoted_string = BareQuotedString()
@ -1454,7 +1454,7 @@ def get_local_part(value):
""" """
local_part = LocalPart() local_part = LocalPart()
leader = None leader = None
if value[0] in CFWS_LEADER: if value and value[0] in CFWS_LEADER:
leader, value = get_cfws(value) leader, value = get_cfws(value)
if not value: if not value:
raise errors.HeaderParseError( raise errors.HeaderParseError(
@ -1613,7 +1613,7 @@ def get_domain(value):
""" """
domain = Domain() domain = Domain()
leader = None leader = None
if value[0] in CFWS_LEADER: if value and value[0] in CFWS_LEADER:
leader, value = get_cfws(value) leader, value = get_cfws(value)
if not value: if not value:
raise errors.HeaderParseError( raise errors.HeaderParseError(
@ -1689,6 +1689,8 @@ def get_obs_route(value):
if value[0] in CFWS_LEADER: if value[0] in CFWS_LEADER:
token, value = get_cfws(value) token, value = get_cfws(value)
obs_route.append(token) obs_route.append(token)
if not value:
break
if value[0] == '@': if value[0] == '@':
obs_route.append(RouteComponentMarker) obs_route.append(RouteComponentMarker)
token, value = get_domain(value[1:]) token, value = get_domain(value[1:])
@ -1707,7 +1709,7 @@ def get_angle_addr(value):
""" """
angle_addr = AngleAddr() angle_addr = AngleAddr()
if value[0] in CFWS_LEADER: if value and value[0] in CFWS_LEADER:
token, value = get_cfws(value) token, value = get_cfws(value)
angle_addr.append(token) angle_addr.append(token)
if not value or value[0] != '<': if not value or value[0] != '<':
@ -1717,7 +1719,7 @@ def get_angle_addr(value):
value = value[1:] value = value[1:]
# Although it is not legal per RFC5322, SMTP uses '<>' in certain # Although it is not legal per RFC5322, SMTP uses '<>' in certain
# circumstances. # circumstances.
if value[0] == '>': if value and value[0] == '>':
angle_addr.append(ValueTerminal('>', 'angle-addr-end')) angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
angle_addr.defects.append(errors.InvalidHeaderDefect( angle_addr.defects.append(errors.InvalidHeaderDefect(
"null addr-spec in angle-addr")) "null addr-spec in angle-addr"))
@ -1769,6 +1771,9 @@ def get_name_addr(value):
name_addr = NameAddr() name_addr = NameAddr()
# Both the optional display name and the angle-addr can start with cfws. # Both the optional display name and the angle-addr can start with cfws.
leader = None leader = None
if not value:
raise errors.HeaderParseError(
"expected name-addr but found '{}'".format(value))
if value[0] in CFWS_LEADER: if value[0] in CFWS_LEADER:
leader, value = get_cfws(value) leader, value = get_cfws(value)
if not value: if not value:

View File

@ -801,6 +801,10 @@ class TestParser(TestParserMixin, TestEmailBase):
self.assertEqual(qs.content, 'bob') self.assertEqual(qs.content, 'bob')
self.assertEqual(qs.quoted_value, ' "bob"') self.assertEqual(qs.quoted_value, ' "bob"')
def test_get_quoted_string_cfws_only_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_quoted_string(' (foo) ')
def test_get_quoted_string_no_quoted_string(self): def test_get_quoted_string_no_quoted_string(self):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_quoted_string(' (ab) xyz') parser.get_quoted_string(' (ab) xyz')
@ -1135,6 +1139,10 @@ class TestParser(TestParserMixin, TestEmailBase):
'@python.org') '@python.org')
self.assertEqual(local_part.local_part, 'Fred.A.Johnson and dogs') self.assertEqual(local_part.local_part, 'Fred.A.Johnson and dogs')
def test_get_local_part_empty_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_local_part('')
def test_get_local_part_no_part_raises(self): def test_get_local_part_no_part_raises(self):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_local_part(' (foo) ') parser.get_local_part(' (foo) ')
@ -1387,6 +1395,10 @@ class TestParser(TestParserMixin, TestEmailBase):
'') '')
self.assertEqual(domain.domain, 'example.com') self.assertEqual(domain.domain, 'example.com')
def test_get_domain_empty_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_domain("")
def test_get_domain_no_non_cfws_raises(self): def test_get_domain_no_non_cfws_raises(self):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_domain(" (foo)\t") parser.get_domain(" (foo)\t")
@ -1512,6 +1524,10 @@ class TestParser(TestParserMixin, TestEmailBase):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) @example.com,') parser.get_obs_route('(foo) @example.com,')
def test_get_obs_route_no_route_before_end_raises2(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) @example.com, (foo) ')
def test_get_obs_route_no_route_before_special_raises(self): def test_get_obs_route_no_route_before_special_raises(self):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) [abc],') parser.get_obs_route('(foo) [abc],')
@ -1520,6 +1536,14 @@ class TestParser(TestParserMixin, TestEmailBase):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('(foo) @example.com [abc],') parser.get_obs_route('(foo) @example.com [abc],')
def test_get_obs_route_no_domain_after_at_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('@')
def test_get_obs_route_no_domain_after_at_raises2(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_obs_route('@example.com, @')
# get_angle_addr # get_angle_addr
def test_get_angle_addr_simple(self): def test_get_angle_addr_simple(self):
@ -1646,6 +1670,14 @@ class TestParser(TestParserMixin, TestEmailBase):
self.assertIsNone(angle_addr.route) self.assertIsNone(angle_addr.route)
self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com')
def test_get_angle_addr_empty_raise(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_angle_addr('')
def test_get_angle_addr_left_angle_only_raise(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_angle_addr('<')
def test_get_angle_addr_no_angle_raise(self): def test_get_angle_addr_no_angle_raise(self):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_angle_addr('(foo) ') parser.get_angle_addr('(foo) ')
@ -1857,6 +1889,10 @@ class TestParser(TestParserMixin, TestEmailBase):
self.assertIsNone(name_addr.route) self.assertIsNone(name_addr.route)
self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com')
def test_get_name_addr_empty_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_name_addr('')
def test_get_name_addr_no_content_raises(self): def test_get_name_addr_no_content_raises(self):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_name_addr(' (foo) ') parser.get_name_addr(' (foo) ')
@ -2732,6 +2768,10 @@ class TestParser(TestParserMixin, TestEmailBase):
with self.assertRaises(errors.HeaderParseError): with self.assertRaises(errors.HeaderParseError):
parser.get_msg_id("<simplelocal@>") parser.get_msg_id("<simplelocal@>")
def test_get_msg_id_no_id_right(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_msg_id("<simplelocal@")
def test_get_msg_id_with_brackets(self): def test_get_msg_id_with_brackets(self):
# Microsoft Outlook generates non-standard one-off addresses: # Microsoft Outlook generates non-standard one-off addresses:
# https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses # https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses

View File

@ -0,0 +1 @@
Fix several IndexError exceptions raised when parsing emails with a truncated Message-ID, address, route, etc., e.g. ``example@``.