gh-106669: Revert "gh-102988: Detect email address parsing errors ... (#105127)" (#106733)

This reverts commit 18dfbd0357.
Adds a regression test from the issue.

See https://github.com/python/cpython/issues/106669.
This commit is contained in:
Gregory P. Smith 2023-07-20 20:30:52 -07:00 committed by GitHub
parent d81b4f8ff8
commit a31dea1feb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 30 additions and 171 deletions

View File

@ -65,11 +65,6 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
.. versionchanged:: 3.12
For security reasons, addresses that were ambiguous and could parse into
multiple different addresses now cause ``('', '')`` to be returned
instead of only one of the *potential* addresses.
.. function:: formataddr(pair, charset='utf-8')
@ -92,7 +87,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
example that gets all the recipients of a message:
example that gets all the recipients of a message::
from email.utils import getaddresses
@ -102,25 +97,6 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
is returned in its place. Other errors in parsing the list of
addresses such as a fieldvalue seemingly parsing into multiple
addresses may result in a list containing a single empty 2-tuple
``[('', '')]`` being returned rather than returning potentially
invalid output.
Example malformed input parsing:
.. doctest::
>>> from email.utils import getaddresses
>>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
[('', '')]
.. versionchanged:: 3.12
The 2-tuple of ``('', '')`` in the returned values when parsing
fails were added as to address a security issue.
.. function:: parsedate(date)

View File

@ -570,14 +570,6 @@ dis
:data:`~dis.hasarg` collection instead.
(Contributed by Irit Katriel in :gh:`94216`.)
email
-----
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
``('', '')`` 2-tuples in more situations where invalid email addresses are
encountered instead of potentially inaccurate values.
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
fractions
---------

View File

@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'):
return address
def _pre_parse_validation(email_header_fields):
accepted_values = []
for v in email_header_fields:
s = v.replace('\\(', '').replace('\\)', '')
if s.count('(') != s.count(')'):
v = "('', '')"
accepted_values.append(v)
return accepted_values
def _post_parse_validation(parsed_email_header_tuples):
accepted_values = []
# The parser would have parsed a correctly formatted domain-literal
# The existence of an [ after parsing indicates a parsing failure
for v in parsed_email_header_tuples:
if '[' in v[1]:
v = ('', '')
accepted_values.append(v)
return accepted_values
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
its place.
If the resulting list of parsed address is not the same as the number of
fieldvalues in the input list a parsing error has occurred. A list
containing a single empty 2-tuple [('', '')] is returned in its place.
This is done to avoid invalid output.
"""
fieldvalues = [str(v) for v in fieldvalues]
fieldvalues = _pre_parse_validation(fieldvalues)
all = COMMASPACE.join(v for v in fieldvalues)
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
result = _post_parse_validation(a.addresslist)
n = 0
for v in fieldvalues:
n += v.count(',') + 1
if len(result) != n:
return [('', '')]
return result
return a.addresslist
def _format_timetuple_and_zone(timetuple, zone):
@ -254,18 +212,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
if isinstance(addr, list):
addr = addr[0]
if not isinstance(addr, str):
return ('', '')
addr = _pre_parse_validation([addr])[0]
addrs = _post_parse_validation(_AddressList(addr).addresslist)
if not addrs or len(addrs) > 1:
return ('', '')
addrs = _AddressList(addr).addresslist
if not addrs:
return '', ''
return addrs[0]

View File

@ -3319,90 +3319,32 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
def test_getaddresses_parsing_errors(self):
"""Test for parsing errors from CVE-2023-27043"""
eq = self.assertEqual
eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
[('', 'alice@example.org'), ('', 'bob@example.com')])
eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
[('', '')])
eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
[('', '')])
def test_parseaddr_parsing_errors(self):
"""Test for parsing errors from CVE-2023-27043"""
eq = self.assertEqual
eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
('', ''))
eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
('', ''))
def test_getaddresses_comma_in_name(self):
"""GH-106669 regression test."""
self.assertEqual(
utils.getaddresses(
[
'"Bud, Person" <bperson@dom.ain>',
'aperson@dom.ain (Al Person)',
'"Mariusz Felisiak" <to@example.com>',
]
),
[
('Bud, Person', 'bperson@dom.ain'),
('Al Person', 'aperson@dom.ain'),
('Mariusz Felisiak', 'to@example.com'),
],
)
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
eq(utils.getaddresses(
['[]*-- =~$']),
[('', ''), ('', ''), ('', '*--')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
eq(utils.getaddresses(
[r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
[('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
eq(utils.getaddresses(
['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
[('', '')])
eq(utils.getaddresses(
['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
[('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
eq(utils.getaddresses(
['John Doe <jdoe@machine(comment). example>']),
[('John Doe (comment)', 'jdoe@machine.example')])
eq(utils.getaddresses(
['"Mary Smith: Personal Account" <smith@home.example>']),
[('Mary Smith: Personal Account', 'smith@home.example')])
eq(utils.getaddresses(
['Undisclosed recipients:;']),
[('', '')])
eq(utils.getaddresses(
[r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
[('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""

View File

@ -1,4 +1,4 @@
CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
and :func:`email.utils.getaddresses` from returning the realname portion of an
invalid RFC2822 email header in the email address portion of the 2-tuple
returned after being parsed by :class:`email._parseaddr.AddressList`.
Reverted the :mod:`email.utils` security improvement change released in
3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail
to parse email addresses with a comma in the quoted name field.
See :gh:`106669`.