bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239)
Fixes a case in which email._header_value_parser.get_unstructured hangs on some invalid headers. This covers the cases in which the header contains either (1) an encoded word without trailing whitespace, or (2) an invalid encoded word. This fix should also be backported to 3.7 and 3.8. See https://bugs.python.org/issue37764
This commit is contained in:
parent
daa82d019c
commit
c5b242f87f
|
@ -935,6 +935,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
|
class _InvalidEwError(errors.HeaderParseError):
|
||||||
|
"""Invalid encoded word found while parsing headers."""
|
||||||
|
|
||||||
|
|
||||||
# XXX these need to become classes and used as instances so
|
# XXX these need to become classes and used as instances so
|
||||||
# that a program can't change them in a parse tree and screw
|
# that a program can't change them in a parse tree and screw
|
||||||
# up other parse trees. Maybe should have tests for that, too.
|
# up other parse trees. Maybe should have tests for that, too.
|
||||||
|
@ -1039,7 +1043,10 @@ def get_encoded_word(value):
|
||||||
raise errors.HeaderParseError(
|
raise errors.HeaderParseError(
|
||||||
"expected encoded word but found {}".format(value))
|
"expected encoded word but found {}".format(value))
|
||||||
remstr = ''.join(remainder)
|
remstr = ''.join(remainder)
|
||||||
if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
|
if (len(remstr) > 1 and
|
||||||
|
remstr[0] in hexdigits and
|
||||||
|
remstr[1] in hexdigits and
|
||||||
|
tok.count('?') < 2):
|
||||||
# The ? after the CTE was followed by an encoded word escape (=XX).
|
# The ? after the CTE was followed by an encoded word escape (=XX).
|
||||||
rest, *remainder = remstr.split('?=', 1)
|
rest, *remainder = remstr.split('?=', 1)
|
||||||
tok = tok + '?=' + rest
|
tok = tok + '?=' + rest
|
||||||
|
@ -1051,7 +1058,7 @@ def get_encoded_word(value):
|
||||||
try:
|
try:
|
||||||
text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
|
text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise errors.HeaderParseError(
|
raise _InvalidEwError(
|
||||||
"encoded word format invalid: '{}'".format(ew.cte))
|
"encoded word format invalid: '{}'".format(ew.cte))
|
||||||
ew.charset = charset
|
ew.charset = charset
|
||||||
ew.lang = lang
|
ew.lang = lang
|
||||||
|
@ -1101,9 +1108,12 @@ def get_unstructured(value):
|
||||||
token, value = get_fws(value)
|
token, value = get_fws(value)
|
||||||
unstructured.append(token)
|
unstructured.append(token)
|
||||||
continue
|
continue
|
||||||
|
valid_ew = True
|
||||||
if value.startswith('=?'):
|
if value.startswith('=?'):
|
||||||
try:
|
try:
|
||||||
token, value = get_encoded_word(value)
|
token, value = get_encoded_word(value)
|
||||||
|
except _InvalidEwError:
|
||||||
|
valid_ew = False
|
||||||
except errors.HeaderParseError:
|
except errors.HeaderParseError:
|
||||||
# XXX: Need to figure out how to register defects when
|
# XXX: Need to figure out how to register defects when
|
||||||
# appropriate here.
|
# appropriate here.
|
||||||
|
@ -1125,7 +1135,10 @@ def get_unstructured(value):
|
||||||
# Split in the middle of an atom if there is a rfc2047 encoded word
|
# Split in the middle of an atom if there is a rfc2047 encoded word
|
||||||
# which does not have WSP on both sides. The defect will be registered
|
# which does not have WSP on both sides. The defect will be registered
|
||||||
# the next time through the loop.
|
# the next time through the loop.
|
||||||
if rfc2047_matcher.search(tok):
|
# This needs to only be performed when the encoded word is valid;
|
||||||
|
# otherwise, performing it on an invalid encoded word can cause
|
||||||
|
# the parser to go in an infinite loop.
|
||||||
|
if valid_ew and rfc2047_matcher.search(tok):
|
||||||
tok, *remainder = value.partition('=?')
|
tok, *remainder = value.partition('=?')
|
||||||
vtext = ValueTerminal(tok, 'vtext')
|
vtext = ValueTerminal(tok, 'vtext')
|
||||||
_validate_xtext(vtext)
|
_validate_xtext(vtext)
|
||||||
|
|
|
@ -383,6 +383,22 @@ class TestParser(TestParserMixin, TestEmailBase):
|
||||||
[errors.InvalidHeaderDefect],
|
[errors.InvalidHeaderDefect],
|
||||||
'')
|
'')
|
||||||
|
|
||||||
|
def test_get_unstructured_without_trailing_whitespace_hang_case(self):
|
||||||
|
self._test_get_x(self._get_unst,
|
||||||
|
'=?utf-8?q?somevalue?=aa',
|
||||||
|
'somevalueaa',
|
||||||
|
'somevalueaa',
|
||||||
|
[errors.InvalidHeaderDefect],
|
||||||
|
'')
|
||||||
|
|
||||||
|
def test_get_unstructured_invalid_ew(self):
|
||||||
|
self._test_get_x(self._get_unst,
|
||||||
|
'=?utf-8?q?=somevalue?=',
|
||||||
|
'=?utf-8?q?=somevalue?=',
|
||||||
|
'=?utf-8?q?=somevalue?=',
|
||||||
|
[],
|
||||||
|
'')
|
||||||
|
|
||||||
# get_qp_ctext
|
# get_qp_ctext
|
||||||
|
|
||||||
def test_get_qp_ctext_only(self):
|
def test_get_qp_ctext_only(self):
|
||||||
|
|
|
@ -5381,6 +5381,27 @@ Content-Type: application/x-foo;
|
||||||
eq(language, 'en-us')
|
eq(language, 'en-us')
|
||||||
eq(s, 'My Document For You')
|
eq(s, 'My Document For You')
|
||||||
|
|
||||||
|
def test_should_not_hang_on_invalid_ew_messages(self):
|
||||||
|
messages = ["""From: user@host.com
|
||||||
|
To: user@host.com
|
||||||
|
Bad-Header:
|
||||||
|
=?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
|
||||||
|
=?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
|
||||||
|
=?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
|
||||||
|
|
||||||
|
Hello!
|
||||||
|
""", """From: ����� �������� <xxx@xxx>
|
||||||
|
To: "xxx" <xxx@xxx>
|
||||||
|
Subject: ��� ���������� ����� ����� � ��������� �� ����
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset="windows-1251";
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
�� ����� � ���� ������ ��� ��������
|
||||||
|
"""]
|
||||||
|
for m in messages:
|
||||||
|
with self.subTest(m=m):
|
||||||
|
msg = email.message_from_string(m)
|
||||||
|
|
||||||
|
|
||||||
# Tests to ensure that signed parts of an email are completely preserved, as
|
# Tests to ensure that signed parts of an email are completely preserved, as
|
||||||
|
|
|
@ -1336,6 +1336,7 @@ Burton Radons
|
||||||
Abhilash Raj
|
Abhilash Raj
|
||||||
Shorya Raj
|
Shorya Raj
|
||||||
Dhushyanth Ramasamy
|
Dhushyanth Ramasamy
|
||||||
|
Ashwin Ramaswami
|
||||||
Jeff Ramnani
|
Jeff Ramnani
|
||||||
Bayard Randel
|
Bayard Randel
|
||||||
Varpu Rantala
|
Varpu Rantala
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Fixes email._header_value_parser.get_unstructured going into an infinite loop in two cases: when the email header does not have trailing whitespace, and when it contains an invalid encoded word. Patch by Ashwin Ramaswami.
|
Loading…
Reference in New Issue