bpo-39040: Fix parsing of email MIME headers with whitespace between encoded-words. (gh-17620)
* bpo-39040: Fix parsing of email headers with encoded-words inside a quoted string. It is fairly common to find malformed MIME headers (especially Content-Disposition headers) where the parameter values, instead of being encoded to RFC standards, are "encoded" by doing RFC 2047 "encoded-word" encoding, and then enclosing the whole thing in quotes. The processing of these malformed headers was incorrectly leaving the spaces between encoded-words in the decoded text (whitespace between adjacent encoded-words is supposed to be stripped on decoding). This changeset fixes the encoded-word processing inside quoted strings (bare-quoted-string) to do correct RFC 2047 decoding by stripping that whitespace.
This commit is contained in:
parent
c610d970f5
commit
21017ed904
|
@ -1218,12 +1218,21 @@ def get_bare_quoted_string(value):
|
||||||
if value[0] in WSP:
|
if value[0] in WSP:
|
||||||
token, value = get_fws(value)
|
token, value = get_fws(value)
|
||||||
elif value[:2] == '=?':
|
elif value[:2] == '=?':
|
||||||
|
valid_ew = False
|
||||||
try:
|
try:
|
||||||
token, value = get_encoded_word(value)
|
token, value = get_encoded_word(value)
|
||||||
bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
|
bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
|
||||||
"encoded word inside quoted string"))
|
"encoded word inside quoted string"))
|
||||||
|
valid_ew = True
|
||||||
except errors.HeaderParseError:
|
except errors.HeaderParseError:
|
||||||
token, value = get_qcontent(value)
|
token, value = get_qcontent(value)
|
||||||
|
# Collapse the whitespace between two encoded words that occur in a
|
||||||
|
# bare-quoted-string.
|
||||||
|
if valid_ew and len(bare_quoted_string) > 1:
|
||||||
|
if (bare_quoted_string[-1].token_type == 'fws' and
|
||||||
|
bare_quoted_string[-2].token_type == 'encoded-word'):
|
||||||
|
bare_quoted_string[-1] = EWWhiteSpaceTerminal(
|
||||||
|
bare_quoted_string[-1], 'fws')
|
||||||
else:
|
else:
|
||||||
token, value = get_qcontent(value)
|
token, value = get_qcontent(value)
|
||||||
bare_quoted_string.append(token)
|
bare_quoted_string.append(token)
|
||||||
|
|
|
@ -873,6 +873,25 @@ class TestContentDisposition(TestHeaderBase):
|
||||||
{'filename': 'foo'},
|
{'filename': 'foo'},
|
||||||
[errors.InvalidHeaderDefect]),
|
[errors.InvalidHeaderDefect]),
|
||||||
|
|
||||||
|
'invalid_parameter_value_with_fws_between_ew': (
|
||||||
|
'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?='
|
||||||
|
' =?UTF-8?Q?pdf?="',
|
||||||
|
'attachment',
|
||||||
|
{'filename': 'Schulbesuchsbestättigung.pdf'},
|
||||||
|
[errors.InvalidHeaderDefect]*3,
|
||||||
|
('attachment; filename="Schulbesuchsbestättigung.pdf"'),
|
||||||
|
('Content-Disposition: attachment;\n'
|
||||||
|
' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'),
|
||||||
|
),
|
||||||
|
|
||||||
|
'parameter_value_with_fws_between_tokens': (
|
||||||
|
'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"',
|
||||||
|
'attachment',
|
||||||
|
{'filename': 'File Name With Spaces.pdf'},
|
||||||
|
[errors.InvalidHeaderDefect],
|
||||||
|
'attachment; filename="File Name With Spaces.pdf"',
|
||||||
|
('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix parsing of invalid MIME header parameters by collapsing whitespace between
|
||||||
|
encoded words in a bare-quoted-string.
|
Loading…
Reference in New Issue