Merge: #23745: handle duplicate MIME parameter names in new parser.
This commit is contained in:
commit
c4bb599be0
|
@ -71,6 +71,7 @@ import re
|
|||
import urllib # For urllib.parse.unquote
|
||||
from string import hexdigits
|
||||
from collections import OrderedDict
|
||||
from operator import itemgetter
|
||||
from email import _encoded_words as _ew
|
||||
from email import errors
|
||||
from email import utils
|
||||
|
@ -1098,15 +1099,34 @@ class MimeParameters(TokenList):
|
|||
params[name] = []
|
||||
params[name].append((token.section_number, token))
|
||||
for name, parts in params.items():
|
||||
parts = sorted(parts)
|
||||
# XXX: there might be more recovery we could do here if, for
|
||||
# example, this is really a case of a duplicate attribute name.
|
||||
parts = sorted(parts, key=itemgetter(0))
|
||||
first_param = parts[0][1]
|
||||
charset = first_param.charset
|
||||
# Our arbitrary error recovery is to ignore duplicate parameters,
|
||||
# to use appearance order if there are duplicate rfc 2231 parts,
|
||||
# and to ignore gaps. This mimics the error recovery of get_param.
|
||||
if not first_param.extended and len(parts) > 1:
|
||||
if parts[1][0] == 0:
|
||||
parts[1][1].defects.append(errors.InvalidHeaderDefect(
|
||||
'duplicate parameter name; duplicate(s) ignored'))
|
||||
parts = parts[:1]
|
||||
# Else assume the *0* was missing...note that this is different
|
||||
# from get_param, but we registered a defect for this earlier.
|
||||
value_parts = []
|
||||
charset = parts[0][1].charset
|
||||
for i, (section_number, param) in enumerate(parts):
|
||||
i = 0
|
||||
for section_number, param in parts:
|
||||
if section_number != i:
|
||||
param.defects.append(errors.InvalidHeaderDefect(
|
||||
"inconsistent multipart parameter numbering"))
|
||||
# We could get fancier here and look for a complete
|
||||
# duplicate extended parameter and ignore the second one
|
||||
# seen. But we're not doing that. The old code didn't.
|
||||
if not param.extended:
|
||||
param.defects.append(errors.InvalidHeaderDefect(
|
||||
'duplicate parameter name; duplicate ignored'))
|
||||
continue
|
||||
else:
|
||||
param.defects.append(errors.InvalidHeaderDefect(
|
||||
"inconsistent RFC2231 parameter numbering"))
|
||||
i += 1
|
||||
value = param.param_value
|
||||
if param.extended:
|
||||
try:
|
||||
|
|
|
@ -2456,6 +2456,115 @@ class TestParser(TestParserMixin, TestEmailBase):
|
|||
";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3
|
||||
)
|
||||
|
||||
|
||||
@parameterize
|
||||
class Test_parse_mime_parameters(TestParserMixin, TestEmailBase):
|
||||
|
||||
def mime_parameters_as_value(self,
|
||||
value,
|
||||
tl_str,
|
||||
tl_value,
|
||||
params,
|
||||
defects):
|
||||
mime_parameters = self._test_parse_x(parser.parse_mime_parameters,
|
||||
value, tl_str, tl_value, defects)
|
||||
self.assertEqual(mime_parameters.token_type, 'mime-parameters')
|
||||
self.assertEqual(list(mime_parameters.params), params)
|
||||
|
||||
|
||||
mime_parameters_params = {
|
||||
|
||||
'simple': (
|
||||
'filename="abc.py"',
|
||||
' filename="abc.py"',
|
||||
'filename=abc.py',
|
||||
[('filename', 'abc.py')],
|
||||
[]),
|
||||
|
||||
'multiple_keys': (
|
||||
'filename="abc.py"; xyz=abc',
|
||||
' filename="abc.py"; xyz="abc"',
|
||||
'filename=abc.py; xyz=abc',
|
||||
[('filename', 'abc.py'), ('xyz', 'abc')],
|
||||
[]),
|
||||
|
||||
'split_value': (
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
|
||||
' filename="201.tif"',
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
|
||||
[('filename', '201.tif')],
|
||||
[]),
|
||||
|
||||
# Note that it is undefined what we should do for error recovery when
|
||||
# there are duplicate parameter names or duplicate parts in a split
|
||||
# value. We choose to ignore all duplicate parameters after the first
|
||||
# and to take duplicate or missing rfc 2231 parts in appearance order.
|
||||
# This is backward compatible with get_param's behavior, but the
|
||||
# decisions are arbitrary.
|
||||
|
||||
'duplicate_key': (
|
||||
'filename=abc.gif; filename=def.tiff',
|
||||
' filename="abc.gif"',
|
||||
"filename=abc.gif; filename=def.tiff",
|
||||
[('filename', 'abc.gif')],
|
||||
[errors.InvalidHeaderDefect]),
|
||||
|
||||
'duplicate_key_with_split_value': (
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
|
||||
" filename=abc.gif",
|
||||
' filename="201.tif"',
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
|
||||
" filename=abc.gif",
|
||||
[('filename', '201.tif')],
|
||||
[errors.InvalidHeaderDefect]),
|
||||
|
||||
'duplicate_key_with_split_value_other_order': (
|
||||
"filename=abc.gif; "
|
||||
" filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
|
||||
' filename="abc.gif"',
|
||||
"filename=abc.gif;"
|
||||
" filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
|
||||
[('filename', 'abc.gif')],
|
||||
[errors.InvalidHeaderDefect]),
|
||||
|
||||
'duplicate_in_split_value': (
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
|
||||
" filename*1*=abc.gif",
|
||||
' filename="201.tifabc.gif"',
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
|
||||
" filename*1*=abc.gif",
|
||||
[('filename', '201.tifabc.gif')],
|
||||
[errors.InvalidHeaderDefect]),
|
||||
|
||||
'missing_split_value': (
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
|
||||
' filename="201.tif"',
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
|
||||
[('filename', '201.tif')],
|
||||
[errors.InvalidHeaderDefect]),
|
||||
|
||||
'duplicate_and_missing_split_value': (
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
|
||||
" filename*3*=abc.gif",
|
||||
' filename="201.tifabc.gif"',
|
||||
"filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
|
||||
" filename*3*=abc.gif",
|
||||
[('filename', '201.tifabc.gif')],
|
||||
[errors.InvalidHeaderDefect]*2),
|
||||
|
||||
# Here we depart from get_param and assume the *0* was missing.
|
||||
'duplicate_with_broken_split_value': (
|
||||
"filename=abc.gif; "
|
||||
" filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
|
||||
' filename="abc.gif201.tif"',
|
||||
"filename=abc.gif;"
|
||||
" filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
|
||||
[('filename', 'abc.gif201.tif')],
|
||||
# Defects are apparent missing *0*, and two 'out of sequence'.
|
||||
[errors.InvalidHeaderDefect]*3),
|
||||
|
||||
}
|
||||
|
||||
@parameterize
|
||||
class Test_parse_mime_version(TestParserMixin, TestEmailBase):
|
||||
|
||||
|
|
|
@ -56,6 +56,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #23745: The new email header parser now handles duplicate MIME
|
||||
parameter names without error, similar to how get_param behaves.
|
||||
|
||||
- Issue #22117: Fix os.utime(), it now rounds the timestamp towards minus
|
||||
infinity (-inf) instead of rounding towards zero.
|
||||
|
||||
|
|
Loading…
Reference in New Issue