gh-100884: email/_header_value_parser: don't encode list separators (GH-100885)

A ListSeparator (the comma between addresses) should never be encoded. This
could happen when a long line pushed its separator onto the next line, and
that line was then encoded.
This commit is contained in:
Thomas Weißschuh 2024-02-17 11:13:46 +01:00 committed by GitHub
parent 465db27cb9
commit 09fab93c3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 9 additions and 1 deletions

View File

@ -949,6 +949,7 @@ class _InvalidEwError(errors.HeaderParseError):
# up other parse trees. Maybe should have tests for that, too.
DOT = ValueTerminal('.', 'dot')
ListSeparator = ValueTerminal(',', 'list-separator')
ListSeparator.as_ew_allowed = False
RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
#
@ -2022,7 +2023,7 @@ def get_address_list(value):
address_list.defects.append(errors.InvalidHeaderDefect(
"invalid address in address-list"))
if value: # Must be a , at this point.
address_list.append(ValueTerminal(',', 'list-separator'))
address_list.append(ListSeparator)
value = value[1:]
return address_list, value

View File

@ -2985,6 +2985,11 @@ class TestFolding(TestEmailBase):
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
def test_address_list_with_list_separator_after_fold(self):
to = '0123456789' * 8 + '@foo, ä <foo@bar>'
self._test(parser.get_address_list(to)[0],
'0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= <foo@bar>\n')
# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.

View File

@ -0,0 +1,2 @@
email: fix misfolding of comma in address-lists over multiple lines in
combination with unicode encoding.