#11492: rewrite header folding algorithm. Less code, more passing tests.
This commit is contained in:
parent
74c0031066
commit
01581ee0b7
|
@ -109,9 +109,17 @@ Here is the :class:`Header` class description:
|
||||||
|
|
||||||
Encode a message header into an RFC-compliant format, possibly wrapping
|
Encode a message header into an RFC-compliant format, possibly wrapping
|
||||||
long lines and encapsulating non-ASCII parts in base64 or quoted-printable
|
long lines and encapsulating non-ASCII parts in base64 or quoted-printable
|
||||||
encodings. Optional *splitchars* is a string containing characters to
|
encodings.
|
||||||
split long ASCII lines on, in rough support of :rfc:`2822`'s *highest
|
|
||||||
level syntactic breaks*. This doesn't affect :rfc:`2047` encoded lines.
|
Optional *splitchars* is a string containing characters which should be
|
||||||
|
given extra weight by the splitting algorithm during normal header
|
||||||
|
wrapping. This is in very rough support of :rfc:`2822`'s 'higher level
|
||||||
|
syntactic breaks': split points preceded by a splitchar are preferred
|
||||||
|
during line splitting, with the characters preferred in the order in
|
||||||
|
which they appear in the string. Space and tab may be included in the
|
||||||
|
string to indicate whether preference should be given to one over the
|
||||||
|
other as a split point when other split chars do not appear in the line
|
||||||
|
being split. Splitchars does not affect :rfc:`2047` encoded lines.
|
||||||
|
|
||||||
*maxlinelen*, if given, overrides the instance's value for the maximum
|
*maxlinelen*, if given, overrides the instance's value for the maximum
|
||||||
line length.
|
line length.
|
||||||
|
|
|
@ -26,6 +26,7 @@ BSPACE = b' '
|
||||||
SPACE8 = ' ' * 8
|
SPACE8 = ' ' * 8
|
||||||
EMPTYSTRING = ''
|
EMPTYSTRING = ''
|
||||||
MAXLINELEN = 78
|
MAXLINELEN = 78
|
||||||
|
FWS = ' \t'
|
||||||
|
|
||||||
USASCII = Charset('us-ascii')
|
USASCII = Charset('us-ascii')
|
||||||
UTF8 = Charset('utf-8')
|
UTF8 = Charset('utf-8')
|
||||||
|
@ -299,9 +300,15 @@ class Header:
|
||||||
name was specified at Header construction time. The default value for
|
name was specified at Header construction time. The default value for
|
||||||
maxlinelen is determined at header construction time.
|
maxlinelen is determined at header construction time.
|
||||||
|
|
||||||
Optional splitchars is a string containing characters to split long
|
Optional splitchars is a string containing characters which should be
|
||||||
ASCII lines on, in rough support of RFC 2822's `highest level
|
given extra weight by the splitting algorithm during normal header
|
||||||
syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
|
wrapping. This is in very rough support of RFC 2822's `higher level
|
||||||
|
syntactic breaks': split points preceded by a splitchar are preferred
|
||||||
|
during line splitting, with the characters preferred in the order in
|
||||||
|
which they appear in the string. Space and tab may be included in the
|
||||||
|
string to indicate whether preference should be given to one over the
|
||||||
|
other as a split point when other split chars do not appear in the line
|
||||||
|
being split. Splitchars does not affect RFC 2047 encoded lines.
|
||||||
|
|
||||||
Optional linesep is a string to be used to separate the lines of
|
Optional linesep is a string to be used to separate the lines of
|
||||||
the value. The default value is the most useful for typical
|
the value. The default value is the most useful for typical
|
||||||
|
@ -320,13 +327,19 @@ class Header:
|
||||||
self._continuation_ws, splitchars)
|
self._continuation_ws, splitchars)
|
||||||
for string, charset in self._chunks:
|
for string, charset in self._chunks:
|
||||||
lines = string.splitlines()
|
lines = string.splitlines()
|
||||||
formatter.feed(lines[0] if lines else '', charset)
|
if lines:
|
||||||
|
formatter.feed('', lines[0], charset)
|
||||||
|
else:
|
||||||
|
formatter.feed('', '', charset)
|
||||||
for line in lines[1:]:
|
for line in lines[1:]:
|
||||||
formatter.newline()
|
formatter.newline()
|
||||||
if charset.header_encoding is not None:
|
if charset.header_encoding is not None:
|
||||||
formatter.feed(self._continuation_ws, USASCII)
|
formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
|
||||||
line = ' ' + line.lstrip()
|
charset)
|
||||||
formatter.feed(line, charset)
|
else:
|
||||||
|
sline = line.lstrip()
|
||||||
|
fws = line[:len(line)-len(sline)]
|
||||||
|
formatter.feed(fws, sline, charset)
|
||||||
if len(lines) > 1:
|
if len(lines) > 1:
|
||||||
formatter.newline()
|
formatter.newline()
|
||||||
formatter.add_transition()
|
formatter.add_transition()
|
||||||
|
@ -360,7 +373,7 @@ class _ValueFormatter:
|
||||||
def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
|
def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
|
||||||
self._maxlen = maxlen
|
self._maxlen = maxlen
|
||||||
self._continuation_ws = continuation_ws
|
self._continuation_ws = continuation_ws
|
||||||
self._continuation_ws_len = len(continuation_ws.replace('\t', SPACE8))
|
self._continuation_ws_len = len(continuation_ws)
|
||||||
self._splitchars = splitchars
|
self._splitchars = splitchars
|
||||||
self._lines = []
|
self._lines = []
|
||||||
self._current_line = _Accumulator(headerlen)
|
self._current_line = _Accumulator(headerlen)
|
||||||
|
@ -374,43 +387,26 @@ class _ValueFormatter:
|
||||||
|
|
||||||
def newline(self):
|
def newline(self):
|
||||||
end_of_line = self._current_line.pop()
|
end_of_line = self._current_line.pop()
|
||||||
if end_of_line is not None:
|
if end_of_line != (' ', ''):
|
||||||
self._current_line.push(end_of_line)
|
self._current_line.push(*end_of_line)
|
||||||
if len(self._current_line) > 0:
|
if len(self._current_line) > 0:
|
||||||
self._lines.append(str(self._current_line))
|
if self._current_line.is_onlyws():
|
||||||
|
self._lines[-1] += str(self._current_line)
|
||||||
|
else:
|
||||||
|
self._lines.append(str(self._current_line))
|
||||||
self._current_line.reset()
|
self._current_line.reset()
|
||||||
|
|
||||||
def add_transition(self):
|
def add_transition(self):
|
||||||
self._current_line.push(None)
|
self._current_line.push(' ', '')
|
||||||
|
|
||||||
def feed(self, string, charset):
|
def feed(self, fws, string, charset):
|
||||||
# If the string itself fits on the current line in its encoded format,
|
|
||||||
# then add it now and be done with it.
|
|
||||||
encoded_string = charset.header_encode(string)
|
|
||||||
if len(encoded_string) + len(self._current_line) <= self._maxlen:
|
|
||||||
self._current_line.push(encoded_string)
|
|
||||||
return
|
|
||||||
# If the charset has no header encoding (i.e. it is an ASCII encoding)
|
# If the charset has no header encoding (i.e. it is an ASCII encoding)
|
||||||
# then we must split the header at the "highest level syntactic break"
|
# then we must split the header at the "highest level syntactic break"
|
||||||
# possible. Note that we don't have a lot of smarts about field
|
# possible. Note that we don't have a lot of smarts about field
|
||||||
# syntax; we just try to break on semi-colons, then commas, then
|
# syntax; we just try to break on semi-colons, then commas, then
|
||||||
# whitespace. Eventually, this should be pluggable.
|
# whitespace. Eventually, this should be pluggable.
|
||||||
if charset.header_encoding is None:
|
if charset.header_encoding is None:
|
||||||
for ch in self._splitchars:
|
self._ascii_split(fws, string, self._splitchars)
|
||||||
if ch in string:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
ch = None
|
|
||||||
# If there's no available split character then regardless of
|
|
||||||
# whether the string fits on the line, we have to put it on a line
|
|
||||||
# by itself.
|
|
||||||
if ch is None:
|
|
||||||
if not self._current_line.is_onlyws():
|
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
self._current_line.reset(self._continuation_ws)
|
|
||||||
self._current_line.push(encoded_string)
|
|
||||||
else:
|
|
||||||
self._ascii_split(string, ch)
|
|
||||||
return
|
return
|
||||||
# Otherwise, we're doing either a Base64 or a quoted-printable
|
# Otherwise, we're doing either a Base64 or a quoted-printable
|
||||||
# encoding which means we don't need to split the line on syntactic
|
# encoding which means we don't need to split the line on syntactic
|
||||||
|
@ -428,15 +424,14 @@ class _ValueFormatter:
|
||||||
# There are no encoded lines, so we're done.
|
# There are no encoded lines, so we're done.
|
||||||
return
|
return
|
||||||
if first_line is not None:
|
if first_line is not None:
|
||||||
self._current_line.push(first_line)
|
self._append_chunk(fws, first_line)
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
self._current_line.reset(self._continuation_ws)
|
|
||||||
try:
|
try:
|
||||||
last_line = encoded_lines.pop()
|
last_line = encoded_lines.pop()
|
||||||
except IndexError:
|
except IndexError:
|
||||||
# There was only one line.
|
# There was only one line.
|
||||||
return
|
return
|
||||||
self._current_line.push(last_line)
|
self.newline()
|
||||||
|
self._current_line.push(self._continuation_ws, last_line)
|
||||||
# Everything else are full lines in themselves.
|
# Everything else are full lines in themselves.
|
||||||
for line in encoded_lines:
|
for line in encoded_lines:
|
||||||
self._lines.append(self._continuation_ws + line)
|
self._lines.append(self._continuation_ws + line)
|
||||||
|
@ -447,162 +442,96 @@ class _ValueFormatter:
|
||||||
while True:
|
while True:
|
||||||
yield self._maxlen - self._continuation_ws_len
|
yield self._maxlen - self._continuation_ws_len
|
||||||
|
|
||||||
def _ascii_split(self, string, ch):
|
def _ascii_split(self, fws, string, splitchars):
|
||||||
holding = _Accumulator()
|
# The RFC 2822 header folding algorithm is simple in principle but
|
||||||
# Split the line on the split character, preserving it. If the split
|
# complex in practice. Lines may be folded any place where "folding
|
||||||
# character is whitespace RFC 2822 $2.2.3 requires us to fold on the
|
# white space" appears by inserting a linesep character in front of the
|
||||||
# whitespace, so that the line leads with the original whitespace we
|
# FWS. The complication is that not all spaces or tabs qualify as FWS,
|
||||||
# split on. However, if a higher syntactic break is used instead
|
# and we are also supposed to prefer to break at "higher level
|
||||||
# (e.g. comma or semicolon), the folding should happen after the split
|
# syntactic breaks". We can't do either of these without intimate
|
||||||
# character. But then in that case, we need to add our own
|
# knowledge of the structure of structured headers, which we don't have
|
||||||
# continuation whitespace -- although won't that break unfolding?
|
# here. So the best we can do here is prefer to break at the specified
|
||||||
for part, splitpart, nextpart in _spliterator(ch, string):
|
# splitchars, and hope that we don't choose any spaces or tabs that
|
||||||
if not splitpart:
|
# aren't legal FWS. (This is at least better than the old algorithm,
|
||||||
# No splitpart means this is the last chunk. Put this part
|
# where we would sometimes *introduce* FWS after a splitchar, or the
|
||||||
# either on the current line or the next line depending on
|
# algorithm before that, where we would turn all white space runs into
|
||||||
# whether it fits.
|
# single spaces or tabs.)
|
||||||
holding.push(part)
|
parts = re.split("(["+FWS+"]+)", fws+string)
|
||||||
if len(holding) + len(self._current_line) <= self._maxlen:
|
if parts[0]:
|
||||||
# It fits, but we're done.
|
parts[:0] = ['']
|
||||||
self._current_line.push(str(holding))
|
else:
|
||||||
|
parts.pop(0)
|
||||||
|
for fws, part in zip(*[iter(parts)]*2):
|
||||||
|
self._append_chunk(fws, part)
|
||||||
|
|
||||||
|
def _append_chunk(self, fws, string):
|
||||||
|
self._current_line.push(fws, string)
|
||||||
|
if len(self._current_line) > self._maxlen:
|
||||||
|
# Find the best split point, working backward from the end.
|
||||||
|
# There might be none, on a long first line.
|
||||||
|
for ch in self._splitchars:
|
||||||
|
for i in range(self._current_line.part_count()-1, 0, -1):
|
||||||
|
if ch.isspace():
|
||||||
|
fws = self._current_line[i][0]
|
||||||
|
if fws and fws[0]==ch:
|
||||||
|
break
|
||||||
|
prevpart = self._current_line[i-1][1]
|
||||||
|
if prevpart and prevpart[-1]==ch:
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
# It doesn't fit, but we're done. Before pushing a new
|
continue
|
||||||
# line, watch out for the current line containing only
|
break
|
||||||
# whitespace.
|
|
||||||
holding.pop()
|
|
||||||
if self._current_line.is_onlyws() and holding.is_onlyws():
|
|
||||||
# Don't start a new line.
|
|
||||||
holding.push(part)
|
|
||||||
part = None
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
if part is None:
|
|
||||||
self._current_line.reset()
|
|
||||||
else:
|
|
||||||
holding.reset(part)
|
|
||||||
self._current_line.reset(str(holding))
|
|
||||||
return
|
|
||||||
elif not nextpart:
|
|
||||||
# There must be some trailing or duplicated split characters
|
|
||||||
# because we
|
|
||||||
# found a split character but no next part. In this case we
|
|
||||||
# must treat the thing to fit as the part + splitpart because
|
|
||||||
# if splitpart is whitespace it's not allowed to be the only
|
|
||||||
# thing on the line, and if it's not whitespace we must split
|
|
||||||
# after the syntactic break.
|
|
||||||
holding_prelen = len(holding)
|
|
||||||
holding.push(part + splitpart)
|
|
||||||
if len(holding) + len(self._current_line) <= self._maxlen:
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
elif holding_prelen == 0:
|
|
||||||
# This is the only chunk left so it has to go on the
|
|
||||||
# current line.
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
else:
|
|
||||||
save_part = holding.pop()
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
holding.reset(save_part)
|
|
||||||
self._current_line.reset(str(holding))
|
|
||||||
holding.reset()
|
|
||||||
elif not part:
|
|
||||||
# We're leading with a split character. See if the splitpart
|
|
||||||
# and nextpart fits on the current line.
|
|
||||||
holding.push(splitpart + nextpart)
|
|
||||||
holding_len = len(holding)
|
|
||||||
# We know we're not leaving the nextpart on the stack.
|
|
||||||
holding.pop()
|
|
||||||
if holding_len + len(self._current_line) <= self._maxlen:
|
|
||||||
holding.push(splitpart)
|
|
||||||
else:
|
|
||||||
# It doesn't fit. Since there's no current part really
|
|
||||||
# the best we can do is start a new line and push the
|
|
||||||
# split part onto it.
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
holding.reset()
|
|
||||||
if len(self._current_line) > 0 and self._lines:
|
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
self._current_line.reset()
|
|
||||||
holding.push(splitpart)
|
|
||||||
else:
|
else:
|
||||||
# All three parts are present. First let's see if all three
|
fws, part = self._current_line.pop()
|
||||||
# parts will fit on the current line. If so, we don't need to
|
if self._current_line._initial_size > 0:
|
||||||
# split it.
|
# There will be a header, so leave it on a line by itself.
|
||||||
holding.push(part + splitpart + nextpart)
|
self.newline()
|
||||||
holding_len = len(holding)
|
if not fws:
|
||||||
# Pop the part because we'll push nextpart on the next
|
# We don't use continuation_ws here because the whitespace
|
||||||
# iteration through the loop.
|
# after a header should always be a space.
|
||||||
holding.pop()
|
fws = ' '
|
||||||
if holding_len + len(self._current_line) <= self._maxlen:
|
self._current_line.push(fws, part)
|
||||||
holding.push(part + splitpart)
|
return
|
||||||
else:
|
remainder = self._current_line.pop_from(i)
|
||||||
# The entire thing doesn't fit. See if we need to split
|
self._lines.append(str(self._current_line))
|
||||||
# before or after the split characters.
|
self._current_line.reset(remainder)
|
||||||
if splitpart.isspace():
|
|
||||||
# Split before whitespace. Remember that the
|
|
||||||
# whitespace becomes the continuation whitespace of
|
|
||||||
# the next line so it goes to current_line not holding.
|
|
||||||
holding.push(part)
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
holding.reset()
|
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
self._current_line.reset(splitpart)
|
|
||||||
else:
|
|
||||||
# Split after non-whitespace. The continuation
|
|
||||||
# whitespace comes from the instance variable.
|
|
||||||
holding.push(part + splitpart)
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
holding.reset()
|
|
||||||
self._lines.append(str(self._current_line))
|
|
||||||
if nextpart[0].isspace():
|
|
||||||
self._current_line.reset()
|
|
||||||
else:
|
|
||||||
self._current_line.reset(self._continuation_ws)
|
|
||||||
# Get the last of the holding part
|
|
||||||
self._current_line.push(str(holding))
|
|
||||||
|
|
||||||
|
|
||||||
|
class _Accumulator(list):
|
||||||
def _spliterator(character, string):
|
|
||||||
parts = list(reversed(re.split('(%s)' % character, string)))
|
|
||||||
while parts:
|
|
||||||
part = parts.pop()
|
|
||||||
splitparts = (parts.pop() if parts else None)
|
|
||||||
nextpart = (parts.pop() if parts else None)
|
|
||||||
yield (part, splitparts, nextpart)
|
|
||||||
if nextpart is not None:
|
|
||||||
parts.append(nextpart)
|
|
||||||
|
|
||||||
|
|
||||||
class _Accumulator:
|
|
||||||
def __init__(self, initial_size=0):
|
def __init__(self, initial_size=0):
|
||||||
self._initial_size = initial_size
|
self._initial_size = initial_size
|
||||||
self._current = []
|
super().__init__()
|
||||||
|
|
||||||
def push(self, string):
|
def push(self, fws, string):
|
||||||
self._current.append(string)
|
self.append((fws, string))
|
||||||
|
|
||||||
|
def pop_from(self, i=0):
|
||||||
|
popped = self[i:]
|
||||||
|
self[i:] = []
|
||||||
|
return popped
|
||||||
|
|
||||||
def pop(self):
|
def pop(self):
|
||||||
if not self._current:
|
if self.part_count()==0:
|
||||||
return None
|
return ('', '')
|
||||||
return self._current.pop()
|
return super().pop()
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return sum(((1 if string is None else len(string))
|
return sum((len(fws)+len(part) for fws, part in self),
|
||||||
for string in self._current),
|
|
||||||
self._initial_size)
|
self._initial_size)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self._current and self._current[-1] is None:
|
return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
|
||||||
self._current.pop()
|
for fws, part in self))
|
||||||
return EMPTYSTRING.join((' ' if string is None else string)
|
|
||||||
for string in self._current)
|
|
||||||
|
|
||||||
def reset(self, string=None):
|
def reset(self, startval=None):
|
||||||
self._current = []
|
if startval is None:
|
||||||
|
startval = []
|
||||||
|
self[:] = startval
|
||||||
self._initial_size = 0
|
self._initial_size = 0
|
||||||
if string is not None:
|
|
||||||
self.push(string)
|
|
||||||
|
|
||||||
def is_onlyws(self):
|
def is_onlyws(self):
|
||||||
return len(self) == 0 or str(self).isspace()
|
return self._initial_size==0 and (not self or str(self).isspace())
|
||||||
|
|
||||||
|
def part_count(self):
|
||||||
|
return super().__len__()
|
||||||
|
|
|
@ -660,6 +660,9 @@ class TestEncoders(unittest.TestCase):
|
||||||
|
|
||||||
# Test long header wrapping
|
# Test long header wrapping
|
||||||
class TestLongHeaders(TestEmailBase):
|
class TestLongHeaders(TestEmailBase):
|
||||||
|
|
||||||
|
maxDiff = None
|
||||||
|
|
||||||
def test_split_long_continuation(self):
|
def test_split_long_continuation(self):
|
||||||
eq = self.ndiffAssertEqual
|
eq = self.ndiffAssertEqual
|
||||||
msg = email.message_from_string("""\
|
msg = email.message_from_string("""\
|
||||||
|
@ -868,14 +871,12 @@ Subject: the first part of this is short,
|
||||||
eq = self.ndiffAssertEqual
|
eq = self.ndiffAssertEqual
|
||||||
h = Header('; '
|
h = Header('; '
|
||||||
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
|
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
|
||||||
'be_on_a_line_all_by_itself;')
|
'be_on_a_line_all_by_itself; ')
|
||||||
eq(h.encode(), """\
|
eq(h.encode(), """\
|
||||||
;
|
;
|
||||||
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
|
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
|
||||||
|
|
||||||
def test_long_header_with_multiple_sequential_split_chars(self):
|
def test_long_header_with_multiple_sequential_split_chars(self):
|
||||||
# Issue 11492
|
|
||||||
|
|
||||||
eq = self.ndiffAssertEqual
|
eq = self.ndiffAssertEqual
|
||||||
h = Header('This is a long line that has two whitespaces in a row. '
|
h = Header('This is a long line that has two whitespaces in a row. '
|
||||||
'This used to cause truncation of the header when folded')
|
'This used to cause truncation of the header when folded')
|
||||||
|
@ -883,6 +884,105 @@ Subject: the first part of this is short,
|
||||||
This is a long line that has two whitespaces in a row. This used to cause
|
This is a long line that has two whitespaces in a row. This used to cause
|
||||||
truncation of the header when folded""")
|
truncation of the header when folded""")
|
||||||
|
|
||||||
|
def test_splitter_split_on_punctuation_only_if_fws(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
h = Header('thisverylongheaderhas;semicolons;and,commas,but'
|
||||||
|
'they;arenotlegal;fold,points')
|
||||||
|
eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
|
||||||
|
"arenotlegal;fold,points")
|
||||||
|
|
||||||
|
def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
h = Header('this is a test where we need to have more than one line '
|
||||||
|
'before; our final line that is just too big to fit;; '
|
||||||
|
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
|
||||||
|
'be_on_a_line_all_by_itself;')
|
||||||
|
eq(h.encode(), """\
|
||||||
|
this is a test where we need to have more than one line before;
|
||||||
|
our final line that is just too big to fit;;
|
||||||
|
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
|
||||||
|
|
||||||
|
def test_overlong_last_part_followed_by_split_point(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
|
||||||
|
'be_on_a_line_all_by_itself ')
|
||||||
|
eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
|
||||||
|
"should_be_on_a_line_all_by_itself ")
|
||||||
|
|
||||||
|
def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
|
||||||
|
'before_our_final_line_; ; '
|
||||||
|
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
|
||||||
|
'be_on_a_line_all_by_itself; ')
|
||||||
|
eq(h.encode(), """\
|
||||||
|
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
|
||||||
|
;
|
||||||
|
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
|
||||||
|
|
||||||
|
def test_multiline_with_overlong_last_part_followed_by_split_point(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
h = Header('this is a test where we need to have more than one line '
|
||||||
|
'before our final line; ; '
|
||||||
|
'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
|
||||||
|
'be_on_a_line_all_by_itself; ')
|
||||||
|
eq(h.encode(), """\
|
||||||
|
this is a test where we need to have more than one line before our final line;
|
||||||
|
;
|
||||||
|
this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
|
||||||
|
|
||||||
|
def test_long_header_with_whitespace_runs(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
msg = Message()
|
||||||
|
msg['From'] = 'test@dom.ain'
|
||||||
|
msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
|
||||||
|
msg.set_payload('Test')
|
||||||
|
sfp = StringIO()
|
||||||
|
g = Generator(sfp)
|
||||||
|
g.flatten(msg)
|
||||||
|
eq(sfp.getvalue(), """\
|
||||||
|
From: test@dom.ain
|
||||||
|
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
|
||||||
|
<foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
|
||||||
|
<foo@dom.ain> <foo@dom.ain>\x20\x20
|
||||||
|
|
||||||
|
Test""")
|
||||||
|
|
||||||
|
def test_long_run_with_semi_header_splitter(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
msg = Message()
|
||||||
|
msg['From'] = 'test@dom.ain'
|
||||||
|
msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
|
||||||
|
msg.set_payload('Test')
|
||||||
|
sfp = StringIO()
|
||||||
|
g = Generator(sfp)
|
||||||
|
g.flatten(msg)
|
||||||
|
eq(sfp.getvalue(), """\
|
||||||
|
From: test@dom.ain
|
||||||
|
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
|
||||||
|
<foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
|
||||||
|
<foo@dom.ain>; abc
|
||||||
|
|
||||||
|
Test""")
|
||||||
|
|
||||||
|
def test_splitter_split_on_punctuation_only_if_fws(self):
|
||||||
|
eq = self.ndiffAssertEqual
|
||||||
|
msg = Message()
|
||||||
|
msg['From'] = 'test@dom.ain'
|
||||||
|
msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
|
||||||
|
'they;arenotlegal;fold,points')
|
||||||
|
msg.set_payload('Test')
|
||||||
|
sfp = StringIO()
|
||||||
|
g = Generator(sfp)
|
||||||
|
g.flatten(msg)
|
||||||
|
# XXX the space after the header should not be there.
|
||||||
|
eq(sfp.getvalue(), """\
|
||||||
|
From: test@dom.ain
|
||||||
|
References:\x20
|
||||||
|
thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
|
||||||
|
|
||||||
|
Test""")
|
||||||
|
|
||||||
def test_no_split_long_header(self):
|
def test_no_split_long_header(self):
|
||||||
eq = self.ndiffAssertEqual
|
eq = self.ndiffAssertEqual
|
||||||
hstr = 'References: ' + 'x' * 80
|
hstr = 'References: ' + 'x' * 80
|
||||||
|
@ -973,7 +1073,7 @@ Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
|
||||||
def test_long_to_header(self):
|
def test_long_to_header(self):
|
||||||
eq = self.ndiffAssertEqual
|
eq = self.ndiffAssertEqual
|
||||||
to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
|
to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
|
||||||
'<someone@eecs.umich.edu>,'
|
'<someone@eecs.umich.edu>, '
|
||||||
'"Someone Test #B" <someone@umich.edu>, '
|
'"Someone Test #B" <someone@umich.edu>, '
|
||||||
'"Someone Test #C" <someone@eecs.umich.edu>, '
|
'"Someone Test #C" <someone@eecs.umich.edu>, '
|
||||||
'"Someone Test #D" <someone@eecs.umich.edu>')
|
'"Someone Test #D" <someone@eecs.umich.edu>')
|
||||||
|
@ -1028,9 +1128,11 @@ This is an example of string which has almost the limit of header length.
|
||||||
msg['Received-2'] = h
|
msg['Received-2'] = h
|
||||||
# This should be splitting on spaces not semicolons.
|
# This should be splitting on spaces not semicolons.
|
||||||
self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
|
self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
|
||||||
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
|
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
|
||||||
|
hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
|
||||||
Wed, 05 Mar 2003 18:10:18 -0700
|
Wed, 05 Mar 2003 18:10:18 -0700
|
||||||
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
|
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
|
||||||
|
hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
|
||||||
Wed, 05 Mar 2003 18:10:18 -0700
|
Wed, 05 Mar 2003 18:10:18 -0700
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
@ -1043,12 +1145,14 @@ Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.m
|
||||||
msg['Received-1'] = Header(h, header_name='Received-1',
|
msg['Received-1'] = Header(h, header_name='Received-1',
|
||||||
continuation_ws='\t')
|
continuation_ws='\t')
|
||||||
msg['Received-2'] = h
|
msg['Received-2'] = h
|
||||||
# XXX This should be splitting on spaces not commas.
|
# XXX The space after the ':' should not be there.
|
||||||
self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
|
self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
|
||||||
Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
|
Received-1:\x20
|
||||||
6 Mar 2003 13:58:21 +0100\")
|
<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
|
||||||
Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
|
Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
|
||||||
6 Mar 2003 13:58:21 +0100\")
|
Received-2:\x20
|
||||||
|
<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
|
||||||
|
Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
@ -1060,8 +1164,9 @@ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
|
||||||
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
|
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
|
||||||
msg['Face-1'] = t
|
msg['Face-1'] = t
|
||||||
msg['Face-2'] = Header(t, header_name='Face-2')
|
msg['Face-2'] = Header(t, header_name='Face-2')
|
||||||
|
msg['Face-3'] = ' ' + t
|
||||||
# XXX This splitting is all wrong. Either the first value line should be
|
# XXX This splitting is all wrong. Either the first value line should be
|
||||||
# snug against the field name.
|
# snug against the field name or the space after the header should not be there.
|
||||||
eq(msg.as_string(maxheaderlen=78), """\
|
eq(msg.as_string(maxheaderlen=78), """\
|
||||||
Face-1:\x20
|
Face-1:\x20
|
||||||
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
|
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
|
||||||
|
@ -1069,6 +1174,9 @@ Face-1:\x20
|
||||||
Face-2:\x20
|
Face-2:\x20
|
||||||
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
|
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
|
||||||
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
|
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
|
||||||
|
Face-3:\x20
|
||||||
|
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
|
||||||
|
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
@ -1080,8 +1188,8 @@ Face-2:\x20
|
||||||
'Wed, 16 Oct 2002 07:41:11 -0700')
|
'Wed, 16 Oct 2002 07:41:11 -0700')
|
||||||
msg = email.message_from_string(m)
|
msg = email.message_from_string(m)
|
||||||
eq(msg.as_string(maxheaderlen=78), '''\
|
eq(msg.as_string(maxheaderlen=78), '''\
|
||||||
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
|
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
|
||||||
Wed, 16 Oct 2002 07:41:11 -0700
|
Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
|
||||||
|
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
@ -1095,9 +1203,11 @@ Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsof
|
||||||
msg['List'] = h
|
msg['List'] = h
|
||||||
msg['List'] = Header(h, header_name='List')
|
msg['List'] = Header(h, header_name='List')
|
||||||
eq(msg.as_string(maxheaderlen=78), """\
|
eq(msg.as_string(maxheaderlen=78), """\
|
||||||
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
|
List: List-Unsubscribe:
|
||||||
|
<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
|
||||||
<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
|
<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
|
||||||
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
|
List: List-Unsubscribe:
|
||||||
|
<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
|
||||||
<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
|
<mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
|
||||||
|
|
||||||
""")
|
""")
|
||||||
|
@ -4113,6 +4223,11 @@ A very long line that must get split to something other than at the
|
||||||
msg = email.message_from_string("EmptyHeader:")
|
msg = email.message_from_string("EmptyHeader:")
|
||||||
self.assertEqual(str(msg), "EmptyHeader: \n\n")
|
self.assertEqual(str(msg), "EmptyHeader: \n\n")
|
||||||
|
|
||||||
|
def test_encode_preserves_leading_ws_on_value(self):
|
||||||
|
msg = Message()
|
||||||
|
msg['SomeHeader'] = ' value with leading ws'
|
||||||
|
self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Test RFC 2231 header parameters (en/de)coding
|
# Test RFC 2231 header parameters (en/de)coding
|
||||||
|
|
|
@ -60,6 +60,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #11492: fix several issues with header folding in the email package.
|
||||||
|
|
||||||
- Issue #11852: Add missing imports and update tests.
|
- Issue #11852: Add missing imports and update tests.
|
||||||
|
|
||||||
- Issue #11467: Fix urlparse behavior when handling urls which contains scheme
|
- Issue #11467: Fix urlparse behavior when handling urls which contains scheme
|
||||||
|
|
Loading…
Reference in New Issue