__init__(): Enforce the invariant that the charset item in a chunk tuple must be a Charset instance, not a string. The bug was that self._charset was not being converted to a Charset instance, so later .append() calls that used the default charset would break.
_split(): If the charset of the chunk is '8bit', return the chunk unchanged. We can't safely split it, so this is the path of least harm, even though the resulting header line may be longer than the maximum line length.
This commit is contained in:
parent
6c2bc46355
commit
5e3bcff651
|
@ -153,6 +153,8 @@ class Header:
|
||||||
"""
|
"""
|
||||||
if charset is None:
|
if charset is None:
|
||||||
charset = USASCII
|
charset = USASCII
|
||||||
|
if not isinstance(charset, Charset):
|
||||||
|
charset = Charset(charset)
|
||||||
self._charset = charset
|
self._charset = charset
|
||||||
self._continuation_ws = continuation_ws
|
self._continuation_ws = continuation_ws
|
||||||
cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
|
cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
|
||||||
|
@ -233,14 +235,21 @@ class Header:
|
||||||
self._chunks.append((s, charset))
|
self._chunks.append((s, charset))
|
||||||
|
|
||||||
def _split(self, s, charset, firstline=False):
|
def _split(self, s, charset, firstline=False):
|
||||||
# Split up a header safely for use with encode_chunks. BAW: this
|
# Split up a header safely for use with encode_chunks.
|
||||||
# appears to be a private convenience method.
|
|
||||||
splittable = charset.to_splittable(s)
|
splittable = charset.to_splittable(s)
|
||||||
encoded = charset.from_splittable(splittable)
|
encoded = charset.from_splittable(splittable)
|
||||||
elen = charset.encoded_header_len(encoded)
|
elen = charset.encoded_header_len(encoded)
|
||||||
|
|
||||||
if elen <= self._maxlinelen:
|
if elen <= self._maxlinelen:
|
||||||
return [(encoded, charset)]
|
return [(encoded, charset)]
|
||||||
|
# If we have undetermined raw 8bit characters sitting in a byte
|
||||||
|
# string, we really don't know what the right thing to do is. We
|
||||||
|
# can't really split it because it might be multibyte data which we
|
||||||
|
# could break if we split it between pairs. The least harm seems to
|
||||||
|
# be to not split the header at all, but that means they could go out
|
||||||
|
# longer than maxlinelen.
|
||||||
|
elif charset == '8bit':
|
||||||
|
return [(s, charset)]
|
||||||
# BAW: I'm not sure what the right test here is. What we're trying to
|
# BAW: I'm not sure what the right test here is. What we're trying to
|
||||||
# do is be faithful to RFC 2822's recommendation that (section 2.2.3):
|
# do is be faithful to RFC 2822's recommendation that (section 2.2.3):
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue