__unicode__(): When converting to a unicode string, we need to preserve
spaces at the boundaries between encoded and unencoded words.

RFC 2047 is ambiguous here, but most people expect the space to be preserved.
Really closes SF bug #640110.
This commit is contained in:
Barry Warsaw 2003-03-06 16:10:30 +00:00
parent e05dcce686
commit 4848805341
1 changed file with 20 additions and 3 deletions

View File

@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
CRLF = '\r\n' CRLF = '\r\n'
NL = '\n' NL = '\n'
SPACE = ' ' SPACE = ' '
USPACE = u' '
SPACE8 = ' ' * 8 SPACE8 = ' ' * 8
EMPTYSTRING = '' EMPTYSTRING = ''
UEMPTYSTRING = u''
MAXLINELEN = 76 MAXLINELEN = 76
@ -204,9 +206,24 @@ class Header:
def __unicode__(self):
    """Helper for the built-in unicode function.

    Decodes every (string, charset) pair in self._chunks to unicode and
    joins the results.  A single space is inserted at each boundary where
    the header switches between an encoded charset and unencoded text
    (charset None or us-ascii), in either direction.  RFC 2047 is
    ambiguous about that space, but most people expect it to be preserved.
    No space is added between two chunks of the same kind, and never
    before the first chunk.
    """
    uchunks = []
    # None means "the previous chunk was unencoded"; the first chunk has no
    # predecessor, which the `uchunks` check below takes care of.
    lastcs = None
    for s, charset in self._chunks:
        # Treat us-ascii exactly like "no charset" on BOTH sides of the
        # boundary.  Remembering a us-ascii chunk as if it were an encoded
        # charset would put a space between two adjacent us-ascii chunks
        # and drop the space between a us-ascii chunk and an encoded one.
        if charset is None or charset == 'us-ascii':
            nextcs = None
        else:
            nextcs = charset
        # Only the second and subsequent chunks can sit on a boundary; a
        # space is needed when exactly one side of it is encoded.
        if uchunks and (lastcs is None) != (nextcs is None):
            uchunks.append(USPACE)
        lastcs = nextcs
        # charset is not a plain string here, so stringify it to get the
        # codec name that unicode() expects.
        uchunks.append(unicode(s, str(charset)))
    return UEMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to # Rich comparison operators for equality only. BAW: does it make sense to
# have or explicitly disable <, <=, >, >= operators? # have or explicitly disable <, <=, >, >= operators?