From 484880534160893c610e5e3a723cf8a3d9f8f116 Mon Sep 17 00:00:00 2001
From: Barry Warsaw
Date: Thu, 6 Mar 2003 16:10:30 +0000
Subject: [PATCH] __unicode__(): When converting to a unicode string, we need to preserve spaces in the encoded/unencoded word boundaries. RFC 2047 is ambiguous here, but most people expect the space to be preserved. Really closes SF bug # 640110.

---
 Lib/email/Header.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index a6a1b0705b5..47a5508ff27 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
 CRLF = '\r\n'
 NL = '\n'
 SPACE = ' '
+USPACE = u' '
 SPACE8 = ' ' * 8
 EMPTYSTRING = ''
+UEMPTYSTRING = u''
 
 MAXLINELEN = 76
 
@@ -204,9 +206,24 @@ class Header:
 
     def __unicode__(self):
         """Helper for the built-in unicode function."""
-        # charset item is a Charset instance so we need to stringify it.
-        uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
-        return u''.join(uchunks)
+        uchunks = []
+        lastcs = None
+        for s, charset in self._chunks:
+            # We must preserve spaces between encoded and non-encoded word
+            # boundaries, which means for us we need to add a space when we go
+            # from a charset to None/us-ascii, or from None/us-ascii to a
+            # charset. Only do this for the second and subsequent chunks.
+            nextcs = charset
+            if uchunks:
+                if lastcs is not None:
+                    if nextcs is None or nextcs == 'us-ascii':
+                        uchunks.append(USPACE)
+                        nextcs = None
+                elif nextcs is not None and nextcs <> 'us-ascii':
+                    uchunks.append(USPACE)
+            lastcs = nextcs
+            uchunks.append(unicode(s, str(charset)))
+        return UEMPTYSTRING.join(uchunks)
 
     # Rich comparison operators for equality only. BAW: does it make sense to
     # have or explicitly disable <, <=, >, >= operators?