_split_header(): If we have a header which is a byte string containing
8-bit data, we cannot split it safely, so return the original string
unchanged.

_is8bitstring(): Helper function which returns True when we have a
byte string that contains non-ascii characters (i.e. mysterious 8-bit
data).
This commit is contained in:
Barry Warsaw 2002-10-14 15:09:30 +00:00
parent 7cd724049f
commit 6c2bc46355
1 changed files with 17 additions and 1 deletions

View File

@ -8,7 +8,7 @@ import time
import re
import random
from types import ListType
from types import ListType, StringType
from cStringIO import StringIO
from email.Header import Header
@ -35,6 +35,14 @@ SPACE8 = ' ' * 8
fcre = re.compile(r'^From ', re.MULTILINE)
def _is8bitstring(s):
    """Return True if `s' is a byte string containing non-ASCII data.

    Only instances of StringType are examined; any other object yields
    False.  A byte string counts as 8-bit when decoding it as us-ascii
    raises UnicodeError.
    """
    if not isinstance(s, StringType):
        return False
    try:
        # The decoded value is discarded; we only care whether the
        # us-ascii codec accepts every byte.
        unicode(s, 'us-ascii')
    except UnicodeError:
        return True
    return False
class Generator:
@ -174,6 +182,14 @@ class Generator:
# No line was actually longer than maxheaderlen characters, so
# just return the original unchanged.
return text
# If we have raw 8bit data in a byte string, we have no idea what the
# encoding is. I think there is no safe way to split this string. If
# it's ascii-subset, then we could do a normal ascii split, but if
# it's multibyte then we could break the string. There's no way to
# know so the least harm seems to be to not split the string and risk
# it being too long.
if _is8bitstring(text):
return text
# The `text' argument already has the field name prepended, so don't
# provide it here or the first line will get folded too short.
h = Header(text, maxlinelen=maxheaderlen,