Follow-up to r67746 in order to restore backwards-compatibility for
those who (monkey-)patch TextWrapper.wordsep_re with a custom RE.
This commit is contained in:
Georg Brandl 2008-12-27 18:27:53 +00:00
parent 8d5934b25d
commit 3eef441700
1 changed file with 19 additions and 7 deletions

View File

@ -84,7 +84,7 @@ class TextWrapper:
# splits into # splits into
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings). # (after stripping out empty strings).
wordsep_re = ( wordsep_re = re.compile(
r'(\s+|' # any whitespace r'(\s+|' # any whitespace
r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
@ -93,7 +93,7 @@ class TextWrapper:
# "Hello there -- you goof-ball, use the -b option!" # "Hello there -- you goof-ball, use the -b option!"
# splits into # splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re = r'(\s+)' wordsep_simple_re = re.compile(r'(\s+)')
# XXX this is not locale- or charset-aware -- string.lowercase # XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only) # is US-ASCII only (and therefore English-only)
@ -124,6 +124,13 @@ class TextWrapper:
self.drop_whitespace = drop_whitespace self.drop_whitespace = drop_whitespace
self.break_on_hyphens = break_on_hyphens self.break_on_hyphens = break_on_hyphens
# recompile the regexes for Unicode mode -- done in this clumsy way for
# backwards compatibility because it's rather common to monkey-patch
# the TextWrapper class' wordsep_re attribute.
self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
self.wordsep_simple_re_uni = re.compile(
self.wordsep_simple_re.pattern, re.U)
# -- Private methods ----------------------------------------------- # -- Private methods -----------------------------------------------
# (possibly useful for subclasses to override) # (possibly useful for subclasses to override)
@ -160,12 +167,17 @@ class TextWrapper:
'use', ' ', 'the', ' ', '-b', ' ', 'option!' 'use', ' ', 'the', ' ', '-b', ' ', 'option!'
otherwise. otherwise.
""" """
flags = re.UNICODE if isinstance(text, unicode) else 0 if isinstance(text, unicode):
if self.break_on_hyphens:
pat = self.wordsep_re_uni
else:
pat = self.wordsep_simple_re_uni
else:
if self.break_on_hyphens: if self.break_on_hyphens:
pat = self.wordsep_re pat = self.wordsep_re
else: else:
pat = self.wordsep_simple_re pat = self.wordsep_simple_re
chunks = re.compile(pat, flags).split(text) chunks = pat.split(text)
chunks = filter(None, chunks) # remove empty chunks chunks = filter(None, chunks) # remove empty chunks
return chunks return chunks