mirror of https://github.com/python/cpython
Follow-up to r67746 in order to restore backwards-compatibility for
those who (monkey-)patch TextWrapper.wordsep_re with a custom RE.
This commit is contained in:
parent
8d5934b25d
commit
3eef441700
|
@ -84,7 +84,7 @@ class TextWrapper:
|
||||||
# splits into
|
# splits into
|
||||||
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
|
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
|
||||||
# (after stripping out empty strings).
|
# (after stripping out empty strings).
|
||||||
wordsep_re = (
|
wordsep_re = re.compile(
|
||||||
r'(\s+|' # any whitespace
|
r'(\s+|' # any whitespace
|
||||||
r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
|
r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
|
||||||
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
||||||
|
@ -93,7 +93,7 @@ class TextWrapper:
|
||||||
# "Hello there -- you goof-ball, use the -b option!"
|
# "Hello there -- you goof-ball, use the -b option!"
|
||||||
# splits into
|
# splits into
|
||||||
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
|
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
|
||||||
wordsep_simple_re = r'(\s+)'
|
wordsep_simple_re = re.compile(r'(\s+)')
|
||||||
|
|
||||||
# XXX this is not locale- or charset-aware -- string.lowercase
|
# XXX this is not locale- or charset-aware -- string.lowercase
|
||||||
# is US-ASCII only (and therefore English-only)
|
# is US-ASCII only (and therefore English-only)
|
||||||
|
@ -124,6 +124,13 @@ class TextWrapper:
|
||||||
self.drop_whitespace = drop_whitespace
|
self.drop_whitespace = drop_whitespace
|
||||||
self.break_on_hyphens = break_on_hyphens
|
self.break_on_hyphens = break_on_hyphens
|
||||||
|
|
||||||
|
# recompile the regexes for Unicode mode -- done in this clumsy way for
|
||||||
|
# backwards compatibility because it's rather common to monkey-patch
|
||||||
|
# the TextWrapper class' wordsep_re attribute.
|
||||||
|
self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
|
||||||
|
self.wordsep_simple_re_uni = re.compile(
|
||||||
|
self.wordsep_simple_re.pattern, re.U)
|
||||||
|
|
||||||
|
|
||||||
# -- Private methods -----------------------------------------------
|
# -- Private methods -----------------------------------------------
|
||||||
# (possibly useful for subclasses to override)
|
# (possibly useful for subclasses to override)
|
||||||
|
@ -160,12 +167,17 @@ class TextWrapper:
|
||||||
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
|
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
|
||||||
otherwise.
|
otherwise.
|
||||||
"""
|
"""
|
||||||
flags = re.UNICODE if isinstance(text, unicode) else 0
|
if isinstance(text, unicode):
|
||||||
|
if self.break_on_hyphens:
|
||||||
|
pat = self.wordsep_re_uni
|
||||||
|
else:
|
||||||
|
pat = self.wordsep_simple_re_uni
|
||||||
|
else:
|
||||||
if self.break_on_hyphens:
|
if self.break_on_hyphens:
|
||||||
pat = self.wordsep_re
|
pat = self.wordsep_re
|
||||||
else:
|
else:
|
||||||
pat = self.wordsep_simple_re
|
pat = self.wordsep_simple_re
|
||||||
chunks = re.compile(pat, flags).split(text)
|
chunks = pat.split(text)
|
||||||
chunks = filter(None, chunks) # remove empty chunks
|
chunks = filter(None, chunks) # remove empty chunks
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue