#2659: add ``break_on_hyphens`` to TextWrapper.

This commit is contained in:
Georg Brandl 2008-05-11 10:42:28 +00:00
parent 5b54887deb
commit 6f95ae55b1
4 changed files with 48 additions and 3 deletions

View File

@ -41,6 +41,10 @@ instance and calling a single method on it. That instance is not reused, so for
applications that wrap/fill many text strings, it will be more efficient for you
to create your own :class:`TextWrapper` object.
Text is preferably wrapped on whitespaces and right after the hyphens in
hyphenated words; only then will long words be broken if necessary, unless
:attr:`TextWrapper.break_long_words` is set to false.
An additional utility function, :func:`dedent`, is provided to remove
indentation from strings that have unwanted whitespace to the left of the text.
@ -174,10 +178,22 @@ indentation from strings that have unwanted whitespace to the left of the text.
than :attr:`width`. (Long words will be put on a line by themselves, in
order to minimize the amount by which :attr:`width` is exceeded.)
.. attribute:: break_on_hyphens
(default: ``True``) If true, wrapping will occur preferably on whitespaces
and right after hyphens in compound words, as it is customary in English.
If false, only whitespaces will be considered as potentially good places
for line breaks, but you need to set :attr:`break_long_words` to false if
you want truly insecable words. Default behaviour in previous versions
was to always allow breaking hyphenated words.
.. versionadded:: 2.6
:class:`TextWrapper` also provides two public methods, analogous to the
module-level convenience functions:
.. method:: wrap(text)
Wraps the single paragraph in *text* (a string) so every line is at most

View File

@ -364,6 +364,14 @@ What a mess!
["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
"ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])
def test_break_on_hyphens(self):
# Ensure that the break_on_hyphens attributes work
text = "yaba daba-doo"
self.check_wrap(text, 10, ["yaba daba-", "doo"],
break_on_hyphens=True)
self.check_wrap(text, 10, ["yaba", "daba-doo"],
break_on_hyphens=False)
def test_bad_width(self):
# Ensure that width <= 0 is caught.
text = "Whatever, it doesn't matter."

View File

@ -63,6 +63,10 @@ class TextWrapper:
break_long_words (default: true)
Break words longer than 'width'. If false, those words will not
be broken, and some lines might be longer than 'width'.
break_on_hyphens (default: true)
Allow breaking hyphenated words. If true, wrapping will occur
preferably on whitespaces and right after hyphens part of
compound words.
drop_whitespace (default: true)
Drop leading and trailing whitespace from lines.
"""
@ -85,6 +89,12 @@ class TextWrapper:
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
# This less funky little regex just split on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re = re.compile(r'(\s+)')
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[%s]' # lowercase letter
@ -102,7 +112,8 @@ class TextWrapper:
replace_whitespace=True,
fix_sentence_endings=False,
break_long_words=True,
drop_whitespace=True):
drop_whitespace=True,
break_on_hyphens=True):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
@ -111,6 +122,7 @@ class TextWrapper:
self.fix_sentence_endings = fix_sentence_endings
self.break_long_words = break_long_words
self.drop_whitespace = drop_whitespace
self.break_on_hyphens = break_on_hyphens
# -- Private methods -----------------------------------------------
@ -143,8 +155,15 @@ class TextWrapper:
breaks into the following chunks:
'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
'use', ' ', 'the', ' ', '-b', ' ', 'option!'
if break_on_hyphens is True, or in:
'Look,', ' ', 'goof-ball', ' ', '--', ' ',
'use', ' ', 'the', ' ', '-b', ' ', option!'
otherwise.
"""
chunks = self.wordsep_re.split(text)
if self.break_on_hyphens is True:
chunks = self.wordsep_re.split(text)
else:
chunks = self.wordsep_simple_re.split(text)
chunks = filter(None, chunks) # remove empty chunks
return chunks

View File

@ -23,6 +23,8 @@ Extension Modules
Library
-------
- #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class.
- The mhlib module has been deprecated for removal in Python 3.0.
- The linuxaudiodev module has been deprecated for removal in Python 3.0.