Issue #18725: The textwrap module now supports truncating multiline text.

This commit is contained in:
Serhiy Storchaka 2013-10-15 21:22:54 +03:00
parent bc2bfa6b68
commit acc9f3fb16
4 changed files with 165 additions and 58 deletions

View File

@ -250,6 +250,22 @@ hyphenated words; only then will long words be broken if necessary, unless
was to always allow breaking hyphenated words. was to always allow breaking hyphenated words.
.. attribute:: max_lines
(default: ``None``) If not ``None``, then the text will be truncated to
*max_lines* lines.
.. versionadded:: 3.4
.. attribute:: placeholder
(default: ``' [...]'``) String that will appear at the end of the output
text if it has been truncated.
.. versionadded:: 3.4
:class:`TextWrapper` also provides some public methods, analogous to the :class:`TextWrapper` also provides some public methods, analogous to the
module-level convenience functions: module-level convenience functions:
@ -266,15 +282,3 @@ hyphenated words; only then will long words be broken if necessary, unless
Wraps the single paragraph in *text*, and returns a single string Wraps the single paragraph in *text*, and returns a single string
containing the wrapped paragraph. containing the wrapped paragraph.
.. function:: shorten(text, *, placeholder=" [...]")
Collapse and truncate the given text to fit in :attr:`width`
characters.
The text first has its whitespace collapsed. If it then fits in
:attr:`width`, it is returned as-is. Otherwise, as many words
as possible are joined and then the *placeholder* is appended.
.. versionadded:: 3.4

View File

@ -42,10 +42,6 @@ class BaseTestCase(unittest.TestCase):
"\nexpected %r\n" "\nexpected %r\n"
"but got %r" % (expect, result)) "but got %r" % (expect, result))
def check_shorten(self, text, width, expect, **kwargs):
    # Shorten *text* to *width* (extra keyword arguments are forwarded to
    # shorten()) and compare the outcome with *expect* via self.check().
    self.check(shorten(text, width, **kwargs), expect)
class WrapTestCase(BaseTestCase): class WrapTestCase(BaseTestCase):
@ -433,6 +429,90 @@ What a mess!
self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"]) self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
class MaxLinesTestCase(BaseTestCase):
    # Exercises truncation of wrapped output through the max_lines and
    # placeholder options.

    text = "Hello there, how are you this fine day? I'm glad to hear it!"

    def test_simple(self):
        # max_lines=0 is expected to give the same single truncated line
        # as max_lines=1.
        self.check_wrap(self.text, 12,
                        ["Hello [...]"],
                        max_lines=0)
        self.check_wrap(self.text, 12,
                        ["Hello [...]"],
                        max_lines=1)
        self.check_wrap(self.text, 12,
                        ["Hello there,",
                         "how [...]"],
                        max_lines=2)
        # One more column of width lets one more word survive before the
        # placeholder.
        self.check_wrap(self.text, 13,
                        ["Hello there,",
                         "how are [...]"],
                        max_lines=2)
        # The whole text fits on one line: no placeholder expected.
        self.check_wrap(self.text, 80, [self.text], max_lines=1)
        # The text wraps to exactly max_lines lines: nothing is cut off.
        self.check_wrap(self.text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"],
                        max_lines=6)

    def test_spaces(self):
        # strip spaces before placeholder
        self.check_wrap(self.text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? [...]"],
                        max_lines=4)
        # placeholder at the start of line
        self.check_wrap(self.text, 6,
                        ["Hello",
                         "[...]"],
                        max_lines=2)
        # final spaces
        self.check_wrap(self.text + ' ' * 10, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"],
                        max_lines=6)

    def test_placeholder(self):
        self.check_wrap(self.text, 12,
                        ["Hello..."],
                        max_lines=1,
                        placeholder='...')
        self.check_wrap(self.text, 12,
                        ["Hello there,",
                         "how are..."],
                        max_lines=2,
                        placeholder='...')
        # long placeholder and indentation
        # The stripped placeholder '[truncated]...' is 14 characters wide,
        # so the indent must be at least 3 characters for indent +
        # placeholder to exceed width=16 and trigger the ValueError.  A
        # one-character indent (1 + 14 = 15) would be accepted.
        with self.assertRaises(ValueError):
            wrap(self.text, 16, initial_indent='    ',
                 max_lines=1, placeholder=' [truncated]...')
        with self.assertRaises(ValueError):
            wrap(self.text, 16, subsequent_indent='    ',
                 max_lines=2, placeholder=' [truncated]...')
        # Indent plus stripped placeholder still fits here, so the
        # placeholder ends up alone on the last line.
        self.check_wrap(self.text, 16,
                        [" Hello there,",
                         " [truncated]..."],
                        max_lines=2,
                        initial_indent=' ',
                        subsequent_indent=' ',
                        placeholder=' [truncated]...')
        self.check_wrap(self.text, 16,
                        [" [truncated]..."],
                        max_lines=1,
                        initial_indent=' ',
                        subsequent_indent=' ',
                        placeholder=' [truncated]...')
        # Without max_lines the placeholder is never used, so even a
        # placeholder far wider than the line is accepted.
        self.check_wrap(self.text, 80, [self.text], placeholder='.' * 1000)
class LongWordTestCase (BaseTestCase): class LongWordTestCase (BaseTestCase):
def setUp(self): def setUp(self):
self.wrapper = TextWrapper() self.wrapper = TextWrapper()
@ -493,6 +573,14 @@ How *do* you spell that odd word, anyways?
result = wrap(self.text, width=30, break_long_words=0) result = wrap(self.text, width=30, break_long_words=0)
self.check(result, expect) self.check(result, expect)
def test_max_lines_long(self):
self.check_wrap(self.text, 12,
['Did you say ',
'"supercalifr',
'agilisticexp',
'[...]'],
max_lines=4)
class IndentTestCases(BaseTestCase): class IndentTestCases(BaseTestCase):
@ -782,6 +870,10 @@ class IndentTestCase(unittest.TestCase):
class ShortenTestCase(BaseTestCase): class ShortenTestCase(BaseTestCase):
def check_shorten(self, text, width, expect, **kwargs):
    # Shorten *text* to *width* (extra keyword arguments are forwarded to
    # shorten()) and compare the outcome with *expect* via self.check().
    self.check(shorten(text, width, **kwargs), expect)
def test_simple(self): def test_simple(self):
# Simple case: just words, spaces, and a bit of punctuation # Simple case: just words, spaces, and a bit of punctuation
text = "Hello there, how are you this fine day? I'm glad to hear it!" text = "Hello there, how are you this fine day? I'm glad to hear it!"
@ -825,10 +917,9 @@ class ShortenTestCase(BaseTestCase):
self.check_shorten("hello world! ", 10, "[...]") self.check_shorten("hello world! ", 10, "[...]")
def test_width_too_small_for_placeholder(self): def test_width_too_small_for_placeholder(self):
wrapper = TextWrapper(width=8) shorten("x" * 20, width=8, placeholder="(......)")
wrapper.shorten("x" * 20, placeholder="(......)")
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
wrapper.shorten("x" * 20, placeholder="(.......)") shorten("x" * 20, width=8, placeholder="(.......)")
def test_first_word_too_long_but_placeholder_fits(self): def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]") self.check_shorten("Helloo", 5, "[...]")

View File

@ -19,8 +19,6 @@ __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent']
# since 0xa0 is not in range(128). # since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r ' _whitespace = '\t\n\x0b\x0c\r '
_default_placeholder = ' [...]'
class TextWrapper: class TextWrapper:
""" """
Object for wrapping/filling text. The public interface consists of Object for wrapping/filling text. The public interface consists of
@ -64,6 +62,10 @@ class TextWrapper:
compound words. compound words.
drop_whitespace (default: true) drop_whitespace (default: true)
Drop leading and trailing whitespace from lines. Drop leading and trailing whitespace from lines.
max_lines (default: None)
Truncate wrapped lines.
placeholder (default: ' [...]')
Append to the last line of truncated text.
""" """
unicode_whitespace_trans = {} unicode_whitespace_trans = {}
@ -106,7 +108,10 @@ class TextWrapper:
break_long_words=True, break_long_words=True,
drop_whitespace=True, drop_whitespace=True,
break_on_hyphens=True, break_on_hyphens=True,
tabsize=8): tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
self.width = width self.width = width
self.initial_indent = initial_indent self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent self.subsequent_indent = subsequent_indent
@ -117,6 +122,8 @@ class TextWrapper:
self.drop_whitespace = drop_whitespace self.drop_whitespace = drop_whitespace
self.break_on_hyphens = break_on_hyphens self.break_on_hyphens = break_on_hyphens
self.tabsize = tabsize self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
# -- Private methods ----------------------------------------------- # -- Private methods -----------------------------------------------
@ -225,6 +232,13 @@ class TextWrapper:
lines = [] lines = []
if self.width <= 0: if self.width <= 0:
raise ValueError("invalid width %r (must be > 0)" % self.width) raise ValueError("invalid width %r (must be > 0)" % self.width)
if self.max_lines is not None:
if self.max_lines > 1:
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
raise ValueError("placeholder too large for max width")
# Arrange in reverse order so items can be efficiently popped # Arrange in reverse order so items can be efficiently popped
# from a stack of chunks. # from a stack of chunks.
@ -267,15 +281,41 @@ class TextWrapper:
# fit on *any* line (not just this one). # fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width: if chunks and len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width) self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
# If the last chunk on this line is all whitespace, drop it. # If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
del cur_line[-1] del cur_line[-1]
# Convert current line back to a string and store it in list
# of all lines (return value).
if cur_line: if cur_line:
lines.append(indent + ''.join(cur_line)) if (self.max_lines is None or
len(lines) + 1 < self.max_lines or
(not chunks or
self.drop_whitespace and
len(chunks) == 1 and
not chunks[0].strip()) and cur_len <= width):
# Convert current line back to a string and store it in
# list of all lines (return value).
lines.append(indent + ''.join(cur_line))
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
self.width):
lines[-1] = prev_line + self.placeholder
break
lines.append(indent + self.placeholder.lstrip())
break
return lines return lines
@ -308,36 +348,6 @@ class TextWrapper:
""" """
return "\n".join(self.wrap(text)) return "\n".join(self.wrap(text))
def shorten(self, text, *, placeholder=_default_placeholder):
    """shorten(text: str) -> str

    Join the non-whitespace chunks of 'text' with single spaces so the
    result fits in 'self.width' columns.  When not every chunk fits,
    trailing chunks are dropped until 'placeholder' fits as well, and
    the placeholder is appended.

    Raises ValueError if the stripped placeholder is wider than
    'self.width'.
    """
    limit = self.width
    if len(placeholder.strip()) > limit:
        raise ValueError("placeholder too large for max width")

    joiner = ' '
    joiner_len = len(joiner)
    kept = []
    used = 0
    overflowed = False
    for piece in self._split_chunks(text):
        # Whitespace-only chunks are replaced by the single-space joiner.
        if not piece.strip():
            continue
        # Every chunk after the first also pays for one joiner.
        cost = len(piece) + joiner_len if kept else len(piece)
        if used + cost > limit:
            overflowed = True
            break
        kept.append(piece)
        used += cost

    if not overflowed:
        # No truncation necessary
        return joiner.join(kept)

    # Give back chunks from the end until the placeholder fits too.
    budget = limit - len(placeholder)
    while kept and used > budget:
        used -= len(kept.pop()) + joiner_len
    return (joiner.join(kept) + placeholder).strip()
# -- Convenience interface --------------------------------------------- # -- Convenience interface ---------------------------------------------
@ -366,7 +376,7 @@ def fill(text, width=70, **kwargs):
w = TextWrapper(width=width, **kwargs) w = TextWrapper(width=width, **kwargs)
return w.fill(text) return w.fill(text)
def shorten(text, width, *, placeholder=_default_placeholder, **kwargs): def shorten(text, width, **kwargs):
"""Collapse and truncate the given text to fit in the given width. """Collapse and truncate the given text to fit in the given width.
The text first has its whitespace collapsed. If it then fits in The text first has its whitespace collapsed. If it then fits in
@ -378,8 +388,8 @@ def shorten(text, width, *, placeholder=_default_placeholder, **kwargs):
>>> textwrap.shorten("Hello world!", width=11) >>> textwrap.shorten("Hello world!", width=11)
'Hello [...]' 'Hello [...]'
""" """
w = TextWrapper(width=width, **kwargs) w = TextWrapper(width=width, max_lines=1, **kwargs)
return w.shorten(text, placeholder=placeholder) return w.fill(' '.join(text.strip().split()))
# -- Loosely related functionality ------------------------------------- # -- Loosely related functionality -------------------------------------

View File

@ -42,6 +42,8 @@ Core and Builtins
Library Library
------- -------
- Issue #18725: The textwrap module now supports truncating multiline text.
- Issue #18776: atexit callbacks now display their full traceback when they - Issue #18776: atexit callbacks now display their full traceback when they
raise an exception. raise an exception.