Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately,

i.e. do *not* expand tabs, but treat them as whitespace that is not
equivalent to spaces.  Add a couple of test cases.  Clarify docs.
This commit is contained in:
Greg Ward 2006-06-11 00:40:49 +00:00
parent 0e1159583c
commit 7f54740c4d
4 changed files with 107 additions and 43 deletions

View File

@ -47,12 +47,17 @@ remove indentation from strings that have unwanted whitespace to the
left of the text.
\begin{funcdesc}{dedent}{text}
Remove any whitespace that can be uniformly removed from the left
of every line in \var{text}.
Remove any common leading whitespace from every line in \var{text}.
This is typically used to make triple-quoted strings line up with
the left edge of screen/whatever, while still presenting it in the
source code in indented form.
This can be used to make triple-quoted strings line up with the left
edge of the display, while still presenting them in the source code
in indented form.
Note that tabs and spaces are both treated as whitespace, but they are
not equal: the lines \code{" {} hello"} and \code{"\textbackslash{}thello"}
are considered to have no common leading whitespace. (This behaviour is
new in Python 2.5; older versions of this module incorrectly expanded
tabs before searching for common leading whitespace.)
For example:
\begin{verbatim}

View File

@ -460,38 +460,42 @@ some (including a hanging indent).'''
# of IndentTestCase!
class DedentTestCase(unittest.TestCase):
def assertUnchanged(self, text):
    """Check that passing 'text' through dedent() yields it back unmodified."""
    result = dedent(text)
    self.assertEquals(text, result)
def test_dedent_nomargin(self):
# No lines indented.
text = "Hello there.\nHow are you?\nOh good, I'm glad."
self.assertEquals(dedent(text), text)
self.assertUnchanged(text)
# Similar, with a blank line.
text = "Hello there.\n\nBoo!"
self.assertEquals(dedent(text), text)
self.assertUnchanged(text)
# Some lines indented, but overall margin is still zero.
text = "Hello there.\n This is indented."
self.assertEquals(dedent(text), text)
self.assertUnchanged(text)
# Again, add a blank line.
text = "Hello there.\n\n Boo!\n"
self.assertEquals(dedent(text), text)
self.assertUnchanged(text)
def test_dedent_even(self):
# All lines indented by two spaces.
text = " Hello there.\n How are ya?\n Oh good."
expect = "Hello there.\nHow are ya?\nOh good."
self.assertEquals(dedent(text), expect)
self.assertEquals(expect, dedent(text))
# Same, with blank lines.
text = " Hello there.\n\n How are ya?\n Oh good.\n"
expect = "Hello there.\n\nHow are ya?\nOh good.\n"
self.assertEquals(dedent(text), expect)
self.assertEquals(expect, dedent(text))
# Now indent one of the blank lines.
text = " Hello there.\n \n How are ya?\n Oh good.\n"
expect = "Hello there.\n\nHow are ya?\nOh good.\n"
self.assertEquals(dedent(text), expect)
self.assertEquals(expect, dedent(text))
def test_dedent_uneven(self):
# Lines indented unevenly.
@ -505,18 +509,53 @@ def foo():
while 1:
return foo
'''
self.assertEquals(dedent(text), expect)
self.assertEquals(expect, dedent(text))
# Uneven indentation with a blank line.
text = " Foo\n Bar\n\n Baz\n"
expect = "Foo\n Bar\n\n Baz\n"
self.assertEquals(dedent(text), expect)
self.assertEquals(expect, dedent(text))
# Uneven indentation with a whitespace-only line.
text = " Foo\n Bar\n \n Baz\n"
expect = "Foo\n Bar\n\n Baz\n"
self.assertEquals(dedent(text), expect)
self.assertEquals(expect, dedent(text))
# dedent() should not mangle internal tabs
def test_dedent_preserve_internal_tabs(self):
    """Tabs that sit inside a line (not in the margin) must survive dedent()."""
    indented = " hello\tthere\n how are\tyou?"
    dedented = "hello\tthere\nhow are\tyou?"

    # First pass strips the margin; second pass feeds in text that has
    # no margin at all, so dedent() must hand it back with tabs intact.
    for source in (indented, dedented):
        self.assertEquals(dedented, dedent(source))
# dedent() should not mangle tabs in the margin (i.e.
# tabs and spaces both count as margin, but are *not*
# considered equivalent)
def test_dedent_preserve_margin_tabs(self):
    """Tabs and spaces in the margin are both whitespace but never equivalent."""
    # One line is indented with spaces, the other with a tab: there is
    # no common margin, so nothing may be removed.
    text = " hello there\n\thow are you?"
    self.assertUnchanged(text)

    # same effect even if we have 8 spaces (a tab must not be treated
    # as equal to the 8 columns it would expand to)
    text = "        hello there\n\thow are you?"
    self.assertUnchanged(text)

    # dedent() only removes whitespace that can be uniformly removed!
    text = "\thello there\n\thow are you?"
    expect = "hello there\nhow are you?"
    self.assertEquals(expect, dedent(text))

    # Identical mixed space/tab margins are removed in full.
    text = " \thello there\n \thow are you?"
    self.assertEquals(expect, dedent(text))

    text = " \t hello there\n \t how are you?"
    self.assertEquals(expect, dedent(text))

    # Only the shared " \t" prefix is removed; the extra space on the
    # second line is kept as relative indentation.
    text = " \thello there\n \t how are you?"
    expect = "hello there\n how are you?"
    self.assertEquals(expect, dedent(text))
def test_main():

View File

@ -317,41 +317,58 @@ def fill(text, width=70, **kwargs):
# -- Loosely related functionality -------------------------------------
# Lines made up solely of spaces/tabs; they are blanked before the margin
# is computed so that they never constrain it.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the run of leading spaces/tabs of every line that has at least
# one non-whitespace character.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are considered to
    have no common leading whitespace.  Tabs are never expanded.

    Lines consisting only of spaces and tabs are reduced to empty lines
    and do not take part in computing the common margin.

    For example:

        s = '''\\
            hello
              world
            '''
        dedent(s)       # -> 'hello\\n  world\\n'

    Arguments:
        text -- the string to dedent.

    Returns the dedented string; `text` is returned unchanged (apart
    from blanked whitespace-only lines) when there is no common margin.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Neither is a prefix of the other: keep only the whitespace the
        # two have in common (possibly nothing) instead of giving up, so
        # that e.g. " \tfoo" and "  bar" still lose their shared " ".
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break

        # An empty margin can never grow again; stop scanning.
        if not margin:
            break

    if margin:
        # margin contains only spaces and tabs, so it is safe to embed
        # in the pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
if __name__ == "__main__":
#print dedent("\tfoo\n\tbar")
#print dedent(" \thello there\n \t how are you?")
print dedent("Hello there.\n This is indented.")

View File

@ -145,6 +145,9 @@ Extension Modules
Library
-------
- Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately;
clarify docs.
- The wsgiref package has been added to the standard library.
- The functions update_wrapper() and wraps() have been added to the functools