cpython/Lib/idlelib/FormatParagraph.py

# Extension to format a paragraph

# Does basic, standard text formatting, and also understands Python
# comment blocks.  Thus, for editing Python source code, this
# extension is really only suitable for reformatting these comment
# blocks or triple-quoted strings.

# Known problems with comment reformatting:
# * If there is a selection marked, and the first line of the
#   selection is not complete, the block will probably not be detected
#   as comments, and will have the normal "text formatting" rules
#   applied.
# * If a comment block has leading whitespace that mixes tabs and
#   spaces, they will not be considered part of the same block.
# * Fancy comments, like this bulleted list, aren't handled :-)

import re
from .configHandler import idleConf

class FormatParagraph:
    """Editor extension that re-wraps a paragraph or comment block.

    The instance holds a reference to the editor window it was created
    for; the text to reformat is either the current selection or the
    paragraph located around the insert cursor.
    """

    menudefs = [
        ('format', [   # /s/edit/format   dscherer@cmu.edu
            ('Format Paragraph', '<<format-paragraph>>'),
         ])
    ]

    def __init__(self, editwin):
        """Remember the editor window whose text widget will be reformatted."""
        self.editwin = editwin

    def close(self):
        """Drop the reference to the editor window."""
        self.editwin = None

    def format_paragraph_event(self, event):
        """Re-wrap the selection, or the paragraph at the insert cursor.

        The maximum width is read from the 'main' configuration
        ('FormatParagraph' section, 'paragraph' option).  If there is a
        selection it is reformatted as plain text; otherwise the
        paragraph around the insert mark is located with
        find_paragraph(), and any common comment/indent header is removed
        before wrapping and put back afterwards.

        The buffer is only modified (inside a single undo block) when the
        reformatted text differs from the original text; otherwise the
        insert cursor is just moved to the end of the paragraph.

        event -- the triggering event; not used.
        """
        maxformatwidth = int(idleConf.GetOption('main','FormatParagraph','paragraph'))
        text = self.editwin.text
        first, last = self.editwin.get_selection_indices()
        if first and last:
            data = text.get(first, last)
            comment_header = ''
        else:
            first, last, comment_header, data = \
                    find_paragraph(text, text.index("insert"))
        if comment_header:
            # Reformat the comment lines - convert to text sans header.
            # NOTE: 'data' must stay untouched here; it is compared with
            # the final result below to decide whether the buffer needs
            # to be modified at all.
            header_len = len(comment_header)
            stripped = "\n".join(line[header_len:]
                                 for line in data.split("\n"))
            # Reformat to maxformatwidth chars or a 20 char width,
            # whichever is greater.
            format_width = max(maxformatwidth - header_len, 20)
            # Re-split so the comment header can be re-inserted per line.
            wrapped = reformat_paragraph(stripped, format_width).split("\n")
            # If the block ends in a \n, we don't want the comment
            # prefix inserted after it. (I'm not sure it makes sense to
            # reformat a comment block that isn't made of complete
            # lines, but whatever!)  Can't think of a clean solution,
            # so we hack away.
            block_suffix = ""
            if not wrapped[-1]:
                block_suffix = "\n"
                wrapped = wrapped[:-1]
            newdata = '\n'.join(comment_header + line
                                for line in wrapped) + block_suffix
        else:
            # Just a normal text format
            newdata = reformat_paragraph(data, maxformatwidth)
        text.tag_remove("sel", "1.0", "end")
        if newdata != data:
            text.mark_set("insert", first)
            # Group the delete + insert so they undo as one step.
            text.undo_block_start()
            text.delete(first, last)
            text.insert(first, newdata)
            text.undo_block_stop()
        else:
            text.mark_set("insert", last)
        text.see("insert")

def find_paragraph(text, mark):
    """Locate the paragraph at or after index `mark` in a text widget.

    Blank lines starting at the line of `mark` are skipped first.  The
    paragraph is then the run of consecutive lines that share the same
    comment header (leading whitespace plus '#' characters, as computed
    by get_comment_header) and have non-blank content after it.

    Returns a tuple (first, last, comment_header, data): `first` and
    `last` are "line.0" indices delimiting the paragraph (`last` is the
    start of the line following it), and `data` is text.get(first, last).
    """
    def fetch(row):
        # Contents of line `row`, up to but excluding its newline.
        return text.get("%d.0" % row, "%d.0 lineend" % row)

    row, _column = map(int, mark.split("."))
    current = fetch(row)
    # Move down past blank lines, but never beyond the end of the text.
    while text.compare("%d.0" % row, "<", "end") and is_all_white(current):
        row += 1
        current = fetch(row)
    anchor_row = row
    comment_header = get_comment_header(current)
    header_len = len(comment_header)

    def in_block(candidate):
        # Same header as the anchor line and some content after it.
        return (get_comment_header(candidate) == comment_header
                and not is_all_white(candidate[header_len:]))

    # Scan forward to the first line that is not part of the paragraph.
    while in_block(current):
        row += 1
        current = fetch(row)
    last = "%d.0" % row

    # Scan backward from the anchor line to the start of the paragraph.
    row = anchor_row - 1
    current = fetch(row)
    while row > 0 and in_block(current):
        row -= 1
        current = fetch(row)
    first = "%d.0" % (row + 1)
    return first, last, comment_header, text.get(first, last)

def reformat_paragraph(data, limit):
    """Return `data` with its first paragraph re-wrapped to `limit` columns.

    data  -- text, possibly several lines separated by "\\n".
    limit -- maximum line width, measured after expanding tabs.

    Leading blank lines are kept as they are; if every line is blank,
    `data` is returned unchanged.  The first non-blank line supplies the
    indent of the first output line; the following line, if it is not
    blank, supplies the indent of all continuation lines.  Words are
    re-joined with one space, or with two spaces where the original
    separator was anything other than a single space.  A line is broken
    before a word that would push it past `limit`, except that the first
    word of the paragraph is never moved to a new line.  Everything
    after the first paragraph is appended unchanged.
    """
    lines = data.split("\n")
    i = 0
    n = len(lines)
    # Skip (and later copy verbatim) any leading blank lines.
    while i < n and is_all_white(lines[i]):
        i = i+1
    if i >= n:
        return data
    indent1 = get_indent(lines[i])
    if i+1 < n and not is_all_white(lines[i+1]):
        indent2 = get_indent(lines[i+1])
    else:
        indent2 = indent1
    new = lines[:i]
    partial = indent1
    while i < n and not is_all_white(lines[i]):
        # XXX Should take double space after period (etc.) into account
        # The capturing group keeps the separators in the result: words
        # sit at even indices, the whitespace runs between them at odd
        # indices.  (Raw string: "\s" is not a valid str escape.)
        words = re.split(r"(\s+)", lines[i])
        for j in range(0, len(words), 2):
            word = words[j]
            if not word:
                continue # Can happen when line ends in whitespace
            if len((partial + word).expandtabs()) > limit and \
               partial != indent1:
                # Current line is full: emit it and start a continuation.
                new.append(partial.rstrip())
                partial = indent2
            partial = partial + word + " "
            # Preserve a wider gap (e.g. two spaces after a period) as
            # two spaces.
            if j+1 < len(words) and words[j+1] != " ":
                partial = partial + " "
        i = i+1
    new.append(partial.rstrip())
    # XXX Should reformat remaining paragraphs as well
    new.extend(lines[i:])
    return "\n".join(new)

def is_all_white(line):
    """Return True if `line` is empty or contains only whitespace."""
    if re.match(r"^\s*$", line):
        return True
    return False

def get_indent(line):
    """Return the leading whitespace of `line` ('' if there is none)."""
    leading = re.match(r"^(\s*)", line)
    return leading.group(0)

def get_comment_header(line):
    """Return the comment header of `line`.

    The header is the leading whitespace followed by any run of '#'
    characters; it is '' when the line starts with neither.
    """
    found = re.match(r"^(\s*#*)", line)
    # Defensive: fall back to an empty header should there be no match.
    return found.group(1) if found is not None else ""
Initial revision 2000-08-14 22:13:23 -03:00			`# Extension to format a paragraph`

			`# Does basic, standard text formatting, and also understands Python`
			`# comment blocks. Thus, for editing Python source code, this`
			`# extension is really only suitable for reformatting these comment`
			`# blocks or triple-quoted strings.`

			`# Known problems with comment reformatting:`
			`# * If there is a selection marked, and the first line of the`
			`# selection is not complete, the block will probably not be detected`
			`# as comments, and will have the normal "text formatting" rules`
			`# applied.`
			`# * If a comment block has leading whitespace that mixes tabs and`
			`# spaces, they will not be considered part of the same block.`
			`# * Fancy comments, like this bulleted list, arent handled :-)`

			`import re`
Merged revisions 56443-56466 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/p3yk ................ r56454 \| kurt.kaiser \| 2007-07-18 22:26:14 -0700 (Wed, 18 Jul 2007) \| 2 lines Make relative imports explicit for py3k ................ r56455 \| kurt.kaiser \| 2007-07-18 23:12:15 -0700 (Wed, 18 Jul 2007) \| 2 lines Was modifying dict during iteration. ................ r56457 \| guido.van.rossum \| 2007-07-19 07:33:19 -0700 (Thu, 19 Jul 2007) \| 2 lines Fix failing test. ................ r56466 \| guido.van.rossum \| 2007-07-19 20:58:16 -0700 (Thu, 19 Jul 2007) \| 35 lines Merged revisions 56413-56465 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r56439 \| georg.brandl \| 2007-07-17 23:37:55 -0700 (Tue, 17 Jul 2007) \| 2 lines Use "Unix" as platform name, not "UNIX". ........ r56441 \| guido.van.rossum \| 2007-07-18 10:19:14 -0700 (Wed, 18 Jul 2007) \| 3 lines SF patch# 1755885 by Kurt Kaiser: show location of Unicode escape errors. (Slightly tweaked for style and refcounts.) ........ r56444 \| kurt.kaiser \| 2007-07-18 12:58:42 -0700 (Wed, 18 Jul 2007) \| 2 lines Fix failing unicode test caused by change to ast.c at r56441 ........ r56451 \| georg.brandl \| 2007-07-18 15:36:53 -0700 (Wed, 18 Jul 2007) \| 2 lines Add description for wave.setcomptype() values ........ r56456 \| walter.doerwald \| 2007-07-19 06:04:38 -0700 (Thu, 19 Jul 2007) \| 3 lines Document that codecs.lookup() returns a CodecInfo object. (fixes SF bug #1754453). ........ r56463 \| facundo.batista \| 2007-07-19 16:57:38 -0700 (Thu, 19 Jul 2007) \| 6 lines Added a select.select call in the test server loop to make sure the socket is ready to be read from before attempting a read (this prevents an error 10035 on some Windows platforms). [GSoC - Alan McIntyre] ........ ................ 2007-07-20 01:05:57 -03:00			`from .configHandler import idleConf`
Initial revision 2000-08-14 22:13:23 -03:00
			`class FormatParagraph:`

			`menudefs = [`
			`('format', [ # /s/edit/format dscherer@cmu.edu`
			`('Format Paragraph', '<<format-paragraph>>'),`
			`])`
			`]`

			`def __init__(self, editwin):`
			`self.editwin = editwin`

			`def close(self):`
			`self.editwin = None`

			`def format_paragraph_event(self, event):`
SF patch #961387: Make IDLE's paragraph reformatting width configurable 2004-06-04 03:31:08 -03:00			`maxformatwidth = int(idleConf.GetOption('main','FormatParagraph','paragraph'))`
Initial revision 2000-08-14 22:13:23 -03:00			`text = self.editwin.text`
			`first, last = self.editwin.get_selection_indices()`
			`if first and last:`
			`data = text.get(first, last)`
			`comment_header = ''`
			`else:`
			`first, last, comment_header, data = \`
			`find_paragraph(text, text.index("insert"))`
			`if comment_header:`
			`# Reformat the comment lines - convert to text sans header.`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`lines = data.split("\n")`
Initial revision 2000-08-14 22:13:23 -03:00			`lines = map(lambda st, l=len(comment_header): st[l:], lines)`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`data = "\n".join(lines)`
SF patch #961387: Make IDLE's paragraph reformatting width configurable 2004-06-04 03:31:08 -03:00			`# Reformat to maxformatwidth chars or a 20 char width, whichever is greater.`
format_paragraph_event(): Patch 961387 introduced a bug here, causing the indentation of a comment block to be ignored when reformatting the block, leading to overly long reformatted lines (too wide by an amount equal to the indentation width). Looks like a typo in the original patch, a 1-character repair. 2004-10-24 20:45:42 -03:00			`format_width = max(maxformatwidth - len(comment_header), 20)`
Initial revision 2000-08-14 22:13:23 -03:00			`newdata = reformat_paragraph(data, format_width)`
			`# re-split and re-insert the comment header.`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`newdata = newdata.split("\n")`
Initial revision 2000-08-14 22:13:23 -03:00			`# If the block ends in a \n, we dont want the comment`
			`# prefix inserted after it. (Im not sure it makes sense to`
			`# reformat a comment block that isnt made of complete`
			`# lines, but whatever!) Can't think of a clean soltution,`
			`# so we hack away`
			`block_suffix = ""`
			`if not newdata[-1]:`
			`block_suffix = "\n"`
			`newdata = newdata[:-1]`
			`builder = lambda item, prefix=comment_header: prefix+item`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`newdata = '\n'.join(map(builder, newdata)) + block_suffix`
Initial revision 2000-08-14 22:13:23 -03:00			`else:`
			`# Just a normal text format`
SF patch #961387: Make IDLE's paragraph reformatting width configurable 2004-06-04 03:31:08 -03:00			`newdata = reformat_paragraph(data, maxformatwidth)`
Initial revision 2000-08-14 22:13:23 -03:00			`text.tag_remove("sel", "1.0", "end")`
			`if newdata != data:`
			`text.mark_set("insert", first)`
			`text.undo_block_start()`
			`text.delete(first, last)`
			`text.insert(first, newdata)`
			`text.undo_block_stop()`
			`else:`
			`text.mark_set("insert", last)`
			`text.see("insert")`

			`def find_paragraph(text, mark):`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`lineno, col = map(int, mark.split("."))`
Initial revision 2000-08-14 22:13:23 -03:00			`line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)`
			`while text.compare("%d.0" % lineno, "<", "end") and is_all_white(line):`
			`lineno = lineno + 1`
			`line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)`
			`first_lineno = lineno`
			`comment_header = get_comment_header(line)`
			`comment_header_len = len(comment_header)`
			`while get_comment_header(line)==comment_header and \`
			`not is_all_white(line[comment_header_len:]):`
			`lineno = lineno + 1`
			`line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)`
			`last = "%d.0" % lineno`
			`# Search back to beginning of paragraph`
			`lineno = first_lineno - 1`
			`line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)`
			`while lineno > 0 and \`
			`get_comment_header(line)==comment_header and \`
			`not is_all_white(line[comment_header_len:]):`
			`lineno = lineno - 1`
			`line = text.get("%d.0" % lineno, "%d.0 lineend" % lineno)`
			`first = "%d.0" % (lineno+1)`
			`return first, last, comment_header, text.get(first, last)`

SF patch #961387: Make IDLE's paragraph reformatting width configurable 2004-06-04 03:31:08 -03:00			`def reformat_paragraph(data, limit):`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`lines = data.split("\n")`
Initial revision 2000-08-14 22:13:23 -03:00			`i = 0`
			`n = len(lines)`
			`while i < n and is_all_white(lines[i]):`
			`i = i+1`
			`if i >= n:`
			`return data`
			`indent1 = get_indent(lines[i])`
			`if i+1 < n and not is_all_white(lines[i+1]):`
			`indent2 = get_indent(lines[i+1])`
			`else:`
			`indent2 = indent1`
			`new = lines[:i]`
			`partial = indent1`
			`while i < n and not is_all_white(lines[i]):`
			`# XXX Should take double space after period (etc.) into account`
			`words = re.split("(\s+)", lines[i])`
			`for j in range(0, len(words), 2):`
			`word = words[j]`
			`if not word:`
			`continue # Can happen when line ends in whitespace`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`if len((partial + word).expandtabs()) > limit and \`
Initial revision 2000-08-14 22:13:23 -03:00			`partial != indent1:`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`new.append(partial.rstrip())`
Initial revision 2000-08-14 22:13:23 -03:00			`partial = indent2`
			`partial = partial + word + " "`
			`if j+1 < len(words) and words[j+1] != " ":`
			`partial = partial + " "`
			`i = i+1`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`new.append(partial.rstrip())`
Initial revision 2000-08-14 22:13:23 -03:00			`# XXX Should reformat remaining paragraphs as well`
			`new.extend(lines[i:])`
Merge Py Idle changes: Rev 1.10 (string methods) 2002-09-15 23:22:19 -03:00			`return "\n".join(new)`
Initial revision 2000-08-14 22:13:23 -03:00
			`def is_all_white(line):`
			`return re.match(r"^\s*$", line) is not None`

			`def get_indent(line):`
			`return re.match(r"^(\s*)", line).group()`

			`def get_comment_header(line):`
			`m = re.match(r"^(\s#)", line)`
			`if m is None: return ""`
			`return m.group(1)`