Convert _fix_sentence_endings() to use a regex, and augment it to
handle sentences like this:
  And she said, "Go to hell!"  Can you believe that?
This commit is contained in:
Greg Ward 2002-06-07 22:04:15 +00:00
parent 62e4f3bf22
commit 9b4864e40a
1 changed file with 8 additions and 8 deletions

View File

@ -54,8 +54,12 @@ class TextWrapper:
r'\w{2,}-(?=\w{2,})|' # hyphenated words r'\w{2,}-(?=\w{2,})|' # hyphenated words
r'(?<=\w)-{2,}(?=\w))') # em-dash r'(?<=\w)-{2,}(?=\w))') # em-dash
# Punctuation characters found at the end of a sentence. # XXX will there be a locale-or-charset-aware version of
sentence_end = ".?!" # string.lowercase in 2.3?
sentence_end_re = re.compile(r'[%s]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote
% string.lowercase)
def __init__ (self): def __init__ (self):
@ -107,13 +111,9 @@ class TextWrapper:
space to two. space to two.
""" """
i = 0 i = 0
punct = self.sentence_end pat = self.sentence_end_re
while i < len(chunks)-1: while i < len(chunks)-1:
# chunks[i] looks like the last word of a sentence, if chunks[i+1] == " " and pat.search(chunks[i]):
# and it's followed by a single space.
if (chunks[i][-1] in punct and
chunks[i+1] == " " and
islower(chunks[i][-2])):
chunks[i+1] = " " chunks[i+1] = " "
i += 2 i += 2
else: else: