Issue #4163: textwrap module: allow word splitting on a hyphen preceded by a non-ASCII letter.
This commit is contained in:
parent
d9c03e0da3
commit
7c59bc6f2f
|
@ -365,6 +365,14 @@ What a mess!
|
|||
self.assertRaises(ValueError, wrap, text, 0)
|
||||
self.assertRaises(ValueError, wrap, text, -1)
|
||||
|
||||
def test_no_split_at_umlaut(self):
|
||||
text = "Die Empf\xe4nger-Auswahl"
|
||||
self.check_wrap(text, 13, ["Die", "Empf\xe4nger-", "Auswahl"])
|
||||
|
||||
def test_umlaut_followed_by_dash(self):
|
||||
text = "aa \xe4\xe4-\xe4\xe4"
|
||||
self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
|
||||
|
||||
|
||||
class LongWordTestCase (BaseTestCase):
|
||||
def setUp(self):
|
||||
|
|
|
@ -76,7 +76,7 @@ class TextWrapper:
|
|||
# (after stripping out empty strings).
|
||||
wordsep_re = re.compile(
|
||||
r'(\s+|' # any whitespace
|
||||
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
|
||||
r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
|
||||
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
|
||||
|
||||
# This less funky little regex just splits on recognized spaces. E.g.
|
||||
|
|
|
@ -45,6 +45,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #4163: textwrap module: allow word splitting on a hyphen preceded by
|
||||
a non-ASCII letter.
|
||||
|
||||
- Issue #4616: TarFile.utime(): Restore directory times on Windows.
|
||||
|
||||
- Issue #4021: tokenize.detect_encoding() now raises a SyntaxError when the
|
||||
|
|
Loading…
Reference in New Issue