From d08a8ebf2a8fe9705a48468c9280445afd1df324 Mon Sep 17 00:00:00 2001 From: Amaury Forgeot d'Arc Date: Thu, 10 Jan 2008 21:59:42 +0000 Subject: [PATCH] Closing issue1761. Surprising behaviour of the "$" regexp: it matches the end of the string, AND just before the newline at the end of the string:: re.sub('$', '#', 'foo\n') == 'foo#\n#' Python is consistent with Perl and the pcre library, so we just document it. Guido prefers "\Z" to match only the end of the string. --- Doc/library/re.rst | 4 +++- Lib/test/test_re.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 850e1f87279..dd228356ec5 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -98,7 +98,9 @@ The special characters are: string, and in :const:`MULTILINE` mode also matches before a newline. ``foo`` matches both 'foo' and 'foobar', while the regular expression ``foo$`` matches only 'foo'. More interestingly, searching for ``foo.$`` in ``'foo1\nfoo2\n'`` - matches 'foo2' normally, but 'foo1' in :const:`MULTILINE` mode. + matches 'foo2' normally, but 'foo1' in :const:`MULTILINE` mode; searching for + a single ``$`` in ``'foo\n'`` will find two (empty) matches: one just before + the newline, and one at the end of the string. 
``'*'`` Causes the resulting RE to match 0 or more repetitions of the preceding RE, as diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 3056ef35f38..a2470cd6f27 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -671,6 +671,18 @@ class ReTests(unittest.TestCase): q = p.match(upper_char) self.assertNotEqual(q, None) + def test_dollar_matches_twice(self): + r"$ matches the end of string, and just before the terminating \n" + pattern = re.compile('$') + self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') + self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') + self.assertEqual(pattern.sub('#', '\n'), '#\n#') + + pattern = re.compile('$', re.MULTILINE) + self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) + self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') + self.assertEqual(pattern.sub('#', '\n'), '#\n#') + def run_re_tests(): from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR