From 1e91d8eb030656386ef3a07e8a516683bea85610 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Sat, 19 Apr 2003 04:00:56 +0000 Subject: [PATCH] Make _strptime escape regex syntax in format string to prevent use in internal regex. --- Lib/_strptime.py | 11 ++++++++++- Lib/test/test_strptime.py | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 55391c1f7df..0777b7e801d 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -373,8 +373,17 @@ class TimeRE(dict): return '%s)' % regex def pattern(self, format): - """Return re pattern for the format string.""" + """Return re pattern for the format string. + + Need to make sure that any characters that might be interpreted as + regex syntax are escaped. + + """ processed_format = '' + # The sub() call escapes all characters that might be misconstrued + # as regex syntax. + regex_chars = re_compile(r"([\\.^$*+?{}\[\]|])") + format = regex_chars.sub(r"\\\1", format) whitespace_replacement = re_compile('\s+') format = whitespace_replacement.sub('\s*', format) while format.find('%') != -1: diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index e708f4c0335..a106a4289da 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -168,6 +168,14 @@ class TimeRETests(unittest.TestCase): "did not find 'd' directive pattern string '%s'" % pattern_string) + def test_pattern_escaping(self): + # Make sure any characters in the format string that might be taken as + # regex syntax are escaped. 
+ pattern_string = self.time_re.pattern("\d+") + self.failUnless(r"\\d\+" in pattern_string, + "%s does not have re characters escaped properly" % + pattern_string) + def test_compile(self): # Check that compiled regex is correct found = self.time_re.compile(r"%A").match(self.locale_time.f_weekday[6]) @@ -201,6 +209,12 @@ class TimeRETests(unittest.TestCase): self.failUnless(_strptime.TimeRE(test_locale).pattern("%Z") == '', "with timezone == ('',''), TimeRE().pattern('%Z') != ''") + def test_matching_with_escapes(self): + # Make sure a format that requires escaping of characters works + compiled_re = self.time_re.compile("\w+ %m") + found = compiled_re.match("\w+ 10") + self.failUnless(found, "Escaping failed of format '\w+ 10'") + class StrptimeTests(unittest.TestCase): """Tests for _strptime.strptime."""