Make _strptime escape regex syntax characters in the format string so that they are matched literally instead of being interpreted as syntax by the internally generated regex.

This commit is contained in:
Brett Cannon 2003-04-19 04:00:56 +00:00
parent 482c5f7eb7
commit 1e91d8eb03
2 changed files with 24 additions and 1 deletions

View File

@ -373,8 +373,17 @@ class TimeRE(dict):
return '%s)' % regex
def pattern(self, format):
"""Return re pattern for the format string."""
"""Return re pattern for the format string.
Need to make sure that any characters that might be interpreted as
regex syntax is escaped.
"""
processed_format = ''
# The sub() call escapes all characters that might be misconstrued
# as regex syntax.
regex_chars = re_compile(r"([\\.^$*+?{}\[\]|])")
format = regex_chars.sub(r"\\\1", format)
whitespace_replacement = re_compile('\s+')
format = whitespace_replacement.sub('\s*', format)
while format.find('%') != -1:

View File

@ -168,6 +168,14 @@ class TimeRETests(unittest.TestCase):
"did not find 'd' directive pattern string '%s'" %
pattern_string)
def test_pattern_escaping(self):
    # Make sure any characters in the format string that might be taken as
    # regex syntax are escaped in the pattern that is returned.
    # Raw strings keep every backslash literal: the format passed in is the
    # three characters \d+ and the escaped form looked for is \\d\+ .
    pattern_string = self.time_re.pattern(r"\d+")
    self.failUnless(r"\\d\+" in pattern_string,
                    "%s does not have re characters escaped properly" %
                    pattern_string)
def test_compile(self):
# Check that compiled regex is correct
found = self.time_re.compile(r"%A").match(self.locale_time.f_weekday[6])
@ -201,6 +209,12 @@ class TimeRETests(unittest.TestCase):
self.failUnless(_strptime.TimeRE(test_locale).pattern("%Z") == '',
"with timezone == ('',''), TimeRE().pattern('%Z') != ''")
def test_matching_with_escapes(self):
    # Make sure a format that requires escaping of characters works: the
    # regex-looking prefix of the format must match the same characters
    # literally in the data string, while %m still matches the month.
    # Raw strings are used so the backslash in \w is kept as-is rather than
    # relying on an unrecognized escape sequence.
    compiled_re = self.time_re.compile(r"\w+ %m")
    found = compiled_re.match(r"\w+ 10")
    # The message names the format that was compiled, not the data string.
    self.failUnless(found, r"Escaping failed of format '\w+ %m'")
class StrptimeTests(unittest.TestCase):
"""Tests for _strptime.strptime."""