From 4f35c71543f5dc0023590676c21b18b1a3a5986f Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Wed, 6 Oct 2004 02:11:37 +0000 Subject: [PATCH] Locale data that contains regex metacharacters are now properly escaped. Closes bug #1039270. --- Lib/_strptime.py | 6 ++++-- Lib/test/test_strptime.py | 13 +++++++++++++ Misc/NEWS | 5 +++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 22455ae9934..d93139ebeb6 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -15,6 +15,7 @@ import locale import calendar from re import compile as re_compile from re import IGNORECASE +from re import escape as re_escape from datetime import date as datetime_date try: from thread import allocate_lock as _thread_allocate_lock @@ -232,7 +233,7 @@ class TimeRE(dict): return '' to_convert = to_convert[:] to_convert.sort(key=len, reverse=True) - regex = '|'.join(to_convert) + regex = '|'.join(re_escape(stuff) for stuff in to_convert) regex = '(?P<%s>%s' % (directive, regex) return '%s)' % regex @@ -245,7 +246,8 @@ class TimeRE(dict): """ processed_format = '' # The sub() call escapes all characters that might be misconstrued - # as regex syntax. + # as regex syntax. Cannot use re.escape since we have to deal with + # format directives (%m, etc.). regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") format = regex_chars.sub(r"\\\1", format) whitespace_replacement = re_compile('\s+') diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 5aaa9a70e43..17f339b349e 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -176,6 +176,19 @@ class TimeRETests(unittest.TestCase): found = compiled_re.match("\w+ 10") self.failUnless(found, "Escaping failed of format '\w+ 10'") + def test_locale_data_w_regex_metacharacters(self): + # Check that if locale data contains regex metacharacters they are + # escaped properly. + # Discovered by bug #1039270 . + locale_time = _strptime.LocaleTime() + locale_time.timezone = (frozenset(("utc", "gmt", + "Tokyo (standard time)")), + frozenset("Tokyo (daylight time)")) + time_re = _strptime.TimeRE(locale_time) + self.failUnless(time_re.compile("%Z").match("Tokyo (standard time)"), + "locale data that contains regex metacharacters is not" + " properly escaped") + class StrptimeTests(unittest.TestCase): """Tests for _strptime.strptime.""" diff --git a/Misc/NEWS b/Misc/NEWS index 7927036982e..81db100b53b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -34,6 +34,11 @@ Extension modules Library ------- +- time.strptime() now properly escapes timezones and all other locale-specific + strings for regex-specific symbols. Was breaking under Japanese Windows when + the timezone was specified as "Tokyo (standard time)". + Closes bug #1039270. + - Updates for the email package: + All deprecated APIs that in email 2.x issued warnings have been removed: _encoder argument to the MIMEText constructor, Message.add_payload(),