Issue #13592: Improved the repr for regular expression pattern objects.

Based on patch by Hugo Lopes Tavares.
This commit is contained in:
Serhiy Storchaka 2013-11-23 22:42:43 +02:00
parent c1207c1bcf
commit 5c24d0e504
5 changed files with 150 additions and 1 deletion

View File

@ -250,6 +250,8 @@ if __name__ == "__main__":
f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
f.write("#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG)
f.write("#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII)
f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)

View File

@ -1164,6 +1164,68 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(2), "y")
class PatternReprTests(unittest.TestCase):
    """Checks on the text returned by repr() for compiled pattern objects."""

    def check(self, pattern, expected):
        """Compile *pattern* without flags and compare its repr to *expected*."""
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        """Compile *pattern* with *flags* and compare its repr to *expected*."""
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        # No flags given: the repr carries only the pattern.
        expected = "re.compile('random pattern')"
        self.check('random pattern', expected)

    def test_single_flag(self):
        expected = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, expected)

    def test_multiple_flags(self):
        # Several flags are expected joined with '|'.
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, expected)

    def test_unicode_flag(self):
        # re.U on a str pattern is expected to be left out of the repr.
        cases = (
            (re.U,
             "re.compile('random pattern')"),
            (re.I|re.S|re.U,
             "re.compile('random pattern', "
             "re.IGNORECASE|re.DOTALL)"),
        )
        for flags, expected in cases:
            self.check_flags('random pattern', flags, expected)

    def test_inline_flags(self):
        # A flag set inside the pattern text shows up in the repr as well.
        expected = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', expected)

    def test_unknown_flags(self):
        # Bits without a name are expected as a hex literal after named flags.
        cases = (
            (0x123000,
             "re.compile('random pattern', 0x123000)"),
            (0x123000|re.I,
             "re.compile('random pattern', re.IGNORECASE|0x123000)"),
        )
        for flags, expected in cases:
            self.check_flags('random pattern', flags, expected)

    def test_bytes(self):
        pattern = b'bytes pattern'
        self.check(pattern, "re.compile(b'bytes pattern')")
        self.check_flags(pattern, re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_quotes(self):
        # The pattern part follows the usual repr() quoting rules.
        cases = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, expected in cases:
            self.check(pattern, expected)

    def test_long_pattern(self):
        # A very long pattern must be shortened in the repr, while the
        # flags still appear at the end.
        pattern = 'Very %spattern' % ('long ' * 1000)
        expected_head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], expected_head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], expected_head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
class ImplementationTest(unittest.TestCase):
"""
Test implementation details of the re module.

View File

@ -68,6 +68,9 @@ Core and Builtins
Library
-------
- Issue #13592: Improved the repr for regular expression pattern objects.
Based on patch by Hugo Lopes Tavares.
- Issue #19641: Added the audioop.byteswap() function to convert big-endian
samples to little-endian and vice versa.

View File

@ -1139,6 +1139,86 @@ pattern_deepcopy(PatternObject* self, PyObject* memo)
#endif
}
/* Build the repr() string of a compiled pattern object.
 *
 * The result has the form "re.compile(<pattern repr>)" or, when flag bits
 * remain to be shown, "re.compile(<pattern repr>, NAME|NAME|...)".  The
 * pattern repr is limited by the "%.200R" precision.  Flag bits that have
 * no entry in flag_names are emitted together as one "0x..." item after
 * the named flags.
 *
 * Returns a new reference, or NULL if any of the object-creation calls
 * fails.
 */
static PyObject *
pattern_repr(PatternObject *obj)
{
    /* Known flag bits, in the order they are written out. */
    static const struct {
        const char *name;
        int value;
    } flag_names[] = {
        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
        {"re.LOCALE", SRE_FLAG_LOCALE},
        {"re.MULTILINE", SRE_FLAG_MULTILINE},
        {"re.DOTALL", SRE_FLAG_DOTALL},
        {"re.UNICODE", SRE_FLAG_UNICODE},
        {"re.VERBOSE", SRE_FLAG_VERBOSE},
        {"re.DEBUG", SRE_FLAG_DEBUG},
        {"re.ASCII", SRE_FLAG_ASCII},
    };
    PyObject *result = NULL;
    PyObject *flag_items;
    PyObject *item;
    size_t i;  /* unsigned: compared against Py_ARRAY_LENGTH() below */
    int flags = obj->flags;
    int status;

    /* Omit re.UNICODE for valid string patterns: when the pattern is not
       bytes and, of LOCALE/UNICODE/ASCII, only UNICODE is set, the bit is
       dropped before the flag list is built. */
    if (obj->isbytes == 0 &&
        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
         SRE_FLAG_UNICODE)
        flags &= ~SRE_FLAG_UNICODE;

    flag_items = PyList_New(0);
    if (!flag_items)
        return NULL;

    /* Collect the name of every known flag that is set, clearing each bit
       as it is consumed so that only unknown bits are left afterwards. */
    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
        if (!(flags & flag_names[i].value))
            continue;
        item = PyUnicode_FromString(flag_names[i].name);
        if (!item)
            goto done;
        status = PyList_Append(flag_items, item);
        Py_DECREF(item);  /* our reference is released on both outcomes */
        if (status < 0)
            goto done;
        flags &= ~flag_names[i].value;
    }

    /* Whatever is left has no name; show it as a single hex item. */
    if (flags) {
        item = PyUnicode_FromFormat("0x%x", flags);
        if (!item)
            goto done;
        status = PyList_Append(flag_items, item);
        Py_DECREF(item);
        if (status < 0)
            goto done;
    }

    if (PyList_Size(flag_items) > 0) {
        PyObject *flags_result;
        PyObject *sep = PyUnicode_FromString("|");
        if (!sep)
            goto done;
        flags_result = PyUnicode_Join(sep, flag_items);
        Py_DECREF(sep);
        if (!flags_result)
            goto done;
        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
                                      obj->pattern, flags_result);
        Py_DECREF(flags_result);
    }
    else {
        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
    }

done:
    /* Single exit path: result is still NULL if anything above failed. */
    Py_DECREF(flag_items);
    return result;
}
/* Docstring text describing match(); presumably attached to the pattern
   object's match method -- the method table is not visible here. */
PyDoc_STRVAR(pattern_match_doc,
"match(string[, pos[, endpos]]) -> match object or None.\n\
Matches zero or more characters at the beginning of the string");
@ -1214,7 +1294,7 @@ static PyTypeObject Pattern_Type = {
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
(reprfunc)pattern_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */

View File

@ -81,6 +81,8 @@
/* SRE_FLAG_* values: each one shown here is a single distinct bit, so
   flags can be combined with | and tested with &. */
#define SRE_FLAG_DOTALL 16
#define SRE_FLAG_UNICODE 32
#define SRE_FLAG_VERBOSE 64
#define SRE_FLAG_DEBUG 128
#define SRE_FLAG_ASCII 256
/* SRE_INFO_* values: a separate set of single-bit constants. */
#define SRE_INFO_PREFIX 1
#define SRE_INFO_LITERAL 2
#define SRE_INFO_CHARSET 4