Issue #13592: Improved the repr for regular expression pattern objects.
Based on patch by Hugo Lopes Tavares.
This commit is contained in:
parent
c1207c1bcf
commit
5c24d0e504
|
@ -250,6 +250,8 @@ if __name__ == "__main__":
|
|||
f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
|
||||
f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
|
||||
f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
|
||||
f.write("#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG)
|
||||
f.write("#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII)
|
||||
|
||||
f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
|
||||
f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
|
||||
|
|
|
@ -1164,6 +1164,68 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(m.group(2), "y")
|
||||
|
||||
|
||||
class PatternReprTests(unittest.TestCase):
    """Checks on the text returned by repr() for compiled pattern objects."""

    def check(self, pattern, expected):
        """Compile `pattern` without explicit flags and compare its repr."""
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        """Compile `pattern` with `flags` and compare its repr."""
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        # No flags given: the repr carries only the pattern.
        wanted = "re.compile('random pattern')"
        self.check('random pattern', wanted)

    def test_single_flag(self):
        wanted = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, wanted)

    def test_multiple_flags(self):
        # Short flag aliases are expected to show up under their long names,
        # joined with "|".
        wanted = ("re.compile('random pattern', "
                  "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        combined = re.I | re.S | re.X
        self.check_flags('random pattern', combined, wanted)

    def test_unicode_flag(self):
        # For a str pattern, re.U is expected to be left out of the repr,
        # both alone and when mixed with other flags.
        self.check_flags('random pattern', re.U,
                         "re.compile('random pattern')")
        wanted = ("re.compile('random pattern', "
                  "re.IGNORECASE|re.DOTALL)")
        self.check_flags('random pattern', re.I | re.S | re.U, wanted)

    def test_inline_flags(self):
        # A flag set inside the pattern text is expected in the repr as well.
        wanted = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', wanted)

    def test_unknown_flags(self):
        # Bits without a symbolic name are expected as a hex literal, placed
        # after any named flags.
        unnamed = 0x123000
        self.check_flags('random pattern', unnamed,
                         "re.compile('random pattern', 0x123000)")
        self.check_flags('random pattern', unnamed | re.I,
                         "re.compile('random pattern', re.IGNORECASE|0x123000)")

    def test_bytes(self):
        self.check(b'bytes pattern',
                   "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_quotes(self):
        # The pattern part is expected to follow the usual repr() quoting.
        cases = [
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        ]
        for source, wanted in cases:
            self.check(source, wanted)

    def test_long_pattern(self):
        # A pattern several thousand characters long must still give a short
        # repr that keeps its beginning and, when present, the flag suffix.
        filler = 'long ' * 1000
        pattern = 'Very %spattern' % filler
        head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], head)

        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
|
||||
|
||||
|
||||
class ImplementationTest(unittest.TestCase):
|
||||
"""
|
||||
Test implementation details of the re module.
|
||||
|
|
|
@ -68,6 +68,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #13592: Improved the repr for regular expression pattern objects.
|
||||
Based on patch by Hugo Lopes Tavares.
|
||||
|
||||
- Issue #19641: Added the audioop.byteswap() function to convert big-endian
|
||||
samples to little-endian and vice versa.
|
||||
|
||||
|
|
|
@ -1139,6 +1139,86 @@ pattern_deepcopy(PatternObject* self, PyObject* memo)
|
|||
#endif
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
pattern_repr(PatternObject *obj)
|
||||
{
|
||||
static const struct {
|
||||
const char *name;
|
||||
int value;
|
||||
} flag_names[] = {
|
||||
{"re.TEMPLATE", SRE_FLAG_TEMPLATE},
|
||||
{"re.IGNORECASE", SRE_FLAG_IGNORECASE},
|
||||
{"re.LOCALE", SRE_FLAG_LOCALE},
|
||||
{"re.MULTILINE", SRE_FLAG_MULTILINE},
|
||||
{"re.DOTALL", SRE_FLAG_DOTALL},
|
||||
{"re.UNICODE", SRE_FLAG_UNICODE},
|
||||
{"re.VERBOSE", SRE_FLAG_VERBOSE},
|
||||
{"re.DEBUG", SRE_FLAG_DEBUG},
|
||||
{"re.ASCII", SRE_FLAG_ASCII},
|
||||
};
|
||||
PyObject *result = NULL;
|
||||
PyObject *flag_items;
|
||||
int i;
|
||||
int flags = obj->flags;
|
||||
|
||||
/* Omit re.UNICODE for valid string patterns. */
|
||||
if (obj->isbytes == 0 &&
|
||||
(flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
|
||||
SRE_FLAG_UNICODE)
|
||||
flags &= ~SRE_FLAG_UNICODE;
|
||||
|
||||
flag_items = PyList_New(0);
|
||||
if (!flag_items)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
|
||||
if (flags & flag_names[i].value) {
|
||||
PyObject *item = PyUnicode_FromString(flag_names[i].name);
|
||||
if (!item)
|
||||
goto done;
|
||||
|
||||
if (PyList_Append(flag_items, item) < 0) {
|
||||
Py_DECREF(item);
|
||||
goto done;
|
||||
}
|
||||
Py_DECREF(item);
|
||||
flags &= ~flag_names[i].value;
|
||||
}
|
||||
}
|
||||
if (flags) {
|
||||
PyObject *item = PyUnicode_FromFormat("0x%x", flags);
|
||||
if (!item)
|
||||
goto done;
|
||||
|
||||
if (PyList_Append(flag_items, item) < 0) {
|
||||
Py_DECREF(item);
|
||||
goto done;
|
||||
}
|
||||
Py_DECREF(item);
|
||||
}
|
||||
|
||||
if (PyList_Size(flag_items) > 0) {
|
||||
PyObject *flags_result;
|
||||
PyObject *sep = PyUnicode_FromString("|");
|
||||
if (!sep)
|
||||
goto done;
|
||||
flags_result = PyUnicode_Join(sep, flag_items);
|
||||
Py_DECREF(sep);
|
||||
if (!flags_result)
|
||||
goto done;
|
||||
result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
|
||||
obj->pattern, flags_result);
|
||||
Py_DECREF(flags_result);
|
||||
}
|
||||
else {
|
||||
result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
|
||||
}
|
||||
|
||||
done:
|
||||
Py_DECREF(flag_items);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(pattern_match_doc,
|
||||
"match(string[, pos[, endpos]]) -> match object or None.\n\
|
||||
Matches zero or more characters at the beginning of the string");
|
||||
|
@ -1214,7 +1294,7 @@ static PyTypeObject Pattern_Type = {
|
|||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
(reprfunc)pattern_repr, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
|
|
|
@ -81,6 +81,8 @@
|
|||
#define SRE_FLAG_DOTALL 16
|
||||
#define SRE_FLAG_UNICODE 32
|
||||
#define SRE_FLAG_VERBOSE 64
|
||||
#define SRE_FLAG_DEBUG 128
|
||||
#define SRE_FLAG_ASCII 256
|
||||
#define SRE_INFO_PREFIX 1
|
||||
#define SRE_INFO_LITERAL 2
|
||||
#define SRE_INFO_CHARSET 4
|
||||
|
|
Loading…
Reference in New Issue