diff --git a/Lib/sre.py b/Lib/sre.py index 8d03e921a91..859ff9e70dd 100644 --- a/Lib/sre.py +++ b/Lib/sre.py @@ -181,7 +181,7 @@ def _split(pattern, string, maxsplit=0): continue append(string[i:b]) if g and b != e: - extend(m.groups()) + extend(list(m.groups())) i = e n = n + 1 append(string[i:]) diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 454e4779f84..7c6eb9f7647 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -60,6 +60,12 @@ FLAGS = { "u": SRE_FLAG_UNICODE, } +try: + int("10", 8) + atoi = int +except TypeError: + atoi = string.atoi + class Pattern: # master pattern object. keeps track of global attributes def __init__(self): @@ -216,7 +222,7 @@ def isname(name): def _group(escape, groups): # check if the escape string represents a valid group try: - gid = int(escape[1:]) + gid = atoi(escape[1:]) if gid and gid < groups: return gid except ValueError: @@ -239,13 +245,13 @@ def _class_escape(source, escape): escape = escape[2:] if len(escape) != 2: raise error, "bogus escape: %s" % repr("\\" + escape) - return LITERAL, int(escape, 16) & 0xff + return LITERAL, atoi(escape, 16) & 0xff elif str(escape[1:2]) in OCTDIGITS: # octal escape (up to three digits) while source.next in OCTDIGITS and len(escape) < 5: escape = escape + source.get() escape = escape[1:] - return LITERAL, int(escape, 8) & 0xff + return LITERAL, atoi(escape, 8) & 0xff if len(escape) == 2: return LITERAL, ord(escape[1]) except ValueError: @@ -267,12 +273,12 @@ def _escape(source, escape, state): escape = escape + source.get() if len(escape) != 4: raise ValueError - return LITERAL, int(escape[2:], 16) & 0xff + return LITERAL, atoi(escape[2:], 16) & 0xff elif escape[1:2] == "0": # octal escape while source.next in OCTDIGITS and len(escape) < 4: escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff + return LITERAL, atoi(escape[1:], 8) & 0xff elif escape[1:2] in DIGITS: # octal escape *or* decimal group reference (sigh) here = source.tell() @@ -282,7 +288,7 @@ def _escape(source, escape, state): source.next in OCTDIGITS): # got three octal digits; this is an octal escape escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff + return LITERAL, atoi(escape[1:], 8) & 0xff # got at least one decimal digit; this is a group reference group = _group(escape, state.groups) if group: @@ -456,9 +462,9 @@ def _parse(source, state): source.seek(here) continue if lo: - min = int(lo) + min = atoi(lo) if hi: - max = int(hi) + max = atoi(hi) if max < min: raise error, "bad repeat interval" else: @@ -646,7 +652,7 @@ def parse_template(source, pattern): if not name: raise error, "bad group name" try: - index = int(name) + index = atoi(name) except ValueError: if not isname(name): raise error, "bad character in group name" @@ -662,7 +668,7 @@ def parse_template(source, pattern): if group: if (s.next not in DIGITS or not _group(this + s.next, pattern.groups+1)): - code = MARK, int(group) + code = MARK, group break elif s.next in OCTDIGITS: this = this + s.get() @@ -670,7 +676,7 @@ def parse_template(source, pattern): break if not code: this = this[1:] - code = LITERAL, int(this[-6:], 8) & 0xff + code = LITERAL, atoi(this[-6:], 8) & 0xff a(code) else: try: diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 5d19d40988d..88c0d62e8db 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -325,16 +325,26 @@ for t in tests: # Try the match on a unicode string, and check that it # still succeeds. - result=obj.search(unicode(s, "latin-1")) - if result==None: - print '=== Fails on unicode match', t + try: + u = unicode(s, "latin-1") + except NameError: + pass + else: + result=obj.search(u) + if result==None: + print '=== Fails on unicode match', t # Try the match on a unicode pattern, and check that it # still succeeds. - obj=sre.compile(unicode(pattern, "latin-1")) - result=obj.search(s) - if result==None: - print '=== Fails on unicode pattern match', t + try: + u = unicode(pattern, "latin-1") + except NameError: + pass + else: + obj=sre.compile(u) + result=obj.search(s) + if result==None: + print '=== Fails on unicode pattern match', t # Try the match with the search area limited to the extent # of the match and see if it still succeeds. \B will diff --git a/Modules/_sre.c b/Modules/_sre.c index f308dacdc04..3d4054a8c7d 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -24,7 +24,7 @@ * 2000-10-24 fl really fixed assert_not; reset groups in findall * 2000-12-21 fl fixed memory leak in groupdict * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL - * 2001-01-15 fl don't use recursion for unbounded MIN_UTIL; fixed + * 2001-01-15 fl avoid recursion for MIN_UTIL; fixed uppercase literal bug * 2001-01-16 fl fixed memory leak in pattern destructor * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. @@ -40,7 +40,7 @@ #ifndef SRE_RECURSIVE -char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB "; +char copyright[] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB "; #include "Python.h" @@ -49,7 +49,9 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB "; #include /* name of this module, minus the leading underscore */ -#define MODULE "sre" +#if !defined(SRE_MODULE) +#define SRE_MODULE "sre" +#endif /* defining this one enables tracing */ #undef VERBOSE @@ -81,6 +83,10 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB "; /* enables aggressive inlining (always on for Visual C) */ #undef USE_INLINE +#if PY_VERSION_HEX < 0x01060000 +#define PyObject_DEL(op) PyMem_DEL((op)) +#endif + /* -------------------------------------------------------------------- */ #if defined(_MSC_VER) @@ -221,6 +227,23 @@ sre_category(SRE_CODE category, unsigned int ch) return SRE_UNI_IS_LINEBREAK(ch); case SRE_CATEGORY_UNI_NOT_LINEBREAK: return !SRE_UNI_IS_LINEBREAK(ch); +#else + case SRE_CATEGORY_UNI_DIGIT: + return SRE_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_NOT_DIGIT: + return !SRE_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_SPACE: + return SRE_IS_SPACE(ch); + case SRE_CATEGORY_UNI_NOT_SPACE: + return !SRE_IS_SPACE(ch); + case SRE_CATEGORY_UNI_WORD: + return SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_UNI_NOT_WORD: + return !SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_UNI_LINEBREAK: + return SRE_IS_LINEBREAK(ch); + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + return !SRE_IS_LINEBREAK(ch); #endif } return 0; @@ -1208,33 +1231,22 @@ _compile(PyObject* self_, PyObject* args) int groups = 0; PyObject* groupindex = NULL; PyObject* indexgroup = NULL; - if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code, - &groups, &groupindex, &indexgroup)) + if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags, + &PyList_Type, &code, &groups, + &groupindex, &indexgroup)) return NULL; - code = PySequence_Fast(code, "code argument must be a sequence"); - if (!code) - return NULL; - -#if PY_VERSION_HEX >= 0x01060000 - n = PySequence_Size(code); -#else - n = PySequence_Length(code); -#endif + n = PyList_GET_SIZE(code); self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); - if (!self) { - Py_DECREF(code); + if (!self) return NULL; - } for (i = 0; i < n; i++) { - PyObject *o = PySequence_Fast_GET_ITEM(code, i); + PyObject *o = PyList_GET_ITEM(code, i); self->code[i] = (SRE_CODE) PyInt_AsLong(o); } - Py_DECREF(code); - if (PyErr_Occurred()) { PyObject_DEL(self); return NULL; @@ -1270,9 +1282,11 @@ sre_getlower(PyObject* self, PyObject* args) return NULL; if (flags & SRE_FLAG_LOCALE) return Py_BuildValue("i", sre_lower_locale(character)); -#if defined(HAVE_UNICODE) if (flags & SRE_FLAG_UNICODE) +#if defined(HAVE_UNICODE) return Py_BuildValue("i", sre_lower_unicode(character)); +#else + return Py_BuildValue("i", sre_lower_locale(character)); #endif return Py_BuildValue("i", sre_lower(character)); } @@ -1380,9 +1394,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, if (pattern->flags & SRE_FLAG_LOCALE) state->lower = sre_lower_locale; -#if defined(HAVE_UNICODE) else if (pattern->flags & SRE_FLAG_UNICODE) +#if defined(HAVE_UNICODE) state->lower = sre_lower_unicode; +#else + state->lower = sre_lower_locale; #endif else state->lower = sre_lower; @@ -1520,7 +1536,7 @@ pattern_scanner(PatternObject* pattern, PyObject* args) string = state_init(&self->state, pattern, string, start, end); if (!string) { - PyObject_Del(self); + PyObject_DEL(self); return NULL; } @@ -1619,7 +1635,7 @@ call(char* function, PyObject* args) PyObject* func; PyObject* result; - name = PyString_FromString(MODULE); + name = PyString_FromString(SRE_MODULE); if (!name) return NULL; module = PyImport_Import(name); @@ -2366,7 +2382,7 @@ init_sre(void) Pattern_Type.ob_type = Match_Type.ob_type = Scanner_Type.ob_type = &PyType_Type; - m = Py_InitModule("_" MODULE, _functions); + m = Py_InitModule("_" SRE_MODULE, _functions); d = PyModule_GetDict(m); PyDict_SetItemString(