diff --git a/Modules/_sre.c b/Modules/_sre.c index 4d9d1cd773f..3cc90d4860f 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -451,7 +451,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15)))) return ok; set += 16; - } + } else { /* (32 bits per code word) */ if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31)))) @@ -492,7 +492,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) else block = -1; set += 64; - if (block >=0 && + if (block >=0 && (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31)))) return ok; set += count*8; @@ -568,7 +568,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount) while (ptr < end && (SRE_CODE) *ptr != chr) ptr++; break; - + case SRE_OP_NOT_LITERAL_IGNORE: /* repeated non-literal */ chr = pattern[1]; @@ -644,7 +644,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern) * - Recursive SRE_MATCH() returned false, and will continue the * outside 'for' loop: must be protected when breaking, since the next * OP could potentially depend on lastmark; - * + * * - Recursive SRE_MATCH() returned false, and will be called again * inside a local for/while loop: must be protected between each * loop iteration, since the recursive SRE_MATCH() could do anything, @@ -848,7 +848,7 @@ entrance: /* state->lastmark is the highest valid index in the state->mark array. If it is increased by more than 1, the intervening marks must be set to NULL to signal - that these marks have not been encountered. */ + that these marks have not been encountered. */ int j = state->lastmark + 1; while (j < i) state->mark[j++] = NULL; @@ -1065,9 +1065,9 @@ entrance: RETURN_ON_ERROR(ret); RETURN_SUCCESS; } - + LASTMARK_RESTORE(); - + ctx->ptr--; ctx->count--; } @@ -1115,7 +1115,7 @@ entrance: RETURN_ON_ERROR(ret); DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos); if (ret < (int) ctx->pattern[1]) - /* didn't match minimum number of times */ + /* didn't match minimum number of times */ RETURN_FAILURE; /* advance past minimum matches of repeat */ ctx->count = ret; @@ -1546,7 +1546,6 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) } break; } - } ptr++; } @@ -1601,7 +1600,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) return status; } - + LOCAL(int) SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len) { @@ -1652,10 +1651,14 @@ _compile(PyObject* self_, PyObject* args) for (i = 0; i < n; i++) { PyObject *o = PyList_GET_ITEM(code, i); - if (PyInt_Check(o)) - self->code[i] = (SRE_CODE) PyInt_AsLong(o); - else - self->code[i] = (SRE_CODE) PyLong_AsUnsignedLong(o); + unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) + : PyLong_AsUnsignedLong(o); + self->code[i] = (SRE_CODE) value; + if ((unsigned long) self->code[i] != value) { + PyErr_SetString(PyExc_OverflowError, + "regular expression code size limit exceeded"); + break; + } } if (PyErr_Occurred()) { @@ -1724,7 +1727,7 @@ getstring(PyObject* string, int* p_length, int* p_charsize) /* given a python object, return a data pointer, a length (in characters), and a character size. return NULL if the object is not a string (or not compatible) */ - + PyBufferProcs *buffer; int size, bytes, charsize; void* ptr; @@ -2203,7 +2206,7 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) while (state.start <= state.end) { PyObject* item; - + state_reset(&state); state.ptr = state.start; @@ -2222,7 +2225,7 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) pattern_error(status); goto error; } - + /* don't bother to build a match object */ switch (self->groups) { case 0: @@ -2271,7 +2274,7 @@ error: Py_DECREF(list); state_fini(&state); return NULL; - + } #if PY_VERSION_HEX >= 0x02020000 @@ -2349,7 +2352,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) pattern_error(status); goto error; } - + if (state.start == state.ptr) { if (last == state.end) break; @@ -2405,7 +2408,7 @@ error: Py_DECREF(list); state_fini(&state); return NULL; - + } static PyObject* @@ -2496,7 +2499,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, pattern_error(status); goto error; } - + b = STATE_OFFSET(&state, state.start); e = STATE_OFFSET(&state, state.ptr); @@ -2542,7 +2545,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, if (status < 0) goto error; } - + i = e; n = n + 1; @@ -2586,7 +2589,7 @@ error: state_fini(&state); Py_DECREF(filter); return NULL; - + } static PyObject* @@ -2626,7 +2629,7 @@ pattern_copy(PatternObject* self, PyObject* args) if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__")) return NULL; - + copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); if (!copy) return NULL; @@ -2653,7 +2656,7 @@ pattern_deepcopy(PatternObject* self, PyObject* args) { #ifdef USE_BUILTIN_COPY PatternObject* copy; - + PyObject* memo; if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) return NULL; @@ -2701,28 +2704,28 @@ PyDoc_STRVAR(pattern_finditer_doc, PyDoc_STRVAR(pattern_sub_doc, "sub(repl, string[, count = 0]) --> newstring\n\ Return the string obtained by replacing the leftmost non-overlapping\n\ - occurrences of pattern in string by the replacement repl."); + occurrences of pattern in string by the replacement repl."); PyDoc_STRVAR(pattern_subn_doc, "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\ Return the tuple (new_string, number_of_subs_made) found by replacing\n\ the leftmost non-overlapping occurrences of pattern with the\n\ - replacement repl."); + replacement repl."); PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); static PyMethodDef pattern_methods[] = { - {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, + {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match_doc}, - {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, + {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, pattern_search_doc}, {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, pattern_sub_doc}, {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS, pattern_subn_doc}, - {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, + {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS, pattern_split_doc}, - {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, + {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS, pattern_findall_doc}, #if PY_VERSION_HEX >= 0x02020000 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS, @@ -2734,7 +2737,7 @@ static PyMethodDef pattern_methods[] = { {NULL, NULL} }; -static PyObject* +static PyObject* pattern_getattr(PatternObject* self, char* name) { PyObject* res; @@ -3113,7 +3116,7 @@ match_copy(MatchObject* self, PyObject* args) #ifdef USE_BUILTIN_COPY MatchObject* copy; int slots, offset; - + if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__")) return NULL; @@ -3146,7 +3149,7 @@ match_deepcopy(MatchObject* self, PyObject* args) { #ifdef USE_BUILTIN_COPY MatchObject* copy; - + PyObject* memo; if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo)) return NULL; @@ -3181,7 +3184,7 @@ static PyMethodDef match_methods[] = { {NULL, NULL} }; -static PyObject* +static PyObject* match_getattr(MatchObject* self, char* name) { PyObject* res; @@ -3337,7 +3340,7 @@ static PyMethodDef scanner_methods[] = { {NULL, NULL} }; -static PyObject* +static PyObject* scanner_getattr(ScannerObject* self, char* name) { PyObject* res; @@ -3374,7 +3377,7 @@ static PyMethodDef _functions[] = { {NULL, NULL} }; -#if PY_VERSION_HEX < 0x02030000 +#if PY_VERSION_HEX < 0x02030000 DL_EXPORT(void) init_sre(void) #else PyMODINIT_FUNC init_sre(void)