Several changes. Test program to follow.

- Where optional arguments were being used, converted to
  PyArg_ParseTuple() style instead of nested PyArg_Parse() style.

- Check for and handle many potential error conditions that were never
  being tested.

- Internal reg_* functions renamed to regobj_* (makes it easier to
  figure out which are global regex functions and which are for regex
  objects).

- reg_group (now regobj_group) was quite extensively reworked.  It no
  longer recurses to do its job (the core functionality is factored
  into a separate function that knows about string and integer
  indexes).

- Some minor formatting fixes.

- regex_set_syntax() now invalidates the cache.  Without this change
  (in the example below), the second search would produce different
  output depending on whether the first search was performed or not
  (since performing the first search would cache the compiled object
  with RE_SYNTAX_EMACS, causing the second test to unexpectedly fail).

  regex.search('(a+)|(b+)', 'cdb')
  prev = regex.set_syntax(RE_SYNTAX_AWK)
  regex.search('(a+)|(b+)', 'cdb')
This commit is contained in:
Barry Warsaw 1996-12-20 21:56:07 +00:00
parent 4455cd8e19
commit c357325663
1 changed files with 267 additions and 162 deletions

View File

@ -80,6 +80,7 @@ makeresult(regs)
PyObject *v; PyObject *v;
int i; int i;
static PyObject *filler = NULL; static PyObject *filler = NULL;
if (filler == NULL) { if (filler == NULL) {
filler = Py_BuildValue("(ii)", -1, -1); filler = Py_BuildValue("(ii)", -1, -1);
if (filler == NULL) if (filler == NULL)
@ -88,6 +89,7 @@ makeresult(regs)
v = PyTuple_New(RE_NREGS); v = PyTuple_New(RE_NREGS);
if (v == NULL) if (v == NULL)
return NULL; return NULL;
for (i = 0; i < RE_NREGS; i++) { for (i = 0; i < RE_NREGS; i++) {
int lo = regs->start[i]; int lo = regs->start[i];
int hi = regs->end[i]; int hi = regs->end[i];
@ -98,35 +100,27 @@ makeresult(regs)
} }
else else
w = Py_BuildValue("(ii)", lo, hi); w = Py_BuildValue("(ii)", lo, hi);
if (w == NULL) { if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
Py_XDECREF(v); Py_DECREF(v);
return NULL; return NULL;
} }
PyTuple_SetItem(v, i, w);
} }
return v; return v;
} }
static PyObject * static PyObject *
reg_match(re, args) regobj_match(re, args)
regexobject *re; regexobject *re;
PyObject *args; PyObject *args;
{ {
PyObject *argstring;
char *buffer; char *buffer;
int size; int size;
int offset; int offset = 0;
int result; int result;
if (PyArg_Parse(args, "S", &argstring)) {
offset = 0; if (!PyArg_ParseTuple(args, "s#|i", &buffer, &size, &offset))
}
else {
PyErr_Clear();
if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
return NULL; return NULL;
}
buffer = PyString_AsString(argstring);
size = PyString_Size(argstring);
if (offset < 0 || offset > size) { if (offset < 0 || offset > size) {
PyErr_SetString(RegexError, "match offset out of range"); PyErr_SetString(RegexError, "match offset out of range");
return NULL; return NULL;
@ -140,34 +134,28 @@ reg_match(re, args)
return NULL; return NULL;
} }
if (result >= 0) { if (result >= 0) {
Py_INCREF(argstring); PyObject* str = PyString_FromStringAndSize(buffer, size);
re->re_lastok = argstring; if (!str)
return NULL;
re->re_lastok = str;
} }
return PyInt_FromLong((long)result); /* Length of the match or -1 */ return PyInt_FromLong((long)result); /* Length of the match or -1 */
} }
static PyObject * static PyObject *
reg_search(re, args) regobj_search(re, args)
regexobject *re; regexobject *re;
PyObject *args; PyObject *args;
{ {
PyObject *argstring;
char *buffer; char *buffer;
int size; int size;
int offset; int offset = 0;
int range; int range;
int result; int result;
if (PyArg_Parse(args, "S", &argstring)) { if (!PyArg_ParseTuple(args, "s#|i", &buffer, &size, &offset))
offset = 0;
}
else {
PyErr_Clear();
if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
return NULL; return NULL;
}
buffer = PyString_AsString(argstring);
size = PyString_Size(argstring);
if (offset < 0 || offset > size) { if (offset < 0 || offset > size) {
PyErr_SetString(RegexError, "search offset out of range"); PyErr_SetString(RegexError, "search offset out of range");
return NULL; return NULL;
@ -187,51 +175,38 @@ reg_search(re, args)
return NULL; return NULL;
} }
if (result >= 0) { if (result >= 0) {
Py_INCREF(argstring); PyObject* str = PyString_FromStringAndSize(buffer, size);
re->re_lastok = argstring; if (!str)
return NULL;
re->re_lastok = str;
} }
return PyInt_FromLong((long)result); /* Position of the match or -1 */ return PyInt_FromLong((long)result); /* Position of the match or -1 */
} }
static PyObject * /* get the group from the regex where index can be a string (group name) or
reg_group(re, args) an integer index [0 .. 99]
*/
static PyObject*
group_from_index(re, index)
regexobject *re; regexobject *re;
PyObject *args; PyObject *index;
{ {
int i, a, b; int i, a, b;
if (args != NULL && PyTuple_Check(args)) { char *v;
int n = PyTuple_Size(args);
PyObject *res = PyTuple_New(n); if (PyString_Check(index))
if (res == NULL) if (re->re_groupindex == NULL ||
return NULL; !(index = PyDict_GetItem(re->re_groupindex, index)))
for (i = 0; i < n; i++) { {
PyObject *v = reg_group(re, PyTuple_GetItem(args, i)); PyErr_SetString(RegexError,
if (v == NULL) { "group() group name doesn't exist");
Py_DECREF(res);
return NULL;
}
PyTuple_SetItem(res, i, v);
}
return res;
}
if (!PyArg_Parse(args, "i", &i)) {
PyObject *n;
PyErr_Clear();
if (!PyArg_Parse(args, "S", &n))
return NULL;
else {
PyObject *index;
if (re->re_groupindex == NULL)
index = NULL;
else
index = PyDict_GetItem(re->re_groupindex, n);
if (index == NULL) {
PyErr_SetString(RegexError, "group() group name doesn't exist");
return NULL; return NULL;
} }
i = PyInt_AsLong(index); i = PyInt_AsLong(index);
} if (i == -1 && PyErr_Occurred())
} return NULL;
if (i < 0 || i >= RE_NREGS) { if (i < 0 || i >= RE_NREGS) {
PyErr_SetString(RegexError, "group() index out of range"); PyErr_SetString(RegexError, "group() index out of range");
return NULL; return NULL;
@ -247,18 +222,79 @@ reg_group(re, args)
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
if (!(v = PyString_AsString(re->re_lastok)))
return NULL;
return PyString_FromStringAndSize(v+a, b-a);
} }
/* regexobject.group(index [, index ...])

   `args' is the argument tuple.  Each element is handed unchanged to
   group_from_index(), which accepts either a group name (string) or
   an integer group index.

   - no arguments:       raises TypeError("not enough arguments")
   - exactly one:        returns whatever group_from_index() returns
   - two or more:        returns a new tuple with one entry per argument,
                         in argument order

   Returns NULL with an exception set on any failure.
*/
static PyObject *
regobj_group(re, args)
	regexobject *re;
	PyObject *args;
{
	PyObject *result;
	PyObject *key;
	PyObject *item;
	int pos;
	int nargs = PyTuple_Size(args);

	/* PyTuple_Size() reports failure with a negative count */
	if (nargs < 0)
		return NULL;

	if (nargs == 0) {
		PyErr_SetString(PyExc_TypeError, "not enough arguments");
		return NULL;
	}

	if (nargs == 1) {
		/* single index: hand back the group itself, not a 1-tuple */
		key = PyTuple_GetItem(args, 0);
		if (key == NULL)
			return NULL;
		return group_from_index(re, key);
	}

	/* several indexes: collect the groups into a tuple */
	result = PyTuple_New(nargs);
	if (result == NULL)
		return NULL;

	for (pos = 0; pos < nargs; pos++) {
		key = PyTuple_GetItem(args, pos);
		item = (key != NULL) ? group_from_index(re, key) : NULL;
		if (item == NULL || PyTuple_SetItem(result, pos, item) < 0) {
			/* any failure abandons the partially filled tuple */
			Py_DECREF(result);
			return NULL;
		}
	}
	return result;
}
static struct PyMethodDef reg_methods[] = { static struct PyMethodDef reg_methods[] = {
{"match", (PyCFunction)reg_match}, {"match", (PyCFunction)regobj_match, 1},
{"search", (PyCFunction)reg_search}, {"search", (PyCFunction)regobj_search, 1},
{"group", (PyCFunction)reg_group}, {"group", (PyCFunction)regobj_group, 1},
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };
/* Attribute names reported through the regex object's __members__
   attribute.  The table is terminated by a NULL entry; keep the
   sentinel last when adding names. */
static char *members[] = {
	"last",
	"regs",
	"translate",
	"groupindex",
	"realpat",
	"givenpat",
	NULL	/* sentinel */
};
static PyObject * static PyObject *
reg_getattr(re, name) regobj_getattr(re, name)
regexobject *re; regexobject *re;
char *name; char *name;
{ {
@ -310,18 +346,25 @@ reg_getattr(re, name)
return re->re_givenpat; return re->re_givenpat;
} }
if (strcmp(name, "__members__") == 0) { if (strcmp(name, "__members__") == 0) {
PyObject *list = PyList_New(6); int i = 0;
if (list) { PyObject *list = NULL;
PyList_SetItem(list, 0, PyString_FromString("last"));
PyList_SetItem(list, 1, PyString_FromString("regs")); /* okay, so it's unlikely this list will change that often.
PyList_SetItem(list, 2, PyString_FromString("translate")); still, it's easier to change it in just one place.
PyList_SetItem(list, 3, PyString_FromString("groupindex")); */
PyList_SetItem(list, 4, PyString_FromString("realpat")); while (members[i])
PyList_SetItem(list, 5, PyString_FromString("givenpat")); i++;
if (PyErr_Occurred()) { if (!(list = PyList_New(i)))
return NULL;
i = 0;
while (members[i]) {
PyObject* v = PyString_FromString(members[i]);
if (!v || PyList_SetItem(list, i, v) < 0) {
Py_DECREF(list); Py_DECREF(list);
list = NULL; return NULL;
} }
i++;
} }
return list; return list;
} }
@ -337,12 +380,18 @@ static PyTypeObject Regextype = {
/* methods */ /* methods */
(destructor)reg_dealloc, /*tp_dealloc*/ (destructor)reg_dealloc, /*tp_dealloc*/
0, /*tp_print*/ 0, /*tp_print*/
(getattrfunc)reg_getattr, /*tp_getattr*/ (getattrfunc)regobj_getattr, /*tp_getattr*/
0, /*tp_setattr*/ 0, /*tp_setattr*/
0, /*tp_compare*/ 0, /*tp_compare*/
0, /*tp_repr*/ 0, /*tp_repr*/
}; };
/* reference counting invariants:
pattern: borrowed
translate: borrowed
givenpat: borrowed
groupindex: transferred
*/
static PyObject * static PyObject *
newregexobject(pattern, translate, givenpat, groupindex) newregexobject(pattern, translate, givenpat, groupindex)
PyObject *pattern; PyObject *pattern;
@ -351,8 +400,11 @@ newregexobject(pattern, translate, givenpat, groupindex)
PyObject *groupindex; PyObject *groupindex;
{ {
regexobject *re; regexobject *re;
char *pat = PyString_AsString(pattern); char *pat;
int size = PyString_Size(pattern); int size;
if (!PyArg_Parse(pattern, "s#", &pat, &size))
return NULL;
if (translate != NULL && PyString_Size(translate) != 256) { if (translate != NULL && PyString_Size(translate) != 256) {
PyErr_SetString(RegexError, PyErr_SetString(RegexError,
@ -365,11 +417,14 @@ newregexobject(pattern, translate, givenpat, groupindex)
re->re_patbuf.buffer = NULL; re->re_patbuf.buffer = NULL;
re->re_patbuf.allocated = 0; re->re_patbuf.allocated = 0;
re->re_patbuf.fastmap = re->re_fastmap; re->re_patbuf.fastmap = re->re_fastmap;
if (translate) if (translate) {
re->re_patbuf.translate = PyString_AsString(translate); re->re_patbuf.translate = PyString_AsString(translate);
if (!re->re_patbuf.translate)
goto finally;
Py_INCREF(translate);
}
else else
re->re_patbuf.translate = NULL; re->re_patbuf.translate = NULL;
Py_XINCREF(translate);
re->re_translate = translate; re->re_translate = translate;
re->re_lastok = NULL; re->re_lastok = NULL;
re->re_groupindex = groupindex; re->re_groupindex = groupindex;
@ -380,11 +435,13 @@ newregexobject(pattern, translate, givenpat, groupindex)
error = re_compile_pattern(pat, size, &re->re_patbuf); error = re_compile_pattern(pat, size, &re->re_patbuf);
if (error != NULL) { if (error != NULL) {
PyErr_SetString(RegexError, error); PyErr_SetString(RegexError, error);
Py_DECREF(re); goto finally;
re = NULL;
} }
} }
return (PyObject *)re; return (PyObject *)re;
finally:
Py_DECREF(re);
return NULL;
} }
static PyObject * static PyObject *
@ -394,11 +451,9 @@ regex_compile(self, args)
{ {
PyObject *pat = NULL; PyObject *pat = NULL;
PyObject *tran = NULL; PyObject *tran = NULL;
if (!PyArg_Parse(args, "S", &pat)) {
PyErr_Clear(); if (!PyArg_ParseTuple(args, "S|S", &pat, &tran))
if (!PyArg_Parse(args, "(SS)", &pat, &tran))
return NULL; return NULL;
}
return newregexobject(pat, tran, pat, NULL); return newregexobject(pat, tran, pat, NULL);
} }
@ -407,26 +462,31 @@ symcomp(pattern, gdict)
PyObject *pattern; PyObject *pattern;
PyObject *gdict; PyObject *gdict;
{ {
char *opat = PyString_AsString(pattern); char *opat, *oend, *o, *n, *g, *v;
char *oend = opat + PyString_Size(pattern);
int group_count = 0; int group_count = 0;
int sz;
int escaped = 0; int escaped = 0;
char *o = opat;
char *n;
char name_buf[128]; char name_buf[128];
char *g;
PyObject *npattern; PyObject *npattern;
int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1; int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
if (!(opat = PyString_AsString(pattern)))
return NULL;
if ((sz = PyString_Size(pattern)) < 0)
return NULL;
oend = opat + sz;
o = opat;
if (oend == opat) { if (oend == opat) {
Py_INCREF(pattern); Py_INCREF(pattern);
return pattern; return pattern;
} }
npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern)); if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
if (npattern == NULL) !(n = PyString_AsString(npattern)))
return NULL; return NULL;
n = PyString_AsString(npattern);
while (o < oend) { while (o < oend) {
if (*o == '(' && escaped == require_escape) { if (*o == '(' && escaped == require_escape) {
@ -448,8 +508,11 @@ symcomp(pattern, gdict)
*g++ = '\0'; *g++ = '\0';
group_name = PyString_FromString(name_buf); group_name = PyString_FromString(name_buf);
group_index = PyInt_FromLong(group_count); group_index = PyInt_FromLong(group_count);
if (group_name == NULL || group_index == NULL if (group_name == NULL ||
|| PyDict_SetItem(gdict, group_name, group_index) != 0) { group_index == NULL ||
PyDict_SetItem(gdict, group_name,
group_index) != 0)
{
Py_XDECREF(group_name); Py_XDECREF(group_name);
Py_XDECREF(group_index); Py_XDECREF(group_index);
Py_XDECREF(npattern); Py_XDECREF(npattern);
@ -488,7 +551,12 @@ symcomp(pattern, gdict)
} }
} }
if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0) if (!(v = PyString_AsString(npattern))) {
Py_DECREF(npattern);
return NULL;
}
/* _PyString_Resize() decrements npattern on failure */
if (_PyString_Resize(&npattern, n - v) == 0)
return npattern; return npattern;
else { else {
return NULL; return NULL;
@ -505,14 +573,12 @@ regex_symcomp(self, args)
PyObject *tran = NULL; PyObject *tran = NULL;
PyObject *gdict = NULL; PyObject *gdict = NULL;
PyObject *npattern; PyObject *npattern;
if (!PyArg_Parse(args, "S", &pattern)) {
PyErr_Clear(); if (!PyArg_ParseTuple(args, "S|S", &pattern, &tran))
if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
return NULL; return NULL;
}
gdict = PyDict_New(); gdict = PyDict_New();
if (gdict == NULL if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
|| (npattern = symcomp(pattern, gdict)) == NULL) {
Py_DECREF(gdict); Py_DECREF(gdict);
Py_DECREF(pattern); Py_DECREF(pattern);
return NULL; return NULL;
@ -528,17 +594,27 @@ static int
update_cache(pat) update_cache(pat)
PyObject *pat; PyObject *pat;
{ {
PyObject *tuple = Py_BuildValue("(O)", pat);
int status = 0;
if (!tuple)
return -1;
if (pat != cache_pat) { if (pat != cache_pat) {
Py_XDECREF(cache_pat); Py_XDECREF(cache_pat);
cache_pat = NULL; cache_pat = NULL;
Py_XDECREF(cache_prog); Py_XDECREF(cache_prog);
cache_prog = regex_compile((PyObject *)NULL, pat); cache_prog = regex_compile((PyObject *)NULL, tuple);
if (cache_prog == NULL) if (cache_prog == NULL) {
return -1; status = -1;
goto finally;
}
cache_pat = pat; cache_pat = pat;
Py_INCREF(cache_pat); Py_INCREF(cache_pat);
} }
return 0; finally:
Py_DECREF(tuple);
return status;
} }
static PyObject * static PyObject *
@ -547,11 +623,18 @@ regex_match(self, args)
PyObject *args; PyObject *args;
{ {
PyObject *pat, *string; PyObject *pat, *string;
PyObject *tuple, *v;
if (!PyArg_Parse(args, "(SS)", &pat, &string)) if (!PyArg_Parse(args, "(SS)", &pat, &string))
return NULL; return NULL;
if (update_cache(pat) < 0) if (update_cache(pat) < 0)
return NULL; return NULL;
return reg_match((regexobject *)cache_prog, string);
if (!(tuple = Py_BuildValue("(S)", string)))
return NULL;
v = regobj_match((regexobject *)cache_prog, tuple);
Py_DECREF(tuple);
return v;
} }
static PyObject * static PyObject *
@ -560,27 +643,40 @@ regex_search(self, args)
PyObject *args; PyObject *args;
{ {
PyObject *pat, *string; PyObject *pat, *string;
PyObject *tuple, *v;
if (!PyArg_Parse(args, "(SS)", &pat, &string)) if (!PyArg_Parse(args, "(SS)", &pat, &string))
return NULL; return NULL;
if (update_cache(pat) < 0) if (update_cache(pat) < 0)
return NULL; return NULL;
return reg_search((regexobject *)cache_prog, string);
if (!(tuple = Py_BuildValue("(S)", string)))
return NULL;
v = regobj_search((regexobject *)cache_prog, tuple);
Py_DECREF(tuple);
return v;
} }
static PyObject * static PyObject *
regex_set_syntax(self, args) regex_set_syntax(self, args)
PyObject *self, *args; PyObject *self;
PyObject *args;
{ {
int syntax; int syntax;
if (!PyArg_Parse(args, "i", &syntax)) if (!PyArg_Parse(args, "i", &syntax))
return NULL; return NULL;
syntax = re_set_syntax(syntax); syntax = re_set_syntax(syntax);
/* wipe the global pattern cache */
Py_XDECREF(cache_pat);
cache_pat = NULL;
Py_XDECREF(cache_prog);
cache_prog = NULL;
return PyInt_FromLong((long)syntax); return PyInt_FromLong((long)syntax);
} }
static struct PyMethodDef regex_global_methods[] = { static struct PyMethodDef regex_global_methods[] = {
{"compile", regex_compile, 0}, {"compile", regex_compile, 1},
{"symcomp", regex_symcomp, 0}, {"symcomp", regex_symcomp, 1},
{"match", regex_match, 0}, {"match", regex_match, 0},
{"search", regex_search, 0}, {"search", regex_search, 0},
{"set_syntax", regex_set_syntax, 0}, {"set_syntax", regex_set_syntax, 0},
@ -591,27 +687,36 @@ void
initregex() initregex()
{ {
PyObject *m, *d, *v; PyObject *m, *d, *v;
int i;
char *s;
m = Py_InitModule("regex", regex_global_methods); m = Py_InitModule("regex", regex_global_methods);
d = PyModule_GetDict(m); d = PyModule_GetDict(m);
/* Initialize regex.error exception */ /* Initialize regex.error exception */
RegexError = PyString_FromString("regex.error"); v = RegexError = PyString_FromString("regex.error");
if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0) if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
Py_FatalError("can't define regex.error"); goto finally;
/* Initialize regex.casefold constant */ /* Initialize regex.casefold constant */
v = PyString_FromStringAndSize((char *)NULL, 256); if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
if (v != NULL) { goto finally;
int i;
char *s = PyString_AsString(v); if (!(s = PyString_AsString(v)))
goto finally;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
if (isupper(i)) if (isupper(i))
s[i] = tolower(i); s[i] = tolower(i);
else else
s[i] = i; s[i] = i;
} }
PyDict_SetItemString(d, "casefold", v); if (PyDict_SetItemString(d, "casefold", v) < 0)
goto finally;
Py_DECREF(v); Py_DECREF(v);
}
if (!PyErr_Occurred())
return;
finally:
Py_FatalError("can't initialize regex module");
} }