1991-12-29 21:42:57 -04:00
|
|
|
|
/*
|
|
|
|
|
XXX support range parameter on search
|
|
|
|
|
XXX support mstop parameter on search
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Regular expression objects */
|
1992-01-19 12:31:57 -04:00
|
|
|
|
/* This uses Tatu Ylonen's copyleft-free reimplementation of
|
|
|
|
|
GNU regular expressions */
|
1991-12-29 21:42:57 -04:00
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
#include "Python.h"
|
1991-12-29 21:42:57 -04:00
|
|
|
|
|
1996-12-05 19:43:35 -04:00
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
1992-01-19 12:31:57 -04:00
|
|
|
|
#include "regexpr.h"
|
1991-12-29 21:42:57 -04:00
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyObject *RegexError; /* Exception */
|
1991-12-29 21:42:57 -04:00
|
|
|
|
|
|
|
|
|
typedef struct {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyObject_HEAD
|
1991-12-29 21:42:57 -04:00
|
|
|
|
struct re_pattern_buffer re_patbuf; /* The compiled expression */
|
|
|
|
|
struct re_registers re_regs; /* The registers from the last match */
|
|
|
|
|
char re_fastmap[256]; /* Storage for fastmap */
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyObject *re_translate; /* String object for translate table */
|
|
|
|
|
PyObject *re_lastok; /* String object last matched/searched */
|
|
|
|
|
PyObject *re_groupindex; /* Group name to index dictionary */
|
|
|
|
|
PyObject *re_givenpat; /* Pattern with symbolic groups */
|
|
|
|
|
PyObject *re_realpat; /* Pattern without symbolic groups */
|
1991-12-29 21:42:57 -04:00
|
|
|
|
} regexobject;
|
|
|
|
|
|
|
|
|
|
/* Regex object methods */
|
|
|
|
|
|
|
|
|
|
static void
|
2000-07-10 10:05:29 -03:00
|
|
|
|
reg_dealloc(regexobject *re)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
2000-05-03 20:44:39 -03:00
|
|
|
|
if (re->re_patbuf.buffer)
|
2000-07-11 21:49:17 -03:00
|
|
|
|
free(re->re_patbuf.buffer);
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_XDECREF(re->re_translate);
|
|
|
|
|
Py_XDECREF(re->re_lastok);
|
|
|
|
|
Py_XDECREF(re->re_groupindex);
|
|
|
|
|
Py_XDECREF(re->re_givenpat);
|
|
|
|
|
Py_XDECREF(re->re_realpat);
|
2000-05-03 20:44:39 -03:00
|
|
|
|
PyObject_Del(re);
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyObject *
|
2000-07-10 10:05:29 -03:00
|
|
|
|
makeresult(struct re_registers *regs)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
1996-10-08 11:18:42 -03:00
|
|
|
|
PyObject *v;
|
|
|
|
|
int i;
|
|
|
|
|
static PyObject *filler = NULL;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
1996-10-08 11:18:42 -03:00
|
|
|
|
if (filler == NULL) {
|
|
|
|
|
filler = Py_BuildValue("(ii)", -1, -1);
|
|
|
|
|
if (filler == NULL)
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
v = PyTuple_New(RE_NREGS);
|
|
|
|
|
if (v == NULL)
|
|
|
|
|
return NULL;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
1996-10-08 11:18:42 -03:00
|
|
|
|
for (i = 0; i < RE_NREGS; i++) {
|
|
|
|
|
int lo = regs->start[i];
|
|
|
|
|
int hi = regs->end[i];
|
|
|
|
|
PyObject *w;
|
|
|
|
|
if (lo == -1 && hi == -1) {
|
|
|
|
|
w = filler;
|
|
|
|
|
Py_INCREF(w);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
w = Py_BuildValue("(ii)", lo, hi);
|
1996-12-20 17:56:07 -04:00
|
|
|
|
if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
|
|
|
|
|
Py_DECREF(v);
|
1996-10-08 11:18:42 -03:00
|
|
|
|
return NULL;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return v;
|
|
|
|
|
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyObject *
|
2000-07-10 10:05:29 -03:00
|
|
|
|
regobj_match(regexobject *re, PyObject *args)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
1997-05-12 13:04:09 -03:00
|
|
|
|
PyObject *argstring;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
char *buffer;
|
1992-01-27 12:46:19 -04:00
|
|
|
|
int size;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
int offset = 0;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
int result;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
2000-02-29 09:59:29 -04:00
|
|
|
|
if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
|
1997-05-12 13:04:09 -03:00
|
|
|
|
return NULL;
|
1998-10-07 23:25:24 -03:00
|
|
|
|
if (!PyArg_Parse(argstring, "t#", &buffer, &size))
|
1996-12-20 17:56:07 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (offset < 0 || offset > size) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyErr_SetString(RegexError, "match offset out of range");
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_XDECREF(re->re_lastok);
|
1993-02-21 16:12:16 -04:00
|
|
|
|
re->re_lastok = NULL;
|
1997-08-18 12:31:24 -03:00
|
|
|
|
result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
|
1997-08-14 11:35:12 -03:00
|
|
|
|
&re->re_regs);
|
1991-12-29 21:42:57 -04:00
|
|
|
|
if (result < -1) {
|
1997-08-13 19:34:14 -03:00
|
|
|
|
/* Serious failure of some sort; if re_match didn't
|
|
|
|
|
set an exception, raise a generic error */
|
|
|
|
|
if (!PyErr_Occurred())
|
|
|
|
|
PyErr_SetString(RegexError, "match failure");
|
1991-12-29 21:42:57 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (result >= 0) {
|
1997-05-12 13:04:09 -03:00
|
|
|
|
Py_INCREF(argstring);
|
|
|
|
|
re->re_lastok = argstring;
|
1993-02-21 16:12:16 -04:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
return PyInt_FromLong((long)result); /* Length of the match or -1 */
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
1992-01-27 12:46:19 -04:00
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyObject *
|
2000-07-10 10:05:29 -03:00
|
|
|
|
regobj_search(regexobject *re, PyObject *args)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
1997-05-12 13:04:09 -03:00
|
|
|
|
PyObject *argstring;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
char *buffer;
|
|
|
|
|
int size;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
int offset = 0;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
int range;
|
|
|
|
|
int result;
|
1992-01-27 12:46:19 -04:00
|
|
|
|
|
2000-02-29 09:59:29 -04:00
|
|
|
|
if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
|
1997-05-12 13:04:09 -03:00
|
|
|
|
return NULL;
|
2000-02-29 09:59:29 -04:00
|
|
|
|
if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
|
1996-12-20 17:56:07 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (offset < 0 || offset > size) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyErr_SetString(RegexError, "search offset out of range");
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return NULL;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
1992-01-27 12:46:19 -04:00
|
|
|
|
/* NB: In Emacs 18.57, the documentation for re_search[_2] and
|
|
|
|
|
the implementation don't match: the documentation states that
|
|
|
|
|
|range| positions are tried, while the code tries |range|+1
|
|
|
|
|
positions. It seems more productive to believe the code! */
|
1992-01-26 14:12:41 -04:00
|
|
|
|
range = size - offset;
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_XDECREF(re->re_lastok);
|
1993-02-21 16:12:16 -04:00
|
|
|
|
re->re_lastok = NULL;
|
1997-08-18 12:31:24 -03:00
|
|
|
|
result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
|
1991-12-29 21:42:57 -04:00
|
|
|
|
&re->re_regs);
|
|
|
|
|
if (result < -1) {
|
1997-08-13 19:34:14 -03:00
|
|
|
|
/* Serious failure of some sort; if re_match didn't
|
|
|
|
|
set an exception, raise a generic error */
|
|
|
|
|
if (!PyErr_Occurred())
|
|
|
|
|
PyErr_SetString(RegexError, "match failure");
|
1991-12-29 21:42:57 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (result >= 0) {
|
1997-05-12 13:04:09 -03:00
|
|
|
|
Py_INCREF(argstring);
|
|
|
|
|
re->re_lastok = argstring;
|
1993-02-21 16:12:16 -04:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
return PyInt_FromLong((long)result); /* Position of the match or -1 */
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
|
|
|
|
|
1996-12-20 17:56:07 -04:00
|
|
|
|
/* get the group from the regex where index can be a string (group name) or
|
|
|
|
|
an integer index [0 .. 99]
|
|
|
|
|
*/
|
|
|
|
|
static PyObject*
|
2000-07-10 10:05:29 -03:00
|
|
|
|
group_from_index(regexobject *re, PyObject *index)
|
1993-02-21 16:12:16 -04:00
|
|
|
|
{
|
|
|
|
|
int i, a, b;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
char *v;
|
|
|
|
|
|
|
|
|
|
if (PyString_Check(index))
|
|
|
|
|
if (re->re_groupindex == NULL ||
|
|
|
|
|
!(index = PyDict_GetItem(re->re_groupindex, index)))
|
|
|
|
|
{
|
|
|
|
|
PyErr_SetString(RegexError,
|
|
|
|
|
"group() group name doesn't exist");
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return NULL;
|
1994-08-01 08:34:53 -03:00
|
|
|
|
}
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
|
|
|
|
i = PyInt_AsLong(index);
|
|
|
|
|
if (i == -1 && PyErr_Occurred())
|
|
|
|
|
return NULL;
|
|
|
|
|
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (i < 0 || i >= RE_NREGS) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyErr_SetString(RegexError, "group() index out of range");
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
if (re->re_lastok == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyErr_SetString(RegexError,
|
1996-12-20 17:56:07 -04:00
|
|
|
|
"group() only valid after successful match/search");
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
a = re->re_regs.start[i];
|
|
|
|
|
b = re->re_regs.end[i];
|
|
|
|
|
if (a < 0 || b < 0) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1993-02-21 16:12:16 -04:00
|
|
|
|
}
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
|
|
|
|
if (!(v = PyString_AsString(re->re_lastok)))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
return PyString_FromStringAndSize(v+a, b-a);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static PyObject *
|
2000-07-10 10:05:29 -03:00
|
|
|
|
regobj_group(regexobject *re, PyObject *args)
|
1996-12-20 17:56:07 -04:00
|
|
|
|
{
|
|
|
|
|
int n = PyTuple_Size(args);
|
|
|
|
|
int i;
|
|
|
|
|
PyObject *res = NULL;
|
|
|
|
|
|
|
|
|
|
if (n < 0)
|
|
|
|
|
return NULL;
|
|
|
|
|
if (n == 0) {
|
|
|
|
|
PyErr_SetString(PyExc_TypeError, "not enough arguments");
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
if (n == 1) {
|
|
|
|
|
/* return value is a single string */
|
|
|
|
|
PyObject *index = PyTuple_GetItem(args, 0);
|
|
|
|
|
if (!index)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
return group_from_index(re, index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* return value is a tuple */
|
|
|
|
|
if (!(res = PyTuple_New(n)))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
|
PyObject *index = PyTuple_GetItem(args, i);
|
|
|
|
|
PyObject *group = NULL;
|
|
|
|
|
|
|
|
|
|
if (!index)
|
|
|
|
|
goto finally;
|
|
|
|
|
if (!(group = group_from_index(re, index)))
|
|
|
|
|
goto finally;
|
|
|
|
|
if (PyTuple_SetItem(res, i, group) < 0)
|
|
|
|
|
goto finally;
|
|
|
|
|
}
|
|
|
|
|
return res;
|
|
|
|
|
|
|
|
|
|
finally:
|
|
|
|
|
Py_DECREF(res);
|
|
|
|
|
return NULL;
|
1993-02-21 16:12:16 -04:00
|
|
|
|
}
|
|
|
|
|
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static struct PyMethodDef reg_methods[] = {
|
1996-12-20 17:56:07 -04:00
|
|
|
|
{"match", (PyCFunction)regobj_match, 1},
|
|
|
|
|
{"search", (PyCFunction)regobj_search, 1},
|
|
|
|
|
{"group", (PyCFunction)regobj_group, 1},
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{NULL, NULL} /* sentinel */
|
|
|
|
|
};
|
|
|
|
|
|
1996-12-20 17:56:07 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static char* members[] = {
|
|
|
|
|
"last", "regs", "translate",
|
|
|
|
|
"groupindex", "realpat", "givenpat",
|
|
|
|
|
NULL
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyObject *
|
2000-07-10 10:05:29 -03:00
|
|
|
|
regobj_getattr(regexobject *re, char *name)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
1992-01-01 10:52:16 -04:00
|
|
|
|
if (strcmp(name, "regs") == 0) {
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (re->re_lastok == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1992-01-01 10:52:16 -04:00
|
|
|
|
}
|
|
|
|
|
return makeresult(&re->re_regs);
|
|
|
|
|
}
|
1993-02-21 16:12:16 -04:00
|
|
|
|
if (strcmp(name, "last") == 0) {
|
|
|
|
|
if (re->re_lastok == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1993-02-21 16:12:16 -04:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(re->re_lastok);
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return re->re_lastok;
|
|
|
|
|
}
|
1993-02-23 09:42:39 -04:00
|
|
|
|
if (strcmp(name, "translate") == 0) {
|
|
|
|
|
if (re->re_translate == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1993-02-23 09:42:39 -04:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(re->re_translate);
|
1993-02-23 09:42:39 -04:00
|
|
|
|
return re->re_translate;
|
|
|
|
|
}
|
1994-08-01 08:34:53 -03:00
|
|
|
|
if (strcmp(name, "groupindex") == 0) {
|
|
|
|
|
if (re->re_groupindex == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1994-08-01 08:34:53 -03:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(re->re_groupindex);
|
1994-08-01 08:34:53 -03:00
|
|
|
|
return re->re_groupindex;
|
|
|
|
|
}
|
|
|
|
|
if (strcmp(name, "realpat") == 0) {
|
|
|
|
|
if (re->re_realpat == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1994-08-01 08:34:53 -03:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(re->re_realpat);
|
1994-08-01 08:34:53 -03:00
|
|
|
|
return re->re_realpat;
|
|
|
|
|
}
|
|
|
|
|
if (strcmp(name, "givenpat") == 0) {
|
|
|
|
|
if (re->re_givenpat == NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(Py_None);
|
|
|
|
|
return Py_None;
|
1994-08-01 08:34:53 -03:00
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(re->re_givenpat);
|
1994-08-01 08:34:53 -03:00
|
|
|
|
return re->re_givenpat;
|
|
|
|
|
}
|
1993-02-23 09:42:39 -04:00
|
|
|
|
if (strcmp(name, "__members__") == 0) {
|
1996-12-20 17:56:07 -04:00
|
|
|
|
int i = 0;
|
|
|
|
|
PyObject *list = NULL;
|
|
|
|
|
|
|
|
|
|
/* okay, so it's unlikely this list will change that often.
|
|
|
|
|
still, it's easier to change it in just one place.
|
|
|
|
|
*/
|
|
|
|
|
while (members[i])
|
|
|
|
|
i++;
|
|
|
|
|
if (!(list = PyList_New(i)))
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
|
while (members[i]) {
|
|
|
|
|
PyObject* v = PyString_FromString(members[i]);
|
|
|
|
|
if (!v || PyList_SetItem(list, i, v) < 0) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_DECREF(list);
|
1996-12-20 17:56:07 -04:00
|
|
|
|
return NULL;
|
1993-02-23 09:42:39 -04:00
|
|
|
|
}
|
1996-12-20 17:56:07 -04:00
|
|
|
|
i++;
|
1993-02-23 09:42:39 -04:00
|
|
|
|
}
|
|
|
|
|
return list;
|
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
return Py_FindMethod(reg_methods, (PyObject *)re, name);
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyTypeObject Regextype = {
|
|
|
|
|
PyObject_HEAD_INIT(&PyType_Type)
|
1996-12-20 17:56:07 -04:00
|
|
|
|
0, /*ob_size*/
|
|
|
|
|
"regex", /*tp_name*/
|
|
|
|
|
sizeof(regexobject), /*tp_size*/
|
|
|
|
|
0, /*tp_itemsize*/
|
1991-12-29 21:42:57 -04:00
|
|
|
|
/* methods */
|
1996-12-20 17:56:07 -04:00
|
|
|
|
(destructor)reg_dealloc, /*tp_dealloc*/
|
|
|
|
|
0, /*tp_print*/
|
|
|
|
|
(getattrfunc)regobj_getattr, /*tp_getattr*/
|
|
|
|
|
0, /*tp_setattr*/
|
|
|
|
|
0, /*tp_compare*/
|
|
|
|
|
0, /*tp_repr*/
|
1991-12-29 21:42:57 -04:00
|
|
|
|
};
|
|
|
|
|
|
1996-12-20 17:56:07 -04:00
|
|
|
|
/* reference counting invariants:
|
|
|
|
|
pattern: borrowed
|
|
|
|
|
translate: borrowed
|
|
|
|
|
givenpat: borrowed
|
|
|
|
|
groupindex: transferred
|
|
|
|
|
*/
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static PyObject *
|
2000-07-10 10:05:29 -03:00
|
|
|
|
newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
|
|
|
|
regexobject *re;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
char *pat;
|
|
|
|
|
int size;
|
1994-08-01 08:34:53 -03:00
|
|
|
|
|
1998-10-07 23:25:24 -03:00
|
|
|
|
if (!PyArg_Parse(pattern, "t#", &pat, &size))
|
1996-12-20 17:56:07 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
if (translate != NULL && PyString_Size(translate) != 256) {
|
|
|
|
|
PyErr_SetString(RegexError,
|
1996-12-20 17:56:07 -04:00
|
|
|
|
"translation table must be 256 bytes");
|
1993-02-21 16:12:16 -04:00
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2000-05-03 20:44:39 -03:00
|
|
|
|
re = PyObject_New(regexobject, &Regextype);
|
1991-12-29 21:42:57 -04:00
|
|
|
|
if (re != NULL) {
|
|
|
|
|
char *error;
|
|
|
|
|
re->re_patbuf.buffer = NULL;
|
|
|
|
|
re->re_patbuf.allocated = 0;
|
1997-08-18 12:31:24 -03:00
|
|
|
|
re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
if (translate) {
|
1997-08-18 12:31:24 -03:00
|
|
|
|
re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
|
1996-12-20 17:56:07 -04:00
|
|
|
|
if (!re->re_patbuf.translate)
|
|
|
|
|
goto finally;
|
|
|
|
|
Py_INCREF(translate);
|
|
|
|
|
}
|
1993-02-21 16:12:16 -04:00
|
|
|
|
else
|
|
|
|
|
re->re_patbuf.translate = NULL;
|
|
|
|
|
re->re_translate = translate;
|
|
|
|
|
re->re_lastok = NULL;
|
1994-08-01 08:34:53 -03:00
|
|
|
|
re->re_groupindex = groupindex;
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(pattern);
|
1994-08-01 08:34:53 -03:00
|
|
|
|
re->re_realpat = pattern;
|
1996-07-23 21:51:20 -03:00
|
|
|
|
Py_INCREF(givenpat);
|
1994-08-01 08:34:53 -03:00
|
|
|
|
re->re_givenpat = givenpat;
|
1997-09-02 21:47:36 -03:00
|
|
|
|
error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
|
1991-12-29 21:42:57 -04:00
|
|
|
|
if (error != NULL) {
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyErr_SetString(RegexError, error);
|
1996-12-20 17:56:07 -04:00
|
|
|
|
goto finally;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
|
|
|
|
}
|
1996-07-23 21:51:20 -03:00
|
|
|
|
return (PyObject *)re;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
finally:
|
|
|
|
|
Py_DECREF(re);
|
|
|
|
|
return NULL;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* regex.compile(pattern [, translate])

   Compiles `pattern` (a string) into a new regex object, optionally
   with a translate table (also a string).  The same string serves as
   both the given and the real pattern, and there is no group-name
   dictionary.  Returns NULL with an exception set on failure. */
static PyObject *
regex_compile(PyObject *self, PyObject *args)
{
	PyObject *pattern = NULL;
	PyObject *table = NULL;

	if (PyArg_ParseTuple(args, "S|S:compile", &pattern, &table))
		return newregexobject(pattern, table, pattern, NULL);
	return NULL;
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* Strip symbolic group names from a pattern.

   Scans `pattern` for group openers immediately followed by "<name>",
   where name is a non-empty run of alphanumerics and underscores.  Each
   such name is recorded in the dictionary `gdict` (name -> group
   number, counting group openers from 1) and the "<name>" text is left
   out of the result.  A '<' that does not start a well-formed,
   terminated name is copied through unchanged.  Whether a group opener
   is "\(" or "(" depends on RE_NO_BK_PARENS in the current re_syntax.

   Bracket sets are copied verbatim from '[' through the next ']' (or
   to the end of the pattern if there is none) without looking for
   groups inside them.
   NOTE(review): a ']' placed first in a set ("[]...]") also ends the
   verbatim copy here -- confirm whether that matches the compiler.

   Returns a new reference to the rewritten pattern (or to `pattern`
   itself when it is empty), or NULL with an exception set. */
static PyObject *
symcomp(PyObject *pattern, PyObject *gdict)
{
	char *opat, *oend, *o, *n, *nstart;
	int group_count = 0;
	int sz;
	int escaped = 0;
	PyObject *npattern;
	int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;

	if (!(opat = PyString_AsString(pattern)))
		return NULL;

	if ((sz = PyString_Size(pattern)) < 0)
		return NULL;

	oend = opat + sz;
	o = opat;

	if (oend == opat) {
		Py_INCREF(pattern);
		return pattern;
	}

	/* The output is never longer than the input: characters are only
	   copied or dropped. */
	if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)))
		return NULL;
	if (!(nstart = PyString_AsString(npattern))) {
		Py_DECREF(npattern);
		return NULL;
	}
	n = nstart;

	while (o < oend) {
		if (*o == '(' && escaped == require_escape) {
			char *name, *p;
			PyObject *group_name;
			PyObject *group_index;

			escaped = 0;
			++group_count;
			*n++ = *o++;
			if (o >= oend || *o != '<')
				continue;

			/* *o == '<': scan ahead for "name>" without
			   consuming anything yet. */
			name = o + 1;
			for (p = name; p < oend; p++) {
				if (!isalnum(Py_CHARMASK(*p)) && *p != '_')
					break;
			}
			if (p == name || p >= oend || *p != '>') {
				/* Empty, malformed or unterminated name: leave
				   o on the '<' so the text is copied through
				   by the following iterations. */
				continue;
			}

			/* Take the name straight from the pattern; there is
			   no fixed-size buffer, so its length is unbounded. */
			group_name = PyString_FromStringAndSize(
				name, (int)(p - name));
			group_index = PyInt_FromLong(group_count);
			if (group_name == NULL ||
			    group_index == NULL ||
			    PyDict_SetItem(gdict, group_name,
					   group_index) != 0)
			{
				Py_XDECREF(group_name);
				Py_XDECREF(group_index);
				Py_DECREF(npattern);
				return NULL;
			}
			Py_DECREF(group_name);
			Py_DECREF(group_index);
			o = p + 1;	/* skip "<name>" including the '>' */
		}
		else if (*o == '[' && !escaped) {
			*n++ = *o++;	/* the '[' itself */
			/* Copy through the closing ']', never reading or
			   writing past the end of the pattern. */
			while (o < oend) {
				char c = *o++;
				*n++ = c;
				if (c == ']')
					break;
			}
		}
		else if (*o == '\\') {
			/* A backslash preceded by a backslash is a literal
			   backslash and escapes nothing that follows. */
			escaped = !escaped;
			*n++ = *o++;
		}
		else {
			escaped = 0;
			*n++ = *o++;
		}
	}

	/* _PyString_Resize() decrements npattern on failure */
	if (_PyString_Resize(&npattern, (int)(n - nstart)) == 0)
		return npattern;
	return NULL;
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* regex.symcomp(pattern [, translate])

   Like compile(), but the pattern may contain symbolic group names.
   symcomp() strips the names and records them in a fresh dictionary,
   and the resulting regex object keeps both the pattern as given and
   the stripped pattern that was actually compiled.

   Returns a new regex object, or NULL with an exception set. */
static PyObject *
regex_symcomp(PyObject *self, PyObject *args)
{
	PyObject *pattern;
	PyObject *tran = NULL;
	PyObject *gdict;
	PyObject *npattern;
	PyObject *retval;

	/* pattern and tran are borrowed from args and must not be
	   released here. */
	if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
		return NULL;

	gdict = PyDict_New();
	if (gdict == NULL)
		return NULL;

	npattern = symcomp(pattern, gdict);
	if (npattern == NULL) {
		Py_DECREF(gdict);
		return NULL;
	}

	/* newregexobject() documents its groupindex argument as
	   "transferred", so gdict is not released here. */
	retval = newregexobject(npattern, tran, pattern, gdict);
	Py_DECREF(npattern);
	return retval;
}
|
|
|
|
|
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* One-entry cache used by the module-level match() and search():
   cache_pat is the pattern object most recently compiled and
   cache_prog is the regex object compiled from it. */
static PyObject *cache_pat;
static PyObject *cache_prog;

/* Make cache_prog the compiled form of `pat`.

   The cached program is reused only when `pat` is the very same object
   as cache_pat (pointer comparison); otherwise the old entry is dropped
   and `pat` is compiled with regex_compile().  If that compilation
   fails, both cache slots are left NULL.

   Returns 0 on success, -1 with an exception set on failure. */
static int
update_cache(PyObject *pat)
{
	PyObject *argtuple;
	int rc = 0;

	argtuple = Py_BuildValue("(O)", pat);
	if (argtuple == NULL)
		return -1;

	if (pat == cache_pat) {
		/* cache hit: nothing to recompile */
		Py_DECREF(argtuple);
		return 0;
	}

	Py_XDECREF(cache_pat);
	cache_pat = NULL;
	Py_XDECREF(cache_prog);
	cache_prog = regex_compile((PyObject *)NULL, argtuple);
	if (cache_prog != NULL) {
		cache_pat = pat;
		Py_INCREF(cache_pat);
	}
	else {
		rc = -1;
	}

	Py_DECREF(argtuple);
	return rc;
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* regex.match(pattern, string)

   Module-level convenience: compiles `pattern` through the one-entry
   cache, then calls the match() method of the cached regex object on
   `string`.  Returns whatever that method returns, or NULL with an
   exception set. */
static PyObject *
regex_match(PyObject *self, PyObject *args)
{
	PyObject *pat, *string;
	PyObject *call_args;
	PyObject *result;

	if (!PyArg_Parse(args, "(SS)", &pat, &string))
		return NULL;
	if (update_cache(pat) < 0)
		return NULL;

	/* regobj_match() expects an argument tuple, so wrap the string. */
	call_args = Py_BuildValue("(S)", string);
	if (call_args == NULL)
		return NULL;

	result = regobj_match((regexobject *)cache_prog, call_args);
	Py_DECREF(call_args);
	return result;
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* regex.search(pattern, string)

   Module-level convenience: compiles `pattern` through the one-entry
   cache, then calls the search() method of the cached regex object on
   `string`.  Returns whatever that method returns, or NULL with an
   exception set. */
static PyObject *
regex_search(PyObject *self, PyObject *args)
{
	PyObject *pat, *string;
	PyObject *call_args;
	PyObject *result;

	if (!PyArg_Parse(args, "(SS)", &pat, &string))
		return NULL;
	if (update_cache(pat) < 0)
		return NULL;

	/* regobj_search() expects an argument tuple, so wrap the string. */
	call_args = Py_BuildValue("(S)", string);
	if (call_args == NULL)
		return NULL;

	result = regobj_search((regexobject *)cache_prog, call_args);
	Py_DECREF(call_args);
	return result;
}
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
/* regex.set_syntax(syntax)

   Passes the integer `syntax` to re_set_syntax() and returns, as a
   Python int, the value that call hands back.  The module-level
   pattern cache is emptied as well, since its program was compiled
   before the syntax change. */
static PyObject *
regex_set_syntax(PyObject *self, PyObject *args)
{
	int requested;
	int returned;

	if (!PyArg_Parse(args, "i", &requested))
		return NULL;
	returned = re_set_syntax(requested);

	/* wipe the global pattern cache */
	Py_XDECREF(cache_pat);
	cache_pat = NULL;
	Py_XDECREF(cache_prog);
	cache_prog = NULL;

	return PyInt_FromLong((long)returned);
}
|
|
|
|
|
|
1997-02-18 14:48:50 -04:00
|
|
|
|
/* regex.get_syntax()

   Takes no arguments; returns the current value of re_syntax as a
   Python int. */
static PyObject *
regex_get_syntax(PyObject *self, PyObject *args)
{
	if (PyArg_Parse(args, ""))
		return PyInt_FromLong((long)re_syntax);
	return NULL;
}
|
|
|
|
|
|
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
static struct PyMethodDef regex_global_methods[] = {
|
1996-12-20 17:56:07 -04:00
|
|
|
|
{"compile", regex_compile, 1},
|
|
|
|
|
{"symcomp", regex_symcomp, 1},
|
1995-02-19 11:55:19 -04:00
|
|
|
|
{"match", regex_match, 0},
|
|
|
|
|
{"search", regex_search, 0},
|
|
|
|
|
{"set_syntax", regex_set_syntax, 0},
|
1997-02-18 14:48:50 -04:00
|
|
|
|
{"get_syntax", regex_get_syntax, 0},
|
1996-12-20 17:56:07 -04:00
|
|
|
|
{NULL, NULL} /* sentinel */
|
1991-12-29 21:42:57 -04:00
|
|
|
|
};
|
|
|
|
|
|
1998-12-04 14:50:17 -04:00
|
|
|
|
DL_EXPORT(void)
|
2000-07-21 03:00:07 -03:00
|
|
|
|
initregex(void)
|
1991-12-29 21:42:57 -04:00
|
|
|
|
{
|
1996-07-23 21:51:20 -03:00
|
|
|
|
PyObject *m, *d, *v;
|
1996-12-20 17:56:07 -04:00
|
|
|
|
int i;
|
|
|
|
|
char *s;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
|
1996-07-23 21:51:20 -03:00
|
|
|
|
m = Py_InitModule("regex", regex_global_methods);
|
|
|
|
|
d = PyModule_GetDict(m);
|
1991-12-29 21:42:57 -04:00
|
|
|
|
|
|
|
|
|
/* Initialize regex.error exception */
|
1997-10-01 01:29:29 -03:00
|
|
|
|
v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
|
1996-12-20 17:56:07 -04:00
|
|
|
|
if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
|
|
|
|
|
goto finally;
|
|
|
|
|
|
1993-02-23 09:42:39 -04:00
|
|
|
|
/* Initialize regex.casefold constant */
|
1996-12-20 17:56:07 -04:00
|
|
|
|
if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
|
|
|
|
|
goto finally;
|
|
|
|
|
|
|
|
|
|
if (!(s = PyString_AsString(v)))
|
|
|
|
|
goto finally;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 256; i++) {
|
|
|
|
|
if (isupper(i))
|
|
|
|
|
s[i] = tolower(i);
|
|
|
|
|
else
|
|
|
|
|
s[i] = i;
|
1993-02-23 09:42:39 -04:00
|
|
|
|
}
|
1996-12-20 17:56:07 -04:00
|
|
|
|
if (PyDict_SetItemString(d, "casefold", v) < 0)
|
|
|
|
|
goto finally;
|
|
|
|
|
Py_DECREF(v);
|
|
|
|
|
|
|
|
|
|
if (!PyErr_Occurred())
|
|
|
|
|
return;
|
|
|
|
|
finally:
|
1997-10-01 01:29:29 -03:00
|
|
|
|
/* Nothing */ ;
|
1991-12-29 21:42:57 -04:00
|
|
|
|
}
|