Jeffrey's latest changes

This commit is contained in:
Guido van Rossum 1997-07-17 22:41:38 +00:00
parent 9e18ec7dc9
commit 74fb303997
3 changed files with 165 additions and 32 deletions

View File

@ -1,7 +1,3 @@
/*
* -*- mode: c-mode; c-file-style: python -*-
*/
/* regexpr.c
*
* Author: Tatu Ylonen <ylo@ngs.fi>
@ -472,11 +468,10 @@ static int regexp_ansi_sequences;
#define MAX_NESTING 100 /* max nesting level of operators */
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
#define Sword 1
static char re_syntax_table[256];
char re_syntax_table[256];
static void re_compile_initialize(void)
void re_compile_initialize(void)
{
int a;
@ -491,7 +486,11 @@ static void re_compile_initialize(void)
for (a = 'A'; a <= 'Z'; a++)
re_syntax_table[a] = Sword;
for (a = '0'; a <= '9'; a++)
re_syntax_table[a] = Sword;
re_syntax_table[a] = Sword | Sdigit;
re_syntax_table['_'] = Sword;
for (a = 9; a <= 13; a++)
re_syntax_table[a] = Swhitespace;
re_syntax_table[' '] = Swhitespace;
}
re_compile_initialized = 1;
for (a = 0; a < 256; a++)
@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
return; /* we have already been here */
visited[pos] = 1;
for (;;)
switch (code[pos++])
{
switch (code[pos++]) {
case Cend:
{
*can_be_null = 1;
return;
}
{
*can_be_null = 1;
return;
}
case Cbol:
case Cbegbuf:
case Cendbuf:
@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
NEW_STATE(state, bufp->num_registers);
if (!re_compile_initialized)
re_compile_initialize();
continue_matching:
switch (*code++)
{
@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp,
{
    /* Word-start assertion (the case label lies outside this hunk;
     * presumably Cwordbeg -- confirm against the full file).  Succeeds
     * only when *text is a word character and the preceding character,
     * if there is one, is not. */
    if (text == textend)
        goto fail;
    /* The current character must carry the Sword bit.  The equality
     * test this replaces was `SYNTAX(*text) != Sword`, so the bitmask
     * form needs the negation; without it every genuine word start
     * is rejected. */
    if (!(SYNTAX(*text) & Sword))
        goto fail;
    if (text == textstart)
        goto continue_matching;
    if (!(SYNTAX(text[-1]) & Sword))
        goto continue_matching;
    goto fail;
}
@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp,
{
if (text == textstart)
goto fail;
if (SYNTAX(text[-1]) != Sword)
if (!(SYNTAX(text[-1]) & Sword))
goto fail;
if (text == textend)
goto continue_matching;
if (SYNTAX(*text) == Sword)
if (SYNTAX(*text) & Sword)
goto fail;
goto continue_matching;
}
@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
if (text == textstart || text == textend)
goto continue_matching;
if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
goto continue_matching;
goto fail;
}
@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
* beginning and end of buffer. */
if (text == textstart || text == textend)
goto fail;
if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
goto fail;
goto continue_matching;
}
case Csyntaxspec:
{
NEXTCHAR(ch);
if (SYNTAX(ch) != (unsigned char)*code++)
if (!(SYNTAX(ch) & (unsigned char)*code++))
goto fail;
goto continue_matching;
}
case Cnotsyntaxspec:
{
NEXTCHAR(ch);
if (SYNTAX(ch) != (unsigned char)*code++)
if (SYNTAX(ch) & (unsigned char)*code++)
break;
goto continue_matching;
}
@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
}
return -1;
}
/*
** Local Variables:
** mode: c
** c-file-style: "python"
** End:
*/

View File

@ -67,10 +67,16 @@ typedef struct re_registers
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
/* Character-class bits stored in re_syntax_table[].  They are distinct
 * powers of two so a single table entry can carry several classes at
 * once (digits are set to Sword | Sdigit), and membership is tested
 * with `&` rather than `==`. */
#define Sword 1
#define Swhitespace 2
#define Sdigit 4
/* Rename all exported symbols to avoid conflicts with similarly named
symbols in some systems' standard C libraries... */
#define re_syntax _Py_re_syntax
#define re_syntax_table _Py_re_syntax_table
#define re_compile_initialize _Py_re_compile_initialize
#define re_set_syntax _Py_re_set_syntax
#define re_compile_pattern _Py_re_compile_pattern
#define re_match _Py_re_match
@ -85,6 +91,10 @@ extern int re_syntax;
/* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */
extern char re_syntax_table[256];
void re_compile_initialize(void);
int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */
@ -133,6 +143,8 @@ int re_exec(char *s);
#else /* HAVE_PROTOTYPES */
extern int re_syntax;
extern char re_syntax_table[256];
void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();

View File

@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
static PyObject *ReopError; /* Exception */
/* Bit flags tested against the `flags` argument of the match/search
 * entry points.  In the code visible in this change only IGNORECASE is
 * consulted (it selects the module's "casefold" string as the translate
 * table); the remaining flags are defined but not referenced here --
 * presumably they mirror the Python-level flag values (TODO confirm). */
#define IGNORECASE 0x01
#define MULTILINE 0x02
#define DOTALL 0x04
#define VERBOSE 0x08
static char *reop_casefold;
static PyObject *
makeresult(regs, num_regs)
struct re_registers *regs;
@ -90,6 +97,10 @@ reop_match(self, args)
int flags, pos, result;
struct re_pattern_buffer bufp;
struct re_registers re_regs;
PyObject *modules = NULL;
PyObject *reopmodule = NULL;
PyObject *reopdict = NULL;
PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated),
@ -102,20 +113,44 @@ reop_match(self, args)
/* XXX sanity-check the input data */
bufp.used=bufp.allocated;
bufp.translate=NULL;
if (flags & IGNORECASE)
{
if ((modules = PyImport_GetModuleDict()) == NULL)
return NULL;
if ((reopmodule = PyDict_GetItemString(modules,
"reop")) == NULL)
return NULL;
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
return NULL;
if ((casefold = PyDict_GetItemString(reopdict,
"casefold")) == NULL)
return NULL;
bufp.translate = PyString_AsString(casefold);
}
else
bufp.translate=NULL;
bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null;
bufp.uses_registers=1;
bufp.anchor=anchor;
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
for(i=0; i<bufp.num_registers; i++) {
re_regs.start[i]=-1;
re_regs.end[i]=-1;
}
result = re_match(&bufp,
string, stringlen, pos,
&re_regs);
if (result < -1) {
/* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure");
return NULL;
}
if (result == -1) {
@ -136,6 +171,10 @@ reop_search(self, args)
int flags, pos, result;
struct re_pattern_buffer bufp;
struct re_registers re_regs;
PyObject *modules = NULL;
PyObject *reopmodule = NULL;
PyObject *reopdict = NULL;
PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated),
@ -148,26 +187,51 @@ reop_search(self, args)
/* XXX sanity-check the input data */
bufp.used=bufp.allocated;
bufp.translate=NULL;
if (flags & IGNORECASE)
{
if ((modules = PyImport_GetModuleDict()) == NULL)
return NULL;
if ((reopmodule = PyDict_GetItemString(modules,
"reop")) == NULL)
return NULL;
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
return NULL;
if ((casefold = PyDict_GetItemString(reopdict,
"casefold")) == NULL)
return NULL;
bufp.translate = PyString_AsString(casefold);
}
else
bufp.translate=NULL;
bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null;
bufp.uses_registers=1;
bufp.anchor=anchor;
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
for(i = 0; i < bufp.num_registers; i++) {
re_regs.start[i] = -1;
re_regs.end[i] = -1;
}
result = re_search(&bufp,
string, stringlen, pos, stringlen-pos,
&re_regs);
if (result < -1) {
/* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure");
return NULL;
}
if (result == -1) {
Py_INCREF(Py_None);
return Py_None;
}
return makeresult(&re_regs, bufp.num_registers);
}
@ -345,9 +409,12 @@ static struct PyMethodDef reop_global_methods[] = {
void
initreop()
{
PyObject *m, *d, *v;
PyObject *m, *d, *k, *v, *o;
int i;
char *s;
char j[2];
re_compile_initialize();
m = Py_InitModule("reop", reop_global_methods);
d = PyModule_GetDict(m);
@ -370,12 +437,64 @@ initreop()
else
s[i] = i;
}
if (PyDict_SetItemString(d, "casefold", v) < 0)
goto finally;
Py_DECREF(v);
/* Initialize the syntax table */
o = PyDict_New();
if (o == NULL)
goto finally;
j[1] = '\0';
for (i = 0; i < 256; i++)
{
j[0] = i;
k = PyString_FromStringAndSize(j, 1);
if (k == NULL)
goto finally;
v = PyInt_FromLong(re_syntax_table[i]);
if (v == NULL)
goto finally;
if (PyDict_SetItem(o, k, v) < 0)
goto finally;
Py_DECREF(k);
Py_DECREF(v);
}
if (PyDict_SetItemString(d, "syntax_table", o) < 0)
goto finally;
Py_DECREF(o);
v = PyInt_FromLong(Sword);
if (v == NULL)
goto finally;
if (PyDict_SetItemString(d, "word", v) < 0)
goto finally;
Py_DECREF(v);
v = PyInt_FromLong(Swhitespace);
if (v == NULL)
goto finally;
if (PyDict_SetItemString(d, "whitespace", v) < 0)
goto finally;
Py_DECREF(v);
v = PyInt_FromLong(Sdigit);
if (v == NULL)
goto finally;
if (PyDict_SetItemString(d, "digit", v) < 0)
goto finally;
Py_DECREF(v);
if (!PyErr_Occurred())
return;
finally:
Py_FatalError("can't initialize reop module");
}