mirror of https://github.com/python/cpython
Jeffrey's latest changes
This commit is contained in:
parent
9e18ec7dc9
commit
74fb303997
|
@ -1,7 +1,3 @@
|
|||
/*
|
||||
* -*- mode: c-mode; c-file-style: python -*-
|
||||
*/
|
||||
|
||||
/* regexpr.c
|
||||
*
|
||||
* Author: Tatu Ylonen <ylo@ngs.fi>
|
||||
|
@ -472,16 +468,15 @@ static int regexp_ansi_sequences;
|
|||
#define MAX_NESTING 100 /* max nesting level of operators */
|
||||
|
||||
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
|
||||
#define Sword 1
|
||||
|
||||
static char re_syntax_table[256];
|
||||
char re_syntax_table[256];
|
||||
|
||||
static void re_compile_initialize(void)
|
||||
void re_compile_initialize(void)
|
||||
{
|
||||
int a;
|
||||
|
||||
static int syntax_table_inited = 0;
|
||||
|
||||
|
||||
if (!syntax_table_inited)
|
||||
{
|
||||
syntax_table_inited = 1;
|
||||
|
@ -491,7 +486,11 @@ static void re_compile_initialize(void)
|
|||
for (a = 'A'; a <= 'Z'; a++)
|
||||
re_syntax_table[a] = Sword;
|
||||
for (a = '0'; a <= '9'; a++)
|
||||
re_syntax_table[a] = Sword;
|
||||
re_syntax_table[a] = Sword | Sdigit;
|
||||
re_syntax_table['_'] = Sword;
|
||||
for (a = 9; a <= 13; a++)
|
||||
re_syntax_table[a] = Swhitespace;
|
||||
re_syntax_table[' '] = Swhitespace;
|
||||
}
|
||||
re_compile_initialized = 1;
|
||||
for (a = 0; a < 256; a++)
|
||||
|
@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
|
|||
return; /* we have already been here */
|
||||
visited[pos] = 1;
|
||||
for (;;)
|
||||
switch (code[pos++])
|
||||
{
|
||||
switch (code[pos++]) {
|
||||
case Cend:
|
||||
{
|
||||
*can_be_null = 1;
|
||||
return;
|
||||
}
|
||||
{
|
||||
*can_be_null = 1;
|
||||
return;
|
||||
}
|
||||
case Cbol:
|
||||
case Cbegbuf:
|
||||
case Cendbuf:
|
||||
|
@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
|
|||
|
||||
NEW_STATE(state, bufp->num_registers);
|
||||
|
||||
if (!re_compile_initialized)
|
||||
re_compile_initialize();
|
||||
|
||||
continue_matching:
|
||||
switch (*code++)
|
||||
{
|
||||
|
@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp,
|
|||
{
|
||||
if (text == textend)
|
||||
goto fail;
|
||||
if (SYNTAX(*text) != Sword)
|
||||
if (SYNTAX(*text) & Sword)
|
||||
goto fail;
|
||||
if (text == textstart)
|
||||
goto continue_matching;
|
||||
if (SYNTAX(text[-1]) != Sword)
|
||||
if (!(SYNTAX(text[-1]) & Sword))
|
||||
goto continue_matching;
|
||||
goto fail;
|
||||
}
|
||||
|
@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp,
|
|||
{
|
||||
if (text == textstart)
|
||||
goto fail;
|
||||
if (SYNTAX(text[-1]) != Sword)
|
||||
if (!(SYNTAX(text[-1]) & Sword))
|
||||
goto fail;
|
||||
if (text == textend)
|
||||
goto continue_matching;
|
||||
if (SYNTAX(*text) == Sword)
|
||||
if (SYNTAX(*text) & Sword)
|
||||
goto fail;
|
||||
goto continue_matching;
|
||||
}
|
||||
|
@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
|
|||
|
||||
if (text == textstart || text == textend)
|
||||
goto continue_matching;
|
||||
if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
|
||||
if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
|
||||
goto continue_matching;
|
||||
goto fail;
|
||||
}
|
||||
|
@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
|
|||
* beginning and end of buffer. */
|
||||
if (text == textstart || text == textend)
|
||||
goto fail;
|
||||
if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
|
||||
if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
|
||||
goto fail;
|
||||
goto continue_matching;
|
||||
}
|
||||
case Csyntaxspec:
|
||||
{
|
||||
NEXTCHAR(ch);
|
||||
if (SYNTAX(ch) != (unsigned char)*code++)
|
||||
if (!(SYNTAX(ch) & (unsigned char)*code++))
|
||||
goto fail;
|
||||
goto continue_matching;
|
||||
}
|
||||
case Cnotsyntaxspec:
|
||||
{
|
||||
NEXTCHAR(ch);
|
||||
if (SYNTAX(ch) != (unsigned char)*code++)
|
||||
if (SYNTAX(ch) & (unsigned char)*code++)
|
||||
break;
|
||||
goto continue_matching;
|
||||
}
|
||||
|
@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
|
|||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
** Local Variables:
|
||||
** mode: c
|
||||
** c-file-style: "python"
|
||||
** End:
|
||||
*/
|
||||
|
|
|
@ -67,10 +67,16 @@ typedef struct re_registers
|
|||
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
|
||||
#define RE_SYNTAX_EMACS 0
|
||||
|
||||
#define Sword 1
|
||||
#define Swhitespace 2
|
||||
#define Sdigit 4
|
||||
|
||||
/* Rename all exported symbols to avoid conflicts with similarly named
|
||||
symbols in some systems' standard C libraries... */
|
||||
|
||||
#define re_syntax _Py_re_syntax
|
||||
#define re_syntax_table _Py_re_syntax_table
|
||||
#define re_compile_initialize _Py_re_compile_initialize
|
||||
#define re_set_syntax _Py_re_set_syntax
|
||||
#define re_compile_pattern _Py_re_compile_pattern
|
||||
#define re_match _Py_re_match
|
||||
|
@ -85,6 +91,10 @@ extern int re_syntax;
|
|||
/* This is the actual syntax mask. It was added so that Python could do
|
||||
* syntax-dependent munging of patterns before compilation. */
|
||||
|
||||
extern char re_syntax_table[256];
|
||||
|
||||
void re_compile_initialize(void);
|
||||
|
||||
int re_set_syntax(int syntax);
|
||||
/* This sets the syntax to use and returns the previous syntax. The
|
||||
* syntax is specified by a bit mask of the above defined bits. */
|
||||
|
@ -133,6 +143,8 @@ int re_exec(char *s);
|
|||
#else /* HAVE_PROTOTYPES */
|
||||
|
||||
extern int re_syntax;
|
||||
extern char re_syntax_table[256];
|
||||
void re_compile_initialize();
|
||||
int re_set_syntax();
|
||||
char *re_compile_pattern();
|
||||
int re_match();
|
||||
|
|
|
@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
|
|||
|
||||
static PyObject *ReopError; /* Exception */
|
||||
|
||||
#define IGNORECASE 0x01
|
||||
#define MULTILINE 0x02
|
||||
#define DOTALL 0x04
|
||||
#define VERBOSE 0x08
|
||||
|
||||
static char *reop_casefold;
|
||||
|
||||
static PyObject *
|
||||
makeresult(regs, num_regs)
|
||||
struct re_registers *regs;
|
||||
|
@ -90,6 +97,10 @@ reop_match(self, args)
|
|||
int flags, pos, result;
|
||||
struct re_pattern_buffer bufp;
|
||||
struct re_registers re_regs;
|
||||
PyObject *modules = NULL;
|
||||
PyObject *reopmodule = NULL;
|
||||
PyObject *reopdict = NULL;
|
||||
PyObject *casefold = NULL;
|
||||
|
||||
if (!PyArg_Parse(args, "(s#iiis#is#i)",
|
||||
&(bufp.buffer), &(bufp.allocated),
|
||||
|
@ -102,20 +113,44 @@ reop_match(self, args)
|
|||
|
||||
/* XXX sanity-check the input data */
|
||||
bufp.used=bufp.allocated;
|
||||
bufp.translate=NULL;
|
||||
if (flags & IGNORECASE)
|
||||
{
|
||||
if ((modules = PyImport_GetModuleDict()) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((reopmodule = PyDict_GetItemString(modules,
|
||||
"reop")) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((casefold = PyDict_GetItemString(reopdict,
|
||||
"casefold")) == NULL)
|
||||
return NULL;
|
||||
|
||||
bufp.translate = PyString_AsString(casefold);
|
||||
}
|
||||
else
|
||||
bufp.translate=NULL;
|
||||
bufp.fastmap_accurate=1;
|
||||
bufp.can_be_null=can_be_null;
|
||||
bufp.uses_registers=1;
|
||||
bufp.anchor=anchor;
|
||||
|
||||
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
|
||||
for(i=0; i<bufp.num_registers; i++) {
|
||||
re_regs.start[i]=-1;
|
||||
re_regs.end[i]=-1;
|
||||
}
|
||||
|
||||
result = re_match(&bufp,
|
||||
string, stringlen, pos,
|
||||
&re_regs);
|
||||
|
||||
if (result < -1) {
|
||||
/* Failure like stack overflow */
|
||||
PyErr_SetString(ReopError, "match failure");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
if (result == -1) {
|
||||
|
@ -136,6 +171,10 @@ reop_search(self, args)
|
|||
int flags, pos, result;
|
||||
struct re_pattern_buffer bufp;
|
||||
struct re_registers re_regs;
|
||||
PyObject *modules = NULL;
|
||||
PyObject *reopmodule = NULL;
|
||||
PyObject *reopdict = NULL;
|
||||
PyObject *casefold = NULL;
|
||||
|
||||
if (!PyArg_Parse(args, "(s#iiis#is#i)",
|
||||
&(bufp.buffer), &(bufp.allocated),
|
||||
|
@ -148,26 +187,51 @@ reop_search(self, args)
|
|||
|
||||
/* XXX sanity-check the input data */
|
||||
bufp.used=bufp.allocated;
|
||||
bufp.translate=NULL;
|
||||
if (flags & IGNORECASE)
|
||||
{
|
||||
if ((modules = PyImport_GetModuleDict()) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((reopmodule = PyDict_GetItemString(modules,
|
||||
"reop")) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((casefold = PyDict_GetItemString(reopdict,
|
||||
"casefold")) == NULL)
|
||||
return NULL;
|
||||
|
||||
bufp.translate = PyString_AsString(casefold);
|
||||
}
|
||||
else
|
||||
bufp.translate=NULL;
|
||||
bufp.fastmap_accurate=1;
|
||||
bufp.can_be_null=can_be_null;
|
||||
bufp.uses_registers=1;
|
||||
bufp.anchor=anchor;
|
||||
|
||||
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
|
||||
for(i = 0; i < bufp.num_registers; i++) {
|
||||
re_regs.start[i] = -1;
|
||||
re_regs.end[i] = -1;
|
||||
}
|
||||
|
||||
result = re_search(&bufp,
|
||||
string, stringlen, pos, stringlen-pos,
|
||||
&re_regs);
|
||||
|
||||
if (result < -1) {
|
||||
/* Failure like stack overflow */
|
||||
PyErr_SetString(ReopError, "match failure");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (result == -1) {
|
||||
Py_INCREF(Py_None);
|
||||
return Py_None;
|
||||
}
|
||||
|
||||
return makeresult(&re_regs, bufp.num_registers);
|
||||
}
|
||||
|
||||
|
@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = {
|
|||
void
|
||||
initreop()
|
||||
{
|
||||
PyObject *m, *d, *v;
|
||||
PyObject *m, *d, *k, *v, *o;
|
||||
int i;
|
||||
char *s;
|
||||
|
||||
char j[2];
|
||||
|
||||
re_compile_initialize();
|
||||
|
||||
m = Py_InitModule("reop", reop_global_methods);
|
||||
d = PyModule_GetDict(m);
|
||||
|
||||
|
@ -370,12 +437,64 @@ initreop()
|
|||
else
|
||||
s[i] = i;
|
||||
}
|
||||
|
||||
if (PyDict_SetItemString(d, "casefold", v) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(v);
|
||||
|
||||
/* Initialize the syntax table */
|
||||
|
||||
o = PyDict_New();
|
||||
if (o == NULL)
|
||||
goto finally;
|
||||
|
||||
j[1] = '\0';
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
j[0] = i;
|
||||
k = PyString_FromStringAndSize(j, 1);
|
||||
if (k == NULL)
|
||||
goto finally;
|
||||
v = PyInt_FromLong(re_syntax_table[i]);
|
||||
if (v == NULL)
|
||||
goto finally;
|
||||
if (PyDict_SetItem(o, k, v) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(k);
|
||||
Py_DECREF(v);
|
||||
}
|
||||
|
||||
if (PyDict_SetItemString(d, "syntax_table", o) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(o);
|
||||
|
||||
v = PyInt_FromLong(Sword);
|
||||
if (v == NULL)
|
||||
goto finally;
|
||||
|
||||
if (PyDict_SetItemString(d, "word", v) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(v);
|
||||
|
||||
v = PyInt_FromLong(Swhitespace);
|
||||
if (v == NULL)
|
||||
goto finally;
|
||||
|
||||
if (PyDict_SetItemString(d, "whitespace", v) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(v);
|
||||
|
||||
v = PyInt_FromLong(Sdigit);
|
||||
if (v == NULL)
|
||||
goto finally;
|
||||
|
||||
if (PyDict_SetItemString(d, "digit", v) < 0)
|
||||
goto finally;
|
||||
Py_DECREF(v);
|
||||
|
||||
if (!PyErr_Occurred())
|
||||
return;
|
||||
|
||||
finally:
|
||||
Py_FatalError("can't initialize reop module");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue