* regexmodule.c: added use of translation table, substring() method.
This commit is contained in:
parent
970871f8e6
commit
36d330bf36
|
@ -1,12 +1,11 @@
|
||||||
/*
|
/*
|
||||||
XXX support translate table
|
|
||||||
XXX support range parameter on search
|
XXX support range parameter on search
|
||||||
XXX support mstop parameter on search
|
XXX support mstop parameter on search
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/***********************************************************
|
/***********************************************************
|
||||||
Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
|
Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
|
||||||
Netherlands.
|
Amsterdam, The Netherlands.
|
||||||
|
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
|
||||||
|
@ -43,8 +42,9 @@ typedef struct {
|
||||||
OB_HEAD
|
OB_HEAD
|
||||||
struct re_pattern_buffer re_patbuf; /* The compiled expression */
|
struct re_pattern_buffer re_patbuf; /* The compiled expression */
|
||||||
struct re_registers re_regs; /* The registers from the last match */
|
struct re_registers re_regs; /* The registers from the last match */
|
||||||
int re_regs_valid; /* Nonzero if the registers are valid */
|
|
||||||
char re_fastmap[256]; /* Storage for fastmap */
|
char re_fastmap[256]; /* Storage for fastmap */
|
||||||
|
object *re_translate; /* String object for translate table */
|
||||||
|
object *re_lastok; /* String object last matched/searched */
|
||||||
} regexobject;
|
} regexobject;
|
||||||
|
|
||||||
/* Regex object methods */
|
/* Regex object methods */
|
||||||
|
@ -53,6 +53,8 @@ static void
|
||||||
reg_dealloc(re)
|
reg_dealloc(re)
|
||||||
regexobject *re;
|
regexobject *re;
|
||||||
{
|
{
|
||||||
|
XDECREF(re->re_translate);
|
||||||
|
XDECREF(re->re_lastok);
|
||||||
XDEL(re->re_patbuf.buffer);
|
XDEL(re->re_patbuf.buffer);
|
||||||
XDEL(re->re_patbuf.translate);
|
XDEL(re->re_patbuf.translate);
|
||||||
DEL(re);
|
DEL(re);
|
||||||
|
@ -66,15 +68,12 @@ makeresult(regs)
|
||||||
if (v != NULL) {
|
if (v != NULL) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < RE_NREGS; i++) {
|
for (i = 0; i < RE_NREGS; i++) {
|
||||||
object *w, *u;
|
object *w;
|
||||||
if ( (w = newtupleobject(2)) == NULL ||
|
w = mkvalue("(ii)", regs->start[i], regs->end[i]);
|
||||||
(u = newintobject(regs->start[i])) == NULL ||
|
if (w == NULL) {
|
||||||
settupleitem(w, 0, u) != 0 ||
|
XDECREF(v);
|
||||||
(u = newintobject(regs->end[i])) == NULL ||
|
v = NULL;
|
||||||
settupleitem(w, 1, u) != 0) {
|
break;
|
||||||
XDECREF(w);
|
|
||||||
DECREF(v);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
settupleitem(v, i, w);
|
settupleitem(v, i, w);
|
||||||
}
|
}
|
||||||
|
@ -87,30 +86,37 @@ reg_match(re, args)
|
||||||
regexobject *re;
|
regexobject *re;
|
||||||
object *args;
|
object *args;
|
||||||
{
|
{
|
||||||
|
object *argstring;
|
||||||
char *buffer;
|
char *buffer;
|
||||||
int size;
|
int size;
|
||||||
int offset;
|
int offset;
|
||||||
int result;
|
int result;
|
||||||
if (getargs(args, "s#", &buffer, &size)) {
|
if (getargs(args, "S", &argstring)) {
|
||||||
offset = 0;
|
offset = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
err_clear();
|
err_clear();
|
||||||
if (!getargs(args, "(s#i)", &buffer, &size, &offset))
|
if (!getargs(args, "(Si)", &argstring, &offset))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (offset < 0 || offset > size) {
|
|
||||||
err_setstr(RegexError, "match offset out of range");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
re->re_regs_valid = 0;
|
buffer = getstringvalue(argstring);
|
||||||
|
size = getstringsize(argstring);
|
||||||
|
if (offset < 0 || offset > size) {
|
||||||
|
err_setstr(RegexError, "match offset out of range");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
XDECREF(re->re_lastok);
|
||||||
|
re->re_lastok = NULL;
|
||||||
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
|
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
|
||||||
if (result < -1) {
|
if (result < -1) {
|
||||||
/* Failure like stack overflow */
|
/* Failure like stack overflow */
|
||||||
err_setstr(RegexError, "match failure");
|
err_setstr(RegexError, "match failure");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
re->re_regs_valid = result >= 0;
|
if (result >= 0) {
|
||||||
|
INCREF(argstring);
|
||||||
|
re->re_lastok = argstring;
|
||||||
|
}
|
||||||
return newintobject((long)result); /* Length of the match or -1 */
|
return newintobject((long)result); /* Length of the match or -1 */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,30 +125,34 @@ reg_search(re, args)
|
||||||
regexobject *re;
|
regexobject *re;
|
||||||
object *args;
|
object *args;
|
||||||
{
|
{
|
||||||
|
object *argstring;
|
||||||
char *buffer;
|
char *buffer;
|
||||||
int size;
|
int size;
|
||||||
int offset;
|
int offset;
|
||||||
int range;
|
int range;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
if (getargs(args, "s#", &buffer, &size)) {
|
if (getargs(args, "S", &argstring)) {
|
||||||
offset = 0;
|
offset = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
err_clear();
|
err_clear();
|
||||||
if (!getargs(args, "(s#i)", &buffer, &size, &offset))
|
if (!getargs(args, "(Si)", &argstring, &offset))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (offset < 0 || offset > size) {
|
}
|
||||||
err_setstr(RegexError, "search offset out of range");
|
buffer = getstringvalue(argstring);
|
||||||
return NULL;
|
size = getstringsize(argstring);
|
||||||
}
|
if (offset < 0 || offset > size) {
|
||||||
|
err_setstr(RegexError, "search offset out of range");
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
/* NB: In Emacs 18.57, the documentation for re_search[_2] and
|
/* NB: In Emacs 18.57, the documentation for re_search[_2] and
|
||||||
the implementation don't match: the documentation states that
|
the implementation don't match: the documentation states that
|
||||||
|range| positions are tried, while the code tries |range|+1
|
|range| positions are tried, while the code tries |range|+1
|
||||||
positions. It seems more productive to believe the code! */
|
positions. It seems more productive to believe the code! */
|
||||||
range = size - offset;
|
range = size - offset;
|
||||||
re->re_regs_valid = 0;
|
XDECREF(re->re_lastok);
|
||||||
|
re->re_lastok = NULL;
|
||||||
result = re_search(&re->re_patbuf, buffer, size, offset, range,
|
result = re_search(&re->re_patbuf, buffer, size, offset, range,
|
||||||
&re->re_regs);
|
&re->re_regs);
|
||||||
if (result < -1) {
|
if (result < -1) {
|
||||||
|
@ -150,13 +160,58 @@ reg_search(re, args)
|
||||||
err_setstr(RegexError, "match failure");
|
err_setstr(RegexError, "match failure");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
re->re_regs_valid = result >= 0;
|
if (result >= 0) {
|
||||||
|
INCREF(argstring);
|
||||||
|
re->re_lastok = argstring;
|
||||||
|
}
|
||||||
return newintobject((long)result); /* Position of the match or -1 */
|
return newintobject((long)result); /* Position of the match or -1 */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static object *
|
||||||
|
reg_substring(re, args)
|
||||||
|
regexobject *re;
|
||||||
|
object *args;
|
||||||
|
{
|
||||||
|
int i, a, b;
|
||||||
|
if (args != NULL && is_tupleobject(args)) {
|
||||||
|
int n = gettuplesize(args);
|
||||||
|
object *res = newtupleobject(n);
|
||||||
|
if (res == NULL)
|
||||||
|
return NULL;
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
object *v = reg_substring(re, gettupleitem(args, i));
|
||||||
|
if (v == NULL) {
|
||||||
|
DECREF(res);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
settupleitem(res, i, v);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
if (!getargs(args, "i", &i))
|
||||||
|
return NULL;
|
||||||
|
if (i < 0 || i >= RE_NREGS) {
|
||||||
|
err_setstr(RegexError, "substring() index out of range");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (re->re_lastok == NULL) {
|
||||||
|
err_setstr(RegexError,
|
||||||
|
"substring() only valid after successful match/search");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
a = re->re_regs.start[i];
|
||||||
|
b = re->re_regs.end[i];
|
||||||
|
if (a < 0 || b < 0) {
|
||||||
|
INCREF(None);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a);
|
||||||
|
}
|
||||||
|
|
||||||
static struct methodlist reg_methods[] = {
|
static struct methodlist reg_methods[] = {
|
||||||
{"match", reg_match},
|
{"match", reg_match},
|
||||||
{"search", reg_search},
|
{"search", reg_search},
|
||||||
|
{"substring", reg_substring},
|
||||||
{NULL, NULL} /* sentinel */
|
{NULL, NULL} /* sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -166,13 +221,22 @@ reg_getattr(re, name)
|
||||||
char *name;
|
char *name;
|
||||||
{
|
{
|
||||||
if (strcmp(name, "regs") == 0) {
|
if (strcmp(name, "regs") == 0) {
|
||||||
if (!re->re_regs_valid) {
|
if (re->re_lastok == NULL) {
|
||||||
err_setstr(RegexError,
|
err_setstr(RegexError,
|
||||||
"regs only valid after successful match/search");
|
"regs only valid after successful match/search");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return makeresult(&re->re_regs);
|
return makeresult(&re->re_regs);
|
||||||
}
|
}
|
||||||
|
if (strcmp(name, "last") == 0) {
|
||||||
|
if (re->re_lastok == NULL) {
|
||||||
|
err_setstr(RegexError,
|
||||||
|
"last only valid after successful match/search");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
INCREF(re->re_lastok);
|
||||||
|
return re->re_lastok;
|
||||||
|
}
|
||||||
return findmethod(reg_methods, (object *)re, name);
|
return findmethod(reg_methods, (object *)re, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,19 +256,30 @@ static typeobject Regextype = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static object *
|
static object *
|
||||||
newregexobject(pat, size)
|
newregexobject(pat, size, translate)
|
||||||
char *pat;
|
char *pat;
|
||||||
int size;
|
int size;
|
||||||
|
object *translate;
|
||||||
{
|
{
|
||||||
regexobject *re;
|
regexobject *re;
|
||||||
|
if (translate != NULL && getstringsize(translate) != 256) {
|
||||||
|
err_setstr(RegexError,
|
||||||
|
"translation table must be 256 bytes");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
re = NEWOBJ(regexobject, &Regextype);
|
re = NEWOBJ(regexobject, &Regextype);
|
||||||
if (re != NULL) {
|
if (re != NULL) {
|
||||||
char *error;
|
char *error;
|
||||||
re->re_patbuf.buffer = NULL;
|
re->re_patbuf.buffer = NULL;
|
||||||
re->re_patbuf.allocated = 0;
|
re->re_patbuf.allocated = 0;
|
||||||
re->re_patbuf.fastmap = re->re_fastmap;
|
re->re_patbuf.fastmap = re->re_fastmap;
|
||||||
re->re_patbuf.translate = NULL;
|
if (translate)
|
||||||
re->re_regs_valid = 0;
|
re->re_patbuf.translate = getstringvalue(translate);
|
||||||
|
else
|
||||||
|
re->re_patbuf.translate = NULL;
|
||||||
|
XINCREF(translate);
|
||||||
|
re->re_translate = translate;
|
||||||
|
re->re_lastok = NULL;
|
||||||
error = re_compile_pattern(pat, size, &re->re_patbuf);
|
error = re_compile_pattern(pat, size, &re->re_patbuf);
|
||||||
if (error != NULL) {
|
if (error != NULL) {
|
||||||
err_setstr(RegexError, error);
|
err_setstr(RegexError, error);
|
||||||
|
@ -222,9 +297,13 @@ regex_compile(self, args)
|
||||||
{
|
{
|
||||||
char *pat;
|
char *pat;
|
||||||
int size;
|
int size;
|
||||||
if (!getargs(args, "s#", &pat, &size))
|
object *tran = NULL;
|
||||||
return NULL;
|
if (!getargs(args, "s#", &pat, &size)) {
|
||||||
return newregexobject(pat, size);
|
err_clear();
|
||||||
|
if (!getargs(args, "(s#S)", &pat, &size, &tran))
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return newregexobject(pat, size, tran);
|
||||||
}
|
}
|
||||||
|
|
||||||
static object *cache_pat;
|
static object *cache_pat;
|
||||||
|
@ -253,7 +332,7 @@ regex_match(self, args)
|
||||||
object *args;
|
object *args;
|
||||||
{
|
{
|
||||||
object *pat, *string;
|
object *pat, *string;
|
||||||
if (!getStrStrarg(args, &pat, &string))
|
if (!getargs(args, "(SS)", &pat, &string))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (update_cache(pat) < 0)
|
if (update_cache(pat) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -266,7 +345,7 @@ regex_search(self, args)
|
||||||
object *args;
|
object *args;
|
||||||
{
|
{
|
||||||
object *pat, *string;
|
object *pat, *string;
|
||||||
if (!getStrStrarg(args, &pat, &string))
|
if (!getargs(args, "(SS)", &pat, &string))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (update_cache(pat) < 0)
|
if (update_cache(pat) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
Loading…
Reference in New Issue