* regexmodule.c: added use of translation table, substring() method.

This commit is contained in:
Guido van Rossum 1993-02-21 20:12:16 +00:00
parent 970871f8e6
commit 36d330bf36
1 changed files with 117 additions and 38 deletions

View File

@ -1,12 +1,11 @@
/* /*
XXX support translate table
XXX support range parameter on search XXX support range parameter on search
XXX support mstop parameter on search XXX support mstop parameter on search
*/ */
/*********************************************************** /***********************************************************
Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
Netherlands. Amsterdam, The Netherlands.
All Rights Reserved All Rights Reserved
@ -43,8 +42,9 @@ typedef struct {
OB_HEAD OB_HEAD
struct re_pattern_buffer re_patbuf; /* The compiled expression */ struct re_pattern_buffer re_patbuf; /* The compiled expression */
struct re_registers re_regs; /* The registers from the last match */ struct re_registers re_regs; /* The registers from the last match */
int re_regs_valid; /* Nonzero if the registers are valid */
char re_fastmap[256]; /* Storage for fastmap */ char re_fastmap[256]; /* Storage for fastmap */
object *re_translate; /* String object for translate table */
object *re_lastok; /* String object last matched/searched */
} regexobject; } regexobject;
/* Regex object methods */ /* Regex object methods */
@ -53,6 +53,8 @@ static void
reg_dealloc(re) reg_dealloc(re)
regexobject *re; regexobject *re;
{ {
XDECREF(re->re_translate);
XDECREF(re->re_lastok);
XDEL(re->re_patbuf.buffer); XDEL(re->re_patbuf.buffer);
XDEL(re->re_patbuf.translate); XDEL(re->re_patbuf.translate);
DEL(re); DEL(re);
@ -66,15 +68,12 @@ makeresult(regs)
if (v != NULL) { if (v != NULL) {
int i; int i;
for (i = 0; i < RE_NREGS; i++) { for (i = 0; i < RE_NREGS; i++) {
object *w, *u; object *w;
if ( (w = newtupleobject(2)) == NULL || w = mkvalue("(ii)", regs->start[i], regs->end[i]);
(u = newintobject(regs->start[i])) == NULL || if (w == NULL) {
settupleitem(w, 0, u) != 0 || XDECREF(v);
(u = newintobject(regs->end[i])) == NULL || v = NULL;
settupleitem(w, 1, u) != 0) { break;
XDECREF(w);
DECREF(v);
return NULL;
} }
settupleitem(v, i, w); settupleitem(v, i, w);
} }
@ -87,30 +86,37 @@ reg_match(re, args)
regexobject *re; regexobject *re;
object *args; object *args;
{ {
object *argstring;
char *buffer; char *buffer;
int size; int size;
int offset; int offset;
int result; int result;
if (getargs(args, "s#", &buffer, &size)) { if (getargs(args, "S", &argstring)) {
offset = 0; offset = 0;
} }
else { else {
err_clear(); err_clear();
if (!getargs(args, "(s#i)", &buffer, &size, &offset)) if (!getargs(args, "(Si)", &argstring, &offset))
return NULL; return NULL;
if (offset < 0 || offset > size) {
err_setstr(RegexError, "match offset out of range");
return NULL;
}
} }
re->re_regs_valid = 0; buffer = getstringvalue(argstring);
size = getstringsize(argstring);
if (offset < 0 || offset > size) {
err_setstr(RegexError, "match offset out of range");
return NULL;
}
XDECREF(re->re_lastok);
re->re_lastok = NULL;
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs); result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
if (result < -1) { if (result < -1) {
/* Failure like stack overflow */ /* Failure like stack overflow */
err_setstr(RegexError, "match failure"); err_setstr(RegexError, "match failure");
return NULL; return NULL;
} }
re->re_regs_valid = result >= 0; if (result >= 0) {
INCREF(argstring);
re->re_lastok = argstring;
}
return newintobject((long)result); /* Length of the match or -1 */ return newintobject((long)result); /* Length of the match or -1 */
} }
@ -119,30 +125,34 @@ reg_search(re, args)
regexobject *re; regexobject *re;
object *args; object *args;
{ {
object *argstring;
char *buffer; char *buffer;
int size; int size;
int offset; int offset;
int range; int range;
int result; int result;
if (getargs(args, "s#", &buffer, &size)) { if (getargs(args, "S", &argstring)) {
offset = 0; offset = 0;
} }
else { else {
err_clear(); err_clear();
if (!getargs(args, "(s#i)", &buffer, &size, &offset)) if (!getargs(args, "(Si)", &argstring, &offset))
return NULL; return NULL;
if (offset < 0 || offset > size) { }
err_setstr(RegexError, "search offset out of range"); buffer = getstringvalue(argstring);
return NULL; size = getstringsize(argstring);
} if (offset < 0 || offset > size) {
err_setstr(RegexError, "search offset out of range");
return NULL;
} }
/* NB: In Emacs 18.57, the documentation for re_search[_2] and /* NB: In Emacs 18.57, the documentation for re_search[_2] and
the implementation don't match: the documentation states that the implementation don't match: the documentation states that
|range| positions are tried, while the code tries |range|+1 |range| positions are tried, while the code tries |range|+1
positions. It seems more productive to believe the code! */ positions. It seems more productive to believe the code! */
range = size - offset; range = size - offset;
re->re_regs_valid = 0; XDECREF(re->re_lastok);
re->re_lastok = NULL;
result = re_search(&re->re_patbuf, buffer, size, offset, range, result = re_search(&re->re_patbuf, buffer, size, offset, range,
&re->re_regs); &re->re_regs);
if (result < -1) { if (result < -1) {
@ -150,13 +160,58 @@ reg_search(re, args)
err_setstr(RegexError, "match failure"); err_setstr(RegexError, "match failure");
return NULL; return NULL;
} }
re->re_regs_valid = result >= 0; if (result >= 0) {
INCREF(argstring);
re->re_lastok = argstring;
}
return newintobject((long)result); /* Position of the match or -1 */ return newintobject((long)result); /* Position of the match or -1 */
} }
static object *
reg_substring(re, args)
regexobject *re;
object *args;
{
int i, a, b;
if (args != NULL && is_tupleobject(args)) {
int n = gettuplesize(args);
object *res = newtupleobject(n);
if (res == NULL)
return NULL;
for (i = 0; i < n; i++) {
object *v = reg_substring(re, gettupleitem(args, i));
if (v == NULL) {
DECREF(res);
return NULL;
}
settupleitem(res, i, v);
}
return res;
}
if (!getargs(args, "i", &i))
return NULL;
if (i < 0 || i >= RE_NREGS) {
err_setstr(RegexError, "substring() index out of range");
return NULL;
}
if (re->re_lastok == NULL) {
err_setstr(RegexError,
"substring() only valid after successful match/search");
return NULL;
}
a = re->re_regs.start[i];
b = re->re_regs.end[i];
if (a < 0 || b < 0) {
INCREF(None);
return None;
}
return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a);
}
static struct methodlist reg_methods[] = { static struct methodlist reg_methods[] = {
{"match", reg_match}, {"match", reg_match},
{"search", reg_search}, {"search", reg_search},
{"substring", reg_substring},
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };
@ -166,13 +221,22 @@ reg_getattr(re, name)
char *name; char *name;
{ {
if (strcmp(name, "regs") == 0) { if (strcmp(name, "regs") == 0) {
if (!re->re_regs_valid) { if (re->re_lastok == NULL) {
err_setstr(RegexError, err_setstr(RegexError,
"regs only valid after successful match/search"); "regs only valid after successful match/search");
return NULL; return NULL;
} }
return makeresult(&re->re_regs); return makeresult(&re->re_regs);
} }
if (strcmp(name, "last") == 0) {
if (re->re_lastok == NULL) {
err_setstr(RegexError,
"last only valid after successful match/search");
return NULL;
}
INCREF(re->re_lastok);
return re->re_lastok;
}
return findmethod(reg_methods, (object *)re, name); return findmethod(reg_methods, (object *)re, name);
} }
@ -192,19 +256,30 @@ static typeobject Regextype = {
}; };
static object * static object *
newregexobject(pat, size) newregexobject(pat, size, translate)
char *pat; char *pat;
int size; int size;
object *translate;
{ {
regexobject *re; regexobject *re;
if (translate != NULL && getstringsize(translate) != 256) {
err_setstr(RegexError,
"translation table must be 256 bytes");
return NULL;
}
re = NEWOBJ(regexobject, &Regextype); re = NEWOBJ(regexobject, &Regextype);
if (re != NULL) { if (re != NULL) {
char *error; char *error;
re->re_patbuf.buffer = NULL; re->re_patbuf.buffer = NULL;
re->re_patbuf.allocated = 0; re->re_patbuf.allocated = 0;
re->re_patbuf.fastmap = re->re_fastmap; re->re_patbuf.fastmap = re->re_fastmap;
re->re_patbuf.translate = NULL; if (translate)
re->re_regs_valid = 0; re->re_patbuf.translate = getstringvalue(translate);
else
re->re_patbuf.translate = NULL;
XINCREF(translate);
re->re_translate = translate;
re->re_lastok = NULL;
error = re_compile_pattern(pat, size, &re->re_patbuf); error = re_compile_pattern(pat, size, &re->re_patbuf);
if (error != NULL) { if (error != NULL) {
err_setstr(RegexError, error); err_setstr(RegexError, error);
@ -222,9 +297,13 @@ regex_compile(self, args)
{ {
char *pat; char *pat;
int size; int size;
if (!getargs(args, "s#", &pat, &size)) object *tran = NULL;
return NULL; if (!getargs(args, "s#", &pat, &size)) {
return newregexobject(pat, size); err_clear();
if (!getargs(args, "(s#S)", &pat, &size, &tran))
return NULL;
}
return newregexobject(pat, size, tran);
} }
static object *cache_pat; static object *cache_pat;
@ -253,7 +332,7 @@ regex_match(self, args)
object *args; object *args;
{ {
object *pat, *string; object *pat, *string;
if (!getStrStrarg(args, &pat, &string)) if (!getargs(args, "(SS)", &pat, &string))
return NULL; return NULL;
if (update_cache(pat) < 0) if (update_cache(pat) < 0)
return NULL; return NULL;
@ -266,7 +345,7 @@ regex_search(self, args)
object *args; object *args;
{ {
object *pat, *string; object *pat, *string;
if (!getStrStrarg(args, &pat, &string)) if (!getargs(args, "(SS)", &pat, &string))
return NULL; return NULL;
if (update_cache(pat) < 0) if (update_cache(pat) < 0)
return NULL; return NULL;