Sanitize bloom filter macros

This commit is contained in:
Antoine Pitrou 2010-01-13 14:01:26 +00:00
parent a913a20c91
commit 10042922d9
2 changed files with 36 additions and 14 deletions

View File

@ -18,15 +18,27 @@
#define FAST_SEARCH 1
#define FAST_RSEARCH 2
#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & 0x1F))))
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))
/* Select the bloom mask width: the largest of 128, 64 or 32 that does not
   exceed LONG_BIT.  Compilation is stopped if LONG_BIT is below 32.
   NOTE(review): LONG_BIT is defined outside this view; presumably it is the
   number of bits in a long -- confirm. */
#if LONG_BIT >= 128
#define STRINGLIB_BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define STRINGLIB_BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define STRINGLIB_BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif
/* STRINGLIB_BLOOM_ADD(mask, ch): set in mask the bit whose index is
   ch & (STRINGLIB_BLOOM_WIDTH - 1).  The shifted constant is 1UL, so the
   shift is carried out in unsigned long.  mask is expanded without
   parentheses and is assigned to, so pass a plain modifiable variable. */
#define STRINGLIB_BLOOM_ADD(mask, ch) \
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
/* STRINGLIB_BLOOM(mask, ch): nonzero when the bit selected by ch (same index
   computation as STRINGLIB_BLOOM_ADD) is set in mask.  Different characters
   can select the same bit, so a nonzero result does not prove that ch itself
   was added; a zero result means no added character selects that bit. */
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
long mask;
unsigned long mask;
Py_ssize_t skip, count = 0;
Py_ssize_t i, j, mlast, w;
@ -70,12 +82,12 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* process pattern[:-1] */
for (i = 0; i < mlast; i++) {
BLOOM_ADD(mask, p[i]);
STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast])
skip = mlast - i - 1;
}
/* process pattern[-1] outside the loop */
BLOOM_ADD(mask, p[mlast]);
STRINGLIB_BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
@ -95,13 +107,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
if (!BLOOM(mask, s[i+m]))
if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
if (!BLOOM(mask, s[i+m]))
if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
}
}
@ -110,10 +122,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */
BLOOM_ADD(mask, p[0]);
STRINGLIB_BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) {
BLOOM_ADD(mask, p[i]);
STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[0])
skip = i - 1;
}
@ -128,13 +140,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* got a match! */
return i;
/* miss: check if previous character is part of pattern */
if (!BLOOM(mask, s[i-1]))
if (!STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m;
else
i = i - skip;
} else {
/* skip: check if previous character is part of pattern */
if (!BLOOM(mask, s[i-1]))
if (!STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m;
}
}

View File

@ -190,12 +190,22 @@ PyUnicode_GetMax(void)
/* the linebreak mask is set up by Unicode_Init below */
/* Select the bloom mask width: the largest of 128, 64 or 32 that does not
   exceed LONG_BIT.  Compilation is stopped if LONG_BIT is below 32.
   NOTE(review): LONG_BIT is defined outside this view; presumably it is the
   number of bits in a long -- confirm. */
#if LONG_BIT >= 128
#define BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif
/* Type used for bloom bitmasks in this file. */
#define BLOOM_MASK unsigned long
/* File-scope bloom mask for line breaks; has static storage, so it starts
   out as zero until it is filled in. */
static BLOOM_MASK bloom_linebreak;
#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1)))))
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & (LONG_BIT - 1)))))
/* BLOOM_ADD(mask, ch): set in mask the bit whose index is
   ch & (BLOOM_WIDTH - 1); the shift is carried out on 1UL (unsigned long).
   mask is expanded without parentheses and is assigned to, so pass a plain
   modifiable variable. */
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
/* BLOOM(mask, ch): nonzero when the bit selected by ch is set in mask.
   Different characters can select the same bit, so a nonzero result is only
   a "maybe"; zero means no added character selects that bit. */
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM_LINEBREAK(ch) \
((ch) < 128U ? ascii_linebreak[(ch)] : \
@ -205,7 +215,7 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
{
/* calculate simple bloom-style bitmask for a given unicode string */
long mask;
BLOOM_MASK mask;
Py_ssize_t i;
mask = 0;