Sanitize bloom filter macros

This commit is contained in:
Antoine Pitrou 2010-01-13 14:01:26 +00:00
parent a913a20c91
commit 10042922d9
2 changed files with 36 additions and 14 deletions

View File

@@ -18,15 +18,27 @@
#define FAST_SEARCH 1 #define FAST_SEARCH 1
#define FAST_RSEARCH 2 #define FAST_RSEARCH 2
#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & 0x1F)))) #if LONG_BIT >= 128
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F)))) #define STRINGLIB_BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define STRINGLIB_BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define STRINGLIB_BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif
#define STRINGLIB_BLOOM_ADD(mask, ch) \
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m, const STRINGLIB_CHAR* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode) Py_ssize_t maxcount, int mode)
{ {
long mask; unsigned long mask;
Py_ssize_t skip, count = 0; Py_ssize_t skip, count = 0;
Py_ssize_t i, j, mlast, w; Py_ssize_t i, j, mlast, w;
@@ -70,12 +82,12 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* process pattern[:-1] */ /* process pattern[:-1] */
for (i = 0; i < mlast; i++) { for (i = 0; i < mlast; i++) {
BLOOM_ADD(mask, p[i]); STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast]) if (p[i] == p[mlast])
skip = mlast - i - 1; skip = mlast - i - 1;
} }
/* process pattern[-1] outside the loop */ /* process pattern[-1] outside the loop */
BLOOM_ADD(mask, p[mlast]); STRINGLIB_BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) { for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */ /* note: using mlast in the skip path slows things down on x86 */
@@ -95,13 +107,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue; continue;
} }
/* miss: check if next character is part of pattern */ /* miss: check if next character is part of pattern */
if (!BLOOM(mask, s[i+m])) if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
else else
i = i + skip; i = i + skip;
} else { } else {
/* skip: check if next character is part of pattern */ /* skip: check if next character is part of pattern */
if (!BLOOM(mask, s[i+m])) if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m; i = i + m;
} }
} }
@@ -110,10 +122,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* create compressed boyer-moore delta 1 table */ /* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */ /* process pattern[0] outside the loop */
BLOOM_ADD(mask, p[0]); STRINGLIB_BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */ /* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) { for (i = mlast; i > 0; i--) {
BLOOM_ADD(mask, p[i]); STRINGLIB_BLOOM_ADD(mask, p[i]);
if (p[i] == p[0]) if (p[i] == p[0])
skip = i - 1; skip = i - 1;
} }
@@ -128,13 +140,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* got a match! */ /* got a match! */
return i; return i;
/* miss: check if previous character is part of pattern */ /* miss: check if previous character is part of pattern */
if (!BLOOM(mask, s[i-1])) if (!STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
else else
i = i - skip; i = i - skip;
} else { } else {
/* skip: check if previous character is part of pattern */ /* skip: check if previous character is part of pattern */
if (!BLOOM(mask, s[i-1])) if (!STRINGLIB_BLOOM(mask, s[i-1]))
i = i - m; i = i - m;
} }
} }

View File

@@ -190,12 +190,22 @@ PyUnicode_GetMax(void)
/* the linebreak mask is set up by Unicode_Init below */ /* the linebreak mask is set up by Unicode_Init below */
#if LONG_BIT >= 128
#define BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif
#define BLOOM_MASK unsigned long #define BLOOM_MASK unsigned long
static BLOOM_MASK bloom_linebreak; static BLOOM_MASK bloom_linebreak;
#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1))))) #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & (LONG_BIT - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM_LINEBREAK(ch) \ #define BLOOM_LINEBREAK(ch) \
((ch) < 128U ? ascii_linebreak[(ch)] : \ ((ch) < 128U ? ascii_linebreak[(ch)] : \
@@ -205,7 +215,7 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
{ {
/* calculate simple bloom-style bitmask for a given unicode string */ /* calculate simple bloom-style bitmask for a given unicode string */
long mask; BLOOM_MASK mask;
Py_ssize_t i; Py_ssize_t i;
mask = 0; mask = 0;