Sanitize bloom filter macros
This commit is contained in:
parent
a913a20c91
commit
10042922d9
|
@@ -18,15 +18,27 @@
|
||||||
#define FAST_SEARCH 1
|
#define FAST_SEARCH 1
|
||||||
#define FAST_RSEARCH 2
|
#define FAST_RSEARCH 2
|
||||||
|
|
||||||
#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & 0x1F))))
|
#if LONG_BIT >= 128
|
||||||
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))
|
#define STRINGLIB_BLOOM_WIDTH 128
|
||||||
|
#elif LONG_BIT >= 64
|
||||||
|
#define STRINGLIB_BLOOM_WIDTH 64
|
||||||
|
#elif LONG_BIT >= 32
|
||||||
|
#define STRINGLIB_BLOOM_WIDTH 32
|
||||||
|
#else
|
||||||
|
#error "LONG_BIT is smaller than 32"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define STRINGLIB_BLOOM_ADD(mask, ch) \
|
||||||
|
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
||||||
|
#define STRINGLIB_BLOOM(mask, ch) \
|
||||||
|
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
||||||
|
|
||||||
Py_LOCAL_INLINE(Py_ssize_t)
|
Py_LOCAL_INLINE(Py_ssize_t)
|
||||||
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
const STRINGLIB_CHAR* p, Py_ssize_t m,
|
const STRINGLIB_CHAR* p, Py_ssize_t m,
|
||||||
Py_ssize_t maxcount, int mode)
|
Py_ssize_t maxcount, int mode)
|
||||||
{
|
{
|
||||||
long mask;
|
unsigned long mask;
|
||||||
Py_ssize_t skip, count = 0;
|
Py_ssize_t skip, count = 0;
|
||||||
Py_ssize_t i, j, mlast, w;
|
Py_ssize_t i, j, mlast, w;
|
||||||
|
|
||||||
|
@@ -70,12 +82,12 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
|
|
||||||
/* process pattern[:-1] */
|
/* process pattern[:-1] */
|
||||||
for (i = 0; i < mlast; i++) {
|
for (i = 0; i < mlast; i++) {
|
||||||
BLOOM_ADD(mask, p[i]);
|
STRINGLIB_BLOOM_ADD(mask, p[i]);
|
||||||
if (p[i] == p[mlast])
|
if (p[i] == p[mlast])
|
||||||
skip = mlast - i - 1;
|
skip = mlast - i - 1;
|
||||||
}
|
}
|
||||||
/* process pattern[-1] outside the loop */
|
/* process pattern[-1] outside the loop */
|
||||||
BLOOM_ADD(mask, p[mlast]);
|
STRINGLIB_BLOOM_ADD(mask, p[mlast]);
|
||||||
|
|
||||||
for (i = 0; i <= w; i++) {
|
for (i = 0; i <= w; i++) {
|
||||||
/* note: using mlast in the skip path slows things down on x86 */
|
/* note: using mlast in the skip path slows things down on x86 */
|
||||||
|
@@ -95,13 +107,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* miss: check if next character is part of pattern */
|
/* miss: check if next character is part of pattern */
|
||||||
if (!BLOOM(mask, s[i+m]))
|
if (!STRINGLIB_BLOOM(mask, s[i+m]))
|
||||||
i = i + m;
|
i = i + m;
|
||||||
else
|
else
|
||||||
i = i + skip;
|
i = i + skip;
|
||||||
} else {
|
} else {
|
||||||
/* skip: check if next character is part of pattern */
|
/* skip: check if next character is part of pattern */
|
||||||
if (!BLOOM(mask, s[i+m]))
|
if (!STRINGLIB_BLOOM(mask, s[i+m]))
|
||||||
i = i + m;
|
i = i + m;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -110,10 +122,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
/* create compressed boyer-moore delta 1 table */
|
/* create compressed boyer-moore delta 1 table */
|
||||||
|
|
||||||
/* process pattern[0] outside the loop */
|
/* process pattern[0] outside the loop */
|
||||||
BLOOM_ADD(mask, p[0]);
|
STRINGLIB_BLOOM_ADD(mask, p[0]);
|
||||||
/* process pattern[:0:-1] */
|
/* process pattern[:0:-1] */
|
||||||
for (i = mlast; i > 0; i--) {
|
for (i = mlast; i > 0; i--) {
|
||||||
BLOOM_ADD(mask, p[i]);
|
STRINGLIB_BLOOM_ADD(mask, p[i]);
|
||||||
if (p[i] == p[0])
|
if (p[i] == p[0])
|
||||||
skip = i - 1;
|
skip = i - 1;
|
||||||
}
|
}
|
||||||
|
@@ -128,13 +140,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||||
/* got a match! */
|
/* got a match! */
|
||||||
return i;
|
return i;
|
||||||
/* miss: check if previous character is part of pattern */
|
/* miss: check if previous character is part of pattern */
|
||||||
if (!BLOOM(mask, s[i-1]))
|
if (!STRINGLIB_BLOOM(mask, s[i-1]))
|
||||||
i = i - m;
|
i = i - m;
|
||||||
else
|
else
|
||||||
i = i - skip;
|
i = i - skip;
|
||||||
} else {
|
} else {
|
||||||
/* skip: check if previous character is part of pattern */
|
/* skip: check if previous character is part of pattern */
|
||||||
if (!BLOOM(mask, s[i-1]))
|
if (!STRINGLIB_BLOOM(mask, s[i-1]))
|
||||||
i = i - m;
|
i = i - m;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -190,12 +190,22 @@ PyUnicode_GetMax(void)
|
||||||
|
|
||||||
/* the linebreak mask is set up by Unicode_Init below */
|
/* the linebreak mask is set up by Unicode_Init below */
|
||||||
|
|
||||||
|
#if LONG_BIT >= 128
|
||||||
|
#define BLOOM_WIDTH 128
|
||||||
|
#elif LONG_BIT >= 64
|
||||||
|
#define BLOOM_WIDTH 64
|
||||||
|
#elif LONG_BIT >= 32
|
||||||
|
#define BLOOM_WIDTH 32
|
||||||
|
#else
|
||||||
|
#error "LONG_BIT is smaller than 32"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define BLOOM_MASK unsigned long
|
#define BLOOM_MASK unsigned long
|
||||||
|
|
||||||
static BLOOM_MASK bloom_linebreak;
|
static BLOOM_MASK bloom_linebreak;
|
||||||
|
|
||||||
#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1)))))
|
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
|
||||||
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & (LONG_BIT - 1)))))
|
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
|
||||||
|
|
||||||
#define BLOOM_LINEBREAK(ch) \
|
#define BLOOM_LINEBREAK(ch) \
|
||||||
((ch) < 128U ? ascii_linebreak[(ch)] : \
|
((ch) < 128U ? ascii_linebreak[(ch)] : \
|
||||||
|
@@ -205,7 +215,7 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
|
||||||
{
|
{
|
||||||
/* calculate simple bloom-style bitmask for a given unicode string */
|
/* calculate simple bloom-style bitmask for a given unicode string */
|
||||||
|
|
||||||
long mask;
|
BLOOM_MASK mask;
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
|
|
||||||
mask = 0;
|
mask = 0;
|
||||||
|
|
Loading…
Reference in New Issue