mirror of https://github.com/python/cpython
gh-94808: improve comments and coverage of fastsearch.h (GH-96760)
This commit is contained in:
parent
4995f5f9a0
commit
69d9a08099
|
@ -341,6 +341,42 @@ class BaseTest:
|
||||||
self.checkequal(reference_find(p, text),
|
self.checkequal(reference_find(p, text),
|
||||||
text, 'find', p)
|
text, 'find', p)
|
||||||
|
|
||||||
|
def test_find_many_lengths(self):
|
||||||
|
haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)]
|
||||||
|
haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats]
|
||||||
|
|
||||||
|
needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)]
|
||||||
|
needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats]
|
||||||
|
|
||||||
|
for n, haystack1 in haystacks:
|
||||||
|
haystack2 = haystack1[:-1]
|
||||||
|
for m, needle in needles:
|
||||||
|
answer1 = 5 * (n - m) if m <= n else -1
|
||||||
|
self.assertEqual(haystack1.find(needle), answer1, msg=(n,m))
|
||||||
|
self.assertEqual(haystack2.find(needle), -1, msg=(n,m))
|
||||||
|
|
||||||
|
def test_adaptive_find(self):
|
||||||
|
# This would be very slow for the naive algorithm,
|
||||||
|
# but str.find() should be O(n + m).
|
||||||
|
for N in 1000, 10_000, 100_000, 1_000_000:
|
||||||
|
A, B = 'a' * N, 'b' * N
|
||||||
|
haystack = A + A + B + A + A
|
||||||
|
needle = A + B + B + A
|
||||||
|
self.checkequal(-1, haystack, 'find', needle)
|
||||||
|
self.checkequal(0, haystack, 'count', needle)
|
||||||
|
self.checkequal(len(haystack), haystack + needle, 'find', needle)
|
||||||
|
self.checkequal(1, haystack + needle, 'count', needle)
|
||||||
|
|
||||||
|
def test_find_with_memory(self):
|
||||||
|
# Test the "Skip with memory" path in the two-way algorithm.
|
||||||
|
for N in 1000, 3000, 10_000, 30_000:
|
||||||
|
needle = 'ab' * N
|
||||||
|
haystack = ('ab'*(N-1) + 'b') * 2
|
||||||
|
self.checkequal(-1, haystack, 'find', needle)
|
||||||
|
self.checkequal(0, haystack, 'count', needle)
|
||||||
|
self.checkequal(len(haystack), haystack + needle, 'find', needle)
|
||||||
|
self.checkequal(1, haystack + needle, 'count', needle)
|
||||||
|
|
||||||
def test_find_shift_table_overflow(self):
|
def test_find_shift_table_overflow(self):
|
||||||
"""When the table of 8-bit shifts overflows."""
|
"""When the table of 8-bit shifts overflows."""
|
||||||
N = 2**8 + 100
|
N = 2**8 + 100
|
||||||
|
@ -715,6 +751,18 @@ class BaseTest:
|
||||||
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
|
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
|
||||||
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
|
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
|
||||||
|
|
||||||
|
def test_replace_uses_two_way_maxcount(self):
|
||||||
|
# Test that maxcount works in _two_way_count in fastsearch.h
|
||||||
|
A, B = "A"*1000, "B"*1000
|
||||||
|
AABAA = A + A + B + A + A
|
||||||
|
ABBA = A + B + B + A
|
||||||
|
self.checkequal(AABAA + ABBA,
|
||||||
|
AABAA + ABBA, 'replace', ABBA, "ccc", 0)
|
||||||
|
self.checkequal(AABAA + "ccc",
|
||||||
|
AABAA + ABBA, 'replace', ABBA, "ccc", 1)
|
||||||
|
self.checkequal(AABAA + "ccc",
|
||||||
|
AABAA + ABBA, 'replace', ABBA, "ccc", 2)
|
||||||
|
|
||||||
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
|
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
|
||||||
'only applies to 32-bit platforms')
|
'only applies to 32-bit platforms')
|
||||||
def test_replace_overflow(self):
|
def test_replace_overflow(self):
|
||||||
|
|
|
@ -18,7 +18,8 @@
|
||||||
algorithm, which has worst-case O(n) runtime and best-case O(n/k).
|
algorithm, which has worst-case O(n) runtime and best-case O(n/k).
|
||||||
Also compute a table of shifts to achieve O(n/k) in more cases,
|
Also compute a table of shifts to achieve O(n/k) in more cases,
|
||||||
and often (data dependent) deduce larger shifts than pure C&P can
|
and often (data dependent) deduce larger shifts than pure C&P can
|
||||||
deduce. */
|
deduce. See stringlib_find_two_way_notes.txt in this folder for a
|
||||||
|
detailed explanation. */
|
||||||
|
|
||||||
#define FAST_COUNT 0
|
#define FAST_COUNT 0
|
||||||
#define FAST_SEARCH 1
|
#define FAST_SEARCH 1
|
||||||
|
@ -398,7 +399,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
|
||||||
if (window_last >= haystack_end) {
|
if (window_last >= haystack_end) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
LOG("Horspool skip");
|
LOG("Horspool skip\n");
|
||||||
}
|
}
|
||||||
no_shift:
|
no_shift:
|
||||||
window = window_last - len_needle + 1;
|
window = window_last - len_needle + 1;
|
||||||
|
@ -457,7 +458,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
|
||||||
if (window_last >= haystack_end) {
|
if (window_last >= haystack_end) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
LOG("Horspool skip");
|
LOG("Horspool skip\n");
|
||||||
}
|
}
|
||||||
window = window_last - len_needle + 1;
|
window = window_last - len_needle + 1;
|
||||||
assert((window[len_needle - 1] & TABLE_MASK) ==
|
assert((window[len_needle - 1] & TABLE_MASK) ==
|
||||||
|
|
|
@ -395,7 +395,7 @@ of their proof goes something like this (this is far from complete):
|
||||||
needle == (a + w) + (w + b), meaning there's a bad equality
|
needle == (a + w) + (w + b), meaning there's a bad equality
|
||||||
w == w, it's impossible for w + b to be bigger than both
|
w == w, it's impossible for w + b to be bigger than both
|
||||||
b and w + w + b, so this can't happen. We thus have all of
|
b and w + w + b, so this can't happen. We thus have all of
|
||||||
the ineuqalities with no question marks.
|
the inequalities with no question marks.
|
||||||
* By maximality, the right part is not a substring of the left
|
* By maximality, the right part is not a substring of the left
|
||||||
part. Thus, we have all of the inequalities involving no
|
part. Thus, we have all of the inequalities involving no
|
||||||
left-side question marks.
|
left-side question marks.
|
||||||
|
|
Loading…
Reference in New Issue