2023-11-04 11:56:58 -03:00
|
|
|
import unittest
|
2023-11-06 10:32:35 -04:00
|
|
|
from test.test_tools import skip_if_missing, imports_under_tool
|
2023-11-04 11:56:58 -03:00
|
|
|
from test import support
|
|
|
|
from test.support.hypothesis_helper import hypothesis
|
|
|
|
|
|
|
|
# Short aliases for the hypothesis entry points used by the tests below.
given = hypothesis.given
example = hypothesis.example
st = hypothesis.strategies


# NOTE(review): presumably skips this whole module when the "unicode" tool
# is not available -- confirm against test.test_tools.
skip_if_missing("unicode")

# The dawg module is imported inside the imports_under_tool("unicode")
# context; presumably that context makes the tool's directory importable.
with imports_under_tool("unicode"):
    from dawg import (
        Dawg,
        build_compression_dawg,
        inverse_lookup,
        lookup,
    )
|
|
|
|
|
|
|
|
|
|
|
|
@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Draw a list of ``(name, character)`` pairs.

    A size is drawn between *min_length* and *max_length* (inclusive), then
    a set of that many distinct names (1 to 10 letters taken from "abcd")
    and a set of that many distinct characters.  The two sets are zipped
    together, so the pairing follows the sets' iteration order.
    """
    size = draw(st.integers(min_value=min_length, max_value=max_length))

    # The draws stay in the order: size, names, characters.
    name_strategy = st.text("abcd", min_size=1, max_size=10)
    names = draw(st.sets(name_strategy, min_size=size, max_size=size))

    char_strategy = st.characters()
    characters = draw(st.sets(char_strategy, min_size=size, max_size=size))

    pairs = zip(names, characters)
    return list(pairs)
|
|
|
|
|
|
|
|
|
|
|
|
class TestDawg(unittest.TestCase):
    """Exercise the directed acyclic word graph (DAWG) that stores the
    unicode character names for unicodedata.  These tests were ported from
    PyPy.
    """

    def test_dawg_direct_simple(self):
        # (word, value) pairs, inserted in exactly this order.
        entries = [
            ("a", -4),
            ("c", -2),
            ("cat", -1),
            ("catarr", 0),
            ("catnip", 1),
            ("zcatnip", 5),
        ]

        dawg = Dawg()
        for word, value in entries:
            dawg.insert(word, value)
        packed, data, inverse = dawg.finish()

        # Forward direction: name (as bytes) -> value.
        for word, value in entries:
            self.assertEqual(lookup(packed, data, word.encode("ascii")), value)
        # Names that were never inserted, including a proper prefix and an
        # over-long extension of an inserted name, must raise KeyError.
        for absent in (b"b", b"catni", b"catnipp"):
            with self.assertRaises(KeyError):
                lookup(packed, data, absent)

        # Inverse direction: value -> name (as bytes).
        for word, value in entries:
            self.assertEqual(
                inverse_lookup(packed, inverse, value), word.encode("ascii")
            )
        with self.assertRaises(KeyError):
            inverse_lookup(packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        # finish() on a Dawg with no inserted entries must raise ValueError.
        empty = Dawg()
        with self.assertRaises(ValueError):
            empty.finish()

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        # Capture stdout so the builder's debug prints stay out of the test
        # output; the captured text itself is not inspected.
        with support.captured_stdout():
            # Building is the whole test: per the original note, the build
            # step also checks its own result.
            build_compression_dawg(data)
|