Fix _sre.CODESIZE on 64-bit machines in UCS-4 mode. Fixes #931848.

Backported to 2.3.
This commit is contained in:
Martin v. Löwis 2004-05-07 07:18:13 +00:00
parent 156c49ad1c
commit 7d9c6c7e8c
3 changed files with 15 additions and 4 deletions

View File

@ -333,14 +333,16 @@ def _optimize_unicode(charset, fixup):
block = block + 1
data = data + _mk_bitmap(chunk)
header = [block]
if MAXCODE == 65535:
if _sre.CODESIZE == 2:
code = 'H'
else:
code = 'L'
code = 'I'
# Convert block indices to byte array of 256 bytes
mapping = array.array('b', mapping).tostring()
# Convert byte array to word array
header = header + array.array(code, mapping).tolist()
mapping = array.array(code, mapping)
assert mapping.itemsize == _sre.CODESIZE
header = header + mapping.tolist()
data[0:0] = header
return [(BIGCHARSET, data)]

View File

@ -497,6 +497,15 @@ class ReTests(unittest.TestCase):
self.assert_(re.compile('bug_926075') is not
re.compile(eval("u'bug_926075'")))
def test_bug_931848(self):
try:
unicode
except NameError:
pass
pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
self.assertEqual(re.compile(pattern).split("a.b.c"),
['a','b','c'])
def run_re_tests():
from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:

View File

@ -16,7 +16,7 @@
/* size of a code word (must be unsigned short or larger, and
large enough to hold a Py_UNICODE character) */
#ifdef Py_UNICODE_WIDE
#define SRE_CODE unsigned long
#define SRE_CODE Py_UCS4
#else
#define SRE_CODE unsigned short
#endif