mirror of https://github.com/python/cpython
Issue #19327: Fixed handling of regular expressions with very large character sets.
This commit is contained in:
commit
8b150ecfc9
|
@ -339,7 +339,7 @@ def _optimize_unicode(charset, fixup):
|
||||||
else:
|
else:
|
||||||
code = 'I'
|
code = 'I'
|
||||||
# Convert block indices to byte array of 256 bytes
|
# Convert block indices to byte array of 256 bytes
|
||||||
mapping = array.array('b', mapping).tobytes()
|
mapping = array.array('B', mapping).tobytes()
|
||||||
# Convert byte array to word array
|
# Convert byte array to word array
|
||||||
mapping = array.array(code, mapping)
|
mapping = array.array(code, mapping)
|
||||||
assert mapping.itemsize == _sre.CODESIZE
|
assert mapping.itemsize == _sre.CODESIZE
|
||||||
|
|
|
@ -482,6 +482,9 @@ class ReTests(unittest.TestCase):
|
||||||
"\u2222").group(1), "\u2222")
|
"\u2222").group(1), "\u2222")
|
||||||
self.assertEqual(re.match("([\u2222\u2223])",
|
self.assertEqual(re.match("([\u2222\u2223])",
|
||||||
"\u2222", re.UNICODE).group(1), "\u2222")
|
"\u2222", re.UNICODE).group(1), "\u2222")
|
||||||
|
r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
|
||||||
|
self.assertEqual(re.match(r,
|
||||||
|
"\uff01", re.UNICODE).group(), "\uff01")
|
||||||
|
|
||||||
def test_big_codesize(self):
|
def test_big_codesize(self):
|
||||||
# Issue #1160
|
# Issue #1160
|
||||||
|
|
|
@ -19,6 +19,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #19327: Fixed handling of regular expressions with very large character sets.
|
||||||
|
|
||||||
- Issue #17400: New 'is_global' attribute for ipaddress to tell if an address
|
- Issue #17400: New 'is_global' attribute for ipaddress to tell if an address
|
||||||
is allocated by IANA for global or private networks.
|
is allocated by IANA for global or private networks.
|
||||||
|
|
||||||
|
|
|
@ -447,7 +447,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
||||||
count = *(set++);
|
count = *(set++);
|
||||||
|
|
||||||
if (sizeof(SRE_CODE) == 2) {
|
if (sizeof(SRE_CODE) == 2) {
|
||||||
block = ((char*)set)[ch >> 8];
|
block = ((unsigned char*)set)[ch >> 8];
|
||||||
set += 128;
|
set += 128;
|
||||||
if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
|
if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
|
||||||
return ok;
|
return ok;
|
||||||
|
@ -457,7 +457,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
||||||
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
|
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
|
||||||
* warnings when c's type supports only numbers < N+1 */
|
* warnings when c's type supports only numbers < N+1 */
|
||||||
if (!(ch & ~65535))
|
if (!(ch & ~65535))
|
||||||
block = ((char*)set)[ch >> 8];
|
block = ((unsigned char*)set)[ch >> 8];
|
||||||
else
|
else
|
||||||
block = -1;
|
block = -1;
|
||||||
set += 64;
|
set += 64;
|
||||||
|
|
Loading…
Reference in New Issue