cpython/Lib/test/test_unicodedata.py

""" Test script for the unicodedata module.

    Written by Marc-Andre Lemburg (mal@lemburg.com).

    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
import sha

encoding = 'utf-8'

def test_methods():
    """Return a SHA hex digest summarising the Unicode string methods.

    For every code point in range(65536) the results of the predicate
    methods and the case-mapping methods are collected, both for the
    single character and for the character followed by a short ASCII
    suffix.  The collected pieces are joined, encoded with the
    module-level ``encoding`` and fed into one running digest.
    """
    def flag(truth):
        # Render a predicate result as the text u'1' / u'0'.
        if truth:
            return u'1'
        return u'0'

    # Predicate method names, in the order they enter the digest ...
    predicates = ('isalnum', 'isalpha', 'isdecimal', 'isdigit', 'islower',
                  'isnumeric', 'isspace', 'istitle', 'isupper')
    # ... and the suffix appended to the character for the
    # multi-character variant of the predicate at the same position.
    suffixes = (u'abc', u'abc', u'123', u'123', u'abc',
                u'123', u' \t', u'abc', u'ABC')

    digest = sha.sha()
    for codepoint in range(65536):
        ch = unichr(codepoint)
        pieces = []

        # Predicates (single char)
        for name in predicates:
            pieces.append(flag(getattr(ch, name)()))

        # Predicates (multiple chars)
        for name, suffix in zip(predicates, suffixes):
            pieces.append(flag(getattr(ch + suffix, name)()))

        # Mappings (single char)
        pieces.append(ch.lower())
        pieces.append(ch.upper())
        pieces.append(ch.title())

        # Mappings (multiple chars)
        lower_tail = ch + u'abc'
        upper_tail = ch + u'ABC'
        pieces.append(lower_tail.lower())
        pieces.append(upper_tail.upper())
        pieces.append(lower_tail.title())
        pieces.append(upper_tail.title())

        digest.update(u''.join(pieces).encode(encoding))
    return digest.hexdigest()

def test_unicodedata():
    """Return a SHA hex digest summarising the unicodedata lookups.

    For every code point in range(65536) the digit, numeric and decimal
    values (each requested with -1 as the fallback argument), the
    category, bidirectional class, decomposition, mirrored flag and
    combining class are concatenated as strings and fed into one
    running digest.

    The ``unicodedata`` module is looked up as a global when the
    function runs, so it has to be imported before the first call.
    """
    digest = sha.sha()
    for codepoint in range(65536):
        ch = unichr(codepoint)
        fields = []

        # Numeric properties; -1 is the value used when a lookup has
        # nothing to report for this character.
        for lookup in (unicodedata.digit,
                       unicodedata.numeric,
                       unicodedata.decimal):
            fields.append(str(lookup(ch, -1)))

        # Properties that are already returned as strings.
        fields.append(unicodedata.category(ch))
        fields.append(unicodedata.bidirectional(ch))
        fields.append(unicodedata.decomposition(ch))

        # Integer-valued properties, stringified for the digest.
        fields.append(str(unicodedata.mirrored(ch)))
        fields.append(str(unicodedata.combining(ch)))

        digest.update(''.join(fields))
    return digest.hexdigest()

### Run tests

print 'Testing Unicode Database...'
print 'Methods:',
print test_methods()

# In case unicodedata is not available, this will raise an ImportError,
# but still test the above cases...
import unicodedata
print 'Functions:',
print test_unicodedata()

# Some additional checks of the API:
print 'API:',

assert unicodedata.digit(u'A',None) is None
assert unicodedata.digit(u'9') == 9
assert unicodedata.digit(u'\u215b',None) is None
assert unicodedata.digit(u'\u2468') == 9

assert unicodedata.numeric(u'A',None) is None
assert unicodedata.numeric(u'9') == 9
assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0

assert unicodedata.decimal(u'A',None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b',None) is None
assert unicodedata.decimal(u'\u2468',None) is None

assert unicodedata.category(u'\uFFFE') == 'Cn'
assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'

assert unicodedata.bidirectional(u'\uFFFE') == ''
assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'

assert unicodedata.decomposition(u'\uFFFE') == ''
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'

assert unicodedata.mirrored(u'\uFFFE') == 0
assert unicodedata.mirrored(u'a') == 0
assert unicodedata.mirrored(u'\u2201') == 1

assert unicodedata.combining(u'\uFFFE') == 0
assert unicodedata.combining(u'a') == 0
assert unicodedata.combining(u'\u20e1') == 230

print 'ok'
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00			`""" Test script for the unicodedata module.`

Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`Written by Marc-Andre Lemburg (mal@lemburg.com).`
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.`
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00
			`"""#"`
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`import sha`
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00
Fixed encoding to use an endianness independent format. 2000-09-27 09:24:34 -03:00			`encoding = 'utf-8'`

Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`def test_methods():`

			`h = sha.sha()`
			`for i in range(65536):`
			`char = unichr(i)`
			`data = [`
Make reindent.py happy (convert everything to 4-space indents!). 2000-10-23 14:22:08 -03:00
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`# Predicates (single char)`
			`char.isalnum() and u'1' or u'0',`
			`char.isalpha() and u'1' or u'0',`
			`char.isdecimal() and u'1' or u'0',`
			`char.isdigit() and u'1' or u'0',`
			`char.islower() and u'1' or u'0',`
			`char.isnumeric() and u'1' or u'0',`
			`char.isspace() and u'1' or u'0',`
			`char.istitle() and u'1' or u'0',`
			`char.isupper() and u'1' or u'0',`
Make reindent.py happy (convert everything to 4-space indents!). 2000-10-23 14:22:08 -03:00
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`# Predicates (multiple chars)`
			`(char + u'abc').isalnum() and u'1' or u'0',`
			`(char + u'abc').isalpha() and u'1' or u'0',`
			`(char + u'123').isdecimal() and u'1' or u'0',`
			`(char + u'123').isdigit() and u'1' or u'0',`
			`(char + u'abc').islower() and u'1' or u'0',`
			`(char + u'123').isnumeric() and u'1' or u'0',`
			`(char + u' \t').isspace() and u'1' or u'0',`
			`(char + u'abc').istitle() and u'1' or u'0',`
			`(char + u'ABC').isupper() and u'1' or u'0',`

			`# Mappings (single char)`
			`char.lower(),`
			`char.upper(),`
			`char.title(),`
Make reindent.py happy (convert everything to 4-space indents!). 2000-10-23 14:22:08 -03:00
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`# Mappings (multiple chars)`
			`(char + u'abc').lower(),`
			`(char + u'ABC').upper(),`
			`(char + u'abc').title(),`
			`(char + u'ABC').title(),`
Make reindent.py happy (convert everything to 4-space indents!). 2000-10-23 14:22:08 -03:00
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`]`
Fixed encoding to use an endianness independent format. 2000-09-27 09:24:34 -03:00			`h.update(u''.join(data).encode(encoding))`
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`return h.hexdigest()`

			`def test_unicodedata():`

			`h = sha.sha()`
			`for i in range(65536):`
			`char = unichr(i)`
			`data = [`
			`# Properties`
			`str(unicodedata.digit(char, -1)),`
			`str(unicodedata.numeric(char, -1)),`
			`str(unicodedata.decimal(char, -1)),`
			`unicodedata.category(char),`
			`unicodedata.bidirectional(char),`
			`unicodedata.decomposition(char),`
			`str(unicodedata.mirrored(char)),`
			`str(unicodedata.combining(char)),`
Make reindent.py happy (convert everything to 4-space indents!). 2000-10-23 14:22:08 -03:00			`]`
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`h.update(''.join(data))`
			`return h.hexdigest()`

			`### Run tests`

			`print 'Testing Unicode Database...'`
			`print 'Methods:',`
			`print test_methods()`

			`# In case unicodedata is not available, this will raise an ImportError,`
			`# but still test the above cases...`
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00			`import unicodedata`
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`print 'Functions:',`
			`print test_unicodedata()`
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00
Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`# Some additional checks of the API:`
			`print 'API:',`
Marc-Andre Lemburg: The attached patch set includes a workaround to get Python with Unicode compile on BSDI 4.x (courtesy Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections w/r to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found. 2000-03-28 16:29:59 -04:00
			`assert unicodedata.digit(u'A',None) is None`
			`assert unicodedata.digit(u'9') == 9`
			`assert unicodedata.digit(u'\u215b',None) is None`
			`assert unicodedata.digit(u'\u2468') == 9`

			`assert unicodedata.numeric(u'A',None) is None`
			`assert unicodedata.numeric(u'9') == 9`
			`assert unicodedata.numeric(u'\u215b') == 0.125`
			`assert unicodedata.numeric(u'\u2468') == 9.0`

			`assert unicodedata.decimal(u'A',None) is None`
			`assert unicodedata.decimal(u'9') == 9`
			`assert unicodedata.decimal(u'\u215b',None) is None`
			`assert unicodedata.decimal(u'\u2468',None) is None`

			`assert unicodedata.category(u'\uFFFE') == 'Cn'`
			`assert unicodedata.category(u'a') == 'Ll'`
			`assert unicodedata.category(u'A') == 'Lu'`

			`assert unicodedata.bidirectional(u'\uFFFE') == ''`
			`assert unicodedata.bidirectional(u' ') == 'WS'`
			`assert unicodedata.bidirectional(u'A') == 'L'`

			`assert unicodedata.decomposition(u'\uFFFE') == ''`
			`assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'`

			`assert unicodedata.mirrored(u'\uFFFE') == 0`
			`assert unicodedata.mirrored(u'a') == 0`
			`assert unicodedata.mirrored(u'\u2201') == 1`

			`assert unicodedata.combining(u'\uFFFE') == 0`
			`assert unicodedata.combining(u'a') == 0`
			`assert unicodedata.combining(u'\u20e1') == 230`

Added test suite for the complete Unicode database. The test previously only tested a few cases. 2000-09-26 13:18:58 -03:00			`print 'ok'`