342 lines
11 KiB
Python
342 lines
11 KiB
Python
#
|
|
# Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
|
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
# SUCH DAMAGE.
|
|
#
|
|
|
|
|
|
# Generate PEP-3101 format strings.
|
|
|
|
|
|
import os, sys, locale, random
|
|
import platform, subprocess
|
|
from distutils.spawn import find_executable
|
|
|
|
import _decimal as C
|
|
import _pydecimal as P
|
|
|
|
|
|
# Language names used as the candidate locale list when running on Windows.
windows_lang_strings = """
    chinese chinese-simplified chinese-traditional czech danish
    dutch belgian english australian canadian english-nz
    english-uk english-us finnish french french-belgian
    french-canadian french-swiss german german-austrian
    german-swiss greek hungarian icelandic italian italian-swiss
    japanese korean norwegian norwegian-bokmal norwegian-nynorsk
    polish portuguese portuguese-brazil russian slovak spanish
    spanish-mexican spanish-modern swedish turkish
""".split()
|
|
|
|
# Encoding to use for specific locale names, keyed by the string returned
# from locale.setlocale(locale.LC_CTYPE).  Locales that are not listed fall
# back to locale.getpreferredencoding().
preferred_encoding = {
    # Czech
    "cs_CZ": "ISO8859-2",
    "cs_CZ.iso88592": "ISO8859-2",
    "czech": "ISO8859-2",
    # Estonian
    "eesti": "ISO8859-1",
    "estonian": "ISO8859-1",
    "et_EE": "ISO8859-15",
    "et_EE.ISO-8859-15": "ISO8859-15",
    "et_EE.iso885915": "ISO8859-15",
    "et_EE.iso88591": "ISO8859-1",
    # Finnish
    "fi_FI.iso88591": "ISO8859-1",
    "fi_FI": "ISO8859-15",
    "fi_FI@euro": "ISO8859-15",
    "fi_FI.iso885915@euro": "ISO8859-15",
    "finnish": "ISO8859-1",
    # Latvian
    "lv_LV": "ISO8859-13",
    "lv_LV.iso885913": "ISO8859-13",
    # Norwegian
    "nb_NO": "ISO8859-1",
    "nb_NO.iso88591": "ISO8859-1",
    "bokmal": "ISO8859-1",
    "nn_NO": "ISO8859-1",
    "nn_NO.iso88591": "ISO8859-1",
    "no_NO": "ISO8859-1",
    "norwegian": "ISO8859-1",
    "nynorsk": "ISO8859-1",
    # Russian
    "ru_RU": "ISO8859-5",
    "ru_RU.iso88595": "ISO8859-5",
    "russian": "ISO8859-5",
    "ru_RU.KOI8-R": "KOI8-R",
    "ru_RU.koi8r": "KOI8-R",
    "ru_RU.CP1251": "CP1251",
    "ru_RU.cp1251": "CP1251",
    # Slovak
    "sk_SK": "ISO8859-2",
    "sk_SK.iso88592": "ISO8859-2",
    "slovak": "ISO8859-2",
    # Swedish (Finland)
    "sv_FI": "ISO8859-1",
    "sv_FI.iso88591": "ISO8859-1",
    "sv_FI@euro": "ISO8859-15",
    "sv_FI.iso885915@euro": "ISO8859-15",
    # Ukrainian
    "uk_UA": "KOI8-U",
    "uk_UA.koi8u": "KOI8-U",
}
|
|
|
|
# Integer literals of every length from 0 to 22 digits: the empty string,
# "1", "12", ... up to "1234567890123456789012".
integers = [("1234567890" * 3)[:ndigits] for ndigits in range(23)]
|
|
|
|
# Small decimal literals: signed zeros in integer, fractional and exponent
# notation, followed by a few non-zero values.
numbers = (
    "0 -0 +0 "
    "0.0 -0.0 +0.0 "
    "0e0 -0e0 +0e0 "
    ".0 -.0 "
    ".1 -.1 "
    "1.1 -1.1 "
    "1e1 -1e1"
).split()
|
|
|
|
# Get the list of available locales.
#
# On Windows the fixed table of language strings is used.  Elsewhere the
# list starts as ['C'] and is replaced by the contents of Ubuntu's
# supported.d file when that file exists, or else by the output of
# `locale -a` when a `locale` executable can be found.
if platform.system() == 'Windows':
    locale_list = windows_lang_strings
else:
    locale_list = ['C']
    if os.path.isfile("/var/lib/locales/supported.d/local"):
        # On Ubuntu, `locale -a` gives the wrong case for some locales,
        # so we get the correct names directly:
        with open("/var/lib/locales/supported.d/local") as f:
            # The locale name is the first whitespace-separated field.
            # Blank lines are skipped along with comments: split()[0] on
            # a whitespace-only line would raise IndexError.
            locale_list = [loc.split()[0] for loc in f
                           if loc.strip() and not loc.startswith('#')]
    elif find_executable('locale'):
        locale_list = subprocess.Popen(["locale", "-a"],
                          stdout=subprocess.PIPE).communicate()[0]
        try:
            locale_list = locale_list.decode()
        except UnicodeDecodeError:
            # Some distributions insist on using latin-1 characters
            # in their locale names.
            locale_list = locale_list.decode('latin-1')
        locale_list = locale_list.split('\n')

# Splitting on '\n' leaves an empty entry after the final newline.
try:
    locale_list.remove('')
except ValueError:
    pass
|
|
|
|
# Debian
#
# Every two-field, non-comment line of /etc/locale.alias names an alias in
# its first field; such names are dropped from locale_list.
if os.path.isfile("/etc/locale.alias"):
    with open("/etc/locale.alias") as f:
        while True:
            try:
                line = f.readline()
            except UnicodeDecodeError:
                # Undecodable input: skip it and keep reading.
                continue
            if not line:
                # readline() returns the empty string only at end of file.
                break
            if line.startswith('#'):
                continue
            x = line.split()
            if len(x) == 2 and x[0] in locale_list:
                locale_list.remove(x[0])
|
|
|
|
# FreeBSD
#
# Drop the locales listed below from locale_list when they are present.
if platform.system() == 'FreeBSD':
    # http://www.freebsd.org/cgi/query-pr.cgi?pr=142173
    # en_GB.US-ASCII has 163 as the currency symbol.
    for loc in ('it_CH.ISO8859-1', 'it_CH.ISO8859-15', 'it_CH.UTF-8',
                'it_IT.ISO8859-1', 'it_IT.ISO8859-15', 'it_IT.UTF-8',
                'sl_SI.ISO8859-2', 'sl_SI.UTF-8',
                'en_GB.US-ASCII'):
        if loc in locale_list:
            locale_list.remove(loc)
|
|
|
|
# Helpers for printing a testcase in the format of the IBM tests (for runtest.c):
|
|
def get_preferred_encoding():
    """Return the encoding to use for the current LC_CTYPE locale.

    The name reported by locale.setlocale(locale.LC_CTYPE) is looked up in
    the preferred_encoding table; names that are not listed there fall back
    to locale.getpreferredencoding().
    """
    current = locale.setlocale(locale.LC_CTYPE)
    if current not in preferred_encoding:
        return locale.getpreferredencoding()
    return preferred_encoding[current]
|
|
|
|
def printit(testno, s, fmt, encoding=None):
    """Write one 'xfmt' test case line for format(Decimal(s), fmt) to stdout.

    testno   -- integer used as the numeric suffix of the test name
    s        -- decimal literal that is formatted (printed unchanged)
    fmt      -- format specification passed to format()
    encoding -- encoding used to escape fmt and the result; when empty or
                None, get_preferred_encoding() is consulted

    Both fmt and the result are printed as the escaped text found inside
    the repr of their encoded bytes.  The result is wrapped in double
    quotes when it contains a single quote, otherwise in single quotes.
    Any exception is reported on stderr together with s and fmt instead of
    being raised.
    """
    encoding = encoding or get_preferred_encoding()
    try:
        result = format(P.Decimal(s), fmt)
        # str(bytes) yields "b'...'"; the slice strips the b' prefix and
        # the closing quote, leaving only the escaped payload.
        fmt = str(fmt.encode(encoding))[2:-1]
        result = str(result.encode(encoding))[2:-1]
        if "'" in result:
            template = "xfmt%d format %s '%s' -> \"%s\"\n"
        else:
            template = "xfmt%d format %s '%s' -> '%s'\n"
        sys.stdout.write(template % (testno, s, fmt, result))
    except Exception as err:
        sys.stderr.write("%s %s %s\n" % (err, s, fmt))
|
|
|
|
|
|
# Check if an integer can be converted to a valid fill character.
def check_fillchar(i):
    """Return chr(i) if it is usable as a fill character, else None.

    The character must exist (chr() succeeds), survive a UTF-8
    encode/decode round trip, and be accepted by P.Decimal as the fill
    character of a format specification.

    Only ordinary errors are treated as "not a fill character".
    KeyboardInterrupt and SystemExit propagate, so a caller that retries
    in a loop can still be interrupted.
    """
    try:
        c = chr(i)
        c.encode('utf-8').decode()
        format(P.Decimal(0), c + '<19g')
        return c
    except Exception:
        return None
|
|
|
|
# Generate all unicode characters that are accepted as
# fill characters by decimal.py.
def all_fillchars():
    """Yield, in code point order, each character check_fillchar() accepts.

    Every integer in range(0, 0x110002) is tried; values for which
    check_fillchar() returns a false result are skipped.
    """
    yield from filter(None, map(check_fillchar, range(0, 0x110002)))
|
|
|
|
# Return random fill character.
def rand_fillchar():
    """Draw random integers below 0x110002 until check_fillchar() accepts one.

    Returns the accepted character.
    """
    while True:
        candidate = check_fillchar(random.randrange(0, 0x110002))
        if candidate:
            return candidate
|
|
|
|
# Generate random format strings
# [[fill]align][sign][0][width][,][.precision][type]
def rand_format(fill, typespec='EeGgFfn%'):
    """Return a random format specification.

    fill     -- fill character emitted in front of the alignment character
                whenever the fill+align component is selected
    typespec -- string of candidate presentation type characters

    A random subset of the seven components (fill+align, sign, zero
    padding, width, thousands separator, precision, type) is emitted in
    grammar order.  Zero padding is dropped when fill+align was emitted,
    and 'n' is not offered as a type when the thousands separator is
    present.  If no candidate type character remains, the type is omitted
    instead of raising IndexError from random.choice().
    """
    active = sorted(random.sample(range(7), random.randrange(8)))
    have_align = False
    parts = []
    for elem in active:
        if elem == 0: # fill+align
            parts.append(fill)
            parts.append(random.choice('<>=^'))
            have_align = True
        elif elem == 1: # sign
            parts.append(random.choice('+- '))
        elif elem == 2 and not have_align: # zeropad
            parts.append('0')
        elif elem == 3: # width
            parts.append(str(random.randrange(1, 100)))
        elif elem == 4: # thousands separator
            parts.append(',')
        elif elem == 5: # prec
            parts.append('.')
            parts.append(str(random.randrange(100)))
        elif elem == 6: # type
            # 'n' cannot be combined with the ',' separator.
            if 4 in active:
                candidates = typespec.replace('n', '')
            else:
                candidates = typespec
            if candidates:
                parts.append(random.choice(candidates))
    return ''.join(parts)
|
|
|
|
# Partially brute force all possible format strings containing a thousands
# separator. Fall back to random where the runtime would become excessive.
# [[fill]align][sign][#][0][width][,][.precision][type]
def all_format_sep():
    """Yield format specifications that all contain the ',' separator.

    Alignment, fill, sign, zero padding, width and precision are
    enumerated exhaustively over small fixed sets.  The presentation type
    is picked at random for each combination rather than enumerated, to
    keep the number of generated strings manageable.

    Fill is forced to '' when there is no alignment and zero padding is
    forced to '' when there is one, so those combinations are visited
    twice (with independently chosen random types).
    """
    # Loop-invariant candidate lists, built once.
    widths = [''] + [str(y) for y in range(1, 15)] + ['101']
    precisions = [''] + ['.' + str(y) for y in range(15)]
    typecodes = ('', 'E', 'e', 'G', 'g', 'F', 'f', '%')

    for align in ('', '<', '>', '=', '^'):
        for fill in ('', 'x'):
            if align == '':
                fill = ''
            for sign in ('', '+', '-', ' '):
                for zeropad in ('', '0'):
                    if align != '':
                        zeropad = ''
                    for width in widths:
                        for prec in precisions:
                            typecode = random.choice(typecodes)
                            yield ''.join((fill, align, sign, zeropad,
                                           width, ',', prec, typecode))
|
|
|
|
# Partially brute force all possible format strings with an 'n' specifier.
# [[fill]align][sign][#][0][width][,][.precision][type]
def all_format_loc():
    """Yield every enumerated format specification ending in type 'n'.

    Alignment, fill, sign, zero padding, width and precision are combined
    exhaustively.  Fill is only kept together with an alignment character
    and zero padding only without one, so the suppressed combinations are
    produced twice.
    """
    widths = [''] + [str(n) for n in range(1, 20)] + ['101']
    precisions = [''] + ['.' + str(n) for n in range(1, 20)]

    for align in ('', '<', '>', '=', '^'):
        for fill_choice in ('', 'x'):
            fill = fill_choice if align else ''
            for sign in ('', '+', '-', ' '):
                for pad_choice in ('', '0'):
                    zeropad = '' if align else pad_choice
                    for width in widths:
                        for prec in precisions:
                            yield fill + align + sign + zeropad + width + prec + 'n'
|
|
|
|
# Generate random format strings with a unicode fill character
# [[fill]align][sign][#][0][width][,][.precision][type]
def randfill(fill):
    """Return a random format specification that starts with fill+align.

    fill -- fill character; str(fill) is always emitted first, followed by
            a random alignment character

    A random subset of sign, width, thousands separator, precision and
    type follows in grammar order.  The 'n' type is not offered when the
    thousands separator was selected.
    """
    how_many = random.randrange(6)
    chosen = sorted(random.sample(range(5), how_many))
    with_separator = 2 in chosen

    pieces = [str(fill), random.choice('<>=^')]
    for field in chosen:
        if field == 0: # sign
            pieces.append(random.choice('+- '))
        elif field == 1: # width
            pieces.append(str(random.randrange(1, 100)))
        elif field == 2: # thousands separator
            pieces.append(',')
        elif field == 3: # prec
            pieces.append('.' + str(random.randrange(100)))
        elif field == 4: # type
            pieces.append(random.choice('EeGgFf%' if with_separator else 'EeGgFfn%'))
    return ''.join(pieces)
|
|
|
|
# Generate random format strings with random locale setting
# [[fill]align][sign][#][0][width][,][.precision][type]
def rand_locale():
    """Switch to a random locale and return a random 'n' format string.

    A locale is picked from locale_list and installed for LC_ALL; a
    locale.Error from setlocale() is ignored, leaving the current locale
    in place.  The returned specification is a random subset of
    fill+align, sign, zero padding, width and precision, always ending
    in the 'n' type.  Zero padding is dropped when fill+align was emitted.
    """
    try:
        locale.setlocale(locale.LC_ALL, random.choice(locale_list))
    except locale.Error:
        # Best effort: keep whatever locale is currently active.
        pass

    how_many = random.randrange(6)
    chosen = sorted(random.sample(range(5), how_many))
    aligned = False
    pieces = []
    for field in chosen:
        if field == 0: # fill+align
            pieces.append(chr(random.randrange(32, 128)))
            pieces.append(random.choice('<>=^'))
            aligned = True
        elif field == 1: # sign
            pieces.append(random.choice('+- '))
        elif field == 2 and not aligned: # zeropad
            pieces.append('0')
        elif field == 3: # width
            pieces.append(str(random.randrange(1, 100)))
        elif field == 4: # prec
            pieces.append('.' + str(random.randrange(100)))
    pieces.append('n')
    return ''.join(pieces)
|