Issue #28563: Fixed possible DoS and arbitrary code execution when handle

plural form selections in the gettext module.  The expression parser now
supports exact syntax supported by GNU gettext.
This commit is contained in:
Serhiy Storchaka 2016-11-08 21:20:09 +02:00
commit 1c3fdd900d
3 changed files with 214 additions and 43 deletions

View File

@ -59,55 +59,139 @@ __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale') _default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
# Expression parsing for plural form selection.
#
# The gettext library supports a small subset of C syntax. The only
# incompatible difference is that integer literals starting with zero are
# decimal.
#
# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
_token_pattern = re.compile(r"""
(?P<WHITESPACES>[ \t]+) | # spaces and horizontal tabs
(?P<NUMBER>[0-9]+\b) | # decimal integer
(?P<NAME>n\b) | # only n is allowed
(?P<PARENTHESIS>[()]) |
(?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
# <=, >=, ==, !=, &&, ||,
# ? :
# unary and bitwise ops
# not allowed
(?P<INVALID>\w+|.) # invalid token
""", re.VERBOSE|re.DOTALL)
def _tokenize(plural):
for mo in re.finditer(_token_pattern, plural):
kind = mo.lastgroup
if kind == 'WHITESPACES':
continue
value = mo.group(kind)
if kind == 'INVALID':
raise ValueError('invalid token in plural form: %s' % value)
yield value
yield ''
def _error(value):
if value:
return ValueError('unexpected token in plural form: %s' % value)
else:
return ValueError('unexpected end of plural form')
_binary_ops = (
('||',),
('&&',),
('==', '!='),
('<', '>', '<=', '>='),
('+', '-'),
('*', '/', '%'),
)
_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops}
_c2py_ops = {'||': 'or', '&&': 'and', '/': '//'}
def _parse(tokens, priority=-1):
result = ''
nexttok = next(tokens)
while nexttok == '!':
result += 'not '
nexttok = next(tokens)
if nexttok == '(':
sub, nexttok = _parse(tokens)
result = '%s(%s)' % (result, sub)
if nexttok != ')':
raise ValueError('unbalanced parenthesis in plural form')
elif nexttok == 'n':
result = '%s%s' % (result, nexttok)
else:
try:
value = int(nexttok, 10)
except ValueError:
raise _error(nexttok) from None
result = '%s%d' % (result, value)
nexttok = next(tokens)
j = 100
while nexttok in _binary_ops:
i = _binary_ops[nexttok]
if i < priority:
break
# Break chained comparisons
if i in (3, 4) and j in (3, 4): # '==', '!=', '<', '>', '<=', '>='
result = '(%s)' % result
# Replace some C operators by their Python equivalents
op = _c2py_ops.get(nexttok, nexttok)
right, nexttok = _parse(tokens, i + 1)
result = '%s %s %s' % (result, op, right)
j = i
if j == priority == 4: # '<', '>', '<=', '>='
result = '(%s)' % result
if nexttok == '?' and priority <= 0:
if_true, nexttok = _parse(tokens, 0)
if nexttok != ':':
raise _error(nexttok)
if_false, nexttok = _parse(tokens)
result = '%s if %s else %s' % (if_true, result, if_false)
if priority == 0:
result = '(%s)' % result
return result, nexttok
def c2py(plural): def c2py(plural):
"""Gets a C expression as used in PO files for plural forms and returns a """Gets a C expression as used in PO files for plural forms and returns a
Python lambda function that implements an equivalent expression. Python function that implements an equivalent expression.
""" """
# Security check, allow only the "n" identifier
import token, tokenize if len(plural) > 1000:
tokens = tokenize.generate_tokens(io.StringIO(plural).readline) raise ValueError('plural form expression is too long')
try: try:
danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n'] result, nexttok = _parse(_tokenize(plural))
except tokenize.TokenError: if nexttok:
raise ValueError('plural forms expression error, maybe unbalanced parenthesis') raise _error(nexttok)
else:
if danger:
raise ValueError('plural forms expression could be dangerous')
# Replace some C operators by their Python equivalents depth = 0
plural = plural.replace('&&', ' and ') for c in result:
plural = plural.replace('||', ' or ') if c == '(':
depth += 1
expr = re.compile(r'\!([^=])') if depth > 20:
plural = expr.sub(' not \\1', plural) # Python compiler limit is about 90.
# The most complex example has 2.
# Regular expression and replacement function used to transform raise ValueError('plural form expression is too complex')
# "a?b:c" to "b if a else c". elif c == ')':
expr = re.compile(r'(.*?)\?(.*?):(.*)') depth -= 1
def repl(x):
return "(%s if %s else %s)" % (x.group(2), x.group(1),
expr.sub(repl, x.group(3)))
# Code to transform the plural expression, taking care of parentheses
stack = ['']
for c in plural:
if c == '(':
stack.append('')
elif c == ')':
if len(stack) == 1:
# Actually, we never reach this code, because unbalanced
# parentheses get caught in the security check at the
# beginning.
raise ValueError('unbalanced parenthesis in plural form')
s = expr.sub(repl, stack.pop())
stack[-1] += '(%s)' % s
else:
stack[-1] += c
plural = expr.sub(repl, stack.pop())
return eval('lambda n: int(%s)' % plural)
ns = {}
exec('''if True:
def func(n):
if not isinstance(n, int):
raise ValueError('Plural value must be an integer.')
return int(%s)
''' % result, ns)
return ns['func']
except RuntimeError:
# Recursion error can be raised in _parse() or exec().
raise ValueError('plural form expression is too complex')
def _expand_lang(loc): def _expand_lang(loc):

View File

@ -236,7 +236,9 @@ class PluralFormsTestCase(GettextBaseTest):
x = t.ngettext('There is %s file', 'There are %s files', 2) x = t.ngettext('There is %s file', 'There are %s files', 2)
eq(x, 'Hay %s ficheros') eq(x, 'Hay %s ficheros')
def test_hu(self): # Examples from http://www.gnu.org/software/gettext/manual/gettext.html
def test_ja(self):
eq = self.assertEqual eq = self.assertEqual
f = gettext.c2py('0') f = gettext.c2py('0')
s = ''.join([ str(f(x)) for x in range(200) ]) s = ''.join([ str(f(x)) for x in range(200) ])
@ -254,6 +256,12 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ]) s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
def test_lv(self):
eq = self.assertEqual
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2')
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "20111111111111111111101111111110111111111011111111101111111110111111111011111111101111111110111111111011111111111111111110111111111011111111101111111110111111111011111111101111111110111111111011111111")
def test_gd(self): def test_gd(self):
eq = self.assertEqual eq = self.assertEqual
f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2') f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2')
@ -267,6 +275,12 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ]) s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222") eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
def test_ro(self):
eq = self.assertEqual
f = gettext.c2py('n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2')
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "10111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222")
def test_lt(self): def test_lt(self):
eq = self.assertEqual eq = self.assertEqual
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2') f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2')
@ -279,6 +293,12 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ]) s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222") eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222")
def test_cs(self):
eq = self.assertEqual
f = gettext.c2py('(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2')
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "20111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
def test_pl(self): def test_pl(self):
eq = self.assertEqual eq = self.assertEqual
f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2') f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2')
@ -291,10 +311,73 @@ class PluralFormsTestCase(GettextBaseTest):
s = ''.join([ str(f(x)) for x in range(200) ]) s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333") eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333")
def test_ar(self):
eq = self.assertEqual
f = gettext.c2py('n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5')
s = ''.join([ str(f(x)) for x in range(200) ])
eq(s, "01233333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444445553333333344444444444444444444444444444444444444444444444444444444444444444444444444444444444444444")
def test_security(self): def test_security(self):
raises = self.assertRaises raises = self.assertRaises
# Test for a dangerous expression # Test for a dangerous expression
raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)") raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)")
# issue28563
raises(ValueError, gettext.c2py, '"(eval(foo) && ""')
raises(ValueError, gettext.c2py, 'f"{os.system(\'sh\')}"')
# Maximum recursion depth exceeded during compilation
raises(ValueError, gettext.c2py, 'n+'*10000 + 'n')
self.assertEqual(gettext.c2py('n+'*100 + 'n')(1), 101)
# MemoryError during compilation
raises(ValueError, gettext.c2py, '('*100 + 'n' + ')'*100)
# Maximum recursion depth exceeded in C to Python translator
raises(ValueError, gettext.c2py, '('*10000 + 'n' + ')'*10000)
self.assertEqual(gettext.c2py('('*20 + 'n' + ')'*20)(1), 1)
def test_chained_comparison(self):
# C doesn't chain comparison as Python so 2 == 2 == 2 gets different results
f = gettext.c2py('n == n == n')
self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
f = gettext.c2py('1 < n == n')
self.assertEqual(''.join(str(f(x)) for x in range(3)), '100')
f = gettext.c2py('n == n < 2')
self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
f = gettext.c2py('0 < n < 2')
self.assertEqual(''.join(str(f(x)) for x in range(3)), '111')
def test_decimal_number(self):
self.assertEqual(gettext.c2py('0123')(1), 123)
def test_invalid_syntax(self):
invalid_expressions = [
'x>1', '(n>1', 'n>1)', '42**42**42', '0xa', '1.0', '1e2',
'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n',
]
for expr in invalid_expressions:
with self.assertRaises(ValueError):
gettext.c2py(expr)
def test_nested_condition_operator(self):
self.assertEqual(gettext.c2py('n?1?2:3:4')(0), 4)
self.assertEqual(gettext.c2py('n?1?2:3:4')(1), 2)
self.assertEqual(gettext.c2py('n?1:3?4:5')(0), 4)
self.assertEqual(gettext.c2py('n?1:3?4:5')(1), 1)
def test_division(self):
f = gettext.c2py('2/n*3')
self.assertEqual(f(1), 6)
self.assertEqual(f(2), 3)
self.assertEqual(f(3), 0)
self.assertEqual(f(-1), -6)
self.assertRaises(ZeroDivisionError, f, 0)
def test_plural_number(self):
f = gettext.c2py('1')
self.assertEqual(f(1), 1)
self.assertRaises(ValueError, f, 1.0)
self.assertRaises(ValueError, f, '1')
self.assertRaises(ValueError, f, [])
self.assertRaises(ValueError, f, object())
class GNUTranslationParsingTest(GettextBaseTest): class GNUTranslationParsingTest(GettextBaseTest):
def test_plural_form_error_issue17898(self): def test_plural_form_error_issue17898(self):

View File

@ -16,6 +16,10 @@ Core and Builtins
Library Library
------- -------
- Issue #28563: Fixed possible DoS and arbitrary code execution when handle
plural form selections in the gettext module. The expression parser now
supports exact syntax supported by GNU gettext.
- In the curses module, raise an error if window.getstr() or window.instr() is - In the curses module, raise an error if window.getstr() or window.instr() is
passed a negative value. passed a negative value.