From 2daf6ae2495c862adf8bc717bfe9964081ea0b10 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 19:54:16 +0100 Subject: [PATCH 01/19] Issue #13703: add a way to randomize the hash values of basic types (str, bytes, datetime) in order to make algorithmic complexity attacks on (e.g.) web apps much more complicated. The environment variable PYTHONHASHSEED and the new command line flag -R control this behavior. --- Doc/library/sys.rst | 4 + Doc/reference/datamodel.rst | 2 + Doc/using/cmdline.rst | 48 +++- Include/object.h | 6 + Include/pydebug.h | 1 + Include/pythonrun.h | 2 + Lib/json/__init__.py | 4 +- Lib/os.py | 17 -- Lib/test/mapping_tests.py | 2 +- Lib/test/regrtest.py | 5 + Lib/test/script_helper.py | 7 +- Lib/test/test_cmd_line.py | 17 +- Lib/test/test_descr.py | 12 +- Lib/test/test_hash.py | 92 +++++- Lib/test/test_os.py | 36 ++- Lib/test/test_set.py | 23 +- Lib/test/test_sys.py | 2 +- Lib/test/test_urllib.py | 4 +- Lib/tkinter/test/test_ttk/test_functions.py | 2 +- Makefile.pre.in | 1 + Misc/NEWS | 5 + Misc/python.man | 29 ++ Modules/datetimemodule.c | 4 +- Modules/main.c | 16 +- Modules/posixmodule.c | 125 ++------ Objects/bytesobject.c | 12 +- Objects/object.c | 2 + Objects/unicodeobject.c | 12 +- PCbuild/pythoncore.vcproj | 4 + Python/pythonrun.c | 8 + Python/random.c | 302 ++++++++++++++++++++ Python/sysmodule.c | 6 +- 32 files changed, 660 insertions(+), 152 deletions(-) create mode 100644 Python/random.c diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index bb9f9205878..95947560f21 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -220,8 +220,12 @@ always available. :const:`ignore_environment` :option:`-E` :const:`verbose` :option:`-v` :const:`bytes_warning` :option:`-b` + :const:`hash_randomization` :option:`-R` ============================= ============================= + .. versionadded:: 3.1.5 + The ``hash_randomization`` attribute. + .. 
data:: float_info diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 6f874b6764a..7ded851ab6f 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1265,6 +1265,8 @@ Basic customization inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__` had been explicitly set to :const:`None`. + See also the :option:`-R` command-line option. + .. method:: object.__bool__(self) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 3fe0c7aedd3..11e2d7d9c0c 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -21,7 +21,7 @@ Command line When invoking Python, you may specify any of these options:: - python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args] + python [-bBdEhiORsSuvVWx?] [-c command | -m module-name | script | - ] [args] The most common use case is, of course, a simple invocation of a script:: @@ -215,6 +215,29 @@ Miscellaneous options Discard docstrings in addition to the :option:`-O` optimizations. +.. cmdoption:: -R + + Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes + and datetime objects are "salted" with an unpredictable random value. + Although they remain constant within an individual Python process, they are + not predictable between repeated invocations of Python. + + This is intended to provide protection against a denial-of-service attack caused by + carefully-chosen inputs that exploit the worst-case performance of a dict + insertion, O(n^2) complexity. See + http://www.ocert.org/advisories/ocert-2011-003.html for details. + + Changing hash values affects the order in which keys are retrieved from a + dict. Although Python has never made guarantees about this ordering (and it + typically varies between 32-bit and 64-bit builds), enough real-world code + implicitly relies on this non-guaranteed behavior that the randomization is + disabled by default. + + See also :envvar:`PYTHONHASHSEED`. + + ..
versionadded:: 3.1.5 + + .. cmdoption:: -s Don't add user site directory to sys.path @@ -314,6 +337,7 @@ Miscellaneous options .. note:: The line numbers in error messages will be off by one. + Options you shouldn't use ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -328,6 +352,7 @@ Options you shouldn't use Reserved for alternative implementations of Python to use for their own purposes. + .. _using-on-envvars: Environment variables @@ -435,6 +460,27 @@ These environment variables influence Python's behavior. import of source modules. +.. envvar:: PYTHONHASHSEED + + If this variable is set to ``random``, the effect is the same as specifying + the :option:`-R` option: a random value is used to seed the hashes of str, + bytes and datetime objects. + + If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed + seed for generating the hash() of the types covered by the hash + randomization. + + Its purpose is to allow repeatable hashing, such as for selftests for the + interpreter itself, or to allow a cluster of Python processes to share hash + values. + + The integer must be a decimal number in the range [0,4294967295]. Specifying + the value 0 will lead to the same hash values as when hash randomization is + disabled. + + .. versionadded:: 3.1.5 + + ..
envvar:: PYTHONIOENCODING Overrides the encoding used for stdin/stdout/stderr, in the syntax diff --git a/Include/object.h b/Include/object.h index ef73a213d16..7848cf4c6eb 100644 --- a/Include/object.h +++ b/Include/object.h @@ -473,6 +473,12 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *); PyAPI_FUNC(long) _Py_HashDouble(double); PyAPI_FUNC(long) _Py_HashPointer(void*); +typedef struct { + long prefix; + long suffix; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + /* Helper for passing objects to printf and the like */ #define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj)) diff --git a/Include/pydebug.h b/Include/pydebug.h index 0a31f5ba01e..5969e444195 100644 --- a/Include/pydebug.h +++ b/Include/pydebug.h @@ -19,6 +19,7 @@ PyAPI_DATA(int) Py_DivisionWarningFlag; PyAPI_DATA(int) Py_DontWriteBytecodeFlag; PyAPI_DATA(int) Py_NoUserSiteDirectory; PyAPI_DATA(int) Py_UnbufferedStdioFlag; +PyAPI_DATA(int) Py_HashRandomizationFlag; /* this is a wrapper around getenv() that pays attention to Py_IgnoreEnvironmentFlag. 
It should be used for getting variables like diff --git a/Include/pythonrun.h b/Include/pythonrun.h index 96a0e232f9d..af4aa0877ed 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -174,6 +174,8 @@ typedef void (*PyOS_sighandler_t)(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus } diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 6d88931dcbd..ba2bc1d3426 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -31,7 +31,9 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':')) + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: diff --git a/Lib/os.py b/Lib/os.py index b46c02f5800..8f66472390a 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -611,23 +611,6 @@ try: except NameError: # statvfs_result may not exist pass -if not _exists("urandom"): - def urandom(n): - """urandom(n) -> str - - Return a string of n random bytes suitable for cryptographic use. 
- - """ - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except (OSError, IOError): - raise NotImplementedError("/dev/urandom (or equivalent) not found") - bs = b"" - while len(bs) < n: - bs += read(_urandomfd, n - len(bs)) - close(_urandomfd) - return bs - # Supply os.popen() def popen(cmd, mode="r", buffering=-1): if not isinstance(cmd, str): diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py index c34bd59e9cc..592a78be41b 100644 --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase): def _reference(self): """Return a dictionary of values which are invariant by storage in the object under test.""" - return {1:2, "key1":"value1", "key2":(1,2,3)} + return {"1": "2", "key1":"value1", "key2":(1,2,3)} def _empty_mapping(self): """Return an empty mapping object""" return self.type2test() diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 98d68bd1b48..d203600e295 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -428,6 +428,11 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False, except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: + hashseed = os.getenv('PYTHONHASHSEED') + if not hashseed: + os.environ['PYTHONHASHSEED'] = str(random_seed) + os.execv(sys.executable, [sys.executable] + sys.argv) + return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(tests) diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py index 0699cf61a21..fca28d37c36 100644 --- a/Lib/test/script_helper.py +++ b/Lib/test/script_helper.py @@ -3,7 +3,6 @@ import sys import os -import re import os.path import tempfile import subprocess @@ -19,11 +18,15 @@ def _assert_python(expected_success, *args, **env_vars): cmd_line = [sys.executable] if not env_vars: cmd_line.append('-E') - cmd_line.extend(args) # Need to preserve the original environment, for in-place 
testing of # shared library builds. env = os.environ.copy() + # But a special flag that can be set to override -- in this case, the + # caller is responsible to pass the full environment. + if env_vars.pop('__cleanenv', None): + env = {} env.update(env_vars) + cmd_line.extend(args) p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index c4b8be5f6cb..eacd7a6ae43 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -4,7 +4,6 @@ import os import test.support, unittest -import os import sys import subprocess @@ -190,6 +189,22 @@ sys.stdout.buffer.write(path)""" self.assertTrue(path1.encode('ascii') in stdout) self.assertTrue(path2.encode('ascii') in stdout) + def test_hash_randomization(self): + # Verify that -R enables hash randomization: + self.verify_valid_flag('-R') + hashes = [] + for i in range(2): + code = 'print(hash("spam"))' + data, rc = self.start_python_and_exit_code('-R', '-c', code) + self.assertEqual(rc, 0) + hashes.append(data) + self.assertNotEqual(hashes[0], hashes[1]) + + # Verify that sys.flags contains hash_randomization + code = 'import sys; print("random is", sys.flags.hash_randomization)' + data, rc = self.start_python_and_exit_code('-R', '-c', code) + self.assertEqual(rc, 0) + self.assertIn(b'random is 1', data) def test_main(): test.support.run_unittest(CmdLineTest) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 0ce85f0b697..077f5da5075 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4300,8 +4300,18 @@ class DictProxyTests(unittest.TestCase): def test_repr(self): # Testing dict_proxy.__repr__ + def sorted_dict_repr(repr_): + # Given the repr of a dict, sort the keys + assert repr_.startswith('{') + assert repr_.endswith('}') + kvs = repr_[1:-1].split(', ') + return '{' + ', '.join(sorted(kvs)) + '}' dict_ = {k: v for k, v in self.C.__dict__.items()} - 
self.assertEqual(repr(self.C.__dict__), 'dict_proxy({!r})'.format(dict_)) + repr_ = repr(self.C.__dict__) + self.assert_(repr_.startswith('dict_proxy(')) + self.assert_(repr_.endswith(')')) + self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]), + sorted_dict_repr('{!r}'.format(dict_))) class PTypesLongInitTest(unittest.TestCase): diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py index 569e5e054c4..f5736b2de7f 100644 --- a/Lib/test/test_hash.py +++ b/Lib/test/test_hash.py @@ -3,10 +3,16 @@ # # Also test that hash implementations are inherited as expected +import datetime +import os +import struct import unittest from test import support +from test.script_helper import assert_python_ok from collections import Hashable +IS_64BIT = (struct.calcsize('l') == 8) + class HashEqualityTestCase(unittest.TestCase): @@ -118,10 +124,92 @@ class HashBuiltinsTestCase(unittest.TestCase): for obj in self.hashes_to_check: self.assertEqual(hash(obj), _default_hash(obj)) +class HashRandomizationTests(unittest.TestCase): + + # Each subclass should define a field "repr_", containing the repr() of + # an object to be tested + + def get_hash_command(self, repr_): + return 'print(hash(%s))' % repr_ + + def get_hash(self, repr_, seed=None): + env = os.environ.copy() + env['__cleanenv'] = True # signal to assert_python not to do a copy + # of os.environ on its own + if seed is not None: + env['PYTHONHASHSEED'] = str(seed) + else: + env.pop('PYTHONHASHSEED', None) + out = assert_python_ok( + '-c', self.get_hash_command(repr_), + **env) + stdout = out[1].strip() + return int(stdout) + + def test_randomized_hash(self): + # two runs should return different hashes + run1 = self.get_hash(self.repr_, seed='random') + run2 = self.get_hash(self.repr_, seed='random') + self.assertNotEqual(run1, run2) + +class StringlikeHashRandomizationTests(HashRandomizationTests): + def test_null_hash(self): + # PYTHONHASHSEED=0 disables the randomized hash + if IS_64BIT: + 
known_hash_of_obj = 1453079729188098211 + else: + known_hash_of_obj = -1600925533 + + # Randomization is disabled by default: + self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj) + + # It can also be disabled by setting the seed to 0: + self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) + + def test_fixed_hash(self): + # test a fixed seed for the randomized hash + # Note that all types share the same values: + if IS_64BIT: + h = -4410911502303878509 + else: + h = -206076799 + self.assertEqual(self.get_hash(self.repr_, seed=42), h) + +class StrHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr('abc') + + def test_empty_string(self): + self.assertEqual(hash(""), 0) + +class BytesHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr(b'abc') + + def test_empty_string(self): + self.assertEqual(hash(b""), 0) + +class DatetimeTests(HashRandomizationTests): + def get_hash_command(self, repr_): + return 'import datetime; print(hash(%s))' % repr_ + +class DatetimeDateTests(DatetimeTests): + repr_ = repr(datetime.date(1066, 10, 14)) + +class DatetimeDatetimeTests(DatetimeTests): + repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) + +class DatetimeTimeTests(DatetimeTests): + repr_ = repr(datetime.time(0)) + + def test_main(): support.run_unittest(HashEqualityTestCase, - HashInheritanceTestCase, - HashBuiltinsTestCase) + HashInheritanceTestCase, + HashBuiltinsTestCase, + StrHashRandomizationTests, + BytesHashRandomizationTests, + DatetimeDateTests, + DatetimeDatetimeTests, + DatetimeTimeTests) if __name__ == "__main__": diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index cbf0c1647d3..bff4f0bdea0 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -9,6 +9,7 @@ import warnings import sys import shutil from test import support +from test.script_helper import assert_python_ok # Detect whether we're on a Linux system that uses the (now outdated # and unmaintained) linuxthreads threading 
library. There's an issue @@ -574,14 +575,33 @@ class DevNullTests(unittest.TestCase): f.close() class URandomTests(unittest.TestCase): - def test_urandom(self): - try: - self.assertEqual(len(os.urandom(1)), 1) - self.assertEqual(len(os.urandom(10)), 10) - self.assertEqual(len(os.urandom(100)), 100) - self.assertEqual(len(os.urandom(1000)), 1000) - except NotImplementedError: - pass + def test_urandom_length(self): + self.assertEqual(len(os.urandom(0)), 0) + self.assertEqual(len(os.urandom(1)), 1) + self.assertEqual(len(os.urandom(10)), 10) + self.assertEqual(len(os.urandom(100)), 100) + self.assertEqual(len(os.urandom(1000)), 1000) + + def test_urandom_value(self): + data1 = os.urandom(16) + data2 = os.urandom(16) + self.assertNotEqual(data1, data2) + + def get_urandom_subprocess(self, count): + code = '\n'.join(( + 'import os, sys', + 'data = os.urandom(%s)' % count, + 'sys.stdout.buffer.write(data)', + 'sys.stdout.buffer.flush()')) + out = assert_python_ok('-c', code) + stdout = out[1] + self.assertEqual(len(stdout), 16) + return stdout + + def test_urandom_subprocess(self): + data1 = self.get_urandom_subprocess(16) + data2 = self.get_urandom_subprocess(16) + self.assertNotEqual(data1, data2) class ExecTests(unittest.TestCase): @unittest.skipIf(USING_LINUXTHREADS, diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 99d5c70e0a3..5d5e2324e9d 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -734,6 +734,17 @@ class TestBasicOps(unittest.TestCase): if self.repr is not None: self.assertEqual(repr(self.set), self.repr) + def check_repr_against_values(self): + text = repr(self.set) + self.assertTrue(text.startswith('{')) + self.assertTrue(text.endswith('}')) + + result = text[1:-1].split(', ') + result.sort() + sorted_repr_values = [repr(value) for value in self.values] + sorted_repr_values.sort() + self.assertEqual(result, sorted_repr_values) + def test_print(self): try: fo = open(support.TESTFN, "w") @@ -892,7 +903,9 @@ class 
TestBasicOpsString(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{'a', 'c', 'b'}" + + def test_repr(self): + self.check_repr_against_values() #------------------------------------------------------------------------------ @@ -903,7 +916,9 @@ class TestBasicOpsBytes(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{b'a', b'c', b'b'}" + + def test_repr(self): + self.check_repr_against_values() #------------------------------------------------------------------------------ @@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 4 - self.repr = "{'a', b'a', 'b', b'b'}" def tearDown(self): warnings.filters = self.warning_filters + def test_repr(self): + self.check_repr_against_values() + #============================================================================== def baditer(): diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 11685a42023..7732c4c325f 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -446,7 +446,7 @@ class SysModuleTest(unittest.TestCase): attrs = ("debug", "division_warning", "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", - "bytes_warning") + "bytes_warning", "hash_randomization") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) self.assertEqual(type(getattr(sys.flags, attr)), int, attr) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 4d3509ae7d2..482acc1c0f2 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -12,6 +12,7 @@ import os import sys import tempfile import warnings +import collections def hexescape(char): """Escape char as RFC 2396 specifies""" @@ -840,8 +841,9 @@ class urlencode_Tests(unittest.TestCase): self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) 
self.assertEqual("a=None&a=a", urllib.parse.urlencode({"a": [None, "a"]}, True)) + data = collections.OrderedDict([("a", 1), ("b", 1)]) self.assertEqual("a=a&a=b", - urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) + urllib.parse.urlencode({"a": data}, True)) def test_urlencode_encoding(self): # ASCII encoding. Expect %3F with errors="replace' diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py index df593cd7109..2303e4cd468 100644 --- a/Lib/tkinter/test/test_ttk/test_functions.py +++ b/Lib/tkinter/test/test_ttk/test_functions.py @@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase): ('a', 'b', 'c')), ("test {a b} c", ())) # state spec and options self.assertEqual(ttk._format_elemcreate('image', False, 'test', - ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y"))) + ('a', 'b'), a='x'), ("test a b", ("-a", "x"))) # format returned values as a tcl script # state spec with multiple states and an option with a multivalue self.assertEqual(ttk._format_elemcreate('image', True, 'test', diff --git a/Makefile.pre.in b/Makefile.pre.in index ed1dc33509e..e4470bdb88d 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -305,6 +305,7 @@ PYTHON_OBJS= \ Python/pymath.o \ Python/pystate.o \ Python/pythonrun.o \ + Python/random.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ diff --git a/Misc/NEWS b/Misc/NEWS index 6e956973377..486da13674d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,11 @@ What's New in Python 3.1.5? Core and Builtins ----------------- +- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED + environment variables, to provide an opt-in way to protect against denial of + service attacks due to hash collisions within the dict and set types. Patch + by David Malcolm, based on work by Victor Stinner. 
Library ------- diff --git a/Misc/python.man b/Misc/python.man index 411a43a3e77..5b4eeef6546 100644 --- a/Misc/python.man +++ b/Misc/python.man @@ -34,6 +34,9 @@ python \- an interpreted, interactive, object-oriented programming language .B \-OO ] [ +.B \-R +] +[ .B -Q .I argument ] @@ -145,6 +148,18 @@ to \fI.pyo\fP. Given twice, causes docstrings to be discarded. .B \-OO Discard docstrings in addition to the \fB-O\fP optimizations. .TP +.B \-R +Turn on "hash randomization", so that the hash() values of str, bytes and +datetime objects are "salted" with an unpredictable pseudo-random value. +Although they remain constant within an individual Python process, they are +not predictable between repeated invocations of Python. +.IP +This is intended to provide protection against a denial of service +caused by carefully-chosen inputs that exploit the worst-case performance +of a dict insertion, O(n^2) complexity. See +http://www.ocert.org/advisories/ocert-2011-003.html +for details. +.TP .BI "\-Q " argument Division control; see PEP 238. The argument must be one of "old" (the default, int/int and long/long return an int or long), "new" (new @@ -403,6 +418,20 @@ the \fB\-u\fP option. If this is set to a non-empty string it is equivalent to specifying the \fB\-v\fP option. If set to an integer, it is equivalent to specifying \fB\-v\fP multiple times. +.IP PYTHONHASHSEED +If this variable is set to "random", the effect is the same as specifying +the \fB\-R\fP option: a random value is used to seed the hashes of str, +bytes and datetime objects. + +If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for +generating the hash() of the types covered by the hash randomization. Its +purpose is to allow repeatable hashing, such as for selftests for the +interpreter itself, or to allow a cluster of Python processes to share hash +values. + +The integer must be a decimal number in the range [0,4294967295].
Specifying +the value 0 will lead to the same hash values as when hash randomization is +disabled. .SH AUTHOR The Python Software Foundation: http://www.python.org/psf .SH INTERNET RESOURCES diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 0ac51aaa7ff..f3103eaf699 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -2566,10 +2566,12 @@ generic_hash(unsigned char *data, int len) register long x; p = (unsigned char *) data; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= len; + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; diff --git a/Modules/main.c b/Modules/main.c index eb9bb545172..9607cb334db 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -47,7 +47,7 @@ static wchar_t **orig_argv; static int orig_argc; /* command line options */ -#define BASE_OPTS L"bBc:dEhiJm:OsStuvVW:xX?" +#define BASE_OPTS L"bBc:dEhiJm:ORsStuvVW:xX?" #define PROGRAM_OPTS BASE_OPTS @@ -72,6 +72,9 @@ static char *usage_2 = "\ -m mod : run library module as a script (terminates option list)\n\ -O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\ -OO : remove doc-strings in addition to the -O optimizations\n\ +-R : use a pseudo-random salt to make hash() values of various types be\n\ + unpredictable between separate invocations of the interpreter, as\n\ + a defence against denial-of-service attacks\n\ -s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\ -S : don't imply 'import site' on initialization\n\ "; @@ -99,6 +102,12 @@ PYTHONHOME : alternate directory (or %c).\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ "; +static char *usage_6 = "\ +PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\ + as specifying the :option:`-R` option: a random value is used to seed the\n\ + hashes of str, bytes and datetime objects. 
It can also be set to an integer\n\ + in the range [0,4294967295] to get hash values with a predictable seed.\n\ +"; #ifndef MS_WINDOWS static FILE* @@ -136,6 +145,7 @@ usage(int exitcode, wchar_t* program) fputs(usage_3, f); fprintf(f, usage_4, DELIM); fprintf(f, usage_5, DELIM, PYTHONHOMEHELP); + fputs(usage_6, f); } #if defined(__VMS) if (exitcode == 0) { @@ -373,6 +383,10 @@ Py_Main(int argc, wchar_t **argv) PySys_AddWarnOption(_PyOS_optarg); break; + case 'R': + Py_HashRandomizationFlag++; + break; + /* This space reserved for other options */ default: diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index a836af6cd05..dbbc29f95ef 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4022,7 +4022,7 @@ posix_getgroups(PyObject *self, PyObject *noargs) #endif gid_t grouplist[MAX_GROUPS]; - /* On MacOSX getgroups(2) can return more than MAX_GROUPS results + /* On MacOSX getgroups(2) can return more than MAX_GROUPS results * This is a helper variable to store the intermediate result when * that happens. * @@ -6942,82 +6942,6 @@ posix_getloadavg(PyObject *self, PyObject *noargs) } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, -"urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ - LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ - DWORD dwFlags ); -typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ - BYTE *pbBuffer ); - -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating - system will release it when the process terminates. */ -static HCRYPTPROV hCryptProv = 0; - -static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! 
PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) - return PyErr_Format(PyExc_ValueError, - "negative argument not allowed"); - - if (hCryptProv == 0) { - HINSTANCE hAdvAPI32 = NULL; - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; - - /* Obtain handle to the DLL containing CryptoAPI - This should not fail */ - hAdvAPI32 = GetModuleHandle("advapi32.dll"); - if(hAdvAPI32 == NULL) - return win32_error("GetModuleHandle", NULL); - - /* Obtain pointers to the CryptoAPI functions - This will fail on some early versions of Win95 */ - pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( - hAdvAPI32, - "CryptAcquireContextA"); - if (pCryptAcquireContext == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptAcquireContextA not found"); - - pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress( - hAdvAPI32, "CryptGenRandom"); - if (pCryptGenRandom == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptGenRandom not found"); - - /* Acquire context */ - if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, - PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return win32_error("CryptAcquireContext", NULL); - } - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */ - if (! 
pCryptGenRandom(hCryptProv, howMany, (unsigned char*) - PyBytes_AS_STRING(result))) { - Py_DECREF(result); - return win32_error("CryptGenRandom", NULL); - } - } - return result; -} -#endif - PyDoc_STRVAR(device_encoding__doc__, "device_encoding(fd) -> str\n\n\ Return a string describing the encoding of the device\n\ @@ -7055,41 +6979,35 @@ device_encoding(PyObject *self, PyObject *args) return Py_None; } -#ifdef __VMS -/* Use openssl random routine */ -#include -PyDoc_STRVAR(vms_urandom__doc__, +PyDoc_STRVAR(posix_urandom__doc__, "urandom(n) -> str\n\n\ Return n random bytes suitable for cryptographic use."); -static PyObject* -vms_urandom(PyObject *self, PyObject *args) +static PyObject * +posix_urandom(PyObject *self, PyObject *args) { - int howMany; - PyObject* result; + Py_ssize_t size; + PyObject *result; + int ret; - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) + /* Read arguments */ + if (!PyArg_ParseTuple(args, "n:urandom", &size)) return NULL; - if (howMany < 0) + if (size < 0) return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); + result = PyBytes_FromStringAndSize(NULL, size); + if (result == NULL) + return NULL; - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - if (RAND_pseudo_bytes((unsigned char*) - PyBytes_AS_STRING(result), - howMany) < 0) { - Py_DECREF(result); - return PyErr_Format(PyExc_ValueError, - "RAND_pseudo_bytes"); - } + ret = _PyOS_URandom(PyBytes_AS_STRING(result), + PyBytes_GET_SIZE(result)); + if (ret == -1) { + Py_DECREF(result); + return NULL; } return result; } -#endif static PyMethodDef posix_methods[] = { {"access", posix_access, METH_VARARGS, posix_access__doc__}, @@ -7374,12 +7292,7 @@ static PyMethodDef posix_methods[] = { #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif - #ifdef MS_WINDOWS - {"urandom", win32_urandom, METH_VARARGS, 
win32_urandom__doc__}, - #endif - #ifdef __VMS - {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__}, - #endif + {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__}, {NULL, NULL} /* Sentinel */ }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index f2ee1310fa4..e6ab440caa9 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -899,11 +899,21 @@ bytes_hash(PyBytesObject *a) if (a->ob_shash != -1) return a->ob_shash; len = Py_SIZE(a); + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + a->ob_shash = 0; + return 0; + } p = (unsigned char *) a->ob_sval; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= Py_SIZE(a); + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; a->ob_shash = x; diff --git a/Objects/object.c b/Objects/object.c index ac57cd7e93c..0b1c656cc60 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -712,6 +712,8 @@ PyObject_HashNotImplemented(PyObject *v) return -1; } +_Py_HashSecret_t _Py_HashSecret; + long PyObject_Hash(PyObject *v) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2cdbc0e6890..5986fb8ea07 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7344,11 +7344,21 @@ unicode_hash(PyUnicodeObject *self) if (self->hash != -1) return self->hash; len = Py_SIZE(self); + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + self->hash = 0; + return 0; + } p = self->str; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= Py_SIZE(self); + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; self->hash = x; diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj index 11cd3118a24..045300ace9e 100644 --- a/PCbuild/pythoncore.vcproj +++ 
b/PCbuild/pythoncore.vcproj @@ -1778,6 +1778,10 @@ RelativePath="..\Python\pythonrun.c" > + + diff --git a/Python/pythonrun.c b/Python/pythonrun.c index c4ae9211ffa..4474e79b0f4 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -73,6 +73,7 @@ extern void _PyUnicode_Init(void); extern void _PyUnicode_Fini(void); extern int _PyLong_Init(void); extern void PyLong_Fini(void); +extern void _PyRandom_Init(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); @@ -91,6 +92,7 @@ int Py_FrozenFlag; /* Needed by getpath.c */ int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */ int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */ +int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */ /* PyModule_GetWarningsModule is no longer necessary as of 2.6 since _warnings is builtin. This API should not be used. */ @@ -195,6 +197,12 @@ Py_InitializeEx(int install_sigs) Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + /* The variable is only tested for existence here; _PyRandom_Init will + check its value further. 
*/ + if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') + Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p); + + _PyRandom_Init(); interp = PyInterpreterState_New(); if (interp == NULL) diff --git a/Python/random.c b/Python/random.c new file mode 100644 index 00000000000..327166e26aa --- /dev/null +++ b/Python/random.c @@ -0,0 +1,302 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include +#else +#include +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating + system will release it when the process terminates. */ +static HCRYPTPROV hCryptProv = 0; + +static int +win32_urandom_init(int raise) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + + /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + goto error; + + /* Obtain pointers to the CryptoAPI functions. This will fail on some early + versions of Win95. */ + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( + hAdvAPI32, "CryptAcquireContextA"); + if (pCryptAcquireContext == NULL) + goto error; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, + "CryptGenRandom"); + if (pCryptGenRandom == NULL) + goto error; + + /* Acquire context */ + if (! 
pCryptAcquireContext(&hCryptProv, NULL, NULL, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto error; + + return 0; + +error: + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialize Windows random API (CryptoGen)"); + return -1; +} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen + API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + Py_ssize_t chunk; + + if (hCryptProv == 0) + { + if (win32_urandom_init(raise) == -1) + return -1; + } + + while (size > 0) + { + chunk = size > INT_MAX ? INT_MAX : size; + if (!pCryptGenRandom(hCryptProv, chunk, buffer)) + { + /* CryptGenRandom() failed */ + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialized the randomized hash " + "secret using CryptoGen)"); + return -1; + } + buffer += chunk; + size -= chunk; + } + return 0; +} +#endif /* MS_WINDOWS */ + + +#ifdef __VMS +/* Use openssl random routine */ +#include +static int +vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + if (RAND_pseudo_bytes(buffer, size) < 0) { + if (raise) { + PyErr_Format(PyExc_ValueError, + "RAND_pseudo_bytes"); + } else { + Py_FatalError("Failed to initialize the randomized hash " + "secret using RAND_pseudo_bytes"); + } + return -1; + } + return 0; +} +#endif /* __VMS */ + + +#if !defined(MS_WINDOWS) && !defined(__VMS) + +/* Read size bytes from /dev/urandom into buffer. + Call Py_FatalError() on error. 
*/ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + assert (0 < size); + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + Py_FatalError("Failed to open /dev/urandom"); + + while (0 < size) + { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + Py_FatalError("Failed to read bytes from /dev/urandom"); + break; + } + buffer += n; + size -= (Py_ssize_t)n; + } + close(fd); +} + +/* Read size bytes from /dev/urandom into buffer. + Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + if (size <= 0) + return 0; + + Py_BEGIN_ALLOW_THREADS + fd = open("/dev/urandom", O_RDONLY); + Py_END_ALLOW_THREADS + if (fd < 0) + { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom"); + return -1; + } + + Py_BEGIN_ALLOW_THREADS + do { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + break; + buffer += n; + size -= (Py_ssize_t)n; + } while (0 < size); + Py_END_ALLOW_THREADS + + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + if (n < 0) + PyErr_SetFromErrno(PyExc_OSError); + else + PyErr_Format(PyExc_RuntimeError, + "Failed to read %zi bytes from /dev/urandom", + size); + close(fd); + return -1; + } + close(fd); + return 0; +} +#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */ + +/* Fill buffer with pseudo-random bytes generated by a linear congruential + generator (LCG): + + x(n+1) = (x(n) * 214013 + 2531011) % 2^32 + + Use bits 23..16 of x(n) to generate a byte. 
*/ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{ + size_t index; + unsigned int x; + + x = x0; + for (index=0; index < size; index++) { + x *= 214013; + x += 2531011; + /* modulo 2 ^ (8 * sizeof(int)) */ + buffer[index] = (x >> 16) & 0xff; + } +} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic + use, from the operating system random number generator (RNG). + + Return 0 on success, raise an exception and return -1 on error. */ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + return -1; + } + if (size == 0) + return 0; + +#ifdef MS_WINDOWS + return win32_urandom((unsigned char *)buffer, size, 1); +#else +# ifdef __VMS + return vms_urandom((unsigned char *)buffer, size, 1); +# else + return dev_urandom_python((char*)buffer, size); +# endif +#endif +} + +void +_PyRandom_Init(void) +{ + char *env; + void *secret = &_Py_HashSecret; + Py_ssize_t secret_size = sizeof(_Py_HashSecret); + + if (random_initialized) + return; + random_initialized = 1; + + /* + By default, hash randomization is disabled, and only + enabled if PYTHONHASHSEED is set to non-empty or if + "-R" is provided at the command line: + */ + if (!Py_HashRandomizationFlag) { + /* Disable the randomized hash: */ + memset(secret, 0, secret_size); + return; + } + + /* + Hash randomization is enabled. Generate a per-process secret, + using PYTHONHASHSEED if provided. 
+ */ + + env = Py_GETENV("PYTHONHASHSEED"); + if (env && *env != '\0' & strcmp(env, "random") != 0) { + char *endptr = env; + unsigned long seed; + seed = strtoul(env, &endptr, 10); + if (*endptr != '\0' + || seed > 4294967295UL + || (errno == ERANGE && seed == ULONG_MAX)) + { + Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer " + "in range [0; 4294967295]"); + } + if (seed == 0) { + /* disable the randomized hash */ + memset(secret, 0, secret_size); + } + else { + lcg_urandom(seed, (unsigned char*)secret, secret_size); + } + } + else { +#ifdef MS_WINDOWS + (void)win32_urandom((unsigned char *)secret, secret_size, 0); +#else /* #ifdef MS_WINDOWS */ +# ifdef __VMS + vms_urandom((unsigned char *)secret, secret_size, 0); +# else + dev_urandom_noraise((char*)secret, secret_size); +# endif +#endif + } +} diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c688172dac3..6a7e91432c0 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1126,6 +1126,7 @@ static PyStructSequence_Field flags_fields[] = { /* {"unbuffered", "-u"}, */ /* {"skip_first", "-x"}, */ {"bytes_warning", "-b"}, + {"hash_randomization", "-R"}, {0} }; @@ -1134,9 +1135,9 @@ static PyStructSequence_Desc flags_desc = { flags__doc__, /* doc */ flags_fields, /* fields */ #ifdef RISCOS - 12 + 13 #else - 11 + 12 #endif }; @@ -1169,6 +1170,7 @@ make_flags(void) /* SetFlag(saw_unbuffered_flag); */ /* SetFlag(skipfirstline); */ SetFlag(Py_BytesWarningFlag); + SetFlag(Py_HashRandomizationFlag); #undef SetFlag if (PyErr_Occurred()) { From a3ed11bd3409d61e6eb35995743e405790df2d3c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 20 Feb 2012 15:20:37 -0500 Subject: [PATCH 02/19] don't rely on the order of module clearing --- Lib/test/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_module.py b/Lib/test/test_module.py index 7734fb04a26..15836cac93e 100644 --- a/Lib/test/test_module.py +++ b/Lib/test/test_module.py @@ -70,7 +70,7 @@ class 
ModuleTests(unittest.TestCase): m = ModuleType("foo") m.destroyed = destroyed s = """class A: - def __del__(self): + def __del__(self, destroyed=destroyed): destroyed.append(1) a = A()""" exec(s, m.__dict__) From 8848255d8a5b3ac711801466de7e19bae54246af Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:34:31 +0100 Subject: [PATCH 03/19] Run tests with -R on "make test" and the buildbots. --- Makefile.pre.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 7ffc3ecab3b..3008d6d73ad 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -764,7 +764,7 @@ $(LIBRARY_OBJS) $(MODOBJS) Modules/python.o: $(PYTHON_HEADERS) TESTOPTS= -l $(EXTRATESTOPTS) TESTPROG= $(srcdir)/Lib/test/regrtest.py -TESTPYTHON= $(RUNSHARED) ./$(BUILDPYTHON) -Wd -E -bb $(TESTPYTHONOPTS) +TESTPYTHON= $(RUNSHARED) ./$(BUILDPYTHON) -Wd -E -R -bb $(TESTPYTHONOPTS) test: all platform -find $(srcdir)/Lib -name '*.py[co]' -print | xargs rm -f -$(TESTPYTHON) $(TESTPROG) $(TESTOPTS) From a86b262d1e8b98de5c8f6781cc30eef0b0c98ddc Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:34:57 +0100 Subject: [PATCH 04/19] Fix bad inheritance in test_subprocess that led to a number of tests being executed twice. 
--- Lib/test/test_subprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 40d0fb48be8..fb0b8342465 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1702,7 +1702,7 @@ class CommandsWithSpaces (BaseTestCase): self.with_spaces([sys.executable, self.fname, "ab cd"]) -class ContextManagerTests(ProcessTestCase): +class ContextManagerTests(BaseTestCase): def test_pipe(self): with subprocess.Popen([sys.executable, "-c", From 242631da860fc94eafa86ff50d219cfd9dfded3e Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:36:28 +0100 Subject: [PATCH 05/19] Fix "sys.path modified" warning in test_strlit, by not replacing sys.path itself, only its contents. --- Lib/test/test_strlit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py index 30475a441b9..6bdc6e4e8a2 100644 --- a/Lib/test/test_strlit.py +++ b/Lib/test/test_strlit.py @@ -65,7 +65,7 @@ class TestLiterals(unittest.TestCase): sys.path.insert(0, self.tmpdir) def tearDown(self): - sys.path = self.save_path + sys.path[:] = self.save_path shutil.rmtree(self.tmpdir, ignore_errors=True) def test_template(self): From c425a94899a10b565df060c6f943b5d8b4a85ac9 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:37:22 +0100 Subject: [PATCH 06/19] Fix use of deprecated assert_ method. 
--- Lib/test/test_descr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 77cadc01d9f..92304b4e7ef 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4482,8 +4482,8 @@ class DictProxyTests(unittest.TestCase): return '{' + ', '.join(sorted(kvs)) + '}' dict_ = {k: v for k, v in self.C.__dict__.items()} repr_ = repr(self.C.__dict__) - self.assert_(repr_.startswith('dict_proxy(')) - self.assert_(repr_.endswith(')')) + self.assertTrue(repr_.startswith('dict_proxy(')) + self.assertTrue(repr_.endswith(')')) self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]), sorted_dict_repr('{!r}'.format(dict_))) From a108227c47b27116267aa2a131624e29d23d768d Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:41:03 +0100 Subject: [PATCH 07/19] Fix test_dis dependency on dict order. --- Lib/test/test_dis.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 7a61493c105..42466183d8e 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -268,12 +268,13 @@ Variable names: 6: args 7: kwds Cell variables: - 0: e - 1: d - 2: f - 3: y - 4: x - 5: z""" + 0: [edfxyz] + 1: [edfxyz] + 2: [edfxyz] + 3: [edfxyz] + 4: [edfxyz] + 5: [edfxyz]""" +# NOTE: the order of the cell variables above depends on dictionary order! co_tricky_nested_f = tricky.__func__.__code__.co_consts[1] From 27fe226eb13b1f7cedba838b3d388a645197d722 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:03:28 +0100 Subject: [PATCH 08/19] Another test_dis dict order dependency. 
--- Lib/test/test_dis.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 42466183d8e..5c59eaa01c8 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -293,12 +293,12 @@ Names: Variable names: 0: c Free variables: - 0: e - 1: d - 2: f - 3: y - 4: x - 5: z""" + 0: [edfxyz] + 1: [edfxyz] + 2: [edfxyz] + 3: [edfxyz] + 4: [edfxyz] + 5: [edfxyz]""" code_info_expr_str = """\ Name: From 61470246d086553fbd1de3c637ec17e9bb1ecb67 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:06:02 +0100 Subject: [PATCH 09/19] Remove setting hash seed to regrtest's random seed and re-execv()ing: this doesn't preserve Python flags and fails from a temp directory. --- Lib/test/regrtest.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 26ba9820d76..135a90e7727 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -496,11 +496,6 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: - hashseed = os.getenv('PYTHONHASHSEED') - if not hashseed: - os.environ['PYTHONHASHSEED'] = str(random_seed) - os.execv(sys.executable, [sys.executable] + sys.argv) - return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(selected) From 12897d7d395f5907e0f9a3694ba3c64c329db0dd Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:49:29 +0100 Subject: [PATCH 10/19] Fix typo in conditional. 
--- Python/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/random.c b/Python/random.c index 327166e26aa..01cd83aa564 100644 --- a/Python/random.c +++ b/Python/random.c @@ -269,7 +269,7 @@ _PyRandom_Init(void) */ env = Py_GETENV("PYTHONHASHSEED"); - if (env && *env != '\0' & strcmp(env, "random") != 0) { + if (env && *env != '\0' && strcmp(env, "random") != 0) { char *endptr = env; unsigned long seed; seed = strtoul(env, &endptr, 10); From 06b1c4f68bca367bf2a2c8ce9bf36168ccb2f9ef Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:09:59 +0100 Subject: [PATCH 11/19] Fix typo. --- Misc/NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS b/Misc/NEWS index 486da13674d..0d23663867d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -11,7 +11,7 @@ Core and Builtins ----------------- - Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED - environment variables, to provide an opt-in way to protect against denial of + environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch by David Malcolm, based on work by Victor Stinner. From 7c573f7a075fd8fc7bf90bdbcc4f163cda3b9acf Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:48:06 +0100 Subject: [PATCH 12/19] Fix dbm_gnu test relying on set order. --- Lib/test/test_dbm_gnu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index 2173b92afa9..f77dcfe49a1 100755 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -49,7 +49,7 @@ class TestGdbm(unittest.TestCase): all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). 
modes = all - set('fsu') - for mode in modes: + for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() From 9571155ae4ff86b4275950fcdfadcc76475943e3 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:06:02 +0100 Subject: [PATCH 13/19] Remove setting hash seed to regrtest's random seed and re-execv()ing: this doesn't preserve Python flags and fails from a temp directory. --- Lib/test/regrtest.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index d203600e295..98d68bd1b48 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -428,11 +428,6 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False, except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: - hashseed = os.getenv('PYTHONHASHSEED') - if not hashseed: - os.environ['PYTHONHASHSEED'] = str(random_seed) - os.execv(sys.executable, [sys.executable] + sys.argv) - return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(tests) From f47b20f0b00452e00a014c6fb80b911899845e2f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:08:27 +0100 Subject: [PATCH 14/19] Fix use of deprecated assertRegexpMatches method. 
--- Lib/lib2to3/tests/test_refactor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/lib2to3/tests/test_refactor.py b/Lib/lib2to3/tests/test_refactor.py index 4b87ed65442..8bdebc1f3da 100644 --- a/Lib/lib2to3/tests/test_refactor.py +++ b/Lib/lib2to3/tests/test_refactor.py @@ -230,7 +230,7 @@ from __future__ import print_function""" os.sep, os.path.basename(test_file)) for message in debug_messages: if "Not writing changes" in message: - self.assertRegexpMatches(message, message_regex) + self.assertRegex(message, message_regex) break else: self.fail("%r not matched in %r" % (message_regex, debug_messages)) From 16684eb62449922b8e066d8063cebc682c0ea5c7 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:48:06 +0100 Subject: [PATCH 15/19] Fix dbm_gnu test relying on set order. --- Lib/test/test_dbm_gnu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index ce96ce465e7..30a39f7b895 100755 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -53,7 +53,7 @@ class TestGdbm(unittest.TestCase): all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). modes = all - set('fsu') - for mode in modes: + for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() From 09562b43304a91aa323418834e183e7f39101372 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:09:59 +0100 Subject: [PATCH 16/19] Fix typo. 
--- Misc/NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS b/Misc/NEWS index 63281cf88c4..0e3595a2212 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -11,7 +11,7 @@ Core and Builtins ----------------- - Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED - environment variables, to provide an opt-in way to protect against denial of + environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch by David Malcolm, based on work by Victor Stinner. From e5a0e0a75f4aec8410092f53abad145565e76d3f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:37:36 +0100 Subject: [PATCH 17/19] Fix obscure failures of datetime-related tests due to the datetime tests failing to restore the system state completely after testing the pure-Python versions. --- Lib/test/datetimetester.py | 2 -- Lib/test/test_datetime.py | 40 ++++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 38f3b8f19bd..3fd6799b579 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1780,8 +1780,6 @@ class TestDateTime(TestDate): self.assertTrue(abs(from_timestamp - from_now) <= tolerance) def test_strptime(self): - import _strptime - string = '2004-12-01 13:02:47.197' format = '%Y-%m-%d %H:%M:%S.%f' expected = _strptime._strptime_datetime(self.theclass, string, format) diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index ded2aa93abe..d9ddb32363a 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -1,7 +1,9 @@ import unittest import sys from test.support import import_fresh_module, run_unittest + TESTS = 'test.datetimetester' + # XXX: import_fresh_module() is supposed to leave sys.module cache untouched, # XXX: but it does not, so we have to save and restore it ourselves. 
save_sys_modules = sys.modules.copy() @@ -15,28 +17,32 @@ finally: sys.modules.update(save_sys_modules) test_modules = [pure_tests, fast_tests] test_suffixes = ["_Pure", "_Fast"] +# XXX(gb) First run all the _Pure tests, then all the _Fast tests. You might +# not believe this, but in spite of all the sys.modules trickery running a _Pure +# test last will leave a mix of pure and native datetime stuff lying around. +test_classes = [] for module, suffix in zip(test_modules, test_suffixes): for name, cls in module.__dict__.items(): - if isinstance(cls, type) and issubclass(cls, unittest.TestCase): - name += suffix - cls.__name__ = name - globals()[name] = cls - def setUp(self, module=module, setup=cls.setUp): - self._save_sys_modules = sys.modules.copy() - sys.modules[TESTS] = module - sys.modules['datetime'] = module.datetime_module - sys.modules['_strptime'] = module._strptime - setup(self) - def tearDown(self, teardown=cls.tearDown): - teardown(self) - sys.modules.clear() - sys.modules.update(self._save_sys_modules) - cls.setUp = setUp - cls.tearDown = tearDown + if not (isinstance(cls, type) and issubclass(cls, unittest.TestCase)): + continue + cls.__name__ = name + suffix + @classmethod + def setUpClass(cls_, module=module): + cls_._save_sys_modules = sys.modules.copy() + sys.modules[TESTS] = module + sys.modules['datetime'] = module.datetime_module + sys.modules['_strptime'] = module._strptime + @classmethod + def tearDownClass(cls_): + sys.modules.clear() + sys.modules.update(cls_._save_sys_modules) + cls.setUpClass = setUpClass + cls.tearDownClass = tearDownClass + test_classes.append(cls) def test_main(): - run_unittest(__name__) + run_unittest(*test_classes) if __name__ == "__main__": test_main() From e9f637b062b3d23f284a88c45b3a8889eca673ca Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 20 Feb 2012 23:49:07 +0100 Subject: [PATCH 18/19] Make "regrtest -j" "-R"-aware --- Lib/test/support.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/Lib/test/support.py b/Lib/test/support.py index 51375d5d6b0..01cd2034064 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -1460,6 +1460,7 @@ def args_from_interpreter_flags(): flag_opt_map = { 'bytes_warning': 'b', 'dont_write_bytecode': 'B', + 'hash_randomization': 'R', 'ignore_environment': 'E', 'no_user_site': 's', 'no_site': 'S', From 91e5c08fe8a7b682099606ab48ef965833981a9a Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:49:29 +0100 Subject: [PATCH 19/19] Fix typo in conditional. --- Python/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/random.c b/Python/random.c index 327166e26aa..01cd83aa564 100644 --- a/Python/random.c +++ b/Python/random.c @@ -269,7 +269,7 @@ _PyRandom_Init(void) */ env = Py_GETENV("PYTHONHASHSEED"); - if (env && *env != '\0' & strcmp(env, "random") != 0) { + if (env && *env != '\0' && strcmp(env, "random") != 0) { char *endptr = env; unsigned long seed; seed = strtoul(env, &endptr, 10);