diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index c2b7423e0c3..3af43543c40 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -252,11 +252,15 @@ always available. :const:`verbose` :option:`-v` :const:`bytes_warning` :option:`-b` :const:`quiet` :option:`-q` + :const:`hash_randomization` :option:`-R` ============================= ============================= .. versionchanged:: 3.2 Added ``quiet`` attribute for the new :option:`-q` flag. + .. versionadded:: 3.2.3 + The ``hash_randomization`` attribute. + .. versionchanged:: 3.3 Removed obsolete ``division_warning`` attribute. diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 55fd76bd793..85953ad1e25 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1277,6 +1277,8 @@ Basic customization inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__` had been explicitly set to :const:`None`. + See also the :option:`-R` command-line option. + .. method:: object.__bool__(self) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 8a02b764eac..b97dbcdf323 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -24,7 +24,7 @@ Command line When invoking Python, you may specify any of these options:: - python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args] + python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args] The most common use case is, of course, a simple invocation of a script:: @@ -227,6 +227,29 @@ Miscellaneous options .. versionadded:: 3.2 +.. cmdoption:: -R + + Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes + and datetime objects are "salted" with an unpredictable random value. + Although they remain constant within an individual Python process, they are + not predictable between repeated invocations of Python. + + This is intended to provide protection against a denial-of-service caused by + carefully-chosen inputs that exploit the worst case performance of a dict + insertion, O(n^2) complexity. See + http://www.ocert.org/advisories/ocert-2011-003.html for details. + + Changing hash values affects the order in which keys are retrieved from a + dict. Although Python has never made guarantees about this ordering (and it + typically varies between 32-bit and 64-bit builds), enough real-world code + implicitly relies on this non-guaranteed behavior that the randomization is + disabled by default. + + See also :envvar:`PYTHONHASHSEED`. + + .. versionadded:: 3.2.3 + + .. cmdoption:: -s Don't add the :data:`user site-packages directory ` to @@ -352,6 +375,7 @@ Options you shouldn't use .. _Jython: http://jython.org + .. _using-on-envvars: Environment variables @@ -460,6 +484,27 @@ These environment variables influence Python's behavior. option. +.. envvar:: PYTHONHASHSEED + + If this variable is set to ``random``, the effect is the same as specifying + the :option:`-R` option: a random value is used to seed the hashes of str, + bytes and datetime objects. + + If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed + seed for generating the hash() of the types covered by the hash + randomization. + + Its purpose is to allow repeatable hashing, such as for selftests for the + interpreter itself, or to allow a cluster of python processes to share hash + values. + + The integer must be a decimal number in the range [0,4294967295]. Specifying + the value 0 will lead to the same hash values as when hash randomization is + disabled. + + .. versionadded:: 3.2.3 + + .. envvar:: PYTHONIOENCODING If this is set before running the interpreter, it overrides the encoding used diff --git a/Include/object.h b/Include/object.h index 844ff9f14a8..c69becec51a 100644 --- a/Include/object.h +++ b/Include/object.h @@ -554,6 +554,12 @@ PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t); #endif +typedef struct { + Py_hash_t prefix; + Py_hash_t suffix; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + /* Helper for passing objects to printf and the like */ #define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj)) diff --git a/Include/pydebug.h b/Include/pydebug.h index 7173fe33394..97c2f8c4257 100644 --- a/Include/pydebug.h +++ b/Include/pydebug.h @@ -19,6 +19,7 @@ PyAPI_DATA(int) Py_IgnoreEnvironmentFlag; PyAPI_DATA(int) Py_DontWriteBytecodeFlag; PyAPI_DATA(int) Py_NoUserSiteDirectory; PyAPI_DATA(int) Py_UnbufferedStdioFlag; +PyAPI_DATA(int) Py_HashRandomizationFlag; /* this is a wrapper around getenv() that pays attention to Py_IgnoreEnvironmentFlag. It should be used for getting variables like diff --git a/Include/pythonrun.h b/Include/pythonrun.h index fc6c85434ad..eeba1b7af12 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -246,6 +246,8 @@ typedef void (*PyOS_sighandler_t)(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus } diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 6d88931dcbd..ba2bc1d3426 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -31,7 +31,9 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':')) + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: diff --git a/Lib/os.py b/Lib/os.py index 301870cb487..fe6cb11b9c2 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -852,23 +852,6 @@ try: except NameError: # statvfs_result may not exist pass -if not _exists("urandom"): - def urandom(n): - """urandom(n) -> str - - Return a string of n random bytes suitable for cryptographic use. - - """ - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except (OSError, IOError): - raise NotImplementedError("/dev/urandom (or equivalent) not found") - bs = b"" - while len(bs) < n: - bs += read(_urandomfd, n - len(bs)) - close(_urandomfd) - return bs - # Supply os.popen() def popen(cmd, mode="r", buffering=-1): if not isinstance(cmd, str): diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 85036aa7d85..5c02b3a7353 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1786,8 +1786,6 @@ class TestDateTime(TestDate): self.assertTrue(abs(from_timestamp - from_now) <= tolerance) def test_strptime(self): - import _strptime - string = '2004-12-01 13:02:47.197' format = '%Y-%m-%d %H:%M:%S.%f' expected = _strptime._strptime_datetime(self.theclass, string, format) diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py index d2b7a59a30a..bc12c7756f2 100644 --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase): def _reference(self): """Return a dictionary of values which are invariant by storage in the object under test.""" - return {1:2, "key1":"value1", "key2":(1,2,3)} + return {"1": "2", "key1":"value1", "key2":(1,2,3)} def _empty_mapping(self): """Return an empty mapping object""" return self.type2test() diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py index e556eca52ac..10ada6d0d1e 100644 --- a/Lib/test/script_helper.py +++ b/Lib/test/script_helper.py @@ -3,7 +3,6 @@ import sys import os -import re import os.path import tempfile import subprocess @@ -20,11 +19,15 @@ def _assert_python(expected_success, *args, **env_vars): cmd_line = [sys.executable] if not env_vars: cmd_line.append('-E') - cmd_line.extend(args) # Need to preserve the original environment, for in-place testing of # shared library builds. env = os.environ.copy() + # But a special flag that can be set to override -- in this case, the + # caller is responsible to pass the full environment. + if env_vars.pop('__cleanenv', None): + env = {} env.update(env_vars) + cmd_line.extend(args) p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) diff --git a/Lib/test/support.py b/Lib/test/support.py index 2c56e12ddf6..47b94ca26f6 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -1588,6 +1588,7 @@ def args_from_interpreter_flags(): flag_opt_map = { 'bytes_warning': 'b', 'dont_write_bytecode': 'B', + 'hash_randomization': 'R', 'ignore_environment': 'E', 'no_user_site': 's', 'no_site': 'S', diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 70dfb173b7f..01af9b93527 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -324,6 +324,22 @@ class CmdLineTest(unittest.TestCase): def test_no_std_streams(self): self._test_no_stdio(['stdin', 'stdout', 'stderr']) + def test_hash_randomization(self): + # Verify that -R enables hash randomization: + self.verify_valid_flag('-R') + hashes = [] + for i in range(2): + code = 'print(hash("spam"))' + rc, out, err = assert_python_ok('-R', '-c', code) + self.assertEqual(rc, 0) + hashes.append(out) + self.assertNotEqual(hashes[0], hashes[1]) + + # Verify that sys.flags contains hash_randomization + code = 'import sys; print("random is", sys.flags.hash_randomization)' + rc, out, err = assert_python_ok('-R', '-c', code) + self.assertEqual(rc, 0) + self.assertIn(b'random is 1', out) def test_main(): test.support.run_unittest(CmdLineTest) diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index ded2aa93abe..d9ddb32363a 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -1,7 +1,9 @@ import unittest import sys from test.support import import_fresh_module, run_unittest + TESTS = 'test.datetimetester' + # XXX: import_fresh_module() is supposed to leave sys.module cache untouched, # XXX: but it does not, so we have to save and restore it ourselves. save_sys_modules = sys.modules.copy() @@ -15,28 +17,32 @@ finally: sys.modules.update(save_sys_modules) test_modules = [pure_tests, fast_tests] test_suffixes = ["_Pure", "_Fast"] +# XXX(gb) First run all the _Pure tests, then all the _Fast tests. You might +# not believe this, but in spite of all the sys.modules trickery running a _Pure +# test last will leave a mix of pure and native datetime stuff lying around. +test_classes = [] for module, suffix in zip(test_modules, test_suffixes): for name, cls in module.__dict__.items(): - if isinstance(cls, type) and issubclass(cls, unittest.TestCase): - name += suffix - cls.__name__ = name - globals()[name] = cls - def setUp(self, module=module, setup=cls.setUp): - self._save_sys_modules = sys.modules.copy() - sys.modules[TESTS] = module - sys.modules['datetime'] = module.datetime_module - sys.modules['_strptime'] = module._strptime - setup(self) - def tearDown(self, teardown=cls.tearDown): - teardown(self) - sys.modules.clear() - sys.modules.update(self._save_sys_modules) - cls.setUp = setUp - cls.tearDown = tearDown + if not (isinstance(cls, type) and issubclass(cls, unittest.TestCase)): + continue + cls.__name__ = name + suffix + @classmethod + def setUpClass(cls_, module=module): + cls_._save_sys_modules = sys.modules.copy() + sys.modules[TESTS] = module + sys.modules['datetime'] = module.datetime_module + sys.modules['_strptime'] = module._strptime + @classmethod + def tearDownClass(cls_): + sys.modules.clear() + sys.modules.update(cls_._save_sys_modules) + cls.setUpClass = setUpClass + cls.tearDownClass = tearDownClass + test_classes.append(cls) def test_main(): - run_unittest(__name__) + run_unittest(*test_classes) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index ce96ce465e7..30a39f7b895 100755 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -53,7 +53,7 @@ class TestGdbm(unittest.TestCase): all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). modes = all - set('fsu') - for mode in modes: + for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index be380079822..1506fe9f6e0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -350,12 +350,13 @@ Variable names: 6: args 7: kwds Cell variables: - 0: e - 1: d - 2: f - 3: y - 4: x - 5: z""" + 0: [edfxyz] + 1: [edfxyz] + 2: [edfxyz] + 3: [edfxyz] + 4: [edfxyz] + 5: [edfxyz]""" +# NOTE: the order of the cell variables above depends on dictionary order! co_tricky_nested_f = tricky.__func__.__code__.co_consts[1] @@ -374,12 +375,12 @@ Names: Variable names: 0: c Free variables: - 0: e - 1: d - 2: f - 3: y - 4: x - 5: z""" + 0: [edfxyz] + 1: [edfxyz] + 2: [edfxyz] + 3: [edfxyz] + 4: [edfxyz] + 5: [edfxyz]""" code_info_expr_str = """\ Name: diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py index d507a129cec..c4c4803000e 100644 --- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -52,13 +52,18 @@ class DebuggerTests(unittest.TestCase): """Test that the debugger can debug Python.""" - def run_gdb(self, *args): + def run_gdb(self, *args, **env_vars): """Runs gdb with the command line given by *args. Returns its stdout, stderr """ + if env_vars: + env = os.environ.copy() + env.update(env_vars) + else: + env = None out, err = subprocess.Popen( - args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, ).communicate() return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace') @@ -118,7 +123,7 @@ class DebuggerTests(unittest.TestCase): # print ' '.join(args) # Use "args" to invoke gdb, capturing stdout, stderr: - out, err = self.run_gdb(*args) + out, err = self.run_gdb(*args, PYTHONHASHSEED='0') # Ignore some noise on stderr due to the pending breakpoint: err = err.replace('Function "%s" not defined.\n' % breakpoint, '') @@ -207,7 +212,8 @@ class PrettyPrintTests(DebuggerTests): 'Verify the pretty-printing of dictionaries' self.assertGdbRepr({}) self.assertGdbRepr({'foo': 'bar'}) - self.assertGdbRepr({'foo': 'bar', 'douglas':42}) + self.assertGdbRepr({'foo': 'bar', 'douglas': 42}, + "{'foo': 'bar', 'douglas': 42}") def test_lists(self): 'Verify the pretty-printing of lists' @@ -269,8 +275,8 @@ class PrettyPrintTests(DebuggerTests): def test_sets(self): 'Verify the pretty-printing of sets' self.assertGdbRepr(set()) - self.assertGdbRepr(set(['a', 'b'])) - self.assertGdbRepr(set([4, 5, 6])) + self.assertGdbRepr(set(['a', 'b']), "{'a', 'b'}") + self.assertGdbRepr(set([4, 5, 6]), "{4, 5, 6}") # Ensure that we handle sets containing the "dummy" key value, # which happens on deletion: @@ -282,8 +288,8 @@ id(s)''') def test_frozensets(self): 'Verify the pretty-printing of frozensets' self.assertGdbRepr(frozenset()) - self.assertGdbRepr(frozenset(['a', 'b'])) - self.assertGdbRepr(frozenset([4, 5, 6])) + self.assertGdbRepr(frozenset(['a', 'b']), "frozenset({'a', 'b'})") + self.assertGdbRepr(frozenset([4, 5, 6]), "frozenset({4, 5, 6})") def test_exceptions(self): # Test a RuntimeError diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py index 779e4850ec1..385efed7b3d 100644 --- a/Lib/test/test_hash.py +++ b/Lib/test/test_hash.py @@ -3,10 +3,16 @@ # # Also test that hash implementations are inherited as expected +import datetime +import os +import sys import unittest from test import support +from test.script_helper import assert_python_ok from collections import Hashable +IS_64BIT = sys.maxsize > 2**32 + class HashEqualityTestCase(unittest.TestCase): @@ -117,10 +123,92 @@ class HashBuiltinsTestCase(unittest.TestCase): for obj in self.hashes_to_check: self.assertEqual(hash(obj), _default_hash(obj)) +class HashRandomizationTests(unittest.TestCase): + + # Each subclass should define a field "repr_", containing the repr() of + # an object to be tested + + def get_hash_command(self, repr_): + return 'print(hash(%s))' % repr_ + + def get_hash(self, repr_, seed=None): + env = os.environ.copy() + env['__cleanenv'] = True # signal to assert_python not to do a copy + # of os.environ on its own + if seed is not None: + env['PYTHONHASHSEED'] = str(seed) + else: + env.pop('PYTHONHASHSEED', None) + out = assert_python_ok( + '-c', self.get_hash_command(repr_), + **env) + stdout = out[1].strip() + return int(stdout) + + def test_randomized_hash(self): + # two runs should return different hashes + run1 = self.get_hash(self.repr_, seed='random') + run2 = self.get_hash(self.repr_, seed='random') + self.assertNotEqual(run1, run2) + +class StringlikeHashRandomizationTests(HashRandomizationTests): + def test_null_hash(self): + # PYTHONHASHSEED=0 disables the randomized hash + if IS_64BIT: + known_hash_of_obj = 1453079729188098211 + else: + known_hash_of_obj = -1600925533 + + # Randomization is disabled by default: + self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj) + + # It can also be disabled by setting the seed to 0: + self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) + + def test_fixed_hash(self): + # test a fixed seed for the randomized hash + # Note that all types share the same values: + if IS_64BIT: + h = -4410911502303878509 + else: + h = -206076799 + self.assertEqual(self.get_hash(self.repr_, seed=42), h) + +class StrHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr('abc') + + def test_empty_string(self): + self.assertEqual(hash(""), 0) + +class BytesHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr(b'abc') + + def test_empty_string(self): + self.assertEqual(hash(b""), 0) + +class DatetimeTests(HashRandomizationTests): + def get_hash_command(self, repr_): + return 'import datetime; print(hash(%s))' % repr_ + +class DatetimeDateTests(DatetimeTests): + repr_ = repr(datetime.date(1066, 10, 14)) + +class DatetimeDatetimeTests(DatetimeTests): + repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) + +class DatetimeTimeTests(DatetimeTests): + repr_ = repr(datetime.time(0)) + + def test_main(): support.run_unittest(HashEqualityTestCase, - HashInheritanceTestCase, - HashBuiltinsTestCase) + HashInheritanceTestCase, + HashBuiltinsTestCase, + StrHashRandomizationTests, + BytesHashRandomizationTests, + DatetimeDateTests, + DatetimeDatetimeTests, + DatetimeTimeTests) if __name__ == "__main__": diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index c5dbb95629c..a0f13fd790b 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -26,6 +26,7 @@ try: import threading except ImportError: threading = None +from test.script_helper import assert_python_ok os.stat_float_times(True) st = os.stat(__file__) @@ -794,14 +795,33 @@ class DevNullTests(unittest.TestCase): self.assertEqual(f.read(), b'') class URandomTests(unittest.TestCase): - def test_urandom(self): - try: - self.assertEqual(len(os.urandom(1)), 1) - self.assertEqual(len(os.urandom(10)), 10) - self.assertEqual(len(os.urandom(100)), 100) - self.assertEqual(len(os.urandom(1000)), 1000) - except NotImplementedError: - pass + def test_urandom_length(self): + self.assertEqual(len(os.urandom(0)), 0) + self.assertEqual(len(os.urandom(1)), 1) + self.assertEqual(len(os.urandom(10)), 10) + self.assertEqual(len(os.urandom(100)), 100) + self.assertEqual(len(os.urandom(1000)), 1000) + + def test_urandom_value(self): + data1 = os.urandom(16) + data2 = os.urandom(16) + self.assertNotEqual(data1, data2) + + def get_urandom_subprocess(self, count): + code = '\n'.join(( + 'import os, sys', + 'data = os.urandom(%s)' % count, + 'sys.stdout.buffer.write(data)', + 'sys.stdout.buffer.flush()')) + out = assert_python_ok('-c', code) + stdout = out[1] + self.assertEqual(len(stdout), 16) + return stdout + + def test_urandom_subprocess(self): + data1 = self.get_urandom_subprocess(16) + data2 = self.get_urandom_subprocess(16) + self.assertNotEqual(data1, data2) @contextlib.contextmanager def _execvpe_mockup(defpath=None): diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 07bfe0657ec..6642440deae 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -733,6 +733,17 @@ class TestBasicOps(unittest.TestCase): if self.repr is not None: self.assertEqual(repr(self.set), self.repr) + def check_repr_against_values(self): + text = repr(self.set) + self.assertTrue(text.startswith('{')) + self.assertTrue(text.endswith('}')) + + result = text[1:-1].split(', ') + result.sort() + sorted_repr_values = [repr(value) for value in self.values] + sorted_repr_values.sort() + self.assertEqual(result, sorted_repr_values) + def test_print(self): try: fo = open(support.TESTFN, "w") @@ -891,7 +902,9 @@ class TestBasicOpsString(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{'a', 'c', 'b'}" + + def test_repr(self): + self.check_repr_against_values() #------------------------------------------------------------------------------ @@ -902,7 +915,9 @@ class TestBasicOpsBytes(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{b'a', b'c', b'b'}" + + def test_repr(self): + self.check_repr_against_values() #------------------------------------------------------------------------------ @@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 4 - self.repr = "{'a', b'a', 'b', b'b'}" def tearDown(self): self._warning_filters.__exit__(None, None, None) + def test_repr(self): + self.check_repr_against_values() + #============================================================================== def baditer(): diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py index a6033a4b57a..1f041c80abe 100644 --- a/Lib/test/test_strlit.py +++ b/Lib/test/test_strlit.py @@ -65,7 +65,7 @@ class TestLiterals(unittest.TestCase): sys.path.insert(0, self.tmpdir) def tearDown(self): - sys.path = self.save_path + sys.path[:] = self.save_path shutil.rmtree(self.tmpdir, ignore_errors=True) def test_template(self): diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 685857d0148..5deec426db7 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1774,7 +1774,7 @@ class CommandsWithSpaces (BaseTestCase): self.with_spaces([sys.executable, self.fname, "ab cd"]) -class ContextManagerTests(ProcessTestCase): +class ContextManagerTests(BaseTestCase): def test_pipe(self): with subprocess.Popen([sys.executable, "-c", diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index f065bf72e6c..bf22df2269f 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -513,7 +513,7 @@ class SysModuleTest(unittest.TestCase): attrs = ("debug", "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", - "bytes_warning", "quiet") + "bytes_warning", "quiet", "hash_randomization") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) self.assertEqual(type(getattr(sys.flags, attr)), int, attr) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index e403bbf1676..b2680edd422 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -13,6 +13,7 @@ import sys import tempfile from base64 import b64encode +import collections def hexescape(char): """Escape char as RFC 2396 specifies""" @@ -953,8 +954,9 @@ class urlencode_Tests(unittest.TestCase): self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) self.assertEqual("a=None&a=a", urllib.parse.urlencode({"a": [None, "a"]}, True)) + data = collections.OrderedDict([("a", 1), ("b", 1)]) self.assertEqual("a=a&a=b", - urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) + urllib.parse.urlencode({"a": data}, True)) def test_urlencode_encoding(self): # ASCII encoding. Expect %3F with errors="replace' diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py old mode 100644 new mode 100755 index a6e7ee8e1ca..ada0ca87887 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -769,7 +769,8 @@ class UrlParseTestCase(unittest.TestCase): # Other tests incidentally urlencode things; test non-covered cases: # Sequence and object values. result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True) - self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5') + # we cannot rely on ordering here + assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'} class Trivial: def __str__(self): diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py index df593cd7109..2303e4cd468 100644 --- a/Lib/tkinter/test/test_ttk/test_functions.py +++ b/Lib/tkinter/test/test_ttk/test_functions.py @@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase): ('a', 'b', 'c')), ("test {a b} c", ())) # state spec and options self.assertEqual(ttk._format_elemcreate('image', False, 'test', - ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y"))) + ('a', 'b'), a='x'), ("test a b", ("-a", "x"))) # format returned values as a tcl script # state spec with multiple states and an option with a multivalue self.assertEqual(ttk._format_elemcreate('image', True, 'test', diff --git a/Makefile.pre.in b/Makefile.pre.in index 46f461ad2bf..a13d1c3649d 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -322,6 +322,7 @@ PYTHON_OBJS= \ Python/pystate.o \ Python/pythonrun.o \ Python/pytime.o \ + Python/random.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ diff --git a/Misc/NEWS b/Misc/NEWS index 6e010b7a2f0..1a6ce289fd3 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -16,6 +16,11 @@ Core and Builtins - Issue #14051: Allow arbitrary attributes to be set of classmethod and staticmethod. +- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED + environment variable, to provide an opt-in way to protect against denial of + service attacks due to hash collisions within the dict and set types. Patch + by David Malcolm, based on work by Victor Stinner. + - Issue #13020: Fix a reference leak when allocating a structsequence object fails. Patch by Suman Saha. diff --git a/Misc/python.man b/Misc/python.man index eaa3ec76468..ef42c4ef94b 100644 --- a/Misc/python.man +++ b/Misc/python.man @@ -37,6 +37,9 @@ python \- an interpreted, interactive, object-oriented programming language .B \-OO ] [ +.B \-R +] +[ .B \-s ] [ @@ -148,6 +151,18 @@ Discard docstrings in addition to the \fB-O\fP optimizations. Do not print the version and copyright messages. These messages are also suppressed in non-interactive mode. .TP +.B \-R +Turn on "hash randomization", so that the hash() values of str, bytes and +datetime objects are "salted" with an unpredictable pseudo-random value. +Although they remain constant within an individual Python process, they are +not predictable between repeated invocations of Python. +.IP +This is intended to provide protection against a denial of service +caused by carefully-chosen inputs that exploit the worst case performance +of a dict insertion, O(n^2) complexity. See +http://www.ocert.org/advisories/ocert-2011-003.html +for details. +.TP .B \-s Don't add user site directory to sys.path. .TP @@ -402,6 +417,20 @@ specifying \fB\-v\fP multiple times. .IP PYTHONWARNINGS If this is set to a comma-separated string it is equivalent to specifying the \fB\-W\fP option for each separate value. +.IP PYTHONHASHSEED +If this variable is set to "random", the effect is the same as specifying +the \fB-R\fP option: a random value is used to seed the hashes of str, +bytes and datetime objects. + +If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for +generating the hash() of the types covered by the hash randomization. Its +purpose is to allow repeatable hashing, such as for selftests for the +interpreter itself, or to allow a cluster of python processes to share hash +values. + +The integer must be a decimal number in the range [0,4294967295]. Specifying +the value 0 will lead to the same hash values as when hash randomization is +disabled. .SH AUTHOR The Python Software Foundation: http://www.python.org/psf .SH INTERNET RESOURCES diff --git a/Modules/main.c b/Modules/main.c index d8c51721085..a820a9eb44e 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -47,7 +47,7 @@ static wchar_t **orig_argv; static int orig_argc; /* command line options */ -#define BASE_OPTS L"bBc:dEhiJm:OqsStuvVW:xX:?" +#define BASE_OPTS L"bBc:dEhiJm:OqRsStuvVW:xX:?" #define PROGRAM_OPTS BASE_OPTS @@ -73,6 +73,9 @@ static char *usage_2 = "\ -O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\ -OO : remove doc-strings in addition to the -O optimizations\n\ -q : don't print version and copyright messages on interactive startup\n\ +-R : use a pseudo-random salt to make hash() values of various types be\n\ + unpredictable between separate invocations of the interpreter, as\n\ + a defence against denial-of-service attacks\n\ -s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\ -S : don't imply 'import site' on initialization\n\ "; @@ -101,8 +104,14 @@ static char *usage_5 = " The default module search path uses %s.\n" "PYTHONCASEOK : ignore case in 'import' statements (Windows).\n" "PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n" -"PYTHONFAULTHANDLER: dump the Python traceback on fatal errors.\n" -; +"PYTHONFAULTHANDLER: dump the Python traceback on fatal errors.\n\ +"; +static char *usage_6 = "\ +PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\ + as specifying the :option:`-R` option: a random value is used to seed the\n\ + hashes of str, bytes and datetime objects. It can also be set to an integer\n\ + in the range [0,4294967295] to get hash values with a predictable seed.\n\ +"; static int usage(int exitcode, wchar_t* program) @@ -118,6 +127,7 @@ usage(int exitcode, wchar_t* program) fputs(usage_3, f); fprintf(f, usage_4, DELIM); fprintf(f, usage_5, DELIM, PYTHONHOMEHELP); + fputs(usage_6, f); } #if defined(__VMS) if (exitcode == 0) { @@ -431,6 +441,10 @@ Py_Main(int argc, wchar_t **argv) Py_QuietFlag++; break; + case 'R': + Py_HashRandomizationFlag++; + break; + /* This space reserved for other options */ default: diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 0553c761bd3..dbace1a8911 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -9317,82 +9317,6 @@ posix_getloadavg(PyObject *self, PyObject *noargs) } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, -"urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ - LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ - DWORD dwFlags ); -typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ - BYTE *pbBuffer ); - -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating - system will release it when the process terminates. */ -static HCRYPTPROV hCryptProv = 0; - -static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) - return PyErr_Format(PyExc_ValueError, - "negative argument not allowed"); - - if (hCryptProv == 0) { - HINSTANCE hAdvAPI32 = NULL; - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; - - /* Obtain handle to the DLL containing CryptoAPI - This should not fail */ - hAdvAPI32 = GetModuleHandleW(L"advapi32.dll"); - if(hAdvAPI32 == NULL) - return win32_error("GetModuleHandle", NULL); - - /* Obtain pointers to the CryptoAPI functions - This will fail on some early versions of Win95 */ - pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( - hAdvAPI32, - "CryptAcquireContextA"); - if (pCryptAcquireContext == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptAcquireContextA not found"); - - pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress( - hAdvAPI32, "CryptGenRandom"); - if (pCryptGenRandom == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptGenRandom not found"); - - /* Acquire context */ - if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, - PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return win32_error("CryptAcquireContext", NULL); - } - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */ - if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*) - PyBytes_AS_STRING(result))) { - Py_DECREF(result); - return win32_error("CryptGenRandom", NULL); - } - } - return result; -} -#endif - PyDoc_STRVAR(device_encoding__doc__, "device_encoding(fd) -> str\n\n\ Return a string describing the encoding of the device\n\ @@ -10490,6 +10414,36 @@ posix_flistxattr(PyObject *self, PyObject *args) #endif /* USE_XATTRS */ +PyDoc_STRVAR(posix_urandom__doc__, +"urandom(n) -> str\n\n\ +Return n random bytes suitable for cryptographic use."); + +static PyObject * +posix_urandom(PyObject *self, PyObject *args) +{ + Py_ssize_t size; + PyObject *result; + int ret; + + /* Read arguments */ + if (!PyArg_ParseTuple(args, "n:urandom", &size)) + return NULL; + if (size < 0) + return PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + result = PyBytes_FromStringAndSize(NULL, size); + if (result == NULL) + return NULL; + + ret = _PyOS_URandom(PyBytes_AS_STRING(result), + PyBytes_GET_SIZE(result)); + if (ret == -1) { + Py_DECREF(result); + return NULL; + } + return result; +} + /* Terminal size querying */ static PyTypeObject TerminalSizeType; @@ -10984,12 +10938,7 @@ static PyMethodDef posix_methods[] = { #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif - #ifdef MS_WINDOWS - {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__}, - #endif - #ifdef __VMS - {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__}, - #endif + {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__}, #ifdef HAVE_SETRESUID {"setresuid", posix_setresuid, METH_VARARGS, posix_setresuid__doc__}, #endif diff --git a/Objects/object.c b/Objects/object.c index 81348258a4a..bb18d471912 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -759,10 +759,19 @@ _Py_HashBytes(unsigned char *p, Py_ssize_t len) Py_uhash_t x; Py_ssize_t i; - x = (Py_uhash_t) *p << 7; + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + return 0; + } + x = (Py_uhash_t) _Py_HashSecret.prefix; + x ^= (Py_uhash_t) *p << 7; for (i = 0; i < len; i++) x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++; x ^= (Py_uhash_t) len; + x ^= (Py_uhash_t) _Py_HashSecret.suffix; if (x == -1) x = -2; return x; @@ -776,6 +785,8 @@ PyObject_HashNotImplemented(PyObject *v) return -1; } +_Py_HashSecret_t _Py_HashSecret; + Py_hash_t PyObject_Hash(PyObject *v) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 07d3eb85696..716ca3f26a4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11221,11 +11221,12 @@ unicode_hash(PyObject *self) len = PyUnicode_GET_LENGTH(self); /* The hash function as a macro, gets expanded three times below. */ -#define HASH(P) \ - x = (Py_uhash_t)*P << 7; \ - while (--len >= 0) \ - x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++; +#define HASH(P) \ + x ^= (Py_uhash_t) *P << 7; \ + while (--len >= 0) \ + x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++; \ + x = (Py_uhash_t) _Py_HashSecret.prefix; switch (PyUnicode_KIND(self)) { case PyUnicode_1BYTE_KIND: { const unsigned char *c = PyUnicode_1BYTE_DATA(self); @@ -11246,7 +11247,8 @@ unicode_hash(PyObject *self) break; } } - x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self); + x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self); + x ^= (Py_uhash_t) _Py_HashSecret.suffix; if (x == -1) x = -2; diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj index 69a8ca8c7c8..8b908aa7ab4 100644 --- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -1890,6 +1890,10 @@ RelativePath="..\Python\pythonrun.c" > + + diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 44b817f5468..54d39a5a91f 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -73,6 +73,7 @@ extern int _PyLong_Init(void); extern void PyLong_Fini(void); extern int _PyFaulthandler_Init(void); extern void _PyFaulthandler_Fini(void); +extern void _PyRandom_Init(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); @@ -92,6 +93,7 @@ int Py_FrozenFlag; /* Needed by getpath.c */ int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */ int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */ +int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */ PyThreadState *_Py_Finalizing = NULL; @@ -218,6 +220,12 @@ Py_InitializeEx(int install_sigs) Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + /* The variable is only tested for existence here; _PyRandom_Init will + check its value further. */ + if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') + Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p); + + _PyRandom_Init(); interp = PyInterpreterState_New(); if (interp == NULL) diff --git a/Python/random.c b/Python/random.c new file mode 100644 index 00000000000..01cd83aa564 --- /dev/null +++ b/Python/random.c @@ -0,0 +1,302 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include +#else +#include +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating + system will release it when the process terminates. */ +static HCRYPTPROV hCryptProv = 0; + +static int +win32_urandom_init(int raise) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + + /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + goto error; + + /* Obtain pointers to the CryptoAPI functions. This will fail on some early + versions of Win95. */ + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( + hAdvAPI32, "CryptAcquireContextA"); + if (pCryptAcquireContext == NULL) + goto error; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, + "CryptGenRandom"); + if (pCryptGenRandom == NULL) + goto error; + + /* Acquire context */ + if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto error; + + return 0; + +error: + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialize Windows random API (CryptoGen)"); + return -1; +} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen + API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + Py_ssize_t chunk; + + if (hCryptProv == 0) + { + if (win32_urandom_init(raise) == -1) + return -1; + } + + while (size > 0) + { + chunk = size > INT_MAX ? INT_MAX : size; + if (!pCryptGenRandom(hCryptProv, chunk, buffer)) + { + /* CryptGenRandom() failed */ + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialized the randomized hash " + "secret using CryptoGen)"); + return -1; + } + buffer += chunk; + size -= chunk; + } + return 0; +} +#endif /* MS_WINDOWS */ + + +#ifdef __VMS +/* Use openssl random routine */ +#include +static int +vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + if (RAND_pseudo_bytes(buffer, size) < 0) { + if (raise) { + PyErr_Format(PyExc_ValueError, + "RAND_pseudo_bytes"); + } else { + Py_FatalError("Failed to initialize the randomized hash " + "secret using RAND_pseudo_bytes"); + } + return -1; + } + return 0; +} +#endif /* __VMS */ + + +#if !defined(MS_WINDOWS) && !defined(__VMS) + +/* Read size bytes from /dev/urandom into buffer. + Call Py_FatalError() on error. */ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + assert (0 < size); + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + Py_FatalError("Failed to open /dev/urandom"); + + while (0 < size) + { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + Py_FatalError("Failed to read bytes from /dev/urandom"); + break; + } + buffer += n; + size -= (Py_ssize_t)n; + } + close(fd); +} + +/* Read size bytes from /dev/urandom into buffer. + Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + if (size <= 0) + return 0; + + Py_BEGIN_ALLOW_THREADS + fd = open("/dev/urandom", O_RDONLY); + Py_END_ALLOW_THREADS + if (fd < 0) + { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom"); + return -1; + } + + Py_BEGIN_ALLOW_THREADS + do { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + break; + buffer += n; + size -= (Py_ssize_t)n; + } while (0 < size); + Py_END_ALLOW_THREADS + + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + if (n < 0) + PyErr_SetFromErrno(PyExc_OSError); + else + PyErr_Format(PyExc_RuntimeError, + "Failed to read %zi bytes from /dev/urandom", + size); + close(fd); + return -1; + } + close(fd); + return 0; +} +#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */ + +/* Fill buffer with pseudo-random bytes generated by a linear congruent + generator (LCG): + + x(n+1) = (x(n) * 214013 + 2531011) % 2^32 + + Use bits 23..16 of x(n) to generate a byte. */ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{ + size_t index; + unsigned int x; + + x = x0; + for (index=0; index < size; index++) { + x *= 214013; + x += 2531011; + /* modulo 2 ^ (8 * sizeof(int)) */ + buffer[index] = (x >> 16) & 0xff; + } +} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic + use, from the operating random number generator (RNG). + + Return 0 on success, raise an exception and return -1 on error. */ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + return -1; + } + if (size == 0) + return 0; + +#ifdef MS_WINDOWS + return win32_urandom((unsigned char *)buffer, size, 1); +#else +# ifdef __VMS + return vms_urandom((unsigned char *)buffer, size, 1); +# else + return dev_urandom_python((char*)buffer, size); +# endif +#endif +} + +void +_PyRandom_Init(void) +{ + char *env; + void *secret = &_Py_HashSecret; + Py_ssize_t secret_size = sizeof(_Py_HashSecret); + + if (random_initialized) + return; + random_initialized = 1; + + /* + By default, hash randomization is disabled, and only + enabled if PYTHONHASHSEED is set to non-empty or if + "-R" is provided at the command line: + */ + if (!Py_HashRandomizationFlag) { + /* Disable the randomized hash: */ + memset(secret, 0, secret_size); + return; + } + + /* + Hash randomization is enabled. Generate a per-process secret, + using PYTHONHASHSEED if provided. + */ + + env = Py_GETENV("PYTHONHASHSEED"); + if (env && *env != '\0' && strcmp(env, "random") != 0) { + char *endptr = env; + unsigned long seed; + seed = strtoul(env, &endptr, 10); + if (*endptr != '\0' + || seed > 4294967295UL + || (errno == ERANGE && seed == ULONG_MAX)) + { + Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer " + "in range [0; 4294967295]"); + } + if (seed == 0) { + /* disable the randomized hash */ + memset(secret, 0, secret_size); + } + else { + lcg_urandom(seed, (unsigned char*)secret, secret_size); + } + } + else { +#ifdef MS_WINDOWS + (void)win32_urandom((unsigned char *)secret, secret_size, 0); +#else /* #ifdef MS_WINDOWS */ +# ifdef __VMS + vms_urandom((unsigned char *)secret, secret_size, 0); +# else + dev_urandom_noraise((char*)secret, secret_size); +# endif +#endif + } +} diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 955219f2288..c434b5a81eb 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1332,6 +1332,7 @@ static PyStructSequence_Field flags_fields[] = { /* {"skip_first", "-x"}, */ {"bytes_warning", "-b"}, {"quiet", "-q"}, + {"hash_randomization", "-R"}, {0} }; @@ -1340,9 +1341,9 @@ static PyStructSequence_Desc flags_desc = { flags__doc__, /* doc */ flags_fields, /* fields */ #ifdef RISCOS - 12 + 13 #else - 11 + 12 #endif }; @@ -1375,6 +1376,7 @@ make_flags(void) /* SetFlag(skipfirstline); */ SetFlag(Py_BytesWarningFlag); SetFlag(Py_QuietFlag); + SetFlag(Py_HashRandomizationFlag); #undef SetFlag if (PyErr_Occurred()) { diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py index f750e192a0e..fb7ce5c07cb 100755 --- a/Tools/scripts/run_tests.py +++ b/Tools/scripts/run_tests.py @@ -25,6 +25,7 @@ def main(regrtest_args): '-W', 'default', # Warnings set to 'default' '-bb', # Warnings about bytes/bytearray '-E', # Ignore environment variables + '-R', # Randomize hashing ] # Allow user-specified interpreter options to override our defaults. args.extend(test.support.args_from_interpreter_flags())