From c9f54cf512996790266c17f81584c9725ee99d47 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Tue, 21 Feb 2012 16:08:05 -0500 Subject: [PATCH] enable hash randomization by default --- Doc/reference/datamodel.rst | 24 +++++++++++++++++++++++- Doc/using/cmdline.rst | 10 ++++------ Lib/test/test_cmd_line.py | 4 ++-- Lib/test/test_hash.py | 4 ++-- Misc/NEWS | 7 +++---- Misc/python.man | 23 +++-------------------- Modules/main.c | 25 +++++++------------------ Python/random.c | 11 ----------- Tools/scripts/run_tests.py | 1 - 9 files changed, 44 insertions(+), 65 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 85953ad1e25..3fb0bf6f3bc 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1277,7 +1277,29 @@ Basic customization inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__` had been explicitly set to :const:`None`. - See also the :option:`-R` command-line option. + + .. note:: + + By default, the :meth:`__hash__` values of str, bytes and datetime + objects are "salted" with an unpredictable random value. Although they + remain constant within an individual Python process, they are not + predictable between repeated invocations of Python. + + This is intended to provide protection against a denial-of-service caused + by carefully-chosen inputs that exploit the worst case performance of a + dict insertion, O(n^2) complexity. See + http://www.ocert.org/advisories/ocert-2011-003.html for details. + + Changing hash values affects the order in which keys are retrieved from a + dict. Python has never made guarantees about this ordering (and it + typically varies between 32-bit and 64-bit builds), but some real-world + code implicitly relies on this non-guaranteed behavior and may break now + that randomization is enabled by default. + + See also :envvar:`PYTHONHASHSEED`. + + .. versionchanged:: 3.3 + Hash randomization is enabled by default. .. 
method:: object.__bool__(self) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index b97dbcdf323..64d453eaddc 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -24,7 +24,7 @@ Command line When invoking Python, you may specify any of these options:: - python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args] + python [-bBdEhiOqsSuvVWx?] [-c command | -m module-name | script | - ] [args] The most common use case is, of course, a simple invocation of a script:: @@ -486,9 +486,8 @@ These environment variables influence Python's behavior. .. envvar:: PYTHONHASHSEED - If this variable is set to ``random``, the effect is the same as specifying - the :option:`-R` option: a random value is used to seed the hashes of str, - bytes and datetime objects. + If this variable is set to ``random``, a random value is used to seed the + hashes of str, bytes and datetime objects. If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed seed for generating the hash() of the types covered by the hash @@ -499,8 +498,7 @@ These environment variables influence Python's behavior. values. The integer must be a decimal number in the range [0,4294967295]. Specifying - the value 0 will lead to the same hash values as when hash randomization is - disabled. + the value 0 will disable hash randomization. .. 
versionadded:: 3.2.3 diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 01af9b93527..8c960b14ec9 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -330,14 +330,14 @@ class CmdLineTest(unittest.TestCase): hashes = [] for i in range(2): code = 'print(hash("spam"))' - rc, out, err = assert_python_ok('-R', '-c', code) + rc, out, err = assert_python_ok('-c', code) self.assertEqual(rc, 0) hashes.append(out) self.assertNotEqual(hashes[0], hashes[1]) # Verify that sys.flags contains hash_randomization code = 'import sys; print("random is", sys.flags.hash_randomization)' - rc, out, err = assert_python_ok('-R', '-c', code) + rc, out, err = assert_python_ok('-c', code) self.assertEqual(rc, 0) self.assertIn(b'random is 1', out) diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py index a104548ff9c..137165f378c 100644 --- a/Lib/test/test_hash.py +++ b/Lib/test/test_hash.py @@ -159,8 +159,8 @@ class StringlikeHashRandomizationTests(HashRandomizationTests): else: known_hash_of_obj = -1600925533 - # Randomization is disabled by default: - self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj) + # Randomization is enabled by default: + self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj) # It can also be disabled by setting the seed to 0: self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) diff --git a/Misc/NEWS b/Misc/NEWS index ad45afe0725..389b6ab2609 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -18,10 +18,9 @@ Core and Builtins - Issue #14051: Allow arbitrary attributes to be set of classmethod and staticmethod. -- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED - environment variable, to provide an opt-in way to protect against denial of - service attacks due to hash collisions within the dict and set types. Patch - by David Malcolm, based on work by Victor Stinner. 
+- Issue #13703: oCERT-2011-003: Randomize hashes of str and bytes to protect + against denial of service attacks due to hash collisions within the dict and + set types. Patch by David Malcolm, based on work by Victor Stinner. - Issue #13020: Fix a reference leak when allocating a structsequence object fails. Patch by Suman Saha. diff --git a/Misc/python.man b/Misc/python.man index ef42c4ef94b..757d4d89494 100644 --- a/Misc/python.man +++ b/Misc/python.man @@ -37,9 +37,6 @@ python \- an interpreted, interactive, object-oriented programming language .B \-OO ] [ -.B \-R -] -[ .B \-s ] [ @@ -151,18 +148,6 @@ Discard docstrings in addition to the \fB-O\fP optimizations. Do not print the version and copyright messages. These messages are also suppressed in non-interactive mode. .TP -.B \-R -Turn on "hash randomization", so that the hash() values of str, bytes and -datetime objects are "salted" with an unpredictable pseudo-random value. -Although they remain constant within an individual Python process, they are -not predictable between repeated invocations of Python. -.IP -This is intended to provide protection against a denial of service -caused by carefully-chosen inputs that exploit the worst case performance -of a dict insertion, O(n^2) complexity. See -http://www.ocert.org/advisories/ocert-2011-003.html -for details. -.TP .B \-s Don't add user site directory to sys.path. .TP @@ -418,9 +403,8 @@ specifying \fB\-v\fP multiple times. If this is set to a comma-separated string it is equivalent to specifying the \fB\-W\fP option for each separate value. .IP PYTHONHASHSEED -If this variable is set to "random", the effect is the same as specifying -the \fB-R\fP option: a random value is used to seed the hashes of str, -bytes and datetime objects. +If this variable is set to "random", a random value is used to seed the hashes +of str, bytes and datetime objects. 
If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for generating the hash() of the types covered by the hash randomization. Its @@ -429,8 +413,7 @@ interpreter itself, or to allow a cluster of python processes to share hash values. The integer must be a decimal number in the range [0,4294967295]. Specifying -the value 0 will lead to the same hash values as when hash randomization is -disabled. +the value 0 will disable hash randomization. .SH AUTHOR The Python Software Foundation: http://www.python.org/psf .SH INTERNET RESOURCES diff --git a/Modules/main.c b/Modules/main.c index d0260334ae6..8241ee48fcd 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -73,9 +73,6 @@ static char *usage_2 = "\ -O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\ -OO : remove doc-strings in addition to the -O optimizations\n\ -q : don't print version and copyright messages on interactive startup\n\ --R : use a pseudo-random salt to make hash() values of various types be\n\ - unpredictable between separate invocations of the interpreter, as\n\ - a defence against denial-of-service attacks\n\ -s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\ -S : don't imply 'import site' on initialization\n\ "; @@ -107,10 +104,10 @@ static char *usage_5 = "PYTHONFAULTHANDLER: dump the Python traceback on fatal errors.\n\ "; static char *usage_6 = "\ -PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\ - as specifying the :option:`-R` option: a random value is used to seed the\n\ - hashes of str, bytes and datetime objects. It can also be set to an integer\n\ - in the range [0,4294967295] to get hash values with a predictable seed.\n\ +PYTHONHASHSEED: if this variable is set to ``random``, a random value is used\n\ + to seed the hashes of str, bytes and datetime objects. 
It can also be\n\ + set to an integer in the range [0,4294967295] to get hash values with a\n\ + predictable seed.\n\ "; static int @@ -347,21 +344,13 @@ Py_Main(int argc, wchar_t **argv) not interpreter options. */ break; } - switch (c) { - case 'E': + if (c == 'E') { Py_IgnoreEnvironmentFlag++; break; - case 'R': - Py_HashRandomizationFlag++; - break; } } - /* The variable is only tested for existence here; _PyRandom_Init will - check its value further. */ - if (!Py_HashRandomizationFlag && - (p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') - Py_HashRandomizationFlag = 1; + Py_HashRandomizationFlag = 1; _PyRandom_Init(); PySys_ResetWarnOptions(); @@ -468,7 +457,7 @@ Py_Main(int argc, wchar_t **argv) break; case 'R': - /* Already handled above */ + /* Ignored */ break; /* This space reserved for other options */ diff --git a/Python/random.c b/Python/random.c index a2ae002261a..7019a3503e8 100644 --- a/Python/random.c +++ b/Python/random.c @@ -256,17 +256,6 @@ _PyRandom_Init(void) return; _Py_HashSecret_Initialized = 1; - /* - By default, hash randomization is disabled, and only - enabled if PYTHONHASHSEED is set to non-empty or if - "-R" is provided at the command line: - */ - if (!Py_HashRandomizationFlag) { - /* Disable the randomized hash: */ - memset(secret, 0, secret_size); - return; - } - /* Hash randomization is enabled. Generate a per-process secret, using PYTHONHASHSEED if provided. diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py index fb7ce5c07cb..f750e192a0e 100755 --- a/Tools/scripts/run_tests.py +++ b/Tools/scripts/run_tests.py @@ -25,7 +25,6 @@ def main(regrtest_args): '-W', 'default', # Warnings set to 'default' '-bb', # Warnings about bytes/bytearray '-E', # Ignore environment variables - '-R', # Randomize hashing ] # Allow user-specified interpreter options to override our defaults. args.extend(test.support.args_from_interpreter_flags())