From 8e86579caef59fad0c54ac698d589f23a7951c55 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 22 Apr 2024 18:24:21 -0700 Subject: [PATCH] gh-95754: Better error when script shadows a standard library or third party module (#113769) --- Doc/whatsnew/3.13.rst | 34 +++ .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + Lib/test/test_import/__init__.py | 221 ++++++++++++++++ ...4-01-07-03-38-34.gh-issue-95754.aPjEBG.rst | 4 + Objects/moduleobject.c | 248 ++++++++++++++---- 8 files changed, 458 insertions(+), 55 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-07-03-38-34.gh-issue-95754.aPjEBG.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 67d1956a196..89694afdfa3 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -104,6 +104,40 @@ Improved Error Messages variables. See also :ref:`using-on-controlling-color`. (Contributed by Pablo Galindo Salgado in :gh:`112730`.) +* A common mistake is to write a script with the same name as a + standard library module. When this results in errors, we now + display a more helpful error message: + + .. code-block:: shell-session + + $ python random.py + Traceback (most recent call last): + File "/home/random.py", line 1, in + import random; print(random.randint(5)) + ^^^^^^^^^^^^^ + File "/home/random.py", line 1, in + import random; print(random.randint(5)) + ^^^^^^^^^^^^^^ + AttributeError: module 'random' has no attribute 'randint' (consider renaming '/home/random.py' since it has the same name as the standard library module named 'random' and the import system gives it precedence) + + Similarly, if a script has the same name as a third-party + module it attempts to import, and this results in errors, + we also display a more helpful error message: + + .. code-block:: shell-session + + $ python numpy.py + Traceback (most recent call last): + File "/home/numpy.py", line 1, in + import numpy as np; np.array([1,2,3]) + ^^^^^^^^^^^^^^^^^^ + File "/home/numpy.py", line 1, in + import numpy as np; np.array([1,2,3]) + ^^^^^^^^ + AttributeError: module 'numpy' has no attribute 'array' (consider renaming '/home/numpy.py' if it has the same name as a third-party module you intended to import) + + (Contributed by Shantanu Jain in :gh:`95754`.) + * When an incorrect keyword argument is passed to a function, the error message now potentially suggests the correct keyword argument. (Contributed by Pablo Galindo Salgado and Shantanu Jain in :gh:`107944`.) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 90a338ade17..4a6f40c8408 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -981,6 +981,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(h)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(handle)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(handle_seq)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(has_location)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hash_name)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(header)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(headers)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 0899e7ee776..8332cdf874c 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -470,6 +470,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(h) STRUCT_FOR_ID(handle) STRUCT_FOR_ID(handle_seq) + STRUCT_FOR_ID(has_location) STRUCT_FOR_ID(hash_name) STRUCT_FOR_ID(header) STRUCT_FOR_ID(headers) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index d4323e5bd12..103279a4cf2 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -979,6 +979,7 @@ extern "C" { INIT_ID(h), \ INIT_ID(handle), \ INIT_ID(handle_seq), \ + INIT_ID(has_location), \ INIT_ID(hash_name), \ INIT_ID(header), \ INIT_ID(headers), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 9daef267069..a180054d407 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1251,6 +1251,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(handle_seq); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(has_location); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(hash_name); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 469d1fbe59a..947a7b19056 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -804,6 +804,227 @@ class ImportTests(unittest.TestCase): self.assertIn("Frozen object named 'x' is invalid", str(cm.exception)) + def test_script_shadowing_stdlib(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("import fractions\nfractions.Fraction") + + expected_error = ( + rb"AttributeError: module 'fractions' has no attribute 'Fraction' " + rb"\(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and the import system gives it precedence\)" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + # and there's no error at all when using -P + popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') + + tmp_child = os.path.join(tmp, "child") + os.mkdir(tmp_child) + + # test the logic with different cwd + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error + + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error + + def test_package_shadowing_stdlib_module(self): + with os_helper.temp_dir() as tmp: + os.mkdir(os.path.join(tmp, "fractions")) + with open(os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +fractions.Fraction +""") + + expected_error = ( + rb"AttributeError: module 'fractions' has no attribute 'Fraction' " + rb"\(consider renaming '.*fractions.__init__.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and the import system gives it precedence\)" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'main', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + # and there's no shadowing at all when using -P + popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, b"module 'fractions' has no attribute 'shadowing_module'") + + def test_script_shadowing_third_party(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write("import numpy\nnumpy.array") + + expected_error = ( + rb"AttributeError: module 'numpy' has no attribute 'array' " + rb"\(consider renaming '.*numpy.py' if it has the " + rb"same name as a third-party module you intended to import\)\s+\Z" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_script_maybe_not_shadowing_third_party(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write("this_script_does_not_attempt_to_import_numpy = True") + + expected_error = ( + rb"AttributeError: module 'numpy' has no attribute 'attr'\s+\Z" + ) + + popen = script_helper.spawn_python('-c', 'import numpy; numpy.attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_script_shadowing_stdlib_edge_cases(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + fractions.Fraction +except TypeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout.rstrip(), b"unhashable type: 'substr'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +sys.path = [0] +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual( + stdout.splitlines(), + [ + b"module 'fractions' has no attribute 'Fraction'", + b"module 'fractions' has no attribute 'Fraction'", + b"module 'fractions' has no attribute 'Fraction'", + ], + ) + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +del fractions.__spec__.origin +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +fractions.__spec__.origin = 0 +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual( + stdout.splitlines(), + [ + b"module 'fractions' has no attribute 'Fraction'", + b"module 'fractions' has no attribute 'Fraction'" + ], + ) + + def test_script_shadowing_stdlib_sys_path_modification(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + + expected_error = ( + rb"AttributeError: module 'fractions' has no attribute 'Fraction' " + rb"\(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and the import system gives it precedence\)" + ) + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import sys +sys.path.insert(0, "this_folder_does_not_exist") +import fractions +fractions.Fraction +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + @skip_if_dont_write_bytecode class FilePermissionTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-07-03-38-34.gh-issue-95754.aPjEBG.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-07-03-38-34.gh-issue-95754.aPjEBG.rst new file mode 100644 index 00000000000..588be2d28cd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-07-03-38-34.gh-issue-95754.aPjEBG.rst @@ -0,0 +1,4 @@ +Improve the error message when a script shadowing a module from the standard +library causes :exc:`AttributeError` to be raised. Similarly, improve the error +message when a script shadowing a third party module attempts to access an +attribute from that third party module while still initialising. diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index da6a276c41b..2f6adb9a2e1 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_fileutils.h" // _Py_wgetcwd #include "pycore_interp.h" // PyInterpreterState.importlib #include "pycore_modsupport.h" // _PyModule_CreateInitialized() #include "pycore_moduleobject.h" // _PyModule_GetDef() @@ -10,6 +11,7 @@ #include "pycore_pyerrors.h" // _PyErr_FormatFromCause() #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "osdefs.h" // MAXPATHLEN static PyMemberDef module_members[] = { @@ -785,11 +787,104 @@ _PyModuleSpec_IsUninitializedSubmodule(PyObject *spec, PyObject *name) return rc; } +static int +_get_file_origin_from_spec(PyObject *spec, PyObject **p_origin) +{ + PyObject *has_location = NULL; + int rc = PyObject_GetOptionalAttr(spec, &_Py_ID(has_location), &has_location); + if (rc <= 0) { + return rc; + } + // If origin is not a location, or doesn't exist, or is not a str), we could consider falling + // back to module.__file__. But the cases in which module.__file__ is not __spec__.origin + // are cases in which we probably shouldn't be guessing. + rc = PyObject_IsTrue(has_location); + Py_DECREF(has_location); + if (rc <= 0) { + return rc; + } + // has_location is true, so origin is a location + PyObject *origin = NULL; + rc = PyObject_GetOptionalAttr(spec, &_Py_ID(origin), &origin); + if (rc <= 0) { + return rc; + } + assert(origin != NULL); + if (!PyUnicode_Check(origin)) { + Py_DECREF(origin); + return 0; + } + *p_origin = origin; + return 1; +} + +static int +_is_module_possibly_shadowing(PyObject *origin) +{ + // origin must be a unicode subtype + // Returns 1 if the module at origin could be shadowing a module of the + // same name later in the module search path. The condition we check is basically: + // root = os.path.dirname(origin.removesuffix(os.sep + "__init__.py")) + // return not sys.flags.safe_path and root == (sys.path[0] or os.getcwd()) + // Returns 0 otherwise (or if we aren't sure) + // Returns -1 if an error occurred that should be propagated + if (origin == NULL) { + return 0; + } + + // not sys.flags.safe_path + const PyConfig *config = _Py_GetConfig(); + if (config->safe_path) { + return 0; + } + + // root = os.path.dirname(origin.removesuffix(os.sep + "__init__.py")) + wchar_t root[MAXPATHLEN + 1]; + Py_ssize_t size = PyUnicode_AsWideChar(origin, root, MAXPATHLEN); + if (size < 0) { + return -1; + } + assert(size <= MAXPATHLEN); + root[size] = L'\0'; + + wchar_t *sep = wcsrchr(root, SEP); + if (sep == NULL) { + return 0; + } + // If it's a package then we need to look one directory further up + if (wcscmp(sep + 1, L"__init__.py") == 0) { + *sep = L'\0'; + sep = wcsrchr(root, SEP); + if (sep == NULL) { + return 0; + } + } + *sep = L'\0'; + + // sys.path[0] or os.getcwd() + wchar_t *sys_path_0 = config->sys_path_0; + if (!sys_path_0) { + return 0; + } + + wchar_t sys_path_0_buf[MAXPATHLEN]; + if (sys_path_0[0] == L'\0') { + // if sys.path[0] == "", treat it as if it were the current directory + if (!_Py_wgetcwd(sys_path_0_buf, MAXPATHLEN)) { + return -1; + } + sys_path_0 = sys_path_0_buf; + } + + int result = wcscmp(sys_path_0, root) == 0; + return result; +} + PyObject* _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) { // When suppress=1, this function suppresses AttributeError. - PyObject *attr, *mod_name, *getattr, *origin; + PyObject *attr, *mod_name, *getattr; attr = _PyObject_GenericGetAttrWithDict((PyObject *)m, name, NULL, suppress); if (attr) { return attr; @@ -820,68 +915,111 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) Py_DECREF(getattr); return result; } + + // The attribute was not found. We make a best effort attempt at a useful error message, + // but only if we're not suppressing AttributeError. + if (suppress == 1) { + return NULL; + } if (PyDict_GetItemRef(m->md_dict, &_Py_ID(__name__), &mod_name) < 0) { return NULL; } - if (mod_name && PyUnicode_Check(mod_name)) { - PyObject *spec; - if (PyDict_GetItemRef(m->md_dict, &_Py_ID(__spec__), &spec) < 0) { - Py_DECREF(mod_name); - return NULL; - } - if (suppress != 1) { - int rc = _PyModuleSpec_IsInitializing(spec); - if (rc > 0) { - int valid_spec = PyObject_GetOptionalAttr(spec, &_Py_ID(origin), &origin); - if (valid_spec == -1) { - Py_XDECREF(spec); - Py_DECREF(mod_name); - return NULL; - } - if (valid_spec == 1 && !PyUnicode_Check(origin)) { - valid_spec = 0; - Py_DECREF(origin); - } - if (valid_spec == 1) { - PyErr_Format(PyExc_AttributeError, - "partially initialized " - "module '%U' from '%U' has no attribute '%U' " - "(most likely due to a circular import)", - mod_name, origin, name); - Py_DECREF(origin); - } - else { - PyErr_Format(PyExc_AttributeError, - "partially initialized " - "module '%U' has no attribute '%U' " - "(most likely due to a circular import)", - mod_name, name); - } - } - else if (rc == 0) { - rc = _PyModuleSpec_IsUninitializedSubmodule(spec, name); - if (rc > 0) { - PyErr_Format(PyExc_AttributeError, - "cannot access submodule '%U' of module '%U' " - "(most likely due to a circular import)", - name, mod_name); - } - else if (rc == 0) { - PyErr_Format(PyExc_AttributeError, - "module '%U' has no attribute '%U'", - mod_name, name); - } - } - } - Py_XDECREF(spec); + if (!mod_name || !PyUnicode_Check(mod_name)) { + Py_XDECREF(mod_name); + PyErr_Format(PyExc_AttributeError, + "module has no attribute '%U'", name); + return NULL; + } + PyObject *spec; + if (PyDict_GetItemRef(m->md_dict, &_Py_ID(__spec__), &spec) < 0) { Py_DECREF(mod_name); return NULL; } - Py_XDECREF(mod_name); - if (suppress != 1) { + if (spec == NULL) { PyErr_Format(PyExc_AttributeError, - "module has no attribute '%U'", name); + "module '%U' has no attribute '%U'", + mod_name, name); + Py_DECREF(mod_name); + return NULL; } + + PyObject *origin = NULL; + if (_get_file_origin_from_spec(spec, &origin) < 0) { + goto done; + } + + int is_possibly_shadowing = _is_module_possibly_shadowing(origin); + if (is_possibly_shadowing < 0) { + goto done; + } + int is_possibly_shadowing_stdlib = 0; + if (is_possibly_shadowing) { + PyObject *stdlib_modules = PySys_GetObject("stdlib_module_names"); + if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { + is_possibly_shadowing_stdlib = PySet_Contains(stdlib_modules, mod_name); + if (is_possibly_shadowing_stdlib < 0) { + goto done; + } + } + } + + if (is_possibly_shadowing_stdlib) { + assert(origin); + PyErr_Format(PyExc_AttributeError, + "module '%U' has no attribute '%U' " + "(consider renaming '%U' since it has the same " + "name as the standard library module named '%U' " + "and the import system gives it precedence)", + mod_name, name, origin, mod_name); + } + else { + int rc = _PyModuleSpec_IsInitializing(spec); + if (rc > 0) { + if (is_possibly_shadowing) { + assert(origin); + // For third-party modules, only mention the possibility of + // shadowing if the module is being initialized. + PyErr_Format(PyExc_AttributeError, + "module '%U' has no attribute '%U' " + "(consider renaming '%U' if it has the same name " + "as a third-party module you intended to import)", + mod_name, name, origin); + } + else if (origin) { + PyErr_Format(PyExc_AttributeError, + "partially initialized " + "module '%U' from '%U' has no attribute '%U' " + "(most likely due to a circular import)", + mod_name, origin, name); + } + else { + PyErr_Format(PyExc_AttributeError, + "partially initialized " + "module '%U' has no attribute '%U' " + "(most likely due to a circular import)", + mod_name, name); + } + } + else if (rc == 0) { + rc = _PyModuleSpec_IsUninitializedSubmodule(spec, name); + if (rc > 0) { + PyErr_Format(PyExc_AttributeError, + "cannot access submodule '%U' of module '%U' " + "(most likely due to a circular import)", + name, mod_name); + } + else if (rc == 0) { + PyErr_Format(PyExc_AttributeError, + "module '%U' has no attribute '%U'", + mod_name, name); + } + } + } + +done: + Py_XDECREF(origin); + Py_DECREF(spec); + Py_DECREF(mod_name); return NULL; }