mirror of https://github.com/python/cpython
GH-85168: Use filesystem encoding when converting to/from `file` URIs (#126852)
Adjust `urllib.request.url2pathname()` and `urllib.request.pathname2url()` to use the filesystem encoding and error handler when quoting and unquoting file URIs, rather than forcing use of UTF-8. No changes are needed in the `nturl2path` module because Python on Windows always uses UTF-8 as its filesystem encoding, per PEP 529.
This commit is contained in:
parent
2cdfb41d0c
commit
c9b399fbdb
|
@ -609,10 +609,6 @@ class urlretrieve_FileTests(unittest.TestCase):
|
||||||
|
|
||||||
def constructLocalFileUrl(self, filePath):
|
def constructLocalFileUrl(self, filePath):
|
||||||
filePath = os.path.abspath(filePath)
|
filePath = os.path.abspath(filePath)
|
||||||
try:
|
|
||||||
filePath.encode("utf-8")
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
raise unittest.SkipTest("filePath is not encodable to utf8")
|
|
||||||
return "file://%s" % urllib.request.pathname2url(filePath)
|
return "file://%s" % urllib.request.pathname2url(filePath)
|
||||||
|
|
||||||
def createNewTempFile(self, data=b""):
|
def createNewTempFile(self, data=b""):
|
||||||
|
@ -1462,6 +1458,13 @@ class Pathname_Tests(unittest.TestCase):
|
||||||
self.assertEqual(fn('/a/b.c'), '/a/b.c')
|
self.assertEqual(fn('/a/b.c'), '/a/b.c')
|
||||||
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
|
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
|
||||||
|
|
||||||
|
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
|
||||||
|
def test_pathname2url_nonascii(self):
|
||||||
|
encoding = sys.getfilesystemencoding()
|
||||||
|
errors = sys.getfilesystemencodeerrors()
|
||||||
|
url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
|
||||||
|
self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
|
||||||
|
|
||||||
@unittest.skipUnless(sys.platform == 'win32',
|
@unittest.skipUnless(sys.platform == 'win32',
|
||||||
'test specific to Windows pathnames.')
|
'test specific to Windows pathnames.')
|
||||||
def test_url2pathname_win(self):
|
def test_url2pathname_win(self):
|
||||||
|
@ -1512,6 +1515,15 @@ class Pathname_Tests(unittest.TestCase):
|
||||||
self.assertEqual(fn('////foo/bar'), '//foo/bar')
|
self.assertEqual(fn('////foo/bar'), '//foo/bar')
|
||||||
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
|
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
|
||||||
|
|
||||||
|
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
|
||||||
|
def test_url2pathname_nonascii(self):
|
||||||
|
encoding = sys.getfilesystemencoding()
|
||||||
|
errors = sys.getfilesystemencodeerrors()
|
||||||
|
url = os_helper.FS_NONASCII
|
||||||
|
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
|
||||||
|
url = urllib.parse.quote(url, encoding=encoding, errors=errors)
|
||||||
|
self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
|
||||||
|
|
||||||
class Utility_Tests(unittest.TestCase):
|
class Utility_Tests(unittest.TestCase):
|
||||||
"""Testcase to test the various utility functions in the urllib."""
|
"""Testcase to test the various utility functions in the urllib."""
|
||||||
|
|
||||||
|
|
|
@ -718,10 +718,6 @@ class OpenerDirectorTests(unittest.TestCase):
|
||||||
|
|
||||||
|
|
||||||
def sanepathname2url(path):
|
def sanepathname2url(path):
|
||||||
try:
|
|
||||||
path.encode("utf-8")
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
raise unittest.SkipTest("path is not encodable to utf8")
|
|
||||||
urlpath = urllib.request.pathname2url(path)
|
urlpath = urllib.request.pathname2url(path)
|
||||||
if os.name == "nt" and urlpath.startswith("///"):
|
if os.name == "nt" and urlpath.startswith("///"):
|
||||||
urlpath = urlpath[2:]
|
urlpath = urlpath[2:]
|
||||||
|
|
|
@ -1657,12 +1657,16 @@ else:
|
||||||
# URL has an empty authority section, so the path begins on the
|
# URL has an empty authority section, so the path begins on the
|
||||||
# third character.
|
# third character.
|
||||||
pathname = pathname[2:]
|
pathname = pathname[2:]
|
||||||
return unquote(pathname)
|
encoding = sys.getfilesystemencoding()
|
||||||
|
errors = sys.getfilesystemencodeerrors()
|
||||||
|
return unquote(pathname, encoding=encoding, errors=errors)
|
||||||
|
|
||||||
def pathname2url(pathname):
|
def pathname2url(pathname):
|
||||||
"""OS-specific conversion from a file system path to a relative URL
|
"""OS-specific conversion from a file system path to a relative URL
|
||||||
of the 'file' scheme; not recommended for general use."""
|
of the 'file' scheme; not recommended for general use."""
|
||||||
return quote(pathname)
|
encoding = sys.getfilesystemencoding()
|
||||||
|
errors = sys.getfilesystemencodeerrors()
|
||||||
|
return quote(pathname, encoding=encoding, errors=errors)
|
||||||
|
|
||||||
|
|
||||||
# Utility functions
|
# Utility functions
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
Fix issue where :func:`urllib.request.url2pathname` and
|
||||||
|
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
|
||||||
|
unquoting file URIs. They now use the :term:`filesystem encoding and error
|
||||||
|
handler`.
|
Loading…
Reference in New Issue