From 38220931433ab2d83892170e96a4d66764ce5338 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 31 Mar 2015 15:31:53 +0300 Subject: [PATCH] Issue #10395: Added os.path.commonpath(). Implemented in posixpath and ntpath. Based on patch by Rafik Draoui. --- Doc/library/os.path.rst | 19 +++++++++-- Doc/whatsnew/3.5.rst | 7 ++++ Lib/ntpath.py | 63 +++++++++++++++++++++++++++++++++- Lib/posixpath.py | 45 ++++++++++++++++++++++++- Lib/test/test_ntpath.py | 69 ++++++++++++++++++++++++++++++++++++++ Lib/test/test_posixpath.py | 54 +++++++++++++++++++++++++++++ Misc/NEWS | 3 ++ 7 files changed, 255 insertions(+), 5 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 92631b2e416..e4fe44ed94f 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -66,11 +66,24 @@ the :mod:`glob` module.) empty string (``''``). +.. function:: commonpath(paths) + + Return the longest common sub-path of each pathname in the sequence + *paths*. Raise ValueError if *paths* contains both absolute and relative + pathnames, or if *paths* is empty. Unlike :func:`commonprefix`, this + returns a valid path. + + Availability: Unix, Windows + + .. versionadded:: 3.5 + + .. function:: commonprefix(list) - Return the longest path prefix (taken character-by-character) that is a prefix - of all paths in *list*. If *list* is empty, return the empty string (``''``). - Note that this may return invalid paths because it works a character at a time. + Return the longest path prefix (taken character-by-character) that is a + prefix of all paths in *list*. If *list* is empty, return the empty string + (``''``). Note that this may return invalid paths because it works a + character at a time. To obtain a valid path, see :func:`commonpath`. .. 
function:: dirname(path) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index fe981e60d33..15d7f26b27d 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -370,6 +370,13 @@ os * :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes` attribute on Windows. (Contributed by Ben Hoyt in :issue:`21719`.) +os.path +------- + +* New :func:`~os.path.commonpath` function that extracts common path prefix. + Unlike the :func:`~os.path.commonprefix` function, it always returns a valid + path. (Contributed by Rafik Draoui and Serhiy Storchaka in :issue:`10395`.) + pickle ------ diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 6732aa22cb1..cfb46060198 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -17,7 +17,7 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "ismount", "expanduser","expandvars","normpath","abspath", "splitunc","curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", - "samefile", "sameopenfile", "samestat",] + "samefile", "sameopenfile", "samestat", "commonpath"] # strings representing various path-related bits and pieces # These are primarily for export; internally, they are hardcoded. @@ -589,6 +589,67 @@ def relpath(path, start=None): raise +# Return the longest common sub-path of the sequence of paths given as input. +# The function is case-insensitive and 'separator-insensitive', i.e. if the +# only difference between two paths is the use of '\' versus '/' as separator, +# they are deemed to be equal. +# +# However, the returned path will have the standard '\' separator (even if the +# given paths had the alternative '/' separator) and will have the case of the +# first path given in the sequence. Additionally, any trailing separator is +# stripped from the returned path. 
+ +def commonpath(paths): + """Given a sequence of path names, returns the longest common sub-path.""" + + if not paths: + raise ValueError('commonpath() arg is an empty sequence') + + if isinstance(paths[0], bytes): + sep = b'\\' + altsep = b'/' + curdir = b'.' + else: + sep = '\\' + altsep = '/' + curdir = '.' + + try: + drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths] + split_paths = [p.split(sep) for d, p in drivesplits] + + try: + isabs, = set(p[:1] == sep for d, p in drivesplits) + except ValueError: + raise ValueError("Can't mix absolute and relative paths") from None + + # Check that all drive letters or UNC paths match. The check is made only + # now otherwise type errors for mixing strings and bytes would not be + # caught. + if len(set(d for d, p in drivesplits)) != 1: + raise ValueError("Paths don't have the same drive") + + drive, path = splitdrive(paths[0].replace(altsep, sep)) + common = path.split(sep) + common = [c for c in common if c and c != curdir] + + split_paths = [[c for c in s if c and c != curdir] for s in split_paths] + s1 = min(split_paths) + s2 = max(split_paths) + for i, c in enumerate(s1): + if c != s2[i]: + common = common[:i] + break + else: + common = common[:len(s1)] + + prefix = drive + sep if isabs else drive + return prefix + sep.join(common) + except (TypeError, AttributeError): + genericpath._check_arg_types('commonpath', *paths) + raise + + # determine if two files are in fact the same file try: # GetFinalPathNameByHandle is available starting with Windows 6.0. 
diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 44ed8383f2e..ea51e118338 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -22,7 +22,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "ismount", "expanduser","expandvars","normpath","abspath", "samefile","sameopenfile","samestat", "curdir","pardir","sep","pathsep","defpath","altsep","extsep", - "devnull","realpath","supports_unicode_filenames","relpath"] + "devnull","realpath","supports_unicode_filenames","relpath", + "commonpath"] # Strings representing various path-related bits and pieces. # These are primarily for export; internally, they are hardcoded. @@ -455,3 +456,45 @@ def relpath(path, start=None): except (TypeError, AttributeError, BytesWarning, DeprecationWarning): genericpath._check_arg_types('relpath', path, start) raise + + +# Return the longest common sub-path of the sequence of paths given as input. +# The paths are not normalized before comparing them (this is the +# responsibility of the caller). Any trailing separator is stripped from the +# returned path. + +def commonpath(paths): + """Given a sequence of path names, returns the longest common sub-path.""" + + if not paths: + raise ValueError('commonpath() arg is an empty sequence') + + if isinstance(paths[0], bytes): + sep = b'/' + curdir = b'.' + else: + sep = '/' + curdir = '.' 
+ + try: + split_paths = [path.split(sep) for path in paths] + + try: + isabs, = set(p[:1] == sep for p in paths) + except ValueError: + raise ValueError("Can't mix absolute and relative paths") from None + + split_paths = [[c for c in s if c and c != curdir] for s in split_paths] + s1 = min(split_paths) + s2 = max(split_paths) + common = s1 + for i, c in enumerate(s1): + if c != s2[i]: + common = s1[:i] + break + + prefix = sep if isabs else sep[:0] + return prefix + sep.join(common) + except (TypeError, AttributeError): + genericpath._check_arg_types('commonpath', *paths) + raise diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index dacddded3dd..580f2030a3d 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -330,6 +330,75 @@ class TestNtpath(unittest.TestCase): tester('ntpath.relpath("/a/b", "/a/b")', '.') tester('ntpath.relpath("c:/foo", "C:/FOO")', '.') + def test_commonpath(self): + def check(paths, expected): + tester(('ntpath.commonpath(%r)' % paths).replace('\\\\', '\\'), + expected) + def check_error(exc, paths): + self.assertRaises(exc, ntpath.commonpath, paths) + self.assertRaises(exc, ntpath.commonpath, + [os.fsencode(p) for p in paths]) + + self.assertRaises(ValueError, ntpath.commonpath, []) + check_error(ValueError, ['C:\\Program Files', 'Program Files']) + check_error(ValueError, ['C:\\Program Files', 'C:Program Files']) + check_error(ValueError, ['\\Program Files', 'Program Files']) + check_error(ValueError, ['Program Files', 'C:\\Program Files']) + check(['C:\\Program Files'], 'C:\\Program Files') + check(['C:\\Program Files', 'C:\\Program Files'], 'C:\\Program Files') + check(['C:\\Program Files\\', 'C:\\Program Files'], + 'C:\\Program Files') + check(['C:\\Program Files\\', 'C:\\Program Files\\'], + 'C:\\Program Files') + check(['C:\\\\Program Files', 'C:\\Program Files\\\\'], + 'C:\\Program Files') + check(['C:\\.\\Program Files', 'C:\\Program Files\\.'], + 'C:\\Program Files') + check(['C:\\', 'C:\\bin'], 
'C:\\') + check(['C:\\Program Files', 'C:\\bin'], 'C:\\') + check(['C:\\Program Files', 'C:\\Program Files\\Bar'], + 'C:\\Program Files') + check(['C:\\Program Files\\Foo', 'C:\\Program Files\\Bar'], + 'C:\\Program Files') + check(['C:\\Program Files', 'C:\\Projects'], 'C:\\') + check(['C:\\Program Files\\', 'C:\\Projects'], 'C:\\') + + check(['C:\\Program Files\\Foo', 'C:/Program Files/Bar'], + 'C:\\Program Files') + check(['C:\\Program Files\\Foo', 'c:/program files/bar'], + 'C:\\Program Files') + check(['c:/program files/bar', 'C:\\Program Files\\Foo'], + 'c:\\program files') + + check_error(ValueError, ['C:\\Program Files', 'D:\\Program Files']) + + check(['spam'], 'spam') + check(['spam', 'spam'], 'spam') + check(['spam', 'alot'], '') + check(['and\\jam', 'and\\spam'], 'and') + check(['and\\\\jam', 'and\\spam\\\\'], 'and') + check(['and\\.\\jam', '.\\and\\spam'], 'and') + check(['and\\jam', 'and\\spam', 'alot'], '') + check(['and\\jam', 'and\\spam', 'and'], 'and') + check(['C:and\\jam', 'C:and\\spam'], 'C:and') + + check([''], '') + check(['', 'spam\\alot'], '') + check_error(ValueError, ['', '\\spam\\alot']) + + self.assertRaises(TypeError, ntpath.commonpath, + [b'C:\\Program Files', 'C:\\Program Files\\Foo']) + self.assertRaises(TypeError, ntpath.commonpath, + [b'C:\\Program Files', 'Program Files\\Foo']) + self.assertRaises(TypeError, ntpath.commonpath, + [b'Program Files', 'C:\\Program Files\\Foo']) + self.assertRaises(TypeError, ntpath.commonpath, + ['C:\\Program Files', b'C:\\Program Files\\Foo']) + self.assertRaises(TypeError, ntpath.commonpath, + ['C:\\Program Files', b'Program Files\\Foo']) + self.assertRaises(TypeError, ntpath.commonpath, + ['Program Files', b'C:\\Program Files\\Foo']) + def test_sameopenfile(self): with TemporaryFile() as tf1, TemporaryFile() as tf2: # Make sure the same file is really the same diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index b2454794e5b..ece3555e9d7 100644 --- a/Lib/test/test_posixpath.py 
+++ b/Lib/test/test_posixpath.py @@ -522,6 +522,60 @@ class PosixPathTest(unittest.TestCase): finally: os.getcwdb = real_getcwdb + def test_commonpath(self): + def check(paths, expected): + self.assertEqual(posixpath.commonpath(paths), expected) + self.assertEqual(posixpath.commonpath([os.fsencode(p) for p in paths]), + os.fsencode(expected)) + def check_error(exc, paths): + self.assertRaises(exc, posixpath.commonpath, paths) + self.assertRaises(exc, posixpath.commonpath, + [os.fsencode(p) for p in paths]) + + self.assertRaises(ValueError, posixpath.commonpath, []) + check_error(ValueError, ['/usr', 'usr']) + check_error(ValueError, ['usr', '/usr']) + + check(['/usr/local'], '/usr/local') + check(['/usr/local', '/usr/local'], '/usr/local') + check(['/usr/local/', '/usr/local'], '/usr/local') + check(['/usr/local/', '/usr/local/'], '/usr/local') + check(['/usr//local', '//usr/local'], '/usr/local') + check(['/usr/./local', '/./usr/local'], '/usr/local') + check(['/', '/dev'], '/') + check(['/usr', '/dev'], '/') + check(['/usr/lib/', '/usr/lib/python3'], '/usr/lib') + check(['/usr/lib/', '/usr/lib64/'], '/usr') + + check(['/usr/lib', '/usr/lib64'], '/usr') + check(['/usr/lib/', '/usr/lib64'], '/usr') + + check(['spam'], 'spam') + check(['spam', 'spam'], 'spam') + check(['spam', 'alot'], '') + check(['and/jam', 'and/spam'], 'and') + check(['and//jam', 'and/spam//'], 'and') + check(['and/./jam', './and/spam'], 'and') + check(['and/jam', 'and/spam', 'alot'], '') + check(['and/jam', 'and/spam', 'and'], 'and') + + check([''], '') + check(['', 'spam/alot'], '') + check_error(ValueError, ['', '/spam/alot']) + + self.assertRaises(TypeError, posixpath.commonpath, + [b'/usr/lib/', '/usr/lib/python3']) + self.assertRaises(TypeError, posixpath.commonpath, + [b'/usr/lib/', 'usr/lib/python3']) + self.assertRaises(TypeError, posixpath.commonpath, + [b'usr/lib/', '/usr/lib/python3']) + self.assertRaises(TypeError, posixpath.commonpath, + ['/usr/lib/', b'/usr/lib/python3']) + 
self.assertRaises(TypeError, posixpath.commonpath, + ['/usr/lib/', b'usr/lib/python3']) + self.assertRaises(TypeError, posixpath.commonpath, + ['usr/lib/', b'/usr/lib/python3']) + class PosixCommonTest(test_genericpath.CommonTest, unittest.TestCase): pathmodule = posixpath diff --git a/Misc/NEWS b/Misc/NEWS index bfa1dc7f3e3..ae08396cc44 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,9 @@ Core and Builtins Library ------- +- Issue #10395: Added os.path.commonpath(). Implemented in posixpath and ntpath. + Based on patch by Rafik Draoui. + - Issue #23611: Serializing more "lookupable" objects (such as unbound methods or nested classes) now are supported with pickle protocols < 4.