From e78fbcce3e9fef7d4f701971186baa4bdec6b9b1 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sat, 5 Jul 2008 10:13:36 +0000 Subject: [PATCH] #2663: support an *ignore* argument to shutil.copytree(). Patch by Tarek Ziade. This is a new feature, but Barry authorized adding it in the beta period. --- Doc/library/shutil.rst | 76 +++++++++++++++++++++++++++++++++++------ Lib/shutil.py | 35 +++++++++++++++++-- Lib/test/test_shutil.py | 76 +++++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 4 +-- 4 files changed, 176 insertions(+), 15 deletions(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index bd66aa96216..ab652a58af9 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -78,18 +78,41 @@ copying and removal. For operations on individual files, see also the Unix command :program:`cp -p`. -.. function:: copytree(src, dst[, symlinks]) +.. function:: ignore_patterns(\*patterns) + + This factory function creates a function that can be used as a callable for + :func:`copytree`\'s *ignore* argument, ignoring files and directories that + match one of the glob-style *patterns* provided. See the example below. + + .. versionadded:: 2.6 + + +.. function:: copytree(src, dst[, symlinks=False[, ignore=None]]) Recursively copy an entire directory tree rooted at *src*. The destination - directory, named by *dst*, must not already exist; it will be created as well as - missing parent directories. Permissions and times of directories are copied with - :func:`copystat`, individual files are copied using :func:`copy2`. If - *symlinks* is true, symbolic links in the source tree are represented as - symbolic links in the new tree; if false or omitted, the contents of the linked - files are copied to the new tree. If exception(s) occur, an :exc:`Error` is - raised with a list of reasons. + directory, named by *dst*, must not already exist; it will be created as well + as missing parent directories. 
Permissions and times of directories are + copied with :func:`copystat`, individual files are copied using + :func:`copy2`. - The source code for this should be considered an example rather than a tool. + If *symlinks* is true, symbolic links in the source tree are represented as + symbolic links in the new tree; if false or omitted, the contents of the + linked files are copied to the new tree. + + If *ignore* is given, it must be a callable that will receive as its + arguments the directory being visited by :func:`copytree`, and a list of its + contents, as returned by :func:`os.listdir`. Since :func:`copytree` is + called recursively, the *ignore* callable will be called once for each + directory that is copied. The callable must return a sequence of directory + and file names relative to the current directory (i.e. a subset of the items + in its second argument); these names will then be ignored in the copy + process. :func:`ignore_patterns` can be used to create such a callable that + ignores names based on glob-style patterns. + + If exception(s) occur, an :exc:`Error` is raised with a list of reasons. + + The source code for this should be considered an example rather than the + ultimate tool. .. versionchanged:: 2.3 :exc:`Error` is raised if any exceptions occur during copying, rather than @@ -99,6 +122,9 @@ copying and removal. For operations on individual files, see also the Create intermediate directories needed to create *dst*, rather than raising an error. Copy permissions and times of directories using :func:`copystat`. + .. versionchanged:: 2.6 + Added the *ignore* argument to be able to influence what is being copied. + .. function:: rmtree(path[, ignore_errors[, onerror]]) @@ -152,11 +178,18 @@ This example is the implementation of the :func:`copytree` function, described above, with the docstring omitted. It demonstrates many of the other functions provided by this module. 
:: - def copytree(src, dst, symlinks=False): + def copytree(src, dst, symlinks=False, ignore=None): names = os.listdir(src) + if ignore is not None: + ignored_names = ignore(src, names) + else: + ignored_names = set() + os.makedirs(dst) errors = [] for name in names: + if name in ignored_names: + continue srcname = os.path.join(src, name) dstname = os.path.join(dst, name) try: @@ -164,7 +197,7 @@ provided by this module. :: linkto = os.readlink(srcname) os.symlink(linkto, dstname) elif os.path.isdir(srcname): - copytree(srcname, dstname, symlinks) + copytree(srcname, dstname, symlinks, ignore) else: copy2(srcname, dstname) # XXX What about devices, sockets etc.? @@ -183,3 +216,24 @@ provided by this module. :: errors.extend((src, dst, str(why))) if errors: raise Error, errors + +Another example that uses the :func:`ignore_patterns` helper:: + + from shutil import copytree, ignore_patterns + + copytree(source, destination, ignore=ignore_patterns('*.pyc', 'tmp*')) + +This will copy everything except ``.pyc`` files and files or directories whose +name starts with ``tmp``. + +Another example that uses the *ignore* argument to add a logging call:: + + from shutil import copytree + import logging + + def _logpath(path, names): + logging.info('Working in %s' % path) + return [] # nothing will be ignored + + copytree(source, destination, ignore=_logpath) + diff --git a/Lib/shutil.py b/Lib/shutil.py index 6ce402326bb..3af280dd42e 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -8,6 +8,7 @@ import os import sys import stat from os.path import abspath +import fnmatch __all__ = ["copyfileobj","copyfile","copymode","copystat","copy","copy2", "copytree","move","rmtree","Error"] @@ -93,8 +94,19 @@ def copy2(src, dst): copyfile(src, dst) copystat(src, dst) +def ignore_patterns(*patterns): + """Function that can be used as copytree() ignore parameter. 
-def copytree(src, dst, symlinks=False): + Patterns is a sequence of glob-style patterns + that are used to exclude files""" + def _ignore_patterns(path, names): + ignored_names = [] + for pattern in patterns: + ignored_names.extend(fnmatch.filter(names, pattern)) + return set(ignored_names) + return _ignore_patterns + +def copytree(src, dst, symlinks=False, ignore=None): """Recursively copy a directory tree using copy2(). The destination directory must not already exist. @@ -105,13 +117,32 @@ def copytree(src, dst, symlinks=False): it is false, the contents of the files pointed to by symbolic links are copied. + The optional ignore argument is a callable. If given, it + is called with the `src` parameter, which is the directory + being visited by copytree(), and `names` which is the list of + `src` contents, as returned by os.listdir(): + + callable(src, names) -> ignored_names + + Since copytree() is called recursively, the callable will be + called once for each directory that is copied. It returns a + list of names relative to the `src` directory that should + not be copied. + XXX Consider this example code rather than the ultimate tool. """ names = os.listdir(src) + if ignore is not None: + ignored_names = ignore(src, names) + else: + ignored_names = set() + os.makedirs(dst) errors = [] for name in names: + if name in ignored_names: + continue srcname = os.path.join(src, name) dstname = os.path.join(dst, name) try: @@ -119,7 +150,7 @@ def copytree(src, dst, symlinks=False): linkto = os.readlink(srcname) os.symlink(linkto, dstname) elif os.path.isdir(srcname): - copytree(srcname, dstname, symlinks) + copytree(srcname, dstname, symlinks, ignore) else: copy2(srcname, dstname) # XXX What about devices, sockets etc.? 
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index dfdec0da581..fa5bbb1601f 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -108,6 +108,82 @@ class TestShutil(unittest.TestCase): if os.path.exists(path): shutil.rmtree(path) + def test_copytree_with_exclude(self): + + def write_data(path, data): + f = open(path, "w") + f.write(data) + f.close() + + def read_data(path): + f = open(path) + data = f.read() + f.close() + return data + + # creating data + join = os.path.join + exists = os.path.exists + src_dir = tempfile.mkdtemp() + dst_dir = join(tempfile.mkdtemp(), 'destination') + write_data(join(src_dir, 'test.txt'), '123') + write_data(join(src_dir, 'test.tmp'), '123') + os.mkdir(join(src_dir, 'test_dir')) + write_data(join(src_dir, 'test_dir', 'test.txt'), '456') + os.mkdir(join(src_dir, 'test_dir2')) + write_data(join(src_dir, 'test_dir2', 'test.txt'), '456') + os.mkdir(join(src_dir, 'test_dir2', 'subdir')) + os.mkdir(join(src_dir, 'test_dir2', 'subdir2')) + write_data(join(src_dir, 'test_dir2', 'subdir', 'test.txt'), '456') + write_data(join(src_dir, 'test_dir2', 'subdir2', 'test.py'), '456') + + + # testing glob-like patterns + try: + patterns = shutil.ignore_patterns('*.tmp', 'test_dir2') + shutil.copytree(src_dir, dst_dir, ignore=patterns) + # checking the result: some elements should not be copied + self.assert_(exists(join(dst_dir, 'test.txt'))) + self.assert_(not exists(join(dst_dir, 'test.tmp'))) + self.assert_(not exists(join(dst_dir, 'test_dir2'))) + finally: + if os.path.exists(dst_dir): + shutil.rmtree(dst_dir) + try: + patterns = shutil.ignore_patterns('*.tmp', 'subdir*') + shutil.copytree(src_dir, dst_dir, ignore=patterns) + # checking the result: some elements should not be copied + self.assert_(not exists(join(dst_dir, 'test.tmp'))) + self.assert_(not exists(join(dst_dir, 'test_dir2', 'subdir2'))) + self.assert_(not exists(join(dst_dir, 'test_dir2', 'subdir'))) + finally: + if os.path.exists(dst_dir): +
shutil.rmtree(dst_dir) + + # testing callable-style: ignore dirs named 'subdir' and *.py files + try: + def _filter(src, names): + res = [] + for name in names: + path = os.path.join(src, name) + + if (os.path.isdir(path) and + os.path.split(path)[-1] == 'subdir'): + res.append(name) + elif os.path.splitext(path)[-1] in ('.py',): + res.append(name) + return res + + shutil.copytree(src_dir, dst_dir, ignore=_filter) + + # checking the result: some elements should not be copied + self.assert_(not exists(join(dst_dir, 'test_dir2', 'subdir2', + 'test.py'))) + self.assert_(not exists(join(dst_dir, 'test_dir2', 'subdir'))) + + finally: + if os.path.exists(dst_dir): + shutil.rmtree(dst_dir) if hasattr(os, "symlink"): def test_dont_copy_file_onto_link_to_itself(self): diff --git a/Misc/NEWS b/Misc/NEWS index 796765b9c6f..ce4394387dd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -29,10 +29,11 @@ Core and Builtins would not cause a syntax error. This was regression from 2.4 caused by the switch to the new compiler. - Library ------- +- Issue #2663: add filtering capability to shutil.copytree(). + - Issue #1622: Correct interpretation of various ZIP header fields. - Issue #1526: Allow more than 64k files to be added to Zip64 file. @@ -54,7 +55,6 @@ Library urllib module in Python 3.0 to urllib.request, urllib.parse, and urllib.error. - Build -----