Fix long-standing bugs with MANIFEST.in parsing on Windows (#6884).

These regex changes fix a number of issues for distutils on Windows:
- #6884: impossible to include a file starting with 'build'
- #9691 and #14004: sdist includes too many files
- #13193: test_filelist failures

This commit replaces the incorrect changes done in 0a94e2f807c7 and
90b30d62caf2 to fix #13193; we were too eager to fix the test failures
and I did not study the code enough before greenlighting patches.  This
time we have unit tests from the problems reported by users to be sure
we have the right fix.

Thanks to Nadeem Vawda for his help.
This commit is contained in:
Éric Araujo 2012-02-25 16:28:05 +01:00
parent 3d4809f464
commit 2e0a0e1640
4 changed files with 133 additions and 47 deletions

View File

@ -201,6 +201,7 @@ class FileList:
Return True if files are found, False otherwise.
"""
# XXX docstring lying about what the special chars are?
files_found = False
pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
self.debug_print("include_pattern: applying regex r'%s'" %
@ -284,11 +285,14 @@ def glob_to_re(pattern):
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
# and by extension they shouldn't match such "special characters" under
# any OS. So change all non-escaped dots in the RE to match any
# character except the special characters.
# XXX currently the "special characters" are just slash -- i.e. this is
# Unix-only.
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
# character except the special characters (currently: just os.sep).
sep = os.sep
if os.sep == '\\':
# we're using a regex to manipulate a regex, so we need
# to escape the backslash twice
sep = r'\\\\'
escaped = r'\1[^%s]' % sep
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
return pattern_re
@ -312,9 +316,11 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
if prefix is not None:
# ditch end of pattern character
empty_pattern = glob_to_re('')
prefix_re = (glob_to_re(prefix))[:-len(empty_pattern)]
# paths should always use / in manifest templates
pattern_re = "^%s/.*%s" % (prefix_re, pattern_re)
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
sep = os.sep
if os.sep == '\\':
sep = r'\\'
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
else: # no prefix -- respect anchor flag
if anchor:
pattern_re = "^" + pattern_re

View File

@ -1,4 +1,5 @@
"""Tests for distutils.filelist."""
import os
import re
import unittest
from distutils import debug
@ -9,6 +10,26 @@ from distutils.filelist import glob_to_re, translate_pattern, FileList
from test.support import captured_stdout, run_unittest
from distutils.tests import support
MANIFEST_IN = """\
include ok
include xo
exclude xo
include foo.tmp
include buildout.cfg
global-include *.x
global-include *.txt
global-exclude *.tmp
recursive-include f *.oo
recursive-exclude global *.x
graft dir
prune dir3
"""
def make_local_path(s):
"""Converts '/' in a string to os.sep"""
return s.replace('/', os.sep)
class FileListTestCase(support.LoggingSilencer,
unittest.TestCase):
@ -22,16 +43,62 @@ class FileListTestCase(support.LoggingSilencer,
self.clear_logs()
def test_glob_to_re(self):
# simple cases
self.assertEqual(glob_to_re('foo*'), 'foo[^/]*\\Z(?ms)')
self.assertEqual(glob_to_re('foo?'), 'foo[^/]\\Z(?ms)')
self.assertEqual(glob_to_re('foo??'), 'foo[^/][^/]\\Z(?ms)')
sep = os.sep
if os.sep == '\\':
sep = re.escape(os.sep)
# special cases
self.assertEqual(glob_to_re(r'foo\\*'), r'foo\\\\[^/]*\Z(?ms)')
self.assertEqual(glob_to_re(r'foo\\\*'), r'foo\\\\\\[^/]*\Z(?ms)')
self.assertEqual(glob_to_re('foo????'), r'foo[^/][^/][^/][^/]\Z(?ms)')
self.assertEqual(glob_to_re(r'foo\\??'), r'foo\\\\[^/][^/]\Z(?ms)')
for glob, regex in (
# simple cases
('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
('foo?', r'foo[^%(sep)s]\Z(?ms)'),
('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
# special cases
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
regex = regex % {'sep': sep}
self.assertEqual(glob_to_re(glob), regex)
def test_process_template_line(self):
# testing all MANIFEST.in template patterns
file_list = FileList()
l = make_local_path
# simulated file list
file_list.allfiles = ['foo.tmp', 'ok', 'xo', 'four.txt',
'buildout.cfg',
# filelist does not filter out VCS directories,
# it's sdist that does
l('.hg/last-message.txt'),
l('global/one.txt'),
l('global/two.txt'),
l('global/files.x'),
l('global/here.tmp'),
l('f/o/f.oo'),
l('dir/graft-one'),
l('dir/dir2/graft2'),
l('dir3/ok'),
l('dir3/sub/ok.txt'),
]
for line in MANIFEST_IN.split('\n'):
if line.strip() == '':
continue
file_list.process_template_line(line)
wanted = ['ok',
'buildout.cfg',
'four.txt',
l('.hg/last-message.txt'),
l('global/one.txt'),
l('global/two.txt'),
l('f/o/f.oo'),
l('dir/graft-one'),
l('dir/dir2/graft2'),
]
self.assertEqual(file_list.files, wanted)
def test_debug_print(self):
file_list = FileList()
@ -117,6 +184,7 @@ class FileListTestCase(support.LoggingSilencer,
self.assertEqual(file_list.allfiles, ['a.py', 'b.txt'])
def test_process_template(self):
l = make_local_path
# invalid lines
file_list = FileList()
for action in ('include', 'exclude', 'global-include',
@ -127,7 +195,7 @@ class FileListTestCase(support.LoggingSilencer,
# include
file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('include *.py')
self.assertEqual(file_list.files, ['a.py'])
@ -139,31 +207,31 @@ class FileListTestCase(support.LoggingSilencer,
# exclude
file_list = FileList()
file_list.files = ['a.py', 'b.txt', 'd/c.py']
file_list.files = ['a.py', 'b.txt', l('d/c.py')]
file_list.process_template_line('exclude *.py')
self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
self.assertNoWarnings()
file_list.process_template_line('exclude *.rb')
self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
self.assertWarnings()
# global-include
file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('global-include *.py')
self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
self.assertNoWarnings()
file_list.process_template_line('global-include *.rb')
self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
self.assertWarnings()
# global-exclude
file_list = FileList()
file_list.files = ['a.py', 'b.txt', 'd/c.py']
file_list.files = ['a.py', 'b.txt', l('d/c.py')]
file_list.process_template_line('global-exclude *.py')
self.assertEqual(file_list.files, ['b.txt'])
@ -175,50 +243,52 @@ class FileListTestCase(support.LoggingSilencer,
# recursive-include
file_list = FileList()
file_list.set_allfiles(['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py'])
file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
l('d/d/e.py')])
file_list.process_template_line('recursive-include d *.py')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertNoWarnings()
file_list.process_template_line('recursive-include e *.py')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertWarnings()
# recursive-exclude
file_list = FileList()
file_list.files = ['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py']
file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
file_list.process_template_line('recursive-exclude d *.py')
self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
self.assertNoWarnings()
file_list.process_template_line('recursive-exclude e *.py')
self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
self.assertWarnings()
# graft
file_list = FileList()
file_list.set_allfiles(['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py'])
file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
l('f/f.py')])
file_list.process_template_line('graft d')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertNoWarnings()
file_list.process_template_line('graft e')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertWarnings()
# prune
file_list = FileList()
file_list.files = ['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py']
file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]
file_list.process_template_line('prune d')
self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
self.assertNoWarnings()
file_list.process_template_line('prune e')
self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
self.assertWarnings()

View File

@ -7,6 +7,12 @@ import zipfile
from os.path import join
from textwrap import dedent
try:
import zlib
ZLIB_SUPPORT = True
except ImportError:
ZLIB_SUPPORT = False
from test.support import captured_stdout, check_warnings, run_unittest
from distutils.command.sdist import sdist, show_formats
@ -28,6 +34,7 @@ setup(name='fake')
MANIFEST = """\
# file GENERATED by distutils, do NOT edit
README
buildout.cfg
inroot.txt
setup.py
data%(sep)sdata.dt
@ -39,13 +46,6 @@ somecode%(sep)sdoc.dat
somecode%(sep)sdoc.txt
"""
try:
import zlib
ZLIB_SUPPORT = True
except ImportError:
ZLIB_SUPPORT = False
class SDistTestCase(PyPIRCCommandTestCase):
def setUp(self):
@ -143,7 +143,7 @@ class SDistTestCase(PyPIRCCommandTestCase):
dist_folder = join(self.tmp_dir, 'dist')
result = os.listdir(dist_folder)
result.sort()
self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'] )
self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'])
os.remove(join(dist_folder, 'fake-1.0.tar'))
os.remove(join(dist_folder, 'fake-1.0.tar.gz'))
@ -180,11 +180,18 @@ class SDistTestCase(PyPIRCCommandTestCase):
self.write_file((data_dir, 'data.dt'), '#')
some_dir = join(self.tmp_dir, 'some')
os.mkdir(some_dir)
# make sure VCS directories are pruned (#14004)
hg_dir = join(self.tmp_dir, '.hg')
os.mkdir(hg_dir)
self.write_file((hg_dir, 'last-message.txt'), '#')
# a buggy regex used to prevent this from working on windows (#6884)
self.write_file((self.tmp_dir, 'buildout.cfg'), '#')
self.write_file((self.tmp_dir, 'inroot.txt'), '#')
self.write_file((some_dir, 'file.txt'), '#')
self.write_file((some_dir, 'other_file.txt'), '#')
dist.data_files = [('data', ['data/data.dt',
'buildout.cfg',
'inroot.txt',
'notexisting']),
'some/file.txt',
@ -214,15 +221,15 @@ class SDistTestCase(PyPIRCCommandTestCase):
zip_file.close()
# making sure everything was added
self.assertEqual(len(content), 11)
self.assertEqual(len(content), 12)
# checking the MANIFEST
f = open(join(self.tmp_dir, 'MANIFEST'))
try:
manifest = f.read()
self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
finally:
f.close()
self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
@unittest.skipUnless(ZLIB_SUPPORT, 'Need zlib support to run')
def test_metadata_check_option(self):

View File

@ -124,6 +124,9 @@ Core and Builtins
Library
-------
- Issue #6884: Fix long-standing bugs with MANIFEST.in parsing in distutils
on Windows.
- Issue #8033: sqlite3: Fix 64-bit integer handling in user functions
on 32-bit architectures. Initial patch by Philippe Devalkeneer.