Fix long-standing bugs with MANIFEST.in parsing on Windows (#6884).

These regex changes fix a number of issues for distutils on Windows:
- #6884: impossible to include a file starting with 'build'
- #9691 and #14004: sdist includes too many files
- #13193: test_filelist failures

This commit replaces the incorrect changes made in 557a973709de,
c566a3447ba1 and 3925081a7ca0 to fix #13193; we were too eager to fix
the test failures and I did not study the code enough before
greenlighting patches.  This time we have unit tests derived from the
problems reported by users, so we can be sure we have the right fix.

Thanks to Nadeem Vawda for his help.
This commit is contained in:
Éric Araujo 2012-02-25 16:13:53 +01:00
parent 02fa3b9571
commit 021eddfff6
4 changed files with 97 additions and 53 deletions

View File

@ -210,6 +210,7 @@ class FileList:
Return 1 if files are found. Return 1 if files are found.
""" """
# XXX docstring lying about what the special chars are?
files_found = 0 files_found = 0
pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
self.debug_print("include_pattern: applying regex r'%s'" % self.debug_print("include_pattern: applying regex r'%s'" %
@ -297,11 +298,14 @@ def glob_to_re(pattern):
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
# and by extension they shouldn't match such "special characters" under # and by extension they shouldn't match such "special characters" under
# any OS. So change all non-escaped dots in the RE to match any # any OS. So change all non-escaped dots in the RE to match any
# character except the special characters. # character except the special characters (currently: just os.sep).
# XXX currently the "special characters" are just slash -- i.e. this is sep = os.sep
# Unix-only. if os.sep == '\\':
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re) # we're using a regex to manipulate a regex, so we need
# to escape the backslash twice
sep = r'\\\\'
escaped = r'\1[^%s]' % sep
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
return pattern_re return pattern_re
@ -328,8 +332,10 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
# ditch end of pattern character # ditch end of pattern character
empty_pattern = glob_to_re('') empty_pattern = glob_to_re('')
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)] prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
# paths should always use / in manifest templates sep = os.sep
pattern_re = "^%s/.*%s" % (prefix_re, pattern_re) if os.sep == '\\':
sep = r'\\'
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
else: # no prefix -- respect anchor flag else: # no prefix -- respect anchor flag
if anchor: if anchor:
pattern_re = "^" + pattern_re pattern_re = "^" + pattern_re

View File

@ -1,4 +1,5 @@
"""Tests for distutils.filelist.""" """Tests for distutils.filelist."""
import os
import re import re
import unittest import unittest
from distutils import debug from distutils import debug
@ -14,6 +15,7 @@ include ok
include xo include xo
exclude xo exclude xo
include foo.tmp include foo.tmp
include buildout.cfg
global-include *.x global-include *.x
global-include *.txt global-include *.txt
global-exclude *.tmp global-exclude *.tmp
@ -24,6 +26,11 @@ prune dir3
""" """
def make_local_path(s):
    """Return *s* with every '/' replaced by the platform's os.sep.

    Lets the tests spell paths with forward slashes while still comparing
    against the separator used on the platform running them.
    """
    # Splitting on '/' and re-joining with os.sep swaps every separator.
    components = s.split('/')
    return os.sep.join(components)
class FileListTestCase(support.LoggingSilencer, class FileListTestCase(support.LoggingSilencer,
unittest.TestCase): unittest.TestCase):
@ -36,41 +43,60 @@ class FileListTestCase(support.LoggingSilencer,
self.clear_logs() self.clear_logs()
def test_glob_to_re(self): def test_glob_to_re(self):
# simple cases sep = os.sep
self.assertEqual(glob_to_re('foo*'), 'foo[^/]*\\Z(?ms)') if os.sep == '\\':
self.assertEqual(glob_to_re('foo?'), 'foo[^/]\\Z(?ms)') sep = re.escape(os.sep)
self.assertEqual(glob_to_re('foo??'), 'foo[^/][^/]\\Z(?ms)')
# special cases for glob, regex in (
self.assertEqual(glob_to_re(r'foo\\*'), r'foo\\\\[^/]*\Z(?ms)') # simple cases
self.assertEqual(glob_to_re(r'foo\\\*'), r'foo\\\\\\[^/]*\Z(?ms)') ('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
self.assertEqual(glob_to_re('foo????'), r'foo[^/][^/][^/][^/]\Z(?ms)') ('foo?', r'foo[^%(sep)s]\Z(?ms)'),
self.assertEqual(glob_to_re(r'foo\\??'), r'foo\\\\[^/][^/]\Z(?ms)') ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
# special cases
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
regex = regex % {'sep': sep}
self.assertEqual(glob_to_re(glob), regex)
def test_process_template_line(self): def test_process_template_line(self):
# testing all MANIFEST.in template patterns # testing all MANIFEST.in template patterns
file_list = FileList() file_list = FileList()
l = make_local_path
# simulated file list # simulated file list
file_list.allfiles = ['foo.tmp', 'ok', 'xo', 'four.txt', file_list.allfiles = ['foo.tmp', 'ok', 'xo', 'four.txt',
'global/one.txt', 'buildout.cfg',
'global/two.txt', # filelist does not filter out VCS directories,
'global/files.x', # it's sdist that does
'global/here.tmp', l('.hg/last-message.txt'),
'f/o/f.oo', l('global/one.txt'),
'dir/graft-one', l('global/two.txt'),
'dir/dir2/graft2', l('global/files.x'),
'dir3/ok', l('global/here.tmp'),
'dir3/sub/ok.txt', l('f/o/f.oo'),
] l('dir/graft-one'),
l('dir/dir2/graft2'),
l('dir3/ok'),
l('dir3/sub/ok.txt'),
]
for line in MANIFEST_IN.split('\n'): for line in MANIFEST_IN.split('\n'):
if line.strip() == '': if line.strip() == '':
continue continue
file_list.process_template_line(line) file_list.process_template_line(line)
wanted = ['ok', 'four.txt', 'global/one.txt', 'global/two.txt', wanted = ['ok',
'f/o/f.oo', 'dir/graft-one', 'dir/dir2/graft2'] 'buildout.cfg',
'four.txt',
l('.hg/last-message.txt'),
l('global/one.txt'),
l('global/two.txt'),
l('f/o/f.oo'),
l('dir/graft-one'),
l('dir/dir2/graft2'),
]
self.assertEqual(file_list.files, wanted) self.assertEqual(file_list.files, wanted)
@ -158,6 +184,7 @@ class FileListTestCase(support.LoggingSilencer,
self.assertEqual(file_list.allfiles, ['a.py', 'b.txt']) self.assertEqual(file_list.allfiles, ['a.py', 'b.txt'])
def test_process_template(self): def test_process_template(self):
l = make_local_path
# invalid lines # invalid lines
file_list = FileList() file_list = FileList()
for action in ('include', 'exclude', 'global-include', for action in ('include', 'exclude', 'global-include',
@ -168,7 +195,7 @@ class FileListTestCase(support.LoggingSilencer,
# include # include
file_list = FileList() file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py']) file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('include *.py') file_list.process_template_line('include *.py')
self.assertEqual(file_list.files, ['a.py']) self.assertEqual(file_list.files, ['a.py'])
@ -180,31 +207,31 @@ class FileListTestCase(support.LoggingSilencer,
# exclude # exclude
file_list = FileList() file_list = FileList()
file_list.files = ['a.py', 'b.txt', 'd/c.py'] file_list.files = ['a.py', 'b.txt', l('d/c.py')]
file_list.process_template_line('exclude *.py') file_list.process_template_line('exclude *.py')
self.assertEqual(file_list.files, ['b.txt', 'd/c.py']) self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
self.assertNoWarnings() self.assertNoWarnings()
file_list.process_template_line('exclude *.rb') file_list.process_template_line('exclude *.rb')
self.assertEqual(file_list.files, ['b.txt', 'd/c.py']) self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
self.assertWarnings() self.assertWarnings()
# global-include # global-include
file_list = FileList() file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py']) file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('global-include *.py') file_list.process_template_line('global-include *.py')
self.assertEqual(file_list.files, ['a.py', 'd/c.py']) self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
self.assertNoWarnings() self.assertNoWarnings()
file_list.process_template_line('global-include *.rb') file_list.process_template_line('global-include *.rb')
self.assertEqual(file_list.files, ['a.py', 'd/c.py']) self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
self.assertWarnings() self.assertWarnings()
# global-exclude # global-exclude
file_list = FileList() file_list = FileList()
file_list.files = ['a.py', 'b.txt', 'd/c.py'] file_list.files = ['a.py', 'b.txt', l('d/c.py')]
file_list.process_template_line('global-exclude *.py') file_list.process_template_line('global-exclude *.py')
self.assertEqual(file_list.files, ['b.txt']) self.assertEqual(file_list.files, ['b.txt'])
@ -216,50 +243,52 @@ class FileListTestCase(support.LoggingSilencer,
# recursive-include # recursive-include
file_list = FileList() file_list = FileList()
file_list.set_allfiles(['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py']) file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
l('d/d/e.py')])
file_list.process_template_line('recursive-include d *.py') file_list.process_template_line('recursive-include d *.py')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py']) self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertNoWarnings() self.assertNoWarnings()
file_list.process_template_line('recursive-include e *.py') file_list.process_template_line('recursive-include e *.py')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py']) self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertWarnings() self.assertWarnings()
# recursive-exclude # recursive-exclude
file_list = FileList() file_list = FileList()
file_list.files = ['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py'] file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
file_list.process_template_line('recursive-exclude d *.py') file_list.process_template_line('recursive-exclude d *.py')
self.assertEqual(file_list.files, ['a.py', 'd/c.txt']) self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
self.assertNoWarnings() self.assertNoWarnings()
file_list.process_template_line('recursive-exclude e *.py') file_list.process_template_line('recursive-exclude e *.py')
self.assertEqual(file_list.files, ['a.py', 'd/c.txt']) self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
self.assertWarnings() self.assertWarnings()
# graft # graft
file_list = FileList() file_list = FileList()
file_list.set_allfiles(['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py']) file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
l('f/f.py')])
file_list.process_template_line('graft d') file_list.process_template_line('graft d')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py']) self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertNoWarnings() self.assertNoWarnings()
file_list.process_template_line('graft e') file_list.process_template_line('graft e')
self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py']) self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertWarnings() self.assertWarnings()
# prune # prune
file_list = FileList() file_list = FileList()
file_list.files = ['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py'] file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]
file_list.process_template_line('prune d') file_list.process_template_line('prune d')
self.assertEqual(file_list.files, ['a.py', 'f/f.py']) self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
self.assertNoWarnings() self.assertNoWarnings()
file_list.process_template_line('prune e') file_list.process_template_line('prune e')
self.assertEqual(file_list.files, ['a.py', 'f/f.py']) self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
self.assertWarnings() self.assertWarnings()

View File

@ -42,6 +42,7 @@ setup(name='fake')
MANIFEST = """\ MANIFEST = """\
# file GENERATED by distutils, do NOT edit # file GENERATED by distutils, do NOT edit
README README
buildout.cfg
inroot.txt inroot.txt
setup.py setup.py
data%(sep)sdata.dt data%(sep)sdata.dt
@ -150,7 +151,7 @@ class SDistTestCase(PyPIRCCommandTestCase):
dist_folder = join(self.tmp_dir, 'dist') dist_folder = join(self.tmp_dir, 'dist')
result = os.listdir(dist_folder) result = os.listdir(dist_folder)
result.sort() result.sort()
self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'] ) self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'])
os.remove(join(dist_folder, 'fake-1.0.tar')) os.remove(join(dist_folder, 'fake-1.0.tar'))
os.remove(join(dist_folder, 'fake-1.0.tar.gz')) os.remove(join(dist_folder, 'fake-1.0.tar.gz'))
@ -209,11 +210,18 @@ class SDistTestCase(PyPIRCCommandTestCase):
self.write_file((data_dir, 'data.dt'), '#') self.write_file((data_dir, 'data.dt'), '#')
some_dir = join(self.tmp_dir, 'some') some_dir = join(self.tmp_dir, 'some')
os.mkdir(some_dir) os.mkdir(some_dir)
# make sure VCS directories are pruned (#14004)
hg_dir = join(self.tmp_dir, '.hg')
os.mkdir(hg_dir)
self.write_file((hg_dir, 'last-message.txt'), '#')
# a buggy regex used to prevent this from working on windows (#6884)
self.write_file((self.tmp_dir, 'buildout.cfg'), '#')
self.write_file((self.tmp_dir, 'inroot.txt'), '#') self.write_file((self.tmp_dir, 'inroot.txt'), '#')
self.write_file((some_dir, 'file.txt'), '#') self.write_file((some_dir, 'file.txt'), '#')
self.write_file((some_dir, 'other_file.txt'), '#') self.write_file((some_dir, 'other_file.txt'), '#')
dist.data_files = [('data', ['data/data.dt', dist.data_files = [('data', ['data/data.dt',
'buildout.cfg',
'inroot.txt', 'inroot.txt',
'notexisting']), 'notexisting']),
'some/file.txt', 'some/file.txt',
@ -243,15 +251,15 @@ class SDistTestCase(PyPIRCCommandTestCase):
zip_file.close() zip_file.close()
# making sure everything was added # making sure everything was added
self.assertEqual(len(content), 11) self.assertEqual(len(content), 12)
# checking the MANIFEST # checking the MANIFEST
f = open(join(self.tmp_dir, 'MANIFEST')) f = open(join(self.tmp_dir, 'MANIFEST'))
try: try:
manifest = f.read() manifest = f.read()
self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
finally: finally:
f.close() f.close()
self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
@unittest.skipUnless(zlib, "requires zlib") @unittest.skipUnless(zlib, "requires zlib")
def test_metadata_check_option(self): def test_metadata_check_option(self):

View File

@ -113,6 +113,9 @@ Core and Builtins
Library Library
------- -------
- Issue #6884: Fix long-standing bugs with MANIFEST.in parsing in distutils
on Windows.
- Issue #8033: sqlite3: Fix 64-bit integer handling in user functions - Issue #8033: sqlite3: Fix 64-bit integer handling in user functions
on 32-bit architectures. Initial patch by Philippe Devalkeneer. on 32-bit architectures. Initial patch by Philippe Devalkeneer.
@ -265,8 +268,6 @@ Library
- Issues #1745761, #755670, #13357, #12629, #1200313: HTMLParser now correctly - Issues #1745761, #755670, #13357, #12629, #1200313: HTMLParser now correctly
handles non-valid attributes, including adjacent and unquoted attributes. handles non-valid attributes, including adjacent and unquoted attributes.
- Issue #13193: Fix distutils.filelist.FileList under Windows.
- Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely - Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely
when called with a timeout. Patch by Arnaud Ysmal. when called with a timeout. Patch by Arnaud Ysmal.