Issue #3187: Better support for "undecodable" filenames. Code by Victor Stinner, with small tweaks by GvR.
This commit is contained in:
Guido van Rossum 2008-10-02 18:55:37 +00:00
parent fefeca53ee
commit f0af3e30db
11 changed files with 359 additions and 145 deletions

View File

@ -37,15 +37,24 @@ def fnmatch(name, pat):
pat = os.path.normcase(pat) pat = os.path.normcase(pat)
return fnmatchcase(name, pat) return fnmatchcase(name, pat)
def _compile_pattern(pat):
regex = _cache.get(pat)
if regex is None:
if isinstance(pat, bytes):
pat_str = str(pat, 'ISO-8859-1')
res_str = translate(pat_str)
res = bytes(res_str, 'ISO-8859-1')
else:
res = translate(pat)
_cache[pat] = regex = re.compile(res)
return regex.match
def filter(names, pat): def filter(names, pat):
"""Return the subset of the list NAMES that match PAT""" """Return the subset of the list NAMES that match PAT"""
import os,posixpath import os,posixpath
result=[] result = []
pat=os.path.normcase(pat) pat = os.path.normcase(pat)
if not pat in _cache: match = _compile_pattern(pat)
res = translate(pat)
_cache[pat] = re.compile(res)
match=_cache[pat].match
if os.path is posixpath: if os.path is posixpath:
# normcase on posix is NOP. Optimize it away from the loop. # normcase on posix is NOP. Optimize it away from the loop.
for name in names: for name in names:
@ -64,10 +73,8 @@ def fnmatchcase(name, pat):
its arguments. its arguments.
""" """
if not pat in _cache: match = _compile_pattern(pat)
res = translate(pat) return match(name) is not None
_cache[pat] = re.compile(res)
return _cache[pat].match(name) is not None
def translate(pat): def translate(pat):
"""Translate a shell PATTERN to a regular expression. """Translate a shell PATTERN to a regular expression.

View File

@ -87,6 +87,7 @@ def _splitext(p, sep, altsep, extsep):
Extension is everything from the last dot to the end, ignoring Extension is everything from the last dot to the end, ignoring
leading dots. Returns "(root, ext)"; ext may be empty.""" leading dots. Returns "(root, ext)"; ext may be empty."""
# NOTE: This code must work for text and bytes strings.
sepIndex = p.rfind(sep) sepIndex = p.rfind(sep)
if altsep: if altsep:
@ -98,8 +99,8 @@ def _splitext(p, sep, altsep, extsep):
# skip all leading dots # skip all leading dots
filenameIndex = sepIndex + 1 filenameIndex = sepIndex + 1
while filenameIndex < dotIndex: while filenameIndex < dotIndex:
if p[filenameIndex] != extsep: if p[filenameIndex:filenameIndex+1] != extsep:
return p[:dotIndex], p[dotIndex:] return p[:dotIndex], p[dotIndex:]
filenameIndex += 1 filenameIndex += 1
return p, '' return p, p[:0]

View File

@ -27,7 +27,7 @@ def iglob(pathname):
return return
dirname, basename = os.path.split(pathname) dirname, basename = os.path.split(pathname)
if not dirname: if not dirname:
for name in glob1(os.curdir, basename): for name in glob1(None, basename):
yield name yield name
return return
if has_magic(dirname): if has_magic(dirname):
@ -48,10 +48,10 @@ def iglob(pathname):
def glob1(dirname, pattern): def glob1(dirname, pattern):
if not dirname: if not dirname:
dirname = os.curdir if isinstance(pattern, bytes):
if isinstance(pattern, str) and not isinstance(dirname, str): dirname = bytes(os.curdir, 'ASCII')
dirname = str(dirname, sys.getfilesystemencoding() or else:
sys.getdefaultencoding()) dirname = os.curdir
try: try:
names = os.listdir(dirname) names = os.listdir(dirname)
except os.error: except os.error:
@ -73,6 +73,11 @@ def glob0(dirname, basename):
magic_check = re.compile('[*?[]') magic_check = re.compile('[*?[]')
magic_check_bytes = re.compile(b'[*?[]')
def has_magic(s): def has_magic(s):
return magic_check.search(s) is not None if isinstance(s, bytes):
match = magic_check_bytes.search(s)
else:
match = magic_check.search(s)
return match is not None

View File

@ -82,14 +82,13 @@ class BlockingIOError(IOError):
def open(file, mode="r", buffering=None, encoding=None, errors=None, def open(file, mode="r", buffering=None, encoding=None, errors=None,
newline=None, closefd=True): newline=None, closefd=True):
r"""Open file and return a stream. If the file cannot be opened, an IOError is r"""Open file and return a stream. Raise IOError upon failure.
raised.
file is either a string giving the name (and the path if the file file is either a text or byte string giving the name (and the path
isn't in the current working directory) of the file to be opened or an if the file isn't in the current working directory) of the file to
integer file descriptor of the file to be wrapped. (If a file be opened or an integer file descriptor of the file to be
descriptor is given, it is closed when the returned I/O object is wrapped. (If a file descriptor is given, it is closed when the
closed, unless closefd is set to False.) returned I/O object is closed, unless closefd is set to False.)
mode is an optional string that specifies the mode in which the file mode is an optional string that specifies the mode in which the file
is opened. It defaults to 'r' which means open for reading in text is opened. It defaults to 'r' which means open for reading in text
@ -180,7 +179,7 @@ def open(file, mode="r", buffering=None, encoding=None, errors=None,
opened in a text mode, and for bytes a BytesIO can be used like a file opened in a text mode, and for bytes a BytesIO can be used like a file
opened in a binary mode. opened in a binary mode.
""" """
if not isinstance(file, (str, int)): if not isinstance(file, (str, bytes, int)):
raise TypeError("invalid file: %r" % file) raise TypeError("invalid file: %r" % file)
if not isinstance(mode, str): if not isinstance(mode, str):
raise TypeError("invalid mode: %r" % mode) raise TypeError("invalid mode: %r" % mode)

View File

@ -11,6 +11,7 @@ for manipulation of the pathname component of URLs.
""" """
import os import os
import sys
import stat import stat
import genericpath import genericpath
from genericpath import * from genericpath import *
@ -23,7 +24,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"curdir","pardir","sep","pathsep","defpath","altsep","extsep", "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
"devnull","realpath","supports_unicode_filenames","relpath"] "devnull","realpath","supports_unicode_filenames","relpath"]
# strings representing various path-related bits and pieces # Strings representing various path-related bits and pieces.
# These are primarily for export; internally, they are hardcoded.
curdir = '.' curdir = '.'
pardir = '..' pardir = '..'
extsep = '.' extsep = '.'
@ -33,6 +35,12 @@ defpath = ':/bin:/usr/bin'
altsep = None altsep = None
devnull = '/dev/null' devnull = '/dev/null'
def _get_sep(path):
if isinstance(path, bytes):
return b'/'
else:
return '/'
# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other # On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed # normalizations (such as optimizing '../' away) are not allowed
@ -40,6 +48,7 @@ devnull = '/dev/null'
def normcase(s): def normcase(s):
"""Normalize case of pathname. Has no effect under Posix""" """Normalize case of pathname. Has no effect under Posix"""
# TODO: on Mac OS X, this should really return s.lower().
return s return s
@ -48,7 +57,8 @@ def normcase(s):
def isabs(s): def isabs(s):
"""Test whether a path is absolute""" """Test whether a path is absolute"""
return s.startswith('/') sep = _get_sep(s)
return s.startswith(sep)
# Join pathnames. # Join pathnames.
@ -59,14 +69,15 @@ def join(a, *p):
"""Join two or more pathname components, inserting '/' as needed. """Join two or more pathname components, inserting '/' as needed.
If any component is an absolute path, all previous path components If any component is an absolute path, all previous path components
will be discarded.""" will be discarded."""
sep = _get_sep(a)
path = a path = a
for b in p: for b in p:
if b.startswith('/'): if b.startswith(sep):
path = b path = b
elif path == '' or path.endswith('/'): elif not path or path.endswith(sep):
path += b path += b
else: else:
path += '/' + b path += sep + b
return path return path
@ -78,10 +89,11 @@ def join(a, *p):
def split(p): def split(p):
"""Split a pathname. Returns tuple "(head, tail)" where "tail" is """Split a pathname. Returns tuple "(head, tail)" where "tail" is
everything after the final slash. Either part may be empty.""" everything after the final slash. Either part may be empty."""
i = p.rfind('/') + 1 sep = _get_sep(p)
i = p.rfind(sep) + 1
head, tail = p[:i], p[i:] head, tail = p[:i], p[i:]
if head and head != '/'*len(head): if head and head != sep*len(head):
head = head.rstrip('/') head = head.rstrip(sep)
return head, tail return head, tail
@ -91,7 +103,13 @@ def split(p):
# It is always true that root + ext == p. # It is always true that root + ext == p.
def splitext(p): def splitext(p):
return genericpath._splitext(p, sep, altsep, extsep) if isinstance(p, bytes):
sep = b'/'
extsep = b'.'
else:
sep = '/'
extsep = '.'
return genericpath._splitext(p, sep, None, extsep)
splitext.__doc__ = genericpath._splitext.__doc__ splitext.__doc__ = genericpath._splitext.__doc__
# Split a pathname into a drive specification and the rest of the # Split a pathname into a drive specification and the rest of the
@ -100,14 +118,15 @@ splitext.__doc__ = genericpath._splitext.__doc__
def splitdrive(p): def splitdrive(p):
"""Split a pathname into drive and path. On Posix, drive is always """Split a pathname into drive and path. On Posix, drive is always
empty.""" empty."""
return '', p return p[:0], p
# Return the tail (basename) part of a path, same as split(path)[1]. # Return the tail (basename) part of a path, same as split(path)[1].
def basename(p): def basename(p):
"""Returns the final component of a pathname""" """Returns the final component of a pathname"""
i = p.rfind('/') + 1 sep = _get_sep(p)
i = p.rfind(sep) + 1
return p[i:] return p[i:]
@ -115,10 +134,11 @@ def basename(p):
def dirname(p): def dirname(p):
"""Returns the directory component of a pathname""" """Returns the directory component of a pathname"""
i = p.rfind('/') + 1 sep = _get_sep(p)
i = p.rfind(sep) + 1
head = p[:i] head = p[:i]
if head and head != '/'*len(head): if head and head != sep*len(head):
head = head.rstrip('/') head = head.rstrip(sep)
return head return head
@ -179,7 +199,11 @@ def ismount(path):
"""Test whether a path is a mount point""" """Test whether a path is a mount point"""
try: try:
s1 = os.lstat(path) s1 = os.lstat(path)
s2 = os.lstat(join(path, '..')) if isinstance(path, bytes):
parent = join(path, b'..')
else:
parent = join(path, '..')
s2 = os.lstat(parent)
except os.error: except os.error:
return False # It doesn't exist -- so not a mount point :-) return False # It doesn't exist -- so not a mount point :-)
dev1 = s1.st_dev dev1 = s1.st_dev
@ -205,9 +229,14 @@ def ismount(path):
def expanduser(path): def expanduser(path):
"""Expand ~ and ~user constructions. If user or $HOME is unknown, """Expand ~ and ~user constructions. If user or $HOME is unknown,
do nothing.""" do nothing."""
if not path.startswith('~'): if isinstance(path, bytes):
tilde = b'~'
else:
tilde = '~'
if not path.startswith(tilde):
return path return path
i = path.find('/', 1) sep = _get_sep(path)
i = path.find(sep, 1)
if i < 0: if i < 0:
i = len(path) i = len(path)
if i == 1: if i == 1:
@ -218,12 +247,17 @@ def expanduser(path):
userhome = os.environ['HOME'] userhome = os.environ['HOME']
else: else:
import pwd import pwd
name = path[1:i]
if isinstance(name, bytes):
name = str(name, 'ASCII')
try: try:
pwent = pwd.getpwnam(path[1:i]) pwent = pwd.getpwnam(name)
except KeyError: except KeyError:
return path return path
userhome = pwent.pw_dir userhome = pwent.pw_dir
userhome = userhome.rstrip('/') if isinstance(path, bytes):
userhome = userhome.encode(sys.getfilesystemencoding())
userhome = userhome.rstrip(sep)
return userhome + path[i:] return userhome + path[i:]
@ -232,28 +266,47 @@ def expanduser(path):
# Non-existent variables are left unchanged. # Non-existent variables are left unchanged.
_varprog = None _varprog = None
_varprogb = None
def expandvars(path): def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables """Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged.""" are left unchanged."""
global _varprog global _varprog, _varprogb
if '$' not in path: if isinstance(path, bytes):
return path if b'$' not in path:
if not _varprog: return path
import re if not _varprogb:
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) import re
_varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
search = _varprogb.search
start = b'{'
end = b'}'
else:
if '$' not in path:
return path
if not _varprog:
import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
search = _varprog.search
start = '{'
end = '}'
i = 0 i = 0
while True: while True:
m = _varprog.search(path, i) m = search(path, i)
if not m: if not m:
break break
i, j = m.span(0) i, j = m.span(0)
name = m.group(1) name = m.group(1)
if name.startswith('{') and name.endswith('}'): if name.startswith(start) and name.endswith(end):
name = name[1:-1] name = name[1:-1]
if isinstance(name, bytes):
name = str(name, 'ASCII')
if name in os.environ: if name in os.environ:
tail = path[j:] tail = path[j:]
path = path[:i] + os.environ[name] value = os.environ[name]
if isinstance(path, bytes):
value = value.encode('ASCII')
path = path[:i] + value
i = len(path) i = len(path)
path += tail path += tail
else: else:
@ -267,35 +320,49 @@ def expandvars(path):
def normpath(path): def normpath(path):
"""Normalize path, eliminating double slashes, etc.""" """Normalize path, eliminating double slashes, etc."""
if path == '': if isinstance(path, bytes):
return '.' sep = b'/'
initial_slashes = path.startswith('/') empty = b''
dot = b'.'
dotdot = b'..'
else:
sep = '/'
empty = ''
dot = '.'
dotdot = '..'
if path == empty:
return dot
initial_slashes = path.startswith(sep)
# POSIX allows one or two initial slashes, but treats three or more # POSIX allows one or two initial slashes, but treats three or more
# as single slash. # as single slash.
if (initial_slashes and if (initial_slashes and
path.startswith('//') and not path.startswith('///')): path.startswith(sep*2) and not path.startswith(sep*3)):
initial_slashes = 2 initial_slashes = 2
comps = path.split('/') comps = path.split(sep)
new_comps = [] new_comps = []
for comp in comps: for comp in comps:
if comp in ('', '.'): if comp in (empty, dot):
continue continue
if (comp != '..' or (not initial_slashes and not new_comps) or if (comp != dotdot or (not initial_slashes and not new_comps) or
(new_comps and new_comps[-1] == '..')): (new_comps and new_comps[-1] == dotdot)):
new_comps.append(comp) new_comps.append(comp)
elif new_comps: elif new_comps:
new_comps.pop() new_comps.pop()
comps = new_comps comps = new_comps
path = '/'.join(comps) path = sep.join(comps)
if initial_slashes: if initial_slashes:
path = '/'*initial_slashes + path path = sep*initial_slashes + path
return path or '.' return path or dot
def abspath(path): def abspath(path):
"""Return an absolute path.""" """Return an absolute path."""
if not isabs(path): if not isabs(path):
path = join(os.getcwd(), path) if isinstance(path, bytes):
cwd = os.getcwdb()
else:
cwd = os.getcwd()
path = join(cwd, path)
return normpath(path) return normpath(path)
@ -305,10 +372,16 @@ def abspath(path):
def realpath(filename): def realpath(filename):
"""Return the canonical path of the specified filename, eliminating any """Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path.""" symbolic links encountered in the path."""
if isabs(filename): if isinstance(filename, bytes):
bits = ['/'] + filename.split('/')[1:] sep = b'/'
empty = b''
else: else:
bits = [''] + filename.split('/') sep = '/'
empty = ''
if isabs(filename):
bits = [sep] + filename.split(sep)[1:]
else:
bits = [empty] + filename.split(sep)
for i in range(2, len(bits)+1): for i in range(2, len(bits)+1):
component = join(*bits[0:i]) component = join(*bits[0:i])
@ -347,12 +420,24 @@ def _resolve_link(path):
supports_unicode_filenames = False supports_unicode_filenames = False
def relpath(path, start=curdir): def relpath(path, start=None):
"""Return a relative version of a path""" """Return a relative version of a path"""
if not path: if not path:
raise ValueError("no path specified") raise ValueError("no path specified")
if isinstance(path, bytes):
curdir = b'.'
sep = b'/'
pardir = b'..'
else:
curdir = '.'
sep = '/'
pardir = '..'
if start is None:
start = curdir
start_list = abspath(start).split(sep) start_list = abspath(start).split(sep)
path_list = abspath(path).split(sep) path_list = abspath(path).split(sep)

View File

@ -37,6 +37,15 @@ class FnmatchTestCase(unittest.TestCase):
check('a', r'[!\]') check('a', r'[!\]')
check('\\', r'[!\]', 0) check('\\', r'[!\]', 0)
def test_mix_bytes_str(self):
self.assertRaises(TypeError, fnmatch, 'test', b'*')
self.assertRaises(TypeError, fnmatch, b'test', '*')
self.assertRaises(TypeError, fnmatchcase, 'test', b'*')
self.assertRaises(TypeError, fnmatchcase, b'test', '*')
def test_bytes(self):
self.check_match(b'test', b'te*')
self.check_match(b'test\xff', b'te*\xff')
def test_main(): def test_main():
support.run_unittest(FnmatchTestCase) support.run_unittest(FnmatchTestCase)

View File

@ -29,7 +29,7 @@ class PosixTester(unittest.TestCase):
def testNoArgFunctions(self): def testNoArgFunctions(self):
# test posix functions which take no arguments and have # test posix functions which take no arguments and have
# no side-effects which we need to cleanup (e.g., fork, wait, abort) # no side-effects which we need to cleanup (e.g., fork, wait, abort)
NO_ARG_FUNCTIONS = [ "ctermid", "getcwd", "getcwdu", "uname", NO_ARG_FUNCTIONS = [ "ctermid", "getcwd", "getcwdb", "uname",
"times", "getloadavg", "times", "getloadavg",
"getegid", "geteuid", "getgid", "getgroups", "getegid", "geteuid", "getgid", "getgroups",
"getpid", "getpgrp", "getppid", "getuid", "getpid", "getpgrp", "getppid", "getuid",

View File

@ -31,20 +31,34 @@ class PosixPathTest(unittest.TestCase):
def test_normcase(self): def test_normcase(self):
# Check that normcase() is idempotent # Check that normcase() is idempotent
p = "FoO/./BaR" p = "FoO/./BaR"
p = posixpath.normcase(p) self.assertEqual(p, posixpath.normcase(p))
p = b"FoO/./BaR"
self.assertEqual(p, posixpath.normcase(p)) self.assertEqual(p, posixpath.normcase(p))
self.assertRaises(TypeError, posixpath.normcase) self.assertRaises(TypeError, posixpath.normcase)
def test_join(self): def test_join(self):
self.assertEqual(posixpath.join("/foo", "bar", "/bar", "baz"), "/bar/baz") self.assertEqual(posixpath.join("/foo", "bar", "/bar", "baz"),
"/bar/baz")
self.assertEqual(posixpath.join("/foo", "bar", "baz"), "/foo/bar/baz") self.assertEqual(posixpath.join("/foo", "bar", "baz"), "/foo/bar/baz")
self.assertEqual(posixpath.join("/foo/", "bar/", "baz/"), "/foo/bar/baz/") self.assertEqual(posixpath.join("/foo/", "bar/", "baz/"),
"/foo/bar/baz/")
self.assertEqual(posixpath.join(b"/foo", b"bar", b"/bar", b"baz"),
b"/bar/baz")
self.assertEqual(posixpath.join(b"/foo", b"bar", b"baz"),
b"/foo/bar/baz")
self.assertEqual(posixpath.join(b"/foo/", b"bar/", b"baz/"),
b"/foo/bar/baz/")
self.assertRaises(TypeError, posixpath.join) self.assertRaises(TypeError, posixpath.join)
self.assertRaises(TypeError, posixpath.join, b"bytes", "str")
self.assertRaises(TypeError, posixpath.join, "str", b"bytes")
def test_splitdrive(self): def test_splitdrive(self):
self.assertEqual(posixpath.splitdrive("/foo/bar"), ("", "/foo/bar")) self.assertEqual(posixpath.splitdrive("/foo/bar"), ("", "/foo/bar"))
self.assertEqual(posixpath.splitdrive(b"/foo/bar"), (b"", b"/foo/bar"))
self.assertRaises(TypeError, posixpath.splitdrive) self.assertRaises(TypeError, posixpath.splitdrive)
@ -55,15 +69,41 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(posixpath.split("////foo"), ("////", "foo")) self.assertEqual(posixpath.split("////foo"), ("////", "foo"))
self.assertEqual(posixpath.split("//foo//bar"), ("//foo", "bar")) self.assertEqual(posixpath.split("//foo//bar"), ("//foo", "bar"))
self.assertEqual(posixpath.split(b"/foo/bar"), (b"/foo", b"bar"))
self.assertEqual(posixpath.split(b"/"), (b"/", b""))
self.assertEqual(posixpath.split(b"foo"), (b"", b"foo"))
self.assertEqual(posixpath.split(b"////foo"), (b"////", b"foo"))
self.assertEqual(posixpath.split(b"//foo//bar"), (b"//foo", b"bar"))
self.assertRaises(TypeError, posixpath.split) self.assertRaises(TypeError, posixpath.split)
def splitextTest(self, path, filename, ext): def splitextTest(self, path, filename, ext):
self.assertEqual(posixpath.splitext(path), (filename, ext)) self.assertEqual(posixpath.splitext(path), (filename, ext))
self.assertEqual(posixpath.splitext("/" + path), ("/" + filename, ext)) self.assertEqual(posixpath.splitext("/" + path), ("/" + filename, ext))
self.assertEqual(posixpath.splitext("abc/" + path), ("abc/" + filename, ext)) self.assertEqual(posixpath.splitext("abc/" + path),
self.assertEqual(posixpath.splitext("abc.def/" + path), ("abc.def/" + filename, ext)) ("abc/" + filename, ext))
self.assertEqual(posixpath.splitext("/abc.def/" + path), ("/abc.def/" + filename, ext)) self.assertEqual(posixpath.splitext("abc.def/" + path),
self.assertEqual(posixpath.splitext(path + "/"), (filename + ext + "/", "")) ("abc.def/" + filename, ext))
self.assertEqual(posixpath.splitext("/abc.def/" + path),
("/abc.def/" + filename, ext))
self.assertEqual(posixpath.splitext(path + "/"),
(filename + ext + "/", ""))
path = bytes(path, "ASCII")
filename = bytes(filename, "ASCII")
ext = bytes(ext, "ASCII")
self.assertEqual(posixpath.splitext(path), (filename, ext))
self.assertEqual(posixpath.splitext(b"/" + path),
(b"/" + filename, ext))
self.assertEqual(posixpath.splitext(b"abc/" + path),
(b"abc/" + filename, ext))
self.assertEqual(posixpath.splitext(b"abc.def/" + path),
(b"abc.def/" + filename, ext))
self.assertEqual(posixpath.splitext(b"/abc.def/" + path),
(b"/abc.def/" + filename, ext))
self.assertEqual(posixpath.splitext(path + b"/"),
(filename + ext + b"/", b""))
def test_splitext(self): def test_splitext(self):
self.splitextTest("foo.bar", "foo", ".bar") self.splitextTest("foo.bar", "foo", ".bar")
@ -87,13 +127,14 @@ class PosixPathTest(unittest.TestCase):
self.assertIs(posixpath.isabs("/foo/bar"), True) self.assertIs(posixpath.isabs("/foo/bar"), True)
self.assertIs(posixpath.isabs("foo/bar"), False) self.assertIs(posixpath.isabs("foo/bar"), False)
self.assertIs(posixpath.isabs(b""), False)
self.assertIs(posixpath.isabs(b"/"), True)
self.assertIs(posixpath.isabs(b"/foo"), True)
self.assertIs(posixpath.isabs(b"/foo/bar"), True)
self.assertIs(posixpath.isabs(b"foo/bar"), False)
self.assertRaises(TypeError, posixpath.isabs) self.assertRaises(TypeError, posixpath.isabs)
def test_splitdrive(self):
self.assertEqual(posixpath.splitdrive("/foo/bar"), ("", "/foo/bar"))
self.assertRaises(TypeError, posixpath.splitdrive)
def test_basename(self): def test_basename(self):
self.assertEqual(posixpath.basename("/foo/bar"), "bar") self.assertEqual(posixpath.basename("/foo/bar"), "bar")
self.assertEqual(posixpath.basename("/"), "") self.assertEqual(posixpath.basename("/"), "")
@ -101,6 +142,12 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(posixpath.basename("////foo"), "foo") self.assertEqual(posixpath.basename("////foo"), "foo")
self.assertEqual(posixpath.basename("//foo//bar"), "bar") self.assertEqual(posixpath.basename("//foo//bar"), "bar")
self.assertEqual(posixpath.basename(b"/foo/bar"), b"bar")
self.assertEqual(posixpath.basename(b"/"), b"")
self.assertEqual(posixpath.basename(b"foo"), b"foo")
self.assertEqual(posixpath.basename(b"////foo"), b"foo")
self.assertEqual(posixpath.basename(b"//foo//bar"), b"bar")
self.assertRaises(TypeError, posixpath.basename) self.assertRaises(TypeError, posixpath.basename)
def test_dirname(self): def test_dirname(self):
@ -110,6 +157,12 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(posixpath.dirname("////foo"), "////") self.assertEqual(posixpath.dirname("////foo"), "////")
self.assertEqual(posixpath.dirname("//foo//bar"), "//foo") self.assertEqual(posixpath.dirname("//foo//bar"), "//foo")
self.assertEqual(posixpath.dirname(b"/foo/bar"), b"/foo")
self.assertEqual(posixpath.dirname(b"/"), b"/")
self.assertEqual(posixpath.dirname(b"foo"), b"")
self.assertEqual(posixpath.dirname(b"////foo"), b"////")
self.assertEqual(posixpath.dirname(b"//foo//bar"), b"//foo")
self.assertRaises(TypeError, posixpath.dirname) self.assertRaises(TypeError, posixpath.dirname)
def test_commonprefix(self): def test_commonprefix(self):
@ -130,6 +183,19 @@ class PosixPathTest(unittest.TestCase):
"/home/swen/spam" "/home/swen/spam"
) )
self.assertEqual(
posixpath.commonprefix([b"/home/swenson/spam", b"/home/swen/spam"]),
b"/home/swen"
)
self.assertEqual(
posixpath.commonprefix([b"/home/swen/spam", b"/home/swen/eggs"]),
b"/home/swen/"
)
self.assertEqual(
posixpath.commonprefix([b"/home/swen/spam", b"/home/swen/spam"]),
b"/home/swen/spam"
)
testlist = ['', 'abc', 'Xbcd', 'Xb', 'XY', 'abcd', 'aXc', 'abd', 'ab', 'aX', 'abcX'] testlist = ['', 'abc', 'Xbcd', 'Xb', 'XY', 'abcd', 'aXc', 'abd', 'ab', 'aX', 'abcX']
for s1 in testlist: for s1 in testlist:
for s2 in testlist: for s2 in testlist:
@ -330,20 +396,28 @@ class PosixPathTest(unittest.TestCase):
def test_expanduser(self): def test_expanduser(self):
self.assertEqual(posixpath.expanduser("foo"), "foo") self.assertEqual(posixpath.expanduser("foo"), "foo")
self.assertEqual(posixpath.expanduser(b"foo"), b"foo")
try: try:
import pwd import pwd
except ImportError: except ImportError:
pass pass
else: else:
self.assert_(isinstance(posixpath.expanduser("~/"), str)) self.assert_(isinstance(posixpath.expanduser("~/"), str))
self.assert_(isinstance(posixpath.expanduser(b"~/"), bytes))
# if home directory == root directory, this test makes no sense # if home directory == root directory, this test makes no sense
if posixpath.expanduser("~") != '/': if posixpath.expanduser("~") != '/':
self.assertEqual( self.assertEqual(
posixpath.expanduser("~") + "/", posixpath.expanduser("~") + "/",
posixpath.expanduser("~/") posixpath.expanduser("~/")
) )
self.assertEqual(
posixpath.expanduser(b"~") + b"/",
posixpath.expanduser(b"~/")
)
self.assert_(isinstance(posixpath.expanduser("~root/"), str)) self.assert_(isinstance(posixpath.expanduser("~root/"), str))
self.assert_(isinstance(posixpath.expanduser("~foo/"), str)) self.assert_(isinstance(posixpath.expanduser("~foo/"), str))
self.assert_(isinstance(posixpath.expanduser(b"~root/"), bytes))
self.assert_(isinstance(posixpath.expanduser(b"~foo/"), bytes))
self.assertRaises(TypeError, posixpath.expanduser) self.assertRaises(TypeError, posixpath.expanduser)
@ -366,6 +440,19 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(posixpath.expandvars("${{foo}}"), "baz1}") self.assertEqual(posixpath.expandvars("${{foo}}"), "baz1}")
self.assertEqual(posixpath.expandvars("$foo$foo"), "barbar") self.assertEqual(posixpath.expandvars("$foo$foo"), "barbar")
self.assertEqual(posixpath.expandvars("$bar$bar"), "$bar$bar") self.assertEqual(posixpath.expandvars("$bar$bar"), "$bar$bar")
self.assertEqual(posixpath.expandvars(b"foo"), b"foo")
self.assertEqual(posixpath.expandvars(b"$foo bar"), b"bar bar")
self.assertEqual(posixpath.expandvars(b"${foo}bar"), b"barbar")
self.assertEqual(posixpath.expandvars(b"$[foo]bar"), b"$[foo]bar")
self.assertEqual(posixpath.expandvars(b"$bar bar"), b"$bar bar")
self.assertEqual(posixpath.expandvars(b"$?bar"), b"$?bar")
self.assertEqual(posixpath.expandvars(b"${foo}bar"), b"barbar")
self.assertEqual(posixpath.expandvars(b"$foo}bar"), b"bar}bar")
self.assertEqual(posixpath.expandvars(b"${foo"), b"${foo")
self.assertEqual(posixpath.expandvars(b"${{foo}}"), b"baz1}")
self.assertEqual(posixpath.expandvars(b"$foo$foo"), b"barbar")
self.assertEqual(posixpath.expandvars(b"$bar$bar"), b"$bar$bar")
finally: finally:
os.environ.clear() os.environ.clear()
os.environ.update(oldenv) os.environ.update(oldenv)
@ -378,18 +465,31 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(posixpath.normpath("//"), "//") self.assertEqual(posixpath.normpath("//"), "//")
self.assertEqual(posixpath.normpath("///"), "/") self.assertEqual(posixpath.normpath("///"), "/")
self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar") self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"), "/foo/baz") self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
"/foo/baz")
self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar") self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")
self.assertEqual(posixpath.normpath(b""), b".")
self.assertEqual(posixpath.normpath(b"/"), b"/")
self.assertEqual(posixpath.normpath(b"//"), b"//")
self.assertEqual(posixpath.normpath(b"///"), b"/")
self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
b"/foo/baz")
self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
b"/foo/bar")
self.assertRaises(TypeError, posixpath.normpath) self.assertRaises(TypeError, posixpath.normpath)
def test_abspath(self): def test_abspath(self):
self.assert_("foo" in posixpath.abspath("foo")) self.assert_("foo" in posixpath.abspath("foo"))
self.assert_(b"foo" in posixpath.abspath(b"foo"))
self.assertRaises(TypeError, posixpath.abspath) self.assertRaises(TypeError, posixpath.abspath)
def test_realpath(self): def test_realpath(self):
self.assert_("foo" in realpath("foo")) self.assert_("foo" in realpath("foo"))
self.assert_(b"foo" in realpath(b"foo"))
self.assertRaises(TypeError, posixpath.realpath) self.assertRaises(TypeError, posixpath.realpath)
if hasattr(os, "symlink"): if hasattr(os, "symlink"):
@ -499,12 +599,34 @@ class PosixPathTest(unittest.TestCase):
self.assertEqual(posixpath.relpath("a/b"), "a/b") self.assertEqual(posixpath.relpath("a/b"), "a/b")
self.assertEqual(posixpath.relpath("../a/b"), "../a/b") self.assertEqual(posixpath.relpath("../a/b"), "../a/b")
self.assertEqual(posixpath.relpath("a", "../b"), "../"+curdir+"/a") self.assertEqual(posixpath.relpath("a", "../b"), "../"+curdir+"/a")
self.assertEqual(posixpath.relpath("a/b", "../c"), "../"+curdir+"/a/b") self.assertEqual(posixpath.relpath("a/b", "../c"),
"../"+curdir+"/a/b")
self.assertEqual(posixpath.relpath("a", "b/c"), "../../a") self.assertEqual(posixpath.relpath("a", "b/c"), "../../a")
self.assertEqual(posixpath.relpath("a", "a"), ".") self.assertEqual(posixpath.relpath("a", "a"), ".")
finally: finally:
os.getcwd = real_getcwd os.getcwd = real_getcwd
def test_relpath_bytes(self):
(real_getcwdb, os.getcwdb) = (os.getcwdb, lambda: br"/home/user/bar")
try:
curdir = os.path.split(os.getcwdb())[-1]
self.assertRaises(ValueError, posixpath.relpath, b"")
self.assertEqual(posixpath.relpath(b"a"), b"a")
self.assertEqual(posixpath.relpath(posixpath.abspath(b"a")), b"a")
self.assertEqual(posixpath.relpath(b"a/b"), b"a/b")
self.assertEqual(posixpath.relpath(b"../a/b"), b"../a/b")
self.assertEqual(posixpath.relpath(b"a", b"../b"),
b"../"+curdir+b"/a")
self.assertEqual(posixpath.relpath(b"a/b", b"../c"),
b"../"+curdir+b"/a/b")
self.assertEqual(posixpath.relpath(b"a", b"b/c"), b"../../a")
self.assertEqual(posixpath.relpath(b"a", b"a"), b".")
self.assertRaises(TypeError, posixpath.relpath, b"bytes", "str")
self.assertRaises(TypeError, posixpath.relpath, "str", b"bytes")
finally:
os.getcwdb = real_getcwdb
def test_main(): def test_main():
support.run_unittest(PosixPathTest) support.run_unittest(PosixPathTest)

View File

@ -90,7 +90,7 @@ class TestUnicodeFiles(unittest.TestCase):
os.unlink(filename1 + ".new") os.unlink(filename1 + ".new")
def _do_directory(self, make_name, chdir_name, encoded): def _do_directory(self, make_name, chdir_name, encoded):
cwd = os.getcwd() cwd = os.getcwdb()
if os.path.isdir(make_name): if os.path.isdir(make_name):
os.rmdir(make_name) os.rmdir(make_name)
os.mkdir(make_name) os.mkdir(make_name)
@ -98,10 +98,10 @@ class TestUnicodeFiles(unittest.TestCase):
os.chdir(chdir_name) os.chdir(chdir_name)
try: try:
if not encoded: if not encoded:
cwd_result = os.getcwdu() cwd_result = os.getcwd()
name_result = make_name name_result = make_name
else: else:
cwd_result = os.getcwd().decode(TESTFN_ENCODING) cwd_result = os.getcwdb().decode(TESTFN_ENCODING)
name_result = make_name.decode(TESTFN_ENCODING) name_result = make_name.decode(TESTFN_ENCODING)
cwd_result = unicodedata.normalize("NFD", cwd_result) cwd_result = unicodedata.normalize("NFD", cwd_result)

View File

@ -4,8 +4,11 @@ Python News
(editors: check NEWS.help for information about editing NEWS using ReST.) (editors: check NEWS.help for information about editing NEWS using ReST.)
What's New in Python 3.0 release candidate 2 What's New in Python 3.0 beta 5
============================================ ===============================
[Note: due to the number of unresolved issues we're going back to beta
releases for a while.]
*Release date: XX-XXX-2008* *Release date: XX-XXX-2008*
@ -22,6 +25,9 @@ Core and Builtins
Library Library
------- -------
- Issue #3187: Better support for "undecodable" filenames. Code by Victor
Stinner, with small tweaks by GvR.
- Issue #3965: Allow repeated calls to turtle.Screen, by making it a - Issue #3965: Allow repeated calls to turtle.Screen, by making it a
true singleton object. true singleton object.

View File

@ -1968,63 +1968,18 @@ posix_lchown(PyObject *self, PyObject *args)
#ifdef HAVE_GETCWD #ifdef HAVE_GETCWD
PyDoc_STRVAR(posix_getcwd__doc__,
"getcwd() -> path\n\n\
Return a string representing the current working directory.");
static PyObject * static PyObject *
posix_getcwd(PyObject *self, PyObject *noargs) posix_getcwd(int use_bytes)
{
int bufsize_incr = 1024;
int bufsize = 0;
char *tmpbuf = NULL;
char *res = NULL;
PyObject *dynamic_return;
Py_BEGIN_ALLOW_THREADS
do {
bufsize = bufsize + bufsize_incr;
tmpbuf = malloc(bufsize);
if (tmpbuf == NULL) {
break;
}
#if defined(PYOS_OS2) && defined(PYCC_GCC)
res = _getcwd2(tmpbuf, bufsize);
#else
res = getcwd(tmpbuf, bufsize);
#endif
if (res == NULL) {
free(tmpbuf);
}
} while ((res == NULL) && (errno == ERANGE));
Py_END_ALLOW_THREADS
if (res == NULL)
return posix_error();
dynamic_return = PyUnicode_FromString(tmpbuf);
free(tmpbuf);
return dynamic_return;
}
PyDoc_STRVAR(posix_getcwdu__doc__,
"getcwdu() -> path\n\n\
Return a unicode string representing the current working directory.");
static PyObject *
posix_getcwdu(PyObject *self, PyObject *noargs)
{ {
char buf[1026]; char buf[1026];
char *res; char *res;
#ifdef Py_WIN_WIDE_FILENAMES #ifdef Py_WIN_WIDE_FILENAMES
DWORD len; if (!use_bytes && unicode_file_names()) {
if (unicode_file_names()) {
wchar_t wbuf[1026]; wchar_t wbuf[1026];
wchar_t *wbuf2 = wbuf; wchar_t *wbuf2 = wbuf;
PyObject *resobj; PyObject *resobj;
DWORD len;
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
len = GetCurrentDirectoryW(sizeof wbuf/ sizeof wbuf[0], wbuf); len = GetCurrentDirectoryW(sizeof wbuf/ sizeof wbuf[0], wbuf);
/* If the buffer is large enough, len does not include the /* If the buffer is large enough, len does not include the
@ -2059,8 +2014,30 @@ posix_getcwdu(PyObject *self, PyObject *noargs)
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
if (res == NULL) if (res == NULL)
return posix_error(); return posix_error();
if (use_bytes)
return PyBytes_FromStringAndSize(buf, strlen(buf));
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict"); return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
} }
PyDoc_STRVAR(posix_getcwd__doc__,
"getcwd() -> path\n\n\
Return a unicode string representing the current working directory.");

/* Implementation of os.getcwd(): thin wrapper that asks posix_getcwd()
 * for the non-bytes result (use_bytes == 0).
 *
 * The function is registered in the method table with METH_NOARGS, so it
 * is invoked through a PyCFunction pointer with two arguments.  The second
 * parameter is unused but must be declared so the definition matches the
 * type it is called through. */
static PyObject *
posix_getcwd_unicode(PyObject *self, PyObject *noargs)
{
    return posix_getcwd(0);
}
PyDoc_STRVAR(posix_getcwdb__doc__,
"getcwdb() -> path\n\n\
Return a bytes string representing the current working directory.");

/* Implementation of os.getcwdb(): thin wrapper that asks posix_getcwd()
 * for the bytes result (use_bytes == 1).
 *
 * The function is registered in the method table with METH_NOARGS, so it
 * is invoked through a PyCFunction pointer with two arguments.  The second
 * parameter is unused but must be declared so the definition matches the
 * type it is called through. */
static PyObject *
posix_getcwd_bytes(PyObject *self, PyObject *noargs)
{
    return posix_getcwd(1);
}
#endif #endif
@ -2378,9 +2355,12 @@ posix_listdir(PyObject *self, PyObject *args)
v = w; v = w;
} }
else { else {
/* fall back to the original byte string, as /* Ignore undecodable filenames, as discussed
discussed in patch #683592 */ * in issue 3187. To include these,
* call listdir() with a bytes path. */
PyErr_Clear(); PyErr_Clear();
Py_DECREF(v);
continue;
} }
} }
if (PyList_Append(d, v) != 0) { if (PyList_Append(d, v) != 0) {
@ -4477,9 +4457,7 @@ posix_readlink(PyObject *self, PyObject *args)
v = w; v = w;
} }
else { else {
/* fall back to the original byte string, as v = NULL;
discussed in patch #683592 */
PyErr_Clear();
} }
} }
return v; return v;
@ -6810,8 +6788,10 @@ static PyMethodDef posix_methods[] = {
{"ctermid", posix_ctermid, METH_NOARGS, posix_ctermid__doc__}, {"ctermid", posix_ctermid, METH_NOARGS, posix_ctermid__doc__},
#endif #endif
#ifdef HAVE_GETCWD #ifdef HAVE_GETCWD
{"getcwd", posix_getcwd, METH_NOARGS, posix_getcwd__doc__}, {"getcwd", (PyCFunction)posix_getcwd_unicode,
{"getcwdu", posix_getcwdu, METH_NOARGS, posix_getcwdu__doc__}, METH_NOARGS, posix_getcwd__doc__},
{"getcwdb", (PyCFunction)posix_getcwd_bytes,
METH_NOARGS, posix_getcwdb__doc__},
#endif #endif
#ifdef HAVE_LINK #ifdef HAVE_LINK
{"link", posix_link, METH_VARARGS, posix_link__doc__}, {"link", posix_link, METH_VARARGS, posix_link__doc__},