Bug #1462152: file() now checks more thoroughly for invalid mode
strings and removes a possible "U" before passing the mode to the
C library function.
This commit is contained in:
Georg Brandl 2006-05-18 07:01:27 +00:00
parent b678ce5aa6
commit 7b90e168f3
4 changed files with 61 additions and 38 deletions

View File

@ -418,7 +418,7 @@ class C:
that differentiate between binary and text files (else it is that differentiate between binary and text files (else it is
ignored). If the file cannot be opened, \exception{IOError} is ignored). If the file cannot be opened, \exception{IOError} is
raised. raised.
In addition to the standard \cfunction{fopen()} values \var{mode} In addition to the standard \cfunction{fopen()} values \var{mode}
may be \code{'U'} or \code{'rU'}. If Python is built with universal may be \code{'U'} or \code{'rU'}. If Python is built with universal
newline support (the default) the file is opened as a text file, but newline support (the default) the file is opened as a text file, but
@ -434,6 +434,9 @@ class C:
have yet been seen), \code{'\e n'}, \code{'\e r'}, \code{'\e r\e n'}, have yet been seen), \code{'\e n'}, \code{'\e r'}, \code{'\e r\e n'},
or a tuple containing all the newline types seen. or a tuple containing all the newline types seen.
Python enforces that the mode, after stripping \code{'U'}, begins with
\code{'r'}, \code{'w'} or \code{'a'}.
If \var{mode} is omitted, it defaults to \code{'r'}. When opening a If \var{mode} is omitted, it defaults to \code{'r'}. When opening a
binary file, you should append \code{'b'} to the \var{mode} value binary file, you should append \code{'b'} to the \var{mode} value
for improved portability. (It's useful even on systems which don't for improved portability. (It's useful even on systems which don't
@ -456,6 +459,9 @@ class C:
determine whether this is the case.} determine whether this is the case.}
\versionadded{2.2} \versionadded{2.2}
\versionchanged[Restriction on first letter of mode string
introduced]{2.5}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{filter}{function, list} \begin{funcdesc}{filter}{function, list}

View File

@ -136,7 +136,7 @@ f.close()
bad_mode = "qwerty" bad_mode = "qwerty"
try: try:
open(TESTFN, bad_mode) open(TESTFN, bad_mode)
except IOError, msg: except ValueError, msg:
if msg[0] != 0: if msg[0] != 0:
s = str(msg) s = str(msg)
if s.find(TESTFN) != -1 or s.find(bad_mode) == -1: if s.find(TESTFN) != -1 or s.find(bad_mode) == -1:

View File

@ -12,6 +12,10 @@ What's New in Python 2.5 alpha 3?
Core and builtins Core and builtins
----------------- -----------------
- Bug #1462152: file() now checks more thoroughly for invalid mode
strings and removes a possible "U" before passing the mode to the
C library function.
- Patch #1488312, Fix memory alignment problem on SPARC in unicode - Patch #1488312, Fix memory alignment problem on SPARC in unicode
- Bug #1487966: Fix SystemError with conditional expression in assignment - Bug #1487966: Fix SystemError with conditional expression in assignment

View File

@ -136,46 +136,45 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
/* check for known incorrect mode strings - problem is, platforms are /* check for known incorrect mode strings - problem is, platforms are
free to accept any mode characters they like and are supposed to free to accept any mode characters they like and are supposed to
ignore stuff they don't understand... write or append mode with ignore stuff they don't understand... write or append mode with
universal newline support is expressly forbidden by PEP 278. */ universal newline support is expressly forbidden by PEP 278.
Additionally, remove the 'U' from the mode string as platforms
won't know what it is. */
/* zero return is kewl - one is un-kewl */ /* zero return is kewl - one is un-kewl */
static int static int
check_the_mode(char *mode) sanitize_the_mode(char *mode)
{ {
char *upos;
size_t len = strlen(mode); size_t len = strlen(mode);
switch (len) { if (!len) {
case 0:
PyErr_SetString(PyExc_ValueError, "empty mode string"); PyErr_SetString(PyExc_ValueError, "empty mode string");
return 1; return 1;
}
/* reject wU, aU */ upos = strchr(mode, 'U');
case 2: if (upos) {
switch (mode[0]) { memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
case 'w':
case 'a':
if (mode[1] == 'U') {
PyErr_SetString(PyExc_ValueError,
"invalid mode string");
return 1;
}
break;
}
break;
/* reject w+U, a+U, wU+, aU+ */ if (mode[0] == 'w' || mode[0] == 'a') {
case 3: PyErr_Format(PyExc_ValueError, "universal newline "
switch (mode[0]) { "mode can only be used with modes "
case 'w': "starting with 'r'");
case 'a': return 1;
if ((mode[1] == '+' && mode[2] == 'U') ||
(mode[1] == 'U' && mode[2] == '+')) {
PyErr_SetString(PyExc_ValueError,
"invalid mode string");
return 1;
}
break;
} }
break;
if (mode[0] != 'r') {
memmove(mode+1, mode, strlen(mode)+1);
mode[0] = 'r';
}
if (!strchr(mode, 'b')) {
memmove(mode+2, mode+1, strlen(mode));
mode[1] = 'b';
}
} else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
PyErr_Format(PyExc_ValueError, "mode string must begin with "
"one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
return 1;
} }
return 0; return 0;
@ -184,6 +183,7 @@ check_the_mode(char *mode)
static PyObject * static PyObject *
open_the_file(PyFileObject *f, char *name, char *mode) open_the_file(PyFileObject *f, char *name, char *mode)
{ {
char *newmode;
assert(f != NULL); assert(f != NULL);
assert(PyFile_Check(f)); assert(PyFile_Check(f));
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
@ -195,8 +195,18 @@ open_the_file(PyFileObject *f, char *name, char *mode)
assert(mode != NULL); assert(mode != NULL);
assert(f->f_fp == NULL); assert(f->f_fp == NULL);
if (check_the_mode(mode)) /* probably need to replace 'U' by 'rb' */
newmode = PyMem_MALLOC(strlen(mode) + 3);
if (!newmode) {
PyErr_NoMemory();
return NULL; return NULL;
}
strcpy(newmode, mode);
if (sanitize_the_mode(newmode)) {
f = NULL;
goto cleanup;
}
/* rexec.py can't stop a user from getting the file() constructor -- /* rexec.py can't stop a user from getting the file() constructor --
all they have to do is get *any* file object f, and then do all they have to do is get *any* file object f, and then do
@ -204,16 +214,15 @@ open_the_file(PyFileObject *f, char *name, char *mode)
if (PyEval_GetRestricted()) { if (PyEval_GetRestricted()) {
PyErr_SetString(PyExc_IOError, PyErr_SetString(PyExc_IOError,
"file() constructor not accessible in restricted mode"); "file() constructor not accessible in restricted mode");
return NULL; f = NULL;
goto cleanup;
} }
errno = 0; errno = 0;
if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
mode = "rb";
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
if (PyUnicode_Check(f->f_name)) { if (PyUnicode_Check(f->f_name)) {
PyObject *wmode; PyObject *wmode;
wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL); wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
if (f->f_name && wmode) { if (f->f_name && wmode) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
/* PyUnicode_AS_UNICODE OK without thread /* PyUnicode_AS_UNICODE OK without thread
@ -227,7 +236,7 @@ open_the_file(PyFileObject *f, char *name, char *mode)
#endif #endif
if (NULL == f->f_fp && NULL != name) { if (NULL == f->f_fp && NULL != name) {
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
f->f_fp = fopen(name, mode); f->f_fp = fopen(name, newmode);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
} }
@ -254,6 +263,10 @@ open_the_file(PyFileObject *f, char *name, char *mode)
} }
if (f != NULL) if (f != NULL)
f = dircheck(f); f = dircheck(f);
cleanup:
PyMem_FREE(newmode);
return (PyObject *)f; return (PyObject *)f;
} }