Patch #612627: Add encoding attribute to file objects, and determine
the terminal encoding on Windows and Unix.
This commit is contained in:
Martin v. Löwis 2003-05-10 07:10:12 +00:00
parent b7b4ce27f7
commit 5467d4c0e3
6 changed files with 109 additions and 1 deletions

View File

@ -2029,6 +2029,12 @@ implementation detail and may change in future releases of Python.
creation. creation.
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{int}{PyFile_SetEncoding}{PyObject *p, const char *enc}
Set the file's encoding for Unicode output to \var{enc}. Return
1 on success and 0 on failure.
\versionadded{2.3}
\end{cfuncdesc}
\begin{cfuncdesc}{int}{PyFile_SoftSpace}{PyObject *p, int newflag} \begin{cfuncdesc}{int}{PyFile_SoftSpace}{PyObject *p, int newflag}
This function exists for internal use by the interpreter. Sets the This function exists for internal use by the interpreter. Sets the
\member{softspace} attribute of \var{p} to \var{newflag} and \member{softspace} attribute of \var{p} to \var{newflag} and

View File

@ -1372,6 +1372,20 @@ read-only attribute; the \method{close()} method changes the value.
It may not be available on all file-like objects. It may not be available on all file-like objects.
\end{memberdesc} \end{memberdesc}
\begin{memberdesc}[file]{encoding}
The encoding that this file uses. When Unicode strings are written
to a file, they will be converted to byte strings using this encoding.
In addition, when the file is connected to a terminal, the attribute
gives the encoding that the terminal is likely to use (that
information might be incorrect if the user has misconfigured the
terminal). The attribute is read-only and may not be present on
all file-like objects. It may also be \code{None}, in which case
the file uses the system default encoding for converting Unicode
strings.
\versionadded{2.3}
\end{memberdesc}
\begin{memberdesc}[file]{mode} \begin{memberdesc}[file]{mode}
The I/O mode for the file. If the file was created using the The I/O mode for the file. If the file was created using the
\function{open()} built-in function, this will be the value of the \function{open()} built-in function, this will be the value of the

View File

@ -24,6 +24,7 @@ typedef struct {
int f_newlinetypes; /* Types of newlines seen */ int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */ int f_skipnextlf; /* Skip next \n */
#endif #endif
PyObject *f_encoding;
} PyFileObject; } PyFileObject;
PyAPI_DATA(PyTypeObject) PyFile_Type; PyAPI_DATA(PyTypeObject) PyFile_Type;
@ -33,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
int (*)(FILE *)); int (*)(FILE *));
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);

View File

@ -12,6 +12,9 @@ What's New in Python 2.3 beta 2?
Core and builtins Core and builtins
----------------- -----------------
- The encoding attribute has been added for file objects, and set to
the terminal encoding on Unix and Windows.
- The softspace attribute of file objects became read-only by oversight. - The softspace attribute of file objects became read-only by oversight.
It's writable again. It's writable again.

View File

@ -116,6 +116,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
Py_DECREF(f->f_name); Py_DECREF(f->f_name);
Py_DECREF(f->f_mode); Py_DECREF(f->f_mode);
Py_DECREF(f->f_encoding);
#ifdef Py_USING_UNICODE #ifdef Py_USING_UNICODE
if (wname) if (wname)
f->f_name = PyUnicode_FromObject(wname); f->f_name = PyUnicode_FromObject(wname);
@ -133,7 +134,9 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
f->f_newlinetypes = NEWLINE_UNKNOWN; f->f_newlinetypes = NEWLINE_UNKNOWN;
f->f_skipnextlf = 0; f->f_skipnextlf = 0;
#endif #endif
Py_INCREF(Py_None);
f->f_encoding = Py_None;
if (f->f_name == NULL || f->f_mode == NULL) if (f->f_name == NULL || f->f_mode == NULL)
return NULL; return NULL;
f->f_fp = fp; f->f_fp = fp;
@ -302,6 +305,21 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
} }
} }
/* Set the encoding used to output Unicode strings.

   f is cast to PyFileObject* without a type check, so the caller must
   pass a real file object.  enc is a NUL-terminated encoding name; a new
   string object is created from it and stored in f_encoding.

   Return 1 on success, 0 on failure.  On failure (the string object could
   not be created) the previous encoding is left in place. */
int
PyFile_SetEncoding(PyObject *f, const char *enc)
{
	PyFileObject *file = (PyFileObject*)f;
	PyObject *old_encoding;
	PyObject *new_encoding = PyString_FromString(enc);

	if (new_encoding == NULL)
		return 0;

	/* Store the new value before dropping the old reference, so that
	   f_encoding never refers to an object that has already been
	   released. */
	old_encoding = file->f_encoding;
	file->f_encoding = new_encoding;
	Py_XDECREF(old_encoding);
	return 1;
}
static PyObject * static PyObject *
err_closed(void) err_closed(void)
{ {
@ -323,6 +341,7 @@ file_dealloc(PyFileObject *f)
} }
Py_XDECREF(f->f_name); Py_XDECREF(f->f_name);
Py_XDECREF(f->f_mode); Py_XDECREF(f->f_mode);
Py_XDECREF(f->f_encoding);
drop_readahead(f); drop_readahead(f);
f->ob_type->tp_free((PyObject *)f); f->ob_type->tp_free((PyObject *)f);
} }
@ -1667,6 +1686,8 @@ static PyMemberDef file_memberlist[] = {
"file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"}, "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
{"name", T_OBJECT, OFF(f_name), RO, {"name", T_OBJECT, OFF(f_name), RO,
"file name"}, "file name"},
{"encoding", T_OBJECT, OFF(f_encoding), RO,
"file encoding"},
/* getattr(f, "closed") is implemented without this table */ /* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
@ -1851,6 +1872,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
((PyFileObject *)self)->f_name = not_yet_string; ((PyFileObject *)self)->f_name = not_yet_string;
Py_INCREF(not_yet_string); Py_INCREF(not_yet_string);
((PyFileObject *)self)->f_mode = not_yet_string; ((PyFileObject *)self)->f_mode = not_yet_string;
Py_INCREF(Py_None);
((PyFileObject *)self)->f_encoding = Py_None;
} }
return self; return self;
} }
@ -2034,11 +2057,28 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
} }
else if (PyFile_Check(f)) { else if (PyFile_Check(f)) {
FILE *fp = PyFile_AsFile(f); FILE *fp = PyFile_AsFile(f);
PyObject *enc = ((PyFileObject*)f)->f_encoding;
int result;
if (fp == NULL) { if (fp == NULL) {
err_closed(); err_closed();
return -1; return -1;
} }
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(v) && enc != Py_None) {
char *cenc = PyString_AS_STRING(enc);
value = PyUnicode_AsEncodedString(v, cenc, "strict");
if (value == NULL)
return -1;
} else {
value = v;
Py_INCREF(value);
}
result = PyObject_Print(value, fp, flags);
Py_DECREF(value);
return result;
#else
return PyObject_Print(v, fp, flags); return PyObject_Print(v, fp, flags);
#endif
} }
writer = PyObject_GetAttrString(f, "write"); writer = PyObject_GetAttrString(f, "write");
if (writer == NULL) if (writer == NULL)

View File

@ -36,6 +36,15 @@ extern const char *PyWin_DLLVersionString;
#include <unixlib.h> #include <unixlib.h>
#endif #endif
#ifdef MS_WINDOWS
#include <windows.h>
#endif
#ifdef HAVE_LANGINFO_H
#include <locale.h>
#include <langinfo.h>
#endif
PyObject * PyObject *
PySys_GetObject(char *name) PySys_GetObject(char *name)
{ {
@ -881,6 +890,12 @@ _PySys_Init(void)
PyObject *m, *v, *sysdict; PyObject *m, *v, *sysdict;
PyObject *sysin, *sysout, *syserr; PyObject *sysin, *sysout, *syserr;
char *s; char *s;
#ifdef MS_WINDOWS
char buf[10];
#endif
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
char *oldloc, *codeset;
#endif
m = Py_InitModule3("sys", sys_methods, sys_doc); m = Py_InitModule3("sys", sys_methods, sys_doc);
sysdict = PyModule_GetDict(m); sysdict = PyModule_GetDict(m);
@ -890,6 +905,34 @@ _PySys_Init(void)
syserr = PyFile_FromFile(stderr, "<stderr>", "w", NULL); syserr = PyFile_FromFile(stderr, "<stderr>", "w", NULL);
if (PyErr_Occurred()) if (PyErr_Occurred())
return NULL; return NULL;
#ifdef MS_WINDOWS
if(isatty(_fileno(stdin))){
sprintf(buf, "cp%d", GetConsoleCP());
if (!PyFile_SetEncoding(sysin, buf))
return NULL;
}
if(isatty(_fileno(stdout))) {
sprintf(buf, "cp%d", GetConsoleOutputCP());
if (!PyFile_SetEncoding(sysout, buf))
return NULL;
}
#endif
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
oldloc = setlocale(LC_CTYPE, 0);
setlocale(LC_CTYPE, "");
codeset = nl_langinfo(CODESET);
setlocale(LC_CTYPE, oldloc);
if(codeset && isatty(fileno(stdin))){
if (!PyFile_SetEncoding(sysin, codeset))
return NULL;
}
if(codeset && isatty(fileno(stdout))) {
if (!PyFile_SetEncoding(sysout, codeset))
return NULL;
}
#endif
PyDict_SetItemString(sysdict, "stdin", sysin); PyDict_SetItemString(sysdict, "stdin", sysin);
PyDict_SetItemString(sysdict, "stdout", sysout); PyDict_SetItemString(sysdict, "stdout", sysout);
PyDict_SetItemString(sysdict, "stderr", syserr); PyDict_SetItemString(sysdict, "stderr", syserr);