From 5467d4c0e31e9db305a4899a44d7978f83e96649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sat, 10 May 2003 07:10:12 +0000 Subject: [PATCH] Patch #612627: Add encoding attribute to file objects, and determine the terminal encoding on Windows and Unix. --- Doc/api/concrete.tex | 6 ++++++ Doc/lib/libstdtypes.tex | 14 ++++++++++++++ Include/fileobject.h | 2 ++ Misc/NEWS | 3 +++ Objects/fileobject.c | 42 +++++++++++++++++++++++++++++++++++++++- Python/sysmodule.c | 43 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 1 deletion(-) diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index f8cbc2834b1..2c14596f561 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -2029,6 +2029,12 @@ implementation detail and may change in future releases of Python. creation. \end{cfuncdesc} +\begin{cfuncdesc}{int}{PyFile_SetEncoding}{PyObject *p, const char *enc} + Set the file's encoding for Unicode output to \var{enc}. Return + 1 on success and 0 on failure. + \versionadded{2.3} +\end{cfuncdesc} + \begin{cfuncdesc}{int}{PyFile_SoftSpace}{PyObject *p, int newflag} This function exists for internal use by the interpreter. Sets the \member{softspace} attribute of \var{p} to \var{newflag} and diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex index 04cdbcab9f0..951d0884ccb 100644 --- a/Doc/lib/libstdtypes.tex +++ b/Doc/lib/libstdtypes.tex @@ -1372,6 +1372,20 @@ read-only attribute; the \method{close()} method changes the value. It may not be available on all file-like objects. \end{memberdesc} +\begin{memberdesc}[file]{encoding} +The encoding that this file uses. When Unicode strings are written +to a file, they will be converted to byte strings using this encoding. +In addition, when the file is connected to a terminal, the attribute +gives the encoding that the terminal is likely to use (that +information might be incorrect if the user has misconfigured the +terminal). 
The attribute is read-only and may not be present on +all file-like objects. It may also be \code{None}, in which case +the file uses the system default encoding for converting Unicode +strings. + +\versionadded{2.3} +\end{memberdesc} + \begin{memberdesc}[file]{mode} The I/O mode for the file. If the file was created using the \function{open()} built-in function, this will be the value of the diff --git a/Include/fileobject.h b/Include/fileobject.h index c351b4d09d3..e2053df2e4f 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -24,6 +24,7 @@ typedef struct { int f_newlinetypes; /* Types of newlines seen */ int f_skipnextlf; /* Skip next \n */ #endif + PyObject *f_encoding; } PyFileObject; PyAPI_DATA(PyTypeObject) PyFile_Type; @@ -33,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type; PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); +PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *); PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, int (*)(FILE *)); PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); diff --git a/Misc/NEWS b/Misc/NEWS index b94339103cd..098145a8804 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.3 beta 2? Core and builtins ----------------- +- The encoding attribute has been added for file objects, and set to + the terminal encoding on Unix and Windows. + - The softspace attribute of file objects became read-only by oversight. It's writable again. 
diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 92cfa5b35a9..40ce759f584 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -116,6 +116,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode, Py_DECREF(f->f_name); Py_DECREF(f->f_mode); + Py_DECREF(f->f_encoding); #ifdef Py_USING_UNICODE if (wname) f->f_name = PyUnicode_FromObject(wname); @@ -133,7 +134,9 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode, f->f_newlinetypes = NEWLINE_UNKNOWN; f->f_skipnextlf = 0; #endif - + Py_INCREF(Py_None); + f->f_encoding = Py_None; + if (f->f_name == NULL || f->f_mode == NULL) return NULL; f->f_fp = fp; @@ -302,6 +305,21 @@ PyFile_SetBufSize(PyObject *f, int bufsize) } } +/* Set the encoding used to output Unicode strings. + Return 1 on success, 0 on failure. */ + +int +PyFile_SetEncoding(PyObject *f, const char *enc) +{ + PyFileObject *file = (PyFileObject*)f; + PyObject *str = PyString_FromString(enc); + if (!str) + return 0; + Py_DECREF(file->f_encoding); + file->f_encoding = str; + return 1; +} + static PyObject * err_closed(void) { @@ -323,6 +341,7 @@ file_dealloc(PyFileObject *f) } Py_XDECREF(f->f_name); Py_XDECREF(f->f_mode); + Py_XDECREF(f->f_encoding); drop_readahead(f); f->ob_type->tp_free((PyObject *)f); } @@ -1667,6 +1686,8 @@ static PyMemberDef file_memberlist[] = { "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"}, {"name", T_OBJECT, OFF(f_name), RO, "file name"}, + {"encoding", T_OBJECT, OFF(f_encoding), RO, + "file encoding"}, /* getattr(f, "closed") is implemented without this table */ {NULL} /* Sentinel */ }; @@ -1851,6 +1872,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ((PyFileObject *)self)->f_name = not_yet_string; Py_INCREF(not_yet_string); ((PyFileObject *)self)->f_mode = not_yet_string; + Py_INCREF(Py_None); + ((PyFileObject *)self)->f_encoding = Py_None; } return self; } @@ -2034,11 +2057,28 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags) } 
else if (PyFile_Check(f)) { FILE *fp = PyFile_AsFile(f); + PyObject *enc = ((PyFileObject*)f)->f_encoding; + int result; if (fp == NULL) { err_closed(); return -1; } +#ifdef Py_USING_UNICODE + if (PyUnicode_Check(v) && enc != Py_None) { + char *cenc = PyString_AS_STRING(enc); + value = PyUnicode_AsEncodedString(v, cenc, "strict"); + if (value == NULL) + return -1; + } else { + value = v; + Py_INCREF(value); + } + result = PyObject_Print(value, fp, flags); + Py_DECREF(value); + return result; +#else return PyObject_Print(v, fp, flags); +#endif } writer = PyObject_GetAttrString(f, "write"); if (writer == NULL) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index d06d18a98b4..edbc2bf5805 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -36,6 +36,15 @@ extern const char *PyWin_DLLVersionString; #include #endif +#ifdef MS_WINDOWS +#include +#endif + +#ifdef HAVE_LANGINFO_H +#include +#include +#endif + PyObject * PySys_GetObject(char *name) { @@ -881,6 +890,12 @@ _PySys_Init(void) PyObject *m, *v, *sysdict; PyObject *sysin, *sysout, *syserr; char *s; +#ifdef MS_WINDOWS + char buf[10]; +#endif +#if defined(HAVE_LANGINFO_H) && defined(CODESET) + char *oldloc, *codeset; +#endif m = Py_InitModule3("sys", sys_methods, sys_doc); sysdict = PyModule_GetDict(m); @@ -890,6 +905,34 @@ _PySys_Init(void) syserr = PyFile_FromFile(stderr, "", "w", NULL); if (PyErr_Occurred()) return NULL; +#ifdef MS_WINDOWS + if(isatty(_fileno(stdin))){ + sprintf(buf, "cp%d", GetConsoleCP()); + if (!PyFile_SetEncoding(sysin, buf)) + return NULL; + } + if(isatty(_fileno(stdout))) { + sprintf(buf, "cp%d", GetConsoleOutputCP()); + if (!PyFile_SetEncoding(sysout, buf)) + return NULL; + } +#endif + +#if defined(HAVE_LANGINFO_H) && defined(CODESET) + oldloc = setlocale(LC_CTYPE, 0); + setlocale(LC_CTYPE, ""); + codeset = nl_langinfo(CODESET); + setlocale(LC_CTYPE, oldloc); + if(codeset && isatty(fileno(stdin))){ + if (!PyFile_SetEncoding(sysin, codeset)) + return NULL; + } + if(codeset && 
isatty(fileno(stdout))) { + if (!PyFile_SetEncoding(sysout, codeset)) + return NULL; + } +#endif + PyDict_SetItemString(sysdict, "stdin", sysin); PyDict_SetItemString(sysdict, "stdout", sysout); PyDict_SetItemString(sysdict, "stderr", syserr);