2000-03-10 19:09:23 -04:00
|
|
|
/* ------------------------------------------------------------------------
|
|
|
|
|
|
|
|
_codecs -- Provides access to the codec registry and the builtin
|
|
|
|
codecs.
|
|
|
|
|
|
|
|
This module should never be imported directly. The standard library
|
|
|
|
module "codecs" wraps this builtin module for use within Python.
|
|
|
|
|
|
|
|
The codec registry is accessible via:
|
|
|
|
|
|
|
|
register(search_function) -> None
|
|
|
|
|
2007-07-19 10:04:38 -03:00
|
|
|
lookup(encoding) -> CodecInfo object
|
2000-03-10 19:09:23 -04:00
|
|
|
|
|
|
|
The builtin Unicode codecs use the following interface:
|
|
|
|
|
2005-11-02 04:30:08 -04:00
|
|
|
<encoding>_encode(Unicode_object[,errors='strict']) ->
|
2010-05-09 12:15:40 -03:00
|
|
|
(string object, bytes consumed)
|
2000-03-10 19:09:23 -04:00
|
|
|
|
2005-11-02 04:30:08 -04:00
|
|
|
<encoding>_decode(char_buffer_obj[,errors='strict']) ->
|
2000-03-10 19:09:23 -04:00
|
|
|
(Unicode object, bytes consumed)
|
|
|
|
|
2000-07-05 08:24:13 -03:00
|
|
|
<encoding>_encode() interfaces also accept non-Unicode objects as
|
|
|
|
input. The objects are then converted to Unicode using
|
|
|
|
PyUnicode_FromObject() prior to applying the conversion.
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
These <encoding>s are available: utf_8, unicode_escape,
|
2000-07-05 08:24:13 -03:00
|
|
|
raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
|
|
|
|
mbcs (on win32).
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
|
|
|
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
|
|
|
|
2000-08-03 13:24:25 -03:00
|
|
|
Copyright (c) Corporation for National Research Initiatives.
|
2000-03-10 19:09:23 -04:00
|
|
|
|
|
|
|
------------------------------------------------------------------------ */
|
|
|
|
|
2006-02-15 13:27:45 -04:00
|
|
|
#define PY_SSIZE_T_CLEAN
|
2000-03-10 19:09:23 -04:00
|
|
|
#include "Python.h"
|
|
|
|
|
|
|
|
/* --- Registry ----------------------------------------------------------- */
|
|
|
|
|
2002-10-31 09:36:29 -04:00
|
|
|
PyDoc_STRVAR(register__doc__,
|
|
|
|
"register(search_function)\n\
|
|
|
|
\n\
|
|
|
|
Register a codec search function. Search functions are expected to take\n\
|
|
|
|
one argument, the encoding name in all lower case letters, and return\n\
|
2007-07-19 10:04:38 -03:00
|
|
|
a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
|
|
|
|
(or a CodecInfo object).");
|
2002-10-31 09:36:29 -04:00
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
static
|
2006-05-29 18:04:52 -03:00
|
|
|
PyObject *codec_register(PyObject *self, PyObject *search_function)
|
2000-03-10 19:09:23 -04:00
|
|
|
{
|
|
|
|
if (PyCodec_Register(search_function))
|
2006-05-29 18:04:52 -03:00
|
|
|
return NULL;
|
2000-03-10 19:09:23 -04:00
|
|
|
|
2006-05-29 18:04:52 -03:00
|
|
|
Py_RETURN_NONE;
|
2000-03-10 19:09:23 -04:00
|
|
|
}
|
|
|
|
|
2002-10-31 09:36:29 -04:00
|
|
|
PyDoc_STRVAR(lookup__doc__,
|
2007-07-19 10:04:38 -03:00
|
|
|
"lookup(encoding) -> CodecInfo\n\
|
2002-10-31 09:36:29 -04:00
|
|
|
\n\
|
|
|
|
Looks up a codec tuple in the Python codec registry and returns\n\
|
Merged revisions 69578-69580,69901,69907,69994,70022-70023,70025-70026,70166,70273,70275,70342,70386-70387,70389-70390,70392-70393,70395,70397,70400,70418 via svnmerge
........
r69578 | georg.brandl | 2009-02-13 12:03:59 +0100 (Fr, 13 Feb 2009) | 1 line
#3694: add test for fix committed in r66693.
........
r69579 | georg.brandl | 2009-02-13 12:06:59 +0100 (Fr, 13 Feb 2009) | 2 lines
Fix warnings GCC emits where the argument of PyErr_Format is a single variable.
........
r69580 | georg.brandl | 2009-02-13 12:10:04 +0100 (Fr, 13 Feb 2009) | 2 lines
Fix warnings GCC emits where the argument of PyErr_Format is a single variable.
........
r69901 | georg.brandl | 2009-02-23 12:24:46 +0100 (Mo, 23 Feb 2009) | 2 lines
#5349: C++ pure virtuals can also have an implementation.
........
r69907 | georg.brandl | 2009-02-23 19:33:48 +0100 (Mo, 23 Feb 2009) | 1 line
Fix grammar.
........
r69994 | georg.brandl | 2009-02-26 18:36:26 +0100 (Do, 26 Feb 2009) | 1 line
Document that setting sys.py3kwarning wont do anything.
........
r70022 | georg.brandl | 2009-02-27 17:23:18 +0100 (Fr, 27 Feb 2009) | 1 line
#5361: fix typo.
........
r70023 | georg.brandl | 2009-02-27 17:39:26 +0100 (Fr, 27 Feb 2009) | 1 line
#5363: fix cmpfiles() docs. Another instance where a prose description is twice as long as the code.
........
r70025 | georg.brandl | 2009-02-27 17:52:55 +0100 (Fr, 27 Feb 2009) | 1 line
#5344: fix punctuation.
........
r70026 | georg.brandl | 2009-02-27 17:59:03 +0100 (Fr, 27 Feb 2009) | 1 line
#5365: add quick look conversion table for different time representations.
........
r70166 | georg.brandl | 2009-03-04 19:24:41 +0100 (Mi, 04 Mär 2009) | 2 lines
Remove obsolete stuff from string module docs.
........
r70273 | georg.brandl | 2009-03-09 15:25:07 +0100 (Mo, 09 Mär 2009) | 2 lines
#5458: add a note when we started to raise RuntimeErrors.
........
r70275 | georg.brandl | 2009-03-09 17:35:48 +0100 (Mo, 09 Mär 2009) | 2 lines
Add missing space.
........
r70342 | georg.brandl | 2009-03-13 20:03:58 +0100 (Fr, 13 Mär 2009) | 1 line
#5486: typos.
........
r70386 | georg.brandl | 2009-03-15 22:32:06 +0100 (So, 15 Mär 2009) | 1 line
#5496: fix docstring of lookup().
........
r70387 | georg.brandl | 2009-03-15 22:37:16 +0100 (So, 15 Mär 2009) | 1 line
#5493: clarify __nonzero__ docs.
........
r70389 | georg.brandl | 2009-03-15 22:43:38 +0100 (So, 15 Mär 2009) | 1 line
Fix a small nit in the error message if bool() falls back on __len__ and it returns the wrong type: it would tell the user that __nonzero__ should return bool or int.
........
r70390 | georg.brandl | 2009-03-15 22:44:43 +0100 (So, 15 Mär 2009) | 1 line
#5491: clarify nested() semantics.
........
r70392 | georg.brandl | 2009-03-15 22:46:00 +0100 (So, 15 Mär 2009) | 1 line
#5488: add missing struct member.
........
r70393 | georg.brandl | 2009-03-15 22:47:42 +0100 (So, 15 Mär 2009) | 1 line
#5478: fix copy-paste oversight in function signature.
........
r70395 | georg.brandl | 2009-03-15 22:51:48 +0100 (So, 15 Mär 2009) | 1 line
#5276: document IDLESTARTUP and .Idle.py.
........
r70397 | georg.brandl | 2009-03-15 22:53:56 +0100 (So, 15 Mär 2009) | 1 line
#5469: add with statement to list of name-binding constructs.
........
r70400 | georg.brandl | 2009-03-15 22:59:37 +0100 (So, 15 Mär 2009) | 3 lines
Fix markup in re docs and give a mail address in regex howto, so that
the recommendation to send suggestions to the author can be followed.
........
r70418 | georg.brandl | 2009-03-16 20:42:03 +0100 (Mo, 16 Mär 2009) | 1 line
Add token markup.
........
2009-04-05 18:48:06 -03:00
|
|
|
a CodecInfo object.");
|
2002-10-31 09:36:29 -04:00
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
static
|
2004-07-10 09:06:10 -03:00
|
|
|
PyObject *codec_lookup(PyObject *self, PyObject *args)
|
2000-03-10 19:09:23 -04:00
|
|
|
{
|
|
|
|
char *encoding;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
|
2006-05-29 18:04:52 -03:00
|
|
|
return NULL;
|
2000-03-10 19:09:23 -04:00
|
|
|
|
|
|
|
return _PyCodec_Lookup(encoding);
|
|
|
|
}
|
|
|
|
|
2004-07-10 09:06:10 -03:00
|
|
|
PyDoc_STRVAR(encode__doc__,
|
|
|
|
"encode(obj, [encoding[,errors]]) -> object\n\
|
|
|
|
\n\
|
|
|
|
Encodes obj using the codec registered for encoding. encoding defaults\n\
|
|
|
|
to the default encoding. errors may be given to set a different error\n\
|
|
|
|
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
|
|
|
|
a ValueError. Other possible values are 'ignore', 'replace' and\n\
|
|
|
|
'xmlcharrefreplace' as well as any other name registered with\n\
|
|
|
|
codecs.register_error that can handle ValueErrors.");
|
|
|
|
|
|
|
|
/* encode(obj, [encoding[,errors]])

   Parses the object plus the optional encoding and errors strings, then
   encodes through the codec registry with PyCodec_Encode().

   When no encoding is supplied: builds with Py_USING_UNICODE fall back to
   PyUnicode_GetDefaultEncoding(); other builds raise ValueError
   ("no encoding specified") and return NULL. */
static PyObject *
codec_encode(PyObject *self, PyObject *args)
{
    PyObject *obj;
    const char *encoding = NULL;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "O|ss:encode", &obj, &encoding, &errors)) {
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry */
    return PyCodec_Encode(obj, encoding, errors);
}
|
|
|
|
|
|
|
|
PyDoc_STRVAR(decode__doc__,
|
|
|
|
"decode(obj, [encoding[,errors]]) -> object\n\
|
|
|
|
\n\
|
|
|
|
Decodes obj using the codec registered for encoding. encoding defaults\n\
|
|
|
|
to the default encoding. errors may be given to set a different error\n\
|
|
|
|
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
|
|
|
|
a ValueError. Other possible values are 'ignore' and 'replace'\n\
|
Merged revisions 66766-66767,66771-66772,66774,66776,66783-66787,66790,66793,66797 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
................
r66766 | benjamin.peterson | 2008-10-03 06:52:06 -0500 (Fri, 03 Oct 2008) | 1 line
update the mac installer script
................
r66767 | andrew.kuchling | 2008-10-03 07:26:42 -0500 (Fri, 03 Oct 2008) | 1 line
Docstring typo.
................
r66771 | hirokazu.yamamoto | 2008-10-03 11:18:42 -0500 (Fri, 03 Oct 2008) | 2 lines
Fixed following error when DocXMLRPCServer failed.
UnboundLocalError: local variable 'serv' referenced before assignment
................
r66772 | andrew.kuchling | 2008-10-03 11:29:19 -0500 (Fri, 03 Oct 2008) | 1 line
Mention exception in docstring
................
r66774 | andrew.kuchling | 2008-10-03 11:42:52 -0500 (Fri, 03 Oct 2008) | 1 line
Typo fix
................
r66776 | hirokazu.yamamoto | 2008-10-03 12:34:49 -0500 (Fri, 03 Oct 2008) | 2 lines
Issue #1706863: Fixed "'NoneType' object has no attribute 'rfind'" error when sqlite libfile not found.
................
r66783 | andrew.kuchling | 2008-10-03 20:02:29 -0500 (Fri, 03 Oct 2008) | 1 line
Use correct capitalization of NaN
................
r66784 | andrew.kuchling | 2008-10-03 20:03:42 -0500 (Fri, 03 Oct 2008) | 1 line
Docstring change: Specify exception raised
................
r66785 | andrew.kuchling | 2008-10-03 20:04:24 -0500 (Fri, 03 Oct 2008) | 1 line
Docstring changes: Specify exceptions raised
................
r66786 | andrew.kuchling | 2008-10-03 20:05:56 -0500 (Fri, 03 Oct 2008) | 3 lines
Docstring change for *partition: use same tense as other docstrings.
Hyphenate left- and right-justified.
Fix 'registerd' typo
................
r66787 | andrew.kuchling | 2008-10-03 22:08:56 -0500 (Fri, 03 Oct 2008) | 1 line
two corrections
................
r66790 | andrew.kuchling | 2008-10-04 11:52:01 -0500 (Sat, 04 Oct 2008) | 1 line
Set svn:keywords
................
r66793 | georg.brandl | 2008-10-04 13:26:01 -0500 (Sat, 04 Oct 2008) | 2 lines
#4041: don't refer to removed and outdated modules.
................
r66797 | benjamin.peterson | 2008-10-04 15:55:50 -0500 (Sat, 04 Oct 2008) | 19 lines
Merged revisions 66707,66775,66782 via svnmerge from
svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
........
r66707 | benjamin.peterson | 2008-09-30 18:27:10 -0500 (Tue, 30 Sep 2008) | 1 line
fix #4001: fix_imports didn't check for __init__.py before converting to relative imports
........
r66775 | collin.winter | 2008-10-03 12:08:26 -0500 (Fri, 03 Oct 2008) | 4 lines
Add an alternative iterative pattern matching system that, while slower, correctly parses files that cause the faster recursive pattern matcher to fail with a recursion error. lib2to3 falls back to the iterative matcher if the recursive one fails.
Fixes http://bugs.python.org/issue2532. Thanks to Nick Edds.
........
r66782 | benjamin.peterson | 2008-10-03 17:51:36 -0500 (Fri, 03 Oct 2008) | 1 line
add Victor Stinner's fixer for os.getcwdu -> os.getcwd #4023
........
................
2008-10-04 18:33:08 -03:00
|
|
|
as well as any other name registered with codecs.register_error that is\n\
|
2004-07-10 09:06:10 -03:00
|
|
|
able to handle ValueErrors.");
|
|
|
|
|
|
|
|
/* decode(obj, [encoding[,errors]])

   Parses the object plus the optional encoding and errors strings, then
   decodes through the codec registry with PyCodec_Decode().

   When no encoding is supplied: builds with Py_USING_UNICODE fall back to
   PyUnicode_GetDefaultEncoding(); other builds raise ValueError
   ("no encoding specified") and return NULL. */
static PyObject *
codec_decode(PyObject *self, PyObject *args)
{
    PyObject *obj;
    const char *encoding = NULL;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "O|ss:decode", &obj, &encoding, &errors)) {
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry */
    return PyCodec_Decode(obj, encoding, errors);
}
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* --- Helpers ------------------------------------------------------------ */
|
|
|
|
|
|
|
|
static
|
|
|
|
PyObject *codec_tuple(PyObject *unicode,
|
2010-05-09 12:15:40 -03:00
|
|
|
Py_ssize_t len)
|
2000-03-10 19:09:23 -04:00
|
|
|
{
|
2006-05-29 18:04:52 -03:00
|
|
|
PyObject *v;
|
2000-03-10 19:09:23 -04:00
|
|
|
if (unicode == NULL)
|
2006-05-29 18:04:52 -03:00
|
|
|
return NULL;
|
|
|
|
v = Py_BuildValue("On", unicode, len);
|
|
|
|
Py_DECREF(unicode);
|
2000-03-10 19:09:23 -04:00
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
2002-08-14 04:46:28 -03:00
|
|
|
/* --- String codecs ------------------------------------------------------ */
|
|
|
|
/* escape_decode(data[, errors])

   Decodes the input with PyString_DecodeEscape() and returns the
   (result, size) tuple from codec_tuple(), where size is the length of
   the parsed input.  A NULL decode result is propagated by codec_tuple(). */
static PyObject *
escape_decode(PyObject *self, PyObject *args)
{
    const char *data;
    Py_ssize_t size;
    const char *errors = NULL;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
                          &data, &size, &errors)) {
        return NULL;
    }

    decoded = PyString_DecodeEscape(data, size, errors, 0, NULL);
    return codec_tuple(decoded, size);
}
|
|
|
|
|
|
|
|
/* escape_encode(str[, errors])

   Produces the repr-style escaped form of a string object without the
   surrounding quotes, and returns (escaped, consumed) where consumed is
   the size of the original input string. */
static PyObject *
escape_encode(PyObject *self, PyObject *args)
{
    PyObject *str;
    const char *errors = NULL;
    char *chars;
    Py_ssize_t consumed;
    Py_ssize_t unquoted_len;

    if (!PyArg_ParseTuple(args, "S|z:escape_encode", &str, &errors)) {
        return NULL;
    }

    /* Remember the input size before str is replaced by its repr. */
    consumed = PyString_GET_SIZE(str);
    str = PyString_Repr(str, 0);
    if (str == NULL) {
        return NULL;
    }

    /* The repr is quoted.  Drop the first and last character by shifting
       the contents down one byte and shrinking the string by two. */
    chars = PyString_AS_STRING(str);
    unquoted_len = PyString_GET_SIZE(str) - 2;
    memmove(chars, chars + 1, unquoted_len);
    if (_PyString_Resize(&str, unquoted_len) < 0) {
        return NULL;
    }

    return codec_tuple(str, consumed);
}
|
|
|
|
|
|
|
|
#ifdef Py_USING_UNICODE
|
2000-03-10 19:09:23 -04:00
|
|
|
/* --- Decoder ------------------------------------------------------------ */
|
|
|
|
|
|
|
|
/* unicode_internal_decode(obj[, errors])

   A Unicode object is returned as-is (with a new reference) together with
   its PyUnicode_GET_SIZE().  Any other object is read through
   PyObject_AsReadBuffer() and decoded with
   _PyUnicode_DecodeUnicodeInternal(); the second tuple item is then the
   buffer size. */
static PyObject *
unicode_internal_decode(PyObject *self, PyObject *args)
{
    PyObject *obj;
    const char *errors = NULL;
    const char *data;
    Py_ssize_t size;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
                          &obj, &errors)) {
        return NULL;
    }

    if (PyUnicode_Check(obj)) {
        /* codec_tuple() drops one reference, so take one first. */
        Py_INCREF(obj);
        return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
    }

    if (PyObject_AsReadBuffer(obj, (const void **)&data, &size) != 0) {
        return NULL;
    }

    decoded = _PyUnicode_DecodeUnicodeInternal(data, size, errors);
    return codec_tuple(decoded, size);
}
|
|
|
|
|
2001-09-20 07:35:46 -03:00
|
|
|
/* utf_7_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF7Stateful() and returns
   (decoded, consumed).  consumed starts as the full buffer length; when
   final is false its address is handed to the decoder, which may
   overwrite it. */
static PyObject *
utf_7_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF7Stateful(view.buf, view.len, errors,
                                          consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* utf_8_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF8Stateful() and returns
   (decoded, consumed).  consumed starts as the full buffer length; when
   final is false its address is handed to the decoder, which may
   overwrite it. */
static PyObject *
utf_8_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF8Stateful(view.buf, view.len, errors,
                                          consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* utf_16_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF16Stateful(), starting with
   byteorder 0, and returns (decoded, consumed).  The byte order the
   decoder ends up with is not reported to the caller. */
static PyObject *
utf_16_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int byteorder = 0;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    /* Full length by default; the decoder overwrites it unless final
       is true. */
    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF16Stateful(view.buf, view.len, errors,
                                           &byteorder, consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* utf_16_le_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF16Stateful(), starting with
   byteorder -1, and returns (decoded, consumed). */
static PyObject *
utf_16_le_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int byteorder = -1;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    /* Full length by default; the decoder overwrites it unless final
       is true. */
    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF16Stateful(view.buf, view.len, errors,
                                           &byteorder, consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* utf_16_be_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF16Stateful(), starting with
   byteorder 1, and returns (decoded, consumed). */
static PyObject *
utf_16_be_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int byteorder = 1;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    /* Full length by default; the decoder overwrites it unless final
       is true. */
    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF16Stateful(view.buf, view.len, errors,
                                           &byteorder, consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* This non-standard version also provides access to the byteorder
|
|
|
|
parameter of the builtin UTF-16 codec.
|
|
|
|
|
|
|
|
It returns a tuple (unicode, bytesread, byteorder) with byteorder
|
|
|
|
being the value in effect at the end of data.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
utf_16_ex_decode(PyObject *self,
|
2010-05-09 12:15:40 -03:00
|
|
|
PyObject *args)
|
2000-03-10 19:09:23 -04:00
|
|
|
{
|
2010-06-09 13:31:23 -03:00
|
|
|
Py_buffer pbuf;
|
2000-03-10 19:09:23 -04:00
|
|
|
const char *errors = NULL;
|
|
|
|
int byteorder = 0;
|
|
|
|
PyObject *unicode, *tuple;
|
2004-09-07 17:24:22 -03:00
|
|
|
int final = 0;
|
2006-02-15 13:27:45 -04:00
|
|
|
Py_ssize_t consumed;
|
2004-09-07 17:24:22 -03:00
|
|
|
|
2008-08-12 11:49:50 -03:00
|
|
|
if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
|
2010-05-09 12:15:40 -03:00
|
|
|
&pbuf, &errors, &byteorder, &final))
|
|
|
|
return NULL;
|
2008-08-12 11:49:50 -03:00
|
|
|
consumed = pbuf.len; /* This is overwritten unless final is true. */
|
|
|
|
unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
|
2010-05-09 12:15:40 -03:00
|
|
|
&byteorder, final ? NULL : &consumed);
|
2010-06-09 13:31:23 -03:00
|
|
|
PyBuffer_Release(&pbuf);
|
2000-03-10 19:09:23 -04:00
|
|
|
if (unicode == NULL)
|
2010-05-09 12:15:40 -03:00
|
|
|
return NULL;
|
2006-05-29 18:04:52 -03:00
|
|
|
tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
|
2000-03-10 19:09:23 -04:00
|
|
|
Py_DECREF(unicode);
|
|
|
|
return tuple;
|
|
|
|
}
|
|
|
|
|
2007-08-17 13:41:28 -03:00
|
|
|
/* utf_32_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF32Stateful(), starting with
   byteorder 0, and returns (decoded, consumed).  The byte order the
   decoder ends up with is not reported to the caller. */
static PyObject *
utf_32_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int byteorder = 0;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    /* Full length by default; the decoder overwrites it unless final
       is true. */
    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF32Stateful(view.buf, view.len, errors,
                                           &byteorder, consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* utf_32_le_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF32Stateful(), starting with
   byteorder -1, and returns (decoded, consumed). */
static PyObject *
utf_32_le_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int byteorder = -1;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    /* Full length by default; the decoder overwrites it unless final
       is true. */
    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF32Stateful(view.buf, view.len, errors,
                                           &byteorder, consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* utf_32_be_decode(data[, errors[, final]])

   Decodes the buffer with PyUnicode_DecodeUTF32Stateful(), starting with
   byteorder 1, and returns (decoded, consumed). */
static PyObject *
utf_32_be_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int byteorder = 1;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_out;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    /* Full length by default; the decoder overwrites it unless final
       is true. */
    consumed = view.len;
    consumed_out = final ? NULL : &consumed;
    result = PyUnicode_DecodeUTF32Stateful(view.buf, view.len, errors,
                                           &byteorder, consumed_out);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
|
|
|
/* This non-standard version also provides access to the byteorder
|
|
|
|
parameter of the builtin UTF-32 codec.
|
|
|
|
|
|
|
|
It returns a tuple (unicode, bytesread, byteorder) with byteorder
|
|
|
|
being the value in effect at the end of data.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
utf_32_ex_decode(PyObject *self,
|
2010-05-09 12:15:40 -03:00
|
|
|
PyObject *args)
|
2007-08-17 13:41:28 -03:00
|
|
|
{
|
2010-06-09 13:31:23 -03:00
|
|
|
Py_buffer pbuf;
|
2007-08-17 13:41:28 -03:00
|
|
|
const char *errors = NULL;
|
|
|
|
int byteorder = 0;
|
|
|
|
PyObject *unicode, *tuple;
|
|
|
|
int final = 0;
|
|
|
|
Py_ssize_t consumed;
|
|
|
|
|
2008-08-12 11:49:50 -03:00
|
|
|
if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
|
2010-05-09 12:15:40 -03:00
|
|
|
&pbuf, &errors, &byteorder, &final))
|
|
|
|
return NULL;
|
2008-08-12 11:49:50 -03:00
|
|
|
consumed = pbuf.len; /* This is overwritten unless final is true. */
|
|
|
|
unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
|
2010-05-09 12:15:40 -03:00
|
|
|
&byteorder, final ? NULL : &consumed);
|
2010-06-09 13:31:23 -03:00
|
|
|
PyBuffer_Release(&pbuf);
|
2007-08-17 13:41:28 -03:00
|
|
|
if (unicode == NULL)
|
2010-05-09 12:15:40 -03:00
|
|
|
return NULL;
|
2007-08-17 13:41:28 -03:00
|
|
|
tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
|
|
|
|
Py_DECREF(unicode);
|
|
|
|
return tuple;
|
|
|
|
}
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* unicode_escape_decode(data[, errors])

   Decodes the buffer with PyUnicode_DecodeUnicodeEscape() and returns
   (decoded, length of the input buffer).  A NULL decode result is
   propagated by codec_tuple(). */
static PyObject *
unicode_escape_decode(PyObject *self, PyObject *args)
{
    Py_buffer pbuf;
    const char *errors = NULL;
    PyObject *unicode;
    Py_ssize_t consumed;

    if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
                          &pbuf, &errors))
        return NULL;

    /* Copy the length out before releasing the view, so no field of pbuf
       is read after PyBuffer_Release(); the stateful decoders in this
       file do the same. */
    consumed = pbuf.len;
    unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
    PyBuffer_Release(&pbuf);
    return codec_tuple(unicode, consumed);
}
|
|
|
|
|
|
|
|
/* raw_unicode_escape_decode(data[, errors])

   Decodes the buffer with PyUnicode_DecodeRawUnicodeEscape() and returns
   (decoded, length of the input buffer).  A NULL decode result is
   propagated by codec_tuple(). */
static PyObject *
raw_unicode_escape_decode(PyObject *self, PyObject *args)
{
    Py_buffer pbuf;
    const char *errors = NULL;
    PyObject *unicode;
    Py_ssize_t consumed;

    if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
                          &pbuf, &errors))
        return NULL;

    /* Copy the length out before releasing the view, so no field of pbuf
       is read after PyBuffer_Release(); the stateful decoders in this
       file do the same. */
    consumed = pbuf.len;
    unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
    PyBuffer_Release(&pbuf);
    return codec_tuple(unicode, consumed);
}
|
|
|
|
|
|
|
|
/* latin_1_decode(data[, errors])

   Decodes the buffer with PyUnicode_DecodeLatin1() and returns
   (decoded, length of the input buffer).  A NULL decode result is
   propagated by codec_tuple(). */
static PyObject *
latin_1_decode(PyObject *self, PyObject *args)
{
    Py_buffer pbuf;
    PyObject *unicode;
    const char *errors = NULL;
    Py_ssize_t consumed;

    if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
                          &pbuf, &errors))
        return NULL;

    /* Copy the length out before releasing the view, so no field of pbuf
       is read after PyBuffer_Release(); the stateful decoders in this
       file do the same. */
    consumed = pbuf.len;
    unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
    PyBuffer_Release(&pbuf);
    return codec_tuple(unicode, consumed);
}
|
|
|
|
|
|
|
|
/* ascii_decode(data[, errors])

   Decodes the buffer with PyUnicode_DecodeASCII() and returns
   (decoded, length of the input buffer).  A NULL decode result is
   propagated by codec_tuple(). */
static PyObject *
ascii_decode(PyObject *self, PyObject *args)
{
    Py_buffer pbuf;
    PyObject *unicode;
    const char *errors = NULL;
    Py_ssize_t consumed;

    if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
                          &pbuf, &errors))
        return NULL;

    /* Copy the length out before releasing the view, so no field of pbuf
       is read after PyBuffer_Release(); the stateful decoders in this
       file do the same. */
    consumed = pbuf.len;
    unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
    PyBuffer_Release(&pbuf);
    return codec_tuple(unicode, consumed);
}
|
|
|
|
|
|
|
|
/* charmap_decode(data[, errors[, mapping]]) -> result of codec_tuple()

   Parses a read buffer ("s*"), an optional error-handler name ("z") and an
   optional mapping object ("O").  A mapping of None is treated the same as
   an omitted mapping (NULL is passed to the decoder).  The buffer is
   decoded with PyUnicode_DecodeCharmap() and the decoded object is handed
   to codec_tuple() together with the input length in bytes.
   Returns NULL if argument parsing fails.

   NOTE(review): a NULL decode result is forwarded to codec_tuple(), which
   is defined outside this chunk -- presumably it propagates the error;
   confirm. */
static PyObject *
charmap_decode(PyObject *self,
               PyObject *args)
{
    Py_buffer pbuf;
    PyObject *unicode;
    const char *errors = NULL;
    PyObject *mapping = NULL;
    Py_ssize_t consumed;

    if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
                          &pbuf, &errors, &mapping))
        return NULL;
    if (mapping == Py_None)
        mapping = NULL;

    /* Read the length while the view is still held: the fields of a
       Py_buffer should not be relied upon after PyBuffer_Release(). */
    consumed = pbuf.len;
    unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
    PyBuffer_Release(&pbuf);
    return codec_tuple(unicode, consumed);
}
|
|
|
|
|
2002-06-30 12:26:10 -03:00
|
|
|
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
2000-03-28 16:29:59 -04:00
|
|
|
|
|
|
|
/* mbcs_decode(data[, errors[, final]]) -> result of codec_tuple()

   Parses a read buffer ("s*"), an optional error-handler name ("z") and an
   optional integer flag `final` ("i", default 0), then decodes the buffer
   with PyUnicode_DecodeMBCSStateful().

   `consumed` starts out as the full buffer length.  When `final` is zero
   the decoder receives a pointer to it; when `final` is non-zero it
   receives NULL, so the full length is what gets reported.
   Returns NULL if parsing or decoding fails. */
static PyObject *
mbcs_decode(PyObject *self, PyObject *args)
{
    Py_buffer view;
    const char *errors = NULL;
    int final = 0;
    Py_ssize_t consumed;
    Py_ssize_t *consumed_ptr;
    PyObject *result = NULL;

    if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
                          &view, &errors, &final)) {
        return NULL;
    }

    consumed = view.len;
    consumed_ptr = final ? NULL : &consumed;

    result = PyUnicode_DecodeMBCSStateful(view.buf, view.len, errors,
                                          consumed_ptr);
    PyBuffer_Release(&view);

    if (result == NULL) {
        return NULL;
    }
    return codec_tuple(result, consumed);
}
|
|
|
|
|
2002-06-30 12:26:10 -03:00
|
|
|
#endif /* MS_WINDOWS */
|
2000-03-28 16:29:59 -04:00
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* --- Encoder ------------------------------------------------------------ */
|
|
|
|
|
|
|
|
/* readbuffer_encode(data[, errors]) -> result of codec_tuple()

   Parses data with the "s#" format unit plus an optional error-handler
   name, copies the bytes into a new string object and passes that object
   and the byte count to codec_tuple().  `errors` is parsed but not used. */
static PyObject *
readbuffer_encode(PyObject *self, PyObject *args)
{
    const char *bytes;
    Py_ssize_t nbytes;
    const char *errors = NULL;
    PyObject *copy;

    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
                          &bytes, &nbytes, &errors)) {
        return NULL;
    }

    copy = PyString_FromStringAndSize(bytes, nbytes);
    return codec_tuple(copy, nbytes);
}
|
|
|
|
|
|
|
|
/* charbuffer_encode(data[, errors]) -> result of codec_tuple()

   Same shape as readbuffer_encode, but the input is parsed with the "t#"
   format unit.  The bytes are copied into a new string object, which is
   passed with the byte count to codec_tuple().  `errors` is parsed but not
   used. */
static PyObject *
charbuffer_encode(PyObject *self, PyObject *args)
{
    const char *bytes;
    Py_ssize_t nbytes;
    const char *errors = NULL;
    PyObject *copy;

    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
                          &bytes, &nbytes, &errors)) {
        return NULL;
    }

    copy = PyString_FromStringAndSize(bytes, nbytes);
    return codec_tuple(copy, nbytes);
}
|
|
|
|
|
2000-09-21 18:09:45 -03:00
|
|
|
/* unicode_internal_encode(obj[, errors]) -> result of codec_tuple()

   For a Unicode object the raw internal data (PyUnicode_AS_DATA /
   PyUnicode_GET_DATA_SIZE) is used; for any other object the bytes are
   obtained through PyObject_AsReadBuffer().  Either way the bytes are
   copied into a new string object and passed, with their size in bytes,
   to codec_tuple().  `errors` is parsed but not used.
   Returns NULL if parsing fails or the object offers no read buffer. */
static PyObject *
unicode_internal_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    const char *errors = NULL;
    const char *raw;
    Py_ssize_t nbytes;

    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
                          &input, &errors)) {
        return NULL;
    }

    if (!PyUnicode_Check(input)) {
        /* Non-Unicode input: fall back to the object's read buffer. */
        if (PyObject_AsReadBuffer(input, (const void **)&raw, &nbytes)) {
            return NULL;
        }
    }
    else {
        raw = PyUnicode_AS_DATA(input);
        nbytes = PyUnicode_GET_DATA_SIZE(input);
    }

    return codec_tuple(PyString_FromStringAndSize(raw, nbytes), nbytes);
}
|
|
|
|
|
2001-09-20 07:35:46 -03:00
|
|
|
/* utf_7_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeUTF7() (both integer flag arguments passed as 0) and
   hands the encoded object plus the input length in Py_UNICODE units to
   codec_tuple().  Returns NULL if parsing or the conversion fails. */
static PyObject *
utf_7_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(text), length,
                                   0, 0, errors);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* utf_8_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeUTF8() and hands the encoded object plus the input
   length in Py_UNICODE units to codec_tuple().
   Returns NULL if parsing or the conversion fails. */
static PyObject *
utf_8_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text), length,
                                   errors);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* This version provides access to the byteorder parameter of the
|
|
|
|
builtin UTF-16 codecs as optional third argument. It defaults to 0
|
|
|
|
which means: use the native byte order and prepend the data with a
|
2005-11-02 04:30:08 -04:00
|
|
|
BOM mark.
|
2000-03-10 19:09:23 -04:00
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
utf_16_encode(PyObject *self,
|
2010-05-09 12:15:40 -03:00
|
|
|
PyObject *args)
|
2000-03-10 19:09:23 -04:00
|
|
|
{
|
2000-07-05 08:24:13 -03:00
|
|
|
PyObject *str, *v;
|
2000-03-10 19:09:23 -04:00
|
|
|
const char *errors = NULL;
|
|
|
|
int byteorder = 0;
|
|
|
|
|
2000-07-05 08:24:13 -03:00
|
|
|
if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
|
2010-05-09 12:15:40 -03:00
|
|
|
&str, &errors, &byteorder))
|
|
|
|
return NULL;
|
2000-03-10 19:09:23 -04:00
|
|
|
|
2000-07-05 08:24:13 -03:00
|
|
|
str = PyUnicode_FromObject(str);
|
|
|
|
if (str == NULL)
|
2010-05-09 12:15:40 -03:00
|
|
|
return NULL;
|
2000-07-05 08:24:13 -03:00
|
|
|
v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
|
2010-05-09 12:15:40 -03:00
|
|
|
PyUnicode_GET_SIZE(str),
|
|
|
|
errors,
|
|
|
|
byteorder),
|
|
|
|
PyUnicode_GET_SIZE(str));
|
2000-07-05 08:24:13 -03:00
|
|
|
Py_DECREF(str);
|
|
|
|
return v;
|
2000-03-10 19:09:23 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* utf_16_le_encode(obj[, errors]) -> result of codec_tuple()

   Like utf_16_encode, but the byteorder argument passed to
   PyUnicode_EncodeUTF16() is fixed at -1 (the "le" variant, going by the
   function name).  The second value handed to codec_tuple() is the input
   length in Py_UNICODE units.
   Returns NULL if parsing or the conversion to Unicode fails. */
static PyObject *
utf_16_le_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), length,
                                    errors, -1);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* utf_16_be_encode(obj[, errors]) -> result of codec_tuple()

   Like utf_16_encode, but the byteorder argument passed to
   PyUnicode_EncodeUTF16() is fixed at +1 (the "be" variant, going by the
   function name).  The second value handed to codec_tuple() is the input
   length in Py_UNICODE units.
   Returns NULL if parsing or the conversion to Unicode fails. */
static PyObject *
utf_16_be_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text), length,
                                    errors, +1);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
2007-08-17 13:41:28 -03:00
|
|
|
/* This version provides access to the byteorder parameter of the
|
|
|
|
builtin UTF-32 codecs as optional third argument. It defaults to 0
|
|
|
|
which means: use the native byte order and prepend the data with a
|
|
|
|
BOM mark.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
static PyObject *
|
|
|
|
utf_32_encode(PyObject *self,
|
2010-05-09 12:15:40 -03:00
|
|
|
PyObject *args)
|
2007-08-17 13:41:28 -03:00
|
|
|
{
|
|
|
|
PyObject *str, *v;
|
|
|
|
const char *errors = NULL;
|
|
|
|
int byteorder = 0;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
|
2010-05-09 12:15:40 -03:00
|
|
|
&str, &errors, &byteorder))
|
|
|
|
return NULL;
|
2007-08-17 13:41:28 -03:00
|
|
|
|
|
|
|
str = PyUnicode_FromObject(str);
|
|
|
|
if (str == NULL)
|
2010-05-09 12:15:40 -03:00
|
|
|
return NULL;
|
2007-08-17 13:41:28 -03:00
|
|
|
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
|
2010-05-09 12:15:40 -03:00
|
|
|
PyUnicode_GET_SIZE(str),
|
|
|
|
errors,
|
|
|
|
byteorder),
|
|
|
|
PyUnicode_GET_SIZE(str));
|
2007-08-17 13:41:28 -03:00
|
|
|
Py_DECREF(str);
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* utf_32_le_encode(obj[, errors]) -> result of codec_tuple()

   Like utf_32_encode, but the byteorder argument passed to
   PyUnicode_EncodeUTF32() is fixed at -1 (the "le" variant, going by the
   function name).  The second value handed to codec_tuple() is the input
   length in Py_UNICODE units.
   Returns NULL if parsing or the conversion to Unicode fails. */
static PyObject *
utf_32_le_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), length,
                                    errors, -1);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* utf_32_be_encode(obj[, errors]) -> result of codec_tuple()

   Like utf_32_encode, but the byteorder argument passed to
   PyUnicode_EncodeUTF32() is fixed at +1 (the "be" variant, going by the
   function name).  The second value handed to codec_tuple() is the input
   length in Py_UNICODE units.
   Returns NULL if parsing or the conversion to Unicode fails. */
static PyObject *
utf_32_be_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text), length,
                                    errors, +1);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* unicode_escape_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeUnicodeEscape() and hands the encoded object plus the
   input length in Py_UNICODE units to codec_tuple().  `errors` is parsed
   but not passed to the encoder.
   Returns NULL if parsing or the conversion fails. */
static PyObject *
unicode_escape_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(text),
                                            length);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* raw_unicode_escape_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeRawUnicodeEscape() and hands the encoded object plus the
   input length in Py_UNICODE units to codec_tuple().  `errors` is parsed
   but not passed to the encoder.
   Returns NULL if parsing or the conversion fails. */
static PyObject *
raw_unicode_escape_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(text),
                                               length);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* latin_1_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeLatin1() and hands the encoded object plus the input
   length in Py_UNICODE units to codec_tuple().
   Returns NULL if parsing or the conversion fails. */
static PyObject *
latin_1_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text), length,
                                     errors);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* ascii_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeASCII() and hands the encoded object plus the input
   length in Py_UNICODE units to codec_tuple().
   Returns NULL if parsing or the conversion fails. */
static PyObject *
ascii_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text), length,
                                    errors);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
|
|
|
/* charmap_encode(obj[, errors[, mapping]]) -> result of codec_tuple()

   Converts the first argument with PyUnicode_FromObject() and encodes it
   with PyUnicode_EncodeCharmap().  A mapping of None is treated the same
   as an omitted mapping (NULL is passed to the encoder).  The encoded
   object plus the input length in Py_UNICODE units go to codec_tuple().
   Returns NULL if parsing or the conversion fails. */
static PyObject *
charmap_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    PyObject *mapping = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
                          &input, &errors, &mapping)) {
        return NULL;
    }
    if (mapping == Py_None) {
        mapping = NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(text), length,
                                      mapping, errors);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
2006-06-04 16:36:28 -03:00
|
|
|
/* charmap_build(map) -> result of PyUnicode_BuildEncodingMap()

   Parses a single argument with the "U" format unit and forwards it
   unchanged to PyUnicode_BuildEncodingMap(), returning that call's result.
   Returns NULL if argument parsing fails. */
static PyObject*
charmap_build(PyObject *self, PyObject *args)
{
    PyObject *table;

    if (!PyArg_ParseTuple(args, "U:charmap_build", &table)) {
        return NULL;
    }

    return PyUnicode_BuildEncodingMap(table);
}
|
|
|
|
|
2002-06-30 12:26:10 -03:00
|
|
|
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
2000-03-28 16:29:59 -04:00
|
|
|
|
|
|
|
/* mbcs_encode(obj[, errors]) -> result of codec_tuple()

   Converts the argument with PyUnicode_FromObject(), encodes it with
   PyUnicode_EncodeMBCS() and hands the encoded object plus the input
   length in Py_UNICODE units to codec_tuple().
   Returns NULL if parsing or the conversion fails. */
static PyObject *
mbcs_encode(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *text;
    PyObject *encoded;
    PyObject *result;
    const char *errors = NULL;
    Py_ssize_t length;

    if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
                          &input, &errors)) {
        return NULL;
    }

    text = PyUnicode_FromObject(input);
    if (text == NULL) {
        return NULL;
    }

    length = PyUnicode_GET_SIZE(text);
    encoded = PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(text), length,
                                   errors);
    /* A NULL `encoded` is passed on unchanged, as in the nested-call form. */
    result = codec_tuple(encoded, length);
    Py_DECREF(text);
    return result;
}
|
|
|
|
|
2002-06-30 12:26:10 -03:00
|
|
|
#endif /* MS_WINDOWS */
|
2001-08-17 15:39:25 -03:00
|
|
|
#endif /* Py_USING_UNICODE */
|
2000-03-28 16:29:59 -04:00
|
|
|
|
2002-09-02 10:14:32 -03:00
|
|
|
/* --- Error handler registry --------------------------------------------- */
|
|
|
|
|
2002-10-31 09:36:29 -04:00
|
|
|
PyDoc_STRVAR(register_error__doc__,
|
|
|
|
"register_error(errors, handler)\n\
|
|
|
|
\n\
|
|
|
|
Register the specified error handler under the name\n\
|
|
|
|
errors. handler must be a callable object, that\n\
|
|
|
|
will be called with an exception instance containing\n\
|
|
|
|
information about the location of the encoding/decoding\n\
|
|
|
|
error and must return a (replacement, new position) tuple.");
|
|
|
|
|
2002-09-02 10:14:32 -03:00
|
|
|
static PyObject *register_error(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
const char *name;
|
|
|
|
PyObject *handler;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "sO:register_error",
|
2010-05-09 12:15:40 -03:00
|
|
|
&name, &handler))
|
|
|
|
return NULL;
|
2002-09-02 10:14:32 -03:00
|
|
|
if (PyCodec_RegisterError(name, handler))
|
|
|
|
return NULL;
|
2006-05-29 18:04:52 -03:00
|
|
|
Py_RETURN_NONE;
|
2002-09-02 10:14:32 -03:00
|
|
|
}
|
|
|
|
|
2002-10-31 09:36:29 -04:00
|
|
|
PyDoc_STRVAR(lookup_error__doc__,
|
|
|
|
"lookup_error(errors) -> handler\n\
|
|
|
|
\n\
|
|
|
|
Return the error handler for the specified error handling name\n\
|
|
|
|
or raise a LookupError, if no handler exists under this name.");
|
|
|
|
|
2002-09-02 10:14:32 -03:00
|
|
|
static PyObject *lookup_error(PyObject *self, PyObject *args)
|
|
|
|
{
|
|
|
|
const char *name;
|
|
|
|
|
|
|
|
if (!PyArg_ParseTuple(args, "s:lookup_error",
|
2010-05-09 12:15:40 -03:00
|
|
|
&name))
|
|
|
|
return NULL;
|
2002-09-02 10:14:32 -03:00
|
|
|
return PyCodec_LookupError(name);
|
|
|
|
}
|
|
|
|
|
2000-03-10 19:09:23 -04:00
|
|
|
/* --- Module API --------------------------------------------------------- */
|
|
|
|
|
|
|
|
static PyMethodDef _codecs_functions[] = {
|
2010-05-09 12:15:40 -03:00
|
|
|
{"register", codec_register, METH_O,
|
2002-10-31 09:36:29 -04:00
|
|
|
register__doc__},
|
2010-05-09 12:15:40 -03:00
|
|
|
{"lookup", codec_lookup, METH_VARARGS,
|
2002-10-31 09:36:29 -04:00
|
|
|
lookup__doc__},
|
2010-05-09 12:15:40 -03:00
|
|
|
{"encode", codec_encode, METH_VARARGS,
|
|
|
|
encode__doc__},
|
|
|
|
{"decode", codec_decode, METH_VARARGS,
|
|
|
|
decode__doc__},
|
|
|
|
{"escape_encode", escape_encode, METH_VARARGS},
|
|
|
|
{"escape_decode", escape_decode, METH_VARARGS},
|
2001-08-17 15:39:25 -03:00
|
|
|
#ifdef Py_USING_UNICODE
|
2010-05-09 12:15:40 -03:00
|
|
|
{"utf_8_encode", utf_8_encode, METH_VARARGS},
|
|
|
|
{"utf_8_decode", utf_8_decode, METH_VARARGS},
|
|
|
|
{"utf_7_encode", utf_7_encode, METH_VARARGS},
|
|
|
|
{"utf_7_decode", utf_7_decode, METH_VARARGS},
|
|
|
|
{"utf_16_encode", utf_16_encode, METH_VARARGS},
|
|
|
|
{"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
|
|
|
|
{"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
|
|
|
|
{"utf_16_decode", utf_16_decode, METH_VARARGS},
|
|
|
|
{"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
|
|
|
|
{"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
|
|
|
|
{"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
|
|
|
|
{"utf_32_encode", utf_32_encode, METH_VARARGS},
|
|
|
|
{"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
|
|
|
|
{"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
|
|
|
|
{"utf_32_decode", utf_32_decode, METH_VARARGS},
|
|
|
|
{"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
|
|
|
|
{"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
|
|
|
|
{"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
|
|
|
|
{"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
|
|
|
|
{"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
|
|
|
|
{"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
|
|
|
|
{"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
|
|
|
|
{"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
|
|
|
|
{"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
|
|
|
|
{"latin_1_encode", latin_1_encode, METH_VARARGS},
|
|
|
|
{"latin_1_decode", latin_1_decode, METH_VARARGS},
|
|
|
|
{"ascii_encode", ascii_encode, METH_VARARGS},
|
|
|
|
{"ascii_decode", ascii_decode, METH_VARARGS},
|
|
|
|
{"charmap_encode", charmap_encode, METH_VARARGS},
|
|
|
|
{"charmap_decode", charmap_decode, METH_VARARGS},
|
|
|
|
{"charmap_build", charmap_build, METH_VARARGS},
|
|
|
|
{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
|
|
|
|
{"charbuffer_encode", charbuffer_encode, METH_VARARGS},
|
2002-06-30 12:26:10 -03:00
|
|
|
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
2010-05-09 12:15:40 -03:00
|
|
|
{"mbcs_encode", mbcs_encode, METH_VARARGS},
|
|
|
|
{"mbcs_decode", mbcs_decode, METH_VARARGS},
|
2000-03-28 16:29:59 -04:00
|
|
|
#endif
|
2001-08-17 15:39:25 -03:00
|
|
|
#endif /* Py_USING_UNICODE */
|
2010-05-09 12:15:40 -03:00
|
|
|
{"register_error", register_error, METH_VARARGS,
|
2002-10-31 09:36:29 -04:00
|
|
|
register_error__doc__},
|
2010-05-09 12:15:40 -03:00
|
|
|
{"lookup_error", lookup_error, METH_VARARGS,
|
2002-10-31 09:36:29 -04:00
|
|
|
lookup_error__doc__},
|
2010-05-09 12:15:40 -03:00
|
|
|
{NULL, NULL} /* sentinel */
|
2000-03-10 19:09:23 -04:00
|
|
|
};
|
|
|
|
|
2002-08-01 23:27:13 -03:00
|
|
|
PyMODINIT_FUNC
|
2000-07-21 03:00:07 -03:00
|
|
|
init_codecs(void)
|
2000-03-10 19:09:23 -04:00
|
|
|
{
|
|
|
|
Py_InitModule("_codecs", _codecs_functions);
|
|
|
|
}
|