2004-01-17 10:29:29 -04:00
|
|
|
/*
|
|
|
|
* multibytecodec.h: Common Multibyte Codec Implementation
|
|
|
|
*
|
|
|
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _PYTHON_MULTIBYTECODEC_H_
|
|
|
|
#define _PYTHON_MULTIBYTECODEC_H_
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2023-07-03 05:23:43 -03:00
|
|
|
#include "pycore_unicodeobject.h" // _PyUnicodeWriter
|
|
|
|
|
2004-07-18 00:06:29 -03:00
|
|
|
/*
 * ucs2_t and DBCHAR are two names for the same unsigned integer type.
 *
 * uint16_t is a typedef, not a preprocessor macro, so "#ifdef uint16_t"
 * is never true and the exact-width branch could not be selected.
 * <stdint.h> defines UINT16_MAX if and only if it provides uint16_t, so
 * that macro is tested instead.  If <stdint.h> has not been included by
 * the time this header is read, the unsigned short fallback is used,
 * exactly as before.
 */
#ifdef UINT16_MAX
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif
|
2004-01-17 10:29:29 -04:00
|
|
|
|
2018-11-01 07:48:49 -03:00
|
|
|
/*
 * Eight bytes of state made available to multibyte codecs; each codec is
 * free to use them however it likes.
 *
 * Note: a field added to this struct must have a byte order that does not
 * depend on CPU endianness, so that the value returned by getstate is the
 * same on little-endian and big-endian CPUs.
 */
typedef struct {
    unsigned char c[8];     /* raw state bytes, layout chosen by the codec */
} MultibyteCodec_State;
|
|
|
|
|
2023-04-27 10:02:43 -03:00
|
|
|
/* Incomplete struct types, declared up front so that later declarations
 * in this header can refer to them through pointers. */
struct _multibyte_codec;
struct _cjk_mod_state;
|
|
|
|
|
|
|
|
/*
 * Type of the MultibyteCodec.codecinit callback.
 *
 *   codec   codec description the callback is invoked for (read-only)
 *
 * Returns an int; NOTE(review): presumably 0 on success and non-zero on
 * failure -- confirm against the callers.
 */
typedef int (*mbcodec_init)(
    const struct _multibyte_codec *codec);
|
2006-03-04 12:08:19 -04:00
|
|
|
/*
 * Type of the MultibyteCodec.encode callback.
 *
 *   state     codec state bytes (writable)
 *   codec     codec description the call is made for (read-only)
 *   kind,
 *   data      the input; NOTE(review): presumably a PyUnicode "kind" and
 *             the matching character buffer -- confirm against callers
 *   inpos     pointer to the current input position
 *   inlen     input length
 *   outbuf    pointer to the output write pointer
 *   outleft   space left in the output buffer
 *   flags     NOTE(review): presumably MBENC_* bits -- confirm
 *
 * NOTE(review): the Py_ssize_t result presumably follows the MBERR_*
 * convention defined in this header -- confirm against callers.
 */
typedef Py_ssize_t (*mbencode_func)(
    MultibyteCodec_State *state,
    const struct _multibyte_codec *codec,
    int kind,
    const void *data,
    Py_ssize_t *inpos,
    Py_ssize_t inlen,
    unsigned char **outbuf,
    Py_ssize_t outleft,
    int flags);
|
2004-07-18 00:06:29 -03:00
|
|
|
/*
 * Type of the MultibyteCodec.encinit callback.
 *
 *   state   codec state bytes (writable)
 *   codec   codec description the call is made for (read-only)
 *
 * Returns an int; NOTE(review): success/failure convention is not visible
 * in this header -- confirm against callers.
 */
typedef int (*mbencodeinit_func)(
    MultibyteCodec_State *state,
    const struct _multibyte_codec *codec);
|
2006-03-04 12:08:19 -04:00
|
|
|
/*
 * Type of the MultibyteCodec.encreset callback.
 *
 *   state     codec state bytes (writable)
 *   codec     codec description the call is made for (read-only)
 *   outbuf    pointer to the output write pointer
 *   outleft   space left in the output buffer
 *
 * NOTE(review): the Py_ssize_t result presumably follows the MBERR_*
 * convention defined in this header -- confirm against callers.
 */
typedef Py_ssize_t (*mbencodereset_func)(
    MultibyteCodec_State *state,
    const struct _multibyte_codec *codec,
    unsigned char **outbuf,
    Py_ssize_t outleft);
|
2006-03-04 12:08:19 -04:00
|
|
|
/*
 * Type of the MultibyteCodec.decode callback.
 *
 *   state     codec state bytes (writable)
 *   codec     codec description the call is made for (read-only)
 *   inbuf     pointer to the input read pointer
 *   inleft    number of input bytes remaining
 *   writer    _PyUnicodeWriter the callback is handed for its output
 *
 * NOTE(review): the Py_ssize_t result presumably follows the MBERR_*
 * convention defined in this header -- confirm against callers.
 */
typedef Py_ssize_t (*mbdecode_func)(
    MultibyteCodec_State *state,
    const struct _multibyte_codec *codec,
    const unsigned char **inbuf,
    Py_ssize_t inleft,
    _PyUnicodeWriter *writer);
|
2004-07-18 00:06:29 -03:00
|
|
|
/*
 * Type of the MultibyteCodec.decinit callback.
 *
 *   state   codec state bytes (writable)
 *   codec   codec description the call is made for (read-only)
 *
 * Returns an int; NOTE(review): success/failure convention is not visible
 * in this header -- confirm against callers.
 */
typedef int (*mbdecodeinit_func)(
    MultibyteCodec_State *state,
    const struct _multibyte_codec *codec);
|
2006-03-04 12:08:19 -04:00
|
|
|
/*
 * Type of the MultibyteCodec.decreset callback.
 *
 *   state   codec state bytes (writable)
 *   codec   codec description the call is made for (read-only)
 *
 * NOTE(review): the meaning of the Py_ssize_t result is not visible in
 * this header -- confirm against callers.
 */
typedef Py_ssize_t (*mbdecodereset_func)(
    MultibyteCodec_State *state,
    const struct _multibyte_codec *codec);
|
2004-01-17 10:29:29 -04:00
|
|
|
|
2023-04-27 10:02:43 -03:00
|
|
|
typedef struct _multibyte_codec {
|
2010-05-09 12:52:27 -03:00
|
|
|
const char *encoding;
|
|
|
|
const void *config;
|
|
|
|
mbcodec_init codecinit;
|
|
|
|
mbencode_func encode;
|
|
|
|
mbencodeinit_func encinit;
|
|
|
|
mbencodereset_func encreset;
|
|
|
|
mbdecode_func decode;
|
|
|
|
mbdecodeinit_func decinit;
|
|
|
|
mbdecodereset_func decreset;
|
2023-04-27 10:02:43 -03:00
|
|
|
struct _cjk_mod_state *modstate;
|
2004-01-17 10:29:29 -04:00
|
|
|
} MultibyteCodec;
|
|
|
|
|
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
PyObject_HEAD
|
2023-04-19 00:08:16 -03:00
|
|
|
const MultibyteCodec *codec;
|
2023-04-19 13:02:17 -03:00
|
|
|
PyObject *cjk_module;
|
2004-01-17 10:29:29 -04:00
|
|
|
} MultibyteCodecObject;
|
|
|
|
|
2021-01-04 17:33:45 -04:00
|
|
|
/*
 * Evaluates Py_IS_TYPE(op, state->multibytecodec_type): whether `op` has
 * the type recorded in the `multibytecodec_type` member of `state`.
 *
 * `state` is parenthesized so that any pointer-valued expression (for
 * example a conditional expression) can be passed, not just a plain
 * identifier.
 */
#define MultibyteCodec_Check(state, op) \
    Py_IS_TYPE((op), (state)->multibytecodec_type)
|
2006-04-21 07:40:58 -03:00
|
|
|
|
2010-05-09 12:52:27 -03:00
|
|
|
/*
 * Leading members of a stateful codec object: the Python object header,
 * the codec description pointer, the codec state bytes and the `errors`
 * object.
 *
 * The expansion already ends with ';', so the macro is written inside a
 * struct body without a trailing semicolon.
 */
#define _MultibyteStatefulCodec_HEAD    \
    PyObject_HEAD                       \
    const MultibyteCodec *codec;        \
    MultibyteCodec_State state;         \
    PyObject *errors;
|
2004-01-17 10:29:29 -04:00
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulCodec_HEAD
|
2006-04-21 07:40:58 -03:00
|
|
|
} MultibyteStatefulCodecContext;
|
2004-01-17 10:29:29 -04:00
|
|
|
|
2010-05-09 12:52:27 -03:00
|
|
|
/* NOTE(review): presumably the upper bound on pending (not yet encoded)
 * input kept by an encoder -- the limit is not applied in this header;
 * confirm against its users. */
#define MAXENCPENDING 2

/*
 * Leading members of a stateful encoder object: everything from
 * _MultibyteStatefulCodec_HEAD followed by the `pending` object.
 *
 * The expansion already ends with ';', so the macro is written inside a
 * struct body without a trailing semicolon.
 */
#define _MultibyteStatefulEncoder_HEAD  \
    _MultibyteStatefulCodec_HEAD        \
    PyObject *pending;
|
2004-01-17 10:29:29 -04:00
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulEncoder_HEAD
|
2006-04-21 07:40:58 -03:00
|
|
|
} MultibyteStatefulEncoderContext;
|
|
|
|
|
2010-05-09 12:52:27 -03:00
|
|
|
/* Capacity, in bytes, of the decoder's `pending` buffer. */
#define MAXDECPENDING 8

/*
 * Leading members of a stateful decoder object: everything from
 * _MultibyteStatefulCodec_HEAD followed by a MAXDECPENDING-byte `pending`
 * buffer and its companion `pendingsize`.
 *
 * The expansion already ends with ';', so the macro is written inside a
 * struct body without a trailing semicolon.
 */
#define _MultibyteStatefulDecoder_HEAD          \
    _MultibyteStatefulCodec_HEAD                \
    unsigned char pending[MAXDECPENDING];       \
    Py_ssize_t pendingsize;
|
2006-04-21 07:40:58 -03:00
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulDecoder_HEAD
|
2006-04-21 07:40:58 -03:00
|
|
|
} MultibyteStatefulDecoderContext;
|
|
|
|
|
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulEncoder_HEAD
|
2006-04-21 07:40:58 -03:00
|
|
|
} MultibyteIncrementalEncoderObject;
|
|
|
|
|
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulDecoder_HEAD
|
2006-04-21 07:40:58 -03:00
|
|
|
} MultibyteIncrementalDecoderObject;
|
|
|
|
|
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulDecoder_HEAD
|
|
|
|
PyObject *stream;
|
2006-04-21 07:40:58 -03:00
|
|
|
} MultibyteStreamReaderObject;
|
|
|
|
|
|
|
|
typedef struct {
|
2010-05-09 12:52:27 -03:00
|
|
|
_MultibyteStatefulEncoder_HEAD
|
|
|
|
PyObject *stream;
|
2004-01-17 10:29:29 -04:00
|
|
|
} MultibyteStreamWriterObject;
|
|
|
|
|
|
|
|
/*
 * Negative MBERR_* error codes.  Positive values are used for illegal
 * sequences.
 */
#define MBERR_TOOSMALL      (-1)    /* output buffer has too little room */
#define MBERR_TOOFEW        (-2)    /* input buffer is incomplete */
#define MBERR_INTERNAL      (-3)    /* internal runtime error */
#define MBERR_EXCEPTION     (-4)    /* an exception has been raised */
|
2010-05-09 12:52:27 -03:00
|
|
|
|
|
|
|
/*
 * Three sentinel values (the integers 1, 2 and 3 cast to PyObject *) that
 * can be stored where an error-handler object pointer is expected.  They
 * are not real objects and must never be dereferenced.
 *
 * Each expansion is wrapped in an outer pair of parentheses so that it
 * behaves as a single primary expression wherever it is used (e.g. after
 * sizeof or before a postfix operator).
 */
#define ERROR_STRICT        ((PyObject *)(1))
#define ERROR_IGNORE        ((PyObject *)(2))
#define ERROR_REPLACE       ((PyObject *)(3))

/*
 * True when `p` lies outside the ERROR_STRICT..ERROR_REPLACE sentinel
 * range.  Note that NULL is also outside that range.  `p` is evaluated
 * twice, so it must not have side effects.
 */
#define ERROR_ISCUSTOM(p)   ((p) < ERROR_STRICT || ERROR_REPLACE < (p))
|
2013-10-28 20:19:27 -03:00
|
|
|
/*
 * Py_DECREF `p` only when it is a real object, i.e. neither NULL nor one
 * of the ERROR_STRICT / ERROR_IGNORE / ERROR_REPLACE sentinels.
 *
 * The expansion is a single statement without a trailing ';' (the caller
 * supplies it), so the macro can be used as the body of an if/else.  `p`
 * is evaluated more than once and must not have side effects.
 */
#define ERROR_DECREF(p)                             \
    do {                                            \
        if ((p) != NULL && ERROR_ISCUSTOM(p)) {     \
            Py_DECREF(p);                           \
        }                                           \
    } while (0)
|
2004-01-17 10:29:29 -04:00
|
|
|
|
2010-05-09 12:52:27 -03:00
|
|
|
/* Encoder flag bits. */
#define MBENC_FLUSH     0x0001      /* encode every character that can be
                                       encoded */
#define MBENC_MAX       MBENC_FLUSH /* alias of MBENC_FLUSH */
|
2004-01-17 10:29:29 -04:00
|
|
|
|
2023-04-19 13:02:17 -03:00
|
|
|
typedef struct {
|
|
|
|
const MultibyteCodec *codec;
|
|
|
|
PyObject *cjk_module;
|
|
|
|
} codec_capsule;
|
|
|
|
|
|
|
|
/* Name strings for the "map" and "codec" capsules. */
#define MAP_CAPSULE     "multibytecodec.map"
#define CODEC_CAPSULE   "multibytecodec.codec"
|
2009-05-05 19:31:58 -03:00
|
|
|
|
|
|
|
|
2004-01-17 10:29:29 -04:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|