bpo-33578: Add getstate/setstate for CJK codec (GH-6984)
This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell. The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long. https://bugs.python.org/issue33578
This commit is contained in:
parent
4b5e62dbb2
commit
ac22f6aa98
|
@ -2971,6 +2971,34 @@ class TextIOWrapperTest(unittest.TestCase):
|
|||
finally:
|
||||
StatefulIncrementalDecoder.codecEnabled = 0
|
||||
|
||||
def test_multibyte_seek_and_tell(self):
|
||||
f = self.open(support.TESTFN, "w", encoding="euc_jp")
|
||||
f.write("AB\n\u3046\u3048\n")
|
||||
f.close()
|
||||
|
||||
f = self.open(support.TESTFN, "r", encoding="euc_jp")
|
||||
self.assertEqual(f.readline(), "AB\n")
|
||||
p0 = f.tell()
|
||||
self.assertEqual(f.readline(), "\u3046\u3048\n")
|
||||
p1 = f.tell()
|
||||
f.seek(p0)
|
||||
self.assertEqual(f.readline(), "\u3046\u3048\n")
|
||||
self.assertEqual(f.tell(), p1)
|
||||
f.close()
|
||||
|
||||
def test_seek_with_encoder_state(self):
|
||||
f = self.open(support.TESTFN, "w", encoding="euc_jis_2004")
|
||||
f.write("\u00e6\u0300")
|
||||
p0 = f.tell()
|
||||
f.write("\u00e6")
|
||||
f.seek(p0)
|
||||
f.write("\u0300")
|
||||
f.close()
|
||||
|
||||
f = self.open(support.TESTFN, "r", encoding="euc_jis_2004")
|
||||
self.assertEqual(f.readline(), "\u00e6\u0300\u0300")
|
||||
f.close()
|
||||
|
||||
def test_encoded_writes(self):
|
||||
data = "1234567890"
|
||||
tests = ("utf-16",
|
||||
|
|
|
@ -117,6 +117,88 @@ class Test_IncrementalEncoder(unittest.TestCase):
|
|||
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
|
||||
self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
|
||||
|
||||
def test_state_methods_with_buffer_state(self):
|
||||
# euc_jis_2004 stores state as a buffer of pending bytes
|
||||
encoder = codecs.getincrementalencoder('euc_jis_2004')()
|
||||
|
||||
initial_state = encoder.getstate()
|
||||
self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
|
||||
encoder.setstate(initial_state)
|
||||
self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
|
||||
|
||||
self.assertEqual(encoder.encode('\u00e6'), b'')
|
||||
partial_state = encoder.getstate()
|
||||
self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
|
||||
encoder.setstate(partial_state)
|
||||
self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
|
||||
|
||||
def test_state_methods_with_non_buffer_state(self):
|
||||
# iso2022_jp stores state without using a buffer
|
||||
encoder = codecs.getincrementalencoder('iso2022_jp')()
|
||||
|
||||
self.assertEqual(encoder.encode('z'), b'z')
|
||||
en_state = encoder.getstate()
|
||||
|
||||
self.assertEqual(encoder.encode('\u3042'), b'\x1b\x24\x42\x24\x22')
|
||||
jp_state = encoder.getstate()
|
||||
self.assertEqual(encoder.encode('z'), b'\x1b\x28\x42z')
|
||||
|
||||
encoder.setstate(jp_state)
|
||||
self.assertEqual(encoder.encode('\u3042'), b'\x24\x22')
|
||||
|
||||
encoder.setstate(en_state)
|
||||
self.assertEqual(encoder.encode('z'), b'z')
|
||||
|
||||
def test_getstate_returns_expected_value(self):
|
||||
# Note: getstate is implemented such that these state values
|
||||
# are expected to be the same across all builds of Python,
|
||||
# regardless of x32/64 bit, endianness and compiler.
|
||||
|
||||
# euc_jis_2004 stores state as a buffer of pending bytes
|
||||
buffer_state_encoder = codecs.getincrementalencoder('euc_jis_2004')()
|
||||
self.assertEqual(buffer_state_encoder.getstate(), 0)
|
||||
buffer_state_encoder.encode('\u00e6')
|
||||
self.assertEqual(buffer_state_encoder.getstate(),
|
||||
int.from_bytes(
|
||||
b"\x02"
|
||||
b"\xc3\xa6"
|
||||
b"\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
'little'))
|
||||
buffer_state_encoder.encode('\u0300')
|
||||
self.assertEqual(buffer_state_encoder.getstate(), 0)
|
||||
|
||||
# iso2022_jp stores state without using a buffer
|
||||
non_buffer_state_encoder = codecs.getincrementalencoder('iso2022_jp')()
|
||||
self.assertEqual(non_buffer_state_encoder.getstate(),
|
||||
int.from_bytes(
|
||||
b"\x00"
|
||||
b"\x42\x42\x00\x00\x00\x00\x00\x00",
|
||||
'little'))
|
||||
non_buffer_state_encoder.encode('\u3042')
|
||||
self.assertEqual(non_buffer_state_encoder.getstate(),
|
||||
int.from_bytes(
|
||||
b"\x00"
|
||||
b"\xc2\x42\x00\x00\x00\x00\x00\x00",
|
||||
'little'))
|
||||
|
||||
def test_setstate_validates_input_size(self):
|
||||
encoder = codecs.getincrementalencoder('euc_jp')()
|
||||
pending_size_nine = int.from_bytes(
|
||||
b"\x09"
|
||||
b"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
b"\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
'little')
|
||||
self.assertRaises(UnicodeError, encoder.setstate, pending_size_nine)
|
||||
|
||||
def test_setstate_validates_input_bytes(self):
|
||||
encoder = codecs.getincrementalencoder('euc_jp')()
|
||||
invalid_utf8 = int.from_bytes(
|
||||
b"\x01"
|
||||
b"\xff"
|
||||
b"\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
'little')
|
||||
self.assertRaises(UnicodeDecodeError, encoder.setstate, invalid_utf8)
|
||||
|
||||
def test_issue5640(self):
|
||||
encoder = codecs.getincrementalencoder('shift-jis')('backslashreplace')
|
||||
self.assertEqual(encoder.encode('\xff'), b'\\xff')
|
||||
|
@ -165,6 +247,37 @@ class Test_IncrementalDecoder(unittest.TestCase):
|
|||
decoder = codecs.getincrementaldecoder(enc)()
|
||||
self.assertRaises(TypeError, decoder.decode, "")
|
||||
|
||||
def test_state_methods(self):
|
||||
decoder = codecs.getincrementaldecoder('euc_jp')()
|
||||
|
||||
# Decode a complete input sequence
|
||||
self.assertEqual(decoder.decode(b'\xa4\xa6'), '\u3046')
|
||||
pending1, _ = decoder.getstate()
|
||||
self.assertEqual(pending1, b'')
|
||||
|
||||
# Decode first half of a partial input sequence
|
||||
self.assertEqual(decoder.decode(b'\xa4'), '')
|
||||
pending2, flags2 = decoder.getstate()
|
||||
self.assertEqual(pending2, b'\xa4')
|
||||
|
||||
# Decode second half of a partial input sequence
|
||||
self.assertEqual(decoder.decode(b'\xa6'), '\u3046')
|
||||
pending3, _ = decoder.getstate()
|
||||
self.assertEqual(pending3, b'')
|
||||
|
||||
# Jump back and decode second half of partial input sequence again
|
||||
decoder.setstate((pending2, flags2))
|
||||
self.assertEqual(decoder.decode(b'\xa6'), '\u3046')
|
||||
pending4, _ = decoder.getstate()
|
||||
self.assertEqual(pending4, b'')
|
||||
|
||||
def test_setstate_validates_input(self):
|
||||
decoder = codecs.getincrementaldecoder('euc_jp')()
|
||||
self.assertRaises(TypeError, decoder.setstate, 123)
|
||||
self.assertRaises(TypeError, decoder.setstate, ("invalid", 0))
|
||||
self.assertRaises(TypeError, decoder.setstate, (b"1234", "invalid"))
|
||||
self.assertRaises(UnicodeError, decoder.setstate, (b"123456789", 0))
|
||||
|
||||
class Test_StreamReader(unittest.TestCase):
|
||||
def test_bug1728403(self):
|
||||
try:
|
||||
|
|
|
@ -1626,6 +1626,7 @@ Nicolas M. Thiéry
|
|||
James Thomas
|
||||
Robin Thomas
|
||||
Brian Thorne
|
||||
Christopher Thorne
|
||||
Stephen Thorne
|
||||
Jeremy Thurgood
|
||||
Eric Tiedemann
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Implement multibyte encoder/decoder state methods
|
|
@ -51,6 +51,12 @@
|
|||
; \
|
||||
}
|
||||
|
||||
/*
|
||||
* codecs in this file use the first byte of MultibyteCodec_State.c[8]
|
||||
* to store a 0 or 1 state value
|
||||
*/
|
||||
#define CN_STATE_OFFSET 0
|
||||
|
||||
/*
|
||||
* GB2312 codec
|
||||
*/
|
||||
|
@ -329,15 +335,15 @@ DECODER(gb18030)
|
|||
|
||||
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
if (state->c[CN_STATE_OFFSET] != 0) {
|
||||
WRITEBYTE2('~', '}');
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
NEXT_OUT(2);
|
||||
}
|
||||
return 0;
|
||||
|
@ -350,10 +356,10 @@ ENCODER(hz)
|
|||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i) {
|
||||
if (state->c[CN_STATE_OFFSET]) {
|
||||
WRITEBYTE2('~', '}');
|
||||
NEXT_OUT(2);
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
}
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
|
@ -375,10 +381,10 @@ ENCODER(hz)
|
|||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
if (state->c[CN_STATE_OFFSET] == 0) {
|
||||
WRITEBYTE4('~', '{', code >> 8, code & 0xff);
|
||||
NEXT(1, 4);
|
||||
state->i = 1;
|
||||
state->c[CN_STATE_OFFSET] = 1;
|
||||
}
|
||||
else {
|
||||
WRITEBYTE2(code >> 8, code & 0xff);
|
||||
|
@ -391,13 +397,13 @@ ENCODER(hz)
|
|||
|
||||
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
state->c[CN_STATE_OFFSET] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -411,14 +417,14 @@ DECODER(hz)
|
|||
unsigned char c2 = INBYTE2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
if (c2 == '~' && state->i == 0)
|
||||
if (c2 == '~' && state->c[CN_STATE_OFFSET] == 0)
|
||||
OUTCHAR('~');
|
||||
else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '\n' && state->i == 0)
|
||||
else if (c2 == '{' && state->c[CN_STATE_OFFSET] == 0)
|
||||
state->c[CN_STATE_OFFSET] = 1; /* set GB */
|
||||
else if (c2 == '\n' && state->c[CN_STATE_OFFSET] == 0)
|
||||
; /* line-continuation */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else if (c2 == '}' && state->c[CN_STATE_OFFSET] == 1)
|
||||
state->c[CN_STATE_OFFSET] = 0; /* set ASCII */
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
|
@ -428,7 +434,7 @@ DECODER(hz)
|
|||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
if (state->c[CN_STATE_OFFSET] == 0) { /* ASCII mode */
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
}
|
||||
|
|
|
@ -115,6 +115,50 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_getstate__doc__,
|
||||
"getstate($self, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF \
|
||||
{"getstate", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_getstate, METH_NOARGS, _multibytecodec_MultibyteIncrementalEncoder_getstate__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_getstate(MultibyteIncrementalEncoderObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(self);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_setstate__doc__,
|
||||
"setstate($self, state, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF \
|
||||
{"setstate", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_setstate, METH_O, _multibytecodec_MultibyteIncrementalEncoder_setstate__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
|
||||
PyLongObject *statelong);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_setstate(MultibyteIncrementalEncoderObject *self, PyObject *arg)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyLongObject *statelong;
|
||||
|
||||
if (!PyArg_Parse(arg, "O!:setstate", &PyLong_Type, &statelong)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(self, statelong);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_reset__doc__,
|
||||
"reset($self, /)\n"
|
||||
"--\n"
|
||||
|
@ -169,6 +213,50 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_getstate__doc__,
|
||||
"getstate($self, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF \
|
||||
{"getstate", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_getstate, METH_NOARGS, _multibytecodec_MultibyteIncrementalDecoder_getstate__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_getstate(MultibyteIncrementalDecoderObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(self);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_setstate__doc__,
|
||||
"setstate($self, state, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF \
|
||||
{"setstate", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_setstate, METH_O, _multibytecodec_MultibyteIncrementalDecoder_setstate__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
|
||||
PyObject *state);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_setstate(MultibyteIncrementalDecoderObject *self, PyObject *arg)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *state;
|
||||
|
||||
if (!PyArg_Parse(arg, "O!:setstate", &PyTuple_Type, &state)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(self, state);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_reset__doc__,
|
||||
"reset($self, /)\n"
|
||||
"--\n"
|
||||
|
@ -330,4 +418,4 @@ PyDoc_STRVAR(_multibytecodec___create_codec__doc__,
|
|||
|
||||
#define _MULTIBYTECODEC___CREATE_CODEC_METHODDEF \
|
||||
{"__create_codec", (PyCFunction)_multibytecodec___create_codec, METH_O, _multibytecodec___create_codec__doc__},
|
||||
/*[clinic end generated code: output=680f59f4cfe63c25 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=2fa0a38494716b97 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -895,6 +895,93 @@ _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEnco
|
|||
return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalEncoder.getstate
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
|
||||
/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
|
||||
{
|
||||
/* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
|
||||
for UTF-8 encoded buffer (each character can use up to 4
|
||||
bytes), and required bytes for MultibyteCodec_State.c. A byte
|
||||
array is used to avoid different compilers generating different
|
||||
values for the same state, e.g. as a result of struct padding.
|
||||
*/
|
||||
unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
|
||||
Py_ssize_t statesize;
|
||||
const char *pendingbuffer = NULL;
|
||||
Py_ssize_t pendingsize;
|
||||
|
||||
if (self->pending != NULL) {
|
||||
pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
|
||||
if (pendingbuffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (pendingsize > MAXENCPENDING*4) {
|
||||
PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
|
||||
return NULL;
|
||||
}
|
||||
statebytes[0] = pendingsize;
|
||||
memcpy(statebytes+1, pendingbuffer, pendingsize);
|
||||
statesize = 1 + pendingsize;
|
||||
} else {
|
||||
statebytes[0] = 0;
|
||||
statesize = 1;
|
||||
}
|
||||
memcpy(statebytes+statesize, self->state.c,
|
||||
sizeof(self->state.c));
|
||||
statesize += sizeof(self->state.c);
|
||||
|
||||
return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
|
||||
1 /* little-endian */ ,
|
||||
0 /* unsigned */ );
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalEncoder.setstate
|
||||
state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
|
||||
PyLongObject *statelong)
|
||||
/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
|
||||
{
|
||||
PyObject *pending = NULL;
|
||||
unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
|
||||
|
||||
if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
|
||||
1 /* little-endian */ ,
|
||||
0 /* unsigned */ ) < 0) {
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
if (statebytes[0] > MAXENCPENDING*4) {
|
||||
PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
|
||||
statebytes[0], "strict");
|
||||
if (pending == NULL) {
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
Py_CLEAR(self->pending);
|
||||
self->pending = pending;
|
||||
memcpy(self->state.c, statebytes+1+statebytes[0],
|
||||
sizeof(self->state.c));
|
||||
|
||||
Py_RETURN_NONE;
|
||||
|
||||
errorexit:
|
||||
Py_XDECREF(pending);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalEncoder.reset
|
||||
[clinic start generated code]*/
|
||||
|
@ -919,6 +1006,8 @@ _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncod
|
|||
|
||||
static struct PyMethodDef mbiencoder_methods[] = {
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
@ -984,6 +1073,7 @@ mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
|
|||
{
|
||||
PyObject_GC_UnTrack(self);
|
||||
ERROR_DECREF(self->errors);
|
||||
Py_CLEAR(self->pending);
|
||||
Py_TYPE(self)->tp_free(self);
|
||||
}
|
||||
|
||||
|
@ -1119,6 +1209,68 @@ errorexit:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalDecoder.getstate
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
|
||||
/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
|
||||
{
|
||||
PyObject *buffer;
|
||||
|
||||
buffer = PyBytes_FromStringAndSize((const char *)self->pending,
|
||||
self->pendingsize);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return make_tuple(buffer, (Py_ssize_t)*self->state.c);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalDecoder.setstate
|
||||
state: object(subclass_of='&PyTuple_Type')
|
||||
/
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
|
||||
PyObject *state)
|
||||
/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
|
||||
{
|
||||
PyObject *buffer;
|
||||
Py_ssize_t buffersize;
|
||||
char *bufferstr;
|
||||
unsigned long long flag;
|
||||
|
||||
if (!PyArg_ParseTuple(state, "SK;setstate(): illegal state argument",
|
||||
&buffer, &flag))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffersize = PyBytes_Size(buffer);
|
||||
if (buffersize == -1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (buffersize > MAXDECPENDING) {
|
||||
PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bufferstr = PyBytes_AsString(buffer);
|
||||
if (bufferstr == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
self->pendingsize = buffersize;
|
||||
memcpy(self->pending, bufferstr, self->pendingsize);
|
||||
memcpy(self->state.c, (unsigned char *)&flag, sizeof(flag));
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_multibytecodec.MultibyteIncrementalDecoder.reset
|
||||
[clinic start generated code]*/
|
||||
|
@ -1137,6 +1289,8 @@ _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecod
|
|||
|
||||
static struct PyMethodDef mbidecoder_methods[] = {
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
|
||||
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
|
|
@ -16,12 +16,15 @@ typedef uint16_t ucs2_t, DBCHAR;
|
|||
typedef unsigned short ucs2_t, DBCHAR;
|
||||
#endif
|
||||
|
||||
/*
 * A struct that provides 8 bytes of state for multibyte
 * codecs. Codecs are free to use this how they want. Note: if you
 * need to add a new field to this struct, ensure that its byte order
 * is independent of CPU endianness so that the return value of
 * getstate doesn't differ between little and big endian CPUs.
 */
typedef struct {
    unsigned char c[8];
} MultibyteCodec_State;
|
||||
|
||||
typedef int (*mbcodec_init)(const void *config);
|
||||
|
|
Loading…
Reference in New Issue