Check whether the choosen encoding requires byte swapping

for this iconv() implementation in the init function.

For encoding: use a byteswapped version of the input if
neccessary.

For decoding: byteswap every piece returned by iconv()
if neccessary (but not those pieces returned from the
callback)

Comment out test_sane() in the test script, because
whether this works depends on whether byte swapping
is neccessary or not (an on Py_UNICODE_SIZE)
This commit is contained in:
Walter Dörwald 2003-01-30 19:55:28 +00:00
parent 2b93c4c708
commit b4ff1113ca
2 changed files with 93 additions and 11 deletions

View File

@ -7,11 +7,15 @@ from StringIO import StringIO
class IconvCodecTest(unittest.TestCase): class IconvCodecTest(unittest.TestCase):
if sys.byteorder == 'big': if sys.byteorder == 'big':
spam = '\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m' spam = '\x00s\x00p\x00a\x00m' * 2
else: else:
spam = 's\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00' spam = 's\x00p\x00a\x00m\x00' * 2
def test_sane(self): def test_sane(self):
# FIXME: Commented out, because it's not clear whether
# the internal encoding choosen requires byte swapping
# for this iconv() implementation.
if False:
self.encoder, self.decoder, self.reader, self.writer = \ self.encoder, self.decoder, self.reader, self.writer = \
codecs.lookup(_iconv_codec.internal_encoding) codecs.lookup(_iconv_codec.internal_encoding)
self.assertEqual(self.decoder(self.spam), (u'spamspam', 16)) self.assertEqual(self.decoder(self.spam), (u'spamspam', 16))

View File

@ -42,6 +42,10 @@ PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object");
staticforward PyTypeObject iconvcodec_Type; staticforward PyTypeObject iconvcodec_Type;
/* does the choosen internal encoding require
* byteswapping to get native endianness?
* 0=no, 1=yes, -1=unknown */
static int byteswap = -1;
#define ERROR_STRICT (PyObject *)(1) #define ERROR_STRICT (PyObject *)(1)
#define ERROR_IGNORE (PyObject *)(2) #define ERROR_IGNORE (PyObject *)(2)
@ -88,6 +92,8 @@ iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
size_t inplen, inplen_total, outlen, outlen_total, estep; size_t inplen, inplen_total, outlen, outlen_total, estep;
PyObject *outputobj = NULL, *errorcb = NULL, PyObject *outputobj = NULL, *errorcb = NULL,
*exceptionobj = NULL; *exceptionobj = NULL;
Py_UNICODE *swappedinput;
int swapi;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode", if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode",
kwlist, &input, &inputlen, &errors)) kwlist, &input, &inputlen, &errors))
@ -121,6 +127,24 @@ iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
out = PyString_AS_STRING(outputobj) + (out - out_top); \ out = PyString_AS_STRING(outputobj) + (out - out_top); \
out_top = PyString_AS_STRING(outputobj); \ out_top = PyString_AS_STRING(outputobj); \
} }
if (byteswap) {
swappedinput = PyMem_Malloc(inplen);
if (swappedinput == NULL)
return NULL;
for (swapi = 0; swapi<inputlen; ++swapi)
{
Py_UNICODE c = input[swapi];
#if Py_UNICODE_SIZE == 2
c = ((char *)&c)[0]<<8 | ((char *)&c)[1];
#else
c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 |
((char *)&c)[2]<<8 | ((char *)&c)[3];
#endif
swappedinput[swapi] = c;
}
inp = inp_top = (char *)swappedinput;
}
while (inplen > 0) { while (inplen > 0) {
if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) == -1) { if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) == -1) {
char reason[128]; char reason[128];
@ -253,6 +277,8 @@ errorexit_cbpad: Py_XDECREF(retobj);
rettup = PyTuple_New(2); rettup = PyTuple_New(2);
if (rettup == NULL) { if (rettup == NULL) {
Py_DECREF(outputobj); Py_DECREF(outputobj);
if (byteswap)
PyMem_Free(swappedinput);
return NULL; return NULL;
} }
PyTuple_SET_ITEM(rettup, 0, outputobj); PyTuple_SET_ITEM(rettup, 0, outputobj);
@ -266,6 +292,8 @@ errorexit:
Py_DECREF(errorcb); Py_DECREF(errorcb);
} }
Py_XDECREF(exceptionobj); Py_XDECREF(exceptionobj);
if (byteswap)
PyMem_Free(swappedinput);
return NULL; return NULL;
} }
@ -319,7 +347,27 @@ iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \ out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \
} }
while (inplen > 0) { while (inplen > 0) {
if (iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen) == -1) { char *oldout = out;
char res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen);
if (byteswap) {
while (oldout < out)
{
char c0 = oldout[0];
#if Py_UNICODE_SIZE == 2
oldout[0] = oldout[1];
oldout[1] = c0;
#else
char c1 = oldout[1];
oldout[0] = oldout[3];
oldout[1] = oldout[2];
oldout[2] = c1;
oldout[3] = c0;
#endif
oldout += sizeof(Py_UNICODE);
}
}
if (res == -1) {
char reason[128], *reasonpos = (char *)reason; char reason[128], *reasonpos = (char *)reason;
int errpos; int errpos;
@ -602,6 +650,36 @@ init_iconv_codec(void)
{ {
PyObject *m; PyObject *m;
char in = 1;
char *inptr = &in;
int insize = 1;
Py_UNICODE out = 0;
char *outptr = (char *)&out;
int outsize = sizeof(out);
int res;
iconv_t hdl = iconv_open(UNICODE_ENCODING, "ASCII");
if (hdl == (iconv_t)-1)
Py_FatalError("can't initialize the _iconv_codec module: iconv_open() failed");
res = iconv(hdl, &inptr, &insize, &outptr, &outsize);
if (res == -1)
Py_FatalError("can't initialize the _iconv_codec module: iconv() failed");
/* Check whether conv() returned native endianess or not for the choosen encoding */
if (out == 0x1)
byteswap = 0;
#if Py_UNICODE_SIZE == 2
else if (out == 0x0100)
#else
else if (out == 0x01000000)
#endif
byteswap = 1;
else
Py_FatalError("can't initialize the _iconv_codec module: mixed endianess");
iconv_close(hdl);
m = Py_InitModule("_iconv_codec", _iconv_codec_methods); m = Py_InitModule("_iconv_codec", _iconv_codec_methods);
PyModule_AddStringConstant(m, "__version__", (char*)__version__); PyModule_AddStringConstant(m, "__version__", (char*)__version__);