experimental UCS-4 support: added USE_UCS4_STORAGE define to

unicodeobject.h, which forces sizeof(Py_UNICODE) == sizeof(Py_UCS4).
(this may be good enough for platforms that don't have a 16-bit
type.  the UTF-16 codecs don't work, though)
This commit is contained in:
Fredrik Lundh 2001-06-26 17:17:07 +00:00
parent 45714e9ecb
commit 1294ad0c59
3 changed files with 25 additions and 12 deletions

View File

@ -58,6 +58,19 @@ Copyright (c) Corporation for National Research Initiatives.
/* --- Internal Unicode Format -------------------------------------------- */ /* --- Internal Unicode Format -------------------------------------------- */
/* experimental UCS-4 support. enable at your own risk! */
#undef USE_UCS4_STORAGE
/*
* Use this typedef when you need to represent a UTF-16 surrogate pair
* as single unsigned integer.
*/
#if SIZEOF_INT >= 4
typedef unsigned int Py_UCS4;
#elif SIZEOF_LONG >= 4
typedef unsigned long Py_UCS4;
#endif
/* Set these flags if the platform has "wchar.h", "wctype.h" and the /* Set these flags if the platform has "wchar.h", "wctype.h" and the
wchar_t type is a 16-bit unsigned type */ wchar_t type is a 16-bit unsigned type */
/* #define HAVE_WCHAR_H */ /* #define HAVE_WCHAR_H */
@ -66,8 +79,8 @@ Copyright (c) Corporation for National Research Initiatives.
/* Defaults for various platforms */ /* Defaults for various platforms */
#ifndef HAVE_USABLE_WCHAR_T #ifndef HAVE_USABLE_WCHAR_T
/* Windows has a usable wchar_t type */ /* Windows has a usable wchar_t type (unless we're using UCS-4) */
# if defined(MS_WIN32) # if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE)
# define HAVE_USABLE_WCHAR_T # define HAVE_USABLE_WCHAR_T
# endif # endif
@ -105,18 +118,12 @@ typedef wchar_t Py_UNICODE;
If a short is not 16 bits on your platform, you have to fix the If a short is not 16 bits on your platform, you have to fix the
typedef below, or the module initialization code will complain. */ typedef below, or the module initialization code will complain. */
#ifdef USE_UCS4_STORAGE
typedef Py_UCS4 Py_UNICODE;
#else
typedef unsigned short Py_UNICODE; typedef unsigned short Py_UNICODE;
#endif #endif
/*
* Use this typedef when you need to represent a UTF-16 surrogate pair
* as single unsigned integer.
*/
#if SIZEOF_INT >= 4
typedef unsigned int Py_UCS4;
#elif SIZEOF_LONG >= 4
typedef unsigned long Py_UCS4;
#endif #endif

View File

@ -14,7 +14,11 @@
#include "sre_constants.h" #include "sre_constants.h"
/* size of a code word (must be unsigned short or larger) */ /* size of a code word (must be unsigned short or larger) */
#ifdef USE_UCS4_STORAGE
#define SRE_CODE unsigned long
#else
#define SRE_CODE unsigned short #define SRE_CODE unsigned short
#endif
typedef struct { typedef struct {
PyObject_VAR_HEAD PyObject_VAR_HEAD

View File

@ -5282,9 +5282,11 @@ void _PyUnicode_Init(void)
int i; int i;
/* Doublecheck the configuration... */ /* Doublecheck the configuration... */
#ifndef USE_UCS4_STORAGE
if (sizeof(Py_UNICODE) != 2) if (sizeof(Py_UNICODE) != 2)
Py_FatalError("Unicode configuration error: " Py_FatalError("Unicode configuration error: "
"sizeof(Py_UNICODE) != 2 bytes"); "sizeof(Py_UNICODE) != 2 bytes");
#endif
/* Init the implementation */ /* Init the implementation */
unicode_freelist = NULL; unicode_freelist = NULL;