diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 72d67343063..ae7904979b5 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -751,7 +751,7 @@ Encodings and Unicode Unicode strings are stored internally as sequences of codepoints (to be precise as :ctype:`Py_UNICODE` arrays). Depending on the way Python is compiled (either -via :option:`--enable-unicode=ucs2` or :option:`--enable-unicode=ucs4`, with the +via :option:`--without-wide-unicode` or :option:`--with-wide-unicode`, with the former being the default) :ctype:`Py_UNICODE` is either a 16-bit or 32-bit data type. Once a Unicode object is used outside of CPU and memory, CPU endianness and how these arrays are stored as bytes become an issue. Transforming a diff --git a/Makefile.pre.in b/Makefile.pre.in index 2379fdd0548..d7621854487 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -171,7 +171,6 @@ DYNLOADFILE= @DYNLOADFILE@ MACHDEP_OBJS= @MACHDEP_OBJS@ LIBOBJDIR= Python/ LIBOBJS= @LIBOBJS@ -UNICODE_OBJS= @UNICODE_OBJS@ PYTHON= python$(EXE) BUILDPYTHON= python$(BUILDEXE) @@ -320,8 +319,9 @@ OBJECT_OBJS= \ Objects/structseq.o \ Objects/tupleobject.o \ Objects/typeobject.o \ - Objects/weakrefobject.o \ - $(UNICODE_OBJS) + Objects/unicodeobject.o \ + Objects/unicodectype.o \ + Objects/weakrefobject.o ########################################################################## diff --git a/Misc/NEWS b/Misc/NEWS index 9ff17b43ccf..0bd12482ad2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -7,7 +7,7 @@ Python News What's New in Python 3.0a3? =========================== -*Release data: XX-XXX-2008* +*Release date: XX-XXX-2008* Core and Builtins ----------------- @@ -46,6 +46,7 @@ Extension Modules - Issue #1762972: Readded the reload() function as imp.reload() + Library ------- @@ -56,6 +57,13 @@ Library - Issue #1578: Problems in win_getpass. +Build +----- + +- Renamed --enable-unicode configure flag to --with-wide-unicode, since + Unicode strings can't be disabled anymore. + + C API ----- @@ -143,12 +151,12 @@ Library - Removed all types from the 'types' module that are easily accessable through builtins. + What's New in Python 3.0a1? ========================== *Release date: 31-Aug-2007* - Core and Builtins ----------------- diff --git a/PC/os2emx/pyconfig.h b/PC/os2emx/pyconfig.h index 1039d3d5231..380e19c08dd 100644 --- a/PC/os2emx/pyconfig.h +++ b/PC/os2emx/pyconfig.h @@ -58,7 +58,6 @@ #define WITH_DOC_STRINGS 1 /* Unicode related */ -#define Py_USING_UNICODE 1 #define PY_UNICODE_TYPE wchar_t #define Py_UNICODE_SIZE SIZEOF_SHORT diff --git a/PC/os2vacpp/pyconfig.h b/PC/os2vacpp/pyconfig.h index 97f9b80f75f..f7d388cd13c 100644 --- a/PC/os2vacpp/pyconfig.h +++ b/PC/os2vacpp/pyconfig.h @@ -80,7 +80,6 @@ /* #define SIZEOF_LONG_LONG 8 */ /* Count of Bytes in a (long long) */ /* unicode definines */ -#define Py_USING_UNICODE #define PY_UNICODE_TYPE wchar_t #define Py_UNICODE_SIZE SIZEOF_SHORT diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 8d0704a4b98..0b3314ec92a 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -218,10 +218,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, assert(tok->cur - tok->buf < INT_MAX); err_ret->offset = (int)(tok->cur - tok->buf); len = tok->inp - tok->buf; -#ifdef Py_USING_UNICODE text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); - -#endif if (text == NULL) { text = (char *) PyObject_MALLOC(len + 1); if (text != NULL) { diff --git a/configure b/configure index 0d2a04b2e6d..02f5f730c24 100755 --- a/configure +++ b/configure @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 59625 . +# From configure.in Revision: 59826 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.61 for python 3.0. # @@ -728,7 +728,6 @@ HAVE_GETHOSTBYNAME_R HAVE_GETHOSTBYNAME LIBM LIBC -UNICODE_OBJS THREADHEADERS SRCDIRS LTLIBOBJS' @@ -1321,8 +1320,6 @@ Optional Features: --enable-toolbox-glue disable/enable MacOSX glue code for extensions --enable-ipv6 Enable ipv6 (with ipv4) support --disable-ipv6 Disable ipv6 support - --enable-unicode[=ucs[24]] - Enable Unicode strings (default is yes) Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1349,6 +1346,7 @@ Optional Packages: --with-fpectl enable SIGFPE catching --with-libm=STRING math library --with-libc=STRING C library + --with-wide-unicode Use 4-byte Unicode characters (default is 2 bytes) Some influential environment variables: CC C compiler command @@ -21481,95 +21479,76 @@ fi echo "${ECHO_T}$ac_cv_wchar_t_signed" >&6; } fi -{ echo "$as_me:$LINENO: checking what type to use for unicode" >&5 -echo $ECHO_N "checking what type to use for unicode... $ECHO_C" >&6; } -# Check whether --enable-unicode was given. -if test "${enable_unicode+set}" = set; then - enableval=$enable_unicode; +{ echo "$as_me:$LINENO: checking what type to use for str" >&5 +echo $ECHO_N "checking what type to use for str... $ECHO_C" >&6; } + +# Check whether --with-wide-unicode was given. +if test "${with_wide_unicode+set}" = set; then + withval=$with_wide_unicode; +if test "$withval" != no +then unicode_size="4" +else unicode_size="2" +fi + else - enable_unicode=yes -fi +case "$have_ucs4_tcl" in + yes) unicode_size="4" ;; + *) unicode_size="2" ;; +esac -if test $enable_unicode = yes -then - # Without any arguments, Py_UNICODE defaults to two-byte mode - case "$have_ucs4_tcl" in - yes) enable_unicode="ucs4" - ;; - *) enable_unicode="ucs2" - ;; - esac fi -case "$enable_unicode" in -ucs2) unicode_size="2" - cat >>confdefs.h <<\_ACEOF -#define Py_UNICODE_SIZE 2 -_ACEOF - ;; -ucs4) unicode_size="4" - cat >>confdefs.h <<\_ACEOF +case "$unicode_size" in + 4) cat >>confdefs.h <<\_ACEOF #define Py_UNICODE_SIZE 4 _ACEOF - - ;; + ;; + *) cat >>confdefs.h <<\_ACEOF +#define Py_UNICODE_SIZE 2 +_ACEOF + ;; esac - -if test "$enable_unicode" = "no" -then - UNICODE_OBJS="" - { echo "$as_me:$LINENO: result: not used" >&5 -echo "${ECHO_T}not used" >&6; } -else - UNICODE_OBJS="Objects/unicodeobject.o Objects/unicodectype.o" - -cat >>confdefs.h <<\_ACEOF -#define Py_USING_UNICODE 1 -_ACEOF - - - # wchar_t is only usable if it maps to an unsigned type - if test "$unicode_size" = "$ac_cv_sizeof_wchar_t" \ +# wchar_t is only usable if it maps to an unsigned type +if test "$unicode_size" = "$ac_cv_sizeof_wchar_t" \ -a "$ac_cv_wchar_t_signed" = "no" - then - PY_UNICODE_TYPE="wchar_t" +then + PY_UNICODE_TYPE="wchar_t" cat >>confdefs.h <<\_ACEOF #define HAVE_USABLE_WCHAR_T 1 _ACEOF - cat >>confdefs.h <<\_ACEOF + cat >>confdefs.h <<\_ACEOF #define PY_UNICODE_TYPE wchar_t _ACEOF - elif test "$ac_cv_sizeof_short" = "$unicode_size" - then - PY_UNICODE_TYPE="unsigned short" - cat >>confdefs.h <<\_ACEOF +elif test "$ac_cv_sizeof_short" = "$unicode_size" +then + PY_UNICODE_TYPE="unsigned short" + cat >>confdefs.h <<\_ACEOF #define PY_UNICODE_TYPE unsigned short _ACEOF - elif test "$ac_cv_sizeof_long" = "$unicode_size" - then - PY_UNICODE_TYPE="unsigned long" - cat >>confdefs.h <<\_ACEOF +elif test "$ac_cv_sizeof_long" = "$unicode_size" +then + PY_UNICODE_TYPE="unsigned long" + cat >>confdefs.h <<\_ACEOF #define PY_UNICODE_TYPE unsigned long _ACEOF - else - PY_UNICODE_TYPE="no type found" - fi - { echo "$as_me:$LINENO: result: $PY_UNICODE_TYPE" >&5 -echo "${ECHO_T}$PY_UNICODE_TYPE" >&6; } +else + PY_UNICODE_TYPE="no type found" fi +{ echo "$as_me:$LINENO: result: $PY_UNICODE_TYPE" >&5 +echo "${ECHO_T}$PY_UNICODE_TYPE" >&6; } # check for endianness { echo "$as_me:$LINENO: checking whether byte ordering is bigendian" >&5 @@ -24432,13 +24411,12 @@ HAVE_GETHOSTBYNAME_R!$HAVE_GETHOSTBYNAME_R$ac_delim HAVE_GETHOSTBYNAME!$HAVE_GETHOSTBYNAME$ac_delim LIBM!$LIBM$ac_delim LIBC!$LIBC$ac_delim -UNICODE_OBJS!$UNICODE_OBJS$ac_delim THREADHEADERS!$THREADHEADERS$ac_delim SRCDIRS!$SRCDIRS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 19; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 18; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/configure.in b/configure.in index a92705c0c59..fe52c4b64b0 100644 --- a/configure.in +++ b/configure.in @@ -3003,72 +3003,55 @@ then ac_cv_wchar_t_signed=yes)]) AC_MSG_RESULT($ac_cv_wchar_t_signed) fi - -AC_MSG_CHECKING(what type to use for unicode) -dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output -AC_ARG_ENABLE(unicode, - AC_HELP_STRING(--enable-unicode@<:@=ucs@<:@24@:>@@:>@, Enable Unicode strings (default is yes)), - [], - [enable_unicode=yes]) -if test $enable_unicode = yes -then - # Without any arguments, Py_UNICODE defaults to two-byte mode - case "$have_ucs4_tcl" in - yes) enable_unicode="ucs4" - ;; - *) enable_unicode="ucs2" - ;; - esac +AC_MSG_CHECKING(what type to use for str) +AC_ARG_WITH(wide-unicode, + AC_HELP_STRING(--with-wide-unicode, Use 4-byte Unicode characters (default is 2 bytes)), +[ +if test "$withval" != no +then unicode_size="4" +else unicode_size="2" fi +], +[ +case "$have_ucs4_tcl" in + yes) unicode_size="4" ;; + *) unicode_size="2" ;; +esac +]) AH_TEMPLATE(Py_UNICODE_SIZE, [Define as the size of the unicode type.]) -case "$enable_unicode" in -ucs2) unicode_size="2" - AC_DEFINE(Py_UNICODE_SIZE,2) - ;; -ucs4) unicode_size="4" - AC_DEFINE(Py_UNICODE_SIZE,4) - ;; +case "$unicode_size" in + 4) AC_DEFINE(Py_UNICODE_SIZE, 4) ;; + *) AC_DEFINE(Py_UNICODE_SIZE, 2) ;; esac AH_TEMPLATE(PY_UNICODE_TYPE, [Define as the integral type used for Unicode representation.]) -AC_SUBST(UNICODE_OBJS) -if test "$enable_unicode" = "no" -then - UNICODE_OBJS="" - AC_MSG_RESULT(not used) -else - UNICODE_OBJS="Objects/unicodeobject.o Objects/unicodectype.o" - AC_DEFINE(Py_USING_UNICODE, 1, - [Define if you want to have a Unicode type.]) - - # wchar_t is only usable if it maps to an unsigned type - if test "$unicode_size" = "$ac_cv_sizeof_wchar_t" \ +# wchar_t is only usable if it maps to an unsigned type +if test "$unicode_size" = "$ac_cv_sizeof_wchar_t" \ -a "$ac_cv_wchar_t_signed" = "no" - then - PY_UNICODE_TYPE="wchar_t" - AC_DEFINE(HAVE_USABLE_WCHAR_T, 1, - [Define if you have a useable wchar_t type defined in wchar.h; useable - means wchar_t must be an unsigned type with at least 16 bits. (see - Include/unicodeobject.h).]) - AC_DEFINE(PY_UNICODE_TYPE,wchar_t) - elif test "$ac_cv_sizeof_short" = "$unicode_size" - then - PY_UNICODE_TYPE="unsigned short" - AC_DEFINE(PY_UNICODE_TYPE,unsigned short) - elif test "$ac_cv_sizeof_long" = "$unicode_size" - then - PY_UNICODE_TYPE="unsigned long" - AC_DEFINE(PY_UNICODE_TYPE,unsigned long) - else - PY_UNICODE_TYPE="no type found" - fi - AC_MSG_RESULT($PY_UNICODE_TYPE) +then + PY_UNICODE_TYPE="wchar_t" + AC_DEFINE(HAVE_USABLE_WCHAR_T, 1, + [Define if you have a useable wchar_t type defined in wchar.h; useable + means wchar_t must be an unsigned type with at least 16 bits. (see + Include/unicodeobject.h).]) + AC_DEFINE(PY_UNICODE_TYPE,wchar_t) +elif test "$ac_cv_sizeof_short" = "$unicode_size" +then + PY_UNICODE_TYPE="unsigned short" + AC_DEFINE(PY_UNICODE_TYPE,unsigned short) +elif test "$ac_cv_sizeof_long" = "$unicode_size" +then + PY_UNICODE_TYPE="unsigned long" + AC_DEFINE(PY_UNICODE_TYPE,unsigned long) +else + PY_UNICODE_TYPE="no type found" fi +AC_MSG_RESULT($PY_UNICODE_TYPE) # check for endianness AC_C_BIGENDIAN diff --git a/pyconfig.h.in b/pyconfig.h.in index dbd7f0484b4..41dd9437285 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -821,9 +821,6 @@ /* Define as the size of the unicode type. */ #undef Py_UNICODE_SIZE -/* Define if you want to have a Unicode type. */ -#undef Py_USING_UNICODE - /* Define as the return type of signal handlers (`int' or `void'). */ #undef RETSIGTYPE