Implement PEP 393.
This commit is contained in:
parent
48d49497c5
commit
d63a3b8beb
|
@ -1072,6 +1072,15 @@ They all return *NULL* or ``-1`` if an exception occurs.
|
|||
occurred and an exception has been set.
|
||||
|
||||
|
||||
.. c:function:: Py_ssize_t PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, Py_ssize_t start, Py_ssize_t end, int direction)
|
||||
|
||||
Return the first position of the character *ch* in ``str[start:end]`` using
|
||||
the given *direction* (*direction* == 1 means to do a forward search,
|
||||
*direction* == -1 a backward search). The return value is the index of the
|
||||
first match; a value of ``-1`` indicates that no match was found, and ``-2``
|
||||
indicates that an error occurred and an exception has been set.
|
||||
|
||||
|
||||
.. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end)
|
||||
|
||||
Return the number of non-overlapping occurrences of *substr* in
|
||||
|
|
|
@ -160,4 +160,9 @@ PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
|
|||
#define PyDoc_STR(str) ""
|
||||
#endif
|
||||
|
||||
#define PY_ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
|
||||
|
||||
#define PY_MIN(x, y) (((x) > (y)) ? (y) : (x))
|
||||
#define PY_MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
#endif /* !Py_PYTHON_H */
|
||||
|
|
|
@ -64,8 +64,9 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op);
|
|||
(Advanced String Formatting). */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj,
|
||||
Py_UNICODE *format_spec,
|
||||
Py_ssize_t format_spec_len);
|
||||
PyObject *format_spec,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -113,8 +113,9 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void);
|
|||
/* Format the object based on the format_spec, as defined in PEP 3101
|
||||
(Advanced String Formatting). */
|
||||
PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
|
||||
Py_UNICODE *format_spec,
|
||||
Py_ssize_t format_spec_len);
|
||||
PyObject *format_spec,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end);
|
||||
#endif /* Py_LIMITED_API */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -80,6 +80,7 @@ PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLongAndOverflow(PyObject *, int *);
|
|||
PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int);
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int);
|
||||
PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base);
|
||||
#endif
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
|
@ -155,8 +156,9 @@ PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base);
|
|||
/* Format the object based on the format_spec, as defined in PEP 3101
|
||||
(Advanced String Formatting). */
|
||||
PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj,
|
||||
Py_UNICODE *format_spec,
|
||||
Py_ssize_t format_spec_len);
|
||||
PyObject *format_spec,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end);
|
||||
#endif /* Py_LIMITED_API */
|
||||
|
||||
/* These aren't really part of the long object, but they're handy. The
|
||||
|
|
|
@ -301,6 +301,12 @@ PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
|
|||
Py_ssize_t end,
|
||||
const char *reason /* UTF-8 encoded string */
|
||||
);
|
||||
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
|
||||
PyObject *object,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end,
|
||||
const char *reason /* UTF-8 encoded string */
|
||||
);
|
||||
#endif
|
||||
|
||||
/* get the encoding attribute */
|
||||
|
|
|
@ -286,12 +286,15 @@ typedef size_t Py_uhash_t;
|
|||
/* fastest possible local call under MSVC */
|
||||
#define Py_LOCAL(type) static type __fastcall
|
||||
#define Py_LOCAL_INLINE(type) static __inline type __fastcall
|
||||
#define Py_LOCAL_CALLBACK(name) (__fastcall *name)
|
||||
#elif defined(USE_INLINE)
|
||||
#define Py_LOCAL(type) static type
|
||||
#define Py_LOCAL_INLINE(type) static inline type
|
||||
#define Py_LOCAL_CALLBACK(name) (*name)
|
||||
#else
|
||||
#define Py_LOCAL(type) static type
|
||||
#define Py_LOCAL_INLINE(type) static type
|
||||
#define Py_LOCAL_CALLBACK(name) (*name)
|
||||
#endif
|
||||
|
||||
/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks
|
||||
|
|
|
@ -64,16 +64,15 @@ Copyright (c) Corporation for National Research Initiatives.
|
|||
/* Python 3.x requires unicode */
|
||||
#define Py_USING_UNICODE
|
||||
|
||||
/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
|
||||
properly set, but the default rules below doesn't set it. I'll
|
||||
sort this out some other day -- fredrik@pythonware.com */
|
||||
|
||||
#ifndef Py_UNICODE_SIZE
|
||||
#error Must define Py_UNICODE_SIZE
|
||||
#ifndef SIZEOF_WCHAR_T
|
||||
#error Must define SIZEOF_WCHAR_T
|
||||
#endif
|
||||
|
||||
/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
|
||||
strings are stored as UCS-2 (with limited support for UTF-16) */
|
||||
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
|
||||
|
||||
/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
|
||||
Otherwise, Unicode strings are stored as UCS-2 (with limited support
|
||||
for UTF-16) */
|
||||
|
||||
#if Py_UNICODE_SIZE >= 4
|
||||
#define Py_UNICODE_WIDE
|
||||
|
@ -84,19 +83,14 @@ Copyright (c) Corporation for National Research Initiatives.
|
|||
/* #define HAVE_WCHAR_H */
|
||||
/* #define HAVE_USABLE_WCHAR_T */
|
||||
|
||||
/* Defaults for various platforms */
|
||||
#ifndef PY_UNICODE_TYPE
|
||||
|
||||
/* Windows has a usable wchar_t type (unless we're using UCS-4) */
|
||||
# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
|
||||
# define HAVE_USABLE_WCHAR_T
|
||||
# define PY_UNICODE_TYPE wchar_t
|
||||
# endif
|
||||
|
||||
# if defined(Py_UNICODE_WIDE)
|
||||
# define PY_UNICODE_TYPE Py_UCS4
|
||||
# endif
|
||||
/* Py_UNICODE was the native Unicode storage format (code unit) used by
|
||||
Python and represents a single Unicode element in the Unicode type.
|
||||
With PEP 393, Py_UNICODE is deprecated and replaced with a
|
||||
typedef to wchar_t. */
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
#define PY_UNICODE_TYPE wchar_t
|
||||
typedef wchar_t Py_UNICODE;
|
||||
#endif
|
||||
|
||||
/* If the compiler provides a wchar_t type we try to support it
|
||||
|
@ -109,7 +103,7 @@ Copyright (c) Corporation for National Research Initiatives.
|
|||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||
#if defined(MS_WINDOWS)
|
||||
# define HAVE_MBCS
|
||||
#endif
|
||||
|
||||
|
@ -121,201 +115,19 @@ Copyright (c) Corporation for National Research Initiatives.
|
|||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use this typedef when you need to represent a UTF-16 surrogate pair
|
||||
* as single unsigned integer.
|
||||
*/
|
||||
/* Py_UCS4 and Py_UCS2 are typedefs for the respective
|
||||
unicode representations. */
|
||||
#if SIZEOF_INT >= 4
|
||||
typedef unsigned int Py_UCS4;
|
||||
#elif SIZEOF_LONG >= 4
|
||||
typedef unsigned long Py_UCS4;
|
||||
#endif
|
||||
|
||||
/* Py_UNICODE is the native Unicode storage format (code unit) used by
|
||||
Python and represents a single Unicode element in the Unicode
|
||||
type. */
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||
#endif
|
||||
|
||||
/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
|
||||
|
||||
/* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
|
||||
produce different external names and thus cause import errors in
|
||||
case Python interpreters and extensions with mixed compiled in
|
||||
Unicode width assumptions are combined. */
|
||||
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
|
||||
# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
|
||||
# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
|
||||
# define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
|
||||
# define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
|
||||
# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
|
||||
# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
|
||||
# define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
|
||||
# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
|
||||
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
|
||||
# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
|
||||
# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
|
||||
# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
|
||||
# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
|
||||
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
|
||||
# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
|
||||
# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
|
||||
# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
|
||||
# define PyUnicode_Compare PyUnicodeUCS2_Compare
|
||||
# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString
|
||||
# define PyUnicode_Concat PyUnicodeUCS2_Concat
|
||||
# define PyUnicode_Append PyUnicodeUCS2_Append
|
||||
# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
|
||||
# define PyUnicode_Contains PyUnicodeUCS2_Contains
|
||||
# define PyUnicode_Count PyUnicodeUCS2_Count
|
||||
# define PyUnicode_Decode PyUnicodeUCS2_Decode
|
||||
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
|
||||
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
|
||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
||||
# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
|
||||
# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
|
||||
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
|
||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
||||
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
||||
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
|
||||
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
||||
# define PyUnicode_Encode PyUnicodeUCS2_Encode
|
||||
# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
||||
# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
|
||||
# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
|
||||
# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
|
||||
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
|
||||
# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
|
||||
# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
|
||||
# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
|
||||
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
|
||||
# define PyUnicode_Find PyUnicodeUCS2_Find
|
||||
# define PyUnicode_Format PyUnicodeUCS2_Format
|
||||
# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
|
||||
# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
|
||||
# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
|
||||
# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
|
||||
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
|
||||
# define PyUnicode_FromString PyUnicodeUCS2_FromString
|
||||
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
|
||||
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
|
||||
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
|
||||
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
|
||||
# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
|
||||
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
|
||||
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
|
||||
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
|
||||
# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
|
||||
# define PyUnicode_Join PyUnicodeUCS2_Join
|
||||
# define PyUnicode_Partition PyUnicodeUCS2_Partition
|
||||
# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
|
||||
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
|
||||
# define PyUnicode_Replace PyUnicodeUCS2_Replace
|
||||
# define PyUnicode_Resize PyUnicodeUCS2_Resize
|
||||
# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
|
||||
# define PyUnicode_Split PyUnicodeUCS2_Split
|
||||
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
|
||||
# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
|
||||
# define PyUnicode_Translate PyUnicodeUCS2_Translate
|
||||
# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
|
||||
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
|
||||
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
|
||||
# define _PyUnicode_Init _PyUnicodeUCS2_Init
|
||||
# define PyUnicode_strdup PyUnicodeUCS2_strdup
|
||||
|
||||
#else
|
||||
|
||||
# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
|
||||
# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
|
||||
# define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
|
||||
# define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
|
||||
# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
|
||||
# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
|
||||
# define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
|
||||
# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
|
||||
# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
|
||||
# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
|
||||
# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
|
||||
# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
|
||||
# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
|
||||
# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
|
||||
# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
|
||||
# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
|
||||
# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
|
||||
# define PyUnicode_Compare PyUnicodeUCS4_Compare
|
||||
# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString
|
||||
# define PyUnicode_Concat PyUnicodeUCS4_Concat
|
||||
# define PyUnicode_Append PyUnicodeUCS4_Append
|
||||
# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
|
||||
# define PyUnicode_Contains PyUnicodeUCS4_Contains
|
||||
# define PyUnicode_Count PyUnicodeUCS4_Count
|
||||
# define PyUnicode_Decode PyUnicodeUCS4_Decode
|
||||
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
|
||||
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
|
||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
||||
# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
|
||||
# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
|
||||
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
|
||||
# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
||||
# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
||||
# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
||||
# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
|
||||
# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
||||
# define PyUnicode_Encode PyUnicodeUCS4_Encode
|
||||
# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
||||
# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
|
||||
# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
|
||||
# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
|
||||
# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
|
||||
# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
|
||||
# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
|
||||
# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
|
||||
# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
|
||||
# define PyUnicode_Find PyUnicodeUCS4_Find
|
||||
# define PyUnicode_Format PyUnicodeUCS4_Format
|
||||
# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
|
||||
# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
|
||||
# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
|
||||
# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
|
||||
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
|
||||
# define PyUnicode_FromString PyUnicodeUCS4_FromString
|
||||
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
|
||||
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
|
||||
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
|
||||
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
|
||||
# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
|
||||
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
|
||||
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
|
||||
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
|
||||
# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
|
||||
# define PyUnicode_Join PyUnicodeUCS4_Join
|
||||
# define PyUnicode_Partition PyUnicodeUCS4_Partition
|
||||
# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
|
||||
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
|
||||
# define PyUnicode_Replace PyUnicodeUCS4_Replace
|
||||
# define PyUnicode_Resize PyUnicodeUCS4_Resize
|
||||
# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
|
||||
# define PyUnicode_Split PyUnicodeUCS4_Split
|
||||
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
|
||||
# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
|
||||
# define PyUnicode_Translate PyUnicodeUCS4_Translate
|
||||
# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
|
||||
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
|
||||
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
|
||||
# define _PyUnicode_Init _PyUnicodeUCS4_Init
|
||||
# define PyUnicode_strdup PyUnicodeUCS4_strdup
|
||||
|
||||
#error "Could not find a proper typedef for Py_UCS4"
|
||||
#endif
|
||||
|
||||
typedef unsigned short Py_UCS2;
|
||||
typedef unsigned char Py_UCS1;
|
||||
|
||||
/* --- Internal Unicode Operations ---------------------------------------- */
|
||||
|
||||
/* Since splitting on whitespace is an important use case, and
|
||||
|
@ -354,7 +166,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
Py_UNICODE_ISDIGIT(ch) || \
|
||||
Py_UNICODE_ISNUMERIC(ch))
|
||||
|
||||
#define Py_UNICODE_COPY(target, source, length) \
|
||||
#define Py_UNICODE_COPY(target, source, length) \
|
||||
Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
|
||||
|
||||
#define Py_UNICODE_FILL(target, value, length) \
|
||||
|
@ -375,9 +187,10 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
valid, and the substring must not be empty. */
|
||||
|
||||
#define Py_UNICODE_MATCH(string, offset, substring) \
|
||||
((*((string)->str + (offset)) == *((substring)->str)) && \
|
||||
((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
|
||||
!memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
|
||||
((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
|
||||
((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
|
||||
!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
|
||||
|
||||
#endif /* Py_LIMITED_API */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -387,41 +200,303 @@ extern "C" {
|
|||
/* --- Unicode Type ------------------------------------------------------- */
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
|
||||
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
|
||||
structure. state.ascii and state.compact are set, and the data
|
||||
immediately follow the structure. utf8_length and wstr_length can be found
|
||||
in the length field; the utf8 pointer is equal to the data pointer. */
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
Py_ssize_t length; /* Length of raw Unicode data in buffer */
|
||||
Py_UNICODE *str; /* Raw Unicode buffer */
|
||||
Py_ssize_t length; /* Number of code points in the string */
|
||||
Py_hash_t hash; /* Hash value; -1 if not set */
|
||||
int state; /* != 0 if interned. In this case the two
|
||||
* references from the dictionary to this object
|
||||
* are *not* counted in ob_refcnt. */
|
||||
PyObject *defenc; /* (Default) Encoded version as Python
|
||||
string, or NULL; this is used for
|
||||
implementing the buffer protocol */
|
||||
struct {
|
||||
/*
|
||||
SSTATE_NOT_INTERNED (0)
|
||||
SSTATE_INTERNED_MORTAL (1)
|
||||
SSTATE_INTERNED_IMMORTAL (2)
|
||||
|
||||
If interned != SSTATE_NOT_INTERNED, the two references from the
|
||||
dictionary to this object are *not* counted in ob_refcnt.
|
||||
*/
|
||||
unsigned int interned:2;
|
||||
/* Character size:
|
||||
|
||||
PyUnicode_WCHAR_KIND (0): wchar_t*
|
||||
PyUnicode_1BYTE_KIND (1): Py_UCS1*
|
||||
PyUnicode_2BYTE_KIND (2): Py_UCS2*
|
||||
PyUnicode_4BYTE_KIND (3): Py_UCS4*
|
||||
*/
|
||||
unsigned int kind:2;
|
||||
/* Compact is with respect to the allocation scheme. Compact unicode
|
||||
objects only require one memory block while non-compact objects use
|
||||
one block for the PyUnicodeObject struct and another for its data
|
||||
buffer. */
|
||||
unsigned int compact:1;
|
||||
/* Compact objects which are ASCII-only also have the state.compact
|
||||
flag set, and use the PyASCIIObject struct. */
|
||||
unsigned int ascii:1;
|
||||
/* The ready flag indicates whether the object layout is initialized
|
||||
completely. This means that this is either a compact object, or
|
||||
the data pointer is filled out. The bit is redundant, and helps
|
||||
to minimize the test in PyUnicode_IS_READY(). */
|
||||
unsigned int ready:1;
|
||||
} state;
|
||||
wchar_t *wstr; /* wchar_t representation (null-terminated) */
|
||||
} PyASCIIObject;
|
||||
|
||||
/* Non-ASCII strings allocated through PyUnicode_New use the
|
||||
PyCompactUnicodeObject structure. state.compact is set, and the data
|
||||
immediately follow the structure. */
|
||||
typedef struct {
|
||||
PyASCIIObject _base;
|
||||
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
|
||||
* terminating \0. */
|
||||
char *utf8; /* UTF-8 representation (null-terminated) */
|
||||
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
|
||||
* surrogates count as two code points. */
|
||||
} PyCompactUnicodeObject;
|
||||
|
||||
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
|
||||
PyUnicodeObject structure. The actual string data is initially in the wstr
|
||||
block, and copied into the data block using PyUnicode_Ready. */
|
||||
typedef struct {
|
||||
PyCompactUnicodeObject _base;
|
||||
union {
|
||||
void *any;
|
||||
Py_UCS1 *latin1;
|
||||
Py_UCS2 *ucs2;
|
||||
Py_UCS4 *ucs4;
|
||||
} data; /* Canonical, smallest-form Unicode buffer */
|
||||
} PyUnicodeObject;
|
||||
#endif
|
||||
|
||||
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
|
||||
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
|
||||
|
||||
#define SSTATE_NOT_INTERNED 0
|
||||
#define SSTATE_INTERNED_MORTAL 1
|
||||
#define SSTATE_INTERNED_IMMORTAL 2
|
||||
|
||||
#define PyUnicode_Check(op) \
|
||||
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
|
||||
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
|
||||
|
||||
/* Fast access macros */
|
||||
#ifndef Py_LIMITED_API
|
||||
|
||||
#define PyUnicode_WSTR_LENGTH(op) \
|
||||
(((PyASCIIObject*)op)->state.ascii ? \
|
||||
((PyASCIIObject*)op)->length : \
|
||||
((PyCompactUnicodeObject*)op)->wstr_length)
|
||||
|
||||
/* Returns the deprecated Py_UNICODE representation's size in code units
|
||||
(this includes surrogate pairs as 2 units).
|
||||
If the Py_UNICODE representation is not available, it will be computed
|
||||
on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
|
||||
|
||||
#define PyUnicode_GET_SIZE(op) \
|
||||
(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
|
||||
(assert(PyUnicode_Check(op)), \
|
||||
(((PyASCIIObject *)(op))->wstr) ? \
|
||||
PyUnicode_WSTR_LENGTH(op) : \
|
||||
((void)PyUnicode_AsUnicode((PyObject *)(op)), \
|
||||
PyUnicode_WSTR_LENGTH(op)))
|
||||
|
||||
#define PyUnicode_GET_DATA_SIZE(op) \
|
||||
(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
|
||||
(PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
|
||||
|
||||
/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
|
||||
representation on demand. Using this macro is very inefficient now,
|
||||
try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
|
||||
use PyUnicode_WRITE() and PyUnicode_READ(). */
|
||||
|
||||
#define PyUnicode_AS_UNICODE(op) \
|
||||
(assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
|
||||
(assert(PyUnicode_Check(op)), \
|
||||
(((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
|
||||
PyUnicode_AsUnicode((PyObject *)(op)))
|
||||
|
||||
#define PyUnicode_AS_DATA(op) \
|
||||
(assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
|
||||
((const char *)(PyUnicode_AS_UNICODE(op)))
|
||||
|
||||
|
||||
/* --- Flexible String Representation Helper Macros (PEP 393) ------------- */
|
||||
|
||||
/* Values for PyUnicodeObject.state: */
|
||||
|
||||
/* Interning state. */
|
||||
#define SSTATE_NOT_INTERNED 0
|
||||
#define SSTATE_INTERNED_MORTAL 1
|
||||
#define SSTATE_INTERNED_IMMORTAL 2
|
||||
|
||||
/* Evaluates to the state.ascii bit of op, viewed as a PyASCIIObject.
   No type check is performed.  The argument is parenthesized before the
   cast so that expression arguments (e.g. pointer arithmetic) are cast
   as a whole rather than only their first operand. */
#define PyUnicode_IS_COMPACT_ASCII(op) (((PyASCIIObject*)(op))->state.ascii)
|
||||
|
||||
/* String contains only wstr byte characters. This is only possible
|
||||
when the string was created with a legacy API and PyUnicode_Ready()
|
||||
has not been called yet. */
|
||||
#define PyUnicode_WCHAR_KIND 0
|
||||
|
||||
/* Return values of the PyUnicode_KIND() macro: */
|
||||
|
||||
#define PyUnicode_1BYTE_KIND 1
|
||||
#define PyUnicode_2BYTE_KIND 2
|
||||
#define PyUnicode_4BYTE_KIND 3
|
||||
|
||||
|
||||
/* Return the number of bytes the string uses to represent single characters,
|
||||
this can be 1, 2 or 4. */
|
||||
#define PyUnicode_CHARACTER_SIZE(op) \
|
||||
(1 << (PyUnicode_KIND(op) - 1))
|
||||
|
||||
/* Return pointers to the canonical representation cast as unsigned char,
|
||||
Py_UCS2, or Py_UCS4 for direct character access.
|
||||
No checks are performed, use PyUnicode_CHARACTER_SIZE or
|
||||
PyUnicode_KIND() before to ensure these will work correctly. */
|
||||
|
||||
#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
|
||||
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
|
||||
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
|
||||
|
||||
/* Return true if the string is compact or 0 if not.
|
||||
No type checks or Ready calls are performed. */
|
||||
#define PyUnicode_IS_COMPACT(op) \
|
||||
(((PyASCIIObject*)(op))->state.compact)
|
||||
|
||||
/* Return one of the PyUnicode_*_KIND values defined above. */
|
||||
#define PyUnicode_KIND(op) \
|
||||
(assert(PyUnicode_Check(op)), \
|
||||
assert(PyUnicode_IS_READY(op)), \
|
||||
((PyASCIIObject *)(op))->state.kind)
|
||||
|
||||
/* Return a void pointer to the raw unicode buffer. */
|
||||
#define _PyUnicode_COMPACT_DATA(op) \
|
||||
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||
((void*)((PyASCIIObject*)(op) + 1)) : \
|
||||
((void*)((PyCompactUnicodeObject*)(op) + 1)))
|
||||
|
||||
#define _PyUnicode_NONCOMPACT_DATA(op) \
|
||||
(assert(((PyUnicodeObject*)(op))->data.any), \
|
||||
((((PyUnicodeObject *)(op))->data.any)))
|
||||
|
||||
#define PyUnicode_DATA(op) \
|
||||
(assert(PyUnicode_Check(op)), \
|
||||
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
|
||||
_PyUnicode_NONCOMPACT_DATA(op))
|
||||
|
||||
#define _PyUnicode_UTF8(op) \
|
||||
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||
((char*)((PyASCIIObject*)(op) + 1)) : \
|
||||
((PyCompactUnicodeObject*)(op))->utf8)
|
||||
|
||||
#define _PyUnicode_UTF8_LENGTH(op) \
|
||||
(PyUnicode_IS_COMPACT_ASCII(op) ? \
|
||||
((PyASCIIObject*)(op))->length : \
|
||||
((PyCompactUnicodeObject*)(op))->utf8_length)
|
||||
|
||||
/* Compute (index * char_size) where char_size is 2 ** (kind - 1).
|
||||
|
||||
The index is a character index, the result is a size in bytes. */
|
||||
#define PyUnicode_KIND_SIZE(kind, index) ((index) << ((kind) - 1))
|
||||
|
||||
/* In the access macros below, "kind" may be evaluated more than once.
|
||||
All other macro parameters are evaluated exactly once, so it is safe
|
||||
to put side effects into them (such as increasing the index). */
|
||||
|
||||
/* Write into the canonical representation, this macro does not do any sanity
|
||||
checks and is intended for usage in loops. The caller should cache the
|
||||
kind and data pointers obtained from other macro calls.
|
||||
index is the index in the string (starts at 0) and value is the new
|
||||
code point value which should be written to that location. */
|
||||
#define PyUnicode_WRITE(kind, data, index, value) \
|
||||
do { \
|
||||
switch ((kind)) { \
|
||||
case PyUnicode_1BYTE_KIND: { \
|
||||
((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
|
||||
break; \
|
||||
} \
|
||||
case PyUnicode_2BYTE_KIND: { \
|
||||
((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
|
||||
break; \
|
||||
} \
|
||||
default: { \
|
||||
assert((kind) == PyUnicode_4BYTE_KIND); \
|
||||
((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Read a code point from the string's canonical representation. No checks
|
||||
or ready calls are performed. */
|
||||
#define PyUnicode_READ(kind, data, index) \
|
||||
((Py_UCS4) \
|
||||
((kind) == PyUnicode_1BYTE_KIND ? \
|
||||
((const unsigned char *)(data))[(index)] : \
|
||||
((kind) == PyUnicode_2BYTE_KIND ? \
|
||||
((const Py_UCS2 *)(data))[(index)] : \
|
||||
((const Py_UCS4 *)(data))[(index)] \
|
||||
) \
|
||||
))
|
||||
|
||||
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
|
||||
calls PyUnicode_KIND() and might call it twice. For single reads, use
|
||||
PyUnicode_READ_CHAR, for multiple consecutive reads callers should
|
||||
cache kind and use PyUnicode_READ instead. */
|
||||
#define PyUnicode_READ_CHAR(unicode, index) \
|
||||
((Py_UCS4) \
|
||||
(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
|
||||
((const unsigned char *)(PyUnicode_DATA((unicode))))[(index)] : \
|
||||
(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
|
||||
((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
|
||||
((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
|
||||
) \
|
||||
))
|
||||
|
||||
/* Returns the length of the unicode string. The caller has to make sure that
|
||||
the string has its canonical representation set before calling
|
||||
this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
|
||||
#define PyUnicode_GET_LENGTH(op) \
|
||||
(assert(PyUnicode_Check(op)), \
|
||||
assert(PyUnicode_IS_READY(op)), \
|
||||
((PyASCIIObject *)(op))->length)
|
||||
|
||||
|
||||
/* Fast check to determine whether an object is ready. Equivalent to
|
||||
PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
|
||||
|
||||
/* Expands to the state.ready bit of op.  The argument is parenthesized so
   that a compound pointer expression (e.g. a conditional) is cast as a
   whole rather than only its first operand. */
#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
|
||||
|
||||
/* PyUnicode_READY() does less work than PyUnicode_Ready() in the best
|
||||
case. If the canonical representation is not yet set, it will still call
|
||||
PyUnicode_Ready().
|
||||
Returns 0 on success and -1 on errors. */
|
||||
#define PyUnicode_READY(op) \
|
||||
(assert(PyUnicode_Check(op)), \
|
||||
(PyUnicode_IS_READY(op) ? \
|
||||
0 : _PyUnicode_Ready((PyUnicodeObject *)(op))))
|
||||
|
||||
/* Generic helper macro to convert characters of different types.
|
||||
from_type and to_type have to be valid type names, begin and end
|
||||
are pointers to the source characters which should be of type
|
||||
"from_type *". to is a pointer of type "to_type *" and points to the
|
||||
buffer where the result characters are written to. */
|
||||
#define PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
|
||||
do { \
|
||||
const from_type *iter_; to_type *to_; \
|
||||
for (iter_ = (begin), to_ = (to_type *)(to); \
|
||||
iter_ < (end); \
|
||||
++iter_, ++to_) { \
|
||||
*to_ = (to_type)*iter_; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Return a maximum character value which is suitable for creating another
|
||||
string based on op. This is always an approximation but more efficient
|
||||
than iterating over the string. */
|
||||
#define PyUnicode_MAX_CHAR_VALUE(op) \
|
||||
(assert(PyUnicode_IS_READY(op)), \
|
||||
(PyUnicode_IS_COMPACT_ASCII(op) ? 0x7f: \
|
||||
(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
|
||||
(PyUnicode_DATA(op) == (((PyCompactUnicodeObject *)(op))->utf8) ? \
|
||||
(0x7fU) : (0xffU) \
|
||||
) : \
|
||||
(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
|
||||
(0xffffU) : (0x10ffffU) \
|
||||
))))
|
||||
|
||||
#endif
|
||||
|
||||
/* --- Constants ---------------------------------------------------------- */
|
||||
|
@ -437,6 +512,52 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
|
|||
|
||||
/* --- Plain Py_UNICODE --------------------------------------------------- */
|
||||
|
||||
/* With PEP 393, this is the recommended way to allocate a new unicode object.
|
||||
This function will allocate the object and its buffer in a single memory
|
||||
block. Objects created using this function are not resizable. */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_New(
|
||||
Py_ssize_t size, /* Number of code points in the new string */
|
||||
Py_UCS4 maxchar /* maximum code point value in the string */
|
||||
);
|
||||
#endif
|
||||
|
||||
/* Initializes the canonical string representation from the deprecated
|
||||
wstr/Py_UNICODE representation. This function is used to convert
|
||||
unicode objects which were created using the old API to the new flexible
|
||||
format introduced with PEP 393. The PyUnicode_READY() macro can be
|
||||
more efficient if the string is already ready. */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(int) _PyUnicode_Ready(
|
||||
PyUnicodeObject *unicode /* Unicode object */
|
||||
);
|
||||
#endif
|
||||
|
||||
/* Copy characters from one unicode object into another. This function performs
|
||||
character conversion when necessary and falls back to memcpy if possible.
|
||||
Return -1 and raise an exception on error, return 0 on success. */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(int) PyUnicode_CopyCharacters(
|
||||
PyObject *to,
|
||||
Py_ssize_t to_start,
|
||||
PyObject *from,
|
||||
Py_ssize_t from_start,
|
||||
Py_ssize_t how_many
|
||||
);
|
||||
#endif
|
||||
|
||||
/* Find the maximum code point and count the number of surrogate pairs so a
|
||||
correct string length can be computed before converting a string to UCS4.
|
||||
This function counts single surrogates as a character and not as a pair. */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(int) _PyUnicode_FindMaxCharAndNumSurrogatePairs(
|
||||
const wchar_t *begin,
|
||||
const wchar_t *end,
|
||||
Py_UCS4 *maxchar,
|
||||
Py_ssize_t *num_surrogates
|
||||
);
|
||||
#endif
|
||||
|
||||
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
|
||||
size.
|
||||
|
||||
|
@ -461,13 +582,43 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
|
|||
);
|
||||
|
||||
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
|
||||
UTF-8 encoded bytes */
|
||||
UTF-8 encoded bytes. The size is determined with strlen(). */
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
|
||||
const char *u /* UTF-8 encoded string */
|
||||
);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
|
||||
int kind,
|
||||
const void *buffer,
|
||||
Py_ssize_t size);
|
||||
#endif
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
|
||||
PyObject *str,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end);
|
||||
|
||||
/* Copy the string into a UCS4 buffer including the null character if copy_null
|
||||
is set. Return NULL and raise an exception on error. Raise a ValueError if
|
||||
the buffer is smaller than the string. Return buffer on success.
|
||||
|
||||
buflen is the length of the buffer in (Py_UCS4) characters. */
|
||||
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
|
||||
PyObject *unicode,
|
||||
Py_UCS4* buffer,
|
||||
Py_ssize_t buflen,
|
||||
int copy_null);
|
||||
|
||||
/* Copy the string into a UCS4 buffer. A new buffer is allocated using
|
||||
* PyMem_Malloc; if this fails, NULL is returned with a memory error
|
||||
exception set. */
|
||||
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
|
||||
|
||||
/* Return a read-only pointer to the Unicode object's internal
|
||||
Py_UNICODE buffer. */
|
||||
Py_UNICODE buffer.
|
||||
If the wchar_t/Py_UNICODE representation is not yet available, this
|
||||
function will calculate it. */
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
|
||||
|
@ -475,12 +626,47 @@ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
|
|||
);
|
||||
#endif
|
||||
|
||||
/* Return a read-only pointer to the Unicode object's internal
|
||||
Py_UNICODE buffer and save the length at size.
|
||||
If the wchar_t/Py_UNICODE representation is not yet available, this
|
||||
function will calculate it. */
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
|
||||
PyObject *unicode, /* Unicode object */
|
||||
Py_ssize_t *size /* location where to save the length */
|
||||
);
|
||||
#endif
|
||||
|
||||
/* Get the length of the Unicode object. */
|
||||
|
||||
PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
|
||||
PyObject *unicode
|
||||
);
|
||||
|
||||
/* Get the number of Py_UNICODE units in the
|
||||
string representation. */
|
||||
|
||||
PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
|
||||
PyObject *unicode /* Unicode object */
|
||||
);
|
||||
|
||||
/* Read a character from the string. */
|
||||
|
||||
PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
|
||||
PyObject *unicode,
|
||||
Py_ssize_t index
|
||||
);
|
||||
|
||||
/* Write a character to the string. The string must have been created through
|
||||
PyUnicode_New, must not be shared, and must not have been hashed yet. */
|
||||
|
||||
PyAPI_FUNC(int) PyUnicode_WriteChar(
|
||||
PyObject *unicode,
|
||||
Py_ssize_t index,
|
||||
Py_UCS4 character
|
||||
);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
/* Get the maximum ordinal for a Unicode character. */
|
||||
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
|
||||
|
@ -558,8 +744,9 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
|
|||
/* Format the object based on the format_spec, as defined in PEP 3101
|
||||
(Advanced String Formatting). */
|
||||
PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
|
||||
Py_UNICODE *format_spec,
|
||||
Py_ssize_t format_spec_len);
|
||||
PyObject *format_spec,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end);
|
||||
#endif
|
||||
|
||||
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
|
||||
|
@ -572,7 +759,8 @@ PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
|
|||
#endif
|
||||
|
||||
/* Use only if you know it's a string */
|
||||
#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
|
||||
#define PyUnicode_CHECK_INTERNED(op) \
|
||||
(((PyASCIIObject *)(op))->state.interned)
|
||||
|
||||
/* --- wchar_t support for platforms which support it --------------------- */
|
||||
|
||||
|
@ -619,6 +807,8 @@ PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
|
|||
Py_ssize_t *size /* number of characters of the result */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
|
||||
|
||||
#endif
|
||||
|
||||
/* --- Unicode ordinals --------------------------------------------------- */
|
||||
|
@ -664,49 +854,42 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
|
|||
|
||||
/* --- Manage the default encoding ---------------------------------------- */
|
||||
|
||||
/* Return a Python string holding the default encoded value of the
|
||||
Unicode object.
|
||||
|
||||
Same as PyUnicode_AsUTF8String() except
|
||||
the resulting string is cached in the Unicode object for subsequent
|
||||
usage by this function. The cached version is needed to implement
|
||||
the character buffer interface and will live (at least) as long as
|
||||
the Unicode object itself.
|
||||
|
||||
The refcount of the string is *not* incremented.
|
||||
|
||||
*** Exported for internal use by the interpreter only !!! ***
|
||||
|
||||
*/
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
|
||||
PyObject *unicode);
|
||||
#endif
|
||||
|
||||
/* Returns a pointer to the default encoding (UTF-8) of the
|
||||
Unicode object unicode and the size of the encoded representation
|
||||
in bytes stored in *size.
|
||||
|
||||
In case of an error, no *size is set.
|
||||
|
||||
This function caches the UTF-8 encoded string in the unicodeobject
|
||||
and subsequent calls will return the same string. The memory is released
|
||||
when the unicodeobject is deallocated.
|
||||
|
||||
_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
|
||||
support the previous internal function with the same behaviour.
|
||||
|
||||
*** This API is for interpreter INTERNAL USE ONLY and will likely
|
||||
*** be removed or changed in the future.
|
||||
|
||||
*** If you need to access the Unicode object as UTF-8 bytes string,
|
||||
*** please use PyUnicode_AsUTF8String() instead.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
|
||||
PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
|
||||
PyObject *unicode,
|
||||
Py_ssize_t *size);
|
||||
#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
|
||||
#endif
|
||||
|
||||
/* Returns a pointer to the default encoding (UTF-8) of the
|
||||
Unicode object unicode.
|
||||
|
||||
Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
|
||||
in the unicodeobject.
|
||||
|
||||
_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
|
||||
support the previous internal function with the same behaviour.
|
||||
|
||||
Use of this API is DEPRECATED since no size information can be
|
||||
extracted from the returned data.
|
||||
|
||||
|
@ -719,7 +902,8 @@ PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
|
|||
*/
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
|
||||
PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
|
||||
#define _PyUnicode_AsString PyUnicode_AsUTF8
|
||||
#endif
|
||||
|
||||
/* Returns "utf-8". */
|
||||
|
@ -846,6 +1030,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
|||
);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
|
||||
PyObject *unicode,
|
||||
const char *errors);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
|
||||
const Py_UNICODE *data, /* Unicode char buffer */
|
||||
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
||||
|
@ -1076,6 +1264,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
|
|||
);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
|
||||
PyObject* unicode,
|
||||
const char* errors);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
|
||||
const Py_UNICODE *data, /* Unicode char buffer */
|
||||
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
||||
|
@ -1100,6 +1292,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
|
|||
);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
|
||||
PyObject* unicode,
|
||||
const char* errors);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
|
||||
const Py_UNICODE *data, /* Unicode char buffer */
|
||||
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
||||
|
@ -1252,6 +1448,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
|
|||
);
|
||||
#endif
|
||||
|
||||
/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject
|
||||
as argument instead of a raw buffer and length. This function additionally
|
||||
transforms spaces to ASCII because this is what the callers in longobject,
|
||||
floatobject, and complexobject did anyways. */
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
|
||||
PyObject *unicode /* Unicode object */
|
||||
);
|
||||
#endif
|
||||
|
||||
/* --- File system encoding ---------------------------------------------- */
|
||||
|
||||
/* ParseTuple converter: encode str objects to bytes using
|
||||
|
@ -1439,6 +1646,15 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
|
|||
int direction /* Find direction: +1 forward, -1 backward */
|
||||
);
|
||||
|
||||
/* Like PyUnicode_Find, but search for single character only. */
|
||||
PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
|
||||
PyObject *str,
|
||||
Py_UCS4 ch,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end,
|
||||
int direction
|
||||
);
|
||||
|
||||
/* Count the number of occurrences of substr in str[start:end]. */
|
||||
|
||||
PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
|
||||
|
@ -1542,13 +1758,15 @@ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buff
|
|||
into the string pointed to by buffer. For the argument descriptions,
|
||||
see Objects/stringlib/localeutil.h */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
|
||||
Py_ssize_t n_buffer,
|
||||
Py_UNICODE *digits,
|
||||
Py_ssize_t n_digits,
|
||||
Py_ssize_t min_width,
|
||||
const char *grouping,
|
||||
const char *thousands_sep);
|
||||
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
|
||||
int kind,
|
||||
void *buffer,
|
||||
Py_ssize_t n_buffer,
|
||||
void *digits,
|
||||
Py_ssize_t n_digits,
|
||||
Py_ssize_t min_width,
|
||||
const char *grouping,
|
||||
const char *thousands_sep);
|
||||
#endif
|
||||
/* === Characters Type APIs =============================================== */
|
||||
|
||||
|
@ -1673,6 +1891,43 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
|
|||
Py_UNICODE c
|
||||
);
|
||||
|
||||
PyAPI_FUNC(size_t) Py_UCS4_strlen(
|
||||
const Py_UCS4 *u
|
||||
);
|
||||
|
||||
PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcpy(
|
||||
Py_UCS4 *s1,
|
||||
const Py_UCS4 *s2);
|
||||
|
||||
PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcat(
|
||||
Py_UCS4 *s1, const Py_UCS4 *s2);
|
||||
|
||||
PyAPI_FUNC(Py_UCS4*) Py_UCS4_strncpy(
|
||||
Py_UCS4 *s1,
|
||||
const Py_UCS4 *s2,
|
||||
size_t n);
|
||||
|
||||
PyAPI_FUNC(int) Py_UCS4_strcmp(
|
||||
const Py_UCS4 *s1,
|
||||
const Py_UCS4 *s2
|
||||
);
|
||||
|
||||
PyAPI_FUNC(int) Py_UCS4_strncmp(
|
||||
const Py_UCS4 *s1,
|
||||
const Py_UCS4 *s2,
|
||||
size_t n
|
||||
);
|
||||
|
||||
PyAPI_FUNC(Py_UCS4*) Py_UCS4_strchr(
|
||||
const Py_UCS4 *s,
|
||||
Py_UCS4 c
|
||||
);
|
||||
|
||||
PyAPI_FUNC(Py_UCS4*) Py_UCS4_strrchr(
|
||||
const Py_UCS4 *s,
|
||||
Py_UCS4 c
|
||||
);
|
||||
|
||||
/* Create a copy of a unicode string ending with a nul character. Return NULL
|
||||
and raise a MemoryError exception on memory allocation failure, otherwise
|
||||
return a new allocated buffer (use PyMem_Free() to free the buffer). */
|
||||
|
|
|
@ -121,8 +121,7 @@ def py_scanstring(s, end, strict=True,
|
|||
msg = "Invalid \\uXXXX escape"
|
||||
raise ValueError(errmsg(msg, s, end))
|
||||
uni = int(esc, 16)
|
||||
# Check for surrogate pair on UCS-4 systems
|
||||
if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
|
||||
if 0xd800 <= uni <= 0xdbff:
|
||||
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
|
||||
if not s[end + 5:end + 7] == '\\u':
|
||||
raise ValueError(errmsg(msg, s, end))
|
||||
|
|
|
@ -9,14 +9,9 @@ class TestScanstring:
|
|||
scanstring('"z\\ud834\\udd20x"', 1, True),
|
||||
('z\U0001d120x', 16))
|
||||
|
||||
if sys.maxunicode == 65535:
|
||||
self.assertEqual(
|
||||
scanstring('"z\U0001d120x"', 1, True),
|
||||
('z\U0001d120x', 6))
|
||||
else:
|
||||
self.assertEqual(
|
||||
scanstring('"z\U0001d120x"', 1, True),
|
||||
('z\U0001d120x', 5))
|
||||
self.assertEqual(
|
||||
scanstring('"z\U0001d120x"', 1, True),
|
||||
('z\U0001d120x', 5))
|
||||
|
||||
self.assertEqual(
|
||||
scanstring('"\\u007b"', 1, True),
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import test.support, unittest
|
||||
import sys, codecs, html.entities, unicodedata
|
||||
import ctypes
|
||||
|
||||
class PosReturn:
|
||||
# this can be used for configurable callbacks
|
||||
|
@ -577,8 +578,10 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
|
||||
("\\uffff", 1)
|
||||
)
|
||||
# 1 on UCS-4 builds, 2 on UCS-2
|
||||
len_wide = len("\U00010000")
|
||||
if ctypes.sizeof(ctypes.c_wchar) == 2:
|
||||
len_wide = 2
|
||||
else:
|
||||
len_wide = 1
|
||||
self.assertEqual(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError("ascii", "\U00010000",
|
||||
|
|
|
@ -622,6 +622,10 @@ class UTF8Test(ReadTest):
|
|||
b"abc\xed\xa0\x80def")
|
||||
self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
|
||||
"abc\ud800def")
|
||||
self.assertEqual("\U00010fff\uD800".encode("utf-8", "surrogatepass"),
|
||||
b"\xf0\x90\xbf\xbf\xed\xa0\x80")
|
||||
self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("utf-8", "surrogatepass"),
|
||||
"\U00010fff\uD800")
|
||||
self.assertTrue(codecs.lookup_error("surrogatepass"))
|
||||
|
||||
class UTF7Test(ReadTest):
|
||||
|
|
|
@ -218,10 +218,6 @@ class TestTranforms(unittest.TestCase):
|
|||
# out of range
|
||||
asm = dis_single('"fuu"[10]')
|
||||
self.assertIn('BINARY_SUBSCR', asm)
|
||||
# non-BMP char (see #5057)
|
||||
asm = dis_single('"\U00012345"[0]')
|
||||
self.assertIn('BINARY_SUBSCR', asm)
|
||||
|
||||
|
||||
def test_folding_of_unaryops_on_constants(self):
|
||||
for line, elem in (
|
||||
|
|
|
@ -780,6 +780,13 @@ class ReTests(unittest.TestCase):
|
|||
self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
|
||||
self.assertRaises(TypeError, _sre.compile, {}, 0, [])
|
||||
|
||||
def test_search_dot_unicode(self):
|
||||
self.assertIsNotNone(re.search("123.*-", '123abc-'))
|
||||
self.assertIsNotNone(re.search("123.*-", '123\xe9-'))
|
||||
self.assertIsNotNone(re.search("123.*-", '123\u20ac-'))
|
||||
self.assertIsNotNone(re.search("123.*-", '123\U0010ffff-'))
|
||||
self.assertIsNotNone(re.search("123.*-", '123\xe9\u20ac\U0010ffff-'))
|
||||
|
||||
def run_re_tests():
|
||||
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
||||
if verbose:
|
||||
|
|
|
@ -833,13 +833,39 @@ class SizeofTest(unittest.TestCase):
|
|||
class newstyleclass(object): pass
|
||||
check(newstyleclass, s)
|
||||
# unicode
|
||||
usize = len('\0'.encode('unicode-internal'))
|
||||
samples = ['', '1'*100]
|
||||
# we need to test for both sizes, because we don't know if the string
|
||||
# has been cached
|
||||
# each tuple contains a string and its expected character size
|
||||
# don't put any static strings here, as they may contain
|
||||
# wchar_t or UTF-8 representations
|
||||
samples = ['1'*100, '\xff'*50,
|
||||
'\u0100'*40, '\uffff'*100,
|
||||
'\U00010000'*30, '\U0010ffff'*100]
|
||||
asciifields = h + "PPiP"
|
||||
compactfields = asciifields + "PPP"
|
||||
unicodefields = compactfields + "P"
|
||||
for s in samples:
|
||||
basicsize = size(h + 'PPPiP') + usize * (len(s) + 1)
|
||||
check(s, basicsize)
|
||||
maxchar = ord(max(s))
|
||||
if maxchar < 128:
|
||||
L = size(asciifields) + len(s) + 1
|
||||
elif maxchar < 256:
|
||||
L = size(compactfields) + len(s) + 1
|
||||
elif maxchar < 65536:
|
||||
L = size(compactfields) + 2*(len(s) + 1)
|
||||
else:
|
||||
L = size(compactfields) + 4*(len(s) + 1)
|
||||
check(s, L)
|
||||
# verify that the UTF-8 size is accounted for
|
||||
s = chr(0x4000) # 4 bytes canonical representation
|
||||
check(s, size(compactfields) + 4)
|
||||
try:
|
||||
# FIXME: codecs.lookup(str) calls encoding.search_function() which
|
||||
# calls __import__ using str in the module name. __import__ encodes
|
||||
# the module name to the file system encoding (which is the locale
|
||||
# encoding), so test_sys fails if the locale encoding is not UTF-8.
|
||||
codecs.lookup(s) # produces 4 bytes UTF-8
|
||||
except LookupError:
|
||||
check(s, size(compactfields) + 4 + 4)
|
||||
# TODO: add check that forces the presence of wchar_t representation
|
||||
# TODO: add check that forces layout of unicodefields
|
||||
# weakref
|
||||
import weakref
|
||||
check(weakref.ref(int), size(h + '2Pl2P'))
|
||||
|
|
|
@ -1583,16 +1583,32 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
|
||||
|
||||
def test_raiseMemError(self):
|
||||
# Ensure that the freelist contains a consistent object, even
|
||||
# when a string allocation fails with a MemoryError.
|
||||
# This used to crash the interpreter,
|
||||
# or leak references when the number was smaller.
|
||||
charwidth = 4 if sys.maxunicode >= 0x10000 else 2
|
||||
# Note: sys.maxsize is half of the actual max allocation because of
|
||||
# the signedness of Py_ssize_t.
|
||||
alloc = lambda: "a" * (sys.maxsize // charwidth * 2)
|
||||
self.assertRaises(MemoryError, alloc)
|
||||
self.assertRaises(MemoryError, alloc)
|
||||
if struct.calcsize('P') == 8:
|
||||
# 64 bits pointers
|
||||
ascii_struct_size = 64
|
||||
compact_struct_size = 88
|
||||
else:
|
||||
# 32 bits pointers
|
||||
ascii_struct_size = 32
|
||||
compact_struct_size = 44
|
||||
|
||||
for char in ('a', '\xe9', '\u20ac', '\U0010ffff'):
|
||||
code = ord(char)
|
||||
if code < 0x100:
|
||||
char_size = 1 # sizeof(Py_UCS1)
|
||||
struct_size = ascii_struct_size
|
||||
elif code < 0x10000:
|
||||
char_size = 2 # sizeof(Py_UCS2)
|
||||
struct_size = compact_struct_size
|
||||
else:
|
||||
char_size = 4 # sizeof(Py_UCS4)
|
||||
struct_size = compact_struct_size
|
||||
# Note: sys.maxsize is half of the actual max allocation because of
|
||||
# the signedness of Py_ssize_t. -1 because of the null character.
|
||||
maxlen = ((sys.maxsize - struct_size) // char_size) - 1
|
||||
alloc = lambda: char * maxlen
|
||||
self.assertRaises(MemoryError, alloc)
|
||||
self.assertRaises(MemoryError, alloc)
|
||||
|
||||
def test_format_subclass(self):
|
||||
class S(str):
|
||||
|
@ -1608,10 +1624,7 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
from ctypes import (pythonapi, py_object,
|
||||
c_int, c_long, c_longlong, c_ssize_t,
|
||||
c_uint, c_ulong, c_ulonglong, c_size_t)
|
||||
if sys.maxunicode == 65535:
|
||||
name = "PyUnicodeUCS2_FromFormat"
|
||||
else:
|
||||
name = "PyUnicodeUCS4_FromFormat"
|
||||
name = "PyUnicode_FromFormat"
|
||||
_PyUnicode_FromFormat = getattr(pythonapi, name)
|
||||
_PyUnicode_FromFormat.restype = py_object
|
||||
|
||||
|
|
|
@ -629,7 +629,6 @@ BYTESTR_DEPS = \
|
|||
$(srcdir)/Objects/stringlib/partition.h \
|
||||
$(srcdir)/Objects/stringlib/split.h \
|
||||
$(srcdir)/Objects/stringlib/stringdefs.h \
|
||||
$(srcdir)/Objects/stringlib/string_format.h \
|
||||
$(srcdir)/Objects/stringlib/transmogrify.h \
|
||||
$(srcdir)/Objects/stringlib/unicodedefs.h \
|
||||
$(srcdir)/Objects/stringlib/localeutil.h
|
||||
|
@ -639,7 +638,7 @@ Objects/bytesobject.o: $(srcdir)/Objects/bytesobject.c $(BYTESTR_DEPS)
|
|||
Objects/bytearrayobject.o: $(srcdir)/Objects/bytearrayobject.c $(BYTESTR_DEPS)
|
||||
|
||||
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
|
||||
$(BYTESTR_DEPS)
|
||||
$(BYTESTR_DEPS) $(srcdir)/Objects/stringlib/unicode_format.h
|
||||
|
||||
Objects/dictobject.o: $(srcdir)/Objects/stringlib/eq.h
|
||||
Objects/setobject.o: $(srcdir)/Objects/stringlib/eq.h
|
||||
|
@ -650,8 +649,7 @@ $(OPCODETARGETS_H): $(OPCODETARGETGEN_FILES)
|
|||
Python/ceval.o: $(OPCODETARGETS_H) Python/ceval_gil.h
|
||||
|
||||
Python/formatter_unicode.o: $(srcdir)/Python/formatter_unicode.c \
|
||||
$(BYTESTR_DEPS) \
|
||||
$(srcdir)/Objects/stringlib/formatter.h
|
||||
$(BYTESTR_DEPS)
|
||||
|
||||
Objects/typeobject.o: $(srcdir)/Objects/typeslots.inc
|
||||
$(srcdir)/Objects/typeslots.inc: $(srcdir)/Include/typeslots.h $(srcdir)/Objects/typeslots.py
|
||||
|
|
|
@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- PEP 393: flexible string representation.
|
||||
|
||||
- Issue #13012: The 'keepends' parameter to str.splitlines may now be passed
|
||||
as a keyword argument: "my_string.splitlines(keepends=True)". The same
|
||||
change also applies to bytes.splitlines and bytearray.splitlines.
|
||||
|
|
|
@ -700,12 +700,10 @@ utf_8_encode(PyObject *self,
|
|||
return NULL;
|
||||
|
||||
str = PyUnicode_FromObject(str);
|
||||
if (str == NULL)
|
||||
if (str == NULL || PyUnicode_READY(str) == -1)
|
||||
return NULL;
|
||||
v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
|
||||
PyUnicode_GET_SIZE(str),
|
||||
errors),
|
||||
PyUnicode_GET_SIZE(str));
|
||||
v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
|
||||
PyUnicode_GET_LENGTH(str));
|
||||
Py_DECREF(str);
|
||||
return v;
|
||||
}
|
||||
|
|
|
@ -128,7 +128,7 @@ get_nullchar_as_None(Py_UNICODE c)
|
|||
return Py_None;
|
||||
}
|
||||
else
|
||||
return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
|
||||
return PyUnicode_FromOrdinal(c);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
|
|
@ -1843,11 +1843,9 @@ PyCSimpleType_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
return NULL;
|
||||
}
|
||||
if (PyUnicode_Check(proto)) {
|
||||
PyObject *v = _PyUnicode_AsDefaultEncodedString(proto);
|
||||
if (!v)
|
||||
proto_str = PyUnicode_AsUTF8AndSize(proto, &proto_len);
|
||||
if (!proto_str)
|
||||
goto error;
|
||||
proto_str = PyBytes_AS_STRING(v);
|
||||
proto_len = PyBytes_GET_SIZE(v);
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"class must define a '_type_' string attribute");
|
||||
|
|
|
@ -658,13 +658,6 @@ static int ConvParam(PyObject *obj, Py_ssize_t index, struct argument *pa)
|
|||
|
||||
#ifdef CTYPES_UNICODE
|
||||
if (PyUnicode_Check(obj)) {
|
||||
#if Py_UNICODE_SIZE == SIZEOF_WCHAR_T
|
||||
pa->ffi_type = &ffi_type_pointer;
|
||||
pa->value.p = PyUnicode_AS_UNICODE(obj);
|
||||
Py_INCREF(obj);
|
||||
pa->keep = obj;
|
||||
return 0;
|
||||
#else
|
||||
pa->ffi_type = &ffi_type_pointer;
|
||||
pa->value.p = PyUnicode_AsWideCharString(obj, NULL);
|
||||
if (pa->value.p == NULL)
|
||||
|
@ -675,7 +668,6 @@ static int ConvParam(PyObject *obj, Py_ssize_t index, struct argument *pa)
|
|||
return -1;
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
|
||||
#define CTYPES_CFIELD_CAPSULE_NAME_PYMEM "_ctypes/cfield.c pymem"
|
||||
|
||||
#if Py_UNICODE_SIZE != SIZEOF_WCHAR_T
|
||||
static void pymem_destructor(PyObject *ptr)
|
||||
{
|
||||
void *p = PyCapsule_GetPointer(ptr, CTYPES_CFIELD_CAPSULE_NAME_PYMEM);
|
||||
|
@ -17,7 +16,6 @@ static void pymem_destructor(PyObject *ptr)
|
|||
PyMem_Free(p);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************/
|
||||
|
@ -1238,32 +1236,24 @@ u_get(void *ptr, Py_ssize_t size)
|
|||
static PyObject *
|
||||
U_get(void *ptr, Py_ssize_t size)
|
||||
{
|
||||
PyObject *result;
|
||||
Py_ssize_t len;
|
||||
Py_UNICODE *p;
|
||||
wchar_t *p;
|
||||
|
||||
size /= sizeof(wchar_t); /* we count character units here, not bytes */
|
||||
|
||||
result = PyUnicode_FromWideChar((wchar_t *)ptr, size);
|
||||
if (!result)
|
||||
return NULL;
|
||||
/* We need 'result' to be able to count the characters with wcslen,
|
||||
since ptr may not be NUL terminated. If the length is smaller (if
|
||||
it was actually NUL terminated, we construct a new one and throw
|
||||
away the result.
|
||||
*/
|
||||
/* chop off at the first NUL character, if any. */
|
||||
p = PyUnicode_AS_UNICODE(result);
|
||||
for (len = 0; len < size; ++len)
|
||||
p = (wchar_t*)ptr;
|
||||
for (len = 0; len < size; ++len) {
|
||||
if (!p[len])
|
||||
break;
|
||||
|
||||
if (len < size) {
|
||||
PyObject *ob = PyUnicode_FromWideChar((wchar_t *)ptr, len);
|
||||
Py_DECREF(result);
|
||||
return ob;
|
||||
}
|
||||
return result;
|
||||
|
||||
return PyUnicode_FromWideChar((wchar_t *)ptr, len);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -1401,6 +1391,9 @@ z_get(void *ptr, Py_ssize_t size)
|
|||
static PyObject *
|
||||
Z_set(void *ptr, PyObject *value, Py_ssize_t size)
|
||||
{
|
||||
PyObject *keep;
|
||||
wchar_t *buffer;
|
||||
|
||||
if (value == Py_None) {
|
||||
*(wchar_t **)ptr = NULL;
|
||||
Py_INCREF(value);
|
||||
|
@ -1420,37 +1413,20 @@ Z_set(void *ptr, PyObject *value, Py_ssize_t size)
|
|||
"unicode string or integer address expected instead of %s instance",
|
||||
value->ob_type->tp_name);
|
||||
return NULL;
|
||||
} else
|
||||
Py_INCREF(value);
|
||||
#if Py_UNICODE_SIZE == SIZEOF_WCHAR_T
|
||||
/* We can copy directly. Hm, are unicode objects always NUL
|
||||
terminated in Python, internally?
|
||||
*/
|
||||
*(wchar_t **)ptr = (wchar_t *) PyUnicode_AS_UNICODE(value);
|
||||
return value;
|
||||
#else
|
||||
{
|
||||
/* We must create a wchar_t* buffer from the unicode object,
|
||||
and keep it alive */
|
||||
PyObject *keep;
|
||||
wchar_t *buffer;
|
||||
|
||||
buffer = PyUnicode_AsWideCharString(value, NULL);
|
||||
if (!buffer) {
|
||||
Py_DECREF(value);
|
||||
return NULL;
|
||||
}
|
||||
keep = PyCapsule_New(buffer, CTYPES_CFIELD_CAPSULE_NAME_PYMEM, pymem_destructor);
|
||||
if (!keep) {
|
||||
Py_DECREF(value);
|
||||
PyMem_Free(buffer);
|
||||
return NULL;
|
||||
}
|
||||
*(wchar_t **)ptr = (wchar_t *)buffer;
|
||||
Py_DECREF(value);
|
||||
return keep;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We must create a wchar_t* buffer from the unicode object,
|
||||
and keep it alive */
|
||||
buffer = PyUnicode_AsWideCharString(value, NULL);
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
keep = PyCapsule_New(buffer, CTYPES_CFIELD_CAPSULE_NAME_PYMEM, pymem_destructor);
|
||||
if (!keep) {
|
||||
PyMem_Free(buffer);
|
||||
return NULL;
|
||||
}
|
||||
*(wchar_t **)ptr = buffer;
|
||||
return keep;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
|
|
@ -203,8 +203,11 @@ PyCurses_ConvertToChtype(PyObject *obj, chtype *ch)
|
|||
} else if(PyBytes_Check(obj)
|
||||
&& (PyBytes_Size(obj) == 1)) {
|
||||
*ch = (chtype) *PyBytes_AsString(obj);
|
||||
} else if (PyUnicode_Check(obj) && PyUnicode_GetSize(obj) == 1) {
|
||||
*ch = (chtype) *PyUnicode_AS_UNICODE(obj);
|
||||
} else if (PyUnicode_Check(obj) && PyUnicode_GET_LENGTH(obj) == 1) {
|
||||
Py_UCS4 ucs = PyUnicode_READ(PyUnicode_KIND(obj),
|
||||
PyUnicode_DATA(obj),
|
||||
0);
|
||||
*ch = (chtype)ucs;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -985,9 +985,8 @@ append_keyword_tzinfo(PyObject *repr, PyObject *tzinfo)
|
|||
if (tzinfo == Py_None)
|
||||
return repr;
|
||||
/* Get rid of the trailing ')'. */
|
||||
assert(PyUnicode_AS_UNICODE(repr)[PyUnicode_GET_SIZE(repr)-1] == ')');
|
||||
temp = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(repr),
|
||||
PyUnicode_GET_SIZE(repr) - 1);
|
||||
assert(PyUnicode_READ_CHAR(repr, PyUnicode_GET_LENGTH(repr)-1) == ')');
|
||||
temp = PyUnicode_Substring(repr, 0, PyUnicode_GET_LENGTH(repr) - 1);
|
||||
Py_DECREF(repr);
|
||||
if (temp == NULL)
|
||||
return NULL;
|
||||
|
@ -4214,9 +4213,9 @@ static PyObject *
|
|||
datetime_strptime(PyObject *cls, PyObject *args)
|
||||
{
|
||||
static PyObject *module = NULL;
|
||||
const Py_UNICODE *string, *format;
|
||||
PyObject *string, *format;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "uu:strptime", &string, &format))
|
||||
if (!PyArg_ParseTuple(args, "UU:strptime", &string, &format))
|
||||
return NULL;
|
||||
|
||||
if (module == NULL) {
|
||||
|
@ -4224,7 +4223,7 @@ datetime_strptime(PyObject *cls, PyObject *args)
|
|||
if (module == NULL)
|
||||
return NULL;
|
||||
}
|
||||
return PyObject_CallMethod(module, "_strptime_datetime", "Ouu",
|
||||
return PyObject_CallMethod(module, "_strptime_datetime", "OOO",
|
||||
cls, string, format);
|
||||
}
|
||||
|
||||
|
|
|
@ -212,6 +212,7 @@ dbm_contains(PyObject *self, PyObject *arg)
|
|||
{
|
||||
dbmobject *dp = (dbmobject *)self;
|
||||
datum key, val;
|
||||
Py_ssize_t size;
|
||||
|
||||
if ((dp)->di_dbm == NULL) {
|
||||
PyErr_SetString(DbmError,
|
||||
|
@ -219,8 +220,9 @@ dbm_contains(PyObject *self, PyObject *arg)
|
|||
return -1;
|
||||
}
|
||||
if (PyUnicode_Check(arg)) {
|
||||
arg = _PyUnicode_AsDefaultEncodedString(arg);
|
||||
if (arg == NULL)
|
||||
key.dptr = PyUnicode_AsUTF8AndSize(arg, &size);
|
||||
key.dsize = size;
|
||||
if (key.dptr == NULL)
|
||||
return -1;
|
||||
}
|
||||
if (!PyBytes_Check(arg)) {
|
||||
|
@ -229,8 +231,10 @@ dbm_contains(PyObject *self, PyObject *arg)
|
|||
arg->ob_type->tp_name);
|
||||
return -1;
|
||||
}
|
||||
key.dptr = PyBytes_AS_STRING(arg);
|
||||
key.dsize = PyBytes_GET_SIZE(arg);
|
||||
else {
|
||||
key.dptr = PyBytes_AS_STRING(arg);
|
||||
key.dsize = PyBytes_GET_SIZE(arg);
|
||||
}
|
||||
val = dbm_fetch(dp->di_dbm, key);
|
||||
return val.dptr != NULL;
|
||||
}
|
||||
|
|
|
@ -723,13 +723,16 @@ checkpath(PyObject* tag)
|
|||
(ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
|
||||
|
||||
if (PyUnicode_Check(tag)) {
|
||||
Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
|
||||
for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
|
||||
if (p[i] == '{')
|
||||
const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
|
||||
void *data = PyUnicode_DATA(tag);
|
||||
unsigned int kind = PyUnicode_KIND(tag);
|
||||
for (i = 0; i < len; i++) {
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||
if (ch == '{')
|
||||
check = 0;
|
||||
else if (p[i] == '}')
|
||||
else if (ch == '}')
|
||||
check = 1;
|
||||
else if (check && PATHCHAR(p[i]))
|
||||
else if (check && PATHCHAR(ch))
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
@ -2401,9 +2404,10 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
|
|||
XML_Encoding *info)
|
||||
{
|
||||
PyObject* u;
|
||||
Py_UNICODE* p;
|
||||
unsigned char s[256];
|
||||
int i;
|
||||
void *data;
|
||||
unsigned int kind;
|
||||
|
||||
memset(info, 0, sizeof(XML_Encoding));
|
||||
|
||||
|
@ -2413,17 +2417,20 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
|
|||
u = PyUnicode_Decode((char*) s, 256, name, "replace");
|
||||
if (!u)
|
||||
return XML_STATUS_ERROR;
|
||||
if (PyUnicode_READY(u))
|
||||
return XML_STATUS_ERROR;
|
||||
|
||||
if (PyUnicode_GET_SIZE(u) != 256) {
|
||||
if (PyUnicode_GET_LENGTH(u) != 256) {
|
||||
Py_DECREF(u);
|
||||
return XML_STATUS_ERROR;
|
||||
}
|
||||
|
||||
p = PyUnicode_AS_UNICODE(u);
|
||||
|
||||
kind = PyUnicode_KIND(u);
|
||||
data = PyUnicode_DATA(u);
|
||||
for (i = 0; i < 256; i++) {
|
||||
if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
|
||||
info->map[i] = p[i];
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||
if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
|
||||
info->map[i] = ch;
|
||||
else
|
||||
info->map[i] = -1;
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ extern PyObject *_PyIncrementalNewlineDecoder_decode(
|
|||
Otherwise, the function will scan further and return garbage. */
|
||||
extern Py_ssize_t _PyIO_find_line_ending(
|
||||
int translated, int universal, PyObject *readnl,
|
||||
Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed);
|
||||
int kind, char *start, char *end, Py_ssize_t *consumed);
|
||||
|
||||
|
||||
#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
Py_UNICODE *buf;
|
||||
Py_UCS4 *buf;
|
||||
Py_ssize_t pos;
|
||||
Py_ssize_t string_size;
|
||||
size_t buf_size;
|
||||
|
@ -56,7 +56,7 @@ resize_buffer(stringio *self, size_t size)
|
|||
/* Here, unsigned types are used to avoid dealing with signed integer
|
||||
overflow, which is undefined in C. */
|
||||
size_t alloc = self->buf_size;
|
||||
Py_UNICODE *new_buf = NULL;
|
||||
Py_UCS4 *new_buf = NULL;
|
||||
|
||||
assert(self->buf != NULL);
|
||||
|
||||
|
@ -84,10 +84,9 @@ resize_buffer(stringio *self, size_t size)
|
|||
alloc = size + 1;
|
||||
}
|
||||
|
||||
if (alloc > ((size_t)-1) / sizeof(Py_UNICODE))
|
||||
if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
|
||||
goto overflow;
|
||||
new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf,
|
||||
alloc * sizeof(Py_UNICODE));
|
||||
new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
|
||||
if (new_buf == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
|
@ -108,9 +107,9 @@ resize_buffer(stringio *self, size_t size)
|
|||
static Py_ssize_t
|
||||
write_str(stringio *self, PyObject *obj)
|
||||
{
|
||||
Py_UNICODE *str;
|
||||
Py_ssize_t len;
|
||||
PyObject *decoded = NULL;
|
||||
|
||||
assert(self->buf != NULL);
|
||||
assert(self->pos >= 0);
|
||||
|
||||
|
@ -132,8 +131,7 @@ write_str(stringio *self, PyObject *obj)
|
|||
return -1;
|
||||
|
||||
assert(PyUnicode_Check(decoded));
|
||||
str = PyUnicode_AS_UNICODE(decoded);
|
||||
len = PyUnicode_GET_SIZE(decoded);
|
||||
len = PyUnicode_GET_LENGTH(decoded);
|
||||
|
||||
assert(len >= 0);
|
||||
|
||||
|
@ -161,18 +159,21 @@ write_str(stringio *self, PyObject *obj)
|
|||
|
||||
*/
|
||||
memset(self->buf + self->string_size, '\0',
|
||||
(self->pos - self->string_size) * sizeof(Py_UNICODE));
|
||||
(self->pos - self->string_size) * sizeof(Py_UCS4));
|
||||
}
|
||||
|
||||
/* Copy the data to the internal buffer, overwriting some of the
|
||||
existing data if self->pos < self->string_size. */
|
||||
memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
|
||||
self->pos += len;
|
||||
if (!PyUnicode_AsUCS4(decoded,
|
||||
self->buf + self->pos,
|
||||
self->buf_size - self->pos,
|
||||
0))
|
||||
goto fail;
|
||||
|
||||
/* Set the new length of the internal string if it has changed. */
|
||||
if (self->string_size < self->pos) {
|
||||
self->pos += len;
|
||||
if (self->string_size < self->pos)
|
||||
self->string_size = self->pos;
|
||||
}
|
||||
|
||||
Py_DECREF(decoded);
|
||||
return 0;
|
||||
|
@ -190,7 +191,8 @@ stringio_getvalue(stringio *self)
|
|||
{
|
||||
CHECK_INITIALIZED(self);
|
||||
CHECK_CLOSED(self);
|
||||
return PyUnicode_FromUnicode(self->buf, self->string_size);
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
|
||||
self->string_size);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(stringio_tell_doc,
|
||||
|
@ -214,7 +216,7 @@ static PyObject *
|
|||
stringio_read(stringio *self, PyObject *args)
|
||||
{
|
||||
Py_ssize_t size, n;
|
||||
Py_UNICODE *output;
|
||||
Py_UCS4 *output;
|
||||
PyObject *arg = Py_None;
|
||||
|
||||
CHECK_INITIALIZED(self);
|
||||
|
@ -247,19 +249,19 @@ stringio_read(stringio *self, PyObject *args)
|
|||
|
||||
output = self->buf + self->pos;
|
||||
self->pos += size;
|
||||
return PyUnicode_FromUnicode(output, size);
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
|
||||
}
|
||||
|
||||
/* Internal helper, used by stringio_readline and stringio_iternext */
|
||||
static PyObject *
|
||||
_stringio_readline(stringio *self, Py_ssize_t limit)
|
||||
{
|
||||
Py_UNICODE *start, *end, old_char;
|
||||
Py_UCS4 *start, *end, old_char;
|
||||
Py_ssize_t len, consumed;
|
||||
|
||||
/* In case of overseek, return the empty string */
|
||||
if (self->pos >= self->string_size)
|
||||
return PyUnicode_FromString("");
|
||||
return PyUnicode_New(0, 0);
|
||||
|
||||
start = self->buf + self->pos;
|
||||
if (limit < 0 || limit > self->string_size - self->pos)
|
||||
|
@ -270,14 +272,14 @@ _stringio_readline(stringio *self, Py_ssize_t limit)
|
|||
*end = '\0';
|
||||
len = _PyIO_find_line_ending(
|
||||
self->readtranslate, self->readuniversal, self->readnl,
|
||||
start, end, &consumed);
|
||||
PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
|
||||
*end = old_char;
|
||||
/* If we haven't found any line ending, we just return everything
|
||||
(`consumed` is ignored). */
|
||||
if (len < 0)
|
||||
len = limit;
|
||||
self->pos += len;
|
||||
return PyUnicode_FromUnicode(start, len);
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(stringio_readline_doc,
|
||||
|
@ -462,8 +464,10 @@ stringio_write(stringio *self, PyObject *obj)
|
|||
Py_TYPE(obj)->tp_name);
|
||||
return NULL;
|
||||
}
|
||||
if (PyUnicode_READY(obj))
|
||||
return NULL;
|
||||
CHECK_CLOSED(self);
|
||||
size = PyUnicode_GET_SIZE(obj);
|
||||
size = PyUnicode_GET_LENGTH(obj);
|
||||
|
||||
if (size > 0 && write_str(self, obj) < 0)
|
||||
return NULL;
|
||||
|
@ -535,7 +539,7 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
/* tp_alloc initializes all the fields to zero. So we don't have to
|
||||
initialize them here. */
|
||||
|
||||
self->buf = (Py_UNICODE *)PyMem_Malloc(0);
|
||||
self->buf = (Py_UCS4 *)PyMem_Malloc(0);
|
||||
if (self->buf == NULL) {
|
||||
Py_DECREF(self);
|
||||
return PyErr_NoMemory();
|
||||
|
@ -747,11 +751,22 @@ stringio_setstate(stringio *self, PyObject *state)
|
|||
once by __init__. So we do not take any chance and replace object's
|
||||
buffer completely. */
|
||||
{
|
||||
Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0));
|
||||
Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0));
|
||||
if (resize_buffer(self, bufsize) < 0)
|
||||
PyObject *item;
|
||||
Py_UCS4 *buf;
|
||||
Py_ssize_t bufsize;
|
||||
|
||||
item = PyTuple_GET_ITEM(state, 0);
|
||||
buf = PyUnicode_AsUCS4Copy(item);
|
||||
if (buf == NULL)
|
||||
return NULL;
|
||||
memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE));
|
||||
bufsize = PyUnicode_GET_LENGTH(item);
|
||||
|
||||
if (resize_buffer(self, bufsize) < 0) {
|
||||
PyMem_Free(buf);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
|
||||
PyMem_Free(buf);
|
||||
self->string_size = bufsize;
|
||||
}
|
||||
|
||||
|
|
|
@ -274,18 +274,28 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
goto error;
|
||||
}
|
||||
|
||||
output_len = PyUnicode_GET_SIZE(output);
|
||||
if (PyUnicode_READY(output) == -1)
|
||||
goto error;
|
||||
|
||||
output_len = PyUnicode_GET_LENGTH(output);
|
||||
if (self->pendingcr && (final || output_len > 0)) {
|
||||
Py_UNICODE *out;
|
||||
PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
|
||||
/* Prefix output with CR */
|
||||
int kind;
|
||||
PyObject *modified;
|
||||
char *out;
|
||||
|
||||
modified = PyUnicode_New(output_len + 1,
|
||||
PyUnicode_MAX_CHAR_VALUE(output));
|
||||
if (modified == NULL)
|
||||
goto error;
|
||||
out = PyUnicode_AS_UNICODE(modified);
|
||||
out[0] = '\r';
|
||||
memcpy(out + 1, PyUnicode_AS_UNICODE(output),
|
||||
output_len * sizeof(Py_UNICODE));
|
||||
kind = PyUnicode_KIND(modified);
|
||||
out = PyUnicode_DATA(modified);
|
||||
PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
|
||||
memcpy(out + PyUnicode_KIND_SIZE(kind, 1),
|
||||
PyUnicode_DATA(output),
|
||||
PyUnicode_KIND_SIZE(kind, output_len));
|
||||
Py_DECREF(output);
|
||||
output = modified;
|
||||
output = modified; /* output remains ready */
|
||||
self->pendingcr = 0;
|
||||
output_len++;
|
||||
}
|
||||
|
@ -295,21 +305,13 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
*/
|
||||
if (!final) {
|
||||
if (output_len > 0
|
||||
&& PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
|
||||
|
||||
if (Py_REFCNT(output) == 1) {
|
||||
if (PyUnicode_Resize(&output, output_len - 1) < 0)
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
PyObject *modified = PyUnicode_FromUnicode(
|
||||
PyUnicode_AS_UNICODE(output),
|
||||
output_len - 1);
|
||||
if (modified == NULL)
|
||||
goto error;
|
||||
Py_DECREF(output);
|
||||
output = modified;
|
||||
}
|
||||
&& PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
|
||||
{
|
||||
PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
|
||||
if (modified == NULL)
|
||||
goto error;
|
||||
Py_DECREF(output);
|
||||
output = modified;
|
||||
self->pendingcr = 1;
|
||||
}
|
||||
}
|
||||
|
@ -317,13 +319,15 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
/* Record which newlines are read and do newline translation if desired,
|
||||
all in one pass. */
|
||||
{
|
||||
Py_UNICODE *in_str;
|
||||
void *in_str;
|
||||
Py_ssize_t len;
|
||||
int seennl = self->seennl;
|
||||
int only_lf = 0;
|
||||
int kind;
|
||||
|
||||
in_str = PyUnicode_AS_UNICODE(output);
|
||||
len = PyUnicode_GET_SIZE(output);
|
||||
in_str = PyUnicode_DATA(output);
|
||||
len = PyUnicode_GET_LENGTH(output);
|
||||
kind = PyUnicode_KIND(output);
|
||||
|
||||
if (len == 0)
|
||||
return output;
|
||||
|
@ -332,7 +336,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
for the \r *byte* with the libc's optimized memchr.
|
||||
*/
|
||||
if (seennl == SEEN_LF || seennl == 0) {
|
||||
only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
|
||||
only_lf = (memchr(in_str, '\r', PyUnicode_KIND_SIZE(kind, len)) == NULL);
|
||||
}
|
||||
|
||||
if (only_lf) {
|
||||
|
@ -340,21 +344,19 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
(there's nothing else to be done, even when in translation mode)
|
||||
*/
|
||||
if (seennl == 0 &&
|
||||
memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
|
||||
Py_UNICODE *s, *end;
|
||||
s = in_str;
|
||||
end = in_str + len;
|
||||
memchr(in_str, '\n', PyUnicode_KIND_SIZE(kind, len)) != NULL) {
|
||||
Py_ssize_t i = 0;
|
||||
for (;;) {
|
||||
Py_UNICODE c;
|
||||
/* Fast loop for non-control characters */
|
||||
while (*s > '\n')
|
||||
s++;
|
||||
c = *s++;
|
||||
while (PyUnicode_READ(kind, in_str, i) > '\n')
|
||||
i++;
|
||||
c = PyUnicode_READ(kind, in_str, i++);
|
||||
if (c == '\n') {
|
||||
seennl |= SEEN_LF;
|
||||
break;
|
||||
}
|
||||
if (s > end)
|
||||
if (i >= len)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -362,29 +364,27 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
need translating */
|
||||
}
|
||||
else if (!self->translate) {
|
||||
Py_UNICODE *s, *end;
|
||||
Py_ssize_t i = 0;
|
||||
/* We have already seen all newline types, no need to scan again */
|
||||
if (seennl == SEEN_ALL)
|
||||
goto endscan;
|
||||
s = in_str;
|
||||
end = in_str + len;
|
||||
for (;;) {
|
||||
Py_UNICODE c;
|
||||
Py_UCS4 c;
|
||||
/* Fast loop for non-control characters */
|
||||
while (*s > '\r')
|
||||
s++;
|
||||
c = *s++;
|
||||
while (PyUnicode_READ(kind, in_str, i) > '\r')
|
||||
i++;
|
||||
c = PyUnicode_READ(kind, in_str, i++);
|
||||
if (c == '\n')
|
||||
seennl |= SEEN_LF;
|
||||
else if (c == '\r') {
|
||||
if (*s == '\n') {
|
||||
if (PyUnicode_READ(kind, in_str, i) == '\n') {
|
||||
seennl |= SEEN_CRLF;
|
||||
s++;
|
||||
i++;
|
||||
}
|
||||
else
|
||||
seennl |= SEEN_CR;
|
||||
}
|
||||
if (s > end)
|
||||
if (i >= len)
|
||||
break;
|
||||
if (seennl == SEEN_ALL)
|
||||
break;
|
||||
|
@ -393,61 +393,50 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
|||
;
|
||||
}
|
||||
else {
|
||||
PyObject *translated = NULL;
|
||||
Py_UNICODE *out_str;
|
||||
Py_UNICODE *in, *out, *end;
|
||||
if (Py_REFCNT(output) != 1) {
|
||||
/* We could try to optimize this so that we only do a copy
|
||||
when there is something to translate. On the other hand,
|
||||
most decoders should only output non-shared strings, i.e.
|
||||
translation is done in place. */
|
||||
translated = PyUnicode_FromUnicode(NULL, len);
|
||||
if (translated == NULL)
|
||||
goto error;
|
||||
assert(Py_REFCNT(translated) == 1);
|
||||
memcpy(PyUnicode_AS_UNICODE(translated),
|
||||
PyUnicode_AS_UNICODE(output),
|
||||
len * sizeof(Py_UNICODE));
|
||||
void *translated;
|
||||
int kind = PyUnicode_KIND(output);
|
||||
void *in_str = PyUnicode_DATA(output);
|
||||
Py_ssize_t in, out;
|
||||
/* XXX: Previous in-place translation here is disabled as
|
||||
resizing is not possible anymore */
|
||||
/* We could try to optimize this so that we only do a copy
|
||||
when there is something to translate. On the other hand,
|
||||
we already know there is a \r byte, so chances are high
|
||||
that something needs to be done. */
|
||||
translated = PyMem_Malloc(PyUnicode_KIND_SIZE(kind, len));
|
||||
if (translated == NULL) {
|
||||
PyErr_NoMemory();
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
translated = output;
|
||||
}
|
||||
out_str = PyUnicode_AS_UNICODE(translated);
|
||||
in = in_str;
|
||||
out = out_str;
|
||||
end = in_str + len;
|
||||
in = out = 0;
|
||||
for (;;) {
|
||||
Py_UNICODE c;
|
||||
Py_UCS4 c;
|
||||
/* Fast loop for non-control characters */
|
||||
while ((c = *in++) > '\r')
|
||||
*out++ = c;
|
||||
while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
|
||||
PyUnicode_WRITE(kind, translated, out++, c);
|
||||
if (c == '\n') {
|
||||
*out++ = c;
|
||||
PyUnicode_WRITE(kind, translated, out++, c);
|
||||
seennl |= SEEN_LF;
|
||||
continue;
|
||||
}
|
||||
if (c == '\r') {
|
||||
if (*in == '\n') {
|
||||
if (PyUnicode_READ(kind, in_str, in) == '\n') {
|
||||
in++;
|
||||
seennl |= SEEN_CRLF;
|
||||
}
|
||||
else
|
||||
seennl |= SEEN_CR;
|
||||
*out++ = '\n';
|
||||
PyUnicode_WRITE(kind, translated, out++, '\n');
|
||||
continue;
|
||||
}
|
||||
if (in > end)
|
||||
if (in > len)
|
||||
break;
|
||||
*out++ = c;
|
||||
}
|
||||
if (translated != output) {
|
||||
Py_DECREF(output);
|
||||
output = translated;
|
||||
}
|
||||
if (out - out_str != len) {
|
||||
if (PyUnicode_Resize(&output, out - out_str) < 0)
|
||||
goto error;
|
||||
PyUnicode_WRITE(kind, translated, out++, c);
|
||||
}
|
||||
Py_DECREF(output);
|
||||
output = PyUnicode_FromKindAndData(kind, translated, out);
|
||||
if (!output)
|
||||
goto error;
|
||||
}
|
||||
self->seennl |= seennl;
|
||||
}
|
||||
|
@ -705,9 +694,7 @@ typedef struct
|
|||
static PyObject *
|
||||
ascii_encode(textio *self, PyObject *text)
|
||||
{
|
||||
return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
|
||||
PyUnicode_GET_SIZE(text),
|
||||
PyBytes_AS_STRING(self->errors));
|
||||
return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -777,17 +764,13 @@ utf32_encode(textio *self, PyObject *text)
|
|||
static PyObject *
|
||||
utf8_encode(textio *self, PyObject *text)
|
||||
{
|
||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
|
||||
PyUnicode_GET_SIZE(text),
|
||||
PyBytes_AS_STRING(self->errors));
|
||||
return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
latin1_encode(textio *self, PyObject *text)
|
||||
{
|
||||
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
|
||||
PyUnicode_GET_SIZE(text),
|
||||
PyBytes_AS_STRING(self->errors));
|
||||
return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
|
||||
}
|
||||
|
||||
/* Map normalized encoding names onto the specialized encoding funcs */
|
||||
|
@ -1213,18 +1196,6 @@ textiowrapper_detach(textio *self)
|
|||
return buffer;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(const Py_UNICODE *)
|
||||
findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
|
||||
{
|
||||
/* like wcschr, but doesn't stop at NULL characters */
|
||||
while (size-- > 0) {
|
||||
if (*s == ch)
|
||||
return s;
|
||||
s++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Flush the internal write buffer. This doesn't explicitly flush the
|
||||
underlying buffered object, though. */
|
||||
static int
|
||||
|
@ -1269,6 +1240,9 @@ textiowrapper_write(textio *self, PyObject *args)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicode_READY(text) == -1)
|
||||
return NULL;
|
||||
|
||||
CHECK_CLOSED(self);
|
||||
|
||||
if (self->encoder == NULL)
|
||||
|
@ -1276,11 +1250,10 @@ textiowrapper_write(textio *self, PyObject *args)
|
|||
|
||||
Py_INCREF(text);
|
||||
|
||||
textlen = PyUnicode_GetSize(text);
|
||||
textlen = PyUnicode_GET_LENGTH(text);
|
||||
|
||||
if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
|
||||
if (findchar(PyUnicode_AS_UNICODE(text),
|
||||
PyUnicode_GET_SIZE(text), '\n'))
|
||||
if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
|
||||
haslf = 1;
|
||||
|
||||
if (haslf && self->writetranslate && self->writenl != NULL) {
|
||||
|
@ -1296,8 +1269,7 @@ textiowrapper_write(textio *self, PyObject *args)
|
|||
needflush = 1;
|
||||
else if (self->line_buffering &&
|
||||
(haslf ||
|
||||
findchar(PyUnicode_AS_UNICODE(text),
|
||||
PyUnicode_GET_SIZE(text), '\r')))
|
||||
PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
|
||||
needflush = 1;
|
||||
|
||||
/* XXX What if we were just reading? */
|
||||
|
@ -1369,7 +1341,8 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
|
|||
if (self->decoded_chars == NULL)
|
||||
return PyUnicode_FromStringAndSize(NULL, 0);
|
||||
|
||||
avail = (PyUnicode_GET_SIZE(self->decoded_chars)
|
||||
/* decoded_chars is guaranteed to be "ready". */
|
||||
avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
|
||||
- self->decoded_chars_used);
|
||||
|
||||
assert(avail >= 0);
|
||||
|
@ -1378,9 +1351,9 @@ textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
|
|||
n = avail;
|
||||
|
||||
if (self->decoded_chars_used > 0 || n < avail) {
|
||||
chars = PyUnicode_FromUnicode(
|
||||
PyUnicode_AS_UNICODE(self->decoded_chars)
|
||||
+ self->decoded_chars_used, n);
|
||||
chars = PyUnicode_Substring(self->decoded_chars,
|
||||
self->decoded_chars_used,
|
||||
self->decoded_chars_used + n);
|
||||
if (chars == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1464,8 +1437,10 @@ textiowrapper_read_chunk(textio *self)
|
|||
/* TODO sanity check: isinstance(decoded_chars, unicode) */
|
||||
if (decoded_chars == NULL)
|
||||
goto fail;
|
||||
if (PyUnicode_READY(decoded_chars) == -1)
|
||||
goto fail;
|
||||
textiowrapper_set_decoded_chars(self, decoded_chars);
|
||||
nchars = PyUnicode_GET_SIZE(decoded_chars);
|
||||
nchars = PyUnicode_GET_LENGTH(decoded_chars);
|
||||
if (nchars > 0)
|
||||
self->b2cratio = (double) nbytes / nchars;
|
||||
else
|
||||
|
@ -1553,7 +1528,9 @@ textiowrapper_read(textio *self, PyObject *args)
|
|||
result = textiowrapper_get_decoded_chars(self, n);
|
||||
if (result == NULL)
|
||||
goto fail;
|
||||
remaining -= PyUnicode_GET_SIZE(result);
|
||||
if (PyUnicode_READY(result) == -1)
|
||||
goto fail;
|
||||
remaining -= PyUnicode_GET_LENGTH(result);
|
||||
|
||||
/* Keep reading chunks until we have n characters to return */
|
||||
while (remaining > 0) {
|
||||
|
@ -1573,7 +1550,7 @@ textiowrapper_read(textio *self, PyObject *args)
|
|||
result = textiowrapper_get_decoded_chars(self, remaining);
|
||||
if (result == NULL)
|
||||
goto fail;
|
||||
remaining -= PyUnicode_GET_SIZE(result);
|
||||
remaining -= PyUnicode_GET_LENGTH(result);
|
||||
}
|
||||
if (chunks != NULL) {
|
||||
if (result != NULL && PyList_Append(chunks, result) < 0)
|
||||
|
@ -1596,33 +1573,34 @@ textiowrapper_read(textio *self, PyObject *args)
|
|||
/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
|
||||
that is to the NUL character. Otherwise the function will produce
|
||||
incorrect results. */
|
||||
static Py_UNICODE *
|
||||
find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
|
||||
static char *
|
||||
find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
|
||||
{
|
||||
Py_UNICODE *s = start;
|
||||
int size = PyUnicode_KIND_SIZE(kind, 1);
|
||||
for (;;) {
|
||||
while (*s > ch)
|
||||
s++;
|
||||
if (*s == ch)
|
||||
while (PyUnicode_READ(kind, s, 0) > ch)
|
||||
s += size;
|
||||
if (PyUnicode_READ(kind, s, 0) == ch)
|
||||
return s;
|
||||
if (s == end)
|
||||
return NULL;
|
||||
s++;
|
||||
s += size;
|
||||
}
|
||||
}
|
||||
|
||||
Py_ssize_t
|
||||
_PyIO_find_line_ending(
|
||||
int translated, int universal, PyObject *readnl,
|
||||
Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
|
||||
int kind, char *start, char *end, Py_ssize_t *consumed)
|
||||
{
|
||||
Py_ssize_t len = end - start;
|
||||
int size = PyUnicode_KIND_SIZE(kind, 1);
|
||||
Py_ssize_t len = ((char*)end - (char*)start)/size;
|
||||
|
||||
if (translated) {
|
||||
/* Newlines are already translated, only search for \n */
|
||||
Py_UNICODE *pos = find_control_char(start, end, '\n');
|
||||
char *pos = find_control_char(kind, start, end, '\n');
|
||||
if (pos != NULL)
|
||||
return pos - start + 1;
|
||||
return (pos - start)/size + 1;
|
||||
else {
|
||||
*consumed = len;
|
||||
return -1;
|
||||
|
@ -1632,63 +1610,66 @@ _PyIO_find_line_ending(
|
|||
/* Universal newline search. Find any of \r, \r\n, \n
|
||||
* The decoder ensures that \r\n are not split in two pieces
|
||||
*/
|
||||
Py_UNICODE *s = start;
|
||||
char *s = start;
|
||||
for (;;) {
|
||||
Py_UNICODE ch;
|
||||
Py_UCS4 ch;
|
||||
/* Fast path for non-control chars. The loop always ends
|
||||
since the Py_UNICODE storage is NUL-terminated. */
|
||||
while (*s > '\r')
|
||||
s++;
|
||||
while (PyUnicode_READ(kind, s, 0) > '\r')
|
||||
s += size;
|
||||
if (s >= end) {
|
||||
*consumed = len;
|
||||
return -1;
|
||||
}
|
||||
ch = *s++;
|
||||
ch = PyUnicode_READ(kind, s, 0);
|
||||
s += size;
|
||||
if (ch == '\n')
|
||||
return s - start;
|
||||
return (s - start)/size;
|
||||
if (ch == '\r') {
|
||||
if (*s == '\n')
|
||||
return s - start + 1;
|
||||
if (PyUnicode_READ(kind, s, 0) == '\n')
|
||||
return (s - start)/size + 1;
|
||||
else
|
||||
return s - start;
|
||||
return (s - start)/size;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Non-universal mode. */
|
||||
Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
|
||||
Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
|
||||
Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
|
||||
char *nl = PyUnicode_DATA(readnl);
|
||||
/* Assume that readnl is an ASCII character. */
|
||||
assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
|
||||
if (readnl_len == 1) {
|
||||
Py_UNICODE *pos = find_control_char(start, end, nl[0]);
|
||||
char *pos = find_control_char(kind, start, end, nl[0]);
|
||||
if (pos != NULL)
|
||||
return pos - start + 1;
|
||||
return (pos - start)/size + 1;
|
||||
*consumed = len;
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
Py_UNICODE *s = start;
|
||||
Py_UNICODE *e = end - readnl_len + 1;
|
||||
Py_UNICODE *pos;
|
||||
char *s = start;
|
||||
char *e = end - (readnl_len - 1)*size;
|
||||
char *pos;
|
||||
if (e < s)
|
||||
e = s;
|
||||
while (s < e) {
|
||||
Py_ssize_t i;
|
||||
Py_UNICODE *pos = find_control_char(s, end, nl[0]);
|
||||
char *pos = find_control_char(kind, s, end, nl[0]);
|
||||
if (pos == NULL || pos >= e)
|
||||
break;
|
||||
for (i = 1; i < readnl_len; i++) {
|
||||
if (pos[i] != nl[i])
|
||||
if (PyUnicode_READ(kind, pos, i) != nl[i])
|
||||
break;
|
||||
}
|
||||
if (i == readnl_len)
|
||||
return pos - start + readnl_len;
|
||||
s = pos + 1;
|
||||
return (pos - start)/size + readnl_len;
|
||||
s = pos + size;
|
||||
}
|
||||
pos = find_control_char(e, end, nl[0]);
|
||||
pos = find_control_char(kind, e, end, nl[0]);
|
||||
if (pos == NULL)
|
||||
*consumed = len;
|
||||
else
|
||||
*consumed = pos - start;
|
||||
*consumed = (pos - start)/size;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -1709,14 +1690,15 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
|
|||
chunked = 0;
|
||||
|
||||
while (1) {
|
||||
Py_UNICODE *ptr;
|
||||
char *ptr;
|
||||
Py_ssize_t line_len;
|
||||
int kind;
|
||||
Py_ssize_t consumed = 0;
|
||||
|
||||
/* First, get some data if necessary */
|
||||
res = 1;
|
||||
while (!self->decoded_chars ||
|
||||
!PyUnicode_GET_SIZE(self->decoded_chars)) {
|
||||
!PyUnicode_GET_LENGTH(self->decoded_chars)) {
|
||||
res = textiowrapper_read_chunk(self);
|
||||
if (res < 0)
|
||||
goto error;
|
||||
|
@ -1741,18 +1723,24 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
|
|||
assert(self->decoded_chars_used == 0);
|
||||
line = PyUnicode_Concat(remaining, self->decoded_chars);
|
||||
start = 0;
|
||||
offset_to_buffer = PyUnicode_GET_SIZE(remaining);
|
||||
offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
|
||||
Py_CLEAR(remaining);
|
||||
if (line == NULL)
|
||||
goto error;
|
||||
if (PyUnicode_READY(line) == -1)
|
||||
goto error;
|
||||
}
|
||||
|
||||
ptr = PyUnicode_AS_UNICODE(line);
|
||||
line_len = PyUnicode_GET_SIZE(line);
|
||||
ptr = PyUnicode_DATA(line);
|
||||
line_len = PyUnicode_GET_LENGTH(line);
|
||||
kind = PyUnicode_KIND(line);
|
||||
|
||||
endpos = _PyIO_find_line_ending(
|
||||
self->readtranslate, self->readuniversal, self->readnl,
|
||||
ptr + start, ptr + line_len, &consumed);
|
||||
kind,
|
||||
ptr + PyUnicode_KIND_SIZE(kind, start),
|
||||
ptr + PyUnicode_KIND_SIZE(kind, line_len),
|
||||
&consumed);
|
||||
if (endpos >= 0) {
|
||||
endpos += start;
|
||||
if (limit >= 0 && (endpos - start) + chunked >= limit)
|
||||
|
@ -1776,21 +1764,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
|
|||
if (chunks == NULL)
|
||||
goto error;
|
||||
}
|
||||
s = PyUnicode_FromUnicode(ptr + start, endpos - start);
|
||||
s = PyUnicode_Substring(line, start, endpos);
|
||||
if (s == NULL)
|
||||
goto error;
|
||||
if (PyList_Append(chunks, s) < 0) {
|
||||
Py_DECREF(s);
|
||||
goto error;
|
||||
}
|
||||
chunked += PyUnicode_GET_SIZE(s);
|
||||
chunked += PyUnicode_GET_LENGTH(s);
|
||||
Py_DECREF(s);
|
||||
}
|
||||
/* There may be some remaining bytes we'll have to prepend to the
|
||||
next chunk of data */
|
||||
if (endpos < line_len) {
|
||||
remaining = PyUnicode_FromUnicode(
|
||||
ptr + endpos, line_len - endpos);
|
||||
remaining = PyUnicode_Substring(line, endpos, line_len);
|
||||
if (remaining == NULL)
|
||||
goto error;
|
||||
}
|
||||
|
@ -1802,19 +1789,12 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
|
|||
if (line != NULL) {
|
||||
/* Our line ends in the current buffer */
|
||||
self->decoded_chars_used = endpos - offset_to_buffer;
|
||||
if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
|
||||
if (start == 0 && Py_REFCNT(line) == 1) {
|
||||
if (PyUnicode_Resize(&line, endpos) < 0)
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
PyObject *s = PyUnicode_FromUnicode(
|
||||
PyUnicode_AS_UNICODE(line) + start, endpos - start);
|
||||
Py_CLEAR(line);
|
||||
if (s == NULL)
|
||||
goto error;
|
||||
line = s;
|
||||
}
|
||||
if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
|
||||
PyObject *s = PyUnicode_Substring(line, start, endpos);
|
||||
Py_CLEAR(line);
|
||||
if (s == NULL)
|
||||
goto error;
|
||||
line = s;
|
||||
}
|
||||
}
|
||||
if (remaining != NULL) {
|
||||
|
@ -1828,16 +1808,20 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
|
|||
Py_CLEAR(remaining);
|
||||
}
|
||||
if (chunks != NULL) {
|
||||
if (line != NULL && PyList_Append(chunks, line) < 0)
|
||||
goto error;
|
||||
Py_CLEAR(line);
|
||||
if (line != NULL) {
|
||||
if (PyList_Append(chunks, line) < 0)
|
||||
goto error;
|
||||
Py_DECREF(line);
|
||||
}
|
||||
line = PyUnicode_Join(_PyIO_empty_str, chunks);
|
||||
if (line == NULL)
|
||||
goto error;
|
||||
Py_DECREF(chunks);
|
||||
Py_CLEAR(chunks);
|
||||
}
|
||||
if (line == NULL) {
|
||||
Py_INCREF(_PyIO_empty_str);
|
||||
line = _PyIO_empty_str;
|
||||
}
|
||||
if (line == NULL)
|
||||
line = PyUnicode_FromStringAndSize(NULL, 0);
|
||||
|
||||
return line;
|
||||
|
||||
|
@ -2128,6 +2112,10 @@ textiowrapper_seek(textio *self, PyObject *args)
|
|||
|
||||
if (decoded == NULL)
|
||||
goto fail;
|
||||
if (PyUnicode_READY(decoded) == -1) {
|
||||
Py_DECREF(decoded);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
textiowrapper_set_decoded_chars(self, decoded);
|
||||
|
||||
|
@ -2250,7 +2238,7 @@ textiowrapper_tell(textio *self, PyObject *args)
|
|||
if (_decoded == NULL) \
|
||||
goto fail; \
|
||||
assert (PyUnicode_Check(_decoded)); \
|
||||
res = PyUnicode_GET_SIZE(_decoded); \
|
||||
res = PyUnicode_GET_LENGTH(_decoded); \
|
||||
Py_DECREF(_decoded); \
|
||||
} while (0)
|
||||
|
||||
|
@ -2333,7 +2321,7 @@ textiowrapper_tell(textio *self, PyObject *args)
|
|||
if (decoded == NULL)
|
||||
goto fail;
|
||||
assert (PyUnicode_Check(decoded));
|
||||
chars_decoded += PyUnicode_GET_SIZE(decoded);
|
||||
chars_decoded += PyUnicode_GET_LENGTH(decoded);
|
||||
Py_DECREF(decoded);
|
||||
cookie.need_eof = 1;
|
||||
|
||||
|
@ -2559,10 +2547,10 @@ textiowrapper_iternext(textio *self)
|
|||
}
|
||||
}
|
||||
|
||||
if (line == NULL)
|
||||
if (line == NULL || PyUnicode_READY(line) == -1)
|
||||
return NULL;
|
||||
|
||||
if (PyUnicode_GET_SIZE(line) == 0) {
|
||||
if (PyUnicode_GET_LENGTH(line) == 0) {
|
||||
/* Reached EOF or would have blocked */
|
||||
Py_DECREF(line);
|
||||
Py_CLEAR(self->snapshot);
|
||||
|
|
252
Modules/_json.c
252
Modules/_json.c
|
@ -238,13 +238,6 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj);
|
|||
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
|
||||
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
|
||||
|
||||
#define MIN_EXPANSION 6
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
#define MAX_EXPANSION (2 * MIN_EXPANSION)
|
||||
#else
|
||||
#define MAX_EXPANSION MIN_EXPANSION
|
||||
#endif
|
||||
|
||||
static int
|
||||
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
|
||||
{
|
||||
|
@ -263,7 +256,7 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
|
|||
}
|
||||
|
||||
static Py_ssize_t
|
||||
ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
|
||||
ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
|
||||
{
|
||||
/* Escape unicode code point c to ASCII escape sequences
|
||||
in char *output. output must have at least 12 bytes unused to
|
||||
|
@ -278,10 +271,9 @@ ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
|
|||
case '\r': output[chars++] = 'r'; break;
|
||||
case '\t': output[chars++] = 't'; break;
|
||||
default:
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
if (c >= 0x10000) {
|
||||
/* UTF-16 surrogate pair */
|
||||
Py_UNICODE v = c - 0x10000;
|
||||
Py_UCS4 v = c - 0x10000;
|
||||
c = 0xd800 | ((v >> 10) & 0x3ff);
|
||||
output[chars++] = 'u';
|
||||
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
|
||||
|
@ -291,7 +283,6 @@ ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
|
|||
c = 0xdc00 | (v & 0x3ff);
|
||||
output[chars++] = '\\';
|
||||
}
|
||||
#endif
|
||||
output[chars++] = 'u';
|
||||
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
|
||||
output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
|
||||
|
@ -308,54 +299,52 @@ ascii_escape_unicode(PyObject *pystr)
|
|||
Py_ssize_t i;
|
||||
Py_ssize_t input_chars;
|
||||
Py_ssize_t output_size;
|
||||
Py_ssize_t max_output_size;
|
||||
Py_ssize_t chars;
|
||||
PyObject *rval;
|
||||
Py_UNICODE *output;
|
||||
Py_UNICODE *input_unicode;
|
||||
void *input;
|
||||
unsigned char *output;
|
||||
int kind;
|
||||
|
||||
input_chars = PyUnicode_GET_SIZE(pystr);
|
||||
input_unicode = PyUnicode_AS_UNICODE(pystr);
|
||||
if (PyUnicode_READY(pystr) == -1)
|
||||
return NULL;
|
||||
|
||||
/* One char input can be up to 6 chars output, estimate 4 of these */
|
||||
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
|
||||
max_output_size = 2 + (input_chars * MAX_EXPANSION);
|
||||
rval = PyUnicode_FromStringAndSize(NULL, output_size);
|
||||
input_chars = PyUnicode_GET_LENGTH(pystr);
|
||||
input = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
|
||||
/* Compute the output size */
|
||||
for (i = 0, output_size = 2; i < input_chars; i++) {
|
||||
Py_UCS4 c = PyUnicode_READ(kind, input, i);
|
||||
if (S_CHAR(c))
|
||||
output_size++;
|
||||
else {
|
||||
switch(c) {
|
||||
case '\\': case '"': case '\b': case '\f':
|
||||
case '\n': case '\r': case '\t':
|
||||
output_size += 2; break;
|
||||
default:
|
||||
output_size += c >= 0x10000 ? 12 : 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rval = PyUnicode_New(output_size, 127);
|
||||
if (rval == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
output = PyUnicode_AS_UNICODE(rval);
|
||||
output = PyUnicode_1BYTE_DATA(rval);
|
||||
chars = 0;
|
||||
output[chars++] = '"';
|
||||
for (i = 0; i < input_chars; i++) {
|
||||
Py_UNICODE c = input_unicode[i];
|
||||
Py_UCS4 c = PyUnicode_READ(kind, input, i);
|
||||
if (S_CHAR(c)) {
|
||||
output[chars++] = c;
|
||||
}
|
||||
else {
|
||||
chars = ascii_escape_unichar(c, output, chars);
|
||||
}
|
||||
if (output_size - chars < (1 + MAX_EXPANSION)) {
|
||||
/* There's more than four, so let's resize by a lot */
|
||||
Py_ssize_t new_output_size = output_size * 2;
|
||||
/* This is an upper bound */
|
||||
if (new_output_size > max_output_size) {
|
||||
new_output_size = max_output_size;
|
||||
}
|
||||
/* Make sure that the output size changed before resizing */
|
||||
if (new_output_size != output_size) {
|
||||
output_size = new_output_size;
|
||||
if (PyUnicode_Resize(&rval, output_size) == -1) {
|
||||
return NULL;
|
||||
}
|
||||
output = PyUnicode_AS_UNICODE(rval);
|
||||
}
|
||||
}
|
||||
}
|
||||
output[chars++] = '"';
|
||||
if (PyUnicode_Resize(&rval, chars) == -1) {
|
||||
return NULL;
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
|
@ -436,22 +425,30 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
|||
Return value is a new PyUnicode
|
||||
*/
|
||||
PyObject *rval = NULL;
|
||||
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
|
||||
Py_ssize_t len;
|
||||
Py_ssize_t begin = end - 1;
|
||||
Py_ssize_t next /* = begin */;
|
||||
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
|
||||
const void *buf;
|
||||
int kind;
|
||||
PyObject *chunks = NULL;
|
||||
PyObject *chunk = NULL;
|
||||
|
||||
if (PyUnicode_READY(pystr) == -1)
|
||||
return 0;
|
||||
|
||||
len = PyUnicode_GET_LENGTH(pystr);
|
||||
buf = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
|
||||
if (end < 0 || len <= end) {
|
||||
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
|
||||
goto bail;
|
||||
}
|
||||
while (1) {
|
||||
/* Find the end of the string or the next escape */
|
||||
Py_UNICODE c = 0;
|
||||
Py_UCS4 c = 0;
|
||||
for (next = end; next < len; next++) {
|
||||
c = buf[next];
|
||||
c = PyUnicode_READ(kind, buf, next);
|
||||
if (c == '"' || c == '\\') {
|
||||
break;
|
||||
}
|
||||
|
@ -467,7 +464,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
|||
/* Pick up this chunk if it's not zero length */
|
||||
if (next != end) {
|
||||
APPEND_OLD_CHUNK
|
||||
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
|
||||
chunk = PyUnicode_FromKindAndData(
|
||||
kind,
|
||||
(char*)buf + PyUnicode_KIND_SIZE(kind, end),
|
||||
next - end);
|
||||
if (chunk == NULL) {
|
||||
goto bail;
|
||||
}
|
||||
|
@ -481,7 +481,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
|||
raise_errmsg("Unterminated string starting at", pystr, begin);
|
||||
goto bail;
|
||||
}
|
||||
c = buf[next];
|
||||
c = PyUnicode_READ(kind, buf, next);
|
||||
if (c != 'u') {
|
||||
/* Non-unicode backslash escapes */
|
||||
end = next + 1;
|
||||
|
@ -511,7 +511,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
|||
}
|
||||
/* Decode 4 hex digits */
|
||||
for (; next < end; next++) {
|
||||
Py_UNICODE digit = buf[next];
|
||||
Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
|
||||
c <<= 4;
|
||||
switch (digit) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
|
@ -528,22 +528,22 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
|||
goto bail;
|
||||
}
|
||||
}
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
/* Surrogate pair */
|
||||
if ((c & 0xfc00) == 0xd800) {
|
||||
Py_UNICODE c2 = 0;
|
||||
Py_UCS4 c2 = 0;
|
||||
if (end + 6 >= len) {
|
||||
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
|
||||
goto bail;
|
||||
}
|
||||
if (buf[next++] != '\\' || buf[next++] != 'u') {
|
||||
if (PyUnicode_READ(kind, buf, next++) != '\\' ||
|
||||
PyUnicode_READ(kind, buf, next++) != 'u') {
|
||||
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
|
||||
goto bail;
|
||||
}
|
||||
end += 6;
|
||||
/* Decode 4 hex digits */
|
||||
for (; next < end; next++) {
|
||||
Py_UNICODE digit = buf[next];
|
||||
Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
|
||||
c2 <<= 4;
|
||||
switch (digit) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
|
@ -570,10 +570,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
|||
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
|
||||
goto bail;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
APPEND_OLD_CHUNK
|
||||
chunk = PyUnicode_FromUnicode(&c, 1);
|
||||
chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
|
||||
if (chunk == NULL) {
|
||||
goto bail;
|
||||
}
|
||||
|
@ -711,8 +710,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
|
|||
|
||||
Returns a new PyObject (usually a dict, but object_hook can change that)
|
||||
*/
|
||||
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
|
||||
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
|
||||
void *str;
|
||||
int kind;
|
||||
Py_ssize_t end_idx;
|
||||
PyObject *val = NULL;
|
||||
PyObject *rval = NULL;
|
||||
PyObject *key = NULL;
|
||||
|
@ -720,6 +720,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
|
|||
int has_pairs_hook = (s->object_pairs_hook != Py_None);
|
||||
Py_ssize_t next_idx;
|
||||
|
||||
if (PyUnicode_READY(pystr) == -1)
|
||||
return NULL;
|
||||
|
||||
str = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
|
||||
|
||||
if (has_pairs_hook)
|
||||
rval = PyList_New(0);
|
||||
else
|
||||
|
@ -728,15 +735,15 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
|
|||
return NULL;
|
||||
|
||||
/* skip whitespace after { */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
|
||||
|
||||
/* only loop if the object is non-empty */
|
||||
if (idx <= end_idx && str[idx] != '}') {
|
||||
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
|
||||
while (idx <= end_idx) {
|
||||
PyObject *memokey;
|
||||
|
||||
/* read key */
|
||||
if (str[idx] != '"') {
|
||||
if (PyUnicode_READ(kind, str, idx) != '"') {
|
||||
raise_errmsg("Expecting property name", pystr, idx);
|
||||
goto bail;
|
||||
}
|
||||
|
@ -756,13 +763,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
|
|||
idx = next_idx;
|
||||
|
||||
/* skip whitespace between key and : delimiter, read :, skip whitespace */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
if (idx > end_idx || str[idx] != ':') {
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
|
||||
raise_errmsg("Expecting : delimiter", pystr, idx);
|
||||
goto bail;
|
||||
}
|
||||
idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
|
||||
/* read any JSON term */
|
||||
val = scan_once_unicode(s, pystr, idx, &next_idx);
|
||||
|
@ -790,26 +797,26 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
|
|||
idx = next_idx;
|
||||
|
||||
/* skip whitespace before } or , */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
|
||||
/* bail if the object is closed or we didn't get the , delimiter */
|
||||
if (idx > end_idx) break;
|
||||
if (str[idx] == '}') {
|
||||
if (PyUnicode_READ(kind, str, idx) == '}') {
|
||||
break;
|
||||
}
|
||||
else if (str[idx] != ',') {
|
||||
else if (PyUnicode_READ(kind, str, idx) != ',') {
|
||||
raise_errmsg("Expecting , delimiter", pystr, idx);
|
||||
goto bail;
|
||||
}
|
||||
idx++;
|
||||
|
||||
/* skip whitespace after , delimiter */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
}
|
||||
}
|
||||
|
||||
/* verify that idx < end_idx, str[idx] should be '}' */
|
||||
if (idx > end_idx || str[idx] != '}') {
|
||||
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
|
||||
raise_errmsg("Expecting object", pystr, end_idx);
|
||||
goto bail;
|
||||
}
|
||||
|
@ -845,19 +852,27 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
|
|||
|
||||
Returns a new PyList
|
||||
*/
|
||||
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
|
||||
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
|
||||
void *str;
|
||||
int kind;
|
||||
Py_ssize_t end_idx;
|
||||
PyObject *val = NULL;
|
||||
PyObject *rval = PyList_New(0);
|
||||
Py_ssize_t next_idx;
|
||||
if (rval == NULL)
|
||||
return NULL;
|
||||
|
||||
if (PyUnicode_READY(pystr) == -1)
|
||||
return NULL;
|
||||
|
||||
str = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
|
||||
|
||||
/* skip whitespace after [ */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
|
||||
/* only loop if the array is non-empty */
|
||||
if (idx <= end_idx && str[idx] != ']') {
|
||||
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
|
||||
while (idx <= end_idx) {
|
||||
|
||||
/* read any JSON term */
|
||||
|
@ -872,26 +887,26 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
|
|||
idx = next_idx;
|
||||
|
||||
/* skip whitespace between term and , */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
|
||||
/* bail if the array is closed or we didn't get the , delimiter */
|
||||
if (idx > end_idx) break;
|
||||
if (str[idx] == ']') {
|
||||
if (PyUnicode_READ(kind, str, idx) == ']') {
|
||||
break;
|
||||
}
|
||||
else if (str[idx] != ',') {
|
||||
else if (PyUnicode_READ(kind, str, idx) != ',') {
|
||||
raise_errmsg("Expecting , delimiter", pystr, idx);
|
||||
goto bail;
|
||||
}
|
||||
idx++;
|
||||
|
||||
/* skip whitespace after , */
|
||||
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
|
||||
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
|
||||
}
|
||||
}
|
||||
|
||||
/* verify that idx < end_idx, str[idx] should be ']' */
|
||||
if (idx > end_idx || str[idx] != ']') {
|
||||
/* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
|
||||
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
|
||||
raise_errmsg("Expecting object", pystr, end_idx);
|
||||
goto bail;
|
||||
}
|
||||
|
@ -940,16 +955,24 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
|
|||
PyInt, PyLong, or PyFloat.
|
||||
May return other types if parse_int or parse_float are set
|
||||
*/
|
||||
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
|
||||
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
|
||||
void *str;
|
||||
int kind;
|
||||
Py_ssize_t end_idx;
|
||||
Py_ssize_t idx = start;
|
||||
int is_float = 0;
|
||||
PyObject *rval;
|
||||
PyObject *numstr = NULL;
|
||||
PyObject *custom_func;
|
||||
|
||||
if (PyUnicode_READY(pystr) == -1)
|
||||
return NULL;
|
||||
|
||||
str = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
|
||||
|
||||
/* read a sign if it's there, make sure it's not the end of the string */
|
||||
if (str[idx] == '-') {
|
||||
if (PyUnicode_READ(kind, str, idx) == '-') {
|
||||
idx++;
|
||||
if (idx > end_idx) {
|
||||
PyErr_SetNone(PyExc_StopIteration);
|
||||
|
@ -958,12 +981,12 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
|
|||
}
|
||||
|
||||
/* read as many integer digits as we find as long as it doesn't start with 0 */
|
||||
if (str[idx] >= '1' && str[idx] <= '9') {
|
||||
if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
|
||||
idx++;
|
||||
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
|
||||
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
|
||||
}
|
||||
/* if it starts with 0 we only expect one integer digit */
|
||||
else if (str[idx] == '0') {
|
||||
else if (PyUnicode_READ(kind, str, idx) == '0') {
|
||||
idx++;
|
||||
}
|
||||
/* no integer digits, error */
|
||||
|
@ -973,25 +996,25 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
|
|||
}
|
||||
|
||||
/* if the next char is '.' followed by a digit then read all float digits */
|
||||
if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
|
||||
if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
|
||||
is_float = 1;
|
||||
idx += 2;
|
||||
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
|
||||
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
|
||||
}
|
||||
|
||||
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
|
||||
if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
|
||||
if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
|
||||
Py_ssize_t e_start = idx;
|
||||
idx++;
|
||||
|
||||
/* read an exponent sign if present */
|
||||
if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
|
||||
if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
|
||||
|
||||
/* read all digits */
|
||||
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
|
||||
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
|
||||
|
||||
/* if we got a digit, then parse as float. if not, backtrack */
|
||||
if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
|
||||
if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
|
||||
is_float = 1;
|
||||
}
|
||||
else {
|
||||
|
@ -1008,7 +1031,9 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
|
|||
|
||||
if (custom_func) {
|
||||
/* copy the section we determined to be a number */
|
||||
numstr = PyUnicode_FromUnicode(&str[start], idx - start);
|
||||
numstr = PyUnicode_FromKindAndData(kind,
|
||||
(char*)str + PyUnicode_KIND_SIZE(kind, start),
|
||||
idx - start);
|
||||
if (numstr == NULL)
|
||||
return NULL;
|
||||
rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
|
||||
|
@ -1024,7 +1049,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
|
|||
return NULL;
|
||||
buf = PyBytes_AS_STRING(numstr);
|
||||
for (i = 0; i < n; i++) {
|
||||
buf[i] = (char) str[i + start];
|
||||
buf[i] = (char) PyUnicode_READ(kind, str, i + start);
|
||||
}
|
||||
if (is_float)
|
||||
rval = PyFloat_FromString(numstr);
|
||||
|
@ -1047,13 +1072,23 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
|
|||
Returns a new PyObject representation of the term.
|
||||
*/
|
||||
PyObject *res;
|
||||
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
|
||||
Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
|
||||
void *str;
|
||||
int kind;
|
||||
Py_ssize_t length;
|
||||
|
||||
if (PyUnicode_READY(pystr) == -1)
|
||||
return NULL;
|
||||
|
||||
str = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
length = PyUnicode_GET_LENGTH(pystr);
|
||||
|
||||
if (idx >= length) {
|
||||
PyErr_SetNone(PyExc_StopIteration);
|
||||
return NULL;
|
||||
}
|
||||
switch (str[idx]) {
|
||||
|
||||
switch (PyUnicode_READ(kind, str, idx)) {
|
||||
case '"':
|
||||
/* string */
|
||||
return scanstring_unicode(pystr, idx + 1,
|
||||
|
@ -1077,7 +1112,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
|
|||
return res;
|
||||
case 'n':
|
||||
/* null */
|
||||
if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
|
||||
if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
|
||||
Py_INCREF(Py_None);
|
||||
*next_idx_ptr = idx + 4;
|
||||
return Py_None;
|
||||
|
@ -1085,7 +1120,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
|
|||
break;
|
||||
case 't':
|
||||
/* true */
|
||||
if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
|
||||
if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
|
||||
Py_INCREF(Py_True);
|
||||
*next_idx_ptr = idx + 4;
|
||||
return Py_True;
|
||||
|
@ -1093,7 +1128,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
|
|||
break;
|
||||
case 'f':
|
||||
/* false */
|
||||
if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
|
||||
if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
|
||||
PyUnicode_READ(kind, str, idx + 2) == 'l' &&
|
||||
PyUnicode_READ(kind, str, idx + 3) == 's' &&
|
||||
PyUnicode_READ(kind, str, idx + 4) == 'e') {
|
||||
Py_INCREF(Py_False);
|
||||
*next_idx_ptr = idx + 5;
|
||||
return Py_False;
|
||||
|
@ -1101,19 +1139,33 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
|
|||
break;
|
||||
case 'N':
|
||||
/* NaN */
|
||||
if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
|
||||
if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
|
||||
PyUnicode_READ(kind, str, idx + 2) == 'N') {
|
||||
return _parse_constant(s, "NaN", idx, next_idx_ptr);
|
||||
}
|
||||
break;
|
||||
case 'I':
|
||||
/* Infinity */
|
||||
if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
|
||||
if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
|
||||
PyUnicode_READ(kind, str, idx + 2) == 'f' &&
|
||||
PyUnicode_READ(kind, str, idx + 3) == 'i' &&
|
||||
PyUnicode_READ(kind, str, idx + 4) == 'n' &&
|
||||
PyUnicode_READ(kind, str, idx + 5) == 'i' &&
|
||||
PyUnicode_READ(kind, str, idx + 6) == 't' &&
|
||||
PyUnicode_READ(kind, str, idx + 7) == 'y') {
|
||||
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
/* -Infinity */
|
||||
if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
|
||||
if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
|
||||
PyUnicode_READ(kind, str, idx + 2) == 'n' &&
|
||||
PyUnicode_READ(kind, str, idx + 3) == 'f' &&
|
||||
PyUnicode_READ(kind, str, idx + 4) == 'i' &&
|
||||
PyUnicode_READ(kind, str, idx + 5) == 'n' &&
|
||||
PyUnicode_READ(kind, str, idx + 6) == 'i' &&
|
||||
PyUnicode_READ(kind, str, idx + 7) == 't' &&
|
||||
PyUnicode_READ(kind, str, idx + 8) == 'y') {
|
||||
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -1867,9 +1867,7 @@ save_unicode(PicklerObject *self, PyObject *obj)
|
|||
if (self->bin) {
|
||||
char pdata[5];
|
||||
|
||||
encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
|
||||
PyUnicode_GET_SIZE(obj),
|
||||
"surrogatepass");
|
||||
encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
|
||||
if (encoded == NULL)
|
||||
goto error;
|
||||
|
||||
|
|
|
@ -1436,10 +1436,11 @@ pysqlite_connection_create_collation(pysqlite_Connection* self, PyObject* args)
|
|||
PyObject* uppercase_name = 0;
|
||||
PyObject* name;
|
||||
PyObject* retval;
|
||||
Py_UNICODE* chk;
|
||||
Py_ssize_t i, len;
|
||||
char *uppercase_name_str;
|
||||
int rc;
|
||||
unsigned int kind;
|
||||
void *data;
|
||||
|
||||
if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
|
||||
goto finally;
|
||||
|
@ -1454,12 +1455,16 @@ pysqlite_connection_create_collation(pysqlite_Connection* self, PyObject* args)
|
|||
goto finally;
|
||||
}
|
||||
|
||||
len = PyUnicode_GET_SIZE(uppercase_name);
|
||||
chk = PyUnicode_AS_UNICODE(uppercase_name);
|
||||
for (i=0; i<len; i++, chk++) {
|
||||
if ((*chk >= '0' && *chk <= '9')
|
||||
|| (*chk >= 'A' && *chk <= 'Z')
|
||||
|| (*chk == '_'))
|
||||
if (PyUnicode_READY(uppercase_name))
|
||||
goto finally;
|
||||
len = PyUnicode_GET_LENGTH(uppercase_name);
|
||||
kind = PyUnicode_KIND(uppercase_name);
|
||||
data = PyUnicode_DATA(uppercase_name);
|
||||
for (i=0; i<len; i++) {
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||
if ((ch >= '0' && ch <= '9')
|
||||
|| (ch >= 'A' && ch <= 'Z')
|
||||
|| (ch == '_'))
|
||||
{
|
||||
continue;
|
||||
} else {
|
||||
|
|
378
Modules/_sre.c
378
Modules/_sre.c
|
@ -163,8 +163,6 @@ static unsigned int sre_lower_locale(unsigned int ch)
|
|||
|
||||
/* unicode-specific character predicates */
|
||||
|
||||
#if defined(HAVE_UNICODE)
|
||||
|
||||
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
|
||||
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
|
||||
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
|
||||
|
@ -176,8 +174,6 @@ static unsigned int sre_lower_unicode(unsigned int ch)
|
|||
return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
LOCAL(int)
|
||||
sre_category(SRE_CODE category, unsigned int ch)
|
||||
{
|
||||
|
@ -205,7 +201,6 @@ sre_category(SRE_CODE category, unsigned int ch)
|
|||
case SRE_CATEGORY_LOC_NOT_WORD:
|
||||
return !SRE_LOC_IS_WORD(ch);
|
||||
|
||||
#if defined(HAVE_UNICODE)
|
||||
case SRE_CATEGORY_UNI_DIGIT:
|
||||
return SRE_UNI_IS_DIGIT(ch);
|
||||
case SRE_CATEGORY_UNI_NOT_DIGIT:
|
||||
|
@ -222,24 +217,6 @@ sre_category(SRE_CODE category, unsigned int ch)
|
|||
return SRE_UNI_IS_LINEBREAK(ch);
|
||||
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
||||
return !SRE_UNI_IS_LINEBREAK(ch);
|
||||
#else
|
||||
case SRE_CATEGORY_UNI_DIGIT:
|
||||
return SRE_IS_DIGIT(ch);
|
||||
case SRE_CATEGORY_UNI_NOT_DIGIT:
|
||||
return !SRE_IS_DIGIT(ch);
|
||||
case SRE_CATEGORY_UNI_SPACE:
|
||||
return SRE_IS_SPACE(ch);
|
||||
case SRE_CATEGORY_UNI_NOT_SPACE:
|
||||
return !SRE_IS_SPACE(ch);
|
||||
case SRE_CATEGORY_UNI_WORD:
|
||||
return SRE_LOC_IS_WORD(ch);
|
||||
case SRE_CATEGORY_UNI_NOT_WORD:
|
||||
return !SRE_LOC_IS_WORD(ch);
|
||||
case SRE_CATEGORY_UNI_LINEBREAK:
|
||||
return SRE_IS_LINEBREAK(ch);
|
||||
case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
||||
return !SRE_IS_LINEBREAK(ch);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -280,6 +257,7 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
|||
/* generate 8-bit version */
|
||||
|
||||
#define SRE_CHAR unsigned char
|
||||
#define SRE_CHARGET(state, buf, index) ((unsigned char*)buf)[index]
|
||||
#define SRE_AT sre_at
|
||||
#define SRE_COUNT sre_count
|
||||
#define SRE_CHARSET sre_charset
|
||||
|
@ -287,15 +265,11 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
|||
#define SRE_MATCH sre_match
|
||||
#define SRE_MATCH_CONTEXT sre_match_context
|
||||
#define SRE_SEARCH sre_search
|
||||
#define SRE_LITERAL_TEMPLATE sre_literal_template
|
||||
|
||||
#if defined(HAVE_UNICODE)
|
||||
|
||||
#define SRE_RECURSIVE
|
||||
#include "_sre.c"
|
||||
#undef SRE_RECURSIVE
|
||||
|
||||
#undef SRE_LITERAL_TEMPLATE
|
||||
#undef SRE_SEARCH
|
||||
#undef SRE_MATCH
|
||||
#undef SRE_MATCH_CONTEXT
|
||||
|
@ -304,10 +278,15 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
|||
#undef SRE_COUNT
|
||||
#undef SRE_AT
|
||||
#undef SRE_CHAR
|
||||
#undef SRE_CHARGET
|
||||
|
||||
/* generate 16-bit unicode version */
|
||||
/* generate 8/16/32-bit unicode version */
|
||||
|
||||
#define SRE_CHAR Py_UNICODE
|
||||
#define SRE_CHAR void
|
||||
#define SRE_CHARGET(state, buf, index) \
|
||||
((state->charsize==1) ? ((Py_UCS1*)buf)[index] : \
|
||||
(state->charsize==2) ? ((Py_UCS2*)buf)[index] : \
|
||||
((Py_UCS4*)buf)[index])
|
||||
#define SRE_AT sre_uat
|
||||
#define SRE_COUNT sre_ucount
|
||||
#define SRE_CHARSET sre_ucharset
|
||||
|
@ -315,8 +294,6 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
|||
#define SRE_MATCH sre_umatch
|
||||
#define SRE_MATCH_CONTEXT sre_umatch_context
|
||||
#define SRE_SEARCH sre_usearch
|
||||
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
|
||||
#endif
|
||||
|
||||
#endif /* SRE_RECURSIVE */
|
||||
|
||||
|
@ -327,7 +304,7 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
|||
settings */
|
||||
|
||||
LOCAL(int)
|
||||
SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
||||
SRE_AT(SRE_STATE* state, char* ptr, SRE_CODE at)
|
||||
{
|
||||
/* check if pointer is at given position */
|
||||
|
||||
|
@ -341,16 +318,16 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
|||
|
||||
case SRE_AT_BEGINNING_LINE:
|
||||
return ((void*) ptr == state->beginning ||
|
||||
SRE_IS_LINEBREAK((int) ptr[-1]));
|
||||
SRE_IS_LINEBREAK((int) SRE_CHARGET(state, ptr, -1)));
|
||||
|
||||
case SRE_AT_END:
|
||||
return (((void*) (ptr+1) == state->end &&
|
||||
SRE_IS_LINEBREAK((int) ptr[0])) ||
|
||||
return (((void*) (ptr+state->charsize) == state->end &&
|
||||
SRE_IS_LINEBREAK((int) SRE_CHARGET(state, ptr, 0))) ||
|
||||
((void*) ptr == state->end));
|
||||
|
||||
case SRE_AT_END_LINE:
|
||||
return ((void*) ptr == state->end ||
|
||||
SRE_IS_LINEBREAK((int) ptr[0]));
|
||||
SRE_IS_LINEBREAK((int) SRE_CHARGET(state, ptr, 0)));
|
||||
|
||||
case SRE_AT_END_STRING:
|
||||
return ((void*) ptr == state->end);
|
||||
|
@ -359,57 +336,55 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
|||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_IS_WORD((int) ptr[-1]) : 0;
|
||||
SRE_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
SRE_IS_WORD((int) ptr[0]) : 0;
|
||||
SRE_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
|
||||
return thisp != thatp;
|
||||
|
||||
case SRE_AT_NON_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_IS_WORD((int) ptr[-1]) : 0;
|
||||
SRE_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
SRE_IS_WORD((int) ptr[0]) : 0;
|
||||
SRE_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
|
||||
return thisp == thatp;
|
||||
|
||||
case SRE_AT_LOC_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
|
||||
SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
|
||||
SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
|
||||
return thisp != thatp;
|
||||
|
||||
case SRE_AT_LOC_NON_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
|
||||
SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
|
||||
SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
|
||||
return thisp == thatp;
|
||||
|
||||
#if defined(HAVE_UNICODE)
|
||||
case SRE_AT_UNI_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
|
||||
SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
|
||||
SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
|
||||
return thisp != thatp;
|
||||
|
||||
case SRE_AT_UNI_NON_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
|
||||
SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
|
||||
SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
|
||||
return thisp == thatp;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
@ -476,7 +451,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
|||
count = *(set++);
|
||||
|
||||
if (sizeof(SRE_CODE) == 2) {
|
||||
block = ((unsigned char*)set)[ch >> 8];
|
||||
/* read the block index as an unsigned byte: plain char may be signed,
   which would turn indices >= 128 into a negative (out-of-bounds) block */
block = ((unsigned char*)set)[ch >> 8];
|
||||
set += 128;
|
||||
if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
|
||||
return ok;
|
||||
|
@ -486,7 +461,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
|||
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
|
||||
* warnings when c's type supports only numbers < N+1 */
|
||||
if (!(ch & ~65535))
|
||||
block = ((unsigned char*)set)[ch >> 8];
|
||||
/* read the block index as an unsigned byte: with a signed plain char,
   indices >= 128 would go negative and be mistaken for the -1
   "no block" value assigned in the else branch */
block = ((unsigned char*)set)[ch >> 8];
|
||||
else
|
||||
block = -1;
|
||||
set += 64;
|
||||
|
@ -512,28 +487,29 @@ LOCAL(Py_ssize_t)
|
|||
SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
||||
{
|
||||
SRE_CODE chr;
|
||||
SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
|
||||
SRE_CHAR* end = (SRE_CHAR *)state->end;
|
||||
char* ptr = (char *)state->ptr;
|
||||
char* end = (char *)state->end;
|
||||
Py_ssize_t i;
|
||||
|
||||
/* adjust end */
|
||||
/* ptr and end are byte pointers, so convert the remaining span to
   characters before comparing it with maxcount (a character count);
   otherwise end could be moved past the real end of the buffer */
if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
|
||||
end = ptr + maxcount;
|
||||
end = ptr + maxcount*state->charsize;
|
||||
|
||||
switch (pattern[0]) {
|
||||
|
||||
case SRE_OP_IN:
|
||||
/* repeated set */
|
||||
TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
|
||||
while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
|
||||
ptr++;
|
||||
while (ptr < end &&
|
||||
SRE_CHARSET(pattern + 2, SRE_CHARGET(state, ptr, 0)))
|
||||
ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_ANY:
|
||||
/* repeated dot wildcard. */
|
||||
TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
|
||||
while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
|
||||
ptr++;
|
||||
while (ptr < end && !SRE_IS_LINEBREAK(SRE_CHARGET(state, ptr, 0)))
|
||||
ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_ANY_ALL:
|
||||
|
@ -547,38 +523,38 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
|||
/* repeated literal */
|
||||
chr = pattern[1];
|
||||
TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
|
||||
while (ptr < end && (SRE_CODE) *ptr == chr)
|
||||
ptr++;
|
||||
while (ptr < end && (SRE_CODE) SRE_CHARGET(state, ptr, 0) == chr)
|
||||
ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_LITERAL_IGNORE:
|
||||
/* repeated literal */
|
||||
chr = pattern[1];
|
||||
TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
|
||||
while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
|
||||
ptr++;
|
||||
while (ptr < end && (SRE_CODE) state->lower(SRE_CHARGET(state, ptr, 0)) == chr)
|
||||
ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_NOT_LITERAL:
|
||||
/* repeated non-literal */
|
||||
chr = pattern[1];
|
||||
TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
|
||||
while (ptr < end && (SRE_CODE) *ptr != chr)
|
||||
ptr++;
|
||||
while (ptr < end && (SRE_CODE) SRE_CHARGET(state, ptr, 0) != chr)
|
||||
ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_NOT_LITERAL_IGNORE:
|
||||
/* repeated non-literal */
|
||||
chr = pattern[1];
|
||||
TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
|
||||
while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
|
||||
ptr++;
|
||||
while (ptr < end && (SRE_CODE) state->lower(SRE_CHARGET(state, ptr, 0)) != chr)
|
||||
ptr += state->charsize;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* repeated single character pattern */
|
||||
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
|
||||
while ((SRE_CHAR*) state->ptr < end) {
|
||||
while ((char*) state->ptr < end) {
|
||||
i = SRE_MATCH(state, pattern);
|
||||
if (i < 0)
|
||||
return i;
|
||||
|
@ -586,12 +562,12 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
|||
break;
|
||||
}
|
||||
TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
|
||||
(SRE_CHAR*) state->ptr - ptr));
|
||||
return (SRE_CHAR*) state->ptr - ptr;
|
||||
((char*)state->ptr - ptr)/state->charsize));
|
||||
return ((char*)state->ptr - ptr)/state->charsize;
|
||||
}
|
||||
|
||||
TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
|
||||
return ptr - (SRE_CHAR*) state->ptr;
|
||||
TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, (ptr - (char*) state->ptr)/state->charsize));
|
||||
return (ptr - (char*) state->ptr)/state->charsize;
|
||||
}
|
||||
|
||||
#if 0 /* not used in this release */
|
||||
|
@ -602,8 +578,8 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
|
|||
returns the number of SRE_CODE objects to skip if successful, 0
|
||||
if no match */
|
||||
|
||||
SRE_CHAR* end = state->end;
|
||||
SRE_CHAR* ptr = state->ptr;
|
||||
char* end = state->end;
|
||||
char* ptr = state->ptr;
|
||||
Py_ssize_t i;
|
||||
|
||||
/* check minimal length */
|
||||
|
@ -614,7 +590,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
|
||||
/* <length> <skip> <prefix data> <overlap data> */
|
||||
for (i = 0; i < pattern[5]; i++)
|
||||
if ((SRE_CODE) ptr[i] != pattern[7 + i])
|
||||
if ((SRE_CODE) SRE_CHARGET(state, ptr, i) != pattern[7 + i])
|
||||
return 0;
|
||||
return pattern[0] + 2 * pattern[6];
|
||||
}
|
||||
|
@ -783,7 +759,7 @@ do { \
|
|||
typedef struct {
|
||||
Py_ssize_t last_ctx_pos;
|
||||
Py_ssize_t jump;
|
||||
SRE_CHAR* ptr;
|
||||
char* ptr;
|
||||
SRE_CODE* pattern;
|
||||
Py_ssize_t count;
|
||||
Py_ssize_t lastmark;
|
||||
|
@ -799,7 +775,7 @@ typedef struct {
|
|||
LOCAL(Py_ssize_t)
|
||||
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||
{
|
||||
SRE_CHAR* end = (SRE_CHAR *)state->end;
|
||||
char* end = (char*)state->end;
|
||||
Py_ssize_t alloc_pos, ctx_pos = -1;
|
||||
Py_ssize_t i, ret = 0;
|
||||
Py_ssize_t jump;
|
||||
|
@ -818,12 +794,12 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
|
||||
entrance:
|
||||
|
||||
ctx->ptr = (SRE_CHAR *)state->ptr;
|
||||
ctx->ptr = (char *)state->ptr;
|
||||
|
||||
if (ctx->pattern[0] == SRE_OP_INFO) {
|
||||
/* optimization info block */
|
||||
/* <INFO> <1=skip> <2=flags> <3=min> ... */
|
||||
if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
|
||||
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
|
||||
TRACE(("reject (got %d chars, need %d)\n",
|
||||
(end - ctx->ptr), ctx->pattern[3]));
|
||||
RETURN_FAILURE;
|
||||
|
@ -865,10 +841,10 @@ entrance:
|
|||
/* <LITERAL> <code> */
|
||||
TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
|
||||
ctx->ptr, *ctx->pattern));
|
||||
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
|
||||
if (ctx->ptr >= end || (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0) != ctx->pattern[0])
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern++;
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_NOT_LITERAL:
|
||||
|
@ -876,10 +852,10 @@ entrance:
|
|||
/* <NOT_LITERAL> <code> */
|
||||
TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
|
||||
ctx->ptr, *ctx->pattern));
|
||||
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
|
||||
if (ctx->ptr >= end || (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0) == ctx->pattern[0])
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern++;
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_SUCCESS:
|
||||
|
@ -902,19 +878,19 @@ entrance:
|
|||
/* <CATEGORY> <code> */
|
||||
TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
|
||||
ctx->ptr, *ctx->pattern));
|
||||
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
|
||||
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], SRE_CHARGET(state, ctx->ptr, 0)))
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern++;
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_ANY:
|
||||
/* match anything (except a newline) */
|
||||
/* <ANY> */
|
||||
TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
|
||||
if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
|
||||
RETURN_FAILURE;
|
||||
ctx->ptr++;
|
||||
if (ctx->ptr >= end || SRE_IS_LINEBREAK(SRE_CHARGET(state, ctx->ptr, 0)))
|
||||
RETURN_FAILURE;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_ANY_ALL:
|
||||
|
@ -923,47 +899,47 @@ entrance:
|
|||
TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
|
||||
if (ctx->ptr >= end)
|
||||
RETURN_FAILURE;
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_IN:
|
||||
/* match set member (or non_member) */
|
||||
/* <IN> <skip> <set> */
|
||||
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
|
||||
if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
|
||||
RETURN_FAILURE;
|
||||
if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, SRE_CHARGET(state, ctx->ptr, 0)))
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern += ctx->pattern[0];
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_LITERAL_IGNORE:
|
||||
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
|
||||
ctx->pattern, ctx->ptr, ctx->pattern[0]));
|
||||
if (ctx->ptr >= end ||
|
||||
state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
|
||||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*ctx->pattern))
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern++;
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_NOT_LITERAL_IGNORE:
|
||||
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
|
||||
ctx->pattern, ctx->ptr, *ctx->pattern));
|
||||
if (ctx->ptr >= end ||
|
||||
state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
|
||||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) == state->lower(*ctx->pattern))
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern++;
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_IN_IGNORE:
|
||||
TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
|
||||
if (ctx->ptr >= end
|
||||
|| !SRE_CHARSET(ctx->pattern+1,
|
||||
(SRE_CODE)state->lower(*ctx->ptr)))
|
||||
(SRE_CODE)state->lower(SRE_CHARGET(state, ctx->ptr, 0))))
|
||||
RETURN_FAILURE;
|
||||
ctx->pattern += ctx->pattern[0];
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
break;
|
||||
|
||||
case SRE_OP_JUMP:
|
||||
|
@ -986,11 +962,11 @@ entrance:
|
|||
for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
|
||||
if (ctx->pattern[1] == SRE_OP_LITERAL &&
|
||||
(ctx->ptr >= end ||
|
||||
(SRE_CODE) *ctx->ptr != ctx->pattern[2]))
|
||||
(SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0) != ctx->pattern[2]))
|
||||
continue;
|
||||
if (ctx->pattern[1] == SRE_OP_IN &&
|
||||
(ctx->ptr >= end ||
|
||||
!SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
|
||||
!SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0))))
|
||||
continue;
|
||||
state->ptr = ctx->ptr;
|
||||
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
|
||||
|
@ -1021,7 +997,7 @@ entrance:
|
|||
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
|
||||
ctx->pattern[1], ctx->pattern[2]));
|
||||
|
||||
if (ctx->ptr + ctx->pattern[1] > end)
|
||||
if (ctx->ptr + state->charsize * ctx->pattern[1] > end)
|
||||
RETURN_FAILURE; /* cannot match */
|
||||
|
||||
state->ptr = ctx->ptr;
|
||||
|
@ -1030,7 +1006,7 @@ entrance:
|
|||
RETURN_ON_ERROR(ret);
|
||||
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
||||
ctx->count = ret;
|
||||
ctx->ptr += ctx->count;
|
||||
ctx->ptr += state->charsize * ctx->count;
|
||||
|
||||
/* when we arrive here, count contains the number of
|
||||
matches, and ctx->ptr points to the tail of the target
|
||||
|
@ -1054,8 +1030,9 @@ entrance:
|
|||
ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
|
||||
for (;;) {
|
||||
while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
|
||||
(ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
|
||||
ctx->ptr--;
|
||||
(ctx->ptr >= end ||
|
||||
SRE_CHARGET(state, ctx->ptr, 0) != ctx->u.chr)) {
|
||||
ctx->ptr -= state->charsize;
|
||||
ctx->count--;
|
||||
}
|
||||
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
|
||||
|
@ -1070,7 +1047,7 @@ entrance:
|
|||
|
||||
LASTMARK_RESTORE();
|
||||
|
||||
ctx->ptr--;
|
||||
ctx->ptr -= state->charsize;
|
||||
ctx->count--;
|
||||
}
|
||||
|
||||
|
@ -1084,7 +1061,7 @@ entrance:
|
|||
RETURN_ON_ERROR(ret);
|
||||
RETURN_SUCCESS;
|
||||
}
|
||||
ctx->ptr--;
|
||||
ctx->ptr -= state->charsize;
|
||||
ctx->count--;
|
||||
LASTMARK_RESTORE();
|
||||
}
|
||||
|
@ -1104,7 +1081,7 @@ entrance:
|
|||
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
|
||||
ctx->pattern[1], ctx->pattern[2]));
|
||||
|
||||
if (ctx->ptr + ctx->pattern[1] > end)
|
||||
if (ctx->ptr + state->charsize * ctx->pattern[1] > end)
|
||||
RETURN_FAILURE; /* cannot match */
|
||||
|
||||
state->ptr = ctx->ptr;
|
||||
|
@ -1121,7 +1098,7 @@ entrance:
|
|||
RETURN_FAILURE;
|
||||
/* advance past minimum matches of repeat */
|
||||
ctx->count = ret;
|
||||
ctx->ptr += ctx->count;
|
||||
ctx->ptr += state->charsize * ctx->count;
|
||||
}
|
||||
|
||||
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
|
||||
|
@ -1148,7 +1125,7 @@ entrance:
|
|||
if (ret == 0)
|
||||
break;
|
||||
assert(ret == 1);
|
||||
ctx->ptr++;
|
||||
ctx->ptr += state->charsize;
|
||||
ctx->count++;
|
||||
LASTMARK_RESTORE();
|
||||
}
|
||||
|
@ -1320,14 +1297,16 @@ entrance:
|
|||
if (groupref >= state->lastmark) {
|
||||
RETURN_FAILURE;
|
||||
} else {
|
||||
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
|
||||
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
|
||||
char* p = (char*) state->mark[groupref];
|
||||
char* e = (char*) state->mark[groupref+1];
|
||||
if (!p || !e || e < p)
|
||||
RETURN_FAILURE;
|
||||
while (p < e) {
|
||||
if (ctx->ptr >= end || *ctx->ptr != *p)
|
||||
if (ctx->ptr >= end ||
|
||||
SRE_CHARGET(state, ctx->ptr, 0) != SRE_CHARGET(state, p, 0))
|
||||
RETURN_FAILURE;
|
||||
p++; ctx->ptr++;
|
||||
p += state->charsize;
|
||||
ctx->ptr += state->charsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1344,15 +1323,16 @@ entrance:
|
|||
if (groupref >= state->lastmark) {
|
||||
RETURN_FAILURE;
|
||||
} else {
|
||||
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
|
||||
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
|
||||
char* p = (char*) state->mark[groupref];
|
||||
char* e = (char*) state->mark[groupref+1];
|
||||
if (!p || !e || e < p)
|
||||
RETURN_FAILURE;
|
||||
while (p < e) {
|
||||
if (ctx->ptr >= end ||
|
||||
state->lower(*ctx->ptr) != state->lower(*p))
|
||||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
state->lower(SRE_CHARGET(state, p, 0)))
|
||||
RETURN_FAILURE;
|
||||
p++; ctx->ptr++;
|
||||
/* p is a byte pointer into the subject: read it with SRE_CHARGET and
   step it by one whole character, matching the GROUPREF loop */
p += state->charsize;
|
||||
ctx->ptr += state->charsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1386,7 +1366,7 @@ entrance:
|
|||
/* <ASSERT> <skip> <back> <pattern> */
|
||||
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
|
||||
ctx->ptr, ctx->pattern[1]));
|
||||
state->ptr = ctx->ptr - ctx->pattern[1];
|
||||
state->ptr = ctx->ptr - state->charsize * ctx->pattern[1];
|
||||
if (state->ptr < state->beginning)
|
||||
RETURN_FAILURE;
|
||||
DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
|
||||
|
@ -1399,7 +1379,7 @@ entrance:
|
|||
/* <ASSERT_NOT> <skip> <back> <pattern> */
|
||||
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
|
||||
ctx->ptr, ctx->pattern[1]));
|
||||
state->ptr = ctx->ptr - ctx->pattern[1];
|
||||
state->ptr = ctx->ptr - state->charsize * ctx->pattern[1];
|
||||
if (state->ptr >= state->beginning) {
|
||||
DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
|
||||
if (ret) {
|
||||
|
@ -1481,8 +1461,8 @@ exit:
|
|||
LOCAL(Py_ssize_t)
|
||||
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||
{
|
||||
SRE_CHAR* ptr = (SRE_CHAR *)state->start;
|
||||
SRE_CHAR* end = (SRE_CHAR *)state->end;
|
||||
char* ptr = (char*)state->start;
|
||||
char* end = (char*)state->end;
|
||||
Py_ssize_t status = 0;
|
||||
Py_ssize_t prefix_len = 0;
|
||||
Py_ssize_t prefix_skip = 0;
|
||||
|
@ -1500,9 +1480,9 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (pattern[3] > 1) {
|
||||
/* adjust end point (but make sure we leave at least one
|
||||
character in there, so literal search will work) */
|
||||
end -= pattern[3]-1;
|
||||
end -= (pattern[3]-1) * state->charsize;
|
||||
if (end <= ptr)
|
||||
end = ptr+1;
|
||||
end = ptr + state->charsize;
|
||||
}
|
||||
|
||||
if (flags & SRE_INFO_PREFIX) {
|
||||
|
@ -1528,10 +1508,10 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
/* pattern starts with a known prefix. use the overlap
|
||||
table to skip forward as fast as we possibly can */
|
||||
Py_ssize_t i = 0;
|
||||
end = (SRE_CHAR *)state->end;
|
||||
end = (char *)state->end;
|
||||
while (ptr < end) {
|
||||
for (;;) {
|
||||
if ((SRE_CODE) ptr[0] != prefix[i]) {
|
||||
if ((SRE_CODE) SRE_CHARGET(state, ptr, 0) != prefix[i]) {
|
||||
if (!i)
|
||||
break;
|
||||
else
|
||||
|
@ -1540,8 +1520,8 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (++i == prefix_len) {
|
||||
/* found a potential match */
|
||||
TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
|
||||
state->start = ptr + 1 - prefix_len;
|
||||
state->ptr = ptr + 1 - prefix_len + prefix_skip;
|
||||
state->start = ptr - (prefix_len - 1) * state->charsize;
|
||||
state->ptr = ptr - (prefix_len - prefix_skip - 1) * state->charsize;
|
||||
if (flags & SRE_INFO_LITERAL)
|
||||
return 1; /* we got all of it */
|
||||
status = SRE_MATCH(state, pattern + 2*prefix_skip);
|
||||
|
@ -1553,7 +1533,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
break;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
ptr += state->charsize;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1563,15 +1543,16 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
/* pattern starts with a literal character. this is used
|
||||
for short prefixes, and if fast search is disabled */
|
||||
SRE_CODE chr = pattern[1];
|
||||
end = (SRE_CHAR *)state->end;
|
||||
end = (char*)state->end;
|
||||
for (;;) {
|
||||
while (ptr < end && (SRE_CODE) ptr[0] != chr)
|
||||
ptr++;
|
||||
while (ptr < end && (SRE_CODE) SRE_CHARGET(state, ptr, 0) != chr)
|
||||
ptr += state->charsize;
|
||||
if (ptr >= end)
|
||||
return 0;
|
||||
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
|
||||
state->start = ptr;
|
||||
state->ptr = ++ptr;
|
||||
ptr += state->charsize;
|
||||
state->ptr = ptr;
|
||||
if (flags & SRE_INFO_LITERAL)
|
||||
return 1; /* we got all of it */
|
||||
status = SRE_MATCH(state, pattern + 2);
|
||||
|
@ -1580,10 +1561,10 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
}
|
||||
} else if (charset) {
|
||||
/* pattern starts with a character from a known set */
|
||||
end = (SRE_CHAR *)state->end;
|
||||
end = (char*)state->end;
|
||||
for (;;) {
|
||||
while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
|
||||
ptr++;
|
||||
while (ptr < end && !SRE_CHARSET(charset, SRE_CHARGET(state, ptr, 0)))
|
||||
ptr += state->charsize;
|
||||
if (ptr >= end)
|
||||
return 0;
|
||||
TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
|
||||
|
@ -1592,13 +1573,14 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
status = SRE_MATCH(state, pattern);
|
||||
if (status != 0)
|
||||
break;
|
||||
ptr++;
|
||||
ptr += state->charsize;
|
||||
}
|
||||
} else
|
||||
/* general case */
|
||||
while (ptr <= end) {
|
||||
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
||||
state->start = state->ptr = ptr++;
|
||||
state->start = state->ptr = ptr;
|
||||
ptr += state->charsize;
|
||||
status = SRE_MATCH(state, pattern);
|
||||
if (status != 0)
|
||||
break;
|
||||
|
@ -1607,16 +1589,6 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
return status;
|
||||
}
|
||||
|
||||
LOCAL(int)
|
||||
SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
|
||||
{
|
||||
/* check if given string is a literal template (i.e. no escapes) */
|
||||
while (len-- > 0)
|
||||
if (*ptr++ == '\\')
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if !defined(SRE_RECURSIVE)
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
@ -1626,6 +1598,23 @@ SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
|
|||
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
|
||||
static PyObject*pattern_scanner(PatternObject*, PyObject*);
|
||||
|
||||
static int
|
||||
sre_literal_template(int charsize, char* ptr, Py_ssize_t len)
|
||||
{
|
||||
/* check if given string is a literal template (i.e. no escapes) */
|
||||
struct {
|
||||
int charsize;
|
||||
} state = {
|
||||
charsize
|
||||
};
|
||||
while (len-- > 0) {
|
||||
if (SRE_CHARGET((&state), ptr, 0) == '\\')
|
||||
return 0;
|
||||
ptr += charsize;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
sre_codesize(PyObject* self, PyObject *unused)
|
||||
{
|
||||
|
@ -1641,11 +1630,7 @@ sre_getlower(PyObject* self, PyObject* args)
|
|||
if (flags & SRE_FLAG_LOCALE)
|
||||
return Py_BuildValue("i", sre_lower_locale(character));
|
||||
if (flags & SRE_FLAG_UNICODE)
|
||||
#if defined(HAVE_UNICODE)
|
||||
return Py_BuildValue("i", sre_lower_unicode(character));
|
||||
#else
|
||||
return Py_BuildValue("i", sre_lower_locale(character));
|
||||
#endif
|
||||
return Py_BuildValue("i", sre_lower(character));
|
||||
}
|
||||
|
||||
|
@ -1664,7 +1649,8 @@ state_reset(SRE_STATE* state)
|
|||
}
|
||||
|
||||
static void*
|
||||
getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
|
||||
getstring(PyObject* string, Py_ssize_t* p_length,
|
||||
int* p_logical_charsize, int* p_charsize)
|
||||
{
|
||||
/* given a python object, return a data pointer, a length (in
|
||||
characters), and a character size. return NULL if the object
|
||||
|
@ -1679,9 +1665,12 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
|
|||
/* Unicode objects do not support the buffer API. So, get the data
|
||||
directly instead. */
|
||||
if (PyUnicode_Check(string)) {
|
||||
ptr = (void *)PyUnicode_AS_DATA(string);
|
||||
*p_length = PyUnicode_GET_SIZE(string);
|
||||
*p_charsize = sizeof(Py_UNICODE);
|
||||
if (PyUnicode_READY(string) == -1)
|
||||
return NULL;
|
||||
ptr = PyUnicode_DATA(string);
|
||||
*p_length = PyUnicode_GET_LENGTH(string);
|
||||
*p_charsize = PyUnicode_CHARACTER_SIZE(string);
|
||||
*p_logical_charsize = 4;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
@ -1713,10 +1702,8 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
|
|||
|
||||
if (PyBytes_Check(string) || bytes == size)
|
||||
charsize = 1;
|
||||
#if defined(HAVE_UNICODE)
|
||||
else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
|
||||
charsize = sizeof(Py_UNICODE);
|
||||
#endif
|
||||
else {
|
||||
PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
|
||||
return NULL;
|
||||
|
@ -1724,6 +1711,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
|
|||
|
||||
*p_length = size;
|
||||
*p_charsize = charsize;
|
||||
*p_logical_charsize = charsize;
|
||||
|
||||
if (ptr == NULL) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
|
@ -1739,7 +1727,7 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
/* prepare state object */
|
||||
|
||||
Py_ssize_t length;
|
||||
int charsize;
|
||||
int logical_charsize, charsize;
|
||||
void* ptr;
|
||||
|
||||
memset(state, 0, sizeof(SRE_STATE));
|
||||
|
@ -1747,16 +1735,16 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
state->lastmark = -1;
|
||||
state->lastindex = -1;
|
||||
|
||||
ptr = getstring(string, &length, &charsize);
|
||||
ptr = getstring(string, &length, &logical_charsize, &charsize);
|
||||
if (!ptr)
|
||||
return NULL;
|
||||
|
||||
if (charsize == 1 && pattern->charsize > 1) {
|
||||
if (logical_charsize == 1 && pattern->logical_charsize > 1) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"can't use a string pattern on a bytes-like object");
|
||||
return NULL;
|
||||
}
|
||||
if (charsize > 1 && pattern->charsize == 1) {
|
||||
if (logical_charsize > 1 && pattern->logical_charsize == 1) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"can't use a bytes pattern on a string-like object");
|
||||
return NULL;
|
||||
|
@ -1773,6 +1761,7 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
else if (end > length)
|
||||
end = length;
|
||||
|
||||
state->logical_charsize = logical_charsize;
|
||||
state->charsize = charsize;
|
||||
|
||||
state->beginning = ptr;
|
||||
|
@ -1788,11 +1777,7 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
if (pattern->flags & SRE_FLAG_LOCALE)
|
||||
state->lower = sre_lower_locale;
|
||||
else if (pattern->flags & SRE_FLAG_UNICODE)
|
||||
#if defined(HAVE_UNICODE)
|
||||
state->lower = sre_lower_unicode;
|
||||
#else
|
||||
state->lower = sre_lower_locale;
|
||||
#endif
|
||||
else
|
||||
state->lower = sre_lower;
|
||||
|
||||
|
@ -1891,12 +1876,10 @@ pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
|
||||
TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
||||
if (state.charsize == 1) {
|
||||
if (state.logical_charsize == 1) {
|
||||
status = sre_match(&state, PatternObject_GetCode(self));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_umatch(&state, PatternObject_GetCode(self));
|
||||
#endif
|
||||
}
|
||||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
@ -1928,12 +1911,10 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
|
||||
TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
||||
if (state.charsize == 1) {
|
||||
if (state.logical_charsize == 1) {
|
||||
status = sre_search(&state, PatternObject_GetCode(self));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_usearch(&state, PatternObject_GetCode(self));
|
||||
#endif
|
||||
}
|
||||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
@ -2075,12 +2056,10 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
|
||||
state.ptr = state.start;
|
||||
|
||||
if (state.charsize == 1) {
|
||||
if (state.logical_charsize == 1) {
|
||||
status = sre_search(&state, PatternObject_GetCode(self));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_usearch(&state, PatternObject_GetCode(self));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
|
@ -2205,12 +2184,10 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
|
||||
state.ptr = state.start;
|
||||
|
||||
if (state.charsize == 1) {
|
||||
if (state.logical_charsize == 1) {
|
||||
status = sre_search(&state, PatternObject_GetCode(self));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_usearch(&state, PatternObject_GetCode(self));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
|
@ -2295,7 +2272,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
|
|||
int status;
|
||||
Py_ssize_t n;
|
||||
Py_ssize_t i, b, e;
|
||||
int bint;
|
||||
int logical_charsize, charsize;
|
||||
int filter_is_callable;
|
||||
|
||||
if (PyCallable_Check(ptemplate)) {
|
||||
|
@ -2306,16 +2283,10 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
|
|||
} else {
|
||||
/* if not callable, check if it's a literal string */
|
||||
int literal;
|
||||
ptr = getstring(ptemplate, &n, &bint);
|
||||
b = bint;
|
||||
ptr = getstring(ptemplate, &n, &logical_charsize, &charsize);
|
||||
b = charsize;
|
||||
if (ptr) {
|
||||
if (b == 1) {
|
||||
literal = sre_literal_template((unsigned char *)ptr, n);
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
|
||||
#endif
|
||||
}
|
||||
literal = sre_literal_template(b, ptr, n);
|
||||
} else {
|
||||
PyErr_Clear();
|
||||
literal = 0;
|
||||
|
@ -2357,12 +2328,10 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
|
|||
|
||||
state.ptr = state.start;
|
||||
|
||||
if (state.charsize == 1) {
|
||||
if (state.logical_charsize == 1) {
|
||||
status = sre_search(&state, PatternObject_GetCode(self));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_usearch(&state, PatternObject_GetCode(self));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
|
@ -2694,15 +2663,18 @@ _compile(PyObject* self_, PyObject* args)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (pattern == Py_None)
|
||||
self->charsize = -1;
|
||||
else {
|
||||
Py_ssize_t p_length;
|
||||
if (!getstring(pattern, &p_length, &self->charsize)) {
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
if (pattern == Py_None) {
|
||||
self->logical_charsize = -1;
|
||||
self->charsize = -1;
|
||||
}
|
||||
else {
|
||||
Py_ssize_t p_length;
|
||||
if (!getstring(pattern, &p_length, &self->logical_charsize,
|
||||
&self->charsize)) {
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
Py_INCREF(pattern);
|
||||
self->pattern = pattern;
|
||||
|
@ -3746,12 +3718,10 @@ scanner_match(ScannerObject* self, PyObject *unused)
|
|||
|
||||
state->ptr = state->start;
|
||||
|
||||
if (state->charsize == 1) {
|
||||
if (state->logical_charsize == 1) {
|
||||
status = sre_match(state, PatternObject_GetCode(self->pattern));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_umatch(state, PatternObject_GetCode(self->pattern));
|
||||
#endif
|
||||
}
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
|
@ -3779,12 +3749,10 @@ scanner_search(ScannerObject* self, PyObject *unused)
|
|||
|
||||
state->ptr = state->start;
|
||||
|
||||
if (state->charsize == 1) {
|
||||
if (state->logical_charsize == 1) {
|
||||
status = sre_search(state, PatternObject_GetCode(self->pattern));
|
||||
} else {
|
||||
#if defined(HAVE_UNICODE)
|
||||
status = sre_usearch(state, PatternObject_GetCode(self->pattern));
|
||||
#endif
|
||||
}
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
|
|
|
@ -1355,7 +1355,7 @@ static PyObject *
|
|||
test_Z_code(PyObject *self)
|
||||
{
|
||||
PyObject *tuple, *obj;
|
||||
Py_UNICODE *value1, *value2;
|
||||
const Py_UNICODE *value1, *value2;
|
||||
Py_ssize_t len1, len2;
|
||||
|
||||
tuple = PyTuple_New(2);
|
||||
|
|
|
@ -80,18 +80,6 @@ Copyright (C) 1994 Steen Lumholt.
|
|||
#error "Tk older than 8.3.1 not supported"
|
||||
#endif
|
||||
|
||||
/* Unicode conversion assumes that Tcl_UniChar is two bytes.
|
||||
We cannot test this directly, so we test UTF-8 size instead,
|
||||
expecting that TCL_UTF_MAX is changed if Tcl ever supports
|
||||
either UTF-16 or UCS-4.
|
||||
Redhat 8 sets TCL_UTF_MAX to 6, and uses wchar_t for
|
||||
Tcl_Unichar. This is also ok as long as Python uses UCS-4,
|
||||
as well.
|
||||
*/
|
||||
#if TCL_UTF_MAX != 3 && !(defined(Py_UNICODE_WIDE) && TCL_UTF_MAX==6)
|
||||
#error "unsupported Tcl configuration"
|
||||
#endif
|
||||
|
||||
#if !(defined(MS_WINDOWS) || defined(__CYGWIN__))
|
||||
#define HAVE_CREATEFILEHANDLER
|
||||
#endif
|
||||
|
@ -975,38 +963,44 @@ AsObj(PyObject *value)
|
|||
return result;
|
||||
}
|
||||
else if (PyUnicode_Check(value)) {
|
||||
Py_UNICODE *inbuf = PyUnicode_AS_UNICODE(value);
|
||||
Py_ssize_t size = PyUnicode_GET_SIZE(value);
|
||||
/* This #ifdef assumes that Tcl uses UCS-2.
|
||||
See TCL_UTF_MAX test above. */
|
||||
#if defined(Py_UNICODE_WIDE) && TCL_UTF_MAX == 3
|
||||
void *inbuf;
|
||||
Py_ssize_t size;
|
||||
int kind;
|
||||
Tcl_UniChar *outbuf = NULL;
|
||||
Py_ssize_t i;
|
||||
size_t allocsize = ((size_t)size) * sizeof(Tcl_UniChar);
|
||||
if (allocsize >= size)
|
||||
outbuf = (Tcl_UniChar*)ckalloc(allocsize);
|
||||
size_t allocsize;
|
||||
|
||||
if (PyUnicode_READY(value) == -1)
|
||||
return NULL;
|
||||
|
||||
inbuf = PyUnicode_DATA(value);
|
||||
size = PyUnicode_GET_LENGTH(value);
|
||||
kind = PyUnicode_KIND(value);
|
||||
allocsize = ((size_t)size) * sizeof(Tcl_UniChar);
|
||||
outbuf = (Tcl_UniChar*)ckalloc(allocsize);
|
||||
/* Else overflow occurred, and we take the next exit */
|
||||
if (!outbuf) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
for (i = 0; i < size; i++) {
|
||||
if (inbuf[i] >= 0x10000) {
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, inbuf, i);
|
||||
/* We cannot test for sizeof(Tcl_UniChar) directly,
|
||||
so we test for UTF-8 size instead. */
|
||||
#if TCL_UTF_MAX == 3
|
||||
if (ch >= 0x10000) {
|
||||
/* Tcl doesn't do UTF-16, yet. */
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"unsupported character");
|
||||
ckfree(FREECAST outbuf);
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
outbuf[i] = inbuf[i];
|
||||
outbuf[i] = ch;
|
||||
}
|
||||
result = Tcl_NewUnicodeObj(outbuf, size);
|
||||
ckfree(FREECAST outbuf);
|
||||
return result;
|
||||
#else
|
||||
return Tcl_NewUnicodeObj(inbuf, size);
|
||||
#endif
|
||||
|
||||
}
|
||||
else if(PyTclObject_Check(value)) {
|
||||
Tcl_Obj *v = ((PyTclObject*)value)->value;
|
||||
|
@ -1088,24 +1082,14 @@ FromObj(PyObject* tkapp, Tcl_Obj *value)
|
|||
}
|
||||
|
||||
if (value->typePtr == app->StringType) {
|
||||
#if defined(Py_UNICODE_WIDE) && TCL_UTF_MAX==3
|
||||
PyObject *result;
|
||||
int size;
|
||||
Tcl_UniChar *input;
|
||||
Py_UNICODE *output;
|
||||
|
||||
size = Tcl_GetCharLength(value);
|
||||
result = PyUnicode_FromUnicode(NULL, size);
|
||||
if (!result)
|
||||
return NULL;
|
||||
input = Tcl_GetUnicode(value);
|
||||
output = PyUnicode_AS_UNICODE(result);
|
||||
while (size--)
|
||||
*output++ = *input++;
|
||||
return result;
|
||||
#if TCL_UTF_MAX==3
|
||||
return PyUnicode_FromKindAndData(
|
||||
PyUnicode_2BYTE_KIND, Tcl_GetUnicode(value),
|
||||
Tcl_GetCharLength(value));
|
||||
#else
|
||||
return PyUnicode_FromUnicode(Tcl_GetUnicode(value),
|
||||
Tcl_GetCharLength(value));
|
||||
return PyUnicode_FromKindAndData(
|
||||
PyUnicode_4BYTE_KIND, Tcl_GetUnicode(value),
|
||||
Tcl_GetCharLength(value));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -2810,9 +2810,9 @@ PyMODINIT_FUNC
|
|||
PyInit_array(void)
|
||||
{
|
||||
PyObject *m;
|
||||
char buffer[PY_ARRAY_LENGTH(descriptors)], *p;
|
||||
PyObject *typecodes;
|
||||
Py_ssize_t size = 0;
|
||||
register Py_UNICODE *p;
|
||||
struct arraydescr *descr;
|
||||
|
||||
if (PyType_Ready(&Arraytype) < 0)
|
||||
|
@ -2831,13 +2831,13 @@ PyInit_array(void)
|
|||
size++;
|
||||
}
|
||||
|
||||
typecodes = PyUnicode_FromStringAndSize(NULL, size);
|
||||
p = PyUnicode_AS_UNICODE(typecodes);
|
||||
p = buffer;
|
||||
for (descr = descriptors; descr->typecode != '\0'; descr++) {
|
||||
*p++ = (char)descr->typecode;
|
||||
}
|
||||
typecodes = PyUnicode_DecodeASCII(buffer, p - buffer, NULL);
|
||||
|
||||
PyModule_AddObject(m, "typecodes", (PyObject *)typecodes);
|
||||
PyModule_AddObject(m, "typecodes", typecodes);
|
||||
|
||||
if (PyErr_Occurred()) {
|
||||
Py_DECREF(m);
|
||||
|
|
|
@ -376,7 +376,7 @@ MD5_hexdigest(MD5object *self, PyObject *unused)
|
|||
unsigned char digest[MD5_DIGESTSIZE];
|
||||
struct md5_state temp;
|
||||
PyObject *retval;
|
||||
Py_UNICODE *hex_digest;
|
||||
Py_UCS1 *hex_digest;
|
||||
int i, j;
|
||||
|
||||
/* Get the raw (binary) digest value */
|
||||
|
@ -384,14 +384,10 @@ MD5_hexdigest(MD5object *self, PyObject *unused)
|
|||
md5_done(&temp, digest);
|
||||
|
||||
/* Create a new string */
|
||||
retval = PyUnicode_FromStringAndSize(NULL, MD5_DIGESTSIZE * 2);
|
||||
retval = PyUnicode_New(MD5_DIGESTSIZE * 2, 127);
|
||||
if (!retval)
|
||||
return NULL;
|
||||
hex_digest = PyUnicode_AS_UNICODE(retval);
|
||||
if (!hex_digest) {
|
||||
Py_DECREF(retval);
|
||||
return NULL;
|
||||
}
|
||||
hex_digest = PyUnicode_1BYTE_DATA(retval);
|
||||
|
||||
/* Make hex version of the digest */
|
||||
for(i=j=0; i<MD5_DIGESTSIZE; i++) {
|
||||
|
|
|
@ -402,7 +402,8 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
for (idx = 0; idx < nattrs; ++idx) {
|
||||
PyObject *item = PyTuple_GET_ITEM(args, idx);
|
||||
Py_ssize_t item_len;
|
||||
Py_UNICODE *item_buffer;
|
||||
void *data;
|
||||
unsigned int kind;
|
||||
int dot_count;
|
||||
|
||||
if (!PyUnicode_Check(item)) {
|
||||
|
@ -411,13 +412,18 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
Py_DECREF(attr);
|
||||
return NULL;
|
||||
}
|
||||
item_len = PyUnicode_GET_SIZE(item);
|
||||
item_buffer = PyUnicode_AS_UNICODE(item);
|
||||
if (PyUnicode_READY(item)) {
|
||||
Py_DECREF(attr);
|
||||
return NULL;
|
||||
}
|
||||
item_len = PyUnicode_GET_LENGTH(item);
|
||||
kind = PyUnicode_KIND(item);
|
||||
data = PyUnicode_DATA(item);
|
||||
|
||||
/* check whethere the string is dotted */
|
||||
dot_count = 0;
|
||||
for (char_idx = 0; char_idx < item_len; ++char_idx) {
|
||||
if (item_buffer[char_idx] == (Py_UNICODE)'.')
|
||||
if (PyUnicode_READ(kind, data, char_idx) == '.')
|
||||
++dot_count;
|
||||
}
|
||||
|
||||
|
@ -438,12 +444,12 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
}
|
||||
|
||||
for (; dot_count > 0; --dot_count) {
|
||||
while (item_buffer[unibuff_till] != (Py_UNICODE)'.') {
|
||||
while (PyUnicode_READ(kind, data, unibuff_till) != '.') {
|
||||
++unibuff_till;
|
||||
}
|
||||
attr_chain_item = PyUnicode_FromUnicode(
|
||||
item_buffer + unibuff_from,
|
||||
unibuff_till - unibuff_from);
|
||||
attr_chain_item = PyUnicode_Substring(item,
|
||||
unibuff_from,
|
||||
unibuff_till);
|
||||
if (attr_chain_item == NULL) {
|
||||
Py_DECREF(attr_chain);
|
||||
Py_DECREF(attr);
|
||||
|
@ -456,9 +462,8 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
}
|
||||
|
||||
/* now add the last dotless name */
|
||||
attr_chain_item = PyUnicode_FromUnicode(
|
||||
item_buffer + unibuff_from,
|
||||
item_len - unibuff_from);
|
||||
attr_chain_item = PyUnicode_Substring(item,
|
||||
unibuff_from, item_len);
|
||||
if (attr_chain_item == NULL) {
|
||||
Py_DECREF(attr_chain);
|
||||
Py_DECREF(attr);
|
||||
|
|
|
@ -1102,17 +1102,22 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
|
|||
PyUnicodeObject *_u_string = NULL;
|
||||
int result = 0;
|
||||
int i;
|
||||
int kind;
|
||||
void *data;
|
||||
|
||||
/* Yes, supports only 8bit encodings */
|
||||
_u_string = (PyUnicodeObject *)
|
||||
PyUnicode_Decode(template_buffer, 256, name, "replace");
|
||||
|
||||
if (_u_string == NULL)
|
||||
if (_u_string == NULL || PyUnicode_READY(_u_string) == -1)
|
||||
return result;
|
||||
|
||||
kind = PyUnicode_KIND(_u_string);
|
||||
data = PyUnicode_DATA(_u_string);
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
/* Stupid to access directly, but fast */
|
||||
Py_UNICODE c = _u_string->str[i];
|
||||
Py_UCS4 c = PyUnicode_READ(kind, data, i);
|
||||
if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
|
||||
info->map[i] = -1;
|
||||
else
|
||||
|
@ -1229,7 +1234,7 @@ get_pybool(int istrue)
|
|||
static PyObject *
|
||||
xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
|
||||
{
|
||||
Py_UNICODE *name;
|
||||
const Py_UNICODE *name;
|
||||
int handlernum = -1;
|
||||
|
||||
if (!PyUnicode_Check(nameobj))
|
||||
|
|
|
@ -352,7 +352,7 @@ SHA1_hexdigest(SHA1object *self, PyObject *unused)
|
|||
unsigned char digest[SHA1_DIGESTSIZE];
|
||||
struct sha1_state temp;
|
||||
PyObject *retval;
|
||||
Py_UNICODE *hex_digest;
|
||||
Py_UCS1 *hex_digest;
|
||||
int i, j;
|
||||
|
||||
/* Get the raw (binary) digest value */
|
||||
|
@ -360,14 +360,10 @@ SHA1_hexdigest(SHA1object *self, PyObject *unused)
|
|||
sha1_done(&temp, digest);
|
||||
|
||||
/* Create a new string */
|
||||
retval = PyUnicode_FromStringAndSize(NULL, SHA1_DIGESTSIZE * 2);
|
||||
retval = PyUnicode_New(SHA1_DIGESTSIZE * 2, 127);
|
||||
if (!retval)
|
||||
return NULL;
|
||||
hex_digest = PyUnicode_AS_UNICODE(retval);
|
||||
if (!hex_digest) {
|
||||
Py_DECREF(retval);
|
||||
return NULL;
|
||||
}
|
||||
hex_digest = PyUnicode_1BYTE_DATA(retval);
|
||||
|
||||
/* Make hex version of the digest */
|
||||
for(i=j=0; i<SHA1_DIGESTSIZE; i++) {
|
||||
|
|
|
@ -445,7 +445,7 @@ SHA256_hexdigest(SHAobject *self, PyObject *unused)
|
|||
unsigned char digest[SHA_DIGESTSIZE];
|
||||
SHAobject temp;
|
||||
PyObject *retval;
|
||||
Py_UNICODE *hex_digest;
|
||||
Py_UCS1 *hex_digest;
|
||||
int i, j;
|
||||
|
||||
/* Get the raw (binary) digest value */
|
||||
|
@ -453,14 +453,10 @@ SHA256_hexdigest(SHAobject *self, PyObject *unused)
|
|||
sha_final(digest, &temp);
|
||||
|
||||
/* Create a new string */
|
||||
retval = PyUnicode_FromStringAndSize(NULL, self->digestsize * 2);
|
||||
retval = PyUnicode_New(self->digestsize * 2, 127);
|
||||
if (!retval)
|
||||
return NULL;
|
||||
hex_digest = PyUnicode_AS_UNICODE(retval);
|
||||
if (!hex_digest) {
|
||||
Py_DECREF(retval);
|
||||
return NULL;
|
||||
}
|
||||
hex_digest = PyUnicode_1BYTE_DATA(retval);
|
||||
|
||||
/* Make hex version of the digest */
|
||||
for(i=j=0; i<self->digestsize; i++) {
|
||||
|
|
|
@ -511,7 +511,7 @@ SHA512_hexdigest(SHAobject *self, PyObject *unused)
|
|||
unsigned char digest[SHA_DIGESTSIZE];
|
||||
SHAobject temp;
|
||||
PyObject *retval;
|
||||
Py_UNICODE *hex_digest;
|
||||
Py_UCS1 *hex_digest;
|
||||
int i, j;
|
||||
|
||||
/* Get the raw (binary) digest value */
|
||||
|
@ -519,14 +519,10 @@ SHA512_hexdigest(SHAobject *self, PyObject *unused)
|
|||
sha512_final(digest, &temp);
|
||||
|
||||
/* Create a new string */
|
||||
retval = PyUnicode_FromStringAndSize(NULL, self->digestsize * 2);
|
||||
retval = PyUnicode_New(self->digestsize * 2, 127);
|
||||
if (!retval)
|
||||
return NULL;
|
||||
hex_digest = PyUnicode_AS_UNICODE(retval);
|
||||
if (!hex_digest) {
|
||||
Py_DECREF(retval);
|
||||
return NULL;
|
||||
}
|
||||
hex_digest = PyUnicode_1BYTE_DATA(retval);
|
||||
|
||||
/* Make hex version of the digest */
|
||||
for (i=j=0; i<self->digestsize; i++) {
|
||||
|
|
|
@ -30,7 +30,8 @@ typedef struct {
|
|||
PyObject* pattern; /* pattern source (or None) */
|
||||
int flags; /* flags used when compiling pattern source */
|
||||
PyObject *weakreflist; /* List of weak references */
|
||||
int charsize; /* pattern charsize (or -1) */
|
||||
int logical_charsize; /* pattern charsize (or -1) */
|
||||
int charsize;
|
||||
/* pattern code */
|
||||
Py_ssize_t codesize;
|
||||
SRE_CODE code[1];
|
||||
|
@ -71,6 +72,7 @@ typedef struct {
|
|||
PyObject* string;
|
||||
Py_ssize_t pos, endpos;
|
||||
/* character size */
|
||||
int logical_charsize; /* kind of thing: 1 - bytes, 2/4 - unicode */
|
||||
int charsize;
|
||||
/* registers */
|
||||
Py_ssize_t lastindex;
|
||||
|
|
|
@ -70,7 +70,7 @@ syslog_get_argv(void)
|
|||
|
||||
Py_ssize_t argv_len, scriptlen;
|
||||
PyObject *scriptobj;
|
||||
Py_UNICODE *atslash, *atstart;
|
||||
Py_ssize_t slash;
|
||||
PyObject *argv = PySys_GetObject("argv");
|
||||
|
||||
if (argv == NULL) {
|
||||
|
@ -95,11 +95,13 @@ syslog_get_argv(void)
|
|||
return(NULL);
|
||||
}
|
||||
|
||||
atstart = PyUnicode_AS_UNICODE(scriptobj);
|
||||
atslash = Py_UNICODE_strrchr(atstart, SEP);
|
||||
if (atslash) {
|
||||
return(PyUnicode_FromUnicode(atslash + 1,
|
||||
scriptlen - (atslash - atstart) - 1));
|
||||
slash = PyUnicode_FindChar(scriptobj, SEP,
|
||||
0, PyUnicode_GET_LENGTH(scriptobj), -1);
|
||||
if (slash == -2)
|
||||
return NULL;
|
||||
if (slash != -1) {
|
||||
return PyUnicode_Substring(scriptobj, slash,
|
||||
PyUnicode_GET_LENGTH(scriptobj));
|
||||
} else {
|
||||
Py_INCREF(scriptobj);
|
||||
return(scriptobj);
|
||||
|
|
|
@ -92,16 +92,13 @@ new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4)
|
|||
|
||||
static Py_UCS4 getuchar(PyUnicodeObject *obj)
|
||||
{
|
||||
Py_UNICODE *v = PyUnicode_AS_UNICODE(obj);
|
||||
|
||||
if (PyUnicode_GET_SIZE(obj) == 1)
|
||||
return *v;
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
else if ((PyUnicode_GET_SIZE(obj) == 2) &&
|
||||
(0xD800 <= v[0] && v[0] <= 0xDBFF) &&
|
||||
(0xDC00 <= v[1] && v[1] <= 0xDFFF))
|
||||
return (((v[0] & 0x3FF)<<10) | (v[1] & 0x3FF)) + 0x10000;
|
||||
#endif
|
||||
if (PyUnicode_READY(obj))
|
||||
return (Py_UCS4)-1;
|
||||
if (PyUnicode_GET_LENGTH(obj) == 1) {
|
||||
if (PyUnicode_READY(obj))
|
||||
return (Py_UCS4)-1;
|
||||
return PyUnicode_READ_CHAR(obj, 0);
|
||||
}
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"need a single Unicode character as parameter");
|
||||
return (Py_UCS4)-1;
|
||||
|
@ -1142,7 +1139,6 @@ static PyObject *
|
|||
unicodedata_lookup(PyObject* self, PyObject* args)
|
||||
{
|
||||
Py_UCS4 code;
|
||||
Py_UNICODE str[2];
|
||||
|
||||
char* name;
|
||||
int namelen;
|
||||
|
@ -1155,15 +1151,7 @@ unicodedata_lookup(PyObject* self, PyObject* args)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
if (code >= 0x10000) {
|
||||
str[0] = 0xd800 + ((code - 0x10000) >> 10);
|
||||
str[1] = 0xdc00 + ((code - 0x10000) & 0x3ff);
|
||||
return PyUnicode_FromUnicode(str, 2);
|
||||
}
|
||||
#endif
|
||||
str[0] = (Py_UNICODE) code;
|
||||
return PyUnicode_FromUnicode(str, 1);
|
||||
return PyUnicode_FromOrdinal(code);
|
||||
}
|
||||
|
||||
/* XXX Add doc strings. */
|
||||
|
|
|
@ -64,7 +64,7 @@ static int
|
|||
zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
PyObject *pathobj, *files;
|
||||
Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
|
||||
Py_UCS4 *path, *p, *prefix, buf[MAXPATHLEN+2];
|
||||
Py_ssize_t len;
|
||||
|
||||
if (!_PyArg_NoKeywords("zipimporter()", kwds))
|
||||
|
@ -74,8 +74,11 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
|||
PyUnicode_FSDecoder, &pathobj))
|
||||
return -1;
|
||||
|
||||
if (PyUnicode_READY(pathobj) == -1)
|
||||
return -1;
|
||||
|
||||
/* copy path to buf */
|
||||
len = PyUnicode_GET_SIZE(pathobj);
|
||||
len = PyUnicode_GET_LENGTH(pathobj);
|
||||
if (len == 0) {
|
||||
PyErr_SetString(ZipImportError, "archive path is empty");
|
||||
goto error;
|
||||
|
@ -85,7 +88,8 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
|||
"archive path too long");
|
||||
goto error;
|
||||
}
|
||||
Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
|
||||
if (!PyUnicode_AsUCS4(pathobj, buf, PY_ARRAY_LENGTH(buf), 1))
|
||||
goto error;
|
||||
|
||||
#ifdef ALTSEP
|
||||
for (p = buf; *p; p++) {
|
||||
|
@ -101,7 +105,8 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
|||
int rv;
|
||||
|
||||
if (pathobj == NULL) {
|
||||
pathobj = PyUnicode_FromUnicode(buf, len);
|
||||
pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buf, len);
|
||||
if (pathobj == NULL)
|
||||
goto error;
|
||||
}
|
||||
|
@ -116,7 +121,7 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
|||
else if (PyErr_Occurred())
|
||||
goto error;
|
||||
/* back up one path element */
|
||||
p = Py_UNICODE_strrchr(buf, SEP);
|
||||
p = Py_UCS4_strrchr(buf, SEP);
|
||||
if (prefix != NULL)
|
||||
*prefix = SEP;
|
||||
if (p == NULL)
|
||||
|
@ -148,7 +153,7 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
|||
|
||||
if (prefix != NULL) {
|
||||
prefix++;
|
||||
len = Py_UNICODE_strlen(prefix);
|
||||
len = Py_UCS4_strlen(prefix);
|
||||
if (prefix[len-1] != SEP) {
|
||||
/* add trailing SEP */
|
||||
prefix[len] = SEP;
|
||||
|
@ -158,7 +163,8 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
|
|||
}
|
||||
else
|
||||
len = 0;
|
||||
self->prefix = PyUnicode_FromUnicode(prefix, len);
|
||||
self->prefix = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
prefix, len);
|
||||
if (self->prefix == NULL)
|
||||
goto error;
|
||||
|
||||
|
@ -193,7 +199,7 @@ zipimporter_repr(ZipImporter *self)
|
|||
{
|
||||
if (self->archive == NULL)
|
||||
return PyUnicode_FromString("<zipimporter object \"???\">");
|
||||
else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0)
|
||||
else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
|
||||
return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
|
||||
self->archive, SEP, self->prefix);
|
||||
else
|
||||
|
@ -206,16 +212,24 @@ static PyObject *
|
|||
get_subname(PyObject *fullname)
|
||||
{
|
||||
Py_ssize_t len;
|
||||
Py_UNICODE *subname;
|
||||
subname = Py_UNICODE_strrchr(PyUnicode_AS_UNICODE(fullname), '.');
|
||||
Py_UCS4 *subname, *fullname_ucs4;
|
||||
fullname_ucs4 = PyUnicode_AsUCS4Copy(fullname);
|
||||
if (!fullname_ucs4)
|
||||
return NULL;
|
||||
subname = Py_UCS4_strrchr(fullname_ucs4, '.');
|
||||
if (subname == NULL) {
|
||||
PyMem_Free(fullname_ucs4);
|
||||
Py_INCREF(fullname);
|
||||
return fullname;
|
||||
} else {
|
||||
PyObject *result;
|
||||
subname++;
|
||||
len = PyUnicode_GET_SIZE(fullname);
|
||||
len -= subname - PyUnicode_AS_UNICODE(fullname);
|
||||
return PyUnicode_FromUnicode(subname, len);
|
||||
len = PyUnicode_GET_LENGTH(fullname);
|
||||
len -= subname - fullname_ucs4;
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
subname, len);
|
||||
PyMem_Free(fullname_ucs4);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -228,23 +242,29 @@ static PyObject*
|
|||
make_filename(PyObject *prefix, PyObject *name)
|
||||
{
|
||||
PyObject *pathobj;
|
||||
Py_UNICODE *p;
|
||||
Py_UCS4 *p, *buf;
|
||||
Py_ssize_t len;
|
||||
|
||||
pathobj = PyUnicode_FromUnicode(NULL,
|
||||
PyUnicode_GET_SIZE(prefix)
|
||||
+ PyUnicode_GET_SIZE(name));
|
||||
if (pathobj == NULL)
|
||||
len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
|
||||
p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
|
||||
if (buf == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
p = PyUnicode_AS_UNICODE(pathobj);
|
||||
|
||||
Py_UNICODE_strcpy(p, PyUnicode_AS_UNICODE(prefix));
|
||||
p += PyUnicode_GET_SIZE(prefix);
|
||||
Py_UNICODE_strcpy(p, PyUnicode_AS_UNICODE(name));
|
||||
if (!PyUnicode_AsUCS4(prefix, p, len, 0))
|
||||
return NULL;
|
||||
p += PyUnicode_GET_LENGTH(prefix);
|
||||
len -= PyUnicode_GET_LENGTH(prefix);
|
||||
if (!PyUnicode_AsUCS4(name, p, len, 1))
|
||||
return NULL;
|
||||
for (; *p; p++) {
|
||||
if (*p == '.')
|
||||
*p = SEP;
|
||||
}
|
||||
pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buf, p-buf);
|
||||
PyMem_Free(buf);
|
||||
return pathobj;
|
||||
}
|
||||
|
||||
|
@ -330,6 +350,8 @@ zipimporter_load_module(PyObject *obj, PyObject *args)
|
|||
if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
|
||||
&fullname))
|
||||
return NULL;
|
||||
if (PyUnicode_READY(fullname) == -1)
|
||||
return NULL;
|
||||
|
||||
code = get_module_code(self, fullname, &ispackage, &modpath);
|
||||
if (code == NULL)
|
||||
|
@ -426,46 +448,53 @@ zipimporter_is_package(PyObject *obj, PyObject *args)
|
|||
return PyBool_FromLong(mi == MI_PACKAGE);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
zipimporter_get_data(PyObject *obj, PyObject *args)
|
||||
{
|
||||
ZipImporter *self = (ZipImporter *)obj;
|
||||
PyObject *pathobj, *key;
|
||||
const Py_UNICODE *path;
|
||||
const Py_UCS4 *path;
|
||||
#ifdef ALTSEP
|
||||
Py_UNICODE *p, buf[MAXPATHLEN + 1];
|
||||
Py_UCS4 *p;
|
||||
#endif
|
||||
Py_UNICODE *archive;
|
||||
PyObject *toc_entry;
|
||||
Py_ssize_t path_len, len;
|
||||
Py_UCS4 buf[MAXPATHLEN + 1], archive[MAXPATHLEN + 1];
|
||||
|
||||
if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
|
||||
return NULL;
|
||||
|
||||
path_len = PyUnicode_GET_SIZE(pathobj);
|
||||
path = PyUnicode_AS_UNICODE(pathobj);
|
||||
#ifdef ALTSEP
|
||||
if (PyUnicode_READY(pathobj) == -1)
|
||||
return NULL;
|
||||
|
||||
path_len = PyUnicode_GET_LENGTH(pathobj);
|
||||
if (path_len >= MAXPATHLEN) {
|
||||
PyErr_SetString(ZipImportError, "path too long");
|
||||
return NULL;
|
||||
}
|
||||
Py_UNICODE_strcpy(buf, path);
|
||||
if (!PyUnicode_AsUCS4(pathobj, buf, PY_ARRAY_LENGTH(buf), 1))
|
||||
return NULL;
|
||||
path = buf;
|
||||
#ifdef ALTSEP
|
||||
for (p = buf; *p; p++) {
|
||||
if (*p == ALTSEP)
|
||||
*p = SEP;
|
||||
}
|
||||
path = buf;
|
||||
#endif
|
||||
archive = PyUnicode_AS_UNICODE(self->archive);
|
||||
len = PyUnicode_GET_SIZE(self->archive);
|
||||
if ((size_t)len < Py_UNICODE_strlen(path) &&
|
||||
Py_UNICODE_strncmp(path, archive, len) == 0 &&
|
||||
path[len] == SEP) {
|
||||
path += len + 1;
|
||||
path_len -= len + 1;
|
||||
len = PyUnicode_GET_LENGTH(self->archive);
|
||||
if ((size_t)len < Py_UCS4_strlen(path)) {
|
||||
if (!PyUnicode_AsUCS4(self->archive, archive, PY_ARRAY_LENGTH(archive), 1))
|
||||
return NULL;
|
||||
if (Py_UCS4_strncmp(path, archive, len) == 0 &&
|
||||
path[len] == SEP) {
|
||||
path += len + 1;
|
||||
path_len -= len + 1;
|
||||
}
|
||||
}
|
||||
|
||||
key = PyUnicode_FromUnicode(path, path_len);
|
||||
key = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
path, path_len);
|
||||
if (key == NULL)
|
||||
return NULL;
|
||||
toc_entry = PyDict_GetItem(self->files, key);
|
||||
|
@ -725,9 +754,10 @@ read_directory(PyObject *archive)
|
|||
unsigned short flags;
|
||||
long compress, crc, data_size, file_size, file_offset, date, time;
|
||||
long header_offset, name_size, header_size, header_position;
|
||||
long i, l, count;
|
||||
long l, count;
|
||||
Py_ssize_t i;
|
||||
size_t length;
|
||||
Py_UNICODE path[MAXPATHLEN + 5];
|
||||
Py_UCS4 path[MAXPATHLEN + 5];
|
||||
char name[MAXPATHLEN + 5];
|
||||
PyObject *nameobj = NULL;
|
||||
char *p, endof_central_dir[22];
|
||||
|
@ -736,12 +766,13 @@ read_directory(PyObject *archive)
|
|||
const char *charset;
|
||||
int bootstrap;
|
||||
|
||||
if (PyUnicode_GET_SIZE(archive) > MAXPATHLEN) {
|
||||
if (PyUnicode_GET_LENGTH(archive) > MAXPATHLEN) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"Zip path name is too long");
|
||||
return NULL;
|
||||
}
|
||||
Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive));
|
||||
if (!PyUnicode_AsUCS4(archive, path, PY_ARRAY_LENGTH(path), 1))
|
||||
return NULL;
|
||||
|
||||
fp = _Py_fopen(archive, "rb");
|
||||
if (fp == NULL) {
|
||||
|
@ -771,7 +802,7 @@ read_directory(PyObject *archive)
|
|||
if (files == NULL)
|
||||
goto error;
|
||||
|
||||
length = Py_UNICODE_strlen(path);
|
||||
length = Py_UCS4_strlen(path);
|
||||
path[length] = SEP;
|
||||
|
||||
/* Start of Central Directory */
|
||||
|
@ -802,7 +833,7 @@ read_directory(PyObject *archive)
|
|||
name_size = MAXPATHLEN;
|
||||
|
||||
p = name;
|
||||
for (i = 0; i < name_size; i++) {
|
||||
for (i = 0; i < (Py_ssize_t)name_size; i++) {
|
||||
*p = (char)getc(fp);
|
||||
if (*p == '/')
|
||||
*p = SEP;
|
||||
|
@ -827,6 +858,8 @@ read_directory(PyObject *archive)
|
|||
else
|
||||
charset = "cp437";
|
||||
nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
|
||||
if (PyUnicode_READY(nameobj) == -1)
|
||||
goto error;
|
||||
if (nameobj == NULL) {
|
||||
if (bootstrap)
|
||||
PyErr_Format(PyExc_NotImplementedError,
|
||||
|
@ -835,11 +868,12 @@ read_directory(PyObject *archive)
|
|||
PY_MAJOR_VERSION, PY_MINOR_VERSION);
|
||||
goto error;
|
||||
}
|
||||
Py_UNICODE_strncpy(path + length + 1,
|
||||
PyUnicode_AS_UNICODE(nameobj),
|
||||
MAXPATHLEN - length - 1);
|
||||
|
||||
pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
|
||||
for (i = 0; (i < MAXPATHLEN - length - 1) &&
|
||||
(i < PyUnicode_GET_LENGTH(nameobj)); i++)
|
||||
path[length + 1 + i] = PyUnicode_READ_CHAR(nameobj, i);
|
||||
path[length + 1 + i] = 0;
|
||||
pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
path, Py_UCS4_strlen(path));
|
||||
if (pathobj == NULL)
|
||||
goto error;
|
||||
t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
|
||||
|
@ -1148,8 +1182,11 @@ get_mtime_of_source(ZipImporter *self, PyObject *path)
|
|||
time_t mtime;
|
||||
|
||||
/* strip 'c' or 'o' from *.py[co] */
|
||||
stripped = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(path),
|
||||
PyUnicode_GET_SIZE(path) - 1);
|
||||
if (PyUnicode_READY(path) == -1)
|
||||
return (time_t)-1;
|
||||
stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
|
||||
PyUnicode_DATA(path),
|
||||
PyUnicode_GET_LENGTH(path) - 1);
|
||||
if (stripped == NULL)
|
||||
return (time_t)-1;
|
||||
|
||||
|
|
|
@ -1379,9 +1379,7 @@ PyNumber_Long(PyObject *o)
|
|||
PyBytes_GET_SIZE(o));
|
||||
if (PyUnicode_Check(o))
|
||||
/* The above check is done in PyLong_FromUnicode(). */
|
||||
return PyLong_FromUnicode(PyUnicode_AS_UNICODE(o),
|
||||
PyUnicode_GET_SIZE(o),
|
||||
10);
|
||||
return PyLong_FromUnicodeObject(o, 10);
|
||||
if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
|
||||
return long_from_string(buffer, buffer_len);
|
||||
|
||||
|
|
|
@ -854,83 +854,79 @@ bytearray_repr(PyByteArrayObject *self)
|
|||
const char *quote_prefix = "bytearray(b";
|
||||
const char *quote_postfix = ")";
|
||||
Py_ssize_t length = Py_SIZE(self);
|
||||
/* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
|
||||
/* 15 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */
|
||||
size_t newsize;
|
||||
PyObject *v;
|
||||
if (length > (PY_SSIZE_T_MAX - 14) / 4) {
|
||||
register Py_ssize_t i;
|
||||
register char c;
|
||||
register char *p;
|
||||
int quote;
|
||||
char *test, *start;
|
||||
char *buffer;
|
||||
|
||||
if (length > (PY_SSIZE_T_MAX - 15) / 4) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"bytearray object is too large to make repr");
|
||||
return NULL;
|
||||
}
|
||||
newsize = 14 + 4 * length;
|
||||
v = PyUnicode_FromUnicode(NULL, newsize);
|
||||
if (v == NULL) {
|
||||
|
||||
newsize = 15 + length * 4;
|
||||
buffer = PyMem_Malloc(newsize);
|
||||
if (buffer == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
register Py_ssize_t i;
|
||||
register Py_UNICODE c;
|
||||
register Py_UNICODE *p;
|
||||
int quote;
|
||||
|
||||
/* Figure out which quote to use; single is preferred */
|
||||
quote = '\'';
|
||||
{
|
||||
char *test, *start;
|
||||
start = PyByteArray_AS_STRING(self);
|
||||
for (test = start; test < start+length; ++test) {
|
||||
if (*test == '"') {
|
||||
quote = '\''; /* back to single */
|
||||
goto decided;
|
||||
}
|
||||
else if (*test == '\'')
|
||||
quote = '"';
|
||||
}
|
||||
decided:
|
||||
;
|
||||
/* Figure out which quote to use; single is preferred */
|
||||
quote = '\'';
|
||||
start = PyByteArray_AS_STRING(self);
|
||||
for (test = start; test < start+length; ++test) {
|
||||
if (*test == '"') {
|
||||
quote = '\''; /* back to single */
|
||||
break;
|
||||
}
|
||||
|
||||
p = PyUnicode_AS_UNICODE(v);
|
||||
while (*quote_prefix)
|
||||
*p++ = *quote_prefix++;
|
||||
*p++ = quote;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
/* There's at least enough room for a hex escape
|
||||
and a closing quote. */
|
||||
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
|
||||
c = self->ob_bytes[i];
|
||||
if (c == '\'' || c == '\\')
|
||||
*p++ = '\\', *p++ = c;
|
||||
else if (c == '\t')
|
||||
*p++ = '\\', *p++ = 't';
|
||||
else if (c == '\n')
|
||||
*p++ = '\\', *p++ = 'n';
|
||||
else if (c == '\r')
|
||||
*p++ = '\\', *p++ = 'r';
|
||||
else if (c == 0)
|
||||
*p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
|
||||
else if (c < ' ' || c >= 0x7f) {
|
||||
*p++ = '\\';
|
||||
*p++ = 'x';
|
||||
*p++ = hexdigits[(c & 0xf0) >> 4];
|
||||
*p++ = hexdigits[c & 0xf];
|
||||
}
|
||||
else
|
||||
*p++ = c;
|
||||
}
|
||||
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
|
||||
*p++ = quote;
|
||||
while (*quote_postfix) {
|
||||
*p++ = *quote_postfix++;
|
||||
}
|
||||
*p = '\0';
|
||||
if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
|
||||
Py_DECREF(v);
|
||||
return NULL;
|
||||
}
|
||||
return v;
|
||||
else if (*test == '\'')
|
||||
quote = '"';
|
||||
}
|
||||
|
||||
p = buffer;
|
||||
while (*quote_prefix)
|
||||
*p++ = *quote_prefix++;
|
||||
*p++ = quote;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
/* There's at least enough room for a hex escape
|
||||
and a closing quote. */
|
||||
assert(newsize - (p - buffer) >= 5);
|
||||
c = self->ob_bytes[i];
|
||||
if (c == '\'' || c == '\\')
|
||||
*p++ = '\\', *p++ = c;
|
||||
else if (c == '\t')
|
||||
*p++ = '\\', *p++ = 't';
|
||||
else if (c == '\n')
|
||||
*p++ = '\\', *p++ = 'n';
|
||||
else if (c == '\r')
|
||||
*p++ = '\\', *p++ = 'r';
|
||||
else if (c == 0)
|
||||
*p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
|
||||
else if (c < ' ' || c >= 0x7f) {
|
||||
*p++ = '\\';
|
||||
*p++ = 'x';
|
||||
*p++ = hexdigits[(c & 0xf0) >> 4];
|
||||
*p++ = hexdigits[c & 0xf];
|
||||
}
|
||||
else
|
||||
*p++ = c;
|
||||
}
|
||||
assert(newsize - (p - buffer) >= 1);
|
||||
*p++ = quote;
|
||||
while (*quote_postfix) {
|
||||
*p++ = *quote_postfix++;
|
||||
}
|
||||
|
||||
v = PyUnicode_DecodeASCII(buffer, p - buffer, NULL);
|
||||
PyMem_Free(buffer);
|
||||
return v;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -1034,6 +1030,8 @@ bytearray_dealloc(PyByteArrayObject *self)
|
|||
/* -------------------------------------------------------------------- */
|
||||
/* Methods */
|
||||
|
||||
#define FASTSEARCH fastsearch
|
||||
#define STRINGLIB(F) stringlib_##F
|
||||
#define STRINGLIB_CHAR char
|
||||
#define STRINGLIB_LEN PyByteArray_GET_SIZE
|
||||
#define STRINGLIB_STR PyByteArray_AS_STRING
|
||||
|
@ -2651,15 +2649,20 @@ bytearray_fromhex(PyObject *cls, PyObject *args)
|
|||
{
|
||||
PyObject *newbytes, *hexobj;
|
||||
char *buf;
|
||||
Py_UNICODE *hex;
|
||||
Py_ssize_t hexlen, byteslen, i, j;
|
||||
int top, bot;
|
||||
void *data;
|
||||
unsigned int kind;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
|
||||
return NULL;
|
||||
assert(PyUnicode_Check(hexobj));
|
||||
hexlen = PyUnicode_GET_SIZE(hexobj);
|
||||
hex = PyUnicode_AS_UNICODE(hexobj);
|
||||
if (PyUnicode_READY(hexobj))
|
||||
return NULL;
|
||||
kind = PyUnicode_KIND(hexobj);
|
||||
data = PyUnicode_DATA(hexobj);
|
||||
hexlen = PyUnicode_GET_LENGTH(hexobj);
|
||||
|
||||
byteslen = hexlen/2; /* This overestimates if there are spaces */
|
||||
newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
|
||||
if (!newbytes)
|
||||
|
@ -2667,12 +2670,12 @@ bytearray_fromhex(PyObject *cls, PyObject *args)
|
|||
buf = PyByteArray_AS_STRING(newbytes);
|
||||
for (i = j = 0; i < hexlen; i += 2) {
|
||||
/* skip over spaces in the input */
|
||||
while (hex[i] == ' ')
|
||||
while (PyUnicode_READ(kind, data, i) == ' ')
|
||||
i++;
|
||||
if (i >= hexlen)
|
||||
break;
|
||||
top = hex_digit_to_int(hex[i]);
|
||||
bot = hex_digit_to_int(hex[i+1]);
|
||||
top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
|
||||
bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
|
||||
if (top == -1 || bot == -1) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"non-hexadecimal number found in "
|
||||
|
|
|
@ -566,74 +566,68 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
|
|||
{
|
||||
static const char *hexdigits = "0123456789abcdef";
|
||||
register PyBytesObject* op = (PyBytesObject*) obj;
|
||||
Py_ssize_t length = Py_SIZE(op);
|
||||
size_t newsize;
|
||||
Py_ssize_t i, length = Py_SIZE(op);
|
||||
size_t newsize, squotes, dquotes;
|
||||
PyObject *v;
|
||||
if (length > (PY_SSIZE_T_MAX - 3) / 4) {
|
||||
unsigned char quote, *s, *p;
|
||||
|
||||
/* Compute size of output string */
|
||||
squotes = dquotes = 0;
|
||||
newsize = 3; /* b'' */
|
||||
s = (unsigned char*)op->ob_sval;
|
||||
for (i = 0; i < length; i++) {
|
||||
switch(s[i]) {
|
||||
case '\'': squotes++; newsize++; break;
|
||||
case '"': dquotes++; newsize++; break;
|
||||
case '\\': case '\t': case '\n': case '\r':
|
||||
newsize += 2; break; /* \C */
|
||||
default:
|
||||
if (s[i] < ' ' || s[i] >= 0x7f)
|
||||
newsize += 4; /* \xHH */
|
||||
else
|
||||
newsize++;
|
||||
}
|
||||
}
|
||||
quote = '\'';
|
||||
if (smartquotes && squotes && !dquotes)
|
||||
quote = '"';
|
||||
if (squotes && quote == '\'')
|
||||
newsize += squotes;
|
||||
|
||||
if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"bytes object is too large to make repr");
|
||||
return NULL;
|
||||
}
|
||||
newsize = 3 + 4 * length;
|
||||
v = PyUnicode_FromUnicode(NULL, newsize);
|
||||
|
||||
v = PyUnicode_New(newsize, 127);
|
||||
if (v == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
register Py_ssize_t i;
|
||||
register Py_UNICODE c;
|
||||
register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
|
||||
int quote;
|
||||
p = PyUnicode_1BYTE_DATA(v);
|
||||
|
||||
/* Figure out which quote to use; single is preferred */
|
||||
quote = '\'';
|
||||
if (smartquotes) {
|
||||
char *test, *start;
|
||||
start = PyBytes_AS_STRING(op);
|
||||
for (test = start; test < start+length; ++test) {
|
||||
if (*test == '"') {
|
||||
quote = '\''; /* back to single */
|
||||
goto decided;
|
||||
}
|
||||
else if (*test == '\'')
|
||||
quote = '"';
|
||||
}
|
||||
decided:
|
||||
;
|
||||
*p++ = 'b', *p++ = quote;
|
||||
for (i = 0; i < length; i++) {
|
||||
unsigned char c = op->ob_sval[i];
|
||||
if (c == quote || c == '\\')
|
||||
*p++ = '\\', *p++ = c;
|
||||
else if (c == '\t')
|
||||
*p++ = '\\', *p++ = 't';
|
||||
else if (c == '\n')
|
||||
*p++ = '\\', *p++ = 'n';
|
||||
else if (c == '\r')
|
||||
*p++ = '\\', *p++ = 'r';
|
||||
else if (c < ' ' || c >= 0x7f) {
|
||||
*p++ = '\\';
|
||||
*p++ = 'x';
|
||||
*p++ = hexdigits[(c & 0xf0) >> 4];
|
||||
*p++ = hexdigits[c & 0xf];
|
||||
}
|
||||
|
||||
*p++ = 'b', *p++ = quote;
|
||||
for (i = 0; i < length; i++) {
|
||||
/* There's at least enough room for a hex escape
|
||||
and a closing quote. */
|
||||
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
|
||||
c = op->ob_sval[i];
|
||||
if (c == quote || c == '\\')
|
||||
*p++ = '\\', *p++ = c;
|
||||
else if (c == '\t')
|
||||
*p++ = '\\', *p++ = 't';
|
||||
else if (c == '\n')
|
||||
*p++ = '\\', *p++ = 'n';
|
||||
else if (c == '\r')
|
||||
*p++ = '\\', *p++ = 'r';
|
||||
else if (c < ' ' || c >= 0x7f) {
|
||||
*p++ = '\\';
|
||||
*p++ = 'x';
|
||||
*p++ = hexdigits[(c & 0xf0) >> 4];
|
||||
*p++ = hexdigits[c & 0xf];
|
||||
}
|
||||
else
|
||||
*p++ = c;
|
||||
}
|
||||
assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
|
||||
*p++ = quote;
|
||||
*p = '\0';
|
||||
if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
|
||||
Py_DECREF(v);
|
||||
return NULL;
|
||||
}
|
||||
return v;
|
||||
else
|
||||
*p++ = c;
|
||||
}
|
||||
*p++ = quote;
|
||||
return v;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -2356,15 +2350,20 @@ bytes_fromhex(PyObject *cls, PyObject *args)
|
|||
{
|
||||
PyObject *newstring, *hexobj;
|
||||
char *buf;
|
||||
Py_UNICODE *hex;
|
||||
Py_ssize_t hexlen, byteslen, i, j;
|
||||
int top, bot;
|
||||
void *data;
|
||||
unsigned int kind;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
|
||||
return NULL;
|
||||
assert(PyUnicode_Check(hexobj));
|
||||
hexlen = PyUnicode_GET_SIZE(hexobj);
|
||||
hex = PyUnicode_AS_UNICODE(hexobj);
|
||||
if (PyUnicode_READY(hexobj))
|
||||
return NULL;
|
||||
kind = PyUnicode_KIND(hexobj);
|
||||
data = PyUnicode_DATA(hexobj);
|
||||
hexlen = PyUnicode_GET_LENGTH(hexobj);
|
||||
|
||||
byteslen = hexlen/2; /* This overestimates if there are spaces */
|
||||
newstring = PyBytes_FromStringAndSize(NULL, byteslen);
|
||||
if (!newstring)
|
||||
|
@ -2372,12 +2371,12 @@ bytes_fromhex(PyObject *cls, PyObject *args)
|
|||
buf = PyBytes_AS_STRING(newstring);
|
||||
for (i = j = 0; i < hexlen; i += 2) {
|
||||
/* skip over spaces in the input */
|
||||
while (hex[i] == ' ')
|
||||
while (PyUnicode_READ(kind, data, i) == ' ')
|
||||
i++;
|
||||
if (i >= hexlen)
|
||||
break;
|
||||
top = hex_digit_to_int(hex[i]);
|
||||
bot = hex_digit_to_int(hex[i+1]);
|
||||
top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
|
||||
bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
|
||||
if (top == -1 || bot == -1) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"non-hexadecimal number found in "
|
||||
|
|
|
@ -8,19 +8,24 @@
|
|||
/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
|
||||
|
||||
static int
|
||||
all_name_chars(Py_UNICODE *s)
|
||||
all_name_chars(PyObject *o)
|
||||
{
|
||||
static char ok_name_char[256];
|
||||
static unsigned char *name_chars = (unsigned char *)NAME_CHARS;
|
||||
PyUnicodeObject *u = (PyUnicodeObject *)o;
|
||||
const unsigned char *s;
|
||||
|
||||
if (!PyUnicode_Check(o) || PyUnicode_READY(u) == -1 ||
|
||||
PyUnicode_MAX_CHAR_VALUE(u) >= 128)
|
||||
return 0;
|
||||
|
||||
if (ok_name_char[*name_chars] == 0) {
|
||||
unsigned char *p;
|
||||
for (p = name_chars; *p; p++)
|
||||
ok_name_char[*p] = 1;
|
||||
}
|
||||
s = PyUnicode_1BYTE_DATA(u);
|
||||
while (*s) {
|
||||
if (*s >= 128)
|
||||
return 0;
|
||||
if (ok_name_char[*s++] == 0)
|
||||
return 0;
|
||||
}
|
||||
|
@ -77,9 +82,7 @@ PyCode_New(int argcount, int kwonlyargcount,
|
|||
/* Intern selected string constants */
|
||||
for (i = PyTuple_GET_SIZE(consts); --i >= 0; ) {
|
||||
PyObject *v = PyTuple_GetItem(consts, i);
|
||||
if (!PyUnicode_Check(v))
|
||||
continue;
|
||||
if (!all_name_chars(PyUnicode_AS_UNICODE(v)))
|
||||
if (!all_name_chars(v))
|
||||
continue;
|
||||
PyUnicode_InternInPlace(&PyTuple_GET_ITEM(consts, i));
|
||||
}
|
||||
|
|
|
@ -702,9 +702,8 @@ complex__format__(PyObject* self, PyObject* args)
|
|||
|
||||
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
|
||||
return NULL;
|
||||
return _PyComplex_FormatAdvanced(self,
|
||||
PyUnicode_AS_UNICODE(format_spec),
|
||||
PyUnicode_GET_SIZE(format_spec));
|
||||
return _PyComplex_FormatAdvanced(self, format_spec, 0,
|
||||
PyUnicode_GET_LENGTH(format_spec));
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
@ -755,20 +754,10 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
|||
Py_ssize_t len;
|
||||
|
||||
if (PyUnicode_Check(v)) {
|
||||
Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
|
||||
Py_UNICODE *bufptr;
|
||||
s_buffer = PyUnicode_TransformDecimalToASCII(
|
||||
PyUnicode_AS_UNICODE(v), buflen);
|
||||
s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
|
||||
if (s_buffer == NULL)
|
||||
return NULL;
|
||||
/* Replace non-ASCII whitespace with ' ' */
|
||||
bufptr = PyUnicode_AS_UNICODE(s_buffer);
|
||||
for (i = 0; i < buflen; i++) {
|
||||
Py_UNICODE ch = bufptr[i];
|
||||
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
|
||||
bufptr[i] = ' ';
|
||||
}
|
||||
s = _PyUnicode_AsStringAndSize(s_buffer, &len);
|
||||
s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
|
||||
if (s == NULL)
|
||||
goto error;
|
||||
}
|
||||
|
|
|
@ -710,7 +710,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
|
|||
if (!PyDict_Check(op))
|
||||
return NULL;
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1)
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1)
|
||||
{
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1) {
|
||||
|
@ -762,7 +762,7 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key)
|
|||
return NULL;
|
||||
}
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1)
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1)
|
||||
{
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1) {
|
||||
|
@ -797,7 +797,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
|
|||
assert(value);
|
||||
mp = (PyDictObject *)op;
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1)
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1)
|
||||
{
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
|
@ -842,7 +842,7 @@ PyDict_DelItem(PyObject *op, PyObject *key)
|
|||
}
|
||||
assert(key);
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return -1;
|
||||
|
@ -1122,7 +1122,7 @@ dict_subscript(PyDictObject *mp, register PyObject *key)
|
|||
PyDictEntry *ep;
|
||||
assert(mp->ma_table != NULL);
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return NULL;
|
||||
|
@ -1726,7 +1726,7 @@ dict_contains(register PyDictObject *mp, PyObject *key)
|
|||
PyDictEntry *ep;
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return NULL;
|
||||
|
@ -1750,7 +1750,7 @@ dict_get(register PyDictObject *mp, PyObject *args)
|
|||
return NULL;
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return NULL;
|
||||
|
@ -1779,7 +1779,7 @@ dict_setdefault(register PyDictObject *mp, PyObject *args)
|
|||
return NULL;
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return NULL;
|
||||
|
@ -1824,7 +1824,7 @@ dict_pop(PyDictObject *mp, PyObject *args)
|
|||
return NULL;
|
||||
}
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return NULL;
|
||||
|
@ -2033,7 +2033,7 @@ PyDict_Contains(PyObject *op, PyObject *key)
|
|||
PyDictEntry *ep;
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return -1;
|
||||
|
|
|
@ -962,21 +962,18 @@ SyntaxError_traverse(PySyntaxErrorObject *self, visitproc visit, void *arg)
|
|||
static PyObject*
|
||||
my_basename(PyObject *name)
|
||||
{
|
||||
Py_UNICODE *unicode;
|
||||
Py_ssize_t i, size, offset;
|
||||
|
||||
unicode = PyUnicode_AS_UNICODE(name);
|
||||
size = PyUnicode_GET_SIZE(name);
|
||||
int kind = PyUnicode_KIND(name);
|
||||
void *data = PyUnicode_DATA(name);
|
||||
size = PyUnicode_GET_LENGTH(name);
|
||||
offset = 0;
|
||||
for(i=0; i < size; i++) {
|
||||
if (unicode[i] == SEP)
|
||||
if (PyUnicode_READ(kind, data, i) == SEP)
|
||||
offset = i + 1;
|
||||
}
|
||||
if (offset != 0) {
|
||||
return PyUnicode_FromUnicode(
|
||||
PyUnicode_AS_UNICODE(name) + offset,
|
||||
size - offset);
|
||||
} else {
|
||||
if (offset != 0)
|
||||
return PyUnicode_Substring(name, offset, size);
|
||||
else {
|
||||
Py_INCREF(name);
|
||||
return name;
|
||||
}
|
||||
|
@ -1712,6 +1709,7 @@ static PyTypeObject _PyExc_UnicodeTranslateError = {
|
|||
};
|
||||
PyObject *PyExc_UnicodeTranslateError = (PyObject *)&_PyExc_UnicodeTranslateError;
|
||||
|
||||
/* Deprecated. */
|
||||
PyObject *
|
||||
PyUnicodeTranslateError_Create(
|
||||
const Py_UNICODE *object, Py_ssize_t length,
|
||||
|
@ -1721,6 +1719,14 @@ PyUnicodeTranslateError_Create(
|
|||
object, length, start, end, reason);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
_PyUnicodeTranslateError_Create(
|
||||
PyObject *object,
|
||||
Py_ssize_t start, Py_ssize_t end, const char *reason)
|
||||
{
|
||||
return PyObject_CallFunction(PyExc_UnicodeTranslateError, "Ons",
|
||||
object, start, end, reason);
|
||||
}
|
||||
|
||||
/*
|
||||
* AssertionError extends Exception
|
||||
|
|
|
@ -103,23 +103,18 @@ PyFile_GetLine(PyObject *f, int n)
|
|||
}
|
||||
}
|
||||
if (n < 0 && result != NULL && PyUnicode_Check(result)) {
|
||||
Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
|
||||
Py_ssize_t len = PyUnicode_GET_SIZE(result);
|
||||
Py_ssize_t len = PyUnicode_GET_LENGTH(result);
|
||||
if (len == 0) {
|
||||
Py_DECREF(result);
|
||||
result = NULL;
|
||||
PyErr_SetString(PyExc_EOFError,
|
||||
"EOF when reading a line");
|
||||
}
|
||||
else if (s[len-1] == '\n') {
|
||||
if (result->ob_refcnt == 1)
|
||||
PyUnicode_Resize(&result, len-1);
|
||||
else {
|
||||
PyObject *v;
|
||||
v = PyUnicode_FromUnicode(s, len-1);
|
||||
Py_DECREF(result);
|
||||
result = v;
|
||||
}
|
||||
else if (PyUnicode_READ_CHAR(result, len-1) == '\n') {
|
||||
PyObject *v;
|
||||
v = PyUnicode_Substring(result, 0, len-1);
|
||||
Py_DECREF(result);
|
||||
result = v;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
|
@ -174,20 +174,10 @@ PyFloat_FromString(PyObject *v)
|
|||
PyObject *result = NULL;
|
||||
|
||||
if (PyUnicode_Check(v)) {
|
||||
Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
|
||||
Py_UNICODE *bufptr;
|
||||
s_buffer = PyUnicode_TransformDecimalToASCII(
|
||||
PyUnicode_AS_UNICODE(v), buflen);
|
||||
s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
|
||||
if (s_buffer == NULL)
|
||||
return NULL;
|
||||
/* Replace non-ASCII whitespace with ' ' */
|
||||
bufptr = PyUnicode_AS_UNICODE(s_buffer);
|
||||
for (i = 0; i < buflen; i++) {
|
||||
Py_UNICODE ch = bufptr[i];
|
||||
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
|
||||
bufptr[i] = ' ';
|
||||
}
|
||||
s = _PyUnicode_AsStringAndSize(s_buffer, &len);
|
||||
s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
|
||||
if (s == NULL) {
|
||||
Py_DECREF(s_buffer);
|
||||
return NULL;
|
||||
|
@ -1741,9 +1731,8 @@ float__format__(PyObject *self, PyObject *args)
|
|||
|
||||
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
|
||||
return NULL;
|
||||
return _PyFloat_FormatAdvanced(self,
|
||||
PyUnicode_AS_UNICODE(format_spec),
|
||||
PyUnicode_GET_SIZE(format_spec));
|
||||
return _PyFloat_FormatAdvanced(self, format_spec, 0,
|
||||
PyUnicode_GET_LENGTH(format_spec));
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(float__format__doc,
|
||||
|
|
|
@ -1551,7 +1551,7 @@ long_to_decimal_string(PyObject *aa)
|
|||
PyObject *str;
|
||||
Py_ssize_t size, strlen, size_a, i, j;
|
||||
digit *pout, *pin, rem, tenpow;
|
||||
Py_UNICODE *p;
|
||||
unsigned char *p;
|
||||
int negative;
|
||||
|
||||
a = (PyLongObject *)aa;
|
||||
|
@ -1619,14 +1619,15 @@ long_to_decimal_string(PyObject *aa)
|
|||
tenpow *= 10;
|
||||
strlen++;
|
||||
}
|
||||
str = PyUnicode_FromUnicode(NULL, strlen);
|
||||
str = PyUnicode_New(strlen, '9');
|
||||
if (str == NULL) {
|
||||
Py_DECREF(scratch);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* fill the string right-to-left */
|
||||
p = PyUnicode_AS_UNICODE(str) + strlen;
|
||||
assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND);
|
||||
p = PyUnicode_1BYTE_DATA(str) + strlen;
|
||||
*p = '\0';
|
||||
/* pout[0] through pout[size-2] contribute exactly
|
||||
_PyLong_DECIMAL_SHIFT digits each */
|
||||
|
@ -1649,7 +1650,7 @@ long_to_decimal_string(PyObject *aa)
|
|||
*--p = '-';
|
||||
|
||||
/* check we've counted correctly */
|
||||
assert(p == PyUnicode_AS_UNICODE(str));
|
||||
assert(p == PyUnicode_1BYTE_DATA(str));
|
||||
Py_DECREF(scratch);
|
||||
return (PyObject *)str;
|
||||
}
|
||||
|
@ -1662,10 +1663,12 @@ PyObject *
|
|||
_PyLong_Format(PyObject *aa, int base)
|
||||
{
|
||||
register PyLongObject *a = (PyLongObject *)aa;
|
||||
PyObject *str;
|
||||
PyObject *v;
|
||||
Py_ssize_t i, sz;
|
||||
Py_ssize_t size_a;
|
||||
Py_UNICODE *p, sign = '\0';
|
||||
char *p;
|
||||
char sign = '\0';
|
||||
char *buffer;
|
||||
int bits;
|
||||
|
||||
assert(base == 2 || base == 8 || base == 10 || base == 16);
|
||||
|
@ -1695,7 +1698,7 @@ _PyLong_Format(PyObject *aa, int base)
|
|||
}
|
||||
/* compute length of output string: allow 2 characters for prefix and
|
||||
1 for possible '-' sign. */
|
||||
if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
|
||||
if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT / sizeof(Py_UCS4)) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"int is too large to format");
|
||||
return NULL;
|
||||
|
@ -1704,11 +1707,12 @@ _PyLong_Format(PyObject *aa, int base)
|
|||
is safe from overflow */
|
||||
sz = 3 + (size_a * PyLong_SHIFT + (bits - 1)) / bits;
|
||||
assert(sz >= 0);
|
||||
str = PyUnicode_FromUnicode(NULL, sz);
|
||||
if (str == NULL)
|
||||
buffer = PyMem_Malloc(sz);
|
||||
if (buffer == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
p = PyUnicode_AS_UNICODE(str) + sz;
|
||||
*p = '\0';
|
||||
}
|
||||
p = &buffer[sz];
|
||||
if (Py_SIZE(a) < 0)
|
||||
sign = '-';
|
||||
|
||||
|
@ -1724,10 +1728,10 @@ _PyLong_Format(PyObject *aa, int base)
|
|||
accumbits += PyLong_SHIFT;
|
||||
assert(accumbits >= bits);
|
||||
do {
|
||||
Py_UNICODE cdigit;
|
||||
cdigit = (Py_UNICODE)(accum & (base - 1));
|
||||
char cdigit;
|
||||
cdigit = (char)(accum & (base - 1));
|
||||
cdigit += (cdigit < 10) ? '0' : 'a'-10;
|
||||
assert(p > PyUnicode_AS_UNICODE(str));
|
||||
assert(p > buffer);
|
||||
*--p = cdigit;
|
||||
accumbits -= bits;
|
||||
accum >>= bits;
|
||||
|
@ -1744,19 +1748,9 @@ _PyLong_Format(PyObject *aa, int base)
|
|||
*--p = '0';
|
||||
if (sign)
|
||||
*--p = sign;
|
||||
if (p != PyUnicode_AS_UNICODE(str)) {
|
||||
Py_UNICODE *q = PyUnicode_AS_UNICODE(str);
|
||||
assert(p > q);
|
||||
do {
|
||||
} while ((*q++ = *p++) != '\0');
|
||||
q--;
|
||||
if (PyUnicode_Resize(&str,(Py_ssize_t) (q -
|
||||
PyUnicode_AS_UNICODE(str)))) {
|
||||
Py_DECREF(str);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return (PyObject *)str;
|
||||
v = PyUnicode_DecodeASCII(p, &buffer[sz] - p, NULL);
|
||||
PyMem_Free(buffer);
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Table of digit values for 8-bit string -> integer conversion.
|
||||
|
@ -2133,24 +2127,27 @@ digit beyond the first.
|
|||
|
||||
PyObject *
|
||||
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
|
||||
{
|
||||
PyObject *v, *unicode = PyUnicode_FromUnicode(u, length);
|
||||
if (unicode == NULL)
|
||||
return NULL;
|
||||
v = PyLong_FromUnicodeObject(unicode, base);
|
||||
Py_DECREF(unicode);
|
||||
return v;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyLong_FromUnicodeObject(PyObject *u, int base)
|
||||
{
|
||||
PyObject *result;
|
||||
PyObject *asciidig;
|
||||
char *buffer, *end;
|
||||
Py_ssize_t i, buflen;
|
||||
Py_UNICODE *ptr;
|
||||
Py_ssize_t buflen;
|
||||
|
||||
asciidig = PyUnicode_TransformDecimalToASCII(u, length);
|
||||
asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u);
|
||||
if (asciidig == NULL)
|
||||
return NULL;
|
||||
/* Replace non-ASCII whitespace with ' ' */
|
||||
ptr = PyUnicode_AS_UNICODE(asciidig);
|
||||
for (i = 0; i < length; i++) {
|
||||
Py_UNICODE ch = ptr[i];
|
||||
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
|
||||
ptr[i] = ' ';
|
||||
}
|
||||
buffer = _PyUnicode_AsStringAndSize(asciidig, &buflen);
|
||||
buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen);
|
||||
if (buffer == NULL) {
|
||||
Py_DECREF(asciidig);
|
||||
return NULL;
|
||||
|
@ -4144,9 +4141,7 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
}
|
||||
|
||||
if (PyUnicode_Check(x))
|
||||
return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x),
|
||||
PyUnicode_GET_SIZE(x),
|
||||
(int)base);
|
||||
return PyLong_FromUnicodeObject(x, (int)base);
|
||||
else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
|
||||
/* Since PyLong_FromString doesn't have a length parameter,
|
||||
* check here for possible NULs in the string. */
|
||||
|
@ -4228,9 +4223,8 @@ long__format__(PyObject *self, PyObject *args)
|
|||
|
||||
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
|
||||
return NULL;
|
||||
return _PyLong_FormatAdvanced(self,
|
||||
PyUnicode_AS_UNICODE(format_spec),
|
||||
PyUnicode_GET_SIZE(format_spec));
|
||||
return _PyLong_FormatAdvanced(self, format_spec, 0,
|
||||
PyUnicode_GET_LENGTH(format_spec));
|
||||
}
|
||||
|
||||
/* Return a pair (q, r) such that a = b * q + r, and
|
||||
|
|
|
@ -285,8 +285,8 @@ _PyModule_Clear(PyObject *m)
|
|||
pos = 0;
|
||||
while (PyDict_Next(d, &pos, &key, &value)) {
|
||||
if (value != Py_None && PyUnicode_Check(key)) {
|
||||
Py_UNICODE *u = PyUnicode_AS_UNICODE(key);
|
||||
if (u[0] == '_' && u[1] != '_') {
|
||||
if (PyUnicode_READ_CHAR(key, 0) == '_' &&
|
||||
PyUnicode_READ_CHAR(key, 1) != '_') {
|
||||
if (Py_VerboseFlag > 1) {
|
||||
const char *s = _PyUnicode_AsString(key);
|
||||
if (s != NULL)
|
||||
|
@ -303,9 +303,8 @@ _PyModule_Clear(PyObject *m)
|
|||
pos = 0;
|
||||
while (PyDict_Next(d, &pos, &key, &value)) {
|
||||
if (value != Py_None && PyUnicode_Check(key)) {
|
||||
Py_UNICODE *u = PyUnicode_AS_UNICODE(key);
|
||||
if (u[0] != '_'
|
||||
|| PyUnicode_CompareWithASCIIString(key, "__builtins__") != 0)
|
||||
if (PyUnicode_READ_CHAR(key, 0) != '_' ||
|
||||
PyUnicode_CompareWithASCIIString(key, "__builtins__") != 0)
|
||||
{
|
||||
if (Py_VerboseFlag > 1) {
|
||||
const char *s = _PyUnicode_AsString(key);
|
||||
|
|
|
@ -295,9 +295,7 @@ PyObject_Print(PyObject *op, FILE *fp, int flags)
|
|||
}
|
||||
else if (PyUnicode_Check(s)) {
|
||||
PyObject *t;
|
||||
t = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(s),
|
||||
PyUnicode_GET_SIZE(s),
|
||||
"backslashreplace");
|
||||
t = PyUnicode_AsEncodedString(s, "utf-8", "backslashreplace");
|
||||
if (t == NULL)
|
||||
ret = 0;
|
||||
else {
|
||||
|
@ -439,11 +437,7 @@ PyObject_ASCII(PyObject *v)
|
|||
return NULL;
|
||||
|
||||
/* repr is guaranteed to be a PyUnicode object by PyObject_Repr */
|
||||
ascii = PyUnicode_EncodeASCII(
|
||||
PyUnicode_AS_UNICODE(repr),
|
||||
PyUnicode_GET_SIZE(repr),
|
||||
"backslashreplace");
|
||||
|
||||
ascii = _PyUnicode_AsASCIIString(repr, "backslashreplace");
|
||||
Py_DECREF(repr);
|
||||
if (ascii == NULL)
|
||||
return NULL;
|
||||
|
|
|
@ -386,7 +386,7 @@ set_add_key(register PySetObject *so, PyObject *key)
|
|||
register Py_ssize_t n_used;
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return -1;
|
||||
|
@ -434,7 +434,7 @@ set_discard_key(PySetObject *so, PyObject *key)
|
|||
assert (PyAnySet_Check(so));
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return -1;
|
||||
|
@ -579,11 +579,8 @@ set_dealloc(PySetObject *so)
|
|||
static PyObject *
|
||||
set_repr(PySetObject *so)
|
||||
{
|
||||
PyObject *keys, *result=NULL;
|
||||
Py_UNICODE *u;
|
||||
PyObject *result=NULL, *keys, *listrepr, *tmp;
|
||||
int status = Py_ReprEnter((PyObject*)so);
|
||||
PyObject *listrepr;
|
||||
Py_ssize_t newsize;
|
||||
|
||||
if (status != 0) {
|
||||
if (status < 0)
|
||||
|
@ -601,31 +598,24 @@ set_repr(PySetObject *so)
|
|||
if (keys == NULL)
|
||||
goto done;
|
||||
|
||||
/* repr(keys)[1:-1] */
|
||||
listrepr = PyObject_Repr(keys);
|
||||
Py_DECREF(keys);
|
||||
if (listrepr == NULL)
|
||||
goto done;
|
||||
newsize = PyUnicode_GET_SIZE(listrepr);
|
||||
result = PyUnicode_FromUnicode(NULL, newsize);
|
||||
if (result == NULL)
|
||||
goto done;
|
||||
|
||||
u = PyUnicode_AS_UNICODE(result);
|
||||
*u++ = '{';
|
||||
/* Omit the brackets from the listrepr */
|
||||
Py_UNICODE_COPY(u, PyUnicode_AS_UNICODE(listrepr)+1,
|
||||
newsize-2);
|
||||
u += newsize-2;
|
||||
*u++ = '}';
|
||||
tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
|
||||
Py_DECREF(listrepr);
|
||||
if (tmp == NULL)
|
||||
goto done;
|
||||
listrepr = tmp;
|
||||
|
||||
if (Py_TYPE(so) != &PySet_Type) {
|
||||
PyObject *tmp = PyUnicode_FromFormat("%s(%U)",
|
||||
Py_TYPE(so)->tp_name,
|
||||
result);
|
||||
Py_DECREF(result);
|
||||
result = tmp;
|
||||
}
|
||||
if (Py_TYPE(so) != &PySet_Type)
|
||||
result = PyUnicode_FromFormat("%s({%U})",
|
||||
Py_TYPE(so)->tp_name,
|
||||
listrepr);
|
||||
else
|
||||
result = PyUnicode_FromFormat("{%U}", listrepr);
|
||||
Py_DECREF(listrepr);
|
||||
done:
|
||||
Py_ReprLeave((PyObject*)so);
|
||||
return result;
|
||||
|
@ -684,7 +674,7 @@ set_contains_key(PySetObject *so, PyObject *key)
|
|||
setentry *entry;
|
||||
|
||||
if (!PyUnicode_CheckExact(key) ||
|
||||
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
|
||||
hash = PyObject_Hash(key);
|
||||
if (hash == -1)
|
||||
return -1;
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
/* stringlib: count implementation */
|
||||
|
||||
#ifndef STRINGLIB_COUNT_H
|
||||
#define STRINGLIB_COUNT_H
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
|
@ -19,7 +16,7 @@ stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
if (sub_len == 0)
|
||||
return (str_len < maxcount) ? str_len + 1 : maxcount;
|
||||
|
||||
count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
|
||||
count = FASTSEARCH(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
|
||||
|
||||
if (count < 0)
|
||||
return 0; /* no match */
|
||||
|
@ -27,4 +24,4 @@ stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
return count;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -9,13 +9,26 @@ unicode_eq(PyObject *aa, PyObject *bb)
|
|||
register PyUnicodeObject *a = (PyUnicodeObject *)aa;
|
||||
register PyUnicodeObject *b = (PyUnicodeObject *)bb;
|
||||
|
||||
if (a->length != b->length)
|
||||
if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) {
|
||||
assert(0 && "unicode_eq ready fail");
|
||||
return 0;
|
||||
if (a->length == 0)
|
||||
return 1;
|
||||
if (a->str[0] != b->str[0])
|
||||
}
|
||||
|
||||
if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b))
|
||||
return 0;
|
||||
if (a->length == 1)
|
||||
if (PyUnicode_GET_LENGTH(a) == 0)
|
||||
return 1;
|
||||
return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
|
||||
if (PyUnicode_KIND(a) != PyUnicode_KIND(b))
|
||||
return 0;
|
||||
/* Just comparing the first byte is enough to see if a and b differ.
|
||||
* If they are 2 byte or 4 byte character most differences will happen in
|
||||
* the lower bytes anyways.
|
||||
*/
|
||||
if (PyUnicode_1BYTE_DATA(a)[0] != PyUnicode_1BYTE_DATA(b)[0])
|
||||
return 0;
|
||||
if (PyUnicode_KIND(a) == PyUnicode_1BYTE_KIND &&
|
||||
PyUnicode_GET_LENGTH(a) == 1)
|
||||
return 1;
|
||||
return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b),
|
||||
PyUnicode_GET_LENGTH(a) * PyUnicode_CHARACTER_SIZE(a)) == 0;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
/* stringlib: fastsearch implementation */
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#define STRINGLIB_FASTSEARCH_H
|
||||
|
||||
/* fast search/count implementation, based on a mix between boyer-
|
||||
|
@ -34,7 +33,7 @@
|
|||
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||
const STRINGLIB_CHAR* p, Py_ssize_t m,
|
||||
Py_ssize_t maxcount, int mode)
|
||||
{
|
||||
|
@ -157,4 +156,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
|||
return count;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
/* stringlib: find/index implementation */
|
||||
|
||||
#ifndef STRINGLIB_FIND_H
|
||||
#define STRINGLIB_FIND_H
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t offset)
|
||||
{
|
||||
|
@ -19,7 +16,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
if (sub_len == 0)
|
||||
return offset;
|
||||
|
||||
pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
|
||||
pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);
|
||||
|
||||
if (pos >= 0)
|
||||
pos += offset;
|
||||
|
@ -28,7 +25,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t offset)
|
||||
{
|
||||
|
@ -39,7 +36,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
if (sub_len == 0)
|
||||
return str_len + offset;
|
||||
|
||||
pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
|
||||
pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
|
||||
|
||||
if (pos >= 0)
|
||||
pos += offset;
|
||||
|
@ -63,29 +60,29 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
ADJUST_INDICES(start, end, str_len);
|
||||
return stringlib_find(str + start, end - start, sub, sub_len, start);
|
||||
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
ADJUST_INDICES(start, end, str_len);
|
||||
return stringlib_rfind(str + start, end - start, sub, sub_len, start);
|
||||
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
|
||||
}
|
||||
|
||||
#ifdef STRINGLIB_WANT_CONTAINS_OBJ
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
stringlib_contains_obj(PyObject* str, PyObject* sub)
|
||||
STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)
|
||||
{
|
||||
return stringlib_find(
|
||||
return STRINGLIB(find)(
|
||||
STRINGLIB_STR(str), STRINGLIB_LEN(str),
|
||||
STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
|
||||
) != -1;
|
||||
|
@ -105,7 +102,7 @@ is ok.
|
|||
#define FORMAT_BUFFER_SIZE 50
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
stringlib_parse_args_finds(const char * function_name, PyObject *args,
|
||||
STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
|
||||
PyObject **subobj,
|
||||
Py_ssize_t *start, Py_ssize_t *end)
|
||||
{
|
||||
|
@ -153,13 +150,13 @@ after finishing using the substring, must DECREF it).
|
|||
*/
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args,
|
||||
STRINGLIB(parse_args_finds_unicode)(const char * function_name, PyObject *args,
|
||||
PyUnicodeObject **substring,
|
||||
Py_ssize_t *start, Py_ssize_t *end)
|
||||
{
|
||||
PyObject *tmp_substring;
|
||||
|
||||
if(stringlib_parse_args_finds(function_name, args, &tmp_substring,
|
||||
if(STRINGLIB(parse_args_finds)(function_name, args, &tmp_substring,
|
||||
start, end)) {
|
||||
tmp_substring = PyUnicode_FromObject(tmp_substring);
|
||||
if (!tmp_substring)
|
||||
|
@ -171,5 +168,3 @@ stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args,
|
|||
}
|
||||
|
||||
#endif /* STRINGLIB_IS_UNICODE */
|
||||
|
||||
#endif /* STRINGLIB_FIND_H */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,5 @@
|
|||
/* stringlib: locale related helpers implementation */
|
||||
|
||||
#ifndef STRINGLIB_LOCALEUTIL_H
|
||||
#define STRINGLIB_LOCALEUTIL_H
|
||||
|
||||
#include <locale.h>
|
||||
|
||||
#define MAX(x, y) ((x) < (y) ? (y) : (x))
|
||||
|
@ -12,10 +9,10 @@ typedef struct {
|
|||
const char *grouping;
|
||||
char previous;
|
||||
Py_ssize_t i; /* Where we're currently pointing in grouping. */
|
||||
} GroupGenerator;
|
||||
} STRINGLIB(GroupGenerator);
|
||||
|
||||
static void
|
||||
_GroupGenerator_init(GroupGenerator *self, const char *grouping)
|
||||
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping)
|
||||
{
|
||||
self->grouping = grouping;
|
||||
self->i = 0;
|
||||
|
@ -24,7 +21,7 @@ _GroupGenerator_init(GroupGenerator *self, const char *grouping)
|
|||
|
||||
/* Returns the next grouping, or 0 to signify end. */
|
||||
static Py_ssize_t
|
||||
_GroupGenerator_next(GroupGenerator *self)
|
||||
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
|
||||
{
|
||||
/* Note that we don't really do much error checking here. If a
|
||||
grouping string contains just CHAR_MAX, for example, then just
|
||||
|
@ -48,13 +45,11 @@ _GroupGenerator_next(GroupGenerator *self)
|
|||
/* Fill in some digits, leading zeros, and thousands separator. All
|
||||
are optional, depending on when we're called. */
|
||||
static void
|
||||
fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
|
||||
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
|
||||
Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
|
||||
Py_ssize_t thousands_sep_len)
|
||||
{
|
||||
#if STRINGLIB_IS_UNICODE
|
||||
Py_ssize_t i;
|
||||
#endif
|
||||
|
||||
if (thousands_sep) {
|
||||
*buffer_end -= thousands_sep_len;
|
||||
|
@ -76,7 +71,8 @@ fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
|
|||
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
|
||||
|
||||
*buffer_end -= n_zeros;
|
||||
STRINGLIB_FILL(*buffer_end, '0', n_zeros);
|
||||
for (i = 0; i < n_zeros; i++)
|
||||
(*buffer_end)[i] = '0';
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -133,15 +129,15 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
|
|||
be looked at */
|
||||
/* A generator that returns all of the grouping widths, until it
|
||||
returns 0. */
|
||||
GroupGenerator groupgen;
|
||||
_GroupGenerator_init(&groupgen, grouping);
|
||||
STRINGLIB(GroupGenerator) groupgen;
|
||||
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
|
||||
|
||||
if (buffer) {
|
||||
buffer_end = buffer + n_buffer;
|
||||
digits_end = digits + n_digits;
|
||||
}
|
||||
|
||||
while ((l = _GroupGenerator_next(&groupgen)) > 0) {
|
||||
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
|
||||
l = MIN(l, MAX(MAX(remaining, min_width), 1));
|
||||
n_zeros = MAX(0, l - remaining);
|
||||
n_chars = MAX(0, MIN(remaining, l));
|
||||
|
@ -153,7 +149,7 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
|
|||
|
||||
if (buffer) {
|
||||
/* Copy into the output buffer. */
|
||||
fill(&digits_end, &buffer_end, n_chars, n_zeros,
|
||||
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
|
||||
use_separator ? thousands_sep : NULL, thousands_sep_len);
|
||||
}
|
||||
|
||||
|
@ -180,7 +176,7 @@ _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
|
|||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||
if (buffer) {
|
||||
/* Copy into the output buffer. */
|
||||
fill(&digits_end, &buffer_end, n_chars, n_zeros,
|
||||
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
|
||||
use_separator ? thousands_sep : NULL, thousands_sep_len);
|
||||
}
|
||||
}
|
||||
|
@ -209,4 +205,3 @@ _Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
|
|||
return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
|
||||
min_width, grouping, thousands_sep);
|
||||
}
|
||||
#endif /* STRINGLIB_LOCALEUTIL_H */
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
/* stringlib: partition implementation */
|
||||
|
||||
#ifndef STRINGLIB_PARTITION_H
|
||||
#define STRINGLIB_PARTITION_H
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
Py_LOCAL_INLINE(PyObject*)
|
||||
stringlib_partition(PyObject* str_obj,
|
||||
STRINGLIB(partition)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
PyObject* sep_obj,
|
||||
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
|
||||
|
@ -25,7 +22,7 @@ stringlib_partition(PyObject* str_obj,
|
|||
if (!out)
|
||||
return NULL;
|
||||
|
||||
pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
|
||||
pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_SEARCH);
|
||||
|
||||
if (pos < 0) {
|
||||
#if STRINGLIB_MUTABLE
|
||||
|
@ -58,7 +55,7 @@ stringlib_partition(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject*)
|
||||
stringlib_rpartition(PyObject* str_obj,
|
||||
STRINGLIB(rpartition)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
PyObject* sep_obj,
|
||||
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
|
||||
|
@ -75,7 +72,7 @@ stringlib_rpartition(PyObject* str_obj,
|
|||
if (!out)
|
||||
return NULL;
|
||||
|
||||
pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
|
||||
pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
|
||||
|
||||
if (pos < 0) {
|
||||
#if STRINGLIB_MUTABLE
|
||||
|
@ -107,4 +104,3 @@ stringlib_rpartition(PyObject* str_obj,
|
|||
return out;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
/* stringlib: split implementation */
|
||||
|
||||
#ifndef STRINGLIB_SPLIT_H
|
||||
#define STRINGLIB_SPLIT_H
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
@ -54,7 +51,7 @@
|
|||
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_split_whitespace(PyObject* str_obj,
|
||||
STRINGLIB(split_whitespace)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
|
@ -102,7 +99,7 @@ stringlib_split_whitespace(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_split_char(PyObject* str_obj,
|
||||
STRINGLIB(split_char)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR ch,
|
||||
Py_ssize_t maxcount)
|
||||
|
@ -145,7 +142,7 @@ stringlib_split_char(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_split(PyObject* str_obj,
|
||||
STRINGLIB(split)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
|
||||
Py_ssize_t maxcount)
|
||||
|
@ -158,7 +155,7 @@ stringlib_split(PyObject* str_obj,
|
|||
return NULL;
|
||||
}
|
||||
else if (sep_len == 1)
|
||||
return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
|
||||
return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount);
|
||||
|
||||
list = PyList_New(PREALLOC_SIZE(maxcount));
|
||||
if (list == NULL)
|
||||
|
@ -166,7 +163,7 @@ stringlib_split(PyObject* str_obj,
|
|||
|
||||
i = j = 0;
|
||||
while (maxcount-- > 0) {
|
||||
pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
|
||||
pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
|
||||
if (pos < 0)
|
||||
break;
|
||||
j = i + pos;
|
||||
|
@ -193,7 +190,7 @@ stringlib_split(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_rsplit_whitespace(PyObject* str_obj,
|
||||
STRINGLIB(rsplit_whitespace)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
|
@ -243,7 +240,7 @@ stringlib_rsplit_whitespace(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_rsplit_char(PyObject* str_obj,
|
||||
STRINGLIB(rsplit_char)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR ch,
|
||||
Py_ssize_t maxcount)
|
||||
|
@ -287,7 +284,7 @@ stringlib_rsplit_char(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_rsplit(PyObject* str_obj,
|
||||
STRINGLIB(rsplit)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
|
||||
Py_ssize_t maxcount)
|
||||
|
@ -300,7 +297,7 @@ stringlib_rsplit(PyObject* str_obj,
|
|||
return NULL;
|
||||
}
|
||||
else if (sep_len == 1)
|
||||
return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
|
||||
return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount);
|
||||
|
||||
list = PyList_New(PREALLOC_SIZE(maxcount));
|
||||
if (list == NULL)
|
||||
|
@ -308,7 +305,7 @@ stringlib_rsplit(PyObject* str_obj,
|
|||
|
||||
j = str_len;
|
||||
while (maxcount-- > 0) {
|
||||
pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
|
||||
pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH);
|
||||
if (pos < 0)
|
||||
break;
|
||||
SPLIT_ADD(str, pos + sep_len, j);
|
||||
|
@ -336,7 +333,7 @@ stringlib_rsplit(PyObject* str_obj,
|
|||
}
|
||||
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
stringlib_splitlines(PyObject* str_obj,
|
||||
STRINGLIB(splitlines)(PyObject* str_obj,
|
||||
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
int keepends)
|
||||
{
|
||||
|
@ -391,4 +388,3 @@ stringlib_splitlines(PyObject* str_obj,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 0
|
||||
|
||||
#define FASTSEARCH fastsearch
|
||||
#define STRINGLIB(F) stringlib_##F
|
||||
#define STRINGLIB_OBJECT PyBytesObject
|
||||
#define STRINGLIB_CHAR char
|
||||
#define STRINGLIB_TYPE_NAME "string"
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/* this is sort of a hack. there's at least one place (formatting
|
||||
floats) where some stringlib code takes a different path if it's
|
||||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 1
|
||||
|
||||
#define FASTSEARCH ucs1lib_fastsearch
|
||||
#define STRINGLIB(F) ucs1lib_##F
|
||||
#define STRINGLIB_OBJECT PyUnicodeObject
|
||||
#define STRINGLIB_CHAR Py_UCS1
|
||||
#define STRINGLIB_TYPE_NAME "unicode"
|
||||
#define STRINGLIB_PARSE_CODE "U"
|
||||
#define STRINGLIB_EMPTY unicode_empty
|
||||
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
|
||||
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
|
||||
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
|
||||
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
|
||||
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
|
||||
#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER
|
||||
#define STRINGLIB_FILL Py_UNICODE_FILL
|
||||
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
|
||||
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
|
||||
#define STRINGLIB_NEW PyUnicode_FromUCS1
|
||||
#define STRINGLIB_RESIZE not_supported
|
||||
#define STRINGLIB_CHECK PyUnicode_Check
|
||||
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
|
||||
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
|
||||
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale
|
||||
|
||||
#define STRINGLIB_TOSTR PyObject_Str
|
||||
#define STRINGLIB_TOASCII PyObject_ASCII
|
||||
|
||||
#define _Py_InsertThousandsGrouping _PyUnicode_ucs1_InsertThousandsGrouping
|
||||
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs1_InsertThousandsGroupingLocale
|
||||
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* this is sort of a hack. there's at least one place (formatting
|
||||
floats) where some stringlib code takes a different path if it's
|
||||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 1
|
||||
|
||||
#define FASTSEARCH ucs2lib_fastsearch
|
||||
#define STRINGLIB(F) ucs2lib_##F
|
||||
#define STRINGLIB_OBJECT PyUnicodeObject
|
||||
#define STRINGLIB_CHAR Py_UCS2
|
||||
#define STRINGLIB_TYPE_NAME "unicode"
|
||||
#define STRINGLIB_PARSE_CODE "U"
|
||||
#define STRINGLIB_EMPTY unicode_empty
|
||||
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
|
||||
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
|
||||
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
|
||||
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
|
||||
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
|
||||
#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER
|
||||
#define STRINGLIB_FILL Py_UNICODE_FILL
|
||||
/* the data accessor must match STRINGLIB_CHAR (Py_UCS2 in this header) */
#define STRINGLIB_STR PyUnicode_2BYTE_DATA
|
||||
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
|
||||
#define STRINGLIB_NEW PyUnicode_FromUCS2
|
||||
#define STRINGLIB_RESIZE not_supported
|
||||
#define STRINGLIB_CHECK PyUnicode_Check
|
||||
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
|
||||
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
|
||||
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale
|
||||
|
||||
#define STRINGLIB_TOSTR PyObject_Str
|
||||
#define STRINGLIB_TOASCII PyObject_ASCII
|
||||
|
||||
#define _Py_InsertThousandsGrouping _PyUnicode_ucs2_InsertThousandsGrouping
|
||||
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs2_InsertThousandsGroupingLocale
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* this is sort of a hack. there's at least one place (formatting
|
||||
floats) where some stringlib code takes a different path if it's
|
||||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 1
|
||||
|
||||
#define FASTSEARCH ucs4lib_fastsearch
|
||||
#define STRINGLIB(F) ucs4lib_##F
|
||||
#define STRINGLIB_OBJECT PyUnicodeObject
|
||||
#define STRINGLIB_CHAR Py_UCS4
|
||||
#define STRINGLIB_TYPE_NAME "unicode"
|
||||
#define STRINGLIB_PARSE_CODE "U"
|
||||
#define STRINGLIB_EMPTY unicode_empty
|
||||
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
|
||||
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
|
||||
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
|
||||
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
|
||||
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
|
||||
#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER
|
||||
#define STRINGLIB_FILL Py_UNICODE_FILL
|
||||
/* the data accessor must match STRINGLIB_CHAR (Py_UCS4 in this header) */
#define STRINGLIB_STR PyUnicode_4BYTE_DATA
|
||||
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
|
||||
#define STRINGLIB_NEW PyUnicode_FromUCS4
|
||||
#define STRINGLIB_RESIZE not_supported
|
||||
#define STRINGLIB_CHECK PyUnicode_Check
|
||||
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
|
||||
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
|
||||
#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale
|
||||
|
||||
#define STRINGLIB_TOSTR PyObject_Str
|
||||
#define STRINGLIB_TOASCII PyObject_ASCII
|
||||
|
||||
#define _Py_InsertThousandsGrouping _PyUnicode_ucs4_InsertThousandsGrouping
|
||||
#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs4_InsertThousandsGroupingLocale
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
#undef FASTSEARCH
|
||||
#undef STRINGLIB
|
||||
#undef STRINGLIB_CHAR
|
||||
#undef STRINGLIB_STR
|
||||
#undef STRINGLIB_LEN
|
||||
#undef STRINGLIB_NEW
|
||||
#undef STRINGLIB_RESIZE
|
||||
#undef _Py_InsertThousandsGrouping
|
||||
#undef _Py_InsertThousandsGroupingLocale
|
||||
|
|
@ -1,16 +1,8 @@
|
|||
/*
|
||||
string_format.h -- implementation of string.format().
|
||||
|
||||
It uses the Objects/stringlib conventions, so that it can be
|
||||
compiled for both unicode and string objects.
|
||||
unicode_format.h -- implementation of str.format().
|
||||
*/
|
||||
|
||||
|
||||
/* Defines for Python 2.6 compatibility */
|
||||
#if PY_VERSION_HEX < 0x03000000
|
||||
#define PyLong_FromSsize_t _PyLong_FromSsize_t
|
||||
#endif
|
||||
|
||||
/* Defines for more efficiently reallocating the string buffer */
|
||||
#define INITIAL_SIZE_INCREMENT 100
|
||||
#define SIZE_MULTIPLIER 2
|
||||
|
@ -26,8 +18,8 @@
|
|||
unicode pointers.
|
||||
*/
|
||||
typedef struct {
|
||||
STRINGLIB_CHAR *ptr;
|
||||
STRINGLIB_CHAR *end;
|
||||
PyObject *str; /* borrowed reference */
|
||||
Py_ssize_t start, end;
|
||||
} SubString;
|
||||
|
||||
|
||||
|
@ -64,34 +56,32 @@ AutoNumber_Init(AutoNumber *auto_number)
|
|||
|
||||
/* fill in a SubString from a pointer and length */
|
||||
Py_LOCAL_INLINE(void)
|
||||
SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
|
||||
SubString_init(SubString *str, PyObject *s, int start, int end)
|
||||
{
|
||||
str->ptr = p;
|
||||
if (p == NULL)
|
||||
str->end = NULL;
|
||||
else
|
||||
str->end = str->ptr + len;
|
||||
str->str = s;
|
||||
str->start = start;
|
||||
str->end = end;
|
||||
}
|
||||
|
||||
/* return a new string. if str->ptr is NULL, return None */
|
||||
/* return a new string. if str->str is NULL, return None */
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
SubString_new_object(SubString *str)
|
||||
{
|
||||
if (str->ptr == NULL) {
|
||||
if (str->str == NULL) {
|
||||
Py_INCREF(Py_None);
|
||||
return Py_None;
|
||||
}
|
||||
return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
|
||||
return PyUnicode_Substring(str->str, str->start, str->end);
|
||||
}
|
||||
|
||||
/* return a new string. if str->ptr is NULL, return None */
|
||||
/* return a new string. if str->str is NULL, return an empty string */
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
SubString_new_object_or_empty(SubString *str)
|
||||
{
|
||||
if (str->ptr == NULL) {
|
||||
return STRINGLIB_NEW(NULL, 0);
|
||||
if (str->str == NULL) {
|
||||
return PyUnicode_FromUnicode(NULL, 0);
|
||||
}
|
||||
return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
|
||||
return SubString_new_object(str);
|
||||
}
|
||||
|
||||
/* Return 1 if an error has been detected switching between automatic
|
||||
|
@ -125,9 +115,10 @@ autonumber_state_error(AutoNumberState state, int field_name_is_empty)
|
|||
/************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
STRINGLIB_CHAR *ptr;
|
||||
STRINGLIB_CHAR *end;
|
||||
PyObject *obj;
|
||||
char *data;
|
||||
Py_UCS4 maxchar;
|
||||
unsigned int kind;
|
||||
Py_ssize_t pos, size;
|
||||
Py_ssize_t size_increment;
|
||||
} OutputString;
|
||||
|
||||
|
@ -135,12 +126,16 @@ typedef struct {
|
|||
static int
|
||||
output_initialize(OutputString *output, Py_ssize_t size)
|
||||
{
|
||||
output->obj = STRINGLIB_NEW(NULL, size);
|
||||
if (output->obj == NULL)
|
||||
output->data = PyMem_Malloc(size);
|
||||
if (output->data == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return 0;
|
||||
}
|
||||
|
||||
output->ptr = STRINGLIB_STR(output->obj);
|
||||
output->end = STRINGLIB_LEN(output->obj) + output->ptr;
|
||||
output->maxchar = 127;
|
||||
output->kind = PyUnicode_1BYTE_KIND;
|
||||
output->pos = 0;
|
||||
output->size = size;
|
||||
output->size_increment = INITIAL_SIZE_INCREMENT;
|
||||
|
||||
return 1;
|
||||
|
@ -155,20 +150,51 @@ output_initialize(OutputString *output, Py_ssize_t size)
|
|||
static int
|
||||
output_extend(OutputString *output, Py_ssize_t count)
|
||||
{
|
||||
STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
|
||||
Py_ssize_t curlen = output->ptr - startptr;
|
||||
Py_ssize_t maxlen = curlen + count + output->size_increment;
|
||||
Py_ssize_t maxlen = output->size + count + output->size_increment;
|
||||
|
||||
if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
|
||||
output->data = PyMem_Realloc(output->data, maxlen << (output->kind-1));
|
||||
output->size = maxlen;
|
||||
if (output->data == 0) {
|
||||
PyErr_NoMemory();
|
||||
return 0;
|
||||
startptr = STRINGLIB_STR(output->obj);
|
||||
output->ptr = startptr + curlen;
|
||||
output->end = startptr + maxlen;
|
||||
}
|
||||
if (output->size_increment < MAX_SIZE_INCREMENT)
|
||||
output->size_increment *= SIZE_MULTIPLIER;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
output_widen(OutputString *output, Py_UCS4 maxchar)
|
||||
{
|
||||
int kind;
|
||||
void *data;
|
||||
Py_ssize_t i;
|
||||
if (maxchar <= output->maxchar)
|
||||
return 1;
|
||||
if (maxchar < 256) {
|
||||
output->maxchar = 255;
|
||||
return 1;
|
||||
}
|
||||
if (maxchar < 65536) {
|
||||
output->maxchar = 65535;
|
||||
kind = 2;
|
||||
}
|
||||
else {
|
||||
output->maxchar = 1<<21;
|
||||
kind = 3;
|
||||
}
|
||||
data = PyMem_Malloc(output->size << (kind-1));
|
||||
if (data == 0)
|
||||
return 0;
|
||||
for (i = 0; i < output->size; i++)
|
||||
PyUnicode_WRITE(kind, data, i,
|
||||
PyUnicode_READ(output->kind, output->data, i));
|
||||
PyMem_Free(output->data);
|
||||
output->data = data;
|
||||
output->kind = kind;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
output_data dumps characters into our output string
|
||||
buffer.
|
||||
|
@ -179,12 +205,25 @@ output_extend(OutputString *output, Py_ssize_t count)
|
|||
1 for success.
|
||||
*/
|
||||
static int
|
||||
output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
|
||||
output_data(OutputString *output, PyObject *s, Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
if ((count > output->end - output->ptr) && !output_extend(output, count))
|
||||
Py_ssize_t i;
|
||||
int kind;
|
||||
if ((output->pos + end - start > output->size) &&
|
||||
!output_extend(output, end - start))
|
||||
return 0;
|
||||
memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
|
||||
output->ptr += count;
|
||||
kind = PyUnicode_KIND(s);
|
||||
if (PyUnicode_MAX_CHAR_VALUE(s) > output->maxchar) {
|
||||
Py_UCS4 maxchar = output->maxchar;
|
||||
for (i = start; i < end; i++)
|
||||
if (PyUnicode_READ(kind, PyUnicode_DATA(s), i) > maxchar)
|
||||
maxchar = PyUnicode_READ(kind, PyUnicode_DATA(s), i);
|
||||
if (!output_widen(output, maxchar))
|
||||
return 0;
|
||||
}
|
||||
for (i = start; i < end; i++)
|
||||
PyUnicode_WRITE(output->kind, output->data, output->pos++,
|
||||
PyUnicode_READ(kind, PyUnicode_DATA(s), i));
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -197,15 +236,14 @@ get_integer(const SubString *str)
|
|||
{
|
||||
Py_ssize_t accumulator = 0;
|
||||
Py_ssize_t digitval;
|
||||
Py_ssize_t oldaccumulator;
|
||||
STRINGLIB_CHAR *p;
|
||||
Py_ssize_t i;
|
||||
|
||||
/* empty string is an error */
|
||||
if (str->ptr >= str->end)
|
||||
if (str->start >= str->end)
|
||||
return -1;
|
||||
|
||||
for (p = str->ptr; p < str->end; p++) {
|
||||
digitval = STRINGLIB_TODECIMAL(*p);
|
||||
for (i = str->start; i < str->end; i++) {
|
||||
digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
|
||||
if (digitval < 0)
|
||||
return -1;
|
||||
/*
|
||||
|
@ -280,34 +318,36 @@ typedef struct {
|
|||
lifetime of the iterator. can be empty */
|
||||
SubString str;
|
||||
|
||||
/* pointer to where we are inside field_name */
|
||||
STRINGLIB_CHAR *ptr;
|
||||
/* index to where we are inside field_name */
|
||||
Py_ssize_t index;
|
||||
} FieldNameIterator;
|
||||
|
||||
|
||||
static int
|
||||
FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
|
||||
Py_ssize_t len)
|
||||
FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
SubString_init(&self->str, ptr, len);
|
||||
self->ptr = self->str.ptr;
|
||||
SubString_init(&self->str, s, start, end);
|
||||
self->index = start;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
|
||||
{
|
||||
STRINGLIB_CHAR c;
|
||||
Py_UCS4 c;
|
||||
|
||||
name->ptr = self->ptr;
|
||||
name->str = self->str.str;
|
||||
name->start = self->index;
|
||||
|
||||
/* return everything until '.' or '[' */
|
||||
while (self->ptr < self->str.end) {
|
||||
switch (c = *self->ptr++) {
|
||||
while (self->index < self->str.end) {
|
||||
c = PyUnicode_READ_CHAR(self->str.str, self->index++);
|
||||
switch (c) {
|
||||
case '[':
|
||||
case '.':
|
||||
/* back up so that this character will be seen next time */
|
||||
self->ptr--;
|
||||
self->index--;
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
|
@ -315,7 +355,7 @@ _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
|
|||
break;
|
||||
}
|
||||
/* end of string is okay */
|
||||
name->end = self->ptr;
|
||||
name->end = self->index;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -323,13 +363,15 @@ static int
|
|||
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
|
||||
{
|
||||
int bracket_seen = 0;
|
||||
STRINGLIB_CHAR c;
|
||||
Py_UCS4 c;
|
||||
|
||||
name->ptr = self->ptr;
|
||||
name->str = self->str.str;
|
||||
name->start = self->index;
|
||||
|
||||
/* return everything until ']' */
|
||||
while (self->ptr < self->str.end) {
|
||||
switch (c = *self->ptr++) {
|
||||
while (self->index < self->str.end) {
|
||||
c = PyUnicode_READ_CHAR(self->str.str, self->index++);
|
||||
switch (c) {
|
||||
case ']':
|
||||
bracket_seen = 1;
|
||||
break;
|
||||
|
@ -346,7 +388,7 @@ _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
|
|||
|
||||
/* end of string is okay */
|
||||
/* don't include the ']' */
|
||||
name->end = self->ptr-1;
|
||||
name->end = self->index-1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -356,10 +398,10 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
|
|||
Py_ssize_t *name_idx, SubString *name)
|
||||
{
|
||||
/* check at end of input */
|
||||
if (self->ptr >= self->str.end)
|
||||
if (self->index >= self->str.end)
|
||||
return 1;
|
||||
|
||||
switch (*self->ptr++) {
|
||||
switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
|
||||
case '.':
|
||||
*is_attribute = 1;
|
||||
if (_FieldNameIterator_attr(self, name) == 0)
|
||||
|
@ -382,7 +424,7 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
|
|||
}
|
||||
|
||||
/* empty string is an error */
|
||||
if (name->ptr == name->end) {
|
||||
if (name->start == name->end) {
|
||||
PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
|
||||
return 0;
|
||||
}
|
||||
|
@ -398,24 +440,23 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
|
|||
'rest' is an iterator to return the rest
|
||||
*/
|
||||
static int
|
||||
field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
|
||||
field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
|
||||
Py_ssize_t *first_idx, FieldNameIterator *rest,
|
||||
AutoNumber *auto_number)
|
||||
{
|
||||
STRINGLIB_CHAR c;
|
||||
STRINGLIB_CHAR *p = ptr;
|
||||
STRINGLIB_CHAR *end = ptr + len;
|
||||
Py_UCS4 c;
|
||||
Py_ssize_t i = start;
|
||||
int field_name_is_empty;
|
||||
int using_numeric_index;
|
||||
|
||||
/* find the part up until the first '.' or '[' */
|
||||
while (p < end) {
|
||||
switch (c = *p++) {
|
||||
while (i < end) {
|
||||
switch (c = PyUnicode_READ_CHAR(str, i++)) {
|
||||
case '[':
|
||||
case '.':
|
||||
/* back up so that this character is available to the
|
||||
"rest" iterator */
|
||||
p--;
|
||||
i--;
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
|
@ -424,15 +465,15 @@ field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
|
|||
}
|
||||
|
||||
/* set up the return values */
|
||||
SubString_init(first, ptr, p - ptr);
|
||||
FieldNameIterator_init(rest, p, end - p);
|
||||
SubString_init(first, str, start, i);
|
||||
FieldNameIterator_init(rest, str, i, end);
|
||||
|
||||
/* see if "first" is an integer, in which case it's used as an index */
|
||||
*first_idx = get_integer(first);
|
||||
if (*first_idx == -1 && PyErr_Occurred())
|
||||
return 0;
|
||||
|
||||
field_name_is_empty = first->ptr >= first->end;
|
||||
field_name_is_empty = first->start >= first->end;
|
||||
|
||||
/* If the field name is omitted or if we have a numeric index
|
||||
specified, then we're doing numeric indexing into args. */
|
||||
|
@ -487,7 +528,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
|
|||
Py_ssize_t index;
|
||||
FieldNameIterator rest;
|
||||
|
||||
if (!field_name_split(input->ptr, input->end - input->ptr, &first,
|
||||
if (!field_name_split(input->str, input->start, input->end, &first,
|
||||
&index, &rest, auto_number)) {
|
||||
goto error;
|
||||
}
|
||||
|
@ -576,11 +617,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
|
|||
int ok = 0;
|
||||
PyObject *result = NULL;
|
||||
PyObject *format_spec_object = NULL;
|
||||
PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
|
||||
STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
|
||||
format_spec->ptr : NULL;
|
||||
Py_ssize_t format_spec_len = format_spec->ptr ?
|
||||
format_spec->end - format_spec->ptr : 0;
|
||||
PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
|
||||
|
||||
/* If we know the type exactly, skip the lookup of __format__ and just
|
||||
call the formatter directly. */
|
||||
|
@ -597,39 +634,28 @@ render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
|
|||
if (formatter) {
|
||||
/* we know exactly which formatter will be called when __format__ is
|
||||
looked up, so call it directly, instead. */
|
||||
result = formatter(fieldobj, format_spec_start, format_spec_len);
|
||||
result = formatter(fieldobj, format_spec->str,
|
||||
format_spec->start, format_spec->end);
|
||||
}
|
||||
else {
|
||||
/* We need to create an object out of the pointers we have, because
|
||||
__format__ takes a string/unicode object for format_spec. */
|
||||
format_spec_object = STRINGLIB_NEW(format_spec_start,
|
||||
format_spec_len);
|
||||
if (format_spec->str)
|
||||
format_spec_object = PyUnicode_Substring(format_spec->str,
|
||||
format_spec->start,
|
||||
format_spec->end);
|
||||
else
|
||||
format_spec_object = PyUnicode_New(0, 0);
|
||||
if (format_spec_object == NULL)
|
||||
goto done;
|
||||
|
||||
result = PyObject_Format(fieldobj, format_spec_object);
|
||||
}
|
||||
if (result == NULL)
|
||||
if (result == NULL || PyUnicode_READY(result) == -1)
|
||||
goto done;
|
||||
|
||||
#if PY_VERSION_HEX >= 0x03000000
|
||||
assert(PyUnicode_Check(result));
|
||||
#else
|
||||
assert(PyBytes_Check(result) || PyUnicode_Check(result));
|
||||
|
||||
/* Convert result to our type. We could be str, and result could
|
||||
be unicode */
|
||||
{
|
||||
PyObject *tmp = STRINGLIB_TOSTR(result);
|
||||
if (tmp == NULL)
|
||||
goto done;
|
||||
Py_DECREF(result);
|
||||
result = tmp;
|
||||
}
|
||||
#endif
|
||||
|
||||
ok = output_data(output,
|
||||
STRINGLIB_STR(result), STRINGLIB_LEN(result));
|
||||
ok = output_data(output, result, 0, PyUnicode_GET_LENGTH(result));
|
||||
done:
|
||||
Py_XDECREF(format_spec_object);
|
||||
Py_XDECREF(result);
|
||||
|
@ -638,23 +664,24 @@ done:
|
|||
|
||||
static int
|
||||
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
|
||||
STRINGLIB_CHAR *conversion)
|
||||
Py_UCS4 *conversion)
|
||||
{
|
||||
/* Note this function works if the field name is zero length,
|
||||
which is good. Zero length field names are handled later, in
|
||||
field_name_split. */
|
||||
|
||||
STRINGLIB_CHAR c = 0;
|
||||
Py_UCS4 c = 0;
|
||||
|
||||
/* initialize these, as they may be empty */
|
||||
*conversion = '\0';
|
||||
SubString_init(format_spec, NULL, 0);
|
||||
SubString_init(format_spec, NULL, 0, 0);
|
||||
|
||||
/* Search for the field name. it's terminated by the end of
|
||||
the string, or a ':' or '!' */
|
||||
field_name->ptr = str->ptr;
|
||||
while (str->ptr < str->end) {
|
||||
switch (c = *(str->ptr++)) {
|
||||
field_name->str = str->str;
|
||||
field_name->start = str->start;
|
||||
while (str->start < str->end) {
|
||||
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
|
||||
case ':':
|
||||
case '!':
|
||||
break;
|
||||
|
@ -667,26 +694,27 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
|
|||
if (c == '!' || c == ':') {
|
||||
/* we have a format specifier and/or a conversion */
|
||||
/* don't include the last character */
|
||||
field_name->end = str->ptr-1;
|
||||
field_name->end = str->start-1;
|
||||
|
||||
/* the format specifier is the rest of the string */
|
||||
format_spec->ptr = str->ptr;
|
||||
format_spec->str = str->str;
|
||||
format_spec->start = str->start;
|
||||
format_spec->end = str->end;
|
||||
|
||||
/* see if there's a conversion specifier */
|
||||
if (c == '!') {
|
||||
/* there must be another character present */
|
||||
if (format_spec->ptr >= format_spec->end) {
|
||||
if (format_spec->start >= format_spec->end) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"end of format while looking for conversion "
|
||||
"specifier");
|
||||
return 0;
|
||||
}
|
||||
*conversion = *(format_spec->ptr++);
|
||||
*conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
|
||||
|
||||
/* if there is another character, it must be a colon */
|
||||
if (format_spec->ptr < format_spec->end) {
|
||||
c = *(format_spec->ptr++);
|
||||
if (format_spec->start < format_spec->end) {
|
||||
c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
|
||||
if (c != ':') {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"expected ':' after format specifier");
|
||||
|
@ -697,7 +725,7 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
|
|||
}
|
||||
else
|
||||
/* end of string, there's no format_spec or conversion */
|
||||
field_name->end = str->ptr;
|
||||
field_name->end = str->start;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -716,9 +744,10 @@ typedef struct {
|
|||
} MarkupIterator;
|
||||
|
||||
static int
|
||||
MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
|
||||
MarkupIterator_init(MarkupIterator *self, PyObject *str,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
SubString_init(&self->str, ptr, len);
|
||||
SubString_init(&self->str, str, start, end);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -727,30 +756,30 @@ MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
|
|||
static int
|
||||
MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
||||
int *field_present, SubString *field_name,
|
||||
SubString *format_spec, STRINGLIB_CHAR *conversion,
|
||||
SubString *format_spec, Py_UCS4 *conversion,
|
||||
int *format_spec_needs_expanding)
|
||||
{
|
||||
int at_end;
|
||||
STRINGLIB_CHAR c = 0;
|
||||
STRINGLIB_CHAR *start;
|
||||
Py_UCS4 c = 0;
|
||||
Py_ssize_t start;
|
||||
int count;
|
||||
Py_ssize_t len;
|
||||
int markup_follows = 0;
|
||||
|
||||
/* initialize all of the output variables */
|
||||
SubString_init(literal, NULL, 0);
|
||||
SubString_init(field_name, NULL, 0);
|
||||
SubString_init(format_spec, NULL, 0);
|
||||
SubString_init(literal, NULL, 0, 0);
|
||||
SubString_init(field_name, NULL, 0, 0);
|
||||
SubString_init(format_spec, NULL, 0, 0);
|
||||
*conversion = '\0';
|
||||
*format_spec_needs_expanding = 0;
|
||||
*field_present = 0;
|
||||
|
||||
/* No more input, end of iterator. This is the normal exit
|
||||
path. */
|
||||
if (self->str.ptr >= self->str.end)
|
||||
if (self->str.start >= self->str.end)
|
||||
return 1;
|
||||
|
||||
start = self->str.ptr;
|
||||
start = self->str.start;
|
||||
|
||||
/* First read any literal text. Read until the end of string, an
|
||||
escaped '{' or '}', or an unescaped '{'. In order to never
|
||||
|
@ -759,8 +788,8 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
including the brace, but no format object. The next time
|
||||
through, we'll return the rest of the literal, skipping past
|
||||
the second consecutive brace. */
|
||||
while (self->str.ptr < self->str.end) {
|
||||
switch (c = *(self->str.ptr++)) {
|
||||
while (self->str.start < self->str.end) {
|
||||
switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
|
||||
case '{':
|
||||
case '}':
|
||||
markup_follows = 1;
|
||||
|
@ -771,10 +800,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
break;
|
||||
}
|
||||
|
||||
at_end = self->str.ptr >= self->str.end;
|
||||
len = self->str.ptr - start;
|
||||
at_end = self->str.start >= self->str.end;
|
||||
len = self->str.start - start;
|
||||
|
||||
if ((c == '}') && (at_end || (c != *self->str.ptr))) {
|
||||
if ((c == '}') && (at_end ||
|
||||
(c != PyUnicode_READ_CHAR(self->str.str,
|
||||
self->str.start)))) {
|
||||
PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
|
||||
"in format string");
|
||||
return 0;
|
||||
|
@ -785,10 +816,10 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
return 0;
|
||||
}
|
||||
if (!at_end) {
|
||||
if (c == *self->str.ptr) {
|
||||
if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
|
||||
/* escaped } or {, skip it in the input. there is no
|
||||
markup object following us, just this literal text */
|
||||
self->str.ptr++;
|
||||
self->str.start++;
|
||||
markup_follows = 0;
|
||||
}
|
||||
else
|
||||
|
@ -796,7 +827,8 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
}
|
||||
|
||||
/* record the literal text */
|
||||
literal->ptr = start;
|
||||
literal->str = self->str.str;
|
||||
literal->start = start;
|
||||
literal->end = start + len;
|
||||
|
||||
if (!markup_follows)
|
||||
|
@ -808,12 +840,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
*field_present = 1;
|
||||
count = 1;
|
||||
|
||||
start = self->str.ptr;
|
||||
start = self->str.start;
|
||||
|
||||
/* we know we can't have a zero length string, so don't worry
|
||||
about that case */
|
||||
while (self->str.ptr < self->str.end) {
|
||||
switch (c = *(self->str.ptr++)) {
|
||||
while (self->str.start < self->str.end) {
|
||||
switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
|
||||
case '{':
|
||||
/* the format spec needs to be recursively expanded.
|
||||
this is an optimization, and not strictly needed */
|
||||
|
@ -826,7 +858,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
/* we're done. parse and get out */
|
||||
SubString s;
|
||||
|
||||
SubString_init(&s, start, self->str.ptr - 1 - start);
|
||||
SubString_init(&s, self->str.str, start, self->str.start - 1);
|
||||
if (parse_field(&s, field_name, format_spec, conversion) == 0)
|
||||
return 0;
|
||||
|
||||
|
@ -845,7 +877,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
|||
|
||||
/* do the !r or !s conversion on obj */
|
||||
static PyObject *
|
||||
do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
|
||||
do_conversion(PyObject *obj, Py_UCS4 conversion)
|
||||
{
|
||||
/* XXX in pre-3.0, do we need to convert this to unicode, since it
|
||||
might have returned a string? */
|
||||
|
@ -853,11 +885,9 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
|
|||
case 'r':
|
||||
return PyObject_Repr(obj);
|
||||
case 's':
|
||||
return STRINGLIB_TOSTR(obj);
|
||||
#if PY_VERSION_HEX >= 0x03000000
|
||||
return PyObject_Str(obj);
|
||||
case 'a':
|
||||
return STRINGLIB_TOASCII(obj);
|
||||
#endif
|
||||
return PyObject_ASCII(obj);
|
||||
default:
|
||||
if (conversion > 32 && conversion < 127) {
|
||||
/* It's the ASCII subrange; casting to char is safe
|
||||
|
@ -889,7 +919,7 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
|
|||
|
||||
static int
|
||||
output_markup(SubString *field_name, SubString *format_spec,
|
||||
int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
|
||||
int format_spec_needs_expanding, Py_UCS4 conversion,
|
||||
OutputString *output, PyObject *args, PyObject *kwargs,
|
||||
int recursion_depth, AutoNumber *auto_number)
|
||||
{
|
||||
|
@ -906,7 +936,7 @@ output_markup(SubString *field_name, SubString *format_spec,
|
|||
|
||||
if (conversion != '\0') {
|
||||
tmp = do_conversion(fieldobj, conversion);
|
||||
if (tmp == NULL)
|
||||
if (tmp == NULL || PyUnicode_READY(tmp) == -1)
|
||||
goto done;
|
||||
|
||||
/* do the assignment, transferring ownership: fieldobj = tmp */
|
||||
|
@ -919,14 +949,13 @@ output_markup(SubString *field_name, SubString *format_spec,
|
|||
if (format_spec_needs_expanding) {
|
||||
tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
|
||||
auto_number);
|
||||
if (tmp == NULL)
|
||||
if (tmp == NULL || PyUnicode_READY(tmp) == -1)
|
||||
goto done;
|
||||
|
||||
/* note that in the case we're expanding the format string,
|
||||
tmp must be kept around until after the call to
|
||||
render_field. */
|
||||
SubString_init(&expanded_format_spec,
|
||||
STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
|
||||
SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
|
||||
actual_format_spec = &expanded_format_spec;
|
||||
}
|
||||
else
|
||||
|
@ -961,14 +990,14 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
|
|||
SubString literal;
|
||||
SubString field_name;
|
||||
SubString format_spec;
|
||||
STRINGLIB_CHAR conversion;
|
||||
Py_UCS4 conversion;
|
||||
|
||||
MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
|
||||
MarkupIterator_init(&iter, input->str, input->start, input->end);
|
||||
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
|
||||
&field_name, &format_spec,
|
||||
&conversion,
|
||||
&format_spec_needs_expanding)) == 2) {
|
||||
if (!output_data(output, literal.ptr, literal.end - literal.ptr))
|
||||
if (!output_data(output, literal.str, literal.start, literal.end))
|
||||
return 0;
|
||||
if (field_present)
|
||||
if (!output_markup(&field_name, &format_spec,
|
||||
|
@ -990,9 +1019,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
|
|||
{
|
||||
OutputString output;
|
||||
PyObject *result = NULL;
|
||||
Py_ssize_t count;
|
||||
|
||||
output.obj = NULL; /* needed so cleanup code always works */
|
||||
output.data = NULL; /* needed so cleanup code always works */
|
||||
|
||||
/* check the recursion level */
|
||||
if (recursion_depth <= 0) {
|
||||
|
@ -1004,7 +1032,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
|
|||
/* initial size is the length of the format string, plus the size
|
||||
increment. seems like a reasonable default */
|
||||
if (!output_initialize(&output,
|
||||
input->end - input->ptr +
|
||||
input->end - input->start +
|
||||
INITIAL_SIZE_INCREMENT))
|
||||
goto done;
|
||||
|
||||
|
@ -1013,17 +1041,14 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
|
|||
goto done;
|
||||
}
|
||||
|
||||
count = output.ptr - STRINGLIB_STR(output.obj);
|
||||
if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
|
||||
result = PyUnicode_New(output.pos, output.maxchar);
|
||||
if (!result)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* transfer ownership to result */
|
||||
result = output.obj;
|
||||
output.obj = NULL;
|
||||
memcpy(PyUnicode_DATA(result), output.data, output.pos << (output.kind-1));
|
||||
|
||||
done:
|
||||
Py_XDECREF(output.obj);
|
||||
if (output.data)
|
||||
PyMem_Free(output.data);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1045,8 +1070,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
|
|||
|
||||
AutoNumber auto_number;
|
||||
|
||||
if (PyUnicode_READY(self) == -1)
|
||||
return NULL;
|
||||
|
||||
AutoNumber_Init(&auto_number);
|
||||
SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
||||
SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
|
||||
return build_string(&input, args, kwargs, recursion_depth, &auto_number);
|
||||
}
|
||||
|
||||
|
@ -1069,7 +1097,7 @@ do_string_format_map(PyObject *self, PyObject *obj)
|
|||
typedef struct {
|
||||
PyObject_HEAD
|
||||
|
||||
STRINGLIB_OBJECT *str;
|
||||
PyUnicodeObject *str;
|
||||
|
||||
MarkupIterator it_markup;
|
||||
} formatteriterobject;
|
||||
|
@ -1095,7 +1123,7 @@ formatteriter_next(formatteriterobject *it)
|
|||
SubString literal;
|
||||
SubString field_name;
|
||||
SubString format_spec;
|
||||
STRINGLIB_CHAR conversion;
|
||||
Py_UCS4 conversion;
|
||||
int format_spec_needs_expanding;
|
||||
int field_present;
|
||||
int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
|
||||
|
@ -1139,7 +1167,8 @@ formatteriter_next(formatteriterobject *it)
|
|||
Py_INCREF(conversion_str);
|
||||
}
|
||||
else
|
||||
conversion_str = STRINGLIB_NEW(&conversion, 1);
|
||||
conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
&conversion, 1);
|
||||
if (conversion_str == NULL)
|
||||
goto done;
|
||||
|
||||
|
@ -1196,7 +1225,7 @@ static PyTypeObject PyFormatterIter_Type = {
|
|||
describing the parsed elements. It's a wrapper around
|
||||
stringlib/string_format.h's MarkupIterator */
|
||||
static PyObject *
|
||||
formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self)
|
||||
formatter_parser(PyObject *ignored, PyUnicodeObject *self)
|
||||
{
|
||||
formatteriterobject *it;
|
||||
|
||||
|
@ -1205,6 +1234,9 @@ formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicode_READY(self) == -1)
|
||||
return NULL;
|
||||
|
||||
it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
|
||||
if (it == NULL)
|
||||
return NULL;
|
||||
|
@ -1214,10 +1246,7 @@ formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self)
|
|||
it->str = self;
|
||||
|
||||
/* initialize the contained MarkupIterator */
|
||||
MarkupIterator_init(&it->it_markup,
|
||||
STRINGLIB_STR(self),
|
||||
STRINGLIB_LEN(self));
|
||||
|
||||
MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
|
||||
return (PyObject *)it;
|
||||
}
|
||||
|
||||
|
@ -1234,7 +1263,7 @@ formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self)
|
|||
typedef struct {
|
||||
PyObject_HEAD
|
||||
|
||||
STRINGLIB_OBJECT *str;
|
||||
PyUnicodeObject *str;
|
||||
|
||||
FieldNameIterator it_field;
|
||||
} fieldnameiterobject;
|
||||
|
@ -1336,7 +1365,7 @@ static PyTypeObject PyFieldNameIter_Type = {
|
|||
field_name_split. The iterator it returns is a
|
||||
FieldNameIterator */
|
||||
static PyObject *
|
||||
formatter_field_name_split(PyObject *ignored, STRINGLIB_OBJECT *self)
|
||||
formatter_field_name_split(PyObject *ignored, PyUnicodeObject *self)
|
||||
{
|
||||
SubString first;
|
||||
Py_ssize_t first_idx;
|
||||
|
@ -1350,6 +1379,9 @@ formatter_field_name_split(PyObject *ignored, STRINGLIB_OBJECT *self)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (PyUnicode_READY(self) == -1)
|
||||
return NULL;
|
||||
|
||||
it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
|
||||
if (it == NULL)
|
||||
return NULL;
|
||||
|
@ -1361,8 +1393,7 @@ formatter_field_name_split(PyObject *ignored, STRINGLIB_OBJECT *self)
|
|||
|
||||
/* Pass in auto_number = NULL. We'll return an empty string for
|
||||
first_obj in that case. */
|
||||
if (!field_name_split(STRINGLIB_STR(self),
|
||||
STRINGLIB_LEN(self),
|
||||
if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
|
||||
&first, &first_idx, &it->it_field, NULL))
|
||||
goto done;
|
||||
|
|
@ -6,6 +6,8 @@
|
|||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 1
|
||||
|
||||
#define FASTSEARCH fastsearch
|
||||
#define STRINGLIB(F) stringlib_##F
|
||||
#define STRINGLIB_OBJECT PyUnicodeObject
|
||||
#define STRINGLIB_CHAR Py_UNICODE
|
||||
#define STRINGLIB_TYPE_NAME "unicode"
|
||||
|
|
|
@ -20,10 +20,11 @@
|
|||
>> (8*sizeof(unsigned int) - MCACHE_SIZE_EXP))
|
||||
#define MCACHE_HASH_METHOD(type, name) \
|
||||
MCACHE_HASH((type)->tp_version_tag, \
|
||||
((PyUnicodeObject *)(name))->hash)
|
||||
((PyASCIIObject *)(name))->hash)
|
||||
#define MCACHE_CACHEABLE_NAME(name) \
|
||||
PyUnicode_CheckExact(name) && \
|
||||
PyUnicode_GET_SIZE(name) <= MCACHE_MAX_ATTR_SIZE
|
||||
PyUnicode_READY(name) != -1 && \
|
||||
PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE
|
||||
|
||||
struct method_cache_entry {
|
||||
unsigned int version;
|
||||
|
@ -3489,7 +3490,7 @@ object_format(PyObject *self, PyObject *args)
|
|||
if (self_as_str != NULL) {
|
||||
/* Issue 7994: If we're converting to a string, we
|
||||
should reject format specifications */
|
||||
if (PyUnicode_GET_SIZE(format_spec) > 0) {
|
||||
if (PyUnicode_GET_LENGTH(format_spec) > 0) {
|
||||
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||
"object.__format__ with a non-empty format "
|
||||
"string is deprecated", 1) < 0) {
|
||||
|
@ -5122,14 +5123,21 @@ slot_tp_str(PyObject *self)
|
|||
return res;
|
||||
}
|
||||
else {
|
||||
PyObject *ress;
|
||||
/* PyObject *ress; */
|
||||
PyErr_Clear();
|
||||
res = slot_tp_repr(self);
|
||||
if (!res)
|
||||
return NULL;
|
||||
/* XXX this is non-sensical. Why should we return
|
||||
a bytes object from __str__. Is this code even
|
||||
used? - mvl */
|
||||
assert(0);
|
||||
return res;
|
||||
/*
|
||||
ress = _PyUnicode_AsDefaultEncodedString(res);
|
||||
Py_DECREF(res);
|
||||
return ress;
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6206,7 +6214,7 @@ super_getattro(PyObject *self, PyObject *name)
|
|||
/* We want __class__ to return the class of the super object
|
||||
(i.e. super, or a subclass), not the class of su->obj. */
|
||||
skip = (PyUnicode_Check(name) &&
|
||||
PyUnicode_GET_SIZE(name) == 9 &&
|
||||
PyUnicode_GET_LENGTH(name) == 9 &&
|
||||
PyUnicode_CompareWithASCIIString(name, "__class__") == 0);
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,91 @@
|
|||
|
||||
size_t
|
||||
UNIOP(strlen)(const UNIOP_t *u)
|
||||
{
|
||||
int res = 0;
|
||||
while(*u++)
|
||||
res++;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Copy the zero-terminated string s2, terminator included, into s1.
   Returns s1. */
UNIOP_t*
UNIOP(strcpy)(UNIOP_t *s1, const UNIOP_t *s2)
{
    UNIOP_t *dst = s1;

    for (;;) {
        const UNIOP_t ch = *s2++;
        *dst++ = ch;
        /* the terminator has just been stored: done */
        if (!ch)
            break;
    }
    return s1;
}
|
||||
|
||||
/* Copy at most n elements of the zero-terminated string s2 into s1 and
   return s1.  Copying stops once the terminator has been copied.  Unlike
   the C library strncpy(), the rest of s1 is not zero-padded; like
   strncpy(), s1 is left unterminated when s2 holds n or more non-zero
   elements. */
UNIOP_t*
UNIOP(strncpy)(UNIOP_t *s1, const UNIOP_t *s2, size_t n)
{
    UNIOP_t *u = s1;
    /* Test the remaining budget *before* each store.  Testing it only
       after the store wrote up to n + 1 elements, and one element even
       when n == 0. */
    for (; n != 0; n--) {
        if ((*u++ = *s2++) == 0)
            break;
    }
    return s1;
}
|
||||
|
||||
/* Append the zero-terminated string s2 (terminator included) to the end
   of the zero-terminated string s1.  Returns s1. */
UNIOP_t*
UNIOP(strcat)(UNIOP_t *s1, const UNIOP_t *s2)
{
    /* s1 + length of s1 is the position of s1's terminator, which is
       where the copy of s2 has to start */
    UNIOP(strcpy)(s1 + UNIOP(strlen)(s1), s2);
    return s1;
}
|
||||
|
||||
int
|
||||
UNIOP(strcmp)(const UNIOP_t *s1, const UNIOP_t *s2)
|
||||
{
|
||||
while (*s1 && *s2 && *s1 == *s2)
|
||||
s1++, s2++;
|
||||
if (*s1 && *s2)
|
||||
return (*s1 < *s2) ? -1 : +1;
|
||||
if (*s1)
|
||||
return 1;
|
||||
if (*s2)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
UNIOP(strncmp)(const UNIOP_t *s1, const UNIOP_t *s2, size_t n)
|
||||
{
|
||||
register UNIOP_t u1, u2;
|
||||
for (; n != 0; n--) {
|
||||
u1 = *s1;
|
||||
u2 = *s2;
|
||||
if (u1 != u2)
|
||||
return (u1 < u2) ? -1 : +1;
|
||||
if (u1 == '\0')
|
||||
return 0;
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return a pointer to the first element of the zero-terminated string s
   that equals c, or NULL if there is none.  The terminator itself is
   never examined, so searching for 0 yields NULL. */
UNIOP_t*
UNIOP(strchr)(const UNIOP_t *s, UNIOP_t c)
{
    while (*s) {
        if (*s == c)
            return (UNIOP_t*)s;
        s++;
    }
    return NULL;
}
|
||||
|
||||
/* Return a pointer to the last element of the zero-terminated string s
   that equals c, or NULL if there is none.  The terminator itself is
   never examined, so searching for 0 yields NULL. */
UNIOP_t*
UNIOP(strrchr)(const UNIOP_t *s, UNIOP_t c)
{
    const UNIOP_t *last = NULL;

    /* single forward pass, remembering the most recent match */
    for (; *s; s++) {
        if (*s == c)
            last = s;
    }
    return (UNIOP_t*)last;
}
|
||||
|
|
@ -329,12 +329,9 @@ gethandle(PyObject* obj, char* name)
|
|||
static PyObject*
|
||||
getenvironment(PyObject* environment)
|
||||
{
|
||||
int i;
|
||||
Py_ssize_t envsize;
|
||||
PyObject* out = NULL;
|
||||
PyObject* keys;
|
||||
PyObject* values;
|
||||
Py_UNICODE* p;
|
||||
Py_ssize_t i, envsize, totalsize;
|
||||
Py_UCS4 *buffer = NULL, *p, *end;
|
||||
PyObject *keys, *values, *res;
|
||||
|
||||
/* convert environment dictionary to windows enviroment string */
|
||||
if (! PyMapping_Check(environment)) {
|
||||
|
@ -350,14 +347,8 @@ getenvironment(PyObject* environment)
|
|||
if (!keys || !values)
|
||||
goto error;
|
||||
|
||||
out = PyUnicode_FromUnicode(NULL, 2048);
|
||||
if (! out)
|
||||
goto error;
|
||||
|
||||
p = PyUnicode_AS_UNICODE(out);
|
||||
|
||||
totalsize = 1; /* trailing null character */
|
||||
for (i = 0; i < envsize; i++) {
|
||||
Py_ssize_t ksize, vsize, totalsize;
|
||||
PyObject* key = PyList_GET_ITEM(keys, i);
|
||||
PyObject* value = PyList_GET_ITEM(values, i);
|
||||
|
||||
|
@ -366,36 +357,42 @@ getenvironment(PyObject* environment)
|
|||
"environment can only contain strings");
|
||||
goto error;
|
||||
}
|
||||
ksize = PyUnicode_GET_SIZE(key);
|
||||
vsize = PyUnicode_GET_SIZE(value);
|
||||
totalsize = (p - PyUnicode_AS_UNICODE(out)) + ksize + 1 +
|
||||
vsize + 1 + 1;
|
||||
if (totalsize > PyUnicode_GET_SIZE(out)) {
|
||||
Py_ssize_t offset = p - PyUnicode_AS_UNICODE(out);
|
||||
PyUnicode_Resize(&out, totalsize + 1024);
|
||||
p = PyUnicode_AS_UNICODE(out) + offset;
|
||||
}
|
||||
Py_UNICODE_COPY(p, PyUnicode_AS_UNICODE(key), ksize);
|
||||
p += ksize;
|
||||
totalsize += PyUnicode_GET_LENGTH(key) + 1; /* +1 for '=' */
|
||||
totalsize += PyUnicode_GET_LENGTH(value) + 1; /* +1 for '\0' */
|
||||
}
|
||||
|
||||
buffer = PyMem_Malloc(totalsize * sizeof(Py_UCS4));
|
||||
if (! buffer)
|
||||
goto error;
|
||||
p = buffer;
|
||||
end = buffer + totalsize;
|
||||
|
||||
for (i = 0; i < envsize; i++) {
|
||||
PyObject* key = PyList_GET_ITEM(keys, i);
|
||||
PyObject* value = PyList_GET_ITEM(values, i);
|
||||
if (!PyUnicode_AsUCS4(key, p, end - p, 0))
|
||||
goto error;
|
||||
p += PyUnicode_GET_LENGTH(key);
|
||||
*p++ = '=';
|
||||
Py_UNICODE_COPY(p, PyUnicode_AS_UNICODE(value), vsize);
|
||||
p += vsize;
|
||||
if (!PyUnicode_AsUCS4(value, p, end - p, 0))
|
||||
goto error;
|
||||
p += PyUnicode_GET_LENGTH(value);
|
||||
*p++ = '\0';
|
||||
}
|
||||
|
||||
/* add trailing null byte */
|
||||
*p++ = '\0';
|
||||
PyUnicode_Resize(&out, p - PyUnicode_AS_UNICODE(out));
|
||||
|
||||
/* PyObject_Print(out, stdout, 0); */
|
||||
assert(p == end);
|
||||
|
||||
Py_XDECREF(keys);
|
||||
Py_XDECREF(values);
|
||||
|
||||
return out;
|
||||
res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, p - buffer);
|
||||
PyMem_Free(buffer);
|
||||
return res;
|
||||
|
||||
error:
|
||||
Py_XDECREF(out);
|
||||
PyMem_Free(buffer);
|
||||
Py_XDECREF(keys);
|
||||
Py_XDECREF(values);
|
||||
return NULL;
|
||||
|
@ -609,7 +606,7 @@ sp_GetModuleFileName(PyObject* self, PyObject* args)
|
|||
if (! result)
|
||||
return PyErr_SetFromWindowsErr(GetLastError());
|
||||
|
||||
return PyUnicode_FromUnicode(filename, Py_UNICODE_strlen(filename));
|
||||
return PyUnicode_FromWideChar(filename, wcslen(filename));
|
||||
}
|
||||
|
||||
static PyMethodDef sp_functions[] = {
|
||||
|
|
|
@ -93,7 +93,7 @@ _PyWin_FindRegisteredModule(PyObject *moduleName,
|
|||
}
|
||||
if (fdp->suffix == NULL)
|
||||
return NULL;
|
||||
path = PyUnicode_FromUnicode(pathBuf, wcslen(pathBuf));
|
||||
path = PyUnicode_FromWideChar(pathBuf, wcslen(pathBuf));
|
||||
if (path == NULL)
|
||||
return NULL;
|
||||
fp = _Py_fopen(path, fdp->mode);
|
||||
|
|
|
@ -212,7 +212,6 @@ static PyObject *
|
|||
msvcrt_getwch(PyObject *self, PyObject *args)
|
||||
{
|
||||
Py_UNICODE ch;
|
||||
Py_UNICODE u[1];
|
||||
|
||||
if (!PyArg_ParseTuple(args, ":getwch"))
|
||||
return NULL;
|
||||
|
@ -220,8 +219,7 @@ msvcrt_getwch(PyObject *self, PyObject *args)
|
|||
Py_BEGIN_ALLOW_THREADS
|
||||
ch = _getwch();
|
||||
Py_END_ALLOW_THREADS
|
||||
u[0] = ch;
|
||||
return PyUnicode_FromUnicode(u, 1);
|
||||
return PyUnicode_FromOrdinal(ch);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(getwch_doc,
|
||||
|
@ -257,7 +255,6 @@ static PyObject *
|
|||
msvcrt_getwche(PyObject *self, PyObject *args)
|
||||
{
|
||||
Py_UNICODE ch;
|
||||
Py_UNICODE s[1];
|
||||
|
||||
if (!PyArg_ParseTuple(args, ":getwche"))
|
||||
return NULL;
|
||||
|
@ -265,8 +262,7 @@ msvcrt_getwche(PyObject *self, PyObject *args)
|
|||
Py_BEGIN_ALLOW_THREADS
|
||||
ch = _getwche();
|
||||
Py_END_ALLOW_THREADS
|
||||
s[0] = ch;
|
||||
return PyUnicode_FromUnicode(s, 1);
|
||||
return PyUnicode_FromOrdinal(ch);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(getwche_doc,
|
||||
|
|
|
@ -550,10 +550,6 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
|
|||
/* Define if you want to use the GNU readline library */
|
||||
/* #define WITH_READLINE 1 */
|
||||
|
||||
/* Define as the size of the unicode type. */
|
||||
/* This is enough for unicodeobject.h to do the "right thing" on Windows. */
|
||||
#define Py_UNICODE_SIZE 2
|
||||
|
||||
/* Use Python's own small-block memory-allocator. */
|
||||
#define WITH_PYMALLOC 1
|
||||
|
||||
|
|
|
@ -882,7 +882,7 @@ Reg2Py(BYTE *retDataBuf, DWORD retDataSize, DWORD typ)
|
|||
retDataSize -= 2;
|
||||
if (retDataSize <= 0)
|
||||
data = L"";
|
||||
obData = PyUnicode_FromUnicode(data, retDataSize/2);
|
||||
obData = PyUnicode_FromWideChar(data, retDataSize/2);
|
||||
break;
|
||||
}
|
||||
case REG_MULTI_SZ:
|
||||
|
@ -913,7 +913,7 @@ Reg2Py(BYTE *retDataBuf, DWORD retDataSize, DWORD typ)
|
|||
}
|
||||
PyList_SetItem(obData,
|
||||
index,
|
||||
PyUnicode_FromUnicode(str[index], len));
|
||||
PyUnicode_FromWideChar(str[index], len));
|
||||
}
|
||||
free(str);
|
||||
|
||||
|
@ -1123,7 +1123,7 @@ PyEnumKey(PyObject *self, PyObject *args)
|
|||
if (rc != ERROR_SUCCESS)
|
||||
return PyErr_SetFromWindowsErrWithFunction(rc, "RegEnumKeyEx");
|
||||
|
||||
retStr = PyUnicode_FromUnicode(tmpbuf, len);
|
||||
retStr = PyUnicode_FromWideChar(tmpbuf, len);
|
||||
return retStr; /* can be NULL */
|
||||
}
|
||||
|
||||
|
@ -1394,7 +1394,7 @@ PyQueryValue(PyObject *self, PyObject *args)
|
|||
"RegQueryValue");
|
||||
}
|
||||
|
||||
retStr = PyUnicode_FromUnicode(retBuf, wcslen(retBuf));
|
||||
retStr = PyUnicode_FromWideChar(retBuf, wcslen(retBuf));
|
||||
PyMem_Free(retBuf);
|
||||
return retStr;
|
||||
}
|
||||
|
|
|
@ -1258,14 +1258,16 @@ indenterror(struct tok_state *tok)
|
|||
#ifdef PGEN
|
||||
#define verify_identifier(tok) 1
|
||||
#else
|
||||
/* Verify that the identifier follows PEP 3131. */
|
||||
/* Verify that the identifier follows PEP 3131.
|
||||
All identifier strings are guaranteed to be "ready" unicode objects.
|
||||
*/
|
||||
static int
|
||||
verify_identifier(struct tok_state *tok)
|
||||
{
|
||||
PyObject *s;
|
||||
int result;
|
||||
s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
|
||||
if (s == NULL) {
|
||||
if (s == NULL || PyUnicode_READY(s) == -1) {
|
||||
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
|
||||
PyErr_Clear();
|
||||
tok->done = E_IDENTIFIER;
|
||||
|
|
|
@ -498,17 +498,19 @@ setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno,
|
|||
*filename = PyDict_GetItemString(globals, "__file__");
|
||||
if (*filename != NULL && PyUnicode_Check(*filename)) {
|
||||
Py_ssize_t len = PyUnicode_GetSize(*filename);
|
||||
Py_UNICODE *unicode = PyUnicode_AS_UNICODE(*filename);
|
||||
int kind = PyUnicode_KIND(*filename);
|
||||
void *data = PyUnicode_DATA(*filename);
|
||||
|
||||
/* if filename.lower().endswith((".pyc", ".pyo")): */
|
||||
if (len >= 4 &&
|
||||
unicode[len-4] == '.' &&
|
||||
Py_UNICODE_TOLOWER(unicode[len-3]) == 'p' &&
|
||||
Py_UNICODE_TOLOWER(unicode[len-2]) == 'y' &&
|
||||
(Py_UNICODE_TOLOWER(unicode[len-1]) == 'c' ||
|
||||
Py_UNICODE_TOLOWER(unicode[len-1]) == 'o'))
|
||||
PyUnicode_READ(kind, data, len-4) == '.' &&
|
||||
Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-3)) == 'p' &&
|
||||
Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-2)) == 'y' &&
|
||||
(Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-1)) == 'c' ||
|
||||
Py_UNICODE_TOLOWER(PyUnicode_READ(kind, data, len-1)) == 'o'))
|
||||
{
|
||||
*filename = PyUnicode_FromUnicode(unicode, len-1);
|
||||
*filename = PyUnicode_Substring(*filename, 0,
|
||||
PyUnicode_GET_LENGTH(*filename)-1);
|
||||
if (*filename == NULL)
|
||||
goto handle_error;
|
||||
}
|
||||
|
|
61
Python/ast.c
61
Python/ast.c
|
@ -528,26 +528,21 @@ static identifier
|
|||
new_identifier(const char* n, PyArena *arena)
|
||||
{
|
||||
PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
|
||||
Py_UNICODE *u;
|
||||
if (!id)
|
||||
if (!id || PyUnicode_READY(id) == -1)
|
||||
return NULL;
|
||||
u = PyUnicode_AS_UNICODE(id);
|
||||
/* Check whether there are non-ASCII characters in the
|
||||
identifier; if so, normalize to NFKC. */
|
||||
for (; *u; u++) {
|
||||
if (*u >= 128) {
|
||||
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
|
||||
PyObject *id2;
|
||||
if (!m)
|
||||
return NULL;
|
||||
id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
|
||||
Py_DECREF(m);
|
||||
if (!id2)
|
||||
return NULL;
|
||||
Py_DECREF(id);
|
||||
id = id2;
|
||||
break;
|
||||
}
|
||||
if (PyUnicode_MAX_CHAR_VALUE((PyUnicodeObject *)id) >= 128) {
|
||||
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
|
||||
PyObject *id2;
|
||||
if (!m)
|
||||
return NULL;
|
||||
id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
|
||||
Py_DECREF(m);
|
||||
if (!id2)
|
||||
return NULL;
|
||||
Py_DECREF(id);
|
||||
id = id2;
|
||||
}
|
||||
PyUnicode_InternInPlace(&id);
|
||||
PyArena_AddPyObject(arena, id);
|
||||
|
@ -3660,20 +3655,14 @@ parsenumber(struct compiling *c, const char *s)
|
|||
}
|
||||
|
||||
static PyObject *
|
||||
decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
|
||||
decode_utf8(struct compiling *c, const char **sPtr, const char *end)
|
||||
{
|
||||
PyObject *u, *v;
|
||||
char *s, *t;
|
||||
t = s = (char *)*sPtr;
|
||||
/* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
|
||||
while (s < end && (*s & 0x80)) s++;
|
||||
*sPtr = s;
|
||||
u = PyUnicode_DecodeUTF8(t, s - t, NULL);
|
||||
if (u == NULL)
|
||||
return NULL;
|
||||
v = PyUnicode_AsEncodedString(u, encoding, NULL);
|
||||
Py_DECREF(u);
|
||||
return v;
|
||||
return PyUnicode_DecodeUTF8(t, s - t, NULL);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -3707,22 +3696,20 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
|
|||
}
|
||||
if (*s & 0x80) { /* XXX inefficient */
|
||||
PyObject *w;
|
||||
char *r;
|
||||
Py_ssize_t rn, i;
|
||||
w = decode_utf8(c, &s, end, "utf-32-be");
|
||||
int kind;
|
||||
void *data;
|
||||
Py_ssize_t len, i;
|
||||
w = decode_utf8(c, &s, end);
|
||||
if (w == NULL) {
|
||||
Py_DECREF(u);
|
||||
return NULL;
|
||||
}
|
||||
r = PyBytes_AS_STRING(w);
|
||||
rn = Py_SIZE(w);
|
||||
assert(rn % 4 == 0);
|
||||
for (i = 0; i < rn; i += 4) {
|
||||
sprintf(p, "\\U%02x%02x%02x%02x",
|
||||
r[i + 0] & 0xFF,
|
||||
r[i + 1] & 0xFF,
|
||||
r[i + 2] & 0xFF,
|
||||
r[i + 3] & 0xFF);
|
||||
kind = PyUnicode_KIND(w);
|
||||
data = PyUnicode_DATA(w);
|
||||
len = PyUnicode_GET_LENGTH(w);
|
||||
for (i = 0; i < len; i++) {
|
||||
Py_UCS4 chr = PyUnicode_READ(kind, data, i);
|
||||
sprintf(p, "\\U%08x", chr);
|
||||
p += 10;
|
||||
}
|
||||
/* Should be impossible to overflow */
|
||||
|
|
|
@ -508,8 +508,8 @@ source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
|
|||
|
||||
if (PyUnicode_Check(cmd)) {
|
||||
cf->cf_flags |= PyCF_IGNORE_COOKIE;
|
||||
cmd = _PyUnicode_AsDefaultEncodedString(cmd);
|
||||
if (cmd == NULL)
|
||||
str = PyUnicode_AsUTF8AndSize(cmd, &size);
|
||||
if (str == NULL)
|
||||
return NULL;
|
||||
}
|
||||
else if (!PyObject_CheckReadBuffer(cmd)) {
|
||||
|
@ -518,9 +518,10 @@ source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
|
|||
funcname, what);
|
||||
return NULL;
|
||||
}
|
||||
if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) {
|
||||
else if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (strlen(str) != size) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"source code string cannot contain null bytes");
|
||||
|
@ -1395,24 +1396,13 @@ builtin_ord(PyObject *self, PyObject* obj)
|
|||
}
|
||||
}
|
||||
else if (PyUnicode_Check(obj)) {
|
||||
size = PyUnicode_GET_SIZE(obj);
|
||||
if (PyUnicode_READY(obj) == -1)
|
||||
return NULL;
|
||||
size = PyUnicode_GET_LENGTH(obj);
|
||||
if (size == 1) {
|
||||
ord = (long)*PyUnicode_AS_UNICODE(obj);
|
||||
ord = (long)PyUnicode_READ_CHAR(obj, 0);
|
||||
return PyLong_FromLong(ord);
|
||||
}
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
if (size == 2) {
|
||||
/* Decode a valid surrogate pair */
|
||||
int c0 = PyUnicode_AS_UNICODE(obj)[0];
|
||||
int c1 = PyUnicode_AS_UNICODE(obj)[1];
|
||||
if (0xD800 <= c0 && c0 <= 0xDBFF &&
|
||||
0xDC00 <= c1 && c1 <= 0xDFFF) {
|
||||
ord = ((((c0 & 0x03FF) << 10) | (c1 & 0x03FF)) +
|
||||
0x00010000);
|
||||
return PyLong_FromLong(ord);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (PyByteArray_Check(obj)) {
|
||||
/* XXX Hopefully this is temporary */
|
||||
|
|
|
@ -2054,7 +2054,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
/* Inline the PyDict_GetItem() calls.
|
||||
WARNING: this is an extreme speed hack.
|
||||
Do not try this at home. */
|
||||
Py_hash_t hash = ((PyUnicodeObject *)w)->hash;
|
||||
Py_hash_t hash = ((PyASCIIObject *)w)->hash;
|
||||
if (hash != -1) {
|
||||
PyDictObject *d;
|
||||
PyDictEntry *e;
|
||||
|
@ -4456,7 +4456,8 @@ import_all_from(PyObject *locals, PyObject *v)
|
|||
}
|
||||
if (skip_leading_underscores &&
|
||||
PyUnicode_Check(name) &&
|
||||
PyUnicode_AS_UNICODE(name)[0] == '_')
|
||||
PyUnicode_READY(name) != -1 &&
|
||||
PyUnicode_READ_CHAR(name, 0) == '_')
|
||||
{
|
||||
Py_DECREF(name);
|
||||
continue;
|
||||
|
@ -4520,6 +4521,14 @@ unicode_concatenate(PyObject *v, PyObject *w,
|
|||
{
|
||||
/* This function implements 'variable += expr' when both arguments
|
||||
are (Unicode) strings. */
|
||||
|
||||
w = PyUnicode_Concat(v, w);
|
||||
Py_DECREF(v);
|
||||
return w;
|
||||
|
||||
/* XXX: This optimization is currently disabled as unicode objects in the
|
||||
new flexible representation are not in-place resizable anymore. */
|
||||
#if 0
|
||||
Py_ssize_t v_len = PyUnicode_GET_SIZE(v);
|
||||
Py_ssize_t w_len = PyUnicode_GET_SIZE(w);
|
||||
Py_ssize_t new_len = v_len + w_len;
|
||||
|
@ -4570,7 +4579,8 @@ unicode_concatenate(PyObject *v, PyObject *w,
|
|||
}
|
||||
}
|
||||
|
||||
if (Py_REFCNT(v) == 1 && !PyUnicode_CHECK_INTERNED(v)) {
|
||||
if (Py_REFCNT(v) == 1 && !PyUnicode_CHECK_INTERNED(v) &&
|
||||
!PyUnicode_IS_COMPACT((PyUnicodeObject *)v)) {
|
||||
/* Now we own the last reference to 'v', so we can resize it
|
||||
* in-place.
|
||||
*/
|
||||
|
@ -4594,6 +4604,7 @@ unicode_concatenate(PyObject *v, PyObject *w,
|
|||
Py_DECREF(v);
|
||||
return w;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_EXECUTION_PROFILE
|
||||
|
|
|
@ -513,27 +513,25 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
|
|||
|
||||
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
||||
{
|
||||
PyObject *restuple;
|
||||
Py_ssize_t start;
|
||||
Py_ssize_t end;
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t start, end, i, len;
|
||||
|
||||
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
|
||||
PyObject *res;
|
||||
Py_UNICODE *p;
|
||||
int kind;
|
||||
void *data;
|
||||
if (PyUnicodeEncodeError_GetStart(exc, &start))
|
||||
return NULL;
|
||||
if (PyUnicodeEncodeError_GetEnd(exc, &end))
|
||||
return NULL;
|
||||
res = PyUnicode_FromUnicode(NULL, end-start);
|
||||
len = end - start;
|
||||
res = PyUnicode_New(len, '?');
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
for (p = PyUnicode_AS_UNICODE(res), i = start;
|
||||
i<end; ++p, ++i)
|
||||
*p = '?';
|
||||
restuple = Py_BuildValue("(On)", res, end);
|
||||
Py_DECREF(res);
|
||||
return restuple;
|
||||
kind = PyUnicode_KIND(res);
|
||||
data = PyUnicode_DATA(res);
|
||||
for (i = 0; i < len; ++i)
|
||||
PyUnicode_WRITE(kind, data, i, '?');
|
||||
return Py_BuildValue("(Nn)", res, end);
|
||||
}
|
||||
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
|
||||
Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
|
@ -543,20 +541,21 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
|||
}
|
||||
else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
|
||||
PyObject *res;
|
||||
Py_UNICODE *p;
|
||||
int kind;
|
||||
void *data;
|
||||
if (PyUnicodeTranslateError_GetStart(exc, &start))
|
||||
return NULL;
|
||||
if (PyUnicodeTranslateError_GetEnd(exc, &end))
|
||||
return NULL;
|
||||
res = PyUnicode_FromUnicode(NULL, end-start);
|
||||
len = end - start;
|
||||
res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
for (p = PyUnicode_AS_UNICODE(res), i = start;
|
||||
i<end; ++p, ++i)
|
||||
*p = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
restuple = Py_BuildValue("(On)", res, end);
|
||||
Py_DECREF(res);
|
||||
return restuple;
|
||||
kind = PyUnicode_KIND(res);
|
||||
data = PyUnicode_DATA(res);
|
||||
for (i=0; i < len; i++)
|
||||
PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
|
||||
return Py_BuildValue("(Nn)", res, end);
|
||||
}
|
||||
else {
|
||||
wrong_exception_type(exc);
|
||||
|
@ -671,10 +670,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
|
|||
}
|
||||
}
|
||||
|
||||
static Py_UNICODE hexdigits[] = {
|
||||
'0', '1', '2', '3', '4', '5', '6', '7',
|
||||
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
|
||||
};
|
||||
static const char *hexdigits = "0123456789abcdef";
|
||||
|
||||
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
||||
{
|
||||
|
|
|
@ -197,16 +197,17 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
|
|||
{
|
||||
/* Name mangling: __private becomes _classname__private.
|
||||
This is independent from how the name is used. */
|
||||
const Py_UNICODE *p, *name = PyUnicode_AS_UNICODE(ident);
|
||||
Py_UNICODE *buffer;
|
||||
size_t nlen, plen;
|
||||
PyObject *result;
|
||||
size_t nlen, plen, ipriv;
|
||||
Py_UCS4 maxchar;
|
||||
if (privateobj == NULL || !PyUnicode_Check(privateobj) ||
|
||||
name == NULL || name[0] != '_' || name[1] != '_') {
|
||||
PyUnicode_READ_CHAR(ident, 0) != '_' ||
|
||||
PyUnicode_READ_CHAR(ident, 1) != '_') {
|
||||
Py_INCREF(ident);
|
||||
return ident;
|
||||
}
|
||||
p = PyUnicode_AS_UNICODE(privateobj);
|
||||
nlen = Py_UNICODE_strlen(name);
|
||||
nlen = PyUnicode_GET_LENGTH(ident);
|
||||
plen = PyUnicode_GET_LENGTH(privateobj);
|
||||
/* Don't mangle __id__ or names with dots.
|
||||
|
||||
The only time a name with a dot can occur is when
|
||||
|
@ -216,32 +217,37 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
|
|||
TODO(jhylton): Decide whether we want to support
|
||||
mangling of the module name, e.g. __M.X.
|
||||
*/
|
||||
if ((name[nlen-1] == '_' && name[nlen-2] == '_')
|
||||
|| Py_UNICODE_strchr(name, '.')) {
|
||||
if ((PyUnicode_READ_CHAR(ident, nlen-1) == '_' &&
|
||||
PyUnicode_READ_CHAR(ident, nlen-2) == '_') ||
|
||||
PyUnicode_FindChar(ident, '.', 0, nlen, 1) != -1) {
|
||||
Py_INCREF(ident);
|
||||
return ident; /* Don't mangle __whatever__ */
|
||||
}
|
||||
/* Strip leading underscores from class name */
|
||||
while (*p == '_')
|
||||
p++;
|
||||
if (*p == 0) {
|
||||
ipriv = 0;
|
||||
while (PyUnicode_READ_CHAR(privateobj, ipriv) == '_')
|
||||
ipriv++;
|
||||
if (ipriv == plen) {
|
||||
Py_INCREF(ident);
|
||||
return ident; /* Don't mangle if class is just underscores */
|
||||
}
|
||||
plen = Py_UNICODE_strlen(p);
|
||||
plen -= ipriv;
|
||||
|
||||
assert(1 <= PY_SSIZE_T_MAX - nlen);
|
||||
assert(1 + nlen <= PY_SSIZE_T_MAX - plen);
|
||||
|
||||
ident = PyUnicode_FromStringAndSize(NULL, 1 + nlen + plen);
|
||||
if (!ident)
|
||||
maxchar = PyUnicode_MAX_CHAR_VALUE(ident);
|
||||
if (PyUnicode_MAX_CHAR_VALUE(privateobj) > maxchar)
|
||||
maxchar = PyUnicode_MAX_CHAR_VALUE(privateobj);
|
||||
|
||||
result = PyUnicode_New(1 + nlen + plen, maxchar);
|
||||
if (!result)
|
||||
return 0;
|
||||
/* ident = "_" + p[:plen] + name # i.e. 1+plen+nlen bytes */
|
||||
buffer = PyUnicode_AS_UNICODE(ident);
|
||||
buffer[0] = '_';
|
||||
Py_UNICODE_strncpy(buffer+1, p, plen);
|
||||
Py_UNICODE_strcpy(buffer+1+plen, name);
|
||||
return ident;
|
||||
/* ident = "_" + priv[ipriv:] + ident # i.e. 1+plen+nlen bytes */
|
||||
PyUnicode_WRITE(PyUnicode_KIND(result), PyUnicode_DATA(result), 0, '_');
|
||||
PyUnicode_CopyCharacters(result, 1, privateobj, ipriv, plen);
|
||||
PyUnicode_CopyCharacters(result, plen+1, ident, 0, nlen);
|
||||
return result;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -2085,22 +2091,27 @@ compiler_import_as(struct compiler *c, identifier name, identifier asname)
|
|||
If there is a dot in name, we need to split it and emit a
|
||||
LOAD_ATTR for each name.
|
||||
*/
|
||||
const Py_UNICODE *src = PyUnicode_AS_UNICODE(name);
|
||||
const Py_UNICODE *dot = Py_UNICODE_strchr(src, '.');
|
||||
if (dot) {
|
||||
Py_ssize_t dot = PyUnicode_FindChar(name, '.', 0,
|
||||
PyUnicode_GET_LENGTH(name), 1);
|
||||
if (dot == -2)
|
||||
return -1;
|
||||
if (dot != -1) {
|
||||
/* Consume the base module name to get the first attribute */
|
||||
src = dot + 1;
|
||||
while (dot) {
|
||||
/* NB src is only defined when dot != NULL */
|
||||
Py_ssize_t pos = dot + 1;
|
||||
while (dot != -1) {
|
||||
PyObject *attr;
|
||||
dot = Py_UNICODE_strchr(src, '.');
|
||||
attr = PyUnicode_FromUnicode(src,
|
||||
dot ? dot - src : Py_UNICODE_strlen(src));
|
||||
dot = PyUnicode_FindChar(name, '.', pos,
|
||||
PyUnicode_GET_LENGTH(name), 1);
|
||||
if (dot == -2)
|
||||
return -1;
|
||||
attr = PyUnicode_Substring(name, pos,
|
||||
(dot != -1) ? dot :
|
||||
PyUnicode_GET_LENGTH(name));
|
||||
if (!attr)
|
||||
return -1;
|
||||
ADDOP_O(c, LOAD_ATTR, attr, names);
|
||||
Py_DECREF(attr);
|
||||
src = dot + 1;
|
||||
pos = dot + 1;
|
||||
}
|
||||
}
|
||||
return compiler_nameop(c, asname, Store);
|
||||
|
@ -2139,13 +2150,12 @@ compiler_import(struct compiler *c, stmt_ty s)
|
|||
}
|
||||
else {
|
||||
identifier tmp = alias->name;
|
||||
const Py_UNICODE *base = PyUnicode_AS_UNICODE(alias->name);
|
||||
Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
|
||||
if (dot)
|
||||
tmp = PyUnicode_FromUnicode(base,
|
||||
dot - base);
|
||||
Py_ssize_t dot = PyUnicode_FindChar(
|
||||
alias->name, '.', 0, PyUnicode_GET_LENGTH(alias->name), 1);
|
||||
if (dot != -1)
|
||||
tmp = PyUnicode_Substring(alias->name, 0, dot);
|
||||
r = compiler_nameop(c, tmp, Store);
|
||||
if (dot) {
|
||||
if (dot != -1) {
|
||||
Py_DECREF(tmp);
|
||||
}
|
||||
if (!r)
|
||||
|
@ -2208,7 +2218,7 @@ compiler_from_import(struct compiler *c, stmt_ty s)
|
|||
alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
|
||||
identifier store_name;
|
||||
|
||||
if (i == 0 && *PyUnicode_AS_UNICODE(alias->name) == '*') {
|
||||
if (i == 0 && PyUnicode_READ_CHAR(alias->name, 0) == '*') {
|
||||
assert(n == 1);
|
||||
ADDOP(c, IMPORT_STAR);
|
||||
return 1;
|
||||
|
@ -2522,7 +2532,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx)
|
|||
}
|
||||
|
||||
/* XXX Leave assert here, but handle __doc__ and the like better */
|
||||
assert(scope || PyUnicode_AS_UNICODE(name)[0] == '_');
|
||||
assert(scope || PyUnicode_READ_CHAR(name, 0) == '_');
|
||||
|
||||
switch (optype) {
|
||||
case OP_DEREF:
|
||||
|
@ -3045,8 +3055,7 @@ expr_constant(struct compiler *c, expr_ty e)
|
|||
return PyObject_IsTrue(e->v.Str.s);
|
||||
case Name_kind:
|
||||
/* optimize away names that can't be reassigned */
|
||||
id = PyBytes_AS_STRING(
|
||||
_PyUnicode_AsDefaultEncodedString(e->v.Name.id));
|
||||
id = PyUnicode_AsUTF8(e->v.Name.id);
|
||||
if (strcmp(id, "True") == 0) return 1;
|
||||
if (strcmp(id, "False") == 0) return 0;
|
||||
if (strcmp(id, "None") == 0) return 0;
|
||||
|
|
|
@ -395,7 +395,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
|
|||
/* remove trailing cr/lf and dots */
|
||||
while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.'))
|
||||
s_buf[--len] = L'\0';
|
||||
message = PyUnicode_FromUnicode(s_buf, len);
|
||||
message = PyUnicode_FromWideChar(s_buf, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -487,7 +487,7 @@ PyObject *PyErr_SetExcFromWindowsErrWithFilenameObject(
|
|||
/* remove trailing cr/lf and dots */
|
||||
while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.'))
|
||||
s_buf[--len] = L'\0';
|
||||
message = PyUnicode_FromUnicode(s_buf, len);
|
||||
message = PyUnicode_FromWideChar(s_buf, len);
|
||||
}
|
||||
|
||||
if (message == NULL)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -546,9 +546,6 @@ convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
|
||||
|
||||
|
||||
#define UNICODE_DEFAULT_ENCODING(arg) \
|
||||
_PyUnicode_AsDefaultEncodedString(arg)
|
||||
|
||||
/* Format an error message generated by convertsimple(). */
|
||||
|
||||
static char *
|
||||
|
@ -611,7 +608,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
|
||||
const char *format = *p_format;
|
||||
char c = *format++;
|
||||
PyObject *uarg;
|
||||
char *sarg;
|
||||
|
||||
switch (c) {
|
||||
|
||||
|
@ -838,8 +835,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
case 'C': {/* unicode char */
|
||||
int *p = va_arg(*p_va, int *);
|
||||
if (PyUnicode_Check(arg) &&
|
||||
PyUnicode_GET_SIZE(arg) == 1)
|
||||
*p = PyUnicode_AS_UNICODE(arg)[0];
|
||||
PyUnicode_GET_LENGTH(arg) == 1) {
|
||||
int kind = PyUnicode_KIND(arg);
|
||||
void *data = PyUnicode_DATA(arg);
|
||||
*p = PyUnicode_READ(kind, data, 0);
|
||||
}
|
||||
else
|
||||
return converterr("a unicode character", arg, msgbuf, bufsize);
|
||||
break;
|
||||
|
@ -889,13 +889,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
if (c == 'z' && arg == Py_None)
|
||||
PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0);
|
||||
else if (PyUnicode_Check(arg)) {
|
||||
uarg = UNICODE_DEFAULT_ENCODING(arg);
|
||||
if (uarg == NULL)
|
||||
Py_ssize_t len;
|
||||
sarg = PyUnicode_AsUTF8AndSize(arg, &len);
|
||||
if (sarg == NULL)
|
||||
return converterr(CONV_UNICODE,
|
||||
arg, msgbuf, bufsize);
|
||||
PyBuffer_FillInfo(p, arg,
|
||||
PyBytes_AS_STRING(uarg), PyBytes_GET_SIZE(uarg),
|
||||
1, 0);
|
||||
PyBuffer_FillInfo(p, arg, sarg, len, 1, 0);
|
||||
}
|
||||
else { /* any buffer-like object */
|
||||
char *buf;
|
||||
|
@ -918,12 +917,13 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
STORE_SIZE(0);
|
||||
}
|
||||
else if (PyUnicode_Check(arg)) {
|
||||
uarg = UNICODE_DEFAULT_ENCODING(arg);
|
||||
if (uarg == NULL)
|
||||
Py_ssize_t len;
|
||||
sarg = PyUnicode_AsUTF8AndSize(arg, &len);
|
||||
if (sarg == NULL)
|
||||
return converterr(CONV_UNICODE,
|
||||
arg, msgbuf, bufsize);
|
||||
*p = PyBytes_AS_STRING(uarg);
|
||||
STORE_SIZE(PyBytes_GET_SIZE(uarg));
|
||||
*p = sarg;
|
||||
STORE_SIZE(len);
|
||||
}
|
||||
else { /* any buffer-like object */
|
||||
/* XXX Really? */
|
||||
|
@ -937,22 +937,22 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
} else {
|
||||
/* "s" or "z" */
|
||||
char **p = va_arg(*p_va, char **);
|
||||
uarg = NULL;
|
||||
Py_ssize_t len;
|
||||
sarg = NULL;
|
||||
|
||||
if (c == 'z' && arg == Py_None)
|
||||
*p = NULL;
|
||||
else if (PyUnicode_Check(arg)) {
|
||||
uarg = UNICODE_DEFAULT_ENCODING(arg);
|
||||
if (uarg == NULL)
|
||||
sarg = PyUnicode_AsUTF8AndSize(arg, &len);
|
||||
if (sarg == NULL)
|
||||
return converterr(CONV_UNICODE,
|
||||
arg, msgbuf, bufsize);
|
||||
*p = PyBytes_AS_STRING(uarg);
|
||||
*p = sarg;
|
||||
}
|
||||
else
|
||||
return converterr(c == 'z' ? "str or None" : "str",
|
||||
arg, msgbuf, bufsize);
|
||||
if (*p != NULL && uarg != NULL &&
|
||||
(Py_ssize_t) strlen(*p) != PyBytes_GET_SIZE(uarg))
|
||||
if (*p != NULL && sarg != NULL && (Py_ssize_t) strlen(*p) != len)
|
||||
return converterr(
|
||||
c == 'z' ? "str without null bytes or None"
|
||||
: "str without null bytes",
|
||||
|
@ -976,6 +976,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
}
|
||||
else if (PyUnicode_Check(arg)) {
|
||||
*p = PyUnicode_AS_UNICODE(arg);
|
||||
if (*p == NULL)
|
||||
RETURN_ERR_OCCURRED;
|
||||
STORE_SIZE(PyUnicode_GET_SIZE(arg));
|
||||
}
|
||||
else
|
||||
|
@ -987,6 +989,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
*p = NULL;
|
||||
else if (PyUnicode_Check(arg)) {
|
||||
*p = PyUnicode_AS_UNICODE(arg);
|
||||
if (*p == NULL)
|
||||
RETURN_ERR_OCCURRED;
|
||||
if (Py_UNICODE_strlen(*p) != PyUnicode_GET_SIZE(arg))
|
||||
return converterr(
|
||||
"str without null character or None",
|
||||
|
|
339
Python/import.c
339
Python/import.c
|
@ -118,12 +118,12 @@ typedef unsigned short mode_t;
|
|||
#define MAGIC (3190 | ((long)'\r'<<16) | ((long)'\n'<<24))
|
||||
#define TAG "cpython-" MAJOR MINOR;
|
||||
#define CACHEDIR "__pycache__"
|
||||
static const Py_UNICODE CACHEDIR_UNICODE[] = {
|
||||
static const Py_UCS4 CACHEDIR_UNICODE[] = {
|
||||
'_', '_', 'p', 'y', 'c', 'a', 'c', 'h', 'e', '_', '_', '\0'};
|
||||
/* Current magic word and string tag as globals. */
|
||||
static long pyc_magic = MAGIC;
|
||||
static const char *pyc_tag = TAG;
|
||||
static const Py_UNICODE PYC_TAG_UNICODE[] = {
|
||||
static const Py_UCS4 PYC_TAG_UNICODE[] = {
|
||||
'c', 'p', 'y', 't', 'h', 'o', 'n', '-', PY_MAJOR_VERSION + 48, PY_MINOR_VERSION + 48, '\0'};
|
||||
#undef QUOTE
|
||||
#undef STRIFY
|
||||
|
@ -762,7 +762,7 @@ remove_module(PyObject *name)
|
|||
|
||||
static PyObject * get_sourcefile(PyObject *filename);
|
||||
static PyObject *make_source_pathname(PyObject *pathname);
|
||||
static PyObject* make_compiled_pathname(Py_UNICODE *pathname, int debug);
|
||||
static PyObject* make_compiled_pathname(PyObject *pathname, int debug);
|
||||
|
||||
/* Execute a code object in a module and return the module object
|
||||
* WITH INCREMENTED REFERENCE COUNT. If an error occurs, name is
|
||||
|
@ -886,10 +886,10 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
|
|||
/* Like strrchr(string, '/') but searches for the rightmost of either SEP
|
||||
or ALTSEP, if the latter is defined.
|
||||
*/
|
||||
static Py_UNICODE*
|
||||
rightmost_sep(Py_UNICODE *s)
|
||||
static Py_UCS4*
|
||||
rightmost_sep(Py_UCS4 *s)
|
||||
{
|
||||
Py_UNICODE *found, c;
|
||||
Py_UCS4 *found, c;
|
||||
for (found = NULL; (c = *s); s++) {
|
||||
if (c == SEP
|
||||
#ifdef ALTSEP
|
||||
|
@ -912,15 +912,21 @@ rightmost_sep(Py_UNICODE *s)
|
|||
foo.py -> __pycache__/foo.<tag>.pyc */
|
||||
|
||||
static PyObject*
|
||||
make_compiled_pathname(Py_UNICODE *pathname, int debug)
|
||||
make_compiled_pathname(PyObject *pathstr, int debug)
|
||||
{
|
||||
Py_UNICODE buf[MAXPATHLEN];
|
||||
Py_UCS4 *pathname;
|
||||
Py_UCS4 buf[MAXPATHLEN];
|
||||
size_t buflen = (size_t)MAXPATHLEN;
|
||||
size_t len = Py_UNICODE_strlen(pathname);
|
||||
size_t len;
|
||||
size_t i, save;
|
||||
Py_UNICODE *pos;
|
||||
Py_UCS4 *pos;
|
||||
int sep = SEP;
|
||||
|
||||
pathname = PyUnicode_AsUCS4Copy(pathstr);
|
||||
if (!pathname)
|
||||
return NULL;
|
||||
len = Py_UCS4_strlen(pathname);
|
||||
|
||||
/* Sanity check that the buffer has roughly enough space to hold what
|
||||
will eventually be the full path to the compiled file. The 5 extra
|
||||
bytes include the slash after __pycache__, the two extra dots, the
|
||||
|
@ -930,8 +936,10 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
|
|||
sanity check before writing the extension to ensure we do not
|
||||
overflow the buffer.
|
||||
*/
|
||||
if (len + Py_UNICODE_strlen(CACHEDIR_UNICODE) + Py_UNICODE_strlen(PYC_TAG_UNICODE) + 5 > buflen)
|
||||
if (len + Py_UCS4_strlen(CACHEDIR_UNICODE) + Py_UCS4_strlen(PYC_TAG_UNICODE) + 5 > buflen) {
|
||||
PyMem_Free(pathname);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Find the last path separator and copy everything from the start of
|
||||
the source string up to and including the separator.
|
||||
|
@ -943,24 +951,28 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
|
|||
else {
|
||||
sep = *pos;
|
||||
i = pos - pathname + 1;
|
||||
Py_UNICODE_strncpy(buf, pathname, i);
|
||||
Py_UCS4_strncpy(buf, pathname, i);
|
||||
}
|
||||
|
||||
save = i;
|
||||
buf[i++] = '\0';
|
||||
/* Add __pycache__/ */
|
||||
Py_UNICODE_strcat(buf, CACHEDIR_UNICODE);
|
||||
i += Py_UNICODE_strlen(CACHEDIR_UNICODE) - 1;
|
||||
Py_UCS4_strcat(buf, CACHEDIR_UNICODE);
|
||||
i += Py_UCS4_strlen(CACHEDIR_UNICODE) - 1;
|
||||
buf[i++] = sep;
|
||||
buf[i] = '\0';
|
||||
/* Add the base filename, but remove the .py or .pyw extension, since
|
||||
the tag name must go before the extension.
|
||||
*/
|
||||
Py_UNICODE_strcat(buf, pathname + save);
|
||||
pos = Py_UNICODE_strrchr(buf + i, '.');
|
||||
Py_UCS4_strcat(buf, pathname + save);
|
||||
pos = Py_UCS4_strrchr(buf + i, '.');
|
||||
if (pos != NULL)
|
||||
*++pos = '\0';
|
||||
Py_UNICODE_strcat(buf, PYC_TAG_UNICODE);
|
||||
|
||||
/* pathname is not used from here on. */
|
||||
PyMem_Free(pathname);
|
||||
|
||||
Py_UCS4_strcat(buf, PYC_TAG_UNICODE);
|
||||
/* The length test above assumes that we're only adding one character
|
||||
to the end of what would normally be the extension. What if there
|
||||
is no extension, or the string ends in '.' or '.p', and otherwise
|
||||
|
@ -1010,7 +1022,7 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
|
|||
#if 0
|
||||
printf("strlen(buf): %d; buflen: %d\n", (int)strlen(buf), (int)buflen);
|
||||
#endif
|
||||
len = Py_UNICODE_strlen(buf);
|
||||
len = Py_UCS4_strlen(buf);
|
||||
if (len + 5 > buflen)
|
||||
return NULL;
|
||||
buf[len] = '.'; len++;
|
||||
|
@ -1018,7 +1030,7 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
|
|||
buf[len] = 'y'; len++;
|
||||
buf[len] = debug ? 'c' : 'o'; len++;
|
||||
assert(len <= buflen);
|
||||
return PyUnicode_FromUnicode(buf, len);
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, len);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1033,14 +1045,16 @@ make_compiled_pathname(Py_UNICODE *pathname, int debug)
|
|||
static PyObject*
|
||||
make_source_pathname(PyObject *pathobj)
|
||||
{
|
||||
Py_UNICODE buf[MAXPATHLEN];
|
||||
Py_UNICODE *pathname;
|
||||
Py_UNICODE *left, *right, *dot0, *dot1, sep;
|
||||
Py_UCS4 buf[MAXPATHLEN];
|
||||
Py_UCS4 *pathname;
|
||||
Py_UCS4 *left, *right, *dot0, *dot1, sep;
|
||||
size_t i, j;
|
||||
|
||||
if (PyUnicode_GET_SIZE(pathobj) > MAXPATHLEN)
|
||||
if (PyUnicode_GET_LENGTH(pathobj) > MAXPATHLEN)
|
||||
return NULL;
|
||||
pathname = PyUnicode_AsUCS4Copy(pathobj);
|
||||
if (!pathname)
|
||||
return NULL;
|
||||
pathname = PyUnicode_AS_UNICODE(pathobj);
|
||||
|
||||
/* Look back two slashes from the end. In between these two slashes
|
||||
must be the string __pycache__ or this is not a PEP 3147 style
|
||||
|
@ -1057,31 +1071,35 @@ make_source_pathname(PyObject *pathobj)
|
|||
left = pathname;
|
||||
else
|
||||
left++;
|
||||
if (right-left != Py_UNICODE_strlen(CACHEDIR_UNICODE) ||
|
||||
Py_UNICODE_strncmp(left, CACHEDIR_UNICODE, right-left) != 0)
|
||||
return NULL;
|
||||
if (right-left != Py_UCS4_strlen(CACHEDIR_UNICODE) ||
|
||||
Py_UCS4_strncmp(left, CACHEDIR_UNICODE, right-left) != 0)
|
||||
goto error;
|
||||
|
||||
/* Now verify that the path component to the right of the last slash
|
||||
has two dots in it.
|
||||
*/
|
||||
if ((dot0 = Py_UNICODE_strchr(right + 1, '.')) == NULL)
|
||||
return NULL;
|
||||
if ((dot1 = Py_UNICODE_strchr(dot0 + 1, '.')) == NULL)
|
||||
return NULL;
|
||||
if ((dot0 = Py_UCS4_strchr(right + 1, '.')) == NULL)
|
||||
goto error;
|
||||
if ((dot1 = Py_UCS4_strchr(dot0 + 1, '.')) == NULL)
|
||||
goto error;
|
||||
/* Too many dots? */
|
||||
if (Py_UNICODE_strchr(dot1 + 1, '.') != NULL)
|
||||
return NULL;
|
||||
if (Py_UCS4_strchr(dot1 + 1, '.') != NULL)
|
||||
goto error;
|
||||
|
||||
/* This is a PEP 3147 path. Start by copying everything from the
|
||||
start of pathname up to and including the leftmost slash. Then
|
||||
copy the file's basename, removing the magic tag and adding a .py
|
||||
suffix.
|
||||
*/
|
||||
Py_UNICODE_strncpy(buf, pathname, (i=left-pathname));
|
||||
Py_UNICODE_strncpy(buf+i, right+1, (j=dot0-right));
|
||||
Py_UCS4_strncpy(buf, pathname, (i=left-pathname));
|
||||
Py_UCS4_strncpy(buf+i, right+1, (j=dot0-right));
|
||||
buf[i+j] = 'p';
|
||||
buf[i+j+1] = 'y';
|
||||
return PyUnicode_FromUnicode(buf, i+j+2);
|
||||
PyMem_Free(pathname);
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, i+j+2);
|
||||
error:
|
||||
PyMem_Free(pathname);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Given a pathname for a Python source file, its time of last
|
||||
|
@ -1250,6 +1268,7 @@ static void
|
|||
write_compiled_module(PyCodeObject *co, PyObject *cpathname,
|
||||
struct stat *srcstat)
|
||||
{
|
||||
Py_UCS4 *cpathname_ucs4;
|
||||
FILE *fp;
|
||||
time_t mtime = srcstat->st_mtime;
|
||||
#ifdef MS_WINDOWS /* since Windows uses different permissions */
|
||||
|
@ -1267,18 +1286,23 @@ write_compiled_module(PyCodeObject *co, PyObject *cpathname,
|
|||
PyObject *cpathbytes;
|
||||
#endif
|
||||
PyObject *dirname;
|
||||
Py_UNICODE *dirsep;
|
||||
Py_UCS4 *dirsep;
|
||||
int res, ok;
|
||||
|
||||
/* Ensure that the __pycache__ directory exists. */
|
||||
dirsep = rightmost_sep(PyUnicode_AS_UNICODE(cpathname));
|
||||
cpathname_ucs4 = PyUnicode_AsUCS4Copy(cpathname);
|
||||
if (!cpathname_ucs4)
|
||||
return;
|
||||
dirsep = rightmost_sep(cpathname_ucs4);
|
||||
if (dirsep == NULL) {
|
||||
if (Py_VerboseFlag)
|
||||
PySys_FormatStderr("# no %s path found %R\n", CACHEDIR, cpathname);
|
||||
return;
|
||||
}
|
||||
dirname = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(cpathname),
|
||||
dirsep - PyUnicode_AS_UNICODE(cpathname));
|
||||
dirname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
cpathname_ucs4,
|
||||
dirsep - cpathname_ucs4);
|
||||
PyMem_Free(cpathname_ucs4);
|
||||
if (dirname == NULL) {
|
||||
PyErr_Clear();
|
||||
return;
|
||||
|
@ -1461,9 +1485,7 @@ load_source_module(PyObject *name, PyObject *pathname, FILE *fp)
|
|||
goto error;
|
||||
}
|
||||
#endif
|
||||
cpathname = make_compiled_pathname(
|
||||
PyUnicode_AS_UNICODE(pathname),
|
||||
!Py_OptimizeFlag);
|
||||
cpathname = make_compiled_pathname(pathname, !Py_OptimizeFlag);
|
||||
|
||||
if (cpathname != NULL)
|
||||
fpc = check_compiled_module(pathname, st.st_mtime, cpathname);
|
||||
|
@ -1512,16 +1534,18 @@ static PyObject *
|
|||
get_sourcefile(PyObject *filename)
|
||||
{
|
||||
Py_ssize_t len;
|
||||
Py_UNICODE *fileuni;
|
||||
Py_UCS4 *fileuni;
|
||||
PyObject *py;
|
||||
struct stat statbuf;
|
||||
|
||||
len = PyUnicode_GET_SIZE(filename);
|
||||
len = PyUnicode_GET_LENGTH(filename);
|
||||
if (len == 0)
|
||||
Py_RETURN_NONE;
|
||||
|
||||
/* don't match *.pyc or *.pyo? */
|
||||
fileuni = PyUnicode_AS_UNICODE(filename);
|
||||
fileuni = PyUnicode_AsUCS4Copy(filename);
|
||||
if (!fileuni)
|
||||
return NULL;
|
||||
if (len < 5
|
||||
|| fileuni[len-4] != '.'
|
||||
|| (fileuni[len-3] != 'p' && fileuni[len-3] != 'P')
|
||||
|
@ -1535,7 +1559,7 @@ get_sourcefile(PyObject *filename)
|
|||
py = make_source_pathname(filename);
|
||||
if (py == NULL) {
|
||||
PyErr_Clear();
|
||||
py = PyUnicode_FromUnicode(fileuni, len - 1);
|
||||
py = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, fileuni, len - 1);
|
||||
}
|
||||
if (py == NULL)
|
||||
goto error;
|
||||
|
@ -1548,6 +1572,7 @@ get_sourcefile(PyObject *filename)
|
|||
error:
|
||||
PyErr_Clear();
|
||||
unchanged:
|
||||
PyMem_Free(fileuni);
|
||||
Py_INCREF(filename);
|
||||
return filename;
|
||||
}
|
||||
|
@ -1739,8 +1764,7 @@ find_module_path(PyObject *fullname, PyObject *name, PyObject *path,
|
|||
PyObject *path_hooks, PyObject *path_importer_cache,
|
||||
PyObject **p_path, PyObject **p_loader, struct filedescr **p_fd)
|
||||
{
|
||||
Py_UNICODE buf[MAXPATHLEN+1];
|
||||
Py_ssize_t buflen = MAXPATHLEN+1;
|
||||
Py_UCS4 buf[MAXPATHLEN+1];
|
||||
PyObject *path_unicode, *filename;
|
||||
Py_ssize_t len;
|
||||
struct stat statbuf;
|
||||
|
@ -1759,15 +1783,15 @@ find_module_path(PyObject *fullname, PyObject *name, PyObject *path,
|
|||
else
|
||||
return 0;
|
||||
|
||||
len = PyUnicode_GET_SIZE(path_unicode);
|
||||
if (len + 2 + PyUnicode_GET_SIZE(name) + MAXSUFFIXSIZE >= buflen) {
|
||||
len = PyUnicode_GET_LENGTH(path_unicode);
|
||||
if (!PyUnicode_AsUCS4(path_unicode, buf, PY_ARRAY_LENGTH(buf), 1)) {
|
||||
Py_DECREF(path_unicode);
|
||||
return 0; /* Too long */
|
||||
PyErr_Clear();
|
||||
return 0;
|
||||
}
|
||||
Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(path_unicode));
|
||||
Py_DECREF(path_unicode);
|
||||
|
||||
if (Py_UNICODE_strlen(buf) != len)
|
||||
if (Py_UCS4_strlen(buf) != len)
|
||||
return 0; /* path contains '\0' */
|
||||
|
||||
/* sys.path_hooks import hook */
|
||||
|
@ -1804,10 +1828,14 @@ find_module_path(PyObject *fullname, PyObject *name, PyObject *path,
|
|||
#endif
|
||||
)
|
||||
buf[len++] = SEP;
|
||||
Py_UNICODE_strcpy(buf+len, PyUnicode_AS_UNICODE(name));
|
||||
len += PyUnicode_GET_SIZE(name);
|
||||
if (!PyUnicode_AsUCS4(name, buf+len, PY_ARRAY_LENGTH(buf)-len, 1)) {
|
||||
PyErr_Clear();
|
||||
return 0;
|
||||
}
|
||||
len += PyUnicode_GET_LENGTH(name);
|
||||
|
||||
filename = PyUnicode_FromUnicode(buf, len);
|
||||
filename = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buf, len);
|
||||
if (filename == NULL)
|
||||
return -1;
|
||||
|
||||
|
@ -1989,6 +2017,12 @@ find_module(PyObject *fullname, PyObject *name, PyObject *search_path_list,
|
|||
if (p_loader != NULL)
|
||||
*p_loader = NULL;
|
||||
|
||||
if (PyUnicode_GET_LENGTH(name) > MAXPATHLEN) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"module name is too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* sys.meta_path import hook */
|
||||
if (p_loader != NULL) {
|
||||
PyObject *meta_path;
|
||||
|
@ -2704,7 +2738,7 @@ static PyObject *get_parent(PyObject *globals,
|
|||
int level);
|
||||
static PyObject *load_next(PyObject *mod, PyObject *altmod,
|
||||
PyObject *inputname, PyObject **p_outputname,
|
||||
Py_UNICODE *buf, Py_ssize_t *p_buflen,
|
||||
Py_UCS4 *buf, Py_ssize_t *p_buflen,
|
||||
Py_ssize_t bufsize);
|
||||
static int mark_miss(PyObject *name);
|
||||
static int ensure_fromlist(PyObject *mod, PyObject *fromlist,
|
||||
|
@ -2718,37 +2752,47 @@ static PyObject *
|
|||
import_module_level(PyObject *name, PyObject *globals, PyObject *locals,
|
||||
PyObject *fromlist, int level)
|
||||
{
|
||||
Py_UNICODE buf[MAXPATHLEN+1];
|
||||
Py_UCS4 buf[MAXPATHLEN+1];
|
||||
Py_ssize_t buflen;
|
||||
Py_ssize_t bufsize = MAXPATHLEN+1;
|
||||
PyObject *parent, *head, *next, *tail, *inputname, *outputname;
|
||||
PyObject *parent_name, *ensure_name;
|
||||
const Py_UNICODE *nameunicode;
|
||||
Py_ssize_t sep, altsep;
|
||||
|
||||
nameunicode = PyUnicode_AS_UNICODE(name);
|
||||
if (PyUnicode_READY(name))
|
||||
return NULL;
|
||||
|
||||
if (Py_UNICODE_strchr(nameunicode, SEP) != NULL
|
||||
sep = PyUnicode_FindChar(name, SEP, 0, PyUnicode_GET_LENGTH(name), 1);
|
||||
if (sep == -2)
|
||||
return NULL;
|
||||
#ifdef ALTSEP
|
||||
|| Py_UNICODE_strchr(nameunicode, ALTSEP) != NULL
|
||||
altsep = PyUnicode_FindChar(name, ALTSEP, 0, PyUnicode_GET_LENGTH(name), 1);
|
||||
if (altsep == -2)
|
||||
return NULL;
|
||||
#else
|
||||
altsep = -1;
|
||||
#endif
|
||||
) {
|
||||
if (sep != -1 || altsep != -1)
|
||||
{
|
||||
PyErr_SetString(PyExc_ImportError,
|
||||
"Import by filename is not supported.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
parent = get_parent(globals, &parent_name, level);
|
||||
if (parent == NULL)
|
||||
if (parent == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buflen = PyUnicode_GET_SIZE(parent_name);
|
||||
if (buflen+1 > bufsize) {
|
||||
if (PyUnicode_READY(parent_name))
|
||||
return NULL;
|
||||
buflen = PyUnicode_GET_LENGTH(parent_name);
|
||||
if (!PyUnicode_AsUCS4(parent_name, buf, PY_ARRAY_LENGTH(buf), 1)) {
|
||||
Py_DECREF(parent_name);
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Module name too long");
|
||||
return NULL;
|
||||
}
|
||||
Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(parent_name));
|
||||
Py_DECREF(parent_name);
|
||||
|
||||
head = load_next(parent, level < 0 ? Py_None : parent, name, &outputname,
|
||||
|
@ -2799,7 +2843,8 @@ import_module_level(PyObject *name, PyObject *globals, PyObject *locals,
|
|||
|
||||
Py_DECREF(head);
|
||||
|
||||
ensure_name = PyUnicode_FromUnicode(buf, Py_UNICODE_strlen(buf));
|
||||
ensure_name = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buf, Py_UCS4_strlen(buf));
|
||||
if (ensure_name == NULL) {
|
||||
Py_DECREF(tail);
|
||||
return NULL;
|
||||
|
@ -2859,8 +2904,6 @@ PyImport_ImportModuleLevel(const char *name, PyObject *globals, PyObject *locals
|
|||
static PyObject *
|
||||
get_parent(PyObject *globals, PyObject **p_name, int level)
|
||||
{
|
||||
Py_UNICODE name[MAXPATHLEN+1];
|
||||
const Py_ssize_t bufsize = MAXPATHLEN+1;
|
||||
PyObject *nameobj;
|
||||
|
||||
static PyObject *namestr = NULL;
|
||||
|
@ -2897,7 +2940,7 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
|
|||
"__package__ set to non-string");
|
||||
return NULL;
|
||||
}
|
||||
if (PyUnicode_GET_SIZE(pkgname) == 0) {
|
||||
if (PyUnicode_GET_LENGTH(pkgname) == 0) {
|
||||
if (level > 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Attempted relative import in non-package");
|
||||
|
@ -2905,12 +2948,8 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
|
|||
}
|
||||
goto return_none;
|
||||
}
|
||||
if (PyUnicode_GET_SIZE(pkgname)+1 > bufsize) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Package name too long");
|
||||
return NULL;
|
||||
}
|
||||
Py_UNICODE_strcpy(name, PyUnicode_AS_UNICODE(pkgname));
|
||||
Py_INCREF(pkgname);
|
||||
nameobj = pkgname;
|
||||
} else {
|
||||
/* __package__ not set, so figure it out and set it */
|
||||
modname = PyDict_GetItem(globals, namestr);
|
||||
|
@ -2922,74 +2961,71 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
|
|||
/* __path__ is set, so modname is already the package name */
|
||||
int error;
|
||||
|
||||
if (PyUnicode_GET_SIZE(modname)+1 > bufsize) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Module name too long");
|
||||
return NULL;
|
||||
}
|
||||
Py_UNICODE_strcpy(name, PyUnicode_AS_UNICODE(modname));
|
||||
error = PyDict_SetItem(globals, pkgstr, modname);
|
||||
if (error) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Could not set __package__");
|
||||
return NULL;
|
||||
}
|
||||
Py_INCREF(modname);
|
||||
nameobj = modname;
|
||||
} else {
|
||||
/* Normal module, so work out the package name if any */
|
||||
Py_UNICODE *start = PyUnicode_AS_UNICODE(modname);
|
||||
Py_UNICODE *lastdot = Py_UNICODE_strrchr(start, '.');
|
||||
Py_ssize_t len;
|
||||
int error;
|
||||
if (lastdot == NULL && level > 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Attempted relative import in non-package");
|
||||
len = PyUnicode_FindChar(modname, '.',
|
||||
0, PyUnicode_GET_LENGTH(modname), -1);
|
||||
if (len == -2)
|
||||
return NULL;
|
||||
}
|
||||
if (lastdot == NULL) {
|
||||
error = PyDict_SetItem(globals, pkgstr, Py_None);
|
||||
if (error) {
|
||||
if (len < 0) {
|
||||
if (level > 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Attempted relative import in non-package");
|
||||
return NULL;
|
||||
}
|
||||
if (PyDict_SetItem(globals, pkgstr, Py_None)) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Could not set __package__");
|
||||
return NULL;
|
||||
}
|
||||
goto return_none;
|
||||
}
|
||||
len = lastdot - start;
|
||||
if (len+1 > bufsize) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Module name too long");
|
||||
pkgname = PyUnicode_Substring(modname, 0, len);
|
||||
if (pkgname == NULL)
|
||||
return NULL;
|
||||
}
|
||||
Py_UNICODE_strncpy(name, start, len);
|
||||
name[len] = '\0';
|
||||
pkgname = PyUnicode_FromUnicode(name, len);
|
||||
if (pkgname == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
error = PyDict_SetItem(globals, pkgstr, pkgname);
|
||||
Py_DECREF(pkgname);
|
||||
if (error) {
|
||||
if (PyDict_SetItem(globals, pkgstr, pkgname)) {
|
||||
Py_DECREF(pkgname);
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Could not set __package__");
|
||||
return NULL;
|
||||
}
|
||||
nameobj = pkgname;
|
||||
}
|
||||
}
|
||||
while (--level > 0) {
|
||||
Py_UNICODE *dot = Py_UNICODE_strrchr(name, '.');
|
||||
if (dot == NULL) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Attempted relative import beyond "
|
||||
"toplevel package");
|
||||
if (level > 1) {
|
||||
Py_ssize_t dot, end = PyUnicode_GET_LENGTH(nameobj);
|
||||
PyObject *newname;
|
||||
while (--level > 0) {
|
||||
dot = PyUnicode_FindChar(nameobj, '.', 0, end, -1);
|
||||
if (dot == -2) {
|
||||
Py_DECREF(nameobj);
|
||||
return NULL;
|
||||
}
|
||||
if (dot < 0) {
|
||||
Py_DECREF(nameobj);
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Attempted relative import beyond "
|
||||
"toplevel package");
|
||||
return NULL;
|
||||
}
|
||||
end = dot;
|
||||
}
|
||||
newname = PyUnicode_Substring(nameobj, 0, end);
|
||||
Py_DECREF(nameobj);
|
||||
if (newname == NULL)
|
||||
return NULL;
|
||||
}
|
||||
*dot = '\0';
|
||||
nameobj = newname;
|
||||
}
|
||||
|
||||
nameobj = PyUnicode_FromUnicode(name, Py_UNICODE_strlen(name));
|
||||
if (nameobj == NULL)
|
||||
return NULL;
|
||||
|
||||
modules = PyImport_GetModuleDict();
|
||||
parent = PyDict_GetItem(modules, nameobj);
|
||||
if (parent == NULL) {
|
||||
|
@ -3021,7 +3057,7 @@ get_parent(PyObject *globals, PyObject **p_name, int level)
|
|||
If this is violated... Who cares? */
|
||||
|
||||
return_none:
|
||||
nameobj = PyUnicode_FromUnicode(NULL, 0);
|
||||
nameobj = PyUnicode_New(0, 0);
|
||||
if (nameobj == NULL)
|
||||
return NULL;
|
||||
*p_name = nameobj;
|
||||
|
@ -3032,28 +3068,28 @@ return_none:
|
|||
static PyObject *
|
||||
load_next(PyObject *mod, PyObject *altmod,
|
||||
PyObject *inputname, PyObject **p_outputname,
|
||||
Py_UNICODE *buf, Py_ssize_t *p_buflen, Py_ssize_t bufsize)
|
||||
Py_UCS4 *buf, Py_ssize_t *p_buflen, Py_ssize_t bufsize)
|
||||
{
|
||||
const Py_UNICODE *dot;
|
||||
Py_UCS4 *dot;
|
||||
Py_ssize_t len;
|
||||
Py_UNICODE *p;
|
||||
Py_UCS4 *p;
|
||||
PyObject *fullname, *name, *result, *mark_name;
|
||||
const Py_UNICODE *nameuni;
|
||||
const Py_UCS4 *nameuni;
|
||||
|
||||
*p_outputname = NULL;
|
||||
|
||||
if (PyUnicode_GET_SIZE(inputname) == 0) {
|
||||
if (PyUnicode_GET_LENGTH(inputname) == 0) {
|
||||
/* completely empty module name should only happen in
|
||||
'from . import' (or '__import__("")')*/
|
||||
Py_INCREF(mod);
|
||||
return mod;
|
||||
}
|
||||
|
||||
nameuni = PyUnicode_AS_UNICODE(inputname);
|
||||
nameuni = PyUnicode_AsUCS4Copy(inputname);
|
||||
if (nameuni == NULL)
|
||||
return NULL;
|
||||
|
||||
dot = Py_UNICODE_strchr(nameuni, '.');
|
||||
dot = Py_UCS4_strchr(nameuni, '.');
|
||||
if (dot != NULL) {
|
||||
len = dot - nameuni;
|
||||
if (len == 0) {
|
||||
|
@ -3063,7 +3099,7 @@ load_next(PyObject *mod, PyObject *altmod,
|
|||
}
|
||||
}
|
||||
else
|
||||
len = PyUnicode_GET_SIZE(inputname);
|
||||
len = PyUnicode_GET_LENGTH(inputname);
|
||||
|
||||
if (*p_buflen+len+1 >= bufsize) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
|
@ -3076,14 +3112,16 @@ load_next(PyObject *mod, PyObject *altmod,
|
|||
*p++ = '.';
|
||||
*p_buflen += 1;
|
||||
}
|
||||
Py_UNICODE_strncpy(p, nameuni, len);
|
||||
Py_UCS4_strncpy(p, nameuni, len);
|
||||
p[len] = '\0';
|
||||
*p_buflen += len;
|
||||
|
||||
fullname = PyUnicode_FromUnicode(buf, *p_buflen);
|
||||
fullname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buf, *p_buflen);
|
||||
if (fullname == NULL)
|
||||
return NULL;
|
||||
name = PyUnicode_FromUnicode(p, len);
|
||||
name = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
p, len);
|
||||
if (name == NULL) {
|
||||
Py_DECREF(fullname);
|
||||
return NULL;
|
||||
|
@ -3096,7 +3134,8 @@ load_next(PyObject *mod, PyObject *altmod,
|
|||
result = import_submodule(altmod, name, name);
|
||||
Py_DECREF(name);
|
||||
if (result != NULL && result != Py_None) {
|
||||
mark_name = PyUnicode_FromUnicode(buf, *p_buflen);
|
||||
mark_name = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buf, *p_buflen);
|
||||
if (mark_name == NULL) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
|
@ -3107,7 +3146,7 @@ load_next(PyObject *mod, PyObject *altmod,
|
|||
return NULL;
|
||||
}
|
||||
Py_DECREF(mark_name);
|
||||
Py_UNICODE_strncpy(buf, nameuni, len);
|
||||
Py_UCS4_strncpy(buf, nameuni, len);
|
||||
buf[len] = '\0';
|
||||
*p_buflen = len;
|
||||
}
|
||||
|
@ -3125,7 +3164,8 @@ load_next(PyObject *mod, PyObject *altmod,
|
|||
}
|
||||
|
||||
if (dot != NULL) {
|
||||
*p_outputname = PyUnicode_FromUnicode(dot+1, Py_UNICODE_strlen(dot+1));
|
||||
*p_outputname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
dot+1, Py_UCS4_strlen(dot+1));
|
||||
if (*p_outputname == NULL) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
|
@ -3166,7 +3206,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, PyObject *name,
|
|||
Py_DECREF(item);
|
||||
return 0;
|
||||
}
|
||||
if (PyUnicode_AS_UNICODE(item)[0] == '*') {
|
||||
if (PyUnicode_READ_CHAR(item, 0) == '*') {
|
||||
PyObject *all;
|
||||
Py_DECREF(item);
|
||||
/* See if the package defines __all__ */
|
||||
|
@ -3304,7 +3344,7 @@ PyImport_ReloadModule(PyObject *m)
|
|||
PyObject *modules = PyImport_GetModuleDict();
|
||||
PyObject *path_list = NULL, *loader = NULL, *existing_m = NULL;
|
||||
PyObject *nameobj, *bufobj, *subnameobj;
|
||||
Py_UNICODE *name, *subname;
|
||||
Py_UCS4 *name = NULL, *subname;
|
||||
struct filedescr *fdp;
|
||||
FILE *fp = NULL;
|
||||
PyObject *newm = NULL;
|
||||
|
@ -3321,7 +3361,7 @@ PyImport_ReloadModule(PyObject *m)
|
|||
return NULL;
|
||||
}
|
||||
nameobj = PyModule_GetNameObject(m);
|
||||
if (nameobj == NULL)
|
||||
if (nameobj == NULL || PyUnicode_READY(nameobj) == -1)
|
||||
return NULL;
|
||||
if (m != PyDict_GetItem(modules, nameobj)) {
|
||||
PyErr_Format(PyExc_ImportError,
|
||||
|
@ -3343,8 +3383,12 @@ PyImport_ReloadModule(PyObject *m)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
name = PyUnicode_AS_UNICODE(nameobj);
|
||||
subname = Py_UNICODE_strrchr(name, '.');
|
||||
name = PyUnicode_AsUCS4Copy(nameobj);
|
||||
if (!name) {
|
||||
Py_DECREF(nameobj);
|
||||
return NULL;
|
||||
}
|
||||
subname = Py_UCS4_strrchr(name, '.');
|
||||
if (subname == NULL) {
|
||||
Py_INCREF(nameobj);
|
||||
subnameobj = nameobj;
|
||||
|
@ -3353,7 +3397,8 @@ PyImport_ReloadModule(PyObject *m)
|
|||
PyObject *parentname, *parent;
|
||||
Py_ssize_t len;
|
||||
len = subname - name;
|
||||
parentname = PyUnicode_FromUnicode(name, len);
|
||||
parentname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
name, len);
|
||||
if (parentname == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
@ -3370,8 +3415,9 @@ PyImport_ReloadModule(PyObject *m)
|
|||
if (path_list == NULL)
|
||||
PyErr_Clear();
|
||||
subname++;
|
||||
len = PyUnicode_GET_SIZE(nameobj) - (len + 1);
|
||||
subnameobj = PyUnicode_FromUnicode(subname, len);
|
||||
len = PyUnicode_GET_LENGTH(nameobj) - (len + 1);
|
||||
subnameobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
subname, len);
|
||||
}
|
||||
if (subnameobj == NULL)
|
||||
goto error;
|
||||
|
@ -3403,6 +3449,7 @@ PyImport_ReloadModule(PyObject *m)
|
|||
error:
|
||||
imp_modules_reloading_clear();
|
||||
Py_DECREF(nameobj);
|
||||
PyMem_Free(name);
|
||||
return newm;
|
||||
}
|
||||
|
||||
|
@ -3910,9 +3957,7 @@ imp_cache_from_source(PyObject *self, PyObject *args, PyObject *kws)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
cpathname = make_compiled_pathname(
|
||||
PyUnicode_AS_UNICODE(pathname),
|
||||
debug);
|
||||
cpathname = make_compiled_pathname(pathname, debug);
|
||||
Py_DECREF(pathname);
|
||||
|
||||
if (cpathname == NULL) {
|
||||
|
@ -4105,7 +4150,7 @@ NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds)
|
|||
&pathobj))
|
||||
return -1;
|
||||
|
||||
if (PyUnicode_GET_SIZE(pathobj) == 0) {
|
||||
if (PyUnicode_GET_LENGTH(pathobj) == 0) {
|
||||
PyErr_SetString(PyExc_ImportError, "empty pathname");
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -311,9 +311,7 @@ w_object(PyObject *v, WFILE *p)
|
|||
}
|
||||
else if (PyUnicode_CheckExact(v)) {
|
||||
PyObject *utf8;
|
||||
utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
|
||||
PyUnicode_GET_SIZE(v),
|
||||
"surrogatepass");
|
||||
utf8 = PyUnicode_AsEncodedString(v, "utf8", "surrogatepass");
|
||||
if (utf8 == NULL) {
|
||||
p->depth--;
|
||||
p->error = WFERR_UNMARSHALLABLE;
|
||||
|
|
|
@ -183,24 +183,6 @@ fold_binops_on_constants(unsigned char *codestr, PyObject *consts, PyObject **ob
|
|||
break;
|
||||
case BINARY_SUBSCR:
|
||||
newconst = PyObject_GetItem(v, w);
|
||||
/* #5057: if v is unicode, there might be differences between
|
||||
wide and narrow builds in cases like '\U00012345'[0].
|
||||
Wide builds will return a non-BMP char, whereas narrow builds
|
||||
will return a surrogate. In both the cases skip the
|
||||
optimization in order to produce compatible pycs.
|
||||
*/
|
||||
if (newconst != NULL &&
|
||||
PyUnicode_Check(v) && PyUnicode_Check(newconst)) {
|
||||
Py_UNICODE ch = PyUnicode_AS_UNICODE(newconst)[0];
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
if (ch > 0xFFFF) {
|
||||
#else
|
||||
if (ch >= 0xD800 && ch <= 0xDFFF) {
|
||||
#endif
|
||||
Py_DECREF(newconst);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case BINARY_LSHIFT:
|
||||
newconst = PyNumber_Lshift(v, w);
|
||||
|
|
|
@ -1525,10 +1525,10 @@ symtable_visit_alias(struct symtable *st, alias_ty a)
|
|||
*/
|
||||
PyObject *store_name;
|
||||
PyObject *name = (a->asname == NULL) ? a->name : a->asname;
|
||||
const Py_UNICODE *base = PyUnicode_AS_UNICODE(name);
|
||||
Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
|
||||
if (dot) {
|
||||
store_name = PyUnicode_FromUnicode(base, dot - base);
|
||||
Py_ssize_t dot = PyUnicode_FindChar(name, '.', 0,
|
||||
PyUnicode_GET_LENGTH(name), 1);
|
||||
if (dot != -1) {
|
||||
store_name = PyUnicode_Substring(name, 0, dot);
|
||||
if (!store_name)
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -229,8 +229,8 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
|
|||
PyObject *lineobj = NULL;
|
||||
PyObject *res;
|
||||
char buf[MAXPATHLEN+1];
|
||||
Py_UNICODE *u, *p;
|
||||
Py_ssize_t len;
|
||||
int kind;
|
||||
void *data;
|
||||
|
||||
/* open the file */
|
||||
if (filename == NULL)
|
||||
|
@ -285,13 +285,16 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
|
|||
}
|
||||
|
||||
/* remove the indentation of the line */
|
||||
u = PyUnicode_AS_UNICODE(lineobj);
|
||||
len = PyUnicode_GET_SIZE(lineobj);
|
||||
for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
|
||||
len--;
|
||||
if (u != p) {
|
||||
kind = PyUnicode_KIND(lineobj);
|
||||
data = PyUnicode_DATA(lineobj);
|
||||
for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||
if (ch != ' ' && ch != '\t' && ch != '\014')
|
||||
break;
|
||||
}
|
||||
if (i) {
|
||||
PyObject *truncated;
|
||||
truncated = PyUnicode_FromUnicode(p, len);
|
||||
truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj));
|
||||
if (truncated) {
|
||||
Py_DECREF(lineobj);
|
||||
lineobj = truncated;
|
||||
|
@ -476,13 +479,26 @@ dump_hexadecimal(int width, unsigned long value, int fd)
|
|||
static void
|
||||
dump_ascii(int fd, PyObject *text)
|
||||
{
|
||||
PyASCIIObject *ascii = (PyASCIIObject *)text;
|
||||
Py_ssize_t i, size;
|
||||
int truncated;
|
||||
Py_UNICODE *u;
|
||||
char c;
|
||||
int kind;
|
||||
void *data;
|
||||
Py_UCS4 ch;
|
||||
|
||||
size = PyUnicode_GET_SIZE(text);
|
||||
u = PyUnicode_AS_UNICODE(text);
|
||||
size = ascii->length;
|
||||
kind = ascii->state.kind;
|
||||
if (ascii->state.compact) {
|
||||
if (ascii->state.ascii)
|
||||
data = ((PyASCIIObject*)text) + 1;
|
||||
else
|
||||
data = ((PyCompactUnicodeObject*)text) + 1;
|
||||
}
|
||||
else {
|
||||
data = ((PyUnicodeObject *)text)->data.any;
|
||||
if (data == NULL)
|
||||
return;
|
||||
}
|
||||
|
||||
if (MAX_STRING_LENGTH < size) {
|
||||
size = MAX_STRING_LENGTH;
|
||||
|
@ -491,27 +507,28 @@ dump_ascii(int fd, PyObject *text)
|
|||
else
|
||||
truncated = 0;
|
||||
|
||||
for (i=0; i < size; i++, u++) {
|
||||
if (*u < 128) {
|
||||
c = (char)*u;
|
||||
for (i=0; i < size; i++) {
|
||||
ch = PyUnicode_READ(kind, data, i);
|
||||
if (ch < 128) {
|
||||
char c = (char)ch;
|
||||
write(fd, &c, 1);
|
||||
}
|
||||
else if (*u < 256) {
|
||||
else if (ch < 256) {
|
||||
PUTS(fd, "\\x");
|
||||
dump_hexadecimal(2, *u, fd);
|
||||
dump_hexadecimal(2, ch, fd);
|
||||
}
|
||||
else
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
if (*u < 65536)
|
||||
if (ch < 65536)
|
||||
#endif
|
||||
{
|
||||
PUTS(fd, "\\u");
|
||||
dump_hexadecimal(4, *u, fd);
|
||||
dump_hexadecimal(4, ch, fd);
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
}
|
||||
else {
|
||||
PUTS(fd, "\\U");
|
||||
dump_hexadecimal(8, *u, fd);
|
||||
dump_hexadecimal(8, ch, fd);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -542,7 +559,7 @@ dump_frame(int fd, PyFrameObject *frame)
|
|||
}
|
||||
|
||||
/* PyFrame_GetLineNumber() was introduced in Python 2.7.0 and 3.2.0 */
|
||||
lineno = PyCode_Addr2Line(frame->f_code, frame->f_lasti);
|
||||
lineno = PyCode_Addr2Line(code, frame->f_lasti);
|
||||
PUTS(fd, ", line ");
|
||||
dump_decimal(fd, lineno);
|
||||
PUTS(fd, " in ");
|
||||
|
|
|
@ -51,6 +51,8 @@ _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned
|
|||
_type_void_ptr = gdb.lookup_type('void').pointer() # void*
|
||||
_type_size_t = gdb.lookup_type('size_t')
|
||||
|
||||
_is_pep393 = 'data' in [f.name for f in gdb.lookup_type('PyUnicodeObject').target().fields()]
|
||||
|
||||
SIZEOF_VOID_P = _type_void_ptr.sizeof
|
||||
|
||||
|
||||
|
@ -1123,11 +1125,30 @@ class PyUnicodeObjectPtr(PyObjectPtr):
|
|||
# Py_ssize_t length; /* Length of raw Unicode data in buffer */
|
||||
# Py_UNICODE *str; /* Raw Unicode buffer */
|
||||
field_length = long(self.field('length'))
|
||||
field_str = self.field('str')
|
||||
if _is_pep393:
|
||||
# Python 3.3 and newer
|
||||
may_have_surrogates = False
|
||||
field_state = long(self.field('state'))
|
||||
repr_kind = (field_state & 0xC) >> 2
|
||||
if repr_kind == 0:
|
||||
# string is not ready
|
||||
may_have_surrogates = True
|
||||
field_str = self.field('wstr')
|
||||
field_length = self.field('wstr_length')
|
||||
elif repr_kind == 1:
|
||||
field_str = self.field('data')['latin1']
|
||||
elif repr_kind == 2:
|
||||
field_str = self.field('data')['ucs2']
|
||||
elif repr_kind == 3:
|
||||
field_str = self.field('data')['ucs4']
|
||||
else:
|
||||
# Python 3.2 and earlier
|
||||
field_str = self.field('str')
|
||||
may_have_surrogates = self.char_width() == 2
|
||||
|
||||
# Gather a list of ints from the Py_UNICODE array; these are either
|
||||
# UCS-2 or UCS-4 code points:
|
||||
if self.char_width() > 2:
|
||||
# UCS-1, UCS-2 or UCS-4 code points:
|
||||
if not may_have_surrogates:
|
||||
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
|
||||
else:
|
||||
# A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
|
||||
|
|
|
@ -767,7 +767,6 @@ with_fpectl
|
|||
with_libm
|
||||
with_libc
|
||||
enable_big_digits
|
||||
with_wide_unicode
|
||||
with_computed_gotos
|
||||
'
|
||||
ac_precious_vars='build_alias
|
||||
|
@ -778,7 +777,8 @@ CFLAGS
|
|||
LDFLAGS
|
||||
LIBS
|
||||
CPPFLAGS
|
||||
CPP'
|
||||
CPP
|
||||
CPPFLAGS'
|
||||
|
||||
|
||||
# Initialize some variables set by options.
|
||||
|
@ -1438,7 +1438,6 @@ Optional Packages:
|
|||
--with-fpectl enable SIGFPE catching
|
||||
--with-libm=STRING math library
|
||||
--with-libc=STRING C library
|
||||
--with-wide-unicode Use 4-byte Unicode characters (default is 2 bytes)
|
||||
--with(out)-computed-gotos
|
||||
Use computed gotos in evaluation loop (enabled by
|
||||
default on supported compilers)
|
||||
|
@ -12370,65 +12369,19 @@ fi
|
|||
$as_echo "$ac_cv_wchar_t_signed" >&6; }
|
||||
fi
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking what type to use for str" >&5
|
||||
$as_echo_n "checking what type to use for str... " >&6; }
|
||||
|
||||
# Check whether --with-wide-unicode was given.
|
||||
if test "${with_wide_unicode+set}" = set; then :
|
||||
withval=$with_wide_unicode;
|
||||
if test "$withval" != no
|
||||
then unicode_size="4"
|
||||
else unicode_size="2"
|
||||
fi
|
||||
|
||||
else
|
||||
|
||||
case "$have_ucs4_tcl" in
|
||||
yes) unicode_size="4";;
|
||||
*) unicode_size="2" ;;
|
||||
esac
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
case "$unicode_size" in
|
||||
4)
|
||||
$as_echo "#define Py_UNICODE_SIZE 4" >>confdefs.h
|
||||
|
||||
ABIFLAGS="${ABIFLAGS}u"
|
||||
;;
|
||||
*) $as_echo "#define Py_UNICODE_SIZE 2" >>confdefs.h
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
|
||||
# wchar_t is only usable if it maps to an unsigned type
|
||||
if test "$unicode_size" = "$ac_cv_sizeof_wchar_t" \
|
||||
if test "$ac_cv_sizeof_wchar_t" -ge 2 \
|
||||
-a "$ac_cv_wchar_t_signed" = "no"
|
||||
then
|
||||
PY_UNICODE_TYPE="wchar_t"
|
||||
HAVE_USABLE_WCHAR_T="yes"
|
||||
|
||||
$as_echo "#define HAVE_USABLE_WCHAR_T 1" >>confdefs.h
|
||||
|
||||
$as_echo "#define PY_UNICODE_TYPE wchar_t" >>confdefs.h
|
||||
|
||||
elif test "$ac_cv_sizeof_short" = "$unicode_size"
|
||||
then
|
||||
PY_UNICODE_TYPE="unsigned short"
|
||||
$as_echo "#define PY_UNICODE_TYPE unsigned short" >>confdefs.h
|
||||
|
||||
elif test "$ac_cv_sizeof_long" = "$unicode_size"
|
||||
then
|
||||
PY_UNICODE_TYPE="unsigned long"
|
||||
$as_echo "#define PY_UNICODE_TYPE unsigned long" >>confdefs.h
|
||||
|
||||
else
|
||||
PY_UNICODE_TYPE="no type found"
|
||||
HAVE_USABLE_WCHAR_T="no usable wchar_t found"
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $PY_UNICODE_TYPE" >&5
|
||||
$as_echo "$PY_UNICODE_TYPE" >&6; }
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $HAVE_USABLE_WCHAR_T" >&5
|
||||
$as_echo "$HAVE_USABLE_WCHAR_T" >&6; }
|
||||
|
||||
# check for endianness
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue