Issue #26057: Got rid of unneeded use of PyUnicode_FromObject().

This commit is contained in:
Serhiy Storchaka 2016-04-13 15:37:23 +03:00
parent 131b8f8eee
commit 21a663ea28
6 changed files with 240 additions and 641 deletions

View File

@ -20,10 +20,6 @@
<encoding>_decode(char_buffer_obj[,errors='strict']) -> <encoding>_decode(char_buffer_obj[,errors='strict']) ->
(Unicode object, bytes consumed) (Unicode object, bytes consumed)
<encoding>_encode() interfaces also accept non-Unicode object as
input. The objects are then converted to Unicode using
PyUnicode_FromObject() prior to applying the conversion.
These <encoding>s are available: utf_8, unicode_escape, These <encoding>s are available: utf_8, unicode_escape,
raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
mbcs (on win32). mbcs (on win32).
@ -718,7 +714,7 @@ _codecs_unicode_internal_encode_impl(PyModuleDef *module, PyObject *obj,
/*[clinic input] /*[clinic input]
_codecs.utf_7_encode _codecs.utf_7_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -728,22 +724,13 @@ _codecs_utf_7_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=a7accc496a32b759 input=fd91a78f103b0421]*/ /*[clinic end generated code: output=a7accc496a32b759 input=fd91a78f103b0421]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.utf_8_encode _codecs.utf_8_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -753,17 +740,8 @@ _codecs_utf_8_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=ec831d80e7aedede input=2c22d40532f071f3]*/ /*[clinic end generated code: output=ec831d80e7aedede input=2c22d40532f071f3]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(PyUnicode_AsEncodedString(str, "utf-8", errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/* This version provides access to the byteorder parameter of the /* This version provides access to the byteorder parameter of the
@ -775,7 +753,7 @@ _codecs_utf_8_encode_impl(PyModuleDef *module, PyObject *str,
/*[clinic input] /*[clinic input]
_codecs.utf_16_encode _codecs.utf_16_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
byteorder: int = 0 byteorder: int = 0
/ /
@ -786,22 +764,13 @@ _codecs_utf_16_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors, int byteorder) const char *errors, int byteorder)
/*[clinic end generated code: output=93ac58e960a9ee4d input=3935a489b2d5385e]*/ /*[clinic end generated code: output=93ac58e960a9ee4d input=3935a489b2d5385e]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.utf_16_le_encode _codecs.utf_16_le_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -811,22 +780,13 @@ _codecs_utf_16_le_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=422bedb8da34fb66 input=bc27df05d1d20dfe]*/ /*[clinic end generated code: output=422bedb8da34fb66 input=bc27df05d1d20dfe]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.utf_16_be_encode _codecs.utf_16_be_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -836,17 +796,8 @@ _codecs_utf_16_be_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=3aa7ee9502acdd77 input=5a69d4112763462b]*/ /*[clinic end generated code: output=3aa7ee9502acdd77 input=5a69d4112763462b]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/* This version provides access to the byteorder parameter of the /* This version provides access to the byteorder parameter of the
@ -858,7 +809,7 @@ _codecs_utf_16_be_encode_impl(PyModuleDef *module, PyObject *str,
/*[clinic input] /*[clinic input]
_codecs.utf_32_encode _codecs.utf_32_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
byteorder: int = 0 byteorder: int = 0
/ /
@ -869,22 +820,13 @@ _codecs_utf_32_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors, int byteorder) const char *errors, int byteorder)
/*[clinic end generated code: output=3e7d5a003b02baed input=434a1efa492b8d58]*/ /*[clinic end generated code: output=3e7d5a003b02baed input=434a1efa492b8d58]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.utf_32_le_encode _codecs.utf_32_le_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -894,22 +836,13 @@ _codecs_utf_32_le_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=5dda641cd33dbfc2 input=dfa2d7dc78b99422]*/ /*[clinic end generated code: output=5dda641cd33dbfc2 input=dfa2d7dc78b99422]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.utf_32_be_encode _codecs.utf_32_be_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -919,22 +852,13 @@ _codecs_utf_32_be_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=ccca8b44d91a7c7a input=4595617b18169002]*/ /*[clinic end generated code: output=ccca8b44d91a7c7a input=4595617b18169002]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.unicode_escape_encode _codecs.unicode_escape_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -944,22 +868,13 @@ _codecs_unicode_escape_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=389f23d2b8f8d80b input=8273506f14076912]*/ /*[clinic end generated code: output=389f23d2b8f8d80b input=8273506f14076912]*/
{ {
PyObject *v; return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.raw_unicode_escape_encode _codecs.raw_unicode_escape_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -969,22 +884,13 @@ _codecs_raw_unicode_escape_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=fec4e39d6ec37a62 input=181755d5dfacef3c]*/ /*[clinic end generated code: output=fec4e39d6ec37a62 input=181755d5dfacef3c]*/
{ {
PyObject *v; return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.latin_1_encode _codecs.latin_1_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -994,22 +900,13 @@ _codecs_latin_1_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=ecf00eb8e48c889c input=f03f6dcf1d84bee4]*/ /*[clinic end generated code: output=ecf00eb8e48c889c input=f03f6dcf1d84bee4]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.ascii_encode _codecs.ascii_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -1019,22 +916,13 @@ _codecs_ascii_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=a9d18fc6b6b91cfb input=d87e25a10a593fee]*/ /*[clinic end generated code: output=a9d18fc6b6b91cfb input=d87e25a10a593fee]*/
{ {
PyObject *v; return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.charmap_encode _codecs.charmap_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
mapping: object = NULL mapping: object = NULL
/ /
@ -1045,20 +933,11 @@ _codecs_charmap_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors, PyObject *mapping) const char *errors, PyObject *mapping)
/*[clinic end generated code: output=14ca42b83853c643 input=85f4172661e8dad9]*/ /*[clinic end generated code: output=14ca42b83853c643 input=85f4172661e8dad9]*/
{ {
PyObject *v;
if (mapping == Py_None) if (mapping == Py_None)
mapping = NULL; mapping = NULL;
str = PyUnicode_FromObject(str); return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
if (str == NULL || PyUnicode_READY(str) < 0) { PyUnicode_GET_LENGTH(str));
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
@ -1078,7 +957,7 @@ _codecs_charmap_build_impl(PyModuleDef *module, PyObject *map)
/*[clinic input] /*[clinic input]
_codecs.mbcs_encode _codecs.mbcs_encode
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -1088,23 +967,14 @@ _codecs_mbcs_encode_impl(PyModuleDef *module, PyObject *str,
const char *errors) const char *errors)
/*[clinic end generated code: output=d1a013bc68798bd7 input=65c09ee1e4203263]*/ /*[clinic end generated code: output=d1a013bc68798bd7 input=65c09ee1e4203263]*/
{ {
PyObject *v; return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
/*[clinic input] /*[clinic input]
_codecs.code_page_encode _codecs.code_page_encode
code_page: int code_page: int
str: object str: unicode
errors: str(accept={str, NoneType}) = NULL errors: str(accept={str, NoneType}) = NULL
/ /
[clinic start generated code]*/ [clinic start generated code]*/
@ -1114,19 +984,8 @@ _codecs_code_page_encode_impl(PyModuleDef *module, int code_page,
PyObject *str, const char *errors) PyObject *str, const char *errors)
/*[clinic end generated code: output=3b406618dbfbce25 input=c8562ec460c2e309]*/ /*[clinic end generated code: output=3b406618dbfbce25 input=c8562ec460c2e309]*/
{ {
PyObject *v; return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
PyUnicode_GET_LENGTH(str));
str = PyUnicode_FromObject(str);
if (str == NULL || PyUnicode_READY(str) < 0) {
Py_XDECREF(str);
return NULL;
}
v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
str,
errors),
PyUnicode_GET_LENGTH(str));
Py_DECREF(str);
return v;
} }
#endif /* HAVE_MBCS */ #endif /* HAVE_MBCS */

View File

@ -1401,7 +1401,7 @@ static int
idna_converter(PyObject *obj, struct maybe_idna *data) idna_converter(PyObject *obj, struct maybe_idna *data)
{ {
size_t len; size_t len;
PyObject *obj2, *obj3; PyObject *obj2;
if (obj == NULL) { if (obj == NULL) {
idna_cleanup(data); idna_cleanup(data);
return 1; return 1;
@ -1416,31 +1416,27 @@ idna_converter(PyObject *obj, struct maybe_idna *data)
data->buf = PyByteArray_AsString(obj); data->buf = PyByteArray_AsString(obj);
len = PyByteArray_Size(obj); len = PyByteArray_Size(obj);
} }
else if (PyUnicode_Check(obj) && PyUnicode_READY(obj) == 0 && PyUnicode_IS_COMPACT_ASCII(obj)) { else if (PyUnicode_Check(obj)) {
data->buf = PyUnicode_DATA(obj); if (PyUnicode_READY(obj) == 0 && PyUnicode_IS_COMPACT_ASCII(obj)) {
len = PyUnicode_GET_LENGTH(obj); data->buf = PyUnicode_DATA(obj);
len = PyUnicode_GET_LENGTH(obj);
}
else {
obj2 = PyUnicode_AsEncodedString(obj, "idna", NULL);
if (!obj2) {
PyErr_SetString(PyExc_TypeError, "encoding of hostname failed");
return 0;
}
assert(PyBytes_Check(obj2));
data->obj = obj2;
data->buf = PyBytes_AS_STRING(obj2);
len = PyBytes_GET_SIZE(obj2);
}
} }
else { else {
obj2 = PyUnicode_FromObject(obj); PyErr_Format(PyExc_TypeError, "str, bytes or bytearray expected, not %s",
if (!obj2) { obj->ob_type->tp_name);
PyErr_Format(PyExc_TypeError, "string or unicode text buffer expected, not %s", return 0;
obj->ob_type->tp_name);
return 0;
}
obj3 = PyUnicode_AsEncodedString(obj2, "idna", NULL);
Py_DECREF(obj2);
if (!obj3) {
PyErr_SetString(PyExc_TypeError, "encoding of hostname failed");
return 0;
}
if (!PyBytes_Check(obj3)) {
Py_DECREF(obj3);
PyErr_SetString(PyExc_TypeError, "encoding of hostname failed to return bytes");
return 0;
}
data->obj = obj3;
data->buf = PyBytes_AS_STRING(obj3);
len = PyBytes_GET_SIZE(obj3);
} }
if (strlen(data->buf) != len) { if (strlen(data->buf) != len) {
Py_CLEAR(data->obj); Py_CLEAR(data->obj);

View File

@ -123,11 +123,6 @@ STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
/* /*
Wraps stringlib_parse_args_finds() and additionally ensures that the Wraps stringlib_parse_args_finds() and additionally ensures that the
first argument is a unicode object. first argument is a unicode object.
Note that we receive a pointer to the pointer of the substring object,
so when we create that object in this function we don't DECREF it,
because it continues living in the caller functions (those functions,
after finishing using the substring, must DECREF it).
*/ */
Py_LOCAL_INLINE(int) Py_LOCAL_INLINE(int)
@ -135,14 +130,10 @@ STRINGLIB(parse_args_finds_unicode)(const char * function_name, PyObject *args,
PyObject **substring, PyObject **substring,
Py_ssize_t *start, Py_ssize_t *end) Py_ssize_t *start, Py_ssize_t *end)
{ {
PyObject *tmp_substring; if(STRINGLIB(parse_args_finds)(function_name, args, substring,
if(STRINGLIB(parse_args_finds)(function_name, args, &tmp_substring,
start, end)) { start, end)) {
tmp_substring = PyUnicode_FromObject(tmp_substring); if (ensure_unicode(*substring) < 0)
if (!tmp_substring)
return 0; return 0;
*substring = tmp_substring;
return 1; return 1;
} }
return 0; return 0;

View File

@ -751,6 +751,18 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#undef BLOOM_UPDATE #undef BLOOM_UPDATE
} }
/* Check that obj is a str object before it is used as one.

   If obj passes PyUnicode_Check(), the result of PyUnicode_READY(obj) is
   returned unchanged.  Otherwise a TypeError naming the actual type of obj
   is set and -1 is returned.  Callers treat a negative result as failure.
   No reference to obj is taken or released here. */
static int
ensure_unicode(PyObject *obj)
{
    if (PyUnicode_Check(obj))
        return PyUnicode_READY(obj);

    /* Not a str: report the offending type (name truncated to 100 chars). */
    PyErr_Format(PyExc_TypeError,
                 "must be str, not %.100s",
                 Py_TYPE(obj)->tp_name);
    return -1;
}
/* Compilation of templated routines */ /* Compilation of templated routines */
#include "stringlib/asciilib.h" #include "stringlib/asciilib.h"
@ -3066,7 +3078,7 @@ PyUnicode_FromEncodedObject(PyObject *obj,
/* Retrieve a bytes buffer view through the PEP 3118 buffer interface */ /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */
if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) { if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"coercing to str: need a bytes-like object, %.80s found", "decoding to str: need a bytes-like object, %.80s found",
Py_TYPE(obj)->tp_name); Py_TYPE(obj)->tp_name);
return NULL; return NULL;
} }
@ -3787,19 +3799,17 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
output = arg; output = arg;
Py_INCREF(output); Py_INCREF(output);
} }
else { else if (PyUnicode_Check(arg)) {
arg = PyUnicode_FromObject(arg);
if (!arg)
return 0;
output = PyUnicode_EncodeFSDefault(arg); output = PyUnicode_EncodeFSDefault(arg);
Py_DECREF(arg);
if (!output) if (!output)
return 0; return 0;
if (!PyBytes_Check(output)) { assert(PyBytes_Check(output));
Py_DECREF(output); }
PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes"); else {
return 0; PyErr_Format(PyExc_TypeError,
} "must be str or bytes, not %.100s",
Py_TYPE(arg)->tp_name);
return 0;
} }
size = PyBytes_GET_SIZE(output); size = PyBytes_GET_SIZE(output);
data = PyBytes_AS_STRING(output); data = PyBytes_AS_STRING(output);
@ -3871,7 +3881,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
if (PyUnicode_UTF8(unicode) == NULL) { if (PyUnicode_UTF8(unicode) == NULL) {
assert(!PyUnicode_IS_COMPACT_ASCII(unicode)); assert(!PyUnicode_IS_COMPACT_ASCII(unicode));
bytes = _PyUnicode_AsUTF8String(unicode, "strict"); bytes = _PyUnicode_AsUTF8String(unicode, NULL);
if (bytes == NULL) if (bytes == NULL)
return NULL; return NULL;
_PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1); _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1);
@ -8860,10 +8870,8 @@ _PyUnicode_TranslateCharmap(PyObject *input,
kind = PyUnicode_KIND(input); kind = PyUnicode_KIND(input);
size = PyUnicode_GET_LENGTH(input); size = PyUnicode_GET_LENGTH(input);
if (size == 0) { if (size == 0)
Py_INCREF(input); return PyUnicode_FromObject(input);
return input;
}
/* allocate enough for a simple 1:1 translation without /* allocate enough for a simple 1:1 translation without
replacements, if we need more, we'll resize */ replacements, if we need more, we'll resize */
@ -8974,14 +8982,9 @@ PyUnicode_Translate(PyObject *str,
PyObject *mapping, PyObject *mapping,
const char *errors) const char *errors)
{ {
PyObject *result; if (ensure_unicode(str) < 0)
str = PyUnicode_FromObject(str);
if (str == NULL)
return NULL; return NULL;
result = _PyUnicode_TranslateCharmap(str, mapping, errors); return _PyUnicode_TranslateCharmap(str, mapping, errors);
Py_DECREF(str);
return result;
} }
static Py_UCS4 static Py_UCS4
@ -9163,9 +9166,10 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
} }
static Py_ssize_t static Py_ssize_t
any_find_slice(int direction, PyObject* s1, PyObject* s2, any_find_slice(PyObject* s1, PyObject* s2,
Py_ssize_t start, Py_ssize_t start,
Py_ssize_t end) Py_ssize_t end,
int direction)
{ {
int kind1, kind2; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
@ -9334,54 +9338,35 @@ PyUnicode_Count(PyObject *str,
Py_ssize_t end) Py_ssize_t end)
{ {
Py_ssize_t result; Py_ssize_t result;
PyObject* str_obj;
PyObject* sub_obj;
int kind1, kind2; int kind1, kind2;
void *buf1 = NULL, *buf2 = NULL; void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str); if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
if (!str_obj)
return -1; return -1;
sub_obj = PyUnicode_FromObject(substr);
if (!sub_obj) {
Py_DECREF(str_obj);
return -1;
}
if (PyUnicode_READY(sub_obj) == -1 || PyUnicode_READY(str_obj) == -1) {
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
return -1;
}
kind1 = PyUnicode_KIND(str_obj); kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(sub_obj); kind2 = PyUnicode_KIND(substr);
if (kind1 < kind2) { if (kind1 < kind2)
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
return 0; return 0;
}
len1 = PyUnicode_GET_LENGTH(str_obj); len1 = PyUnicode_GET_LENGTH(str);
len2 = PyUnicode_GET_LENGTH(sub_obj); len2 = PyUnicode_GET_LENGTH(substr);
ADJUST_INDICES(start, end, len1); ADJUST_INDICES(start, end, len1);
if (end - start < len2) { if (end - start < len2)
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
return 0; return 0;
}
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(sub_obj); buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sub_obj, kind1); buf2 = _PyUnicode_AsKind(substr, kind1);
if (!buf2) if (!buf2)
goto onError; goto onError;
} }
switch (kind1) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj)) if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr))
result = asciilib_count( result = asciilib_count(
((Py_UCS1*)buf1) + start, end - start, ((Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX buf2, len2, PY_SSIZE_T_MAX
@ -9408,16 +9393,11 @@ PyUnicode_Count(PyObject *str,
assert(0); result = 0; assert(0); result = 0;
} }
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
if (kind2 != kind1) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
return result; return result;
onError: onError:
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
if (kind2 != kind1 && buf2) if (kind2 != kind1 && buf2)
PyMem_Free(buf2); PyMem_Free(buf2);
return -1; return -1;
@ -9425,35 +9405,15 @@ PyUnicode_Count(PyObject *str,
Py_ssize_t Py_ssize_t
PyUnicode_Find(PyObject *str, PyUnicode_Find(PyObject *str,
PyObject *sub, PyObject *substr,
Py_ssize_t start, Py_ssize_t start,
Py_ssize_t end, Py_ssize_t end,
int direction) int direction)
{ {
Py_ssize_t result; if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
str = PyUnicode_FromObject(str);
if (!str)
return -2; return -2;
sub = PyUnicode_FromObject(sub);
if (!sub) {
Py_DECREF(str);
return -2;
}
if (PyUnicode_READY(sub) == -1 || PyUnicode_READY(str) == -1) {
Py_DECREF(sub);
Py_DECREF(str);
return -2;
}
result = any_find_slice(direction, return any_find_slice(str, substr, start, end, direction);
str, sub, start, end
);
Py_DECREF(str);
Py_DECREF(sub);
return result;
} }
Py_ssize_t Py_ssize_t
@ -9556,22 +9516,10 @@ PyUnicode_Tailmatch(PyObject *str,
Py_ssize_t end, Py_ssize_t end,
int direction) int direction)
{ {
Py_ssize_t result; if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
str = PyUnicode_FromObject(str);
if (str == NULL)
return -1; return -1;
substr = PyUnicode_FromObject(substr);
if (substr == NULL) {
Py_DECREF(str);
return -1;
}
result = tailmatch(str, substr, return tailmatch(str, substr, start, end, direction);
start, end, direction);
Py_DECREF(str);
Py_DECREF(substr);
return result;
} }
/* Apply fixfct filter to the Unicode object self and return a /* Apply fixfct filter to the Unicode object self and return a
@ -10177,13 +10125,8 @@ PyUnicode_Splitlines(PyObject *string, int keepends)
{ {
PyObject *list; PyObject *list;
string = PyUnicode_FromObject(string); if (ensure_unicode(string) < 0)
if (string == NULL)
return NULL; return NULL;
if (PyUnicode_READY(string) == -1) {
Py_DECREF(string);
return NULL;
}
switch (PyUnicode_KIND(string)) { switch (PyUnicode_KIND(string)) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
@ -10210,7 +10153,6 @@ PyUnicode_Splitlines(PyObject *string, int keepends)
assert(0); assert(0);
list = 0; list = 0;
} }
Py_DECREF(string);
return list; return list;
} }
@ -10771,28 +10713,27 @@ unicode_casefold(PyObject *self)
} }
/* Argument converter. Coerces to a single unicode character */ /* Argument converter. Accepts a single Unicode character. */
static int static int
convert_uc(PyObject *obj, void *addr) convert_uc(PyObject *obj, void *addr)
{ {
Py_UCS4 *fillcharloc = (Py_UCS4 *)addr; Py_UCS4 *fillcharloc = (Py_UCS4 *)addr;
PyObject *uniobj;
uniobj = PyUnicode_FromObject(obj); if (!PyUnicode_Check(obj)) {
if (uniobj == NULL) { PyErr_Format(PyExc_TypeError,
PyErr_SetString(PyExc_TypeError, "The fill character must be a unicode character, "
"The fill character cannot be converted to Unicode"); "not %.100s", Py_TYPE(obj)->tp_name);
return 0; return 0;
} }
if (PyUnicode_GET_LENGTH(uniobj) != 1) { if (PyUnicode_READY(obj) < 0)
return 0;
if (PyUnicode_GET_LENGTH(obj) != 1) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"The fill character must be exactly one character long"); "The fill character must be exactly one character long");
Py_DECREF(uniobj);
return 0; return 0;
} }
*fillcharloc = PyUnicode_READ_CHAR(uniobj, 0); *fillcharloc = PyUnicode_READ_CHAR(obj, 0);
Py_DECREF(uniobj);
return 1; return 1;
} }
@ -11114,59 +11055,43 @@ _PyUnicode_EQ(PyObject *aa, PyObject *bb)
} }
int int
PyUnicode_Contains(PyObject *container, PyObject *element) PyUnicode_Contains(PyObject *str, PyObject *substr)
{ {
PyObject *str, *sub;
int kind1, kind2; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
int result; int result;
/* Coerce the two arguments */ if (!PyUnicode_Check(substr)) {
sub = PyUnicode_FromObject(element);
if (!sub) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"'in <string>' requires string as left operand, not %s", "'in <string>' requires string as left operand, not %.100s",
element->ob_type->tp_name); Py_TYPE(substr)->tp_name);
return -1; return -1;
} }
if (PyUnicode_READY(substr) == -1)
str = PyUnicode_FromObject(container); return -1;
if (!str) { if (ensure_unicode(str) < 0)
Py_DECREF(sub);
return -1; return -1;
}
kind1 = PyUnicode_KIND(str); kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(sub); kind2 = PyUnicode_KIND(substr);
if (kind1 < kind2) { if (kind1 < kind2)
Py_DECREF(sub);
Py_DECREF(str);
return 0; return 0;
}
len1 = PyUnicode_GET_LENGTH(str); len1 = PyUnicode_GET_LENGTH(str);
len2 = PyUnicode_GET_LENGTH(sub); len2 = PyUnicode_GET_LENGTH(substr);
if (len1 < len2) { if (len1 < len2)
Py_DECREF(sub);
Py_DECREF(str);
return 0; return 0;
}
buf1 = PyUnicode_DATA(str); buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(sub); buf2 = PyUnicode_DATA(substr);
if (len2 == 1) { if (len2 == 1) {
Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0); Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1; result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1;
Py_DECREF(sub);
Py_DECREF(str);
return result; return result;
} }
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sub, kind1); buf2 = _PyUnicode_AsKind(substr, kind1);
if (!buf2) { if (!buf2)
Py_DECREF(sub);
Py_DECREF(str);
return -1; return -1;
}
} }
switch (kind1) { switch (kind1) {
@ -11184,9 +11109,6 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
assert(0); assert(0);
} }
Py_DECREF(str);
Py_DECREF(sub);
if (kind2 != kind1) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
@ -11198,56 +11120,40 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
PyObject * PyObject *
PyUnicode_Concat(PyObject *left, PyObject *right) PyUnicode_Concat(PyObject *left, PyObject *right)
{ {
PyObject *u = NULL, *v = NULL, *w; PyObject *result;
Py_UCS4 maxchar, maxchar2; Py_UCS4 maxchar, maxchar2;
Py_ssize_t u_len, v_len, new_len; Py_ssize_t left_len, right_len, new_len;
/* Coerce the two arguments */ if (ensure_unicode(left) < 0 || ensure_unicode(right) < 0)
u = PyUnicode_FromObject(left); return NULL;
if (u == NULL)
goto onError;
v = PyUnicode_FromObject(right);
if (v == NULL)
goto onError;
/* Shortcuts */ /* Shortcuts */
if (v == unicode_empty) { if (left == unicode_empty)
Py_DECREF(v); return PyUnicode_FromObject(right);
return u; if (right == unicode_empty)
} return PyUnicode_FromObject(left);
if (u == unicode_empty) {
Py_DECREF(u);
return v;
}
u_len = PyUnicode_GET_LENGTH(u); left_len = PyUnicode_GET_LENGTH(left);
v_len = PyUnicode_GET_LENGTH(v); right_len = PyUnicode_GET_LENGTH(right);
if (u_len > PY_SSIZE_T_MAX - v_len) { if (left_len > PY_SSIZE_T_MAX - right_len) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"strings are too large to concat"); "strings are too large to concat");
goto onError; return NULL;
} }
new_len = u_len + v_len; new_len = left_len + right_len;
maxchar = PyUnicode_MAX_CHAR_VALUE(u); maxchar = PyUnicode_MAX_CHAR_VALUE(left);
maxchar2 = PyUnicode_MAX_CHAR_VALUE(v); maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
maxchar = Py_MAX(maxchar, maxchar2); maxchar = Py_MAX(maxchar, maxchar2);
/* Concat the two Unicode strings */ /* Concat the two Unicode strings */
w = PyUnicode_New(new_len, maxchar); result = PyUnicode_New(new_len, maxchar);
if (w == NULL) if (result == NULL)
goto onError; return NULL;
_PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len); _PyUnicode_FastCopyCharacters(result, 0, left, 0, left_len);
_PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len); _PyUnicode_FastCopyCharacters(result, left_len, right, 0, right_len);
Py_DECREF(u); assert(_PyUnicode_CheckConsistency(result, 1));
Py_DECREF(v); return result;
assert(_PyUnicode_CheckConsistency(w, 1));
return w;
onError:
Py_XDECREF(u);
Py_XDECREF(v);
return NULL;
} }
void void
@ -11362,25 +11268,21 @@ unicode_count(PyObject *self, PyObject *args)
kind1 = PyUnicode_KIND(self); kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring); kind2 = PyUnicode_KIND(substring);
if (kind1 < kind2) { if (kind1 < kind2)
Py_DECREF(substring);
return PyLong_FromLong(0); return PyLong_FromLong(0);
}
len1 = PyUnicode_GET_LENGTH(self); len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring); len2 = PyUnicode_GET_LENGTH(substring);
ADJUST_INDICES(start, end, len1); ADJUST_INDICES(start, end, len1);
if (end - start < len2) { if (end - start < len2)
Py_DECREF(substring);
return PyLong_FromLong(0); return PyLong_FromLong(0);
}
buf1 = PyUnicode_DATA(self); buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring); buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1); buf2 = _PyUnicode_AsKind(substring, kind1);
if (!buf2) { if (!buf2)
Py_DECREF(substring);
return NULL; return NULL;
}
} }
switch (kind1) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
@ -11410,8 +11312,6 @@ unicode_count(PyObject *self, PyObject *args)
if (kind2 != kind1) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
Py_DECREF(substring);
return result; return result;
} }
@ -11549,18 +11449,10 @@ unicode_find(PyObject *self, PyObject *args)
&start, &end)) &start, &end))
return NULL; return NULL;
if (PyUnicode_READY(self) == -1) { if (PyUnicode_READY(self) == -1)
Py_DECREF(substring);
return NULL; return NULL;
}
if (PyUnicode_READY(substring) == -1) {
Py_DECREF(substring);
return NULL;
}
result = any_find_slice(1, self, substring, start, end); result = any_find_slice(self, substring, start, end, 1);
Py_DECREF(substring);
if (result == -2) if (result == -2)
return NULL; return NULL;
@ -11637,18 +11529,10 @@ unicode_index(PyObject *self, PyObject *args)
&start, &end)) &start, &end))
return NULL; return NULL;
if (PyUnicode_READY(self) == -1) { if (PyUnicode_READY(self) == -1)
Py_DECREF(substring);
return NULL; return NULL;
}
if (PyUnicode_READY(substring) == -1) {
Py_DECREF(substring);
return NULL;
}
result = any_find_slice(1, self, substring, start, end); result = any_find_slice(self, substring, start, end, 1);
Py_DECREF(substring);
if (result == -2) if (result == -2)
return NULL; return NULL;
@ -12457,40 +12341,15 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
} }
PyObject * PyObject *
PyUnicode_Replace(PyObject *obj, PyUnicode_Replace(PyObject *str,
PyObject *subobj, PyObject *substr,
PyObject *replobj, PyObject *replstr,
Py_ssize_t maxcount) Py_ssize_t maxcount)
{ {
PyObject *self; if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0 ||
PyObject *str1; ensure_unicode(replstr) < 0)
PyObject *str2;
PyObject *result;
self = PyUnicode_FromObject(obj);
if (self == NULL)
return NULL; return NULL;
str1 = PyUnicode_FromObject(subobj); return replace(str, substr, replstr, maxcount);
if (str1 == NULL) {
Py_DECREF(self);
return NULL;
}
str2 = PyUnicode_FromObject(replobj);
if (str2 == NULL) {
Py_DECREF(self);
Py_DECREF(str1);
return NULL;
}
if (PyUnicode_READY(self) == -1 ||
PyUnicode_READY(str1) == -1 ||
PyUnicode_READY(str2) == -1)
result = NULL;
else
result = replace(self, str1, str2, maxcount);
Py_DECREF(self);
Py_DECREF(str1);
Py_DECREF(str2);
return result;
} }
PyDoc_STRVAR(replace__doc__, PyDoc_STRVAR(replace__doc__,
@ -12506,28 +12365,12 @@ unicode_replace(PyObject *self, PyObject *args)
PyObject *str1; PyObject *str1;
PyObject *str2; PyObject *str2;
Py_ssize_t maxcount = -1; Py_ssize_t maxcount = -1;
PyObject *result;
if (!PyArg_ParseTuple(args, "OO|n:replace", &str1, &str2, &maxcount)) if (!PyArg_ParseTuple(args, "UU|n:replace", &str1, &str2, &maxcount))
return NULL; return NULL;
if (PyUnicode_READY(self) == -1) if (PyUnicode_READY(self) == -1)
return NULL; return NULL;
str1 = PyUnicode_FromObject(str1); return replace(self, str1, str2, maxcount);
if (str1 == NULL)
return NULL;
str2 = PyUnicode_FromObject(str2);
if (str2 == NULL) {
Py_DECREF(str1);
return NULL;
}
if (PyUnicode_READY(str1) == -1 || PyUnicode_READY(str2) == -1)
result = NULL;
else
result = replace(self, str1, str2, maxcount);
Py_DECREF(str1);
Py_DECREF(str2);
return result;
} }
static PyObject * static PyObject *
@ -12716,18 +12559,10 @@ unicode_rfind(PyObject *self, PyObject *args)
&start, &end)) &start, &end))
return NULL; return NULL;
if (PyUnicode_READY(self) == -1) { if (PyUnicode_READY(self) == -1)
Py_DECREF(substring);
return NULL; return NULL;
}
if (PyUnicode_READY(substring) == -1) {
Py_DECREF(substring);
return NULL;
}
result = any_find_slice(-1, self, substring, start, end); result = any_find_slice(self, substring, start, end, -1);
Py_DECREF(substring);
if (result == -2) if (result == -2)
return NULL; return NULL;
@ -12753,18 +12588,10 @@ unicode_rindex(PyObject *self, PyObject *args)
&start, &end)) &start, &end))
return NULL; return NULL;
if (PyUnicode_READY(self) == -1) { if (PyUnicode_READY(self) == -1)
Py_DECREF(substring);
return NULL; return NULL;
}
if (PyUnicode_READY(substring) == -1) {
Py_DECREF(substring);
return NULL;
}
result = any_find_slice(-1, self, substring, start, end); result = any_find_slice(self, substring, start, end, -1);
Py_DECREF(substring);
if (result == -2) if (result == -2)
return NULL; return NULL;
@ -12804,24 +12631,10 @@ unicode_rjust(PyObject *self, PyObject *args)
PyObject * PyObject *
PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
{ {
PyObject *result; if (ensure_unicode(s) < 0 || (sep != NULL && ensure_unicode(sep) < 0))
s = PyUnicode_FromObject(s);
if (s == NULL)
return NULL; return NULL;
if (sep != NULL) {
sep = PyUnicode_FromObject(sep);
if (sep == NULL) {
Py_DECREF(s);
return NULL;
}
}
result = split(s, sep, maxsplit); return split(s, sep, maxsplit);
Py_DECREF(s);
Py_XDECREF(sep);
return result;
} }
PyDoc_STRVAR(split__doc__, PyDoc_STRVAR(split__doc__,
@ -12846,35 +12659,26 @@ unicode_split(PyObject *self, PyObject *args, PyObject *kwds)
if (substring == Py_None) if (substring == Py_None)
return split(self, NULL, maxcount); return split(self, NULL, maxcount);
else if (PyUnicode_Check(substring))
if (PyUnicode_Check(substring))
return split(self, substring, maxcount); return split(self, substring, maxcount);
else
return PyUnicode_Split(self, substring, maxcount); PyErr_Format(PyExc_TypeError,
"must be str or None, not %.100s",
Py_TYPE(substring)->tp_name);
return NULL;
} }
PyObject * PyObject *
PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
{ {
PyObject* str_obj;
PyObject* sep_obj;
PyObject* out; PyObject* out;
int kind1, kind2; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str_in); if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
if (!str_obj)
return NULL; return NULL;
sep_obj = PyUnicode_FromObject(sep_in);
if (!sep_obj) {
Py_DECREF(str_obj);
return NULL;
}
if (PyUnicode_READY(sep_obj) == -1 || PyUnicode_READY(str_obj) == -1) {
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
return NULL;
}
kind1 = PyUnicode_KIND(str_obj); kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj); kind2 = PyUnicode_KIND(sep_obj);
@ -12888,8 +12692,6 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty); out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty);
Py_DECREF(unicode_empty); Py_DECREF(unicode_empty);
} }
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
return out; return out;
} }
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str_obj);
@ -12897,7 +12699,7 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1); buf2 = _PyUnicode_AsKind(sep_obj, kind1);
if (!buf2) if (!buf2)
goto onError; return NULL;
} }
switch (kind1) { switch (kind1) {
@ -12918,39 +12720,23 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
out = 0; out = 0;
} }
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
if (kind2 != kind1) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
return out; return out;
onError:
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
if (kind2 != kind1 && buf2)
PyMem_Free(buf2);
return NULL;
} }
PyObject * PyObject *
PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
{ {
PyObject* str_obj;
PyObject* sep_obj;
PyObject* out; PyObject* out;
int kind1, kind2; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str_in); if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
if (!str_obj)
return NULL; return NULL;
sep_obj = PyUnicode_FromObject(sep_in);
if (!sep_obj) {
Py_DECREF(str_obj);
return NULL;
}
kind1 = PyUnicode_KIND(str_obj); kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj); kind2 = PyUnicode_KIND(sep_obj);
@ -12964,8 +12750,6 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj); out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj);
Py_DECREF(unicode_empty); Py_DECREF(unicode_empty);
} }
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
return out; return out;
} }
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str_obj);
@ -12973,7 +12757,7 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1); buf2 = _PyUnicode_AsKind(sep_obj, kind1);
if (!buf2) if (!buf2)
goto onError; return NULL;
} }
switch (kind1) { switch (kind1) {
@ -12994,18 +12778,10 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
out = 0; out = 0;
} }
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
if (kind2 != kind1) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
return out; return out;
onError:
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
if (kind2 != kind1 && buf2)
PyMem_Free(buf2);
return NULL;
} }
PyDoc_STRVAR(partition__doc__, PyDoc_STRVAR(partition__doc__,
@ -13037,24 +12813,10 @@ unicode_rpartition(PyObject *self, PyObject *separator)
PyObject * PyObject *
PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
{ {
PyObject *result; if (ensure_unicode(s) < 0 || (sep != NULL && ensure_unicode(sep) < 0))
s = PyUnicode_FromObject(s);
if (s == NULL)
return NULL; return NULL;
if (sep != NULL) {
sep = PyUnicode_FromObject(sep);
if (sep == NULL) {
Py_DECREF(s);
return NULL;
}
}
result = rsplit(s, sep, maxsplit); return rsplit(s, sep, maxsplit);
Py_DECREF(s);
Py_XDECREF(sep);
return result;
} }
PyDoc_STRVAR(rsplit__doc__, PyDoc_STRVAR(rsplit__doc__,
@ -13079,10 +12841,14 @@ unicode_rsplit(PyObject *self, PyObject *args, PyObject *kwds)
if (substring == Py_None) if (substring == Py_None)
return rsplit(self, NULL, maxcount); return rsplit(self, NULL, maxcount);
else if (PyUnicode_Check(substring))
if (PyUnicode_Check(substring))
return rsplit(self, substring, maxcount); return rsplit(self, substring, maxcount);
else
return PyUnicode_RSplit(self, substring, maxcount); PyErr_Format(PyExc_TypeError,
"must be str or None, not %.100s",
Py_TYPE(substring)->tp_name);
return NULL;
} }
PyDoc_STRVAR(splitlines__doc__, PyDoc_STRVAR(splitlines__doc__,
@ -13363,11 +13129,15 @@ unicode_startswith(PyObject *self,
if (PyTuple_Check(subobj)) { if (PyTuple_Check(subobj)) {
Py_ssize_t i; Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
substring = PyUnicode_FromObject(PyTuple_GET_ITEM(subobj, i)); substring = PyTuple_GET_ITEM(subobj, i);
if (substring == NULL) if (!PyUnicode_Check(substring)) {
PyErr_Format(PyExc_TypeError,
"tuple for startswith must only contain str, "
"not %.100s",
Py_TYPE(substring)->tp_name);
return NULL; return NULL;
}
result = tailmatch(self, substring, start, end, -1); result = tailmatch(self, substring, start, end, -1);
Py_DECREF(substring);
if (result == -1) if (result == -1)
return NULL; return NULL;
if (result) { if (result) {
@ -13377,15 +13147,13 @@ unicode_startswith(PyObject *self,
/* nothing matched */ /* nothing matched */
Py_RETURN_FALSE; Py_RETURN_FALSE;
} }
substring = PyUnicode_FromObject(subobj); if (!PyUnicode_Check(subobj)) {
if (substring == NULL) { PyErr_Format(PyExc_TypeError,
if (PyErr_ExceptionMatches(PyExc_TypeError)) "startswith first arg must be str or "
PyErr_Format(PyExc_TypeError, "startswith first arg must be str or " "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name);
"a tuple of str, not %s", Py_TYPE(subobj)->tp_name);
return NULL; return NULL;
} }
result = tailmatch(self, substring, start, end, -1); result = tailmatch(self, subobj, start, end, -1);
Py_DECREF(substring);
if (result == -1) if (result == -1)
return NULL; return NULL;
return PyBool_FromLong(result); return PyBool_FromLong(result);
@ -13415,12 +13183,15 @@ unicode_endswith(PyObject *self,
if (PyTuple_Check(subobj)) { if (PyTuple_Check(subobj)) {
Py_ssize_t i; Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
substring = PyUnicode_FromObject( substring = PyTuple_GET_ITEM(subobj, i);
PyTuple_GET_ITEM(subobj, i)); if (!PyUnicode_Check(substring)) {
if (substring == NULL) PyErr_Format(PyExc_TypeError,
"tuple for endswith must only contain str, "
"not %.100s",
Py_TYPE(substring)->tp_name);
return NULL; return NULL;
}
result = tailmatch(self, substring, start, end, +1); result = tailmatch(self, substring, start, end, +1);
Py_DECREF(substring);
if (result == -1) if (result == -1)
return NULL; return NULL;
if (result) { if (result) {
@ -13429,15 +13200,13 @@ unicode_endswith(PyObject *self,
} }
Py_RETURN_FALSE; Py_RETURN_FALSE;
} }
substring = PyUnicode_FromObject(subobj); if (!PyUnicode_Check(subobj)) {
if (substring == NULL) { PyErr_Format(PyExc_TypeError,
if (PyErr_ExceptionMatches(PyExc_TypeError)) "endswith first arg must be str or "
PyErr_Format(PyExc_TypeError, "endswith first arg must be str or " "a tuple of str, not %.100s", Py_TYPE(subobj)->tp_name);
"a tuple of str, not %s", Py_TYPE(subobj)->tp_name);
return NULL; return NULL;
} }
result = tailmatch(self, substring, start, end, +1); result = tailmatch(self, subobj, start, end, +1);
Py_DECREF(substring);
if (result == -1) if (result == -1)
return NULL; return NULL;
return PyBool_FromLong(result); return PyBool_FromLong(result);
@ -14907,13 +14676,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
return NULL; return NULL;
} }
ctx.fmtstr = PyUnicode_FromObject(format); if (ensure_unicode(format) < 0)
if (ctx.fmtstr == NULL)
return NULL; return NULL;
if (PyUnicode_READY(ctx.fmtstr) == -1) {
Py_DECREF(ctx.fmtstr); ctx.fmtstr = format;
return NULL;
}
ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr); ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr); ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr); ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
@ -14973,11 +14739,9 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (ctx.args_owned) { if (ctx.args_owned) {
Py_DECREF(ctx.args); Py_DECREF(ctx.args);
} }
Py_DECREF(ctx.fmtstr);
return _PyUnicodeWriter_Finish(&ctx.writer); return _PyUnicodeWriter_Finish(&ctx.writer);
onError: onError:
Py_DECREF(ctx.fmtstr);
_PyUnicodeWriter_Dealloc(&ctx.writer); _PyUnicodeWriter_Dealloc(&ctx.writer);
if (ctx.args_owned) { if (ctx.args_owned) {
Py_DECREF(ctx.args); Py_DECREF(ctx.args);

View File

@ -1931,9 +1931,8 @@ builtin_input_impl(PyModuleDef *module, PyObject *prompt)
Py_CLEAR(stringpo); Py_CLEAR(stringpo);
if (po == NULL) if (po == NULL)
goto _readline_errors; goto _readline_errors;
promptstr = PyBytes_AsString(po); assert(PyBytes_Check(po));
if (promptstr == NULL) promptstr = PyBytes_AS_STRING(po);
goto _readline_errors;
} }
else { else {
po = NULL; po = NULL;

View File

@ -1056,35 +1056,25 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
return converterr("(AsCharBuffer failed)", return converterr("(AsCharBuffer failed)",
arg, msgbuf, bufsize); arg, msgbuf, bufsize);
} }
else { else if (PyUnicode_Check(arg)) {
PyObject *u;
/* Convert object to Unicode */
u = PyUnicode_FromObject(arg);
if (u == NULL)
return converterr(
"string or unicode or text buffer",
arg, msgbuf, bufsize);
/* Encode object; use default error handling */ /* Encode object; use default error handling */
s = PyUnicode_AsEncodedString(u, s = PyUnicode_AsEncodedString(arg,
encoding, encoding,
NULL); NULL);
Py_DECREF(u);
if (s == NULL) if (s == NULL)
return converterr("(encoding failed)", return converterr("(encoding failed)",
arg, msgbuf, bufsize); arg, msgbuf, bufsize);
if (!PyBytes_Check(s)) { assert(PyBytes_Check(s));
Py_DECREF(s);
return converterr(
"(encoder failed to return bytes)",
arg, msgbuf, bufsize);
}
size = PyBytes_GET_SIZE(s); size = PyBytes_GET_SIZE(s);
ptr = PyBytes_AS_STRING(s); ptr = PyBytes_AS_STRING(s);
if (ptr == NULL) if (ptr == NULL)
ptr = ""; ptr = "";
} }
else {
return converterr(
recode_strings ? "str" : "str, bytes or bytearray",
arg, msgbuf, bufsize);
}
/* Write output; output is guaranteed to be 0-terminated */ /* Write output; output is guaranteed to be 0-terminated */
if (*format == '#') { if (*format == '#') {