Add an optional size argument to _Py_char2wchar()

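_Py_char2wchar() callers usually need the length of the result in wide
characters. _Py_char2wchar() already knows that length when it returns (O(1),
whereas calling wcslen() on the result is O(n)), so add an optional output
argument, size, through which callers can retrieve it; pass NULL to ignore it.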
This commit is contained in:
Victor Stinner 2010-10-16 23:16:16 +00:00
parent 0a1b8cba90
commit 168e117e0a
5 changed files with 26 additions and 17 deletions

View File

@ -6,7 +6,8 @@ extern "C" {
#endif #endif
PyAPI_FUNC(wchar_t *) _Py_char2wchar( PyAPI_FUNC(wchar_t *) _Py_char2wchar(
const char *arg); const char *arg,
size_t *size);
PyAPI_FUNC(char*) _Py_wchar2char( PyAPI_FUNC(char*) _Py_wchar2char(
const wchar_t *text); const wchar_t *text);

View File

@ -486,10 +486,12 @@ Py_Main(int argc, wchar_t **argv)
/* Use utf-8 on Mac OS X */ /* Use utf-8 on Mac OS X */
unicode = PyUnicode_FromString(p); unicode = PyUnicode_FromString(p);
#else #else
wchar_t *wchar = _Py_char2wchar(p); wchar_t *wchar;
size_t len;
wchar = _Py_char2wchar(p, &len);
if (wchar == NULL) if (wchar == NULL)
continue; continue;
unicode = PyUnicode_FromWideChar(wchar, wcslen(wchar)); unicode = PyUnicode_FromWideChar(wchar, len);
PyMem_Free(wchar); PyMem_Free(wchar);
#endif #endif
if (unicode == NULL) if (unicode == NULL)

View File

@ -41,7 +41,7 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL)); oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) { for (i = 0; i < argc; i++) {
argv_copy[i] = _Py_char2wchar(argv[i]); argv_copy[i] = _Py_char2wchar(argv[i], NULL);
if (!argv_copy[i]) if (!argv_copy[i])
return 1; return 1;
argv_copy2[i] = argv_copy[i]; argv_copy2[i] = argv_copy[i];

View File

@ -1783,17 +1783,18 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
/* locale encoding with surrogateescape */ /* locale encoding with surrogateescape */
wchar_t *wchar; wchar_t *wchar;
PyObject *unicode; PyObject *unicode;
size_t len;
if (s[size] != '\0' || size != strlen(s)) { if (s[size] != '\0' || size != strlen(s)) {
PyErr_SetString(PyExc_TypeError, "embedded NUL character"); PyErr_SetString(PyExc_TypeError, "embedded NUL character");
return NULL; return NULL;
} }
wchar = _Py_char2wchar(s); wchar = _Py_char2wchar(s, &len);
if (wchar == NULL) if (wchar == NULL)
return NULL; return NULL;
unicode = PyUnicode_FromWideChar(wchar, -1); unicode = PyUnicode_FromWideChar(wchar, len);
PyMem_Free(wchar); PyMem_Free(wchar);
return unicode; return unicode;
} }

View File

@ -13,11 +13,12 @@
Use _Py_wchar2char() to encode the character string back to a byte string. Use _Py_wchar2char() to encode the character string back to a byte string.
Return a pointer to a newly allocated (wide) character string (use Return a pointer to a newly allocated wide character string (use
PyMem_Free() to free the memory), or NULL on error (conversion error or PyMem_Free() to free the memory) and write the number of written wide
memory error). */ characters excluding the null character into *size if size is not NULL, or
NULL on error (conversion error or memory error). */
wchar_t* wchar_t*
_Py_char2wchar(const char* arg) _Py_char2wchar(const char* arg, size_t *size)
{ {
wchar_t *res; wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS #ifdef HAVE_BROKEN_MBSTOWCS
@ -47,8 +48,11 @@ _Py_char2wchar(const char* arg)
for (tmp = res; *tmp != 0 && for (tmp = res; *tmp != 0 &&
(*tmp < 0xd800 || *tmp > 0xdfff); tmp++) (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
; ;
if (*tmp == 0) if (*tmp == 0) {
if (size != NULL)
*size = count;
return res; return res;
}
} }
PyMem_Free(res); PyMem_Free(res);
} }
@ -113,6 +117,8 @@ _Py_char2wchar(const char* arg)
*out++ = 0xdc00 + *in++; *out++ = 0xdc00 + *in++;
*out = 0; *out = 0;
#endif #endif
if (size != NULL)
*size = out - res;
return res; return res;
oom: oom:
fprintf(stderr, "out of memory\n"); fprintf(stderr, "out of memory\n");
@ -325,12 +331,11 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
return -1; return -1;
} }
cbuf[res] = '\0'; /* buf will be null terminated */ cbuf[res] = '\0'; /* buf will be null terminated */
wbuf = _Py_char2wchar(cbuf); wbuf = _Py_char2wchar(cbuf, &r1);
if (wbuf == NULL) { if (wbuf == NULL) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
r1 = wcslen(wbuf);
if (bufsiz <= r1) { if (bufsiz <= r1) {
PyMem_Free(wbuf); PyMem_Free(wbuf);
errno = EINVAL; errno = EINVAL;
@ -366,12 +371,11 @@ _Py_wrealpath(const wchar_t *path,
if (res == NULL) if (res == NULL)
return NULL; return NULL;
wresolved_path = _Py_char2wchar(cresolved_path); wresolved_path = _Py_char2wchar(cresolved_path, &r);
if (wresolved_path == NULL) { if (wresolved_path == NULL) {
errno = EINVAL; errno = EINVAL;
return NULL; return NULL;
} }
r = wcslen(wresolved_path);
if (resolved_path_size <= r) { if (resolved_path_size <= r) {
PyMem_Free(wresolved_path); PyMem_Free(wresolved_path);
errno = EINVAL; errno = EINVAL;
@ -394,13 +398,14 @@ _Py_wgetcwd(wchar_t *buf, size_t size)
#else #else
char fname[PATH_MAX]; char fname[PATH_MAX];
wchar_t *wname; wchar_t *wname;
size_t len;
if (getcwd(fname, PATH_MAX) == NULL) if (getcwd(fname, PATH_MAX) == NULL)
return NULL; return NULL;
wname = _Py_char2wchar(fname); wname = _Py_char2wchar(fname, &len);
if (wname == NULL) if (wname == NULL)
return NULL; return NULL;
if (size <= wcslen(wname)) { if (size <= len) {
PyMem_Free(wname); PyMem_Free(wname);
return NULL; return NULL;
} }