mirror of https://github.com/python/cpython
363 lines
9.4 KiB
C
363 lines
9.4 KiB
C
|
/* NOTE: this API is -ONLY- for use with single byte character strings. */
|
||
|
/* Do not use it with Unicode. */
|
||
|
|
||
|
#include "bytes_methods.h"
|
||
|
|
||
|
/* The including file is expected to define STRINGLIB_MUTABLE first; if it
   did not, emit a diagnostic and fall back to 0. */
#if !defined(STRINGLIB_MUTABLE)
#  warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#  define STRINGLIB_MUTABLE 0
#endif
|
||
|
|
||
|
/* The more complicated methods.  Parts of these should be pulled out into
   the shared code in bytes_methods.c to cut down on duplicate code bloat. */
|
||
|
|
||
|
PyDoc_STRVAR(expandtabs__doc__,
|
||
|
"B.expandtabs([tabsize]) -> modified copy of B\n\
|
||
|
\n\
|
||
|
Return a copy of B where all tab characters are expanded using spaces.\n\
|
||
|
If tabsize is not given, a tab size of 8 characters is assumed.");
|
||
|
|
||
|
static PyObject*
|
||
|
stringlib_expandtabs(PyObject *self, PyObject *args)
|
||
|
{
|
||
|
const char *e, *p;
|
||
|
char *q;
|
||
|
Py_ssize_t i, j, old_j;
|
||
|
PyObject *u;
|
||
|
int tabsize = 8;
|
||
|
|
||
|
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
|
||
|
return NULL;
|
||
|
|
||
|
/* First pass: determine size of output string */
|
||
|
i = j = old_j = 0;
|
||
|
e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
|
||
|
for (p = STRINGLIB_STR(self); p < e; p++)
|
||
|
if (*p == '\t') {
|
||
|
if (tabsize > 0) {
|
||
|
j += tabsize - (j % tabsize);
|
||
|
/* XXX: this depends on a signed integer overflow to < 0 */
|
||
|
/* C compilers, including gcc, do -NOT- guarantee this. */
|
||
|
if (old_j > j) {
|
||
|
PyErr_SetString(PyExc_OverflowError,
|
||
|
"result is too long");
|
||
|
return NULL;
|
||
|
}
|
||
|
old_j = j;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
j++;
|
||
|
if (*p == '\n' || *p == '\r') {
|
||
|
i += j;
|
||
|
old_j = j = 0;
|
||
|
/* XXX: this depends on a signed integer overflow to < 0 */
|
||
|
/* C compilers, including gcc, do -NOT- guarantee this. */
|
||
|
if (i < 0) {
|
||
|
PyErr_SetString(PyExc_OverflowError,
|
||
|
"result is too long");
|
||
|
return NULL;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ((i + j) < 0) {
|
||
|
/* XXX: this depends on a signed integer overflow to < 0 */
|
||
|
/* C compilers, including gcc, do -NOT- guarantee this. */
|
||
|
PyErr_SetString(PyExc_OverflowError, "result is too long");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/* Second pass: create output string and fill it */
|
||
|
u = STRINGLIB_NEW(NULL, i + j);
|
||
|
if (!u)
|
||
|
return NULL;
|
||
|
|
||
|
j = 0;
|
||
|
q = STRINGLIB_STR(u);
|
||
|
|
||
|
for (p = STRINGLIB_STR(self); p < e; p++)
|
||
|
if (*p == '\t') {
|
||
|
if (tabsize > 0) {
|
||
|
i = tabsize - (j % tabsize);
|
||
|
j += i;
|
||
|
while (i--)
|
||
|
*q++ = ' ';
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
j++;
|
||
|
*q++ = *p;
|
||
|
if (*p == '\n' || *p == '\r')
|
||
|
j = 0;
|
||
|
}
|
||
|
|
||
|
return u;
|
||
|
}
|
||
|
|
||
|
/* Build a padded version of self: `left` copies of `fill`, then the bytes
 * of self, then `right` copies of `fill`.  Negative pad counts are treated
 * as zero.
 *
 * When no padding is requested and self passes STRINGLIB_CHECK_EXACT, the
 * result is an identical copy (STRINGLIB_MUTABLE builds) or self itself
 * with its reference count incremented (otherwise).
 *
 * Returns NULL if STRINGLIB_NEW fails.
 */
Py_LOCAL_INLINE(PyObject *)
pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
    PyObject *result;
    Py_ssize_t self_len;
    char *dest;

    if (left < 0)
        left = 0;
    if (right < 0)
        right = 0;

    if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
#if STRINGLIB_MUTABLE
        /* We're defined as returning a copy;  If the object is mutable
         * that means we must make an identical copy. */
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
        Py_INCREF(self);
        return (PyObject *)self;
#endif /* STRINGLIB_MUTABLE */
    }

    self_len = STRINGLIB_LEN(self);
    result = STRINGLIB_NEW(NULL, left + self_len + right);
    if (result == NULL)
        return NULL;

    /* Layout of the new buffer: [left fill][self bytes][right fill] */
    dest = STRINGLIB_STR(result);
    if (left)
        memset(dest, fill, left);
    Py_MEMCPY(dest + left, STRINGLIB_STR(self), self_len);
    if (right)
        memset(dest + left + self_len, fill, right);

    return result;
}
|
||
|
|
||
|
PyDoc_STRVAR(ljust__doc__,
|
||
|
"B.ljust(width[, fillchar]) -> modified copy of B\n"
|
||
|
"\n"
|
||
|
"Return B left justified in a string of length width. Padding is\n"
|
||
|
"done using the specified fill character (default is a space).");
|
||
|
|
||
|
static PyObject *
|
||
|
stringlib_ljust(PyObject *self, PyObject *args)
|
||
|
{
|
||
|
Py_ssize_t width;
|
||
|
char fillchar = ' ';
|
||
|
|
||
|
if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
|
||
|
return NULL;
|
||
|
|
||
|
if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
|
||
|
#if STRINGLIB_MUTABLE
|
||
|
/* We're defined as returning a copy; If the object is mutable
|
||
|
* that means we must make an identical copy. */
|
||
|
return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
||
|
#else
|
||
|
Py_INCREF(self);
|
||
|
return (PyObject*) self;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
|
||
|
}
|
||
|
|
||
|
|
||
|
PyDoc_STRVAR(rjust__doc__,
|
||
|
"B.rjust(width[, fillchar]) -> modified copy of B\n"
|
||
|
"\n"
|
||
|
"Return B right justified in a string of length width. Padding is\n"
|
||
|
"done using the specified fill character (default is a space)");
|
||
|
|
||
|
static PyObject *
|
||
|
stringlib_rjust(PyObject *self, PyObject *args)
|
||
|
{
|
||
|
Py_ssize_t width;
|
||
|
char fillchar = ' ';
|
||
|
|
||
|
if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
|
||
|
return NULL;
|
||
|
|
||
|
if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
|
||
|
#if STRINGLIB_MUTABLE
|
||
|
/* We're defined as returning a copy; If the object is mutable
|
||
|
* that means we must make an identical copy. */
|
||
|
return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
||
|
#else
|
||
|
Py_INCREF(self);
|
||
|
return (PyObject*) self;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
|
||
|
}
|
||
|
|
||
|
|
||
|
PyDoc_STRVAR(center__doc__,
|
||
|
"B.center(width[, fillchar]) -> modified copy of B\n"
|
||
|
"\n"
|
||
|
"Return B centered in a string of length width. Padding is\n"
|
||
|
"done using the specified fill character (default is a space)");
|
||
|
|
||
|
static PyObject *
|
||
|
stringlib_center(PyObject *self, PyObject *args)
|
||
|
{
|
||
|
Py_ssize_t marg, left;
|
||
|
Py_ssize_t width;
|
||
|
char fillchar = ' ';
|
||
|
|
||
|
if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
|
||
|
return NULL;
|
||
|
|
||
|
if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
|
||
|
#if STRINGLIB_MUTABLE
|
||
|
/* We're defined as returning a copy; If the object is mutable
|
||
|
* that means we must make an identical copy. */
|
||
|
return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
||
|
#else
|
||
|
Py_INCREF(self);
|
||
|
return (PyObject*) self;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
marg = width - STRINGLIB_LEN(self);
|
||
|
left = marg / 2 + (marg & width & 1);
|
||
|
|
||
|
return pad(self, left, marg - left, fillchar);
|
||
|
}
|
||
|
|
||
|
PyDoc_STRVAR(zfill__doc__,
|
||
|
"B.zfill(width) -> modified copy of B\n"
|
||
|
"\n"
|
||
|
"Pad a numeric string B with zeros on the left, to fill a field\n"
|
||
|
"of the specified width. B is never truncated.");
|
||
|
|
||
|
static PyObject *
|
||
|
stringlib_zfill(PyObject *self, PyObject *args)
|
||
|
{
|
||
|
Py_ssize_t fill;
|
||
|
PyObject *s;
|
||
|
char *p;
|
||
|
Py_ssize_t width;
|
||
|
|
||
|
if (!PyArg_ParseTuple(args, "n:zfill", &width))
|
||
|
return NULL;
|
||
|
|
||
|
if (STRINGLIB_LEN(self) >= width) {
|
||
|
if (STRINGLIB_CHECK_EXACT(self)) {
|
||
|
#if STRINGLIB_MUTABLE
|
||
|
/* We're defined as returning a copy; If the object is mutable
|
||
|
* that means we must make an identical copy. */
|
||
|
return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
||
|
#else
|
||
|
Py_INCREF(self);
|
||
|
return (PyObject*) self;
|
||
|
#endif
|
||
|
}
|
||
|
else
|
||
|
return STRINGLIB_NEW(
|
||
|
STRINGLIB_STR(self),
|
||
|
STRINGLIB_LEN(self)
|
||
|
);
|
||
|
}
|
||
|
|
||
|
fill = width - STRINGLIB_LEN(self);
|
||
|
|
||
|
s = pad(self, fill, 0, '0');
|
||
|
|
||
|
if (s == NULL)
|
||
|
return NULL;
|
||
|
|
||
|
p = STRINGLIB_STR(s);
|
||
|
if (p[fill] == '+' || p[fill] == '-') {
|
||
|
/* move sign to beginning of string */
|
||
|
p[0] = p[fill];
|
||
|
p[fill] = '0';
|
||
|
}
|
||
|
|
||
|
return (PyObject*) s;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Append the slice data[left:right] to `list` as a new object created by
 * STRINGLIB_NEW.
 *
 * Must be expanded in a scope that provides the variables `list` and `str`
 * and a label `onError`; control jumps to onError if STRINGLIB_NEW or
 * PyList_Append fails.  The reference stored in `str` is released in both
 * the success and the failure case.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, even inside an unbraced
 * if/else.
 */
#define _STRINGLIB_SPLIT_APPEND(data, left, right)              \
    do {                                                        \
        int append_failed_;                                     \
        str = STRINGLIB_NEW((data) + (left),                    \
                            (right) - (left));                  \
        if (str == NULL)                                        \
            goto onError;                                       \
        append_failed_ = PyList_Append(list, str);              \
        Py_DECREF(str);                                         \
        if (append_failed_)                                     \
            goto onError;                                       \
    } while (0)
|
||
|
|
||
|
PyDoc_STRVAR(splitlines__doc__,
|
||
|
"B.splitlines([keepends]) -> list of lines\n\
|
||
|
\n\
|
||
|
Return a list of the lines in B, breaking at line boundaries.\n\
|
||
|
Line breaks are not included in the resulting list unless keepends\n\
|
||
|
is given and true.");
|
||
|
|
||
|
static PyObject*
|
||
|
stringlib_splitlines(PyObject *self, PyObject *args)
|
||
|
{
|
||
|
register Py_ssize_t i;
|
||
|
register Py_ssize_t j;
|
||
|
Py_ssize_t len;
|
||
|
int keepends = 0;
|
||
|
PyObject *list;
|
||
|
PyObject *str;
|
||
|
char *data;
|
||
|
|
||
|
if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
|
||
|
return NULL;
|
||
|
|
||
|
data = STRINGLIB_STR(self);
|
||
|
len = STRINGLIB_LEN(self);
|
||
|
|
||
|
/* This does not use the preallocated list because splitlines is
|
||
|
usually run with hundreds of newlines. The overhead of
|
||
|
switching between PyList_SET_ITEM and append causes about a
|
||
|
2-3% slowdown for that common case. A smarter implementation
|
||
|
could move the if check out, so the SET_ITEMs are done first
|
||
|
and the appends only done when the prealloc buffer is full.
|
||
|
That's too much work for little gain.*/
|
||
|
|
||
|
list = PyList_New(0);
|
||
|
if (!list)
|
||
|
goto onError;
|
||
|
|
||
|
for (i = j = 0; i < len; ) {
|
||
|
Py_ssize_t eol;
|
||
|
|
||
|
/* Find a line and append it */
|
||
|
while (i < len && data[i] != '\n' && data[i] != '\r')
|
||
|
i++;
|
||
|
|
||
|
/* Skip the line break reading CRLF as one line break */
|
||
|
eol = i;
|
||
|
if (i < len) {
|
||
|
if (data[i] == '\r' && i + 1 < len &&
|
||
|
data[i+1] == '\n')
|
||
|
i += 2;
|
||
|
else
|
||
|
i++;
|
||
|
if (keepends)
|
||
|
eol = i;
|
||
|
}
|
||
|
_STRINGLIB_SPLIT_APPEND(data, j, eol);
|
||
|
j = i;
|
||
|
}
|
||
|
if (j < len) {
|
||
|
_STRINGLIB_SPLIT_APPEND(data, j, len);
|
||
|
}
|
||
|
|
||
|
return list;
|
||
|
|
||
|
onError:
|
||
|
Py_XDECREF(list);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
#undef _STRINGLIB_SPLIT_APPEND
|
||
|
|