mirror of https://github.com/python/cpython
gh-81283: compiler: remove indent from docstring (#106411)
Co-authored-by: Éric <merwok@netwok.org>
This commit is contained in:
parent
bbf6297985
commit
2566b74b26
|
@ -79,6 +79,13 @@ Other Language Changes
|
|||
* Allow the *count* argument of :meth:`str.replace` to be a keyword.
|
||||
(Contributed by Hugo van Kemenade in :gh:`106487`.)
|
||||
|
||||
* The compiler now strips indents from docstrings.
|
||||
This reduces the size of the :term:`bytecode cache <bytecode>` (e.g. ``.pyc`` files).
|
||||
For example, cache file size for ``sqlalchemy.orm.session`` in SQLAlchemy 2.0
|
||||
is reduced by about 5%.
|
||||
This change will affect tools using docstrings, like :mod:`doctest`.
|
||||
(Contributed by Inada Naoki in :gh:`81283`.)
|
||||
|
||||
New Modules
|
||||
===========
|
||||
|
||||
|
|
|
@ -91,6 +91,8 @@ int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
|
|||
|
||||
/* Access compiler internals for unit testing */
|
||||
|
||||
PyAPI_FUNC(PyObject*) _PyCompile_CleanDoc(PyObject *doc);
|
||||
|
||||
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
|
||||
PyObject *ast,
|
||||
PyObject *filename,
|
||||
|
|
|
@ -881,29 +881,28 @@ def cleandoc(doc):
|
|||
|
||||
Any whitespace that can be uniformly removed from the second line
|
||||
onwards is removed."""
|
||||
try:
|
||||
lines = doc.expandtabs().split('\n')
|
||||
except UnicodeError:
|
||||
return None
|
||||
else:
|
||||
# Find minimum indentation of any non-blank lines after first line.
|
||||
margin = sys.maxsize
|
||||
for line in lines[1:]:
|
||||
content = len(line.lstrip())
|
||||
if content:
|
||||
indent = len(line) - content
|
||||
margin = min(margin, indent)
|
||||
# Remove indentation.
|
||||
if lines:
|
||||
lines[0] = lines[0].lstrip()
|
||||
if margin < sys.maxsize:
|
||||
for i in range(1, len(lines)): lines[i] = lines[i][margin:]
|
||||
# Remove any trailing or leading blank lines.
|
||||
while lines and not lines[-1]:
|
||||
lines.pop()
|
||||
while lines and not lines[0]:
|
||||
lines.pop(0)
|
||||
return '\n'.join(lines)
|
||||
lines = doc.expandtabs().split('\n')
|
||||
|
||||
# Find minimum indentation of any non-blank lines after first line.
|
||||
margin = sys.maxsize
|
||||
for line in lines[1:]:
|
||||
content = len(line.lstrip(' '))
|
||||
if content:
|
||||
indent = len(line) - content
|
||||
margin = min(margin, indent)
|
||||
# Remove indentation.
|
||||
if lines:
|
||||
lines[0] = lines[0].lstrip(' ')
|
||||
if margin < sys.maxsize:
|
||||
for i in range(1, len(lines)):
|
||||
lines[i] = lines[i][margin:]
|
||||
# Remove any trailing or leading blank lines.
|
||||
while lines and not lines[-1]:
|
||||
lines.pop()
|
||||
while lines and not lines[0]:
|
||||
lines.pop(0)
|
||||
return '\n'.join(lines)
|
||||
|
||||
|
||||
def getfile(object):
|
||||
"""Work out which source or compiled file an object was defined in."""
|
||||
|
|
|
@ -1287,14 +1287,14 @@ The NORMALIZE_WHITESPACE flag causes all sequences of whitespace to be
|
|||
treated as equal:
|
||||
|
||||
>>> def f(x):
|
||||
... '>>> print(1, 2, 3)\n 1 2\n 3'
|
||||
... '\n>>> print(1, 2, 3)\n 1 2\n 3'
|
||||
|
||||
>>> # Without the flag:
|
||||
>>> test = doctest.DocTestFinder().find(f)[0]
|
||||
>>> doctest.DocTestRunner(verbose=False).run(test)
|
||||
... # doctest: +ELLIPSIS
|
||||
**********************************************************************
|
||||
File ..., line 2, in f
|
||||
File ..., line 3, in f
|
||||
Failed example:
|
||||
print(1, 2, 3)
|
||||
Expected:
|
||||
|
|
|
@ -596,9 +596,40 @@ class TestRetrievingSourceCode(GetSourceBase):
|
|||
self.assertEqual(finddoc(int.from_bytes), int.from_bytes.__doc__)
|
||||
self.assertEqual(finddoc(int.real), int.real.__doc__)
|
||||
|
||||
cleandoc_testdata = [
|
||||
# first line should have different margin
|
||||
(' An\n indented\n docstring.', 'An\nindented\n docstring.'),
|
||||
# trailing whitespace is not removed.
|
||||
(' An \n \n indented \n docstring. ',
|
||||
'An \n \nindented \n docstring. '),
|
||||
# NUL is not termination.
|
||||
('doc\0string\n\n second\0line\n third\0line\0',
|
||||
'doc\0string\n\nsecond\0line\nthird\0line\0'),
|
||||
# first line is lstrip()-ped. other lines are kept when no margin.
|
||||
(' ', ''),
|
||||
# compiler.cleandoc() doesn't strip leading/trailing newlines
|
||||
# to keep maximum backward compatibility.
|
||||
# inspect.cleandoc() removes them.
|
||||
('\n\n\n first paragraph\n\n second paragraph\n\n',
|
||||
'\n\n\nfirst paragraph\n\n second paragraph\n\n'),
|
||||
(' \n \n \n ', '\n \n \n '),
|
||||
]
|
||||
|
||||
def test_cleandoc(self):
|
||||
self.assertEqual(inspect.cleandoc('An\n indented\n docstring.'),
|
||||
'An\nindented\ndocstring.')
|
||||
func = inspect.cleandoc
|
||||
for i, (input, expected) in enumerate(self.cleandoc_testdata):
|
||||
# only inspect.cleandoc() strips leading and trailing \n
|
||||
expected = expected.strip('\n')
|
||||
with self.subTest(i=i):
|
||||
self.assertEqual(func(input), expected)
|
||||
|
||||
@cpython_only
|
||||
def test_c_cleandoc(self):
|
||||
import _testinternalcapi
|
||||
func = _testinternalcapi.compiler_cleandoc
|
||||
for i, (input, expected) in enumerate(self.cleandoc_testdata):
|
||||
with self.subTest(i=i):
|
||||
self.assertEqual(func(input), expected)
|
||||
|
||||
def test_getcomments(self):
|
||||
self.assertEqual(inspect.getcomments(mod), '# line 1\n')
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
The compiler now strips indents from docstrings. It reduces ``pyc`` file size by about 5%
|
||||
when the module is heavily documented. This change affects ``__doc__``, so
|
||||
tools like doctest will be affected.
|
|
@ -15,7 +15,7 @@
|
|||
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
|
||||
#include "pycore_bitutils.h" // _Py_bswap32()
|
||||
#include "pycore_bytesobject.h" // _PyBytes_Find()
|
||||
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
|
||||
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble, _PyCompile_CleanDoc
|
||||
#include "pycore_ceval.h" // _PyEval_AddPendingCall
|
||||
#include "pycore_fileutils.h" // _Py_normpath
|
||||
#include "pycore_frame.h" // _PyInterpreterFrame
|
||||
|
@ -704,6 +704,23 @@ set_eval_frame_record(PyObject *self, PyObject *list)
|
|||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
||||
_testinternalcapi.compiler_cleandoc -> object
|
||||
|
||||
doc: unicode
|
||||
|
||||
C implementation of inspect.cleandoc().
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_testinternalcapi_compiler_cleandoc_impl(PyObject *module, PyObject *doc)
|
||||
/*[clinic end generated code: output=2dd203a80feff5bc input=2de03fab931d9cdc]*/
|
||||
{
|
||||
// Test-only shim: forward `doc` to _PyCompile_CleanDoc() and hand its
// result (or NULL on failure) straight back to the caller. `module` is unused.
return _PyCompile_CleanDoc(doc);
|
||||
}
|
||||
|
||||
|
||||
/*[clinic input]
|
||||
|
||||
_testinternalcapi.compiler_codegen -> object
|
||||
|
@ -1448,6 +1465,7 @@ static PyMethodDef module_functions[] = {
|
|||
{"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
|
||||
{"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL},
|
||||
{"set_eval_frame_record", set_eval_frame_record, METH_O, NULL},
|
||||
_TESTINTERNALCAPI_COMPILER_CLEANDOC_METHODDEF
|
||||
_TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF
|
||||
_TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF
|
||||
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF
|
||||
|
|
|
@ -8,6 +8,65 @@ preserve
|
|||
#endif
|
||||
|
||||
|
||||
PyDoc_STRVAR(_testinternalcapi_compiler_cleandoc__doc__,
|
||||
"compiler_cleandoc($module, /, doc)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"C implementation of inspect.cleandoc().");
|
||||
|
||||
#define _TESTINTERNALCAPI_COMPILER_CLEANDOC_METHODDEF \
|
||||
{"compiler_cleandoc", _PyCFunction_CAST(_testinternalcapi_compiler_cleandoc), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_compiler_cleandoc__doc__},
|
||||
|
||||
static PyObject *
|
||||
_testinternalcapi_compiler_cleandoc_impl(PyObject *module, PyObject *doc);
|
||||
|
||||
static PyObject *
|
||||
_testinternalcapi_compiler_cleandoc(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
||||
|
||||
#define NUM_KEYWORDS 1
|
||||
static struct {
|
||||
PyGC_Head _this_is_not_used;
|
||||
PyObject_VAR_HEAD
|
||||
PyObject *ob_item[NUM_KEYWORDS];
|
||||
} _kwtuple = {
|
||||
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
|
||||
.ob_item = { &_Py_ID(doc), },
|
||||
};
|
||||
#undef NUM_KEYWORDS
|
||||
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
|
||||
|
||||
#else // !Py_BUILD_CORE
|
||||
# define KWTUPLE NULL
|
||||
#endif // !Py_BUILD_CORE
|
||||
|
||||
static const char * const _keywords[] = {"doc", NULL};
|
||||
static _PyArg_Parser _parser = {
|
||||
.keywords = _keywords,
|
||||
.fname = "compiler_cleandoc",
|
||||
.kwtuple = KWTUPLE,
|
||||
};
|
||||
#undef KWTUPLE
|
||||
PyObject *argsbuf[1];
|
||||
PyObject *doc;
|
||||
|
||||
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
|
||||
if (!args) {
|
||||
goto exit;
|
||||
}
|
||||
if (!PyUnicode_Check(args[0])) {
|
||||
_PyArg_BadArgument("compiler_cleandoc", "argument 'doc'", "str", args[0]);
|
||||
goto exit;
|
||||
}
|
||||
doc = args[0];
|
||||
return_value = _testinternalcapi_compiler_cleandoc_impl(module, doc);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_testinternalcapi_compiler_codegen__doc__,
|
||||
"compiler_codegen($module, /, ast, filename, optimize, compile_mode=0)\n"
|
||||
"--\n"
|
||||
|
@ -206,4 +265,4 @@ _testinternalcapi_assemble_code_object(PyObject *module, PyObject *const *args,
|
|||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=2965f1578b986218 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=811d50772c8f285a input=a9049054013a1b77]*/
|
||||
|
|
|
@ -1704,10 +1704,16 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts)
|
|||
if (c->c_optimize < 2) {
|
||||
docstring = _PyAST_GetDocString(stmts);
|
||||
if (docstring) {
|
||||
PyObject *cleandoc = _PyCompile_CleanDoc(docstring);
|
||||
if (cleandoc == NULL) {
|
||||
return ERROR;
|
||||
}
|
||||
i = 1;
|
||||
st = (stmt_ty)asdl_seq_GET(stmts, 0);
|
||||
assert(st->kind == Expr_kind);
|
||||
VISIT(c, expr, st->v.Expr.value);
|
||||
location loc = LOC(st->v.Expr.value);
|
||||
ADDOP_LOAD_CONST(c, loc, cleandoc);
|
||||
Py_DECREF(cleandoc);
|
||||
RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store));
|
||||
}
|
||||
}
|
||||
|
@ -2252,11 +2258,19 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f
|
|||
/* if not -OO mode, add docstring */
|
||||
if (c->c_optimize < 2) {
|
||||
docstring = _PyAST_GetDocString(body);
|
||||
if (docstring) {
|
||||
docstring = _PyCompile_CleanDoc(docstring);
|
||||
if (docstring == NULL) {
|
||||
compiler_exit_scope(c);
|
||||
return ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) {
|
||||
compiler_exit_scope(c);
|
||||
return ERROR;
|
||||
}
|
||||
Py_XDECREF(docstring);
|
||||
|
||||
c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args);
|
||||
c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
|
||||
|
@ -7967,6 +7981,89 @@ error:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// C implementation of inspect.cleandoc()
|
||||
//
|
||||
// Difference from inspect.cleandoc():
|
||||
// - Do not remove leading and trailing blank lines to keep lineno.
|
||||
PyObject *
|
||||
_PyCompile_CleanDoc(PyObject *doc)
|
||||
{
|
||||
doc = PyObject_CallMethod(doc, "expandtabs", NULL);
|
||||
if (doc == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Py_ssize_t doc_size;
|
||||
const char *doc_utf8 = PyUnicode_AsUTF8AndSize(doc, &doc_size);
|
||||
if (doc_utf8 == NULL) {
|
||||
Py_DECREF(doc);
|
||||
return NULL;
|
||||
}
|
||||
const char *p = doc_utf8;
|
||||
const char *pend = p + doc_size;
|
||||
|
||||
// First pass: find minimum indentation of any non-blank lines
|
||||
// after first line.
|
||||
while (p < pend && *p++ != '\n') {
|
||||
}
|
||||
|
||||
Py_ssize_t margin = PY_SSIZE_T_MAX;
|
||||
while (p < pend) {
|
||||
const char *s = p;
|
||||
while (*p == ' ') p++;
|
||||
if (p < pend && *p != '\n') {
|
||||
margin = Py_MIN(margin, p - s);
|
||||
}
|
||||
while (p < pend && *p++ != '\n') {
|
||||
}
|
||||
}
|
||||
if (margin == PY_SSIZE_T_MAX) {
|
||||
margin = 0;
|
||||
}
|
||||
|
||||
// Second pass: write cleandoc into buff.
|
||||
|
||||
// copy first line without leading spaces.
|
||||
p = doc_utf8;
|
||||
while (*p == ' ') {
|
||||
p++;
|
||||
}
|
||||
if (p == doc_utf8 && margin == 0 ) {
|
||||
// doc is already clean.
|
||||
return doc;
|
||||
}
|
||||
|
||||
char *buff = PyMem_Malloc(doc_size);
|
||||
char *w = buff;
|
||||
|
||||
while (p < pend) {
|
||||
int ch = *w++ = *p++;
|
||||
if (ch == '\n') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// copy subsequent lines without margin.
|
||||
while (p < pend) {
|
||||
for (Py_ssize_t i = 0; i < margin; i++, p++) {
|
||||
if (*p != ' ') {
|
||||
assert(*p == '\n' || *p == '\0');
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (p < pend) {
|
||||
int ch = *w++ = *p++;
|
||||
if (ch == '\n') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Py_DECREF(doc);
|
||||
return PyUnicode_FromStringAndSize(buff, w - buff);
|
||||
}
|
||||
|
||||
|
||||
PyObject *
|
||||
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags,
|
||||
int optimize, int compile_mode)
|
||||
|
|
Loading…
Reference in New Issue