Patch #923098: Share interned strings in marshal.
This commit is contained in:
parent
8d97e33bb7
commit
ef82d2fdfe
|
@ -283,20 +283,31 @@ data must be opened in binary mode.
|
|||
|
||||
Numeric values are stored with the least significant byte first.
|
||||
|
||||
\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file}
|
||||
The module supports two versions of the data format: version 0 is the
|
||||
historical version, version 1 (new in Python 2.4) shares interned
|
||||
strings in the file, and upon unmarshalling. \var{Py_MARSHAL_VERSION}
|
||||
indicates the current file format (currently 1).
|
||||
|
||||
\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file, int version}
|
||||
Marshal a \ctype{long} integer, \var{value}, to \var{file}. This
|
||||
will only write the least-significant 32 bits of \var{value},
|
||||
regardless of the size of the native \ctype{long} type.
|
||||
|
||||
\versionchanged[\var{version} indicates the file format]{2.4}
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value,
|
||||
FILE *file}
|
||||
FILE *file, int version}
|
||||
Marshal a Python object, \var{value}, to \var{file}.
|
||||
|
||||
\versionchanged[\var{version} indicates the file format]{2.4}
|
||||
\end{cfuncdesc}
|
||||
|
||||
\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value}
|
||||
\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value, int version}
|
||||
Return a string object containing the marshalled representation of
|
||||
\var{value}.
|
||||
|
||||
\versionchanged[\var{version} indicates the file format]{2.4}
|
||||
\end{cfuncdesc}
|
||||
|
||||
The following functions allow marshalled values to be read back in.
|
||||
|
|
|
@ -73,6 +73,9 @@ The module defines these functions:
|
|||
a \exception{ValueError} exception is raised --- but garbage data
|
||||
will also be written to the file. The object will not be properly
|
||||
read back by \function{load()}.
|
||||
|
||||
\versionadded[The \var{version} argument indicates the data
|
||||
format that \code{dump} should use.]{2.4}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{load}{file}
|
||||
|
@ -86,11 +89,14 @@ The module defines these functions:
|
|||
\code{None} for the unmarshallable type.}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{dumps}{value}
|
||||
\begin{funcdesc}{dumps}{value\optional{, version}}
|
||||
Return the string that would be written to a file by
|
||||
\code{dump(\var{value}, \var{file})}. The value must be a supported
|
||||
type. Raise a \exception{ValueError} exception if value has (or
|
||||
contains an object that has) an unsupported type.
|
||||
|
||||
\versionadded[The \var{version} argument indicates the data
|
||||
format that \code{dumps} should use.]{2.4}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{loads}{string}
|
||||
|
@ -98,3 +104,13 @@ The module defines these functions:
|
|||
\exception{EOFError}, \exception{ValueError} or
|
||||
\exception{TypeError}. Extra characters in the string are ignored.
|
||||
\end{funcdesc}
|
||||
|
||||
In addition, the following constants are defined:
|
||||
|
||||
\begin{datadesc}{version}
|
||||
Indicates the format that the module uses. Version 0 is the
|
||||
historical format, version 1 (added in Python 2.4) shares
|
||||
interned strings. The current version is 1.
|
||||
|
||||
\versionadded{2.4}
|
||||
\end{datadesc}
|
|
@ -7,9 +7,11 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *);
|
||||
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *);
|
||||
PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *);
|
||||
#define Py_MARSHAL_VERSION 1
|
||||
|
||||
PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int);
|
||||
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int);
|
||||
PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int);
|
||||
|
||||
PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *);
|
||||
PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *);
|
||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
|
|||
Core and builtins
|
||||
-----------------
|
||||
|
||||
- marshal now shares interned strings. This change introduces
|
||||
a new .pyc magic.
|
||||
|
||||
- Bug #966623. classes created with type() in an exec(, {}) don't
|
||||
have a __module__, but code in typeobject assumed it would always
|
||||
be there.
|
||||
|
|
|
@ -26,9 +26,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
|
|||
a .pyc file in text mode the magic number will be wrong; also, the
|
||||
Apple MPW compiler swaps their values, botching string constants.
|
||||
|
||||
Apparently, there was a distinction made between even and odd
|
||||
bytecodes that is related to Unicode. The details aren't clear,
|
||||
but the magic number has been odd for a long time.
|
||||
The magic numbers must be spaced apart at least 2 values, as the
|
||||
-U interpreter flag will cause MAGIC+1 to be used. They have been
|
||||
odd numbers for some time now.
|
||||
|
||||
There were a variety of old schemes for setting the magic number.
|
||||
The current working scheme is to increment the previous value by
|
||||
|
@ -47,9 +47,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
|
|||
Python 2.3a0: 62011
|
||||
Python 2.3a0: 62021
|
||||
Python 2.3a0: 62011 (!)
|
||||
Python 2.4a0: 62031
|
||||
Python 2.4a0: 62041
|
||||
*/
|
||||
#define MAGIC (62031 | ((long)'\r'<<16) | ((long)'\n'<<24))
|
||||
#define MAGIC (62041 | ((long)'\r'<<16) | ((long)'\n'<<24))
|
||||
|
||||
/* Magic word as global; note that _PyImport_Init() can change the
|
||||
value of this global to accommodate for alterations of how the
|
||||
|
@ -797,10 +797,10 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime)
|
|||
"# can't create %s\n", cpathname);
|
||||
return;
|
||||
}
|
||||
PyMarshal_WriteLongToFile(pyc_magic, fp);
|
||||
PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION);
|
||||
/* First write a 0 for mtime */
|
||||
PyMarshal_WriteLongToFile(0L, fp);
|
||||
PyMarshal_WriteObjectToFile((PyObject *)co, fp);
|
||||
PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
|
||||
PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION);
|
||||
if (fflush(fp) != 0 || ferror(fp)) {
|
||||
if (Py_VerboseFlag)
|
||||
PySys_WriteStderr("# can't write %s\n", cpathname);
|
||||
|
@ -811,7 +811,7 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime)
|
|||
}
|
||||
/* Now write the true mtime */
|
||||
fseek(fp, 4L, 0);
|
||||
PyMarshal_WriteLongToFile(mtime, fp);
|
||||
PyMarshal_WriteLongToFile(mtime, fp, Py_MARSHAL_VERSION);
|
||||
fflush(fp);
|
||||
fclose(fp);
|
||||
if (Py_VerboseFlag)
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
#define TYPE_COMPLEX 'x'
|
||||
#define TYPE_LONG 'l'
|
||||
#define TYPE_STRING 's'
|
||||
#define TYPE_INTERNED 't'
|
||||
#define TYPE_STRINGREF 'R'
|
||||
#define TYPE_TUPLE '('
|
||||
#define TYPE_LIST '['
|
||||
#define TYPE_DICT '{'
|
||||
|
@ -42,6 +44,7 @@ typedef struct {
|
|||
PyObject *str;
|
||||
char *ptr;
|
||||
char *end;
|
||||
PyObject *strings; /* dict on marshal, list on unmarshal */
|
||||
} WFILE;
|
||||
|
||||
#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
|
||||
|
@ -189,7 +192,24 @@ w_object(PyObject *v, WFILE *p)
|
|||
}
|
||||
#endif
|
||||
else if (PyString_Check(v)) {
|
||||
w_byte(TYPE_STRING, p);
|
||||
if (p->strings && PyString_CHECK_INTERNED(v)) {
|
||||
PyObject *o = PyDict_GetItem(p->strings, v);
|
||||
if (o) {
|
||||
long w = PyInt_AsLong(o);
|
||||
w_byte(TYPE_STRINGREF, p);
|
||||
w_long(w, p);
|
||||
goto exit;
|
||||
}
|
||||
else {
|
||||
o = PyInt_FromLong(PyDict_Size(p->strings));
|
||||
PyDict_SetItem(p->strings, v, o);
|
||||
Py_DECREF(o);
|
||||
w_byte(TYPE_INTERNED, p);
|
||||
}
|
||||
}
|
||||
else {
|
||||
w_byte(TYPE_STRING, p);
|
||||
}
|
||||
n = PyString_GET_SIZE(v);
|
||||
w_long((long)n, p);
|
||||
w_string(PyString_AS_STRING(v), n, p);
|
||||
|
@ -269,28 +289,32 @@ w_object(PyObject *v, WFILE *p)
|
|||
w_byte(TYPE_UNKNOWN, p);
|
||||
p->error = 1;
|
||||
}
|
||||
|
||||
exit:
|
||||
p->depth--;
|
||||
}
|
||||
|
||||
/* version currently has no effect for writing longs. */
|
||||
void
|
||||
PyMarshal_WriteLongToFile(long x, FILE *fp)
|
||||
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
|
||||
{
|
||||
WFILE wf;
|
||||
wf.fp = fp;
|
||||
wf.error = 0;
|
||||
wf.depth = 0;
|
||||
wf.strings = NULL;
|
||||
w_long(x, &wf);
|
||||
}
|
||||
|
||||
void
|
||||
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp)
|
||||
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
|
||||
{
|
||||
WFILE wf;
|
||||
wf.fp = fp;
|
||||
wf.error = 0;
|
||||
wf.depth = 0;
|
||||
wf.strings = (version > 0) ? PyDict_New() : NULL;
|
||||
w_object(x, &wf);
|
||||
Py_XDECREF(wf.strings);
|
||||
}
|
||||
|
||||
typedef WFILE RFILE; /* Same struct with different invariants */
|
||||
|
@ -491,6 +515,7 @@ r_object(RFILE *p)
|
|||
}
|
||||
#endif
|
||||
|
||||
case TYPE_INTERNED:
|
||||
case TYPE_STRING:
|
||||
n = r_long(p);
|
||||
if (n < 0) {
|
||||
|
@ -506,6 +531,16 @@ r_object(RFILE *p)
|
|||
"EOF read where object expected");
|
||||
}
|
||||
}
|
||||
if (type == TYPE_INTERNED) {
|
||||
PyString_InternInPlace(&v);
|
||||
PyList_Append(p->strings, v);
|
||||
}
|
||||
return v;
|
||||
|
||||
case TYPE_STRINGREF:
|
||||
n = r_long(p);
|
||||
v = PyList_GET_ITEM(p->strings, n);
|
||||
Py_INCREF(v);
|
||||
return v;
|
||||
|
||||
#ifdef Py_USING_UNICODE
|
||||
|
@ -673,6 +708,7 @@ PyMarshal_ReadShortFromFile(FILE *fp)
|
|||
{
|
||||
RFILE rf;
|
||||
rf.fp = fp;
|
||||
rf.strings = NULL;
|
||||
return r_short(&rf);
|
||||
}
|
||||
|
||||
|
@ -681,6 +717,7 @@ PyMarshal_ReadLongFromFile(FILE *fp)
|
|||
{
|
||||
RFILE rf;
|
||||
rf.fp = fp;
|
||||
rf.strings = NULL;
|
||||
return r_long(&rf);
|
||||
}
|
||||
|
||||
|
@ -747,22 +784,30 @@ PyObject *
|
|||
PyMarshal_ReadObjectFromFile(FILE *fp)
|
||||
{
|
||||
RFILE rf;
|
||||
PyObject *result;
|
||||
rf.fp = fp;
|
||||
return read_object(&rf);
|
||||
rf.strings = PyList_New(0);
|
||||
result = r_object(&rf);
|
||||
Py_DECREF(rf.strings);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyMarshal_ReadObjectFromString(char *str, int len)
|
||||
{
|
||||
RFILE rf;
|
||||
PyObject *result;
|
||||
rf.fp = NULL;
|
||||
rf.ptr = str;
|
||||
rf.end = str + len;
|
||||
return read_object(&rf);
|
||||
rf.strings = PyList_New(0);
|
||||
result = r_object(&rf);
|
||||
Py_DECREF(rf.strings);
|
||||
return result;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */
|
||||
PyMarshal_WriteObjectToString(PyObject *x, int version)
|
||||
{
|
||||
WFILE wf;
|
||||
wf.fp = NULL;
|
||||
|
@ -773,7 +818,9 @@ PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */
|
|||
wf.end = wf.ptr + PyString_Size(wf.str);
|
||||
wf.error = 0;
|
||||
wf.depth = 0;
|
||||
wf.strings = (version > 0) ? PyDict_New() : NULL;
|
||||
w_object(x, &wf);
|
||||
Py_XDECREF(wf.strings);
|
||||
if (wf.str != NULL)
|
||||
_PyString_Resize(&wf.str,
|
||||
(int) (wf.ptr -
|
||||
|
@ -796,7 +843,8 @@ marshal_dump(PyObject *self, PyObject *args)
|
|||
WFILE wf;
|
||||
PyObject *x;
|
||||
PyObject *f;
|
||||
if (!PyArg_ParseTuple(args, "OO:dump", &x, &f))
|
||||
int version = Py_MARSHAL_VERSION;
|
||||
if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
|
||||
return NULL;
|
||||
if (!PyFile_Check(f)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
|
@ -808,7 +856,9 @@ marshal_dump(PyObject *self, PyObject *args)
|
|||
wf.ptr = wf.end = NULL;
|
||||
wf.error = 0;
|
||||
wf.depth = 0;
|
||||
wf.strings = (version > 0) ? PyDict_New() : 0;
|
||||
w_object(x, &wf);
|
||||
Py_XDECREF(wf.strings);
|
||||
if (wf.error) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
(wf.error==1)?"unmarshallable object"
|
||||
|
@ -823,7 +873,7 @@ static PyObject *
|
|||
marshal_load(PyObject *self, PyObject *args)
|
||||
{
|
||||
RFILE rf;
|
||||
PyObject *f;
|
||||
PyObject *f, *result;
|
||||
if (!PyArg_ParseTuple(args, "O:load", &f))
|
||||
return NULL;
|
||||
if (!PyFile_Check(f)) {
|
||||
|
@ -832,16 +882,20 @@ marshal_load(PyObject *self, PyObject *args)
|
|||
return NULL;
|
||||
}
|
||||
rf.fp = PyFile_AsFile(f);
|
||||
return read_object(&rf);
|
||||
rf.strings = PyList_New(0);
|
||||
result = read_object(&rf);
|
||||
Py_DECREF(rf.strings);
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
marshal_dumps(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *x;
|
||||
if (!PyArg_ParseTuple(args, "O:dumps", &x))
|
||||
int version = Py_MARSHAL_VERSION;
|
||||
/* The "i" unit stores through an int *, so pass the address of version. */
if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
|
||||
return NULL;
|
||||
return PyMarshal_WriteObjectToString(x);
|
||||
return PyMarshal_WriteObjectToString(x, version);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -850,12 +904,16 @@ marshal_loads(PyObject *self, PyObject *args)
|
|||
RFILE rf;
|
||||
char *s;
|
||||
int n;
|
||||
if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
|
||||
PyObject* result;
|
||||
/* loads() takes no version argument: the format must not declare an
   optional "i" unit that has no matching destination pointer. */
if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
|
||||
return NULL;
|
||||
rf.fp = NULL;
|
||||
rf.ptr = s;
|
||||
rf.end = s + n;
|
||||
return read_object(&rf);
|
||||
rf.strings = PyList_New(0);
|
||||
result = read_object(&rf);
|
||||
Py_DECREF(rf.strings);
|
||||
return result;
|
||||
}
|
||||
|
||||
static PyMethodDef marshal_methods[] = {
|
||||
|
@ -869,5 +927,6 @@ static PyMethodDef marshal_methods[] = {
|
|||
PyMODINIT_FUNC
|
||||
PyMarshal_Init(void)
|
||||
{
|
||||
(void) Py_InitModule("marshal", marshal_methods);
|
||||
PyObject *mod = Py_InitModule("marshal", marshal_methods);
|
||||
PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue