Integrate the changes from PyXML's version of pyexpat.c revisions

1.47, 1.48, 1.49 (name interning support).
This commit is contained in:
Fred Drake 2002-06-27 19:40:48 +00:00
parent d805fefff4
commit b91a36b230
1 changed files with 108 additions and 45 deletions

View File

@ -60,6 +60,7 @@ typedef struct {
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
PyObject *intern; /* Dictionary to intern strings */
PyObject **handlers;
} xmlparseobject;
@ -123,7 +124,7 @@ set_error(xmlparseobject *self)
Returns None if str is a null pointer. */
static PyObject *
conv_string_to_unicode(XML_Char *str)
conv_string_to_unicode(const XML_Char *str)
{
/* XXX currently this code assumes that XML_Char is 8-bit,
and hence in UTF-8. */
@ -132,8 +133,7 @@ conv_string_to_unicode(XML_Char *str)
Py_INCREF(Py_None);
return Py_None;
}
return PyUnicode_DecodeUTF8((const char *)str,
strlen((const char *)str),
return PyUnicode_DecodeUTF8(str, strlen(str),
"strict");
}
@ -155,7 +155,7 @@ conv_string_len_to_unicode(const XML_Char *str, int len)
Returns None if str is a null pointer. */
static PyObject *
conv_string_to_utf8(XML_Char *str)
conv_string_to_utf8(const XML_Char *str)
{
/* XXX currently this code assumes that XML_Char is 8-bit,
and hence in UTF-8. */
@ -164,7 +164,7 @@ conv_string_to_utf8(XML_Char *str)
Py_INCREF(Py_None);
return Py_None;
}
return PyString_FromString((const char *)str);
return PyString_FromString(str);
}
static PyObject *
@ -275,6 +275,25 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
? conv_string_to_unicode : conv_string_to_utf8)
#endif
static PyObject*
string_intern(xmlparseobject *self, const char* str)
{
PyObject *result = STRING_CONV_FUNC(str);
PyObject *value;
if (!self->intern)
return result;
value = PyDict_GetItem(self->intern, result);
if (!value) {
if (PyDict_SetItem(self->intern, result, result) == 0)
return result;
else
return NULL;
}
Py_INCREF(value);
Py_DECREF(result);
return value;
}
static void
my_StartElementHandler(void *userData,
const XML_Char *name, const XML_Char **atts)
@ -307,7 +326,7 @@ my_StartElementHandler(void *userData,
return;
}
for (i = 0; i < max; i += 2) {
PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]);
PyObject *n = string_intern(self, (XML_Char *) atts[i]);
PyObject *v;
if (n == NULL) {
flag_error(self);
@ -336,7 +355,7 @@ my_StartElementHandler(void *userData,
Py_DECREF(v);
}
}
args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container);
args = Py_BuildValue("(NN)", string_intern(self, name), container);
if (args == NULL) {
Py_DECREF(container);
return;
@ -394,13 +413,13 @@ my_##NAME##Handler PARAMS {\
VOID_HANDLER(EndElement,
(void *userData, const XML_Char *name),
("(O&)", STRING_CONV_FUNC, name))
("(N)", string_intern(self, name)))
VOID_HANDLER(ProcessingInstruction,
(void *userData,
const XML_Char *target,
const XML_Char *data),
("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data))
("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
#ifndef Py_USING_UNICODE
VOID_HANDLER(CharacterData,
@ -421,10 +440,10 @@ VOID_HANDLER(UnparsedEntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
("(O&O&O&O&O&)",
STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base,
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId,
STRING_CONV_FUNC,notationName))
("(NNNNN)",
string_intern(self, entityName), string_intern(self, base),
string_intern(self, systemId), string_intern(self, publicId),
string_intern(self, notationName)))
#ifndef Py_USING_UNICODE
VOID_HANDLER(EntityDecl,
@ -437,11 +456,12 @@ VOID_HANDLER(EntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
("O&iNO&O&O&O&",
STRING_CONV_FUNC,entityName, is_parameter_entity,
("NiNNNNN",
string_intern(self, entityName), is_parameter_entity,
conv_string_len_to_utf8(value, value_length),
STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
string_intern(self, base), string_intern(self, systemId),
string_intern(self, publicId),
string_intern(self, notationName)))
#else
VOID_HANDLER(EntityDecl,
(void *userData,
@ -453,13 +473,14 @@ VOID_HANDLER(EntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
("O&iNO&O&O&O&",
STRING_CONV_FUNC,entityName, is_parameter_entity,
("NiNNNNN",
string_intern(self, entityName), is_parameter_entity,
(self->returns_unicode
? conv_string_len_to_unicode(value, value_length)
: conv_string_len_to_utf8(value, value_length)),
STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId,
STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName))
string_intern(self, base), string_intern(self, systemId),
string_intern(self, publicId),
string_intern(self, notationName)))
#endif
VOID_HANDLER(XmlDecl,
@ -473,7 +494,7 @@ VOID_HANDLER(XmlDecl,
static PyObject *
conv_content_model(XML_Content * const model,
PyObject *(*conv_string)(XML_Char *))
PyObject *(*conv_string)(const XML_Char *))
{
PyObject *result = NULL;
PyObject *children = PyTuple_New(model->numchildren);
@ -514,8 +535,8 @@ VOID_HANDLER(ElementDecl,
(void *userData,
const XML_Char *name,
XML_Content *model),
("O&O&",
STRING_CONV_FUNC,name,
("NO&",
string_intern(self, name),
(self->returns_unicode ? conv_content_model_unicode
: conv_content_model_utf8),model))
#else
@ -523,8 +544,8 @@ VOID_HANDLER(ElementDecl,
(void *userData,
const XML_Char *name,
XML_Content *model),
("O&O&",
STRING_CONV_FUNC,name, conv_content_model_utf8,model))
("NO&",
string_intern(self, name), conv_content_model_utf8,model))
#endif
VOID_HANDLER(AttlistDecl,
@ -534,8 +555,8 @@ VOID_HANDLER(AttlistDecl,
const XML_Char *att_type,
const XML_Char *dflt,
int isrequired),
("(O&O&O&O&i)",
STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname,
("(NNO&O&i)",
string_intern(self, elname), string_intern(self, attname),
STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
isrequired))
@ -545,24 +566,25 @@ VOID_HANDLER(NotationDecl,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId),
("(O&O&O&O&)",
STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base,
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
("(NNNN)",
string_intern(self, notationName), string_intern(self, base),
string_intern(self, systemId), string_intern(self, publicId)))
VOID_HANDLER(StartNamespaceDecl,
(void *userData,
const XML_Char *prefix,
const XML_Char *uri),
("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri))
("(NN)",
string_intern(self, prefix), string_intern(self, uri)))
VOID_HANDLER(EndNamespaceDecl,
(void *userData,
const XML_Char *prefix),
("(O&)", STRING_CONV_FUNC,prefix))
("(N)", string_intern(self, prefix)))
VOID_HANDLER(Comment,
(void *userData, const XML_Char *prefix),
("(O&)", STRING_CONV_FUNC,prefix))
(void *userData, const XML_Char *data),
("(O&)", STRING_CONV_FUNC,data))
VOID_HANDLER(StartCdataSection,
(void *userData),
@ -605,9 +627,9 @@ RC_HANDLER(int, ExternalEntityRef,
const XML_Char *systemId,
const XML_Char *publicId),
int rc=0;,
("(O&O&O&O&)",
STRING_CONV_FUNC,context, STRING_CONV_FUNC,base,
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId),
("(O&NNN)",
STRING_CONV_FUNC,context, string_intern(self, base),
string_intern(self, systemId), string_intern(self, publicId)),
rc = PyInt_AsLong(rv);, rc,
XML_GetUserData(parser))
@ -617,8 +639,8 @@ VOID_HANDLER(StartDoctypeDecl,
(void *userData, const XML_Char *doctypeName,
const XML_Char *sysid, const XML_Char *pubid,
int has_internal_subset),
("(O&O&O&i)", STRING_CONV_FUNC,doctypeName,
STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid,
("(NNNi)", string_intern(self, doctypeName),
string_intern(self, sysid), string_intern(self, pubid),
has_internal_subset))
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
@ -856,6 +878,8 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
new_parser->handlers = 0;
new_parser->intern = self->intern;
Py_XINCREF(new_parser->intern);
#ifdef Py_TPFLAGS_HAVE_GC
PyObject_GC_Track(new_parser);
#else
@ -988,7 +1012,7 @@ XML_Encoding * info)
#endif
static PyObject *
newxmlparseobject(char *encoding, char *namespace_separator)
newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
{
int i;
xmlparseobject *self;
@ -1022,6 +1046,8 @@ newxmlparseobject(char *encoding, char *namespace_separator)
else {
self->itself = XML_ParserCreate(encoding);
}
self->intern = intern;
Py_XINCREF(self->intern);
#ifdef Py_TPFLAGS_HAVE_GC
PyObject_GC_Track(self);
#else
@ -1074,6 +1100,7 @@ xmlparse_dealloc(xmlparseobject *self)
}
free(self->handlers);
}
Py_XDECREF(self->intern);
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
/* Code for versions before 1.6 */
free(self);
@ -1118,6 +1145,16 @@ xmlparse_getattr(xmlparseobject *self, char *name)
return PyInt_FromLong((long) self->returns_unicode);
if (strcmp(name, "specified_attributes") == 0)
return PyInt_FromLong((long) self->specified_attributes);
if (strcmp(name, "intern") == 0) {
if (self->intern == NULL) {
Py_INCREF(Py_None);
return Py_None;
}
else {
Py_INCREF(self->intern);
return self->intern;
}
}
handlernum = handlername2int(name);
@ -1138,6 +1175,7 @@ xmlparse_getattr(xmlparseobject *self, char *name)
PyList_Append(rc, PyString_FromString("ordered_attributes"));
PyList_Append(rc, PyString_FromString("returns_unicode"));
PyList_Append(rc, PyString_FromString("specified_attributes"));
PyList_Append(rc, PyString_FromString("intern"));
return rc;
}
@ -1221,6 +1259,8 @@ static int
xmlparse_clear(xmlparseobject *op)
{
clear_handlers(op, 0);
Py_XDECREF(op->intern);
op->intern = 0;
return 0;
}
#endif
@ -1275,10 +1315,14 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
{
char *encoding = NULL;
char *namespace_separator = NULL;
static char *kwlist[] = {"encoding", "namespace_separator", NULL};
PyObject *intern = NULL;
PyObject *result;
int intern_decref = 0;
static char *kwlist[] = {"encoding", "namespace_separator",
"intern", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist,
&encoding, &namespace_separator))
if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
&encoding, &namespace_separator, &intern))
return NULL;
if (namespace_separator != NULL
&& strlen(namespace_separator) > 1) {
@ -1287,7 +1331,26 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
" character, omitted, or None");
return NULL;
}
return newxmlparseobject(encoding, namespace_separator);
/* Explicitly passing None means no interning is desired.
Not passing anything means that a new dictionary is used. */
if (intern == Py_None)
intern = NULL;
else if (intern == NULL) {
intern = PyDict_New();
if (!intern)
return NULL;
intern_decref = 1;
}
else if (!PyDict_Check(intern)) {
PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
return NULL;
}
result = newxmlparseobject(encoding, namespace_separator, intern);
if (intern_decref) {
Py_DECREF(intern);
}
return result;
}
PyDoc_STRVAR(pyexpat_ErrorString__doc__,