Issue #14007: implement doctype() method calling in XMLParser of _elementtree.
Includes exposing a doctype handler from expat through pyexpat.
This commit is contained in:
parent
20d4174b3d
commit
2b6b73e7e1
|
@ -43,6 +43,8 @@ struct PyExpat_CAPI
|
||||||
XML_Parser parser, XML_UnknownEncodingHandler handler,
|
XML_Parser parser, XML_UnknownEncodingHandler handler,
|
||||||
void *encodingHandlerData);
|
void *encodingHandlerData);
|
||||||
void (*SetUserData)(XML_Parser parser, void *userData);
|
void (*SetUserData)(XML_Parser parser, void *userData);
|
||||||
|
void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
|
||||||
|
XML_StartDoctypeDeclHandler start);
|
||||||
/* always add new stuff to the end! */
|
/* always add new stuff to the end! */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -2009,7 +2009,6 @@ class TreeBuilderTest(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(lst, ['toplevel'])
|
self.assertEqual(lst, ['toplevel'])
|
||||||
|
|
||||||
@unittest.expectedFailure # XXX issue 14007 with C ElementTree
|
|
||||||
def test_doctype(self):
|
def test_doctype(self):
|
||||||
class DoctypeParser:
|
class DoctypeParser:
|
||||||
_doctype = None
|
_doctype = None
|
||||||
|
@ -2030,6 +2029,10 @@ class TreeBuilderTest(unittest.TestCase):
|
||||||
|
|
||||||
class XMLParserTest(unittest.TestCase):
|
class XMLParserTest(unittest.TestCase):
|
||||||
sample1 = '<file><line>22</line></file>'
|
sample1 = '<file><line>22</line></file>'
|
||||||
|
sample2 = ('<!DOCTYPE html PUBLIC'
|
||||||
|
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
||||||
|
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
|
||||||
|
'<html>text</html>')
|
||||||
|
|
||||||
def _check_sample_element(self, e):
|
def _check_sample_element(self, e):
|
||||||
self.assertEqual(e.tag, 'file')
|
self.assertEqual(e.tag, 'file')
|
||||||
|
@ -2055,6 +2058,20 @@ class XMLParserTest(unittest.TestCase):
|
||||||
parser.feed(self.sample1)
|
parser.feed(self.sample1)
|
||||||
self._check_sample_element(parser.close())
|
self._check_sample_element(parser.close())
|
||||||
|
|
||||||
|
def test_subclass_doctype(self):
    """An XMLParser subclass that overrides doctype() receives the DOCTYPE.

    The overriding method must be called with (name, pubid, system) taken
    from the document's <!DOCTYPE ...> declaration.  Defining doctype() on
    the parser (rather than on the target) is the deprecated hook, so the
    DeprecationWarning it triggers is asserted here instead of being left
    to leak into the test run.
    """
    _doctype = None

    class MyParserWithDoctype(ET.XMLParser):
        def doctype(self, name, pubid, system):
            # Record the arguments so the enclosing test can inspect them.
            nonlocal _doctype
            _doctype = (name, pubid, system)

    parser = MyParserWithDoctype()
    # The whole document, including the DOCTYPE declaration, is fed in one
    # chunk, so the deprecated doctype() call is expected during feed().
    with self.assertWarns(DeprecationWarning):
        parser.feed(self.sample2)
    parser.close()
    self.assertEqual(_doctype,
        ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
         'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||||
|
|
||||||
|
|
||||||
class NoAcceleratorTest(unittest.TestCase):
|
class NoAcceleratorTest(unittest.TestCase):
|
||||||
# Test that the C accelerator was not imported for pyET
|
# Test that the C accelerator was not imported for pyET
|
||||||
|
|
|
@ -2257,24 +2257,27 @@ typedef struct {
|
||||||
|
|
||||||
XML_Parser parser;
|
XML_Parser parser;
|
||||||
|
|
||||||
PyObject* target;
|
PyObject *target;
|
||||||
PyObject* entity;
|
PyObject *entity;
|
||||||
|
|
||||||
PyObject* names;
|
PyObject *names;
|
||||||
|
|
||||||
PyObject* handle_start;
|
PyObject *handle_start;
|
||||||
PyObject* handle_data;
|
PyObject *handle_data;
|
||||||
PyObject* handle_end;
|
PyObject *handle_end;
|
||||||
|
|
||||||
PyObject* handle_comment;
|
PyObject *handle_comment;
|
||||||
PyObject* handle_pi;
|
PyObject *handle_pi;
|
||||||
|
PyObject *handle_doctype;
|
||||||
|
|
||||||
PyObject* handle_close;
|
PyObject *handle_close;
|
||||||
|
|
||||||
} XMLParserObject;
|
} XMLParserObject;
|
||||||
|
|
||||||
static PyTypeObject XMLParser_Type;
|
static PyTypeObject XMLParser_Type;
|
||||||
|
|
||||||
|
#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
|
||||||
|
|
||||||
/* helpers */
|
/* helpers */
|
||||||
|
|
||||||
LOCAL(PyObject*)
|
LOCAL(PyObject*)
|
||||||
|
@ -2601,6 +2604,78 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
expat_start_doctype_handler(XMLParserObject *self,
|
||||||
|
const XML_Char *doctype_name,
|
||||||
|
const XML_Char *sysid,
|
||||||
|
const XML_Char *pubid,
|
||||||
|
int has_internal_subset)
|
||||||
|
{
|
||||||
|
PyObject *self_pyobj = (PyObject *)self;
|
||||||
|
PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
|
||||||
|
PyObject *parser_doctype = NULL;
|
||||||
|
PyObject *res = NULL;
|
||||||
|
|
||||||
|
doctype_name_obj = makeuniversal(self, doctype_name);
|
||||||
|
if (!doctype_name_obj)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (sysid) {
|
||||||
|
sysid_obj = makeuniversal(self, sysid);
|
||||||
|
if (!sysid_obj) {
|
||||||
|
Py_DECREF(doctype_name_obj);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Py_INCREF(Py_None);
|
||||||
|
sysid_obj = Py_None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pubid) {
|
||||||
|
pubid_obj = makeuniversal(self, pubid);
|
||||||
|
if (!pubid_obj) {
|
||||||
|
Py_DECREF(doctype_name_obj);
|
||||||
|
Py_DECREF(sysid_obj);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Py_INCREF(Py_None);
|
||||||
|
pubid_obj = Py_None;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the target has a handler for doctype, call it. */
|
||||||
|
if (self->handle_doctype) {
|
||||||
|
res = PyObject_CallFunction(self->handle_doctype, "OOO",
|
||||||
|
doctype_name_obj, pubid_obj, sysid_obj);
|
||||||
|
Py_CLEAR(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now see if the parser itself has a doctype method. If yes and it's
|
||||||
|
* a subclass, call it but warn about deprecation. If it's not a subclass
|
||||||
|
* (i.e. vanilla XMLParser), do nothing.
|
||||||
|
*/
|
||||||
|
parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
|
||||||
|
if (parser_doctype) {
|
||||||
|
if (!XMLParser_CheckExact(self_pyobj)) {
|
||||||
|
if (PyErr_WarnEx(PyExc_DeprecationWarning,
|
||||||
|
"This method of XMLParser is deprecated. Define"
|
||||||
|
" doctype() method on the TreeBuilder target.",
|
||||||
|
1) < 0) {
|
||||||
|
goto clear;
|
||||||
|
}
|
||||||
|
res = PyObject_CallFunction(parser_doctype, "OOO",
|
||||||
|
doctype_name_obj, pubid_obj, sysid_obj);
|
||||||
|
Py_CLEAR(res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clear:
|
||||||
|
Py_XDECREF(parser_doctype);
|
||||||
|
Py_DECREF(doctype_name_obj);
|
||||||
|
Py_DECREF(pubid_obj);
|
||||||
|
Py_DECREF(sysid_obj);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
|
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
|
||||||
const XML_Char* data_in)
|
const XML_Char* data_in)
|
||||||
|
@ -2676,6 +2751,7 @@ xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
self->target = self->entity = self->names = NULL;
|
self->target = self->entity = self->names = NULL;
|
||||||
self->handle_start = self->handle_data = self->handle_end = NULL;
|
self->handle_start = self->handle_data = self->handle_end = NULL;
|
||||||
self->handle_comment = self->handle_pi = self->handle_close = NULL;
|
self->handle_comment = self->handle_pi = self->handle_close = NULL;
|
||||||
|
self->handle_doctype = NULL;
|
||||||
}
|
}
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
}
|
}
|
||||||
|
@ -2730,6 +2806,7 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
|
self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
|
||||||
self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
|
self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
|
||||||
self_xp->handle_close = PyObject_GetAttrString(target, "close");
|
self_xp->handle_close = PyObject_GetAttrString(target, "close");
|
||||||
|
self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
|
||||||
|
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
|
|
||||||
|
@ -2758,6 +2835,10 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
self_xp->parser,
|
self_xp->parser,
|
||||||
(XML_ProcessingInstructionHandler) expat_pi_handler
|
(XML_ProcessingInstructionHandler) expat_pi_handler
|
||||||
);
|
);
|
||||||
|
EXPAT(SetStartDoctypeDeclHandler)(
|
||||||
|
self_xp->parser,
|
||||||
|
(XML_StartDoctypeDeclHandler) expat_start_doctype_handler
|
||||||
|
);
|
||||||
EXPAT(SetUnknownEncodingHandler)(
|
EXPAT(SetUnknownEncodingHandler)(
|
||||||
self_xp->parser,
|
self_xp->parser,
|
||||||
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
|
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
|
||||||
|
@ -2794,6 +2875,7 @@ xmlparser_gc_clear(XMLParserObject *self)
|
||||||
Py_XDECREF(self->handle_end);
|
Py_XDECREF(self->handle_end);
|
||||||
Py_XDECREF(self->handle_data);
|
Py_XDECREF(self->handle_data);
|
||||||
Py_XDECREF(self->handle_start);
|
Py_XDECREF(self->handle_start);
|
||||||
|
Py_XDECREF(self->handle_doctype);
|
||||||
|
|
||||||
Py_XDECREF(self->target);
|
Py_XDECREF(self->target);
|
||||||
Py_XDECREF(self->entity);
|
Py_XDECREF(self->entity);
|
||||||
|
@ -2950,7 +3032,13 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
xmlparser_setevents(XMLParserObject* self, PyObject* args)
|
xmlparser_doctype(XMLParserObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
xmlparser_setevents(XMLParserObject *self, PyObject* args)
|
||||||
{
|
{
|
||||||
/* activate element event reporting */
|
/* activate element event reporting */
|
||||||
|
|
||||||
|
@ -3054,6 +3142,7 @@ static PyMethodDef xmlparser_methods[] = {
|
||||||
{"close", (PyCFunction) xmlparser_close, METH_VARARGS},
|
{"close", (PyCFunction) xmlparser_close, METH_VARARGS},
|
||||||
{"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
|
{"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
|
||||||
{"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
|
{"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
|
||||||
|
{"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1904,6 +1904,7 @@ MODULE_INITFUNC(void)
|
||||||
capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
|
capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
|
||||||
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
|
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
|
||||||
capi.SetUserData = XML_SetUserData;
|
capi.SetUserData = XML_SetUserData;
|
||||||
|
capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
|
||||||
|
|
||||||
/* export using capsule */
|
/* export using capsule */
|
||||||
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
|
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
|
||||||
|
|
Loading…
Reference in New Issue