Instead of doing a make test, run the regression tests out of the installed
[python.git] / Modules / pyexpat.c
blob8a10babde3bc6e7c81c7834ddfe491d70d484f82
1 #include "Python.h"
2 #include <ctype.h>
4 #include "frameobject.h"
5 #include "expat.h"
7 #include "pyexpat.h"
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
11 #ifndef PyDoc_STRVAR
14 * fdrake says:
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
23 #endif
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
28 #else
29 #define FIX_TRACE
30 #endif
32 enum HandlerTypes {
33 StartElement,
34 EndElement,
35 ProcessingInstruction,
36 CharacterData,
37 UnparsedEntityDecl,
38 NotationDecl,
39 StartNamespaceDecl,
40 EndNamespaceDecl,
41 Comment,
42 StartCdataSection,
43 EndCdataSection,
44 Default,
45 DefaultHandlerExpand,
46 NotStandalone,
47 ExternalEntityRef,
48 StartDoctypeDecl,
49 EndDoctypeDecl,
50 EntityDecl,
51 XmlDecl,
52 ElementDecl,
53 AttlistDecl,
54 #if XML_COMBINED_VERSION >= 19504
55 SkippedEntity,
56 #endif
57 _DummyDecl
60 static PyObject *ErrorObject;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
66 typedef struct {
67 PyObject_HEAD
69 XML_Parser itself;
70 int returns_unicode; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes; /* Return attributes as a list. */
73 int specified_attributes; /* Report only specified attributes. */
74 int in_callback; /* Is a callback active? */
75 int ns_prefixes; /* Namespace-triplets mode? */
76 XML_Char *buffer; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size; /* Size of buffer, in XML_Char units */
79 int buffer_used; /* Buffer units in use */
80 PyObject *intern; /* Dictionary to intern strings */
81 PyObject **handlers;
82 } xmlparseobject;
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype;
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
89 typedef void* xmlhandler;
91 struct HandlerInfo {
92 const char *name;
93 xmlhandlersetter setter;
94 xmlhandler handler;
95 PyCodeObject *tb_code;
96 PyObject *nameobj;
99 static struct HandlerInfo handler_info[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
104 static int
105 set_error_attr(PyObject *err, char *name, int value)
107 PyObject *v = PyInt_FromLong(value);
109 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110 Py_XDECREF(v);
111 return 0;
113 Py_DECREF(v);
114 return 1;
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
120 static PyObject *
121 set_error(xmlparseobject *self, enum XML_Error code)
123 PyObject *err;
124 char buffer[256];
125 XML_Parser parser = self->itself;
126 int lineno = XML_GetErrorLineNumber(parser);
127 int column = XML_GetErrorColumnNumber(parser);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer, "%.200s: line %i, column %i",
132 XML_ErrorString(code), lineno, column);
133 err = PyObject_CallFunction(ErrorObject, "s", buffer);
134 if ( err != NULL
135 && set_error_attr(err, "code", code)
136 && set_error_attr(err, "offset", column)
137 && set_error_attr(err, "lineno", lineno)) {
138 PyErr_SetObject(ErrorObject, err);
140 Py_XDECREF(err);
141 return NULL;
144 static int
145 have_handler(xmlparseobject *self, int type)
147 PyObject *handler = self->handlers[type];
148 return handler != NULL;
151 static PyObject *
152 get_handler_name(struct HandlerInfo *hinfo)
154 PyObject *name = hinfo->nameobj;
155 if (name == NULL) {
156 name = PyString_FromString(hinfo->name);
157 hinfo->nameobj = name;
159 Py_XINCREF(name);
160 return name;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
168 static PyObject *
169 conv_string_to_unicode(const XML_Char *str)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
174 if (str == NULL) {
175 Py_INCREF(Py_None);
176 return Py_None;
178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
181 static PyObject *
182 conv_string_len_to_unicode(const XML_Char *str, int len)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
187 if (str == NULL) {
188 Py_INCREF(Py_None);
189 return Py_None;
191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
193 #endif
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
198 static PyObject *
199 conv_string_to_utf8(const XML_Char *str)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
204 if (str == NULL) {
205 Py_INCREF(Py_None);
206 return Py_None;
208 return PyString_FromString(str);
211 static PyObject *
212 conv_string_len_to_utf8(const XML_Char *str, int len)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
217 if (str == NULL) {
218 Py_INCREF(Py_None);
219 return Py_None;
221 return PyString_FromStringAndSize((const char *)str, len);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject *self, int initial);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
231 static int
232 error_external_entity_ref_handler(XML_Parser parser,
233 const XML_Char *context,
234 const XML_Char *base,
235 const XML_Char *systemId,
236 const XML_Char *publicId)
238 return 0;
241 static void
242 flag_error(xmlparseobject *self)
244 clear_handlers(self, 0);
245 XML_SetExternalEntityRefHandler(self->itself,
246 error_external_entity_ref_handler);
249 static PyCodeObject*
250 getcode(enum HandlerTypes slot, char* func_name, int lineno)
252 PyObject *code = NULL;
253 PyObject *name = NULL;
254 PyObject *nulltuple = NULL;
255 PyObject *filename = NULL;
257 if (handler_info[slot].tb_code == NULL) {
258 code = PyString_FromString("");
259 if (code == NULL)
260 goto failed;
261 name = PyString_FromString(func_name);
262 if (name == NULL)
263 goto failed;
264 nulltuple = PyTuple_New(0);
265 if (nulltuple == NULL)
266 goto failed;
267 filename = PyString_FromString(__FILE__);
268 handler_info[slot].tb_code =
269 PyCode_New(0, /* argcount */
270 0, /* nlocals */
271 0, /* stacksize */
272 0, /* flags */
273 code, /* code */
274 nulltuple, /* consts */
275 nulltuple, /* names */
276 nulltuple, /* varnames */
277 #if PYTHON_API_VERSION >= 1010
278 nulltuple, /* freevars */
279 nulltuple, /* cellvars */
280 #endif
281 filename, /* filename */
282 name, /* name */
283 lineno, /* firstlineno */
284 code /* lnotab */
286 if (handler_info[slot].tb_code == NULL)
287 goto failed;
288 Py_DECREF(code);
289 Py_DECREF(nulltuple);
290 Py_DECREF(filename);
291 Py_DECREF(name);
293 return handler_info[slot].tb_code;
294 failed:
295 Py_XDECREF(code);
296 Py_XDECREF(name);
297 return NULL;
300 #ifdef FIX_TRACE
301 static int
302 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
304 int result = 0;
305 if (!tstate->use_tracing || tstate->tracing)
306 return 0;
307 if (tstate->c_profilefunc != NULL) {
308 tstate->tracing++;
309 result = tstate->c_profilefunc(tstate->c_profileobj,
310 f, code , val);
311 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
312 || (tstate->c_profilefunc != NULL));
313 tstate->tracing--;
314 if (result)
315 return result;
317 if (tstate->c_tracefunc != NULL) {
318 tstate->tracing++;
319 result = tstate->c_tracefunc(tstate->c_traceobj,
320 f, code , val);
321 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
322 || (tstate->c_profilefunc != NULL));
323 tstate->tracing--;
325 return result;
328 static int
329 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
331 PyObject *type, *value, *traceback, *arg;
332 int err;
334 if (tstate->c_tracefunc == NULL)
335 return 0;
337 PyErr_Fetch(&type, &value, &traceback);
338 if (value == NULL) {
339 value = Py_None;
340 Py_INCREF(value);
342 #if PY_VERSION_HEX < 0x02040000
343 arg = Py_BuildValue("(OOO)", type, value, traceback);
344 #else
345 arg = PyTuple_Pack(3, type, value, traceback);
346 #endif
347 if (arg == NULL) {
348 PyErr_Restore(type, value, traceback);
349 return 0;
351 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
352 Py_DECREF(arg);
353 if (err == 0)
354 PyErr_Restore(type, value, traceback);
355 else {
356 Py_XDECREF(type);
357 Py_XDECREF(value);
358 Py_XDECREF(traceback);
360 return err;
362 #endif
364 static PyObject*
365 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
366 xmlparseobject *self)
368 PyThreadState *tstate = PyThreadState_GET();
369 PyFrameObject *f;
370 PyObject *res;
372 if (c == NULL)
373 return NULL;
375 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
376 if (f == NULL)
377 return NULL;
378 tstate->frame = f;
379 #ifdef FIX_TRACE
380 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
381 return NULL;
383 #endif
384 res = PyEval_CallObject(func, args);
385 if (res == NULL) {
386 if (tstate->curexc_traceback == NULL)
387 PyTraceBack_Here(f);
388 XML_StopParser(self->itself, XML_FALSE);
389 #ifdef FIX_TRACE
390 if (trace_frame_exc(tstate, f) < 0) {
391 return NULL;
394 else {
395 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
396 Py_XDECREF(res);
397 res = NULL;
400 #else
402 #endif
403 tstate->frame = f->f_back;
404 Py_DECREF(f);
405 return res;
408 #ifndef Py_USING_UNICODE
409 #define STRING_CONV_FUNC conv_string_to_utf8
410 #else
411 /* Python 2.0 and later versions, when built with Unicode support */
412 #define STRING_CONV_FUNC (self->returns_unicode \
413 ? conv_string_to_unicode : conv_string_to_utf8)
414 #endif
416 static PyObject*
417 string_intern(xmlparseobject *self, const char* str)
419 PyObject *result = STRING_CONV_FUNC(str);
420 PyObject *value;
421 /* result can be NULL if the unicode conversion failed. */
422 if (!result)
423 return result;
424 if (!self->intern)
425 return result;
426 value = PyDict_GetItem(self->intern, result);
427 if (!value) {
428 if (PyDict_SetItem(self->intern, result, result) == 0)
429 return result;
430 else
431 return NULL;
433 Py_INCREF(value);
434 Py_DECREF(result);
435 return value;
438 /* Return 0 on success, -1 on exception.
439 * flag_error() will be called before return if needed.
441 static int
442 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
444 PyObject *args;
445 PyObject *temp;
447 args = PyTuple_New(1);
448 if (args == NULL)
449 return -1;
450 #ifdef Py_USING_UNICODE
451 temp = (self->returns_unicode
452 ? conv_string_len_to_unicode(buffer, len)
453 : conv_string_len_to_utf8(buffer, len));
454 #else
455 temp = conv_string_len_to_utf8(buffer, len);
456 #endif
457 if (temp == NULL) {
458 Py_DECREF(args);
459 flag_error(self);
460 return -1;
462 PyTuple_SET_ITEM(args, 0, temp);
463 /* temp is now a borrowed reference; consider it unused. */
464 self->in_callback = 1;
465 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
466 self->handlers[CharacterData], args, self);
467 /* temp is an owned reference again, or NULL */
468 self->in_callback = 0;
469 Py_DECREF(args);
470 if (temp == NULL) {
471 flag_error(self);
472 return -1;
474 Py_DECREF(temp);
475 return 0;
478 static int
479 flush_character_buffer(xmlparseobject *self)
481 int rc;
482 if (self->buffer == NULL || self->buffer_used == 0)
483 return 0;
484 rc = call_character_handler(self, self->buffer, self->buffer_used);
485 self->buffer_used = 0;
486 return rc;
489 static void
490 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
492 xmlparseobject *self = (xmlparseobject *) userData;
493 if (self->buffer == NULL)
494 call_character_handler(self, data, len);
495 else {
496 if ((self->buffer_used + len) > self->buffer_size) {
497 if (flush_character_buffer(self) < 0)
498 return;
499 /* handler might have changed; drop the rest on the floor
500 * if there isn't a handler anymore
502 if (!have_handler(self, CharacterData))
503 return;
505 if (len > self->buffer_size) {
506 call_character_handler(self, data, len);
507 self->buffer_used = 0;
509 else {
510 memcpy(self->buffer + self->buffer_used,
511 data, len * sizeof(XML_Char));
512 self->buffer_used += len;
517 static void
518 my_StartElementHandler(void *userData,
519 const XML_Char *name, const XML_Char *atts[])
521 xmlparseobject *self = (xmlparseobject *)userData;
523 if (have_handler(self, StartElement)) {
524 PyObject *container, *rv, *args;
525 int i, max;
527 if (flush_character_buffer(self) < 0)
528 return;
529 /* Set max to the number of slots filled in atts[]; max/2 is
530 * the number of attributes we need to process.
532 if (self->specified_attributes) {
533 max = XML_GetSpecifiedAttributeCount(self->itself);
535 else {
536 max = 0;
537 while (atts[max] != NULL)
538 max += 2;
540 /* Build the container. */
541 if (self->ordered_attributes)
542 container = PyList_New(max);
543 else
544 container = PyDict_New();
545 if (container == NULL) {
546 flag_error(self);
547 return;
549 for (i = 0; i < max; i += 2) {
550 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
551 PyObject *v;
552 if (n == NULL) {
553 flag_error(self);
554 Py_DECREF(container);
555 return;
557 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
558 if (v == NULL) {
559 flag_error(self);
560 Py_DECREF(container);
561 Py_DECREF(n);
562 return;
564 if (self->ordered_attributes) {
565 PyList_SET_ITEM(container, i, n);
566 PyList_SET_ITEM(container, i+1, v);
568 else if (PyDict_SetItem(container, n, v)) {
569 flag_error(self);
570 Py_DECREF(n);
571 Py_DECREF(v);
572 return;
574 else {
575 Py_DECREF(n);
576 Py_DECREF(v);
579 args = string_intern(self, name);
580 if (args != NULL)
581 args = Py_BuildValue("(NN)", args, container);
582 if (args == NULL) {
583 Py_DECREF(container);
584 return;
586 /* Container is now a borrowed reference; ignore it. */
587 self->in_callback = 1;
588 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
589 self->handlers[StartElement], args, self);
590 self->in_callback = 0;
591 Py_DECREF(args);
592 if (rv == NULL) {
593 flag_error(self);
594 return;
596 Py_DECREF(rv);
600 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
601 RETURN, GETUSERDATA) \
602 static RC \
603 my_##NAME##Handler PARAMS {\
604 xmlparseobject *self = GETUSERDATA ; \
605 PyObject *args = NULL; \
606 PyObject *rv = NULL; \
607 INIT \
609 if (have_handler(self, NAME)) { \
610 if (flush_character_buffer(self) < 0) \
611 return RETURN; \
612 args = Py_BuildValue PARAM_FORMAT ;\
613 if (!args) { flag_error(self); return RETURN;} \
614 self->in_callback = 1; \
615 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
616 self->handlers[NAME], args, self); \
617 self->in_callback = 0; \
618 Py_DECREF(args); \
619 if (rv == NULL) { \
620 flag_error(self); \
621 return RETURN; \
623 CONVERSION \
624 Py_DECREF(rv); \
626 return RETURN; \
629 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
630 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
631 (xmlparseobject *)userData)
633 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
634 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
635 rc = PyInt_AsLong(rv);, rc, \
636 (xmlparseobject *)userData)
638 VOID_HANDLER(EndElement,
639 (void *userData, const XML_Char *name),
640 ("(N)", string_intern(self, name)))
642 VOID_HANDLER(ProcessingInstruction,
643 (void *userData,
644 const XML_Char *target,
645 const XML_Char *data),
646 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
648 VOID_HANDLER(UnparsedEntityDecl,
649 (void *userData,
650 const XML_Char *entityName,
651 const XML_Char *base,
652 const XML_Char *systemId,
653 const XML_Char *publicId,
654 const XML_Char *notationName),
655 ("(NNNNN)",
656 string_intern(self, entityName), string_intern(self, base),
657 string_intern(self, systemId), string_intern(self, publicId),
658 string_intern(self, notationName)))
660 #ifndef Py_USING_UNICODE
661 VOID_HANDLER(EntityDecl,
662 (void *userData,
663 const XML_Char *entityName,
664 int is_parameter_entity,
665 const XML_Char *value,
666 int value_length,
667 const XML_Char *base,
668 const XML_Char *systemId,
669 const XML_Char *publicId,
670 const XML_Char *notationName),
671 ("NiNNNNN",
672 string_intern(self, entityName), is_parameter_entity,
673 conv_string_len_to_utf8(value, value_length),
674 string_intern(self, base), string_intern(self, systemId),
675 string_intern(self, publicId),
676 string_intern(self, notationName)))
677 #else
678 VOID_HANDLER(EntityDecl,
679 (void *userData,
680 const XML_Char *entityName,
681 int is_parameter_entity,
682 const XML_Char *value,
683 int value_length,
684 const XML_Char *base,
685 const XML_Char *systemId,
686 const XML_Char *publicId,
687 const XML_Char *notationName),
688 ("NiNNNNN",
689 string_intern(self, entityName), is_parameter_entity,
690 (self->returns_unicode
691 ? conv_string_len_to_unicode(value, value_length)
692 : conv_string_len_to_utf8(value, value_length)),
693 string_intern(self, base), string_intern(self, systemId),
694 string_intern(self, publicId),
695 string_intern(self, notationName)))
696 #endif
698 VOID_HANDLER(XmlDecl,
699 (void *userData,
700 const XML_Char *version,
701 const XML_Char *encoding,
702 int standalone),
703 ("(O&O&i)",
704 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
705 standalone))
707 static PyObject *
708 conv_content_model(XML_Content * const model,
709 PyObject *(*conv_string)(const XML_Char *))
711 PyObject *result = NULL;
712 PyObject *children = PyTuple_New(model->numchildren);
713 int i;
715 if (children != NULL) {
716 assert(model->numchildren < INT_MAX);
717 for (i = 0; i < (int)model->numchildren; ++i) {
718 PyObject *child = conv_content_model(&model->children[i],
719 conv_string);
720 if (child == NULL) {
721 Py_XDECREF(children);
722 return NULL;
724 PyTuple_SET_ITEM(children, i, child);
726 result = Py_BuildValue("(iiO&N)",
727 model->type, model->quant,
728 conv_string,model->name, children);
730 return result;
733 static void
734 my_ElementDeclHandler(void *userData,
735 const XML_Char *name,
736 XML_Content *model)
738 xmlparseobject *self = (xmlparseobject *)userData;
739 PyObject *args = NULL;
741 if (have_handler(self, ElementDecl)) {
742 PyObject *rv = NULL;
743 PyObject *modelobj, *nameobj;
745 if (flush_character_buffer(self) < 0)
746 goto finally;
747 #ifdef Py_USING_UNICODE
748 modelobj = conv_content_model(model,
749 (self->returns_unicode
750 ? conv_string_to_unicode
751 : conv_string_to_utf8));
752 #else
753 modelobj = conv_content_model(model, conv_string_to_utf8);
754 #endif
755 if (modelobj == NULL) {
756 flag_error(self);
757 goto finally;
759 nameobj = string_intern(self, name);
760 if (nameobj == NULL) {
761 Py_DECREF(modelobj);
762 flag_error(self);
763 goto finally;
765 args = Py_BuildValue("NN", nameobj, modelobj);
766 if (args == NULL) {
767 Py_DECREF(modelobj);
768 flag_error(self);
769 goto finally;
771 self->in_callback = 1;
772 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
773 self->handlers[ElementDecl], args, self);
774 self->in_callback = 0;
775 if (rv == NULL) {
776 flag_error(self);
777 goto finally;
779 Py_DECREF(rv);
781 finally:
782 Py_XDECREF(args);
783 XML_FreeContentModel(self->itself, model);
784 return;
787 VOID_HANDLER(AttlistDecl,
788 (void *userData,
789 const XML_Char *elname,
790 const XML_Char *attname,
791 const XML_Char *att_type,
792 const XML_Char *dflt,
793 int isrequired),
794 ("(NNO&O&i)",
795 string_intern(self, elname), string_intern(self, attname),
796 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
797 isrequired))
799 #if XML_COMBINED_VERSION >= 19504
800 VOID_HANDLER(SkippedEntity,
801 (void *userData,
802 const XML_Char *entityName,
803 int is_parameter_entity),
804 ("Ni",
805 string_intern(self, entityName), is_parameter_entity))
806 #endif
808 VOID_HANDLER(NotationDecl,
809 (void *userData,
810 const XML_Char *notationName,
811 const XML_Char *base,
812 const XML_Char *systemId,
813 const XML_Char *publicId),
814 ("(NNNN)",
815 string_intern(self, notationName), string_intern(self, base),
816 string_intern(self, systemId), string_intern(self, publicId)))
818 VOID_HANDLER(StartNamespaceDecl,
819 (void *userData,
820 const XML_Char *prefix,
821 const XML_Char *uri),
822 ("(NN)",
823 string_intern(self, prefix), string_intern(self, uri)))
825 VOID_HANDLER(EndNamespaceDecl,
826 (void *userData,
827 const XML_Char *prefix),
828 ("(N)", string_intern(self, prefix)))
830 VOID_HANDLER(Comment,
831 (void *userData, const XML_Char *data),
832 ("(O&)", STRING_CONV_FUNC,data))
834 VOID_HANDLER(StartCdataSection,
835 (void *userData),
836 ("()"))
838 VOID_HANDLER(EndCdataSection,
839 (void *userData),
840 ("()"))
842 #ifndef Py_USING_UNICODE
843 VOID_HANDLER(Default,
844 (void *userData, const XML_Char *s, int len),
845 ("(N)", conv_string_len_to_utf8(s,len)))
847 VOID_HANDLER(DefaultHandlerExpand,
848 (void *userData, const XML_Char *s, int len),
849 ("(N)", conv_string_len_to_utf8(s,len)))
850 #else
851 VOID_HANDLER(Default,
852 (void *userData, const XML_Char *s, int len),
853 ("(N)", (self->returns_unicode
854 ? conv_string_len_to_unicode(s,len)
855 : conv_string_len_to_utf8(s,len))))
857 VOID_HANDLER(DefaultHandlerExpand,
858 (void *userData, const XML_Char *s, int len),
859 ("(N)", (self->returns_unicode
860 ? conv_string_len_to_unicode(s,len)
861 : conv_string_len_to_utf8(s,len))))
862 #endif
864 INT_HANDLER(NotStandalone,
865 (void *userData),
866 ("()"))
868 RC_HANDLER(int, ExternalEntityRef,
869 (XML_Parser parser,
870 const XML_Char *context,
871 const XML_Char *base,
872 const XML_Char *systemId,
873 const XML_Char *publicId),
874 int rc=0;,
875 ("(O&NNN)",
876 STRING_CONV_FUNC,context, string_intern(self, base),
877 string_intern(self, systemId), string_intern(self, publicId)),
878 rc = PyInt_AsLong(rv);, rc,
879 XML_GetUserData(parser))
881 /* XXX UnknownEncodingHandler */
883 VOID_HANDLER(StartDoctypeDecl,
884 (void *userData, const XML_Char *doctypeName,
885 const XML_Char *sysid, const XML_Char *pubid,
886 int has_internal_subset),
887 ("(NNNi)", string_intern(self, doctypeName),
888 string_intern(self, sysid), string_intern(self, pubid),
889 has_internal_subset))
891 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
893 /* ---------------------------------------------------------------- */
895 static PyObject *
896 get_parse_result(xmlparseobject *self, int rv)
898 if (PyErr_Occurred()) {
899 return NULL;
901 if (rv == 0) {
902 return set_error(self, XML_GetErrorCode(self->itself));
904 if (flush_character_buffer(self) < 0) {
905 return NULL;
907 return PyInt_FromLong(rv);
910 PyDoc_STRVAR(xmlparse_Parse__doc__,
911 "Parse(data[, isfinal])\n\
912 Parse XML data. `isfinal' should be true at end of input.");
914 static PyObject *
915 xmlparse_Parse(xmlparseobject *self, PyObject *args)
917 char *s;
918 int slen;
919 int isFinal = 0;
921 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
922 return NULL;
924 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
927 /* File reading copied from cPickle */
929 #define BUF_SIZE 2048
931 static int
932 readinst(char *buf, int buf_size, PyObject *meth)
934 PyObject *arg = NULL;
935 PyObject *bytes = NULL;
936 PyObject *str = NULL;
937 int len = -1;
939 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
940 goto finally;
942 if ((arg = PyTuple_New(1)) == NULL) {
943 Py_DECREF(bytes);
944 goto finally;
947 PyTuple_SET_ITEM(arg, 0, bytes);
949 #if PY_VERSION_HEX < 0x02020000
950 str = PyObject_CallObject(meth, arg);
951 #else
952 str = PyObject_Call(meth, arg, NULL);
953 #endif
954 if (str == NULL)
955 goto finally;
957 /* XXX what to do if it returns a Unicode string? */
958 if (!PyString_Check(str)) {
959 PyErr_Format(PyExc_TypeError,
960 "read() did not return a string object (type=%.400s)",
961 str->ob_type->tp_name);
962 goto finally;
964 len = PyString_GET_SIZE(str);
965 if (len > buf_size) {
966 PyErr_Format(PyExc_ValueError,
967 "read() returned too much data: "
968 "%i bytes requested, %i returned",
969 buf_size, len);
970 goto finally;
972 memcpy(buf, PyString_AsString(str), len);
973 finally:
974 Py_XDECREF(arg);
975 Py_XDECREF(str);
976 return len;
979 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
980 "ParseFile(file)\n\
981 Parse XML data from file-like object.");
983 static PyObject *
984 xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
986 int rv = 1;
987 FILE *fp;
988 PyObject *readmethod = NULL;
990 if (PyFile_Check(f)) {
991 fp = PyFile_AsFile(f);
993 else {
994 fp = NULL;
995 readmethod = PyObject_GetAttrString(f, "read");
996 if (readmethod == NULL) {
997 PyErr_Clear();
998 PyErr_SetString(PyExc_TypeError,
999 "argument must have 'read' attribute");
1000 return NULL;
1003 for (;;) {
1004 int bytes_read;
1005 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
1006 if (buf == NULL) {
1007 Py_XDECREF(readmethod);
1008 return PyErr_NoMemory();
1011 if (fp) {
1012 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
1013 if (bytes_read < 0) {
1014 PyErr_SetFromErrno(PyExc_IOError);
1015 return NULL;
1018 else {
1019 bytes_read = readinst(buf, BUF_SIZE, readmethod);
1020 if (bytes_read < 0) {
1021 Py_DECREF(readmethod);
1022 return NULL;
1025 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
1026 if (PyErr_Occurred()) {
1027 Py_XDECREF(readmethod);
1028 return NULL;
1031 if (!rv || bytes_read == 0)
1032 break;
1034 Py_XDECREF(readmethod);
1035 return get_parse_result(self, rv);
1038 PyDoc_STRVAR(xmlparse_SetBase__doc__,
1039 "SetBase(base_url)\n\
1040 Set the base URL for the parser.");
1042 static PyObject *
1043 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1045 char *base;
1047 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
1048 return NULL;
1049 if (!XML_SetBase(self->itself, base)) {
1050 return PyErr_NoMemory();
1052 Py_INCREF(Py_None);
1053 return Py_None;
1056 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1057 "GetBase() -> url\n\
1058 Return base URL string for the parser.");
1060 static PyObject *
1061 xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
1063 return Py_BuildValue("z", XML_GetBase(self->itself));
1066 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1067 "GetInputContext() -> string\n\
1068 Return the untranslated text of the input that caused the current event.\n\
1069 If the event was generated by a large amount of text (such as a start tag\n\
1070 for an element with many attributes), not all of the text may be available.");
1072 static PyObject *
1073 xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
1075 if (self->in_callback) {
1076 int offset, size;
1077 const char *buffer
1078 = XML_GetInputContext(self->itself, &offset, &size);
1080 if (buffer != NULL)
1081 return PyString_FromStringAndSize(buffer + offset,
1082 size - offset);
1083 else
1084 Py_RETURN_NONE;
1086 else
1087 Py_RETURN_NONE;
1090 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
1091 "ExternalEntityParserCreate(context[, encoding])\n\
1092 Create a parser for parsing an external entity based on the\n\
1093 information passed to the ExternalEntityRefHandler.");
1095 static PyObject *
1096 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1098 char *context;
1099 char *encoding = NULL;
1100 xmlparseobject *new_parser;
1101 int i;
1103 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
1104 &context, &encoding)) {
1105 return NULL;
1108 #ifndef Py_TPFLAGS_HAVE_GC
1109 /* Python versions 2.0 and 2.1 */
1110 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
1111 #else
1112 /* Python versions 2.2 and later */
1113 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1114 #endif
1116 if (new_parser == NULL)
1117 return NULL;
1118 new_parser->buffer_size = self->buffer_size;
1119 new_parser->buffer_used = 0;
1120 if (self->buffer != NULL) {
1121 new_parser->buffer = malloc(new_parser->buffer_size);
1122 if (new_parser->buffer == NULL) {
1123 #ifndef Py_TPFLAGS_HAVE_GC
1124 /* Code for versions 2.0 and 2.1 */
1125 PyObject_Del(new_parser);
1126 #else
1127 /* Code for versions 2.2 and later. */
1128 PyObject_GC_Del(new_parser);
1129 #endif
1130 return PyErr_NoMemory();
1133 else
1134 new_parser->buffer = NULL;
1135 new_parser->returns_unicode = self->returns_unicode;
1136 new_parser->ordered_attributes = self->ordered_attributes;
1137 new_parser->specified_attributes = self->specified_attributes;
1138 new_parser->in_callback = 0;
1139 new_parser->ns_prefixes = self->ns_prefixes;
1140 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1141 encoding);
1142 new_parser->handlers = 0;
1143 new_parser->intern = self->intern;
1144 Py_XINCREF(new_parser->intern);
1145 #ifdef Py_TPFLAGS_HAVE_GC
1146 PyObject_GC_Track(new_parser);
1147 #else
1148 PyObject_GC_Init(new_parser);
1149 #endif
1151 if (!new_parser->itself) {
1152 Py_DECREF(new_parser);
1153 return PyErr_NoMemory();
1156 XML_SetUserData(new_parser->itself, (void *)new_parser);
1158 /* allocate and clear handlers first */
1159 for (i = 0; handler_info[i].name != NULL; i++)
1160 /* do nothing */;
1162 new_parser->handlers = malloc(sizeof(PyObject *) * i);
1163 if (!new_parser->handlers) {
1164 Py_DECREF(new_parser);
1165 return PyErr_NoMemory();
1167 clear_handlers(new_parser, 1);
1169 /* then copy handlers from self */
1170 for (i = 0; handler_info[i].name != NULL; i++) {
1171 PyObject *handler = self->handlers[i];
1172 if (handler != NULL) {
1173 Py_INCREF(handler);
1174 new_parser->handlers[i] = handler;
1175 handler_info[i].setter(new_parser->itself,
1176 handler_info[i].handler);
1179 return (PyObject *)new_parser;
1182 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1183 "SetParamEntityParsing(flag) -> success\n\
1184 Controls parsing of parameter entities (including the external DTD\n\
1185 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1186 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1187 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1188 was successful.");
1190 static PyObject*
1191 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1193 int flag;
1194 if (!PyArg_ParseTuple(args, "i", &flag))
1195 return NULL;
1196 flag = XML_SetParamEntityParsing(p->itself, flag);
1197 return PyInt_FromLong(flag);
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* Method implementation for UseForeignDTD([flag]).
 *
 * Accepts zero or one positional argument.  When an argument is given its
 * truth value selects XML_TRUE or XML_FALSE; otherwise XML_TRUE is used.
 * The flag is passed to XML_UseForeignDTD().
 *
 * Returns None on success.  Returns NULL with an exception set when the
 * arguments are malformed, when evaluating the truth value of 'flag'
 * raises, or (via set_error()) when Expat reports an error code.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when the object's truth test raises;
           propagate that instead of silently treating it as "true". */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
1229 static struct PyMethodDef xmlparse_methods[] = {
1230 {"Parse", (PyCFunction)xmlparse_Parse,
1231 METH_VARARGS, xmlparse_Parse__doc__},
1232 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1233 METH_O, xmlparse_ParseFile__doc__},
1234 {"SetBase", (PyCFunction)xmlparse_SetBase,
1235 METH_VARARGS, xmlparse_SetBase__doc__},
1236 {"GetBase", (PyCFunction)xmlparse_GetBase,
1237 METH_NOARGS, xmlparse_GetBase__doc__},
1238 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1239 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1240 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1241 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1242 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1243 METH_NOARGS, xmlparse_GetInputContext__doc__},
1244 #if XML_COMBINED_VERSION >= 19505
1245 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1246 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1247 #endif
1248 {NULL, NULL} /* sentinel */
1251 /* ---------- */
1254 #ifdef Py_USING_UNICODE
1256 /* pyexpat international encoding support.
1257 Make it as simple as possible.
1260 static char template_buffer[257];
1261 PyObject *template_string = NULL;
1263 static void
1264 init_template_buffer(void)
1266 int i;
1267 for (i = 0; i < 256; i++) {
1268 template_buffer[i] = i;
1270 template_buffer[256] = 0;
1273 static int
1274 PyUnknownEncodingHandler(void *encodingHandlerData,
1275 const XML_Char *name,
1276 XML_Encoding *info)
1278 PyUnicodeObject *_u_string = NULL;
1279 int result = 0;
1280 int i;
1282 /* Yes, supports only 8bit encodings */
1283 _u_string = (PyUnicodeObject *)
1284 PyUnicode_Decode(template_buffer, 256, name, "replace");
1286 if (_u_string == NULL)
1287 return result;
1289 for (i = 0; i < 256; i++) {
1290 /* Stupid to access directly, but fast */
1291 Py_UNICODE c = _u_string->str[i];
1292 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1293 info->map[i] = -1;
1294 else
1295 info->map[i] = c;
1297 info->data = NULL;
1298 info->convert = NULL;
1299 info->release = NULL;
1300 result = 1;
1301 Py_DECREF(_u_string);
1302 return result;
1305 #endif
1307 static PyObject *
1308 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1310 int i;
1311 xmlparseobject *self;
1313 #ifdef Py_TPFLAGS_HAVE_GC
1314 /* Code for versions 2.2 and later */
1315 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1316 #else
1317 self = PyObject_New(xmlparseobject, &Xmlparsetype);
1318 #endif
1319 if (self == NULL)
1320 return NULL;
1322 #ifdef Py_USING_UNICODE
1323 self->returns_unicode = 1;
1324 #else
1325 self->returns_unicode = 0;
1326 #endif
1328 self->buffer = NULL;
1329 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1330 self->buffer_used = 0;
1331 self->ordered_attributes = 0;
1332 self->specified_attributes = 0;
1333 self->in_callback = 0;
1334 self->ns_prefixes = 0;
1335 self->handlers = NULL;
1336 if (namespace_separator != NULL) {
1337 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1339 else {
1340 self->itself = XML_ParserCreate(encoding);
1342 self->intern = intern;
1343 Py_XINCREF(self->intern);
1344 #ifdef Py_TPFLAGS_HAVE_GC
1345 PyObject_GC_Track(self);
1346 #else
1347 PyObject_GC_Init(self);
1348 #endif
1349 if (self->itself == NULL) {
1350 PyErr_SetString(PyExc_RuntimeError,
1351 "XML_ParserCreate failed");
1352 Py_DECREF(self);
1353 return NULL;
1355 XML_SetUserData(self->itself, (void *)self);
1356 #ifdef Py_USING_UNICODE
1357 XML_SetUnknownEncodingHandler(self->itself,
1358 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1359 #endif
1361 for (i = 0; handler_info[i].name != NULL; i++)
1362 /* do nothing */;
1364 self->handlers = malloc(sizeof(PyObject *) * i);
1365 if (!self->handlers) {
1366 Py_DECREF(self);
1367 return PyErr_NoMemory();
1369 clear_handlers(self, 1);
1371 return (PyObject*)self;
1375 static void
1376 xmlparse_dealloc(xmlparseobject *self)
1378 int i;
1379 #ifdef Py_TPFLAGS_HAVE_GC
1380 PyObject_GC_UnTrack(self);
1381 #else
1382 PyObject_GC_Fini(self);
1383 #endif
1384 if (self->itself != NULL)
1385 XML_ParserFree(self->itself);
1386 self->itself = NULL;
1388 if (self->handlers != NULL) {
1389 PyObject *temp;
1390 for (i = 0; handler_info[i].name != NULL; i++) {
1391 temp = self->handlers[i];
1392 self->handlers[i] = NULL;
1393 Py_XDECREF(temp);
1395 free(self->handlers);
1396 self->handlers = NULL;
1398 if (self->buffer != NULL) {
1399 free(self->buffer);
1400 self->buffer = NULL;
1402 Py_XDECREF(self->intern);
1403 #ifndef Py_TPFLAGS_HAVE_GC
1404 /* Code for versions 2.0 and 2.1 */
1405 PyObject_Del(self);
1406 #else
1407 /* Code for versions 2.2 and later. */
1408 PyObject_GC_Del(self);
1409 #endif
1412 static int
1413 handlername2int(const char *name)
1415 int i;
1416 for (i = 0; handler_info[i].name != NULL; i++) {
1417 if (strcmp(name, handler_info[i].name) == 0) {
1418 return i;
1421 return -1;
1424 static PyObject *
1425 get_pybool(int istrue)
1427 PyObject *result = istrue ? Py_True : Py_False;
1428 Py_INCREF(result);
1429 return result;
1432 static PyObject *
1433 xmlparse_getattr(xmlparseobject *self, char *name)
1435 int handlernum = handlername2int(name);
1437 if (handlernum != -1) {
1438 PyObject *result = self->handlers[handlernum];
1439 if (result == NULL)
1440 result = Py_None;
1441 Py_INCREF(result);
1442 return result;
1444 if (name[0] == 'E') {
1445 if (strcmp(name, "ErrorCode") == 0)
1446 return PyInt_FromLong((long)
1447 XML_GetErrorCode(self->itself));
1448 if (strcmp(name, "ErrorLineNumber") == 0)
1449 return PyInt_FromLong((long)
1450 XML_GetErrorLineNumber(self->itself));
1451 if (strcmp(name, "ErrorColumnNumber") == 0)
1452 return PyInt_FromLong((long)
1453 XML_GetErrorColumnNumber(self->itself));
1454 if (strcmp(name, "ErrorByteIndex") == 0)
1455 return PyInt_FromLong((long)
1456 XML_GetErrorByteIndex(self->itself));
1458 if (name[0] == 'C') {
1459 if (strcmp(name, "CurrentLineNumber") == 0)
1460 return PyInt_FromLong((long)
1461 XML_GetCurrentLineNumber(self->itself));
1462 if (strcmp(name, "CurrentColumnNumber") == 0)
1463 return PyInt_FromLong((long)
1464 XML_GetCurrentColumnNumber(self->itself));
1465 if (strcmp(name, "CurrentByteIndex") == 0)
1466 return PyInt_FromLong((long)
1467 XML_GetCurrentByteIndex(self->itself));
1469 if (name[0] == 'b') {
1470 if (strcmp(name, "buffer_size") == 0)
1471 return PyInt_FromLong((long) self->buffer_size);
1472 if (strcmp(name, "buffer_text") == 0)
1473 return get_pybool(self->buffer != NULL);
1474 if (strcmp(name, "buffer_used") == 0)
1475 return PyInt_FromLong((long) self->buffer_used);
1477 if (strcmp(name, "namespace_prefixes") == 0)
1478 return get_pybool(self->ns_prefixes);
1479 if (strcmp(name, "ordered_attributes") == 0)
1480 return get_pybool(self->ordered_attributes);
1481 if (strcmp(name, "returns_unicode") == 0)
1482 return get_pybool((long) self->returns_unicode);
1483 if (strcmp(name, "specified_attributes") == 0)
1484 return get_pybool((long) self->specified_attributes);
1485 if (strcmp(name, "intern") == 0) {
1486 if (self->intern == NULL) {
1487 Py_INCREF(Py_None);
1488 return Py_None;
1490 else {
1491 Py_INCREF(self->intern);
1492 return self->intern;
1496 #define APPEND(list, str) \
1497 do { \
1498 PyObject *o = PyString_FromString(str); \
1499 if (o != NULL) \
1500 PyList_Append(list, o); \
1501 Py_XDECREF(o); \
1502 } while (0)
1504 if (strcmp(name, "__members__") == 0) {
1505 int i;
1506 PyObject *rc = PyList_New(0);
1507 if (!rc)
1508 return NULL;
1509 for (i = 0; handler_info[i].name != NULL; i++) {
1510 PyObject *o = get_handler_name(&handler_info[i]);
1511 if (o != NULL)
1512 PyList_Append(rc, o);
1513 Py_XDECREF(o);
1515 APPEND(rc, "ErrorCode");
1516 APPEND(rc, "ErrorLineNumber");
1517 APPEND(rc, "ErrorColumnNumber");
1518 APPEND(rc, "ErrorByteIndex");
1519 APPEND(rc, "CurrentLineNumber");
1520 APPEND(rc, "CurrentColumnNumber");
1521 APPEND(rc, "CurrentByteIndex");
1522 APPEND(rc, "buffer_size");
1523 APPEND(rc, "buffer_text");
1524 APPEND(rc, "buffer_used");
1525 APPEND(rc, "namespace_prefixes");
1526 APPEND(rc, "ordered_attributes");
1527 APPEND(rc, "returns_unicode");
1528 APPEND(rc, "specified_attributes");
1529 APPEND(rc, "intern");
1531 #undef APPEND
1532 return rc;
1534 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1537 static int
1538 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1540 int handlernum = handlername2int(name);
1541 if (handlernum >= 0) {
1542 xmlhandler c_handler = NULL;
1543 PyObject *temp = self->handlers[handlernum];
1545 if (v == Py_None)
1546 v = NULL;
1547 else if (v != NULL) {
1548 Py_INCREF(v);
1549 c_handler = handler_info[handlernum].handler;
1551 self->handlers[handlernum] = v;
1552 Py_XDECREF(temp);
1553 handler_info[handlernum].setter(self->itself, c_handler);
1554 return 1;
1556 return 0;
1559 static int
1560 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1562 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1563 if (v == NULL) {
1564 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1565 return -1;
1567 if (strcmp(name, "buffer_text") == 0) {
1568 if (PyObject_IsTrue(v)) {
1569 if (self->buffer == NULL) {
1570 self->buffer = malloc(self->buffer_size);
1571 if (self->buffer == NULL) {
1572 PyErr_NoMemory();
1573 return -1;
1575 self->buffer_used = 0;
1578 else if (self->buffer != NULL) {
1579 if (flush_character_buffer(self) < 0)
1580 return -1;
1581 free(self->buffer);
1582 self->buffer = NULL;
1584 return 0;
1586 if (strcmp(name, "namespace_prefixes") == 0) {
1587 if (PyObject_IsTrue(v))
1588 self->ns_prefixes = 1;
1589 else
1590 self->ns_prefixes = 0;
1591 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1592 return 0;
1594 if (strcmp(name, "ordered_attributes") == 0) {
1595 if (PyObject_IsTrue(v))
1596 self->ordered_attributes = 1;
1597 else
1598 self->ordered_attributes = 0;
1599 return 0;
1601 if (strcmp(name, "returns_unicode") == 0) {
1602 if (PyObject_IsTrue(v)) {
1603 #ifndef Py_USING_UNICODE
1604 PyErr_SetString(PyExc_ValueError,
1605 "Unicode support not available");
1606 return -1;
1607 #else
1608 self->returns_unicode = 1;
1609 #endif
1611 else
1612 self->returns_unicode = 0;
1613 return 0;
1615 if (strcmp(name, "specified_attributes") == 0) {
1616 if (PyObject_IsTrue(v))
1617 self->specified_attributes = 1;
1618 else
1619 self->specified_attributes = 0;
1620 return 0;
1622 if (strcmp(name, "CharacterDataHandler") == 0) {
1623 /* If we're changing the character data handler, flush all
1624 * cached data with the old handler. Not sure there's a
1625 * "right" thing to do, though, but this probably won't
1626 * happen.
1628 if (flush_character_buffer(self) < 0)
1629 return -1;
1631 if (sethandler(self, name, v)) {
1632 return 0;
1634 PyErr_SetString(PyExc_AttributeError, name);
1635 return -1;
#ifdef WITH_CYCLE_GC
/* tp_traverse: report every object reference held by the parser to the
 * cycle collector.  That is each handler slot and the intern object;
 * Py_VISIT skips NULL entries.  The intern reference is visited because
 * xmlparse_clear() releases it, so the collector must be able to see it
 * when looking for cycles.
 */
static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
{
    int i;
    for (i = 0; handler_info[i].name != NULL; i++)
        Py_VISIT(op->handlers[i]);
    Py_VISIT(op->intern);
    return 0;
}

/* tp_clear: break reference cycles by dropping all handler references
 * (which also unregisters the Expat callbacks, see clear_handlers()) and
 * the intern reference.  Always returns 0.
 */
static int
xmlparse_clear(xmlparseobject *op)
{
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}
#endif
1657 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1659 static PyTypeObject Xmlparsetype = {
1660 PyObject_HEAD_INIT(NULL)
1661 0, /*ob_size*/
1662 "pyexpat.xmlparser", /*tp_name*/
1663 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1664 0, /*tp_itemsize*/
1665 /* methods */
1666 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1667 (printfunc)0, /*tp_print*/
1668 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1669 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1670 (cmpfunc)0, /*tp_compare*/
1671 (reprfunc)0, /*tp_repr*/
1672 0, /*tp_as_number*/
1673 0, /*tp_as_sequence*/
1674 0, /*tp_as_mapping*/
1675 (hashfunc)0, /*tp_hash*/
1676 (ternaryfunc)0, /*tp_call*/
1677 (reprfunc)0, /*tp_str*/
1678 0, /* tp_getattro */
1679 0, /* tp_setattro */
1680 0, /* tp_as_buffer */
1681 #ifdef Py_TPFLAGS_HAVE_GC
1682 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1683 #else
1684 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1685 #endif
1686 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1687 #ifdef WITH_CYCLE_GC
1688 (traverseproc)xmlparse_traverse, /* tp_traverse */
1689 (inquiry)xmlparse_clear /* tp_clear */
1690 #else
1691 0, 0
1692 #endif
1695 /* End of code for xmlparser objects */
1696 /* -------------------------------------------------------- */
1698 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1699 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1700 Return a new XML parser object.");
1702 static PyObject *
1703 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1705 char *encoding = NULL;
1706 char *namespace_separator = NULL;
1707 PyObject *intern = NULL;
1708 PyObject *result;
1709 int intern_decref = 0;
1710 static char *kwlist[] = {"encoding", "namespace_separator",
1711 "intern", NULL};
1713 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1714 &encoding, &namespace_separator, &intern))
1715 return NULL;
1716 if (namespace_separator != NULL
1717 && strlen(namespace_separator) > 1) {
1718 PyErr_SetString(PyExc_ValueError,
1719 "namespace_separator must be at most one"
1720 " character, omitted, or None");
1721 return NULL;
1723 /* Explicitly passing None means no interning is desired.
1724 Not passing anything means that a new dictionary is used. */
1725 if (intern == Py_None)
1726 intern = NULL;
1727 else if (intern == NULL) {
1728 intern = PyDict_New();
1729 if (!intern)
1730 return NULL;
1731 intern_decref = 1;
1733 else if (!PyDict_Check(intern)) {
1734 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1735 return NULL;
1738 result = newxmlparseobject(encoding, namespace_separator, intern);
1739 if (intern_decref) {
1740 Py_DECREF(intern);
1742 return result;
1745 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1746 "ErrorString(errno) -> string\n\
1747 Returns string error for given number.");
1749 static PyObject *
1750 pyexpat_ErrorString(PyObject *self, PyObject *args)
1752 long code = 0;
1754 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1755 return NULL;
1756 return Py_BuildValue("z", XML_ErrorString((int)code));
1759 /* List of methods defined in the module */
1761 static struct PyMethodDef pyexpat_methods[] = {
1762 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1763 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1764 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1765 METH_VARARGS, pyexpat_ErrorString__doc__},
1767 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
1770 /* Module docstring */
1772 PyDoc_STRVAR(pyexpat_module_documentation,
1773 "Python wrapper for Expat parser.");
1775 /* Return a Python string that represents the version number without the
1776 * extra cruft added by revision control, even if the right options were
1777 * given to the "cvs export" command to make it not include the extra
1778 * cruft.
1780 static PyObject *
1781 get_version_string(void)
1783 static char *rcsid = "$Revision$";
1784 char *rev = rcsid;
1785 int i = 0;
1787 while (!isdigit(Py_CHARMASK(*rev)))
1788 ++rev;
1789 while (rev[i] != ' ' && rev[i] != '\0')
1790 ++i;
1792 return PyString_FromStringAndSize(rev, i);
1795 /* Initialization function for the module */
1797 #ifndef MODULE_NAME
1798 #define MODULE_NAME "pyexpat"
1799 #endif
1801 #ifndef MODULE_INITFUNC
1802 #define MODULE_INITFUNC initpyexpat
1803 #endif
1805 #ifndef PyMODINIT_FUNC
1806 # ifdef MS_WINDOWS
1807 # define PyMODINIT_FUNC __declspec(dllexport) void
1808 # else
1809 # define PyMODINIT_FUNC void
1810 # endif
1811 #endif
1813 PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
1815 PyMODINIT_FUNC
1816 MODULE_INITFUNC(void)
1818 PyObject *m, *d;
1819 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1820 PyObject *errors_module;
1821 PyObject *modelmod_name;
1822 PyObject *model_module;
1823 PyObject *sys_modules;
1824 static struct PyExpat_CAPI capi;
1825 PyObject* capi_object;
1827 if (errmod_name == NULL)
1828 return;
1829 modelmod_name = PyString_FromString(MODULE_NAME ".model");
1830 if (modelmod_name == NULL)
1831 return;
1833 Xmlparsetype.ob_type = &PyType_Type;
1835 /* Create the module and add the functions */
1836 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1837 pyexpat_module_documentation);
1838 if (m == NULL)
1839 return;
1841 /* Add some symbolic constants to the module */
1842 if (ErrorObject == NULL) {
1843 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1844 NULL, NULL);
1845 if (ErrorObject == NULL)
1846 return;
1848 Py_INCREF(ErrorObject);
1849 PyModule_AddObject(m, "error", ErrorObject);
1850 Py_INCREF(ErrorObject);
1851 PyModule_AddObject(m, "ExpatError", ErrorObject);
1852 Py_INCREF(&Xmlparsetype);
1853 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1855 PyModule_AddObject(m, "__version__", get_version_string());
1856 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1857 (char *) XML_ExpatVersion());
1859 XML_Expat_Version info = XML_ExpatVersionInfo();
1860 PyModule_AddObject(m, "version_info",
1861 Py_BuildValue("(iii)", info.major,
1862 info.minor, info.micro));
1864 #ifdef Py_USING_UNICODE
1865 init_template_buffer();
1866 #endif
1867 /* XXX When Expat supports some way of figuring out how it was
1868 compiled, this should check and set native_encoding
1869 appropriately.
1871 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1873 sys_modules = PySys_GetObject("modules");
1874 d = PyModule_GetDict(m);
1875 errors_module = PyDict_GetItem(d, errmod_name);
1876 if (errors_module == NULL) {
1877 errors_module = PyModule_New(MODULE_NAME ".errors");
1878 if (errors_module != NULL) {
1879 PyDict_SetItem(sys_modules, errmod_name, errors_module);
1880 /* gives away the reference to errors_module */
1881 PyModule_AddObject(m, "errors", errors_module);
1884 Py_DECREF(errmod_name);
1885 model_module = PyDict_GetItem(d, modelmod_name);
1886 if (model_module == NULL) {
1887 model_module = PyModule_New(MODULE_NAME ".model");
1888 if (model_module != NULL) {
1889 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1890 /* gives away the reference to model_module */
1891 PyModule_AddObject(m, "model", model_module);
1894 Py_DECREF(modelmod_name);
1895 if (errors_module == NULL || model_module == NULL)
1896 /* Don't core dump later! */
1897 return;
1899 #if XML_COMBINED_VERSION > 19505
1901 const XML_Feature *features = XML_GetFeatureList();
1902 PyObject *list = PyList_New(0);
1903 if (list == NULL)
1904 /* just ignore it */
1905 PyErr_Clear();
1906 else {
1907 int i = 0;
1908 for (; features[i].feature != XML_FEATURE_END; ++i) {
1909 int ok;
1910 PyObject *item = Py_BuildValue("si", features[i].name,
1911 features[i].value);
1912 if (item == NULL) {
1913 Py_DECREF(list);
1914 list = NULL;
1915 break;
1917 ok = PyList_Append(list, item);
1918 Py_DECREF(item);
1919 if (ok < 0) {
1920 PyErr_Clear();
1921 break;
1924 if (list != NULL)
1925 PyModule_AddObject(m, "features", list);
1928 #endif
1930 #define MYCONST(name) \
1931 PyModule_AddStringConstant(errors_module, #name, \
1932 (char*)XML_ErrorString(name))
1934 MYCONST(XML_ERROR_NO_MEMORY);
1935 MYCONST(XML_ERROR_SYNTAX);
1936 MYCONST(XML_ERROR_NO_ELEMENTS);
1937 MYCONST(XML_ERROR_INVALID_TOKEN);
1938 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1939 MYCONST(XML_ERROR_PARTIAL_CHAR);
1940 MYCONST(XML_ERROR_TAG_MISMATCH);
1941 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1942 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1943 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1944 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1945 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1946 MYCONST(XML_ERROR_ASYNC_ENTITY);
1947 MYCONST(XML_ERROR_BAD_CHAR_REF);
1948 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1949 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1950 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1951 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1952 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1953 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1954 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1955 MYCONST(XML_ERROR_NOT_STANDALONE);
1956 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1957 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1958 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1959 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1960 /* Added in Expat 1.95.7. */
1961 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1962 /* Added in Expat 1.95.8. */
1963 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1964 MYCONST(XML_ERROR_INCOMPLETE_PE);
1965 MYCONST(XML_ERROR_XML_DECL);
1966 MYCONST(XML_ERROR_TEXT_DECL);
1967 MYCONST(XML_ERROR_PUBLICID);
1968 MYCONST(XML_ERROR_SUSPENDED);
1969 MYCONST(XML_ERROR_NOT_SUSPENDED);
1970 MYCONST(XML_ERROR_ABORTED);
1971 MYCONST(XML_ERROR_FINISHED);
1972 MYCONST(XML_ERROR_SUSPEND_PE);
1974 PyModule_AddStringConstant(errors_module, "__doc__",
1975 "Constants used to describe error conditions.");
1977 #undef MYCONST
1979 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1980 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1981 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1982 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1983 #undef MYCONST
1985 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1986 PyModule_AddStringConstant(model_module, "__doc__",
1987 "Constants used to interpret content model information.");
1989 MYCONST(XML_CTYPE_EMPTY);
1990 MYCONST(XML_CTYPE_ANY);
1991 MYCONST(XML_CTYPE_MIXED);
1992 MYCONST(XML_CTYPE_NAME);
1993 MYCONST(XML_CTYPE_CHOICE);
1994 MYCONST(XML_CTYPE_SEQ);
1996 MYCONST(XML_CQUANT_NONE);
1997 MYCONST(XML_CQUANT_OPT);
1998 MYCONST(XML_CQUANT_REP);
1999 MYCONST(XML_CQUANT_PLUS);
2000 #undef MYCONST
2002 /* initialize pyexpat dispatch table */
2003 capi.size = sizeof(capi);
2004 capi.magic = PyExpat_CAPI_MAGIC;
2005 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2006 capi.MINOR_VERSION = XML_MINOR_VERSION;
2007 capi.MICRO_VERSION = XML_MICRO_VERSION;
2008 capi.ErrorString = XML_ErrorString;
2009 capi.GetErrorCode = XML_GetErrorCode;
2010 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2011 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
2012 capi.Parse = XML_Parse;
2013 capi.ParserCreate_MM = XML_ParserCreate_MM;
2014 capi.ParserFree = XML_ParserFree;
2015 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2016 capi.SetCommentHandler = XML_SetCommentHandler;
2017 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2018 capi.SetElementHandler = XML_SetElementHandler;
2019 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2020 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2021 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2022 capi.SetUserData = XML_SetUserData;
2024 /* export as cobject */
2025 capi_object = PyCObject_FromVoidPtr(&capi, NULL);
2026 if (capi_object)
2027 PyModule_AddObject(m, "expat_CAPI", capi_object);
2030 static void
2031 clear_handlers(xmlparseobject *self, int initial)
2033 int i = 0;
2034 PyObject *temp;
2036 for (; handler_info[i].name != NULL; i++) {
2037 if (initial)
2038 self->handlers[i] = NULL;
2039 else {
2040 temp = self->handlers[i];
2041 self->handlers[i] = NULL;
2042 Py_XDECREF(temp);
2043 handler_info[i].setter(self->itself, NULL);
2048 static struct HandlerInfo handler_info[] = {
2049 {"StartElementHandler",
2050 (xmlhandlersetter)XML_SetStartElementHandler,
2051 (xmlhandler)my_StartElementHandler},
2052 {"EndElementHandler",
2053 (xmlhandlersetter)XML_SetEndElementHandler,
2054 (xmlhandler)my_EndElementHandler},
2055 {"ProcessingInstructionHandler",
2056 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2057 (xmlhandler)my_ProcessingInstructionHandler},
2058 {"CharacterDataHandler",
2059 (xmlhandlersetter)XML_SetCharacterDataHandler,
2060 (xmlhandler)my_CharacterDataHandler},
2061 {"UnparsedEntityDeclHandler",
2062 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
2063 (xmlhandler)my_UnparsedEntityDeclHandler},
2064 {"NotationDeclHandler",
2065 (xmlhandlersetter)XML_SetNotationDeclHandler,
2066 (xmlhandler)my_NotationDeclHandler},
2067 {"StartNamespaceDeclHandler",
2068 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
2069 (xmlhandler)my_StartNamespaceDeclHandler},
2070 {"EndNamespaceDeclHandler",
2071 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
2072 (xmlhandler)my_EndNamespaceDeclHandler},
2073 {"CommentHandler",
2074 (xmlhandlersetter)XML_SetCommentHandler,
2075 (xmlhandler)my_CommentHandler},
2076 {"StartCdataSectionHandler",
2077 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
2078 (xmlhandler)my_StartCdataSectionHandler},
2079 {"EndCdataSectionHandler",
2080 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
2081 (xmlhandler)my_EndCdataSectionHandler},
2082 {"DefaultHandler",
2083 (xmlhandlersetter)XML_SetDefaultHandler,
2084 (xmlhandler)my_DefaultHandler},
2085 {"DefaultHandlerExpand",
2086 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2087 (xmlhandler)my_DefaultHandlerExpandHandler},
2088 {"NotStandaloneHandler",
2089 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2090 (xmlhandler)my_NotStandaloneHandler},
2091 {"ExternalEntityRefHandler",
2092 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
2093 (xmlhandler)my_ExternalEntityRefHandler},
2094 {"StartDoctypeDeclHandler",
2095 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
2096 (xmlhandler)my_StartDoctypeDeclHandler},
2097 {"EndDoctypeDeclHandler",
2098 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
2099 (xmlhandler)my_EndDoctypeDeclHandler},
2100 {"EntityDeclHandler",
2101 (xmlhandlersetter)XML_SetEntityDeclHandler,
2102 (xmlhandler)my_EntityDeclHandler},
2103 {"XmlDeclHandler",
2104 (xmlhandlersetter)XML_SetXmlDeclHandler,
2105 (xmlhandler)my_XmlDeclHandler},
2106 {"ElementDeclHandler",
2107 (xmlhandlersetter)XML_SetElementDeclHandler,
2108 (xmlhandler)my_ElementDeclHandler},
2109 {"AttlistDeclHandler",
2110 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2111 (xmlhandler)my_AttlistDeclHandler},
2112 #if XML_COMBINED_VERSION >= 19504
2113 {"SkippedEntityHandler",
2114 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2115 (xmlhandler)my_SkippedEntityHandler},
2116 #endif
2118 {NULL, NULL, NULL} /* sentinel */