Added information on function name added to LogRecord, and the 'extra' keyword parameter.
[python.git] / Modules / pyexpat.c
blob76b7cf9ba0f0bc2898830c94530a7629534e3e57
1 #include "Python.h"
2 #include <ctype.h>
4 #include "frameobject.h"
5 #include "expat.h"
7 #include "pyexpat.h"
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
#ifndef PyDoc_STRVAR

/*
 * Fallback definitions used when the Python headers do not supply
 * PyDoc_STRVAR.
 *
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)          str
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name, str) PyDoc_VAR(name) = PyDoc_STR(str)

#endif
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
28 #else
29 #define FIX_TRACE
30 #endif
/* Slot numbers for the supported callbacks.  A value of this enum indexes
 * both the static handler_info[] table and a parser's handlers[] array.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in when the Expat headers are new enough. */
    SkippedEntity,
#endif
    _DummyDecl
};
60 static PyObject *ErrorObject;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
66 typedef struct {
67 PyObject_HEAD
69 XML_Parser itself;
70 int returns_unicode; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes; /* Return attributes as a list. */
73 int specified_attributes; /* Report only specified attributes. */
74 int in_callback; /* Is a callback active? */
75 int ns_prefixes; /* Namespace-triplets mode? */
76 XML_Char *buffer; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size; /* Size of buffer, in XML_Char units */
79 int buffer_used; /* Buffer units in use */
80 PyObject *intern; /* Dictionary to intern strings */
81 PyObject **handlers;
82 } xmlparseobject;
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype;
88 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
89 typedef void* xmlhandler;
91 struct HandlerInfo {
92 const char *name;
93 xmlhandlersetter setter;
94 xmlhandler handler;
95 PyCodeObject *tb_code;
96 PyObject *nameobj;
99 static struct HandlerInfo handler_info[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
104 static int
105 set_error_attr(PyObject *err, char *name, int value)
107 PyObject *v = PyInt_FromLong(value);
109 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
110 Py_DECREF(v);
111 return 0;
113 Py_DECREF(v);
114 return 1;
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
120 static PyObject *
121 set_error(xmlparseobject *self, enum XML_Error code)
123 PyObject *err;
124 char buffer[256];
125 XML_Parser parser = self->itself;
126 int lineno = XML_GetErrorLineNumber(parser);
127 int column = XML_GetErrorColumnNumber(parser);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer, "%.200s: line %i, column %i",
132 XML_ErrorString(code), lineno, column);
133 err = PyObject_CallFunction(ErrorObject, "s", buffer);
134 if ( err != NULL
135 && set_error_attr(err, "code", code)
136 && set_error_attr(err, "offset", column)
137 && set_error_attr(err, "lineno", lineno)) {
138 PyErr_SetObject(ErrorObject, err);
140 Py_DECREF(err);
141 return NULL;
144 static int
145 have_handler(xmlparseobject *self, int type)
147 PyObject *handler = self->handlers[type];
148 return handler != NULL;
151 static PyObject *
152 get_handler_name(struct HandlerInfo *hinfo)
154 PyObject *name = hinfo->nameobj;
155 if (name == NULL) {
156 name = PyString_FromString(hinfo->name);
157 hinfo->nameobj = name;
159 Py_XINCREF(name);
160 return name;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
168 static PyObject *
169 conv_string_to_unicode(const XML_Char *str)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
174 if (str == NULL) {
175 Py_INCREF(Py_None);
176 return Py_None;
178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
181 static PyObject *
182 conv_string_len_to_unicode(const XML_Char *str, int len)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
187 if (str == NULL) {
188 Py_INCREF(Py_None);
189 return Py_None;
191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
193 #endif
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
198 static PyObject *
199 conv_string_to_utf8(const XML_Char *str)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
204 if (str == NULL) {
205 Py_INCREF(Py_None);
206 return Py_None;
208 return PyString_FromString(str);
211 static PyObject *
212 conv_string_len_to_utf8(const XML_Char *str, int len)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
217 if (str == NULL) {
218 Py_INCREF(Py_None);
219 return Py_None;
221 return PyString_FromStringAndSize((const char *)str, len);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject *self, int initial);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
231 static int
232 error_external_entity_ref_handler(XML_Parser parser,
233 const XML_Char *context,
234 const XML_Char *base,
235 const XML_Char *systemId,
236 const XML_Char *publicId)
238 return 0;
241 static void
242 flag_error(xmlparseobject *self)
244 clear_handlers(self, 0);
245 XML_SetExternalEntityRefHandler(self->itself,
246 error_external_entity_ref_handler);
249 static PyCodeObject*
250 getcode(enum HandlerTypes slot, char* func_name, int lineno)
252 PyObject *code = NULL;
253 PyObject *name = NULL;
254 PyObject *nulltuple = NULL;
255 PyObject *filename = NULL;
257 if (handler_info[slot].tb_code == NULL) {
258 code = PyString_FromString("");
259 if (code == NULL)
260 goto failed;
261 name = PyString_FromString(func_name);
262 if (name == NULL)
263 goto failed;
264 nulltuple = PyTuple_New(0);
265 if (nulltuple == NULL)
266 goto failed;
267 filename = PyString_FromString(__FILE__);
268 handler_info[slot].tb_code =
269 PyCode_New(0, /* argcount */
270 0, /* nlocals */
271 0, /* stacksize */
272 0, /* flags */
273 code, /* code */
274 nulltuple, /* consts */
275 nulltuple, /* names */
276 nulltuple, /* varnames */
277 #if PYTHON_API_VERSION >= 1010
278 nulltuple, /* freevars */
279 nulltuple, /* cellvars */
280 #endif
281 filename, /* filename */
282 name, /* name */
283 lineno, /* firstlineno */
284 code /* lnotab */
286 if (handler_info[slot].tb_code == NULL)
287 goto failed;
288 Py_DECREF(code);
289 Py_DECREF(nulltuple);
290 Py_DECREF(filename);
291 Py_DECREF(name);
293 return handler_info[slot].tb_code;
294 failed:
295 Py_XDECREF(code);
296 Py_XDECREF(name);
297 return NULL;
300 #ifdef FIX_TRACE
301 static int
302 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
304 int result = 0;
305 if (!tstate->use_tracing || tstate->tracing)
306 return 0;
307 if (tstate->c_profilefunc != NULL) {
308 tstate->tracing++;
309 result = tstate->c_profilefunc(tstate->c_profileobj,
310 f, code , val);
311 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
312 || (tstate->c_profilefunc != NULL));
313 tstate->tracing--;
314 if (result)
315 return result;
317 if (tstate->c_tracefunc != NULL) {
318 tstate->tracing++;
319 result = tstate->c_tracefunc(tstate->c_traceobj,
320 f, code , val);
321 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
322 || (tstate->c_profilefunc != NULL));
323 tstate->tracing--;
325 return result;
328 static int
329 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
331 PyObject *type, *value, *traceback, *arg;
332 int err;
334 if (tstate->c_tracefunc == NULL)
335 return 0;
337 PyErr_Fetch(&type, &value, &traceback);
338 if (value == NULL) {
339 value = Py_None;
340 Py_INCREF(value);
342 #if PY_VERSION_HEX < 0x02040000
343 arg = Py_BuildValue("(OOO)", type, value, traceback);
344 #else
345 arg = PyTuple_Pack(3, type, value, traceback);
346 #endif
347 if (arg == NULL) {
348 PyErr_Restore(type, value, traceback);
349 return 0;
351 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
352 Py_DECREF(arg);
353 if (err == 0)
354 PyErr_Restore(type, value, traceback);
355 else {
356 Py_XDECREF(type);
357 Py_XDECREF(value);
358 Py_XDECREF(traceback);
360 return err;
362 #endif
364 static PyObject*
365 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
366 xmlparseobject *self)
368 PyThreadState *tstate = PyThreadState_GET();
369 PyFrameObject *f;
370 PyObject *res;
372 if (c == NULL)
373 return NULL;
375 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
376 if (f == NULL)
377 return NULL;
378 tstate->frame = f;
379 #ifdef FIX_TRACE
380 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
381 return NULL;
383 #endif
384 res = PyEval_CallObject(func, args);
385 if (res == NULL) {
386 if (tstate->curexc_traceback == NULL)
387 PyTraceBack_Here(f);
388 XML_StopParser(self->itself, XML_FALSE);
389 #ifdef FIX_TRACE
390 if (trace_frame_exc(tstate, f) < 0) {
391 return NULL;
394 else {
395 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
396 Py_XDECREF(res);
397 res = NULL;
400 #else
402 #endif
403 tstate->frame = f->f_back;
404 Py_DECREF(f);
405 return res;
408 #ifndef Py_USING_UNICODE
409 #define STRING_CONV_FUNC conv_string_to_utf8
410 #else
411 /* Python 2.0 and later versions, when built with Unicode support */
412 #define STRING_CONV_FUNC (self->returns_unicode \
413 ? conv_string_to_unicode : conv_string_to_utf8)
414 #endif
416 static PyObject*
417 string_intern(xmlparseobject *self, const char* str)
419 PyObject *result = STRING_CONV_FUNC(str);
420 PyObject *value;
421 /* result can be NULL if the unicode conversion failed. */
422 if (!result)
423 return result;
424 if (!self->intern)
425 return result;
426 value = PyDict_GetItem(self->intern, result);
427 if (!value) {
428 if (PyDict_SetItem(self->intern, result, result) == 0)
429 return result;
430 else
431 return NULL;
433 Py_INCREF(value);
434 Py_DECREF(result);
435 return value;
438 /* Return 0 on success, -1 on exception.
439 * flag_error() will be called before return if needed.
441 static int
442 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
444 PyObject *args;
445 PyObject *temp;
447 args = PyTuple_New(1);
448 if (args == NULL)
449 return -1;
450 #ifdef Py_USING_UNICODE
451 temp = (self->returns_unicode
452 ? conv_string_len_to_unicode(buffer, len)
453 : conv_string_len_to_utf8(buffer, len));
454 #else
455 temp = conv_string_len_to_utf8(buffer, len);
456 #endif
457 if (temp == NULL) {
458 Py_DECREF(args);
459 flag_error(self);
460 return -1;
462 PyTuple_SET_ITEM(args, 0, temp);
463 /* temp is now a borrowed reference; consider it unused. */
464 self->in_callback = 1;
465 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
466 self->handlers[CharacterData], args, self);
467 /* temp is an owned reference again, or NULL */
468 self->in_callback = 0;
469 Py_DECREF(args);
470 if (temp == NULL) {
471 flag_error(self);
472 return -1;
474 Py_DECREF(temp);
475 return 0;
478 static int
479 flush_character_buffer(xmlparseobject *self)
481 int rc;
482 if (self->buffer == NULL || self->buffer_used == 0)
483 return 0;
484 rc = call_character_handler(self, self->buffer, self->buffer_used);
485 self->buffer_used = 0;
486 return rc;
489 static void
490 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
492 xmlparseobject *self = (xmlparseobject *) userData;
493 if (self->buffer == NULL)
494 call_character_handler(self, data, len);
495 else {
496 if ((self->buffer_used + len) > self->buffer_size) {
497 if (flush_character_buffer(self) < 0)
498 return;
499 /* handler might have changed; drop the rest on the floor
500 * if there isn't a handler anymore
502 if (!have_handler(self, CharacterData))
503 return;
505 if (len > self->buffer_size) {
506 call_character_handler(self, data, len);
507 self->buffer_used = 0;
509 else {
510 memcpy(self->buffer + self->buffer_used,
511 data, len * sizeof(XML_Char));
512 self->buffer_used += len;
517 static void
518 my_StartElementHandler(void *userData,
519 const XML_Char *name, const XML_Char *atts[])
521 xmlparseobject *self = (xmlparseobject *)userData;
523 if (have_handler(self, StartElement)) {
524 PyObject *container, *rv, *args;
525 int i, max;
527 if (flush_character_buffer(self) < 0)
528 return;
529 /* Set max to the number of slots filled in atts[]; max/2 is
530 * the number of attributes we need to process.
532 if (self->specified_attributes) {
533 max = XML_GetSpecifiedAttributeCount(self->itself);
535 else {
536 max = 0;
537 while (atts[max] != NULL)
538 max += 2;
540 /* Build the container. */
541 if (self->ordered_attributes)
542 container = PyList_New(max);
543 else
544 container = PyDict_New();
545 if (container == NULL) {
546 flag_error(self);
547 return;
549 for (i = 0; i < max; i += 2) {
550 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
551 PyObject *v;
552 if (n == NULL) {
553 flag_error(self);
554 Py_DECREF(container);
555 return;
557 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
558 if (v == NULL) {
559 flag_error(self);
560 Py_DECREF(container);
561 Py_DECREF(n);
562 return;
564 if (self->ordered_attributes) {
565 PyList_SET_ITEM(container, i, n);
566 PyList_SET_ITEM(container, i+1, v);
568 else if (PyDict_SetItem(container, n, v)) {
569 flag_error(self);
570 Py_DECREF(n);
571 Py_DECREF(v);
572 return;
574 else {
575 Py_DECREF(n);
576 Py_DECREF(v);
579 args = string_intern(self, name);
580 if (args != NULL)
581 args = Py_BuildValue("(NN)", args, container);
582 if (args == NULL) {
583 Py_DECREF(container);
584 return;
586 /* Container is now a borrowed reference; ignore it. */
587 self->in_callback = 1;
588 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
589 self->handlers[StartElement], args, self);
590 self->in_callback = 0;
591 Py_DECREF(args);
592 if (rv == NULL) {
593 flag_error(self);
594 return;
596 Py_DECREF(rv);
/* RC_HANDLER generates the C callback my_<NAME>Handler for slot NAME.
 *
 *   RC            C return type of the callback
 *   NAME          HandlerTypes member; also used for the function name
 *   PARAMS        parenthesised parameter list of the callback
 *   INIT          declarations/statements run before anything else
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statements run on the handler result `rv`
 *   RETURN        expression returned from every exit (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no
 * Python handler is installed.  Otherwise it flushes buffered character
 * data, builds the argument tuple, calls the handler through
 * call_with_frame(), and calls flag_error() if building the arguments
 * or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void whose user data is the parser object. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning the handler's result converted with PyInt_AsLong();
 * returns 0 when no handler is installed or on failure.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
638 VOID_HANDLER(EndElement,
639 (void *userData, const XML_Char *name),
640 ("(N)", string_intern(self, name)))
642 VOID_HANDLER(ProcessingInstruction,
643 (void *userData,
644 const XML_Char *target,
645 const XML_Char *data),
646 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
648 VOID_HANDLER(UnparsedEntityDecl,
649 (void *userData,
650 const XML_Char *entityName,
651 const XML_Char *base,
652 const XML_Char *systemId,
653 const XML_Char *publicId,
654 const XML_Char *notationName),
655 ("(NNNNN)",
656 string_intern(self, entityName), string_intern(self, base),
657 string_intern(self, systemId), string_intern(self, publicId),
658 string_intern(self, notationName)))
660 #ifndef Py_USING_UNICODE
661 VOID_HANDLER(EntityDecl,
662 (void *userData,
663 const XML_Char *entityName,
664 int is_parameter_entity,
665 const XML_Char *value,
666 int value_length,
667 const XML_Char *base,
668 const XML_Char *systemId,
669 const XML_Char *publicId,
670 const XML_Char *notationName),
671 ("NiNNNNN",
672 string_intern(self, entityName), is_parameter_entity,
673 conv_string_len_to_utf8(value, value_length),
674 string_intern(self, base), string_intern(self, systemId),
675 string_intern(self, publicId),
676 string_intern(self, notationName)))
677 #else
678 VOID_HANDLER(EntityDecl,
679 (void *userData,
680 const XML_Char *entityName,
681 int is_parameter_entity,
682 const XML_Char *value,
683 int value_length,
684 const XML_Char *base,
685 const XML_Char *systemId,
686 const XML_Char *publicId,
687 const XML_Char *notationName),
688 ("NiNNNNN",
689 string_intern(self, entityName), is_parameter_entity,
690 (self->returns_unicode
691 ? conv_string_len_to_unicode(value, value_length)
692 : conv_string_len_to_utf8(value, value_length)),
693 string_intern(self, base), string_intern(self, systemId),
694 string_intern(self, publicId),
695 string_intern(self, notationName)))
696 #endif
698 VOID_HANDLER(XmlDecl,
699 (void *userData,
700 const XML_Char *version,
701 const XML_Char *encoding,
702 int standalone),
703 ("(O&O&i)",
704 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
705 standalone))
707 static PyObject *
708 conv_content_model(XML_Content * const model,
709 PyObject *(*conv_string)(const XML_Char *))
711 PyObject *result = NULL;
712 PyObject *children = PyTuple_New(model->numchildren);
713 int i;
715 if (children != NULL) {
716 assert(model->numchildren < INT_MAX);
717 for (i = 0; i < (int)model->numchildren; ++i) {
718 PyObject *child = conv_content_model(&model->children[i],
719 conv_string);
720 if (child == NULL) {
721 Py_XDECREF(children);
722 return NULL;
724 PyTuple_SET_ITEM(children, i, child);
726 result = Py_BuildValue("(iiO&N)",
727 model->type, model->quant,
728 conv_string,model->name, children);
730 return result;
733 static void
734 my_ElementDeclHandler(void *userData,
735 const XML_Char *name,
736 XML_Content *model)
738 xmlparseobject *self = (xmlparseobject *)userData;
739 PyObject *args = NULL;
741 if (have_handler(self, ElementDecl)) {
742 PyObject *rv = NULL;
743 PyObject *modelobj, *nameobj;
745 if (flush_character_buffer(self) < 0)
746 goto finally;
747 #ifdef Py_USING_UNICODE
748 modelobj = conv_content_model(model,
749 (self->returns_unicode
750 ? conv_string_to_unicode
751 : conv_string_to_utf8));
752 #else
753 modelobj = conv_content_model(model, conv_string_to_utf8);
754 #endif
755 if (modelobj == NULL) {
756 flag_error(self);
757 goto finally;
759 nameobj = string_intern(self, name);
760 if (nameobj == NULL) {
761 Py_DECREF(modelobj);
762 flag_error(self);
763 goto finally;
765 args = Py_BuildValue("NN", nameobj, modelobj);
766 if (args == NULL) {
767 Py_DECREF(modelobj);
768 flag_error(self);
769 goto finally;
771 self->in_callback = 1;
772 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
773 self->handlers[ElementDecl], args, self);
774 self->in_callback = 0;
775 if (rv == NULL) {
776 flag_error(self);
777 goto finally;
779 Py_DECREF(rv);
781 finally:
782 Py_XDECREF(args);
783 XML_FreeContentModel(self->itself, model);
784 return;
787 VOID_HANDLER(AttlistDecl,
788 (void *userData,
789 const XML_Char *elname,
790 const XML_Char *attname,
791 const XML_Char *att_type,
792 const XML_Char *dflt,
793 int isrequired),
794 ("(NNO&O&i)",
795 string_intern(self, elname), string_intern(self, attname),
796 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
797 isrequired))
799 #if XML_COMBINED_VERSION >= 19504
800 VOID_HANDLER(SkippedEntity,
801 (void *userData,
802 const XML_Char *entityName,
803 int is_parameter_entity),
804 ("Ni",
805 string_intern(self, entityName), is_parameter_entity))
806 #endif
808 VOID_HANDLER(NotationDecl,
809 (void *userData,
810 const XML_Char *notationName,
811 const XML_Char *base,
812 const XML_Char *systemId,
813 const XML_Char *publicId),
814 ("(NNNN)",
815 string_intern(self, notationName), string_intern(self, base),
816 string_intern(self, systemId), string_intern(self, publicId)))
818 VOID_HANDLER(StartNamespaceDecl,
819 (void *userData,
820 const XML_Char *prefix,
821 const XML_Char *uri),
822 ("(NN)",
823 string_intern(self, prefix), string_intern(self, uri)))
825 VOID_HANDLER(EndNamespaceDecl,
826 (void *userData,
827 const XML_Char *prefix),
828 ("(N)", string_intern(self, prefix)))
830 VOID_HANDLER(Comment,
831 (void *userData, const XML_Char *data),
832 ("(O&)", STRING_CONV_FUNC,data))
834 VOID_HANDLER(StartCdataSection,
835 (void *userData),
836 ("()"))
838 VOID_HANDLER(EndCdataSection,
839 (void *userData),
840 ("()"))
842 #ifndef Py_USING_UNICODE
843 VOID_HANDLER(Default,
844 (void *userData, const XML_Char *s, int len),
845 ("(N)", conv_string_len_to_utf8(s,len)))
847 VOID_HANDLER(DefaultHandlerExpand,
848 (void *userData, const XML_Char *s, int len),
849 ("(N)", conv_string_len_to_utf8(s,len)))
850 #else
851 VOID_HANDLER(Default,
852 (void *userData, const XML_Char *s, int len),
853 ("(N)", (self->returns_unicode
854 ? conv_string_len_to_unicode(s,len)
855 : conv_string_len_to_utf8(s,len))))
857 VOID_HANDLER(DefaultHandlerExpand,
858 (void *userData, const XML_Char *s, int len),
859 ("(N)", (self->returns_unicode
860 ? conv_string_len_to_unicode(s,len)
861 : conv_string_len_to_utf8(s,len))))
862 #endif
864 INT_HANDLER(NotStandalone,
865 (void *userData),
866 ("()"))
868 RC_HANDLER(int, ExternalEntityRef,
869 (XML_Parser parser,
870 const XML_Char *context,
871 const XML_Char *base,
872 const XML_Char *systemId,
873 const XML_Char *publicId),
874 int rc=0;,
875 ("(O&NNN)",
876 STRING_CONV_FUNC,context, string_intern(self, base),
877 string_intern(self, systemId), string_intern(self, publicId)),
878 rc = PyInt_AsLong(rv);, rc,
879 XML_GetUserData(parser))
881 /* XXX UnknownEncodingHandler */
883 VOID_HANDLER(StartDoctypeDecl,
884 (void *userData, const XML_Char *doctypeName,
885 const XML_Char *sysid, const XML_Char *pubid,
886 int has_internal_subset),
887 ("(NNNi)", string_intern(self, doctypeName),
888 string_intern(self, sysid), string_intern(self, pubid),
889 has_internal_subset))
891 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
893 /* ---------------------------------------------------------------- */
895 static PyObject *
896 get_parse_result(xmlparseobject *self, int rv)
898 if (PyErr_Occurred()) {
899 return NULL;
901 if (rv == 0) {
902 return set_error(self, XML_GetErrorCode(self->itself));
904 if (flush_character_buffer(self) < 0) {
905 return NULL;
907 return PyInt_FromLong(rv);
910 PyDoc_STRVAR(xmlparse_Parse__doc__,
911 "Parse(data[, isfinal])\n\
912 Parse XML data. `isfinal' should be true at end of input.");
914 static PyObject *
915 xmlparse_Parse(xmlparseobject *self, PyObject *args)
917 char *s;
918 int slen;
919 int isFinal = 0;
921 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
922 return NULL;
924 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
927 /* File reading copied from cPickle */
929 #define BUF_SIZE 2048
931 static int
932 readinst(char *buf, int buf_size, PyObject *meth)
934 PyObject *arg = NULL;
935 PyObject *bytes = NULL;
936 PyObject *str = NULL;
937 int len = -1;
939 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
940 goto finally;
942 if ((arg = PyTuple_New(1)) == NULL) {
943 Py_DECREF(bytes);
944 goto finally;
947 PyTuple_SET_ITEM(arg, 0, bytes);
949 #if PY_VERSION_HEX < 0x02020000
950 str = PyObject_CallObject(meth, arg);
951 #else
952 str = PyObject_Call(meth, arg, NULL);
953 #endif
954 if (str == NULL)
955 goto finally;
957 /* XXX what to do if it returns a Unicode string? */
958 if (!PyString_Check(str)) {
959 PyErr_Format(PyExc_TypeError,
960 "read() did not return a string object (type=%.400s)",
961 str->ob_type->tp_name);
962 goto finally;
964 len = PyString_GET_SIZE(str);
965 if (len > buf_size) {
966 PyErr_Format(PyExc_ValueError,
967 "read() returned too much data: "
968 "%i bytes requested, %i returned",
969 buf_size, len);
970 goto finally;
972 memcpy(buf, PyString_AsString(str), len);
973 finally:
974 Py_XDECREF(arg);
975 Py_XDECREF(str);
976 return len;
979 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
980 "ParseFile(file)\n\
981 Parse XML data from file-like object.");
983 static PyObject *
984 xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
986 int rv = 1;
987 PyObject *f;
988 FILE *fp;
989 PyObject *readmethod = NULL;
991 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
992 return NULL;
994 if (PyFile_Check(f)) {
995 fp = PyFile_AsFile(f);
997 else{
998 fp = NULL;
999 readmethod = PyObject_GetAttrString(f, "read");
1000 if (readmethod == NULL) {
1001 PyErr_Clear();
1002 PyErr_SetString(PyExc_TypeError,
1003 "argument must have 'read' attribute");
1004 return NULL;
1007 for (;;) {
1008 int bytes_read;
1009 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
1010 if (buf == NULL) {
1011 Py_XDECREF(readmethod);
1012 return PyErr_NoMemory();
1015 if (fp) {
1016 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
1017 if (bytes_read < 0) {
1018 PyErr_SetFromErrno(PyExc_IOError);
1019 return NULL;
1022 else {
1023 bytes_read = readinst(buf, BUF_SIZE, readmethod);
1024 if (bytes_read < 0) {
1025 Py_DECREF(readmethod);
1026 return NULL;
1029 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
1030 if (PyErr_Occurred()) {
1031 Py_XDECREF(readmethod);
1032 return NULL;
1035 if (!rv || bytes_read == 0)
1036 break;
1038 Py_XDECREF(readmethod);
1039 return get_parse_result(self, rv);
1042 PyDoc_STRVAR(xmlparse_SetBase__doc__,
1043 "SetBase(base_url)\n\
1044 Set the base URL for the parser.");
1046 static PyObject *
1047 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1049 char *base;
1051 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
1052 return NULL;
1053 if (!XML_SetBase(self->itself, base)) {
1054 return PyErr_NoMemory();
1056 Py_INCREF(Py_None);
1057 return Py_None;
1060 PyDoc_STRVAR(xmlparse_GetBase__doc__,
1061 "GetBase() -> url\n\
1062 Return base URL string for the parser.");
1064 static PyObject *
1065 xmlparse_GetBase(xmlparseobject *self, PyObject *args)
1067 if (!PyArg_ParseTuple(args, ":GetBase"))
1068 return NULL;
1070 return Py_BuildValue("z", XML_GetBase(self->itself));
1073 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
1074 "GetInputContext() -> string\n\
1075 Return the untranslated text of the input that caused the current event.\n\
1076 If the event was generated by a large amount of text (such as a start tag\n\
1077 for an element with many attributes), not all of the text may be available.");
1079 static PyObject *
1080 xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
1082 PyObject *result = NULL;
1084 if (PyArg_ParseTuple(args, ":GetInputContext")) {
1085 if (self->in_callback) {
1086 int offset, size;
1087 const char *buffer
1088 = XML_GetInputContext(self->itself, &offset, &size);
1090 if (buffer != NULL)
1091 result = PyString_FromStringAndSize(buffer + offset, size - offset);
1092 else {
1093 result = Py_None;
1094 Py_INCREF(result);
1097 else {
1098 result = Py_None;
1099 Py_INCREF(result);
1102 return result;
1105 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
1106 "ExternalEntityParserCreate(context[, encoding])\n\
1107 Create a parser for parsing an external entity based on the\n\
1108 information passed to the ExternalEntityRefHandler.");
1110 static PyObject *
1111 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1113 char *context;
1114 char *encoding = NULL;
1115 xmlparseobject *new_parser;
1116 int i;
1118 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
1119 &context, &encoding)) {
1120 return NULL;
1123 #ifndef Py_TPFLAGS_HAVE_GC
1124 /* Python versions 2.0 and 2.1 */
1125 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
1126 #else
1127 /* Python versions 2.2 and later */
1128 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1129 #endif
1131 if (new_parser == NULL)
1132 return NULL;
1133 new_parser->buffer_size = self->buffer_size;
1134 new_parser->buffer_used = 0;
1135 if (self->buffer != NULL) {
1136 new_parser->buffer = malloc(new_parser->buffer_size);
1137 if (new_parser->buffer == NULL) {
1138 #ifndef Py_TPFLAGS_HAVE_GC
1139 /* Code for versions 2.0 and 2.1 */
1140 PyObject_Del(new_parser);
1141 #else
1142 /* Code for versions 2.2 and later. */
1143 PyObject_GC_Del(new_parser);
1144 #endif
1145 return PyErr_NoMemory();
1148 else
1149 new_parser->buffer = NULL;
1150 new_parser->returns_unicode = self->returns_unicode;
1151 new_parser->ordered_attributes = self->ordered_attributes;
1152 new_parser->specified_attributes = self->specified_attributes;
1153 new_parser->in_callback = 0;
1154 new_parser->ns_prefixes = self->ns_prefixes;
1155 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
1156 encoding);
1157 new_parser->handlers = 0;
1158 new_parser->intern = self->intern;
1159 Py_XINCREF(new_parser->intern);
1160 #ifdef Py_TPFLAGS_HAVE_GC
1161 PyObject_GC_Track(new_parser);
1162 #else
1163 PyObject_GC_Init(new_parser);
1164 #endif
1166 if (!new_parser->itself) {
1167 Py_DECREF(new_parser);
1168 return PyErr_NoMemory();
1171 XML_SetUserData(new_parser->itself, (void *)new_parser);
1173 /* allocate and clear handlers first */
1174 for (i = 0; handler_info[i].name != NULL; i++)
1175 /* do nothing */;
1177 new_parser->handlers = malloc(sizeof(PyObject *) * i);
1178 if (!new_parser->handlers) {
1179 Py_DECREF(new_parser);
1180 return PyErr_NoMemory();
1182 clear_handlers(new_parser, 1);
1184 /* then copy handlers from self */
1185 for (i = 0; handler_info[i].name != NULL; i++) {
1186 PyObject *handler = self->handlers[i];
1187 if (handler != NULL) {
1188 Py_INCREF(handler);
1189 new_parser->handlers[i] = handler;
1190 handler_info[i].setter(new_parser->itself,
1191 handler_info[i].handler);
1194 return (PyObject *)new_parser;
1197 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
1198 "SetParamEntityParsing(flag) -> success\n\
1199 Controls parsing of parameter entities (including the external DTD\n\
1200 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1201 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1202 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1203 was successful.");
1205 static PyObject*
1206 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
1208 int flag;
1209 if (!PyArg_ParseTuple(args, "i", &flag))
1210 return NULL;
1211 flag = XML_SetParamEntityParsing(p->itself, flag);
1212 return PyInt_FromLong(flag);
1216 #if XML_COMBINED_VERSION >= 19505
1217 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1218 "UseForeignDTD([flag])\n\
1219 Allows the application to provide an artificial external subset if one is\n\
1220 not specified as part of the document instance. This readily allows the\n\
1221 use of a 'default' document type controlled by the application, while still\n\
1222 getting the advantage of providing document type information to the parser.\n\
1223 'flag' defaults to True if not provided.");
1225 static PyObject *
1226 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1228 PyObject *flagobj = NULL;
1229 XML_Bool flag = XML_TRUE;
1230 enum XML_Error rc;
1231 if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
1232 return NULL;
1233 if (flagobj != NULL)
1234 flag = PyObject_IsTrue(flagobj) ? XML_TRUE : XML_FALSE;
1235 rc = XML_UseForeignDTD(self->itself, flag);
1236 if (rc != XML_ERROR_NONE) {
1237 return set_error(self, rc);
1239 Py_INCREF(Py_None);
1240 return Py_None;
1242 #endif
1244 static struct PyMethodDef xmlparse_methods[] = {
1245 {"Parse", (PyCFunction)xmlparse_Parse,
1246 METH_VARARGS, xmlparse_Parse__doc__},
1247 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
1248 METH_VARARGS, xmlparse_ParseFile__doc__},
1249 {"SetBase", (PyCFunction)xmlparse_SetBase,
1250 METH_VARARGS, xmlparse_SetBase__doc__},
1251 {"GetBase", (PyCFunction)xmlparse_GetBase,
1252 METH_VARARGS, xmlparse_GetBase__doc__},
1253 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
1254 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
1255 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1256 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
1257 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1258 METH_VARARGS, xmlparse_GetInputContext__doc__},
1259 #if XML_COMBINED_VERSION >= 19505
1260 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1261 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1262 #endif
1263 {NULL, NULL} /* sentinel */
1266 /* ---------- */
1269 #ifdef Py_USING_UNICODE
1271 /* pyexpat international encoding support.
1272 Make it as simple as possible.
1275 static char template_buffer[257];
1276 PyObject *template_string = NULL;
1278 static void
1279 init_template_buffer(void)
1281 int i;
1282 for (i = 0; i < 256; i++) {
1283 template_buffer[i] = i;
1285 template_buffer[256] = 0;
1288 static int
1289 PyUnknownEncodingHandler(void *encodingHandlerData,
1290 const XML_Char *name,
1291 XML_Encoding *info)
1293 PyUnicodeObject *_u_string = NULL;
1294 int result = 0;
1295 int i;
1297 /* Yes, supports only 8bit encodings */
1298 _u_string = (PyUnicodeObject *)
1299 PyUnicode_Decode(template_buffer, 256, name, "replace");
1301 if (_u_string == NULL)
1302 return result;
1304 for (i = 0; i < 256; i++) {
1305 /* Stupid to access directly, but fast */
1306 Py_UNICODE c = _u_string->str[i];
1307 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1308 info->map[i] = -1;
1309 else
1310 info->map[i] = c;
1312 info->data = NULL;
1313 info->convert = NULL;
1314 info->release = NULL;
1315 result = 1;
1316 Py_DECREF(_u_string);
1317 return result;
1320 #endif
1322 static PyObject *
1323 newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
1325 int i;
1326 xmlparseobject *self;
1328 #ifdef Py_TPFLAGS_HAVE_GC
1329 /* Code for versions 2.2 and later */
1330 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1331 #else
1332 self = PyObject_New(xmlparseobject, &Xmlparsetype);
1333 #endif
1334 if (self == NULL)
1335 return NULL;
1337 #ifdef Py_USING_UNICODE
1338 self->returns_unicode = 1;
1339 #else
1340 self->returns_unicode = 0;
1341 #endif
1343 self->buffer = NULL;
1344 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1345 self->buffer_used = 0;
1346 self->ordered_attributes = 0;
1347 self->specified_attributes = 0;
1348 self->in_callback = 0;
1349 self->ns_prefixes = 0;
1350 self->handlers = NULL;
1351 if (namespace_separator != NULL) {
1352 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1354 else {
1355 self->itself = XML_ParserCreate(encoding);
1357 self->intern = intern;
1358 Py_XINCREF(self->intern);
1359 #ifdef Py_TPFLAGS_HAVE_GC
1360 PyObject_GC_Track(self);
1361 #else
1362 PyObject_GC_Init(self);
1363 #endif
1364 if (self->itself == NULL) {
1365 PyErr_SetString(PyExc_RuntimeError,
1366 "XML_ParserCreate failed");
1367 Py_DECREF(self);
1368 return NULL;
1370 XML_SetUserData(self->itself, (void *)self);
1371 #ifdef Py_USING_UNICODE
1372 XML_SetUnknownEncodingHandler(self->itself,
1373 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1374 #endif
1376 for (i = 0; handler_info[i].name != NULL; i++)
1377 /* do nothing */;
1379 self->handlers = malloc(sizeof(PyObject *) * i);
1380 if (!self->handlers) {
1381 Py_DECREF(self);
1382 return PyErr_NoMemory();
1384 clear_handlers(self, 1);
1386 return (PyObject*)self;
1390 static void
1391 xmlparse_dealloc(xmlparseobject *self)
1393 int i;
1394 #ifdef Py_TPFLAGS_HAVE_GC
1395 PyObject_GC_UnTrack(self);
1396 #else
1397 PyObject_GC_Fini(self);
1398 #endif
1399 if (self->itself != NULL)
1400 XML_ParserFree(self->itself);
1401 self->itself = NULL;
1403 if (self->handlers != NULL) {
1404 PyObject *temp;
1405 for (i = 0; handler_info[i].name != NULL; i++) {
1406 temp = self->handlers[i];
1407 self->handlers[i] = NULL;
1408 Py_XDECREF(temp);
1410 free(self->handlers);
1411 self->handlers = NULL;
1413 if (self->buffer != NULL) {
1414 free(self->buffer);
1415 self->buffer = NULL;
1417 Py_XDECREF(self->intern);
1418 #ifndef Py_TPFLAGS_HAVE_GC
1419 /* Code for versions 2.0 and 2.1 */
1420 PyObject_Del(self);
1421 #else
1422 /* Code for versions 2.2 and later. */
1423 PyObject_GC_Del(self);
1424 #endif
1427 static int
1428 handlername2int(const char *name)
1430 int i;
1431 for (i = 0; handler_info[i].name != NULL; i++) {
1432 if (strcmp(name, handler_info[i].name) == 0) {
1433 return i;
1436 return -1;
1439 static PyObject *
1440 get_pybool(int istrue)
1442 PyObject *result = istrue ? Py_True : Py_False;
1443 Py_INCREF(result);
1444 return result;
1447 static PyObject *
1448 xmlparse_getattr(xmlparseobject *self, char *name)
1450 int handlernum = handlername2int(name);
1452 if (handlernum != -1) {
1453 PyObject *result = self->handlers[handlernum];
1454 if (result == NULL)
1455 result = Py_None;
1456 Py_INCREF(result);
1457 return result;
1459 if (name[0] == 'E') {
1460 if (strcmp(name, "ErrorCode") == 0)
1461 return PyInt_FromLong((long)
1462 XML_GetErrorCode(self->itself));
1463 if (strcmp(name, "ErrorLineNumber") == 0)
1464 return PyInt_FromLong((long)
1465 XML_GetErrorLineNumber(self->itself));
1466 if (strcmp(name, "ErrorColumnNumber") == 0)
1467 return PyInt_FromLong((long)
1468 XML_GetErrorColumnNumber(self->itself));
1469 if (strcmp(name, "ErrorByteIndex") == 0)
1470 return PyInt_FromLong((long)
1471 XML_GetErrorByteIndex(self->itself));
1473 if (name[0] == 'C') {
1474 if (strcmp(name, "CurrentLineNumber") == 0)
1475 return PyInt_FromLong((long)
1476 XML_GetCurrentLineNumber(self->itself));
1477 if (strcmp(name, "CurrentColumnNumber") == 0)
1478 return PyInt_FromLong((long)
1479 XML_GetCurrentColumnNumber(self->itself));
1480 if (strcmp(name, "CurrentByteIndex") == 0)
1481 return PyInt_FromLong((long)
1482 XML_GetCurrentByteIndex(self->itself));
1484 if (name[0] == 'b') {
1485 if (strcmp(name, "buffer_size") == 0)
1486 return PyInt_FromLong((long) self->buffer_size);
1487 if (strcmp(name, "buffer_text") == 0)
1488 return get_pybool(self->buffer != NULL);
1489 if (strcmp(name, "buffer_used") == 0)
1490 return PyInt_FromLong((long) self->buffer_used);
1492 if (strcmp(name, "namespace_prefixes") == 0)
1493 return get_pybool(self->ns_prefixes);
1494 if (strcmp(name, "ordered_attributes") == 0)
1495 return get_pybool(self->ordered_attributes);
1496 if (strcmp(name, "returns_unicode") == 0)
1497 return get_pybool((long) self->returns_unicode);
1498 if (strcmp(name, "specified_attributes") == 0)
1499 return get_pybool((long) self->specified_attributes);
1500 if (strcmp(name, "intern") == 0) {
1501 if (self->intern == NULL) {
1502 Py_INCREF(Py_None);
1503 return Py_None;
1505 else {
1506 Py_INCREF(self->intern);
1507 return self->intern;
1511 #define APPEND(list, str) \
1512 do { \
1513 PyObject *o = PyString_FromString(str); \
1514 if (o != NULL) \
1515 PyList_Append(list, o); \
1516 Py_XDECREF(o); \
1517 } while (0)
1519 if (strcmp(name, "__members__") == 0) {
1520 int i;
1521 PyObject *rc = PyList_New(0);
1522 for (i = 0; handler_info[i].name != NULL; i++) {
1523 PyObject *o = get_handler_name(&handler_info[i]);
1524 if (o != NULL)
1525 PyList_Append(rc, o);
1526 Py_XDECREF(o);
1528 APPEND(rc, "ErrorCode");
1529 APPEND(rc, "ErrorLineNumber");
1530 APPEND(rc, "ErrorColumnNumber");
1531 APPEND(rc, "ErrorByteIndex");
1532 APPEND(rc, "CurrentLineNumber");
1533 APPEND(rc, "CurrentColumnNumber");
1534 APPEND(rc, "CurrentByteIndex");
1535 APPEND(rc, "buffer_size");
1536 APPEND(rc, "buffer_text");
1537 APPEND(rc, "buffer_used");
1538 APPEND(rc, "namespace_prefixes");
1539 APPEND(rc, "ordered_attributes");
1540 APPEND(rc, "returns_unicode");
1541 APPEND(rc, "specified_attributes");
1542 APPEND(rc, "intern");
1544 #undef APPEND
1545 return rc;
1547 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
1550 static int
1551 sethandler(xmlparseobject *self, const char *name, PyObject* v)
1553 int handlernum = handlername2int(name);
1554 if (handlernum >= 0) {
1555 xmlhandler c_handler = NULL;
1556 PyObject *temp = self->handlers[handlernum];
1558 if (v == Py_None)
1559 v = NULL;
1560 else if (v != NULL) {
1561 Py_INCREF(v);
1562 c_handler = handler_info[handlernum].handler;
1564 self->handlers[handlernum] = v;
1565 Py_XDECREF(temp);
1566 handler_info[handlernum].setter(self->itself, c_handler);
1567 return 1;
1569 return 0;
1572 static int
1573 xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
1575 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1576 if (v == NULL) {
1577 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1578 return -1;
1580 if (strcmp(name, "buffer_text") == 0) {
1581 if (PyObject_IsTrue(v)) {
1582 if (self->buffer == NULL) {
1583 self->buffer = malloc(self->buffer_size);
1584 if (self->buffer == NULL) {
1585 PyErr_NoMemory();
1586 return -1;
1588 self->buffer_used = 0;
1591 else if (self->buffer != NULL) {
1592 if (flush_character_buffer(self) < 0)
1593 return -1;
1594 free(self->buffer);
1595 self->buffer = NULL;
1597 return 0;
1599 if (strcmp(name, "namespace_prefixes") == 0) {
1600 if (PyObject_IsTrue(v))
1601 self->ns_prefixes = 1;
1602 else
1603 self->ns_prefixes = 0;
1604 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1605 return 0;
1607 if (strcmp(name, "ordered_attributes") == 0) {
1608 if (PyObject_IsTrue(v))
1609 self->ordered_attributes = 1;
1610 else
1611 self->ordered_attributes = 0;
1612 return 0;
1614 if (strcmp(name, "returns_unicode") == 0) {
1615 if (PyObject_IsTrue(v)) {
1616 #ifndef Py_USING_UNICODE
1617 PyErr_SetString(PyExc_ValueError,
1618 "Unicode support not available");
1619 return -1;
1620 #else
1621 self->returns_unicode = 1;
1622 #endif
1624 else
1625 self->returns_unicode = 0;
1626 return 0;
1628 if (strcmp(name, "specified_attributes") == 0) {
1629 if (PyObject_IsTrue(v))
1630 self->specified_attributes = 1;
1631 else
1632 self->specified_attributes = 0;
1633 return 0;
1635 if (strcmp(name, "CharacterDataHandler") == 0) {
1636 /* If we're changing the character data handler, flush all
1637 * cached data with the old handler. Not sure there's a
1638 * "right" thing to do, though, but this probably won't
1639 * happen.
1641 if (flush_character_buffer(self) < 0)
1642 return -1;
1644 if (sethandler(self, name, v)) {
1645 return 0;
1647 PyErr_SetString(PyExc_AttributeError, name);
1648 return -1;
1651 #ifdef WITH_CYCLE_GC
1652 static int
1653 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1655 int i, err;
1656 for (i = 0; handler_info[i].name != NULL; i++) {
1657 if (!op->handlers[i])
1658 continue;
1659 err = visit(op->handlers[i], arg);
1660 if (err)
1661 return err;
1663 return 0;
1666 static int
1667 xmlparse_clear(xmlparseobject *op)
1669 clear_handlers(op, 0);
1670 Py_XDECREF(op->intern);
1671 op->intern = 0;
1672 return 0;
1674 #endif
1676 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1678 static PyTypeObject Xmlparsetype = {
1679 PyObject_HEAD_INIT(NULL)
1680 0, /*ob_size*/
1681 "pyexpat.xmlparser", /*tp_name*/
1682 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1683 0, /*tp_itemsize*/
1684 /* methods */
1685 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1686 (printfunc)0, /*tp_print*/
1687 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1688 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1689 (cmpfunc)0, /*tp_compare*/
1690 (reprfunc)0, /*tp_repr*/
1691 0, /*tp_as_number*/
1692 0, /*tp_as_sequence*/
1693 0, /*tp_as_mapping*/
1694 (hashfunc)0, /*tp_hash*/
1695 (ternaryfunc)0, /*tp_call*/
1696 (reprfunc)0, /*tp_str*/
1697 0, /* tp_getattro */
1698 0, /* tp_setattro */
1699 0, /* tp_as_buffer */
1700 #ifdef Py_TPFLAGS_HAVE_GC
1701 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1702 #else
1703 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1704 #endif
1705 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1706 #ifdef WITH_CYCLE_GC
1707 (traverseproc)xmlparse_traverse, /* tp_traverse */
1708 (inquiry)xmlparse_clear /* tp_clear */
1709 #else
1710 0, 0
1711 #endif
1714 /* End of code for xmlparser objects */
1715 /* -------------------------------------------------------- */
1717 PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
1718 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1719 Return a new XML parser object.");
1721 static PyObject *
1722 pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1724 char *encoding = NULL;
1725 char *namespace_separator = NULL;
1726 PyObject *intern = NULL;
1727 PyObject *result;
1728 int intern_decref = 0;
1729 static const char *kwlist[] = {"encoding", "namespace_separator",
1730 "intern", NULL};
1732 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1733 &encoding, &namespace_separator, &intern))
1734 return NULL;
1735 if (namespace_separator != NULL
1736 && strlen(namespace_separator) > 1) {
1737 PyErr_SetString(PyExc_ValueError,
1738 "namespace_separator must be at most one"
1739 " character, omitted, or None");
1740 return NULL;
1742 /* Explicitly passing None means no interning is desired.
1743 Not passing anything means that a new dictionary is used. */
1744 if (intern == Py_None)
1745 intern = NULL;
1746 else if (intern == NULL) {
1747 intern = PyDict_New();
1748 if (!intern)
1749 return NULL;
1750 intern_decref = 1;
1752 else if (!PyDict_Check(intern)) {
1753 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1754 return NULL;
1757 result = newxmlparseobject(encoding, namespace_separator, intern);
1758 if (intern_decref) {
1759 Py_DECREF(intern);
1761 return result;
1764 PyDoc_STRVAR(pyexpat_ErrorString__doc__,
1765 "ErrorString(errno) -> string\n\
1766 Returns string error for given number.");
1768 static PyObject *
1769 pyexpat_ErrorString(PyObject *self, PyObject *args)
1771 long code = 0;
1773 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1774 return NULL;
1775 return Py_BuildValue("z", XML_ErrorString((int)code));
1778 /* List of methods defined in the module */
1780 static struct PyMethodDef pyexpat_methods[] = {
1781 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1782 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1783 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1784 METH_VARARGS, pyexpat_ErrorString__doc__},
1786 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
1789 /* Module docstring */
1791 PyDoc_STRVAR(pyexpat_module_documentation,
1792 "Python wrapper for Expat parser.");
1794 /* Return a Python string that represents the version number without the
1795 * extra cruft added by revision control, even if the right options were
1796 * given to the "cvs export" command to make it not include the extra
1797 * cruft.
1799 static PyObject *
1800 get_version_string(void)
1802 static char *rcsid = "$Revision$";
1803 char *rev = rcsid;
1804 int i = 0;
1806 while (!isdigit(Py_CHARMASK(*rev)))
1807 ++rev;
1808 while (rev[i] != ' ' && rev[i] != '\0')
1809 ++i;
1811 return PyString_FromStringAndSize(rev, i);
1814 /* Initialization function for the module */
1816 #ifndef MODULE_NAME
1817 #define MODULE_NAME "pyexpat"
1818 #endif
1820 #ifndef MODULE_INITFUNC
1821 #define MODULE_INITFUNC initpyexpat
1822 #endif
1824 #ifndef PyMODINIT_FUNC
1825 # ifdef MS_WINDOWS
1826 # define PyMODINIT_FUNC __declspec(dllexport) void
1827 # else
1828 # define PyMODINIT_FUNC void
1829 # endif
1830 #endif
1832 PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
1834 PyMODINIT_FUNC
1835 MODULE_INITFUNC(void)
1837 PyObject *m, *d;
1838 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
1839 PyObject *errors_module;
1840 PyObject *modelmod_name;
1841 PyObject *model_module;
1842 PyObject *sys_modules;
1843 static struct PyExpat_CAPI capi;
1844 PyObject* capi_object;
1846 if (errmod_name == NULL)
1847 return;
1848 modelmod_name = PyString_FromString(MODULE_NAME ".model");
1849 if (modelmod_name == NULL)
1850 return;
1852 Xmlparsetype.ob_type = &PyType_Type;
1854 /* Create the module and add the functions */
1855 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
1856 pyexpat_module_documentation);
1857 if (m == NULL)
1858 return;
1860 /* Add some symbolic constants to the module */
1861 if (ErrorObject == NULL) {
1862 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1863 NULL, NULL);
1864 if (ErrorObject == NULL)
1865 return;
1867 Py_INCREF(ErrorObject);
1868 PyModule_AddObject(m, "error", ErrorObject);
1869 Py_INCREF(ErrorObject);
1870 PyModule_AddObject(m, "ExpatError", ErrorObject);
1871 Py_INCREF(&Xmlparsetype);
1872 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1874 PyModule_AddObject(m, "__version__", get_version_string());
1875 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1876 (char *) XML_ExpatVersion());
1878 XML_Expat_Version info = XML_ExpatVersionInfo();
1879 PyModule_AddObject(m, "version_info",
1880 Py_BuildValue("(iii)", info.major,
1881 info.minor, info.micro));
1883 #ifdef Py_USING_UNICODE
1884 init_template_buffer();
1885 #endif
1886 /* XXX When Expat supports some way of figuring out how it was
1887 compiled, this should check and set native_encoding
1888 appropriately.
1890 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1892 sys_modules = PySys_GetObject("modules");
1893 d = PyModule_GetDict(m);
1894 errors_module = PyDict_GetItem(d, errmod_name);
1895 if (errors_module == NULL) {
1896 errors_module = PyModule_New(MODULE_NAME ".errors");
1897 if (errors_module != NULL) {
1898 PyDict_SetItem(sys_modules, errmod_name, errors_module);
1899 /* gives away the reference to errors_module */
1900 PyModule_AddObject(m, "errors", errors_module);
1903 Py_DECREF(errmod_name);
1904 model_module = PyDict_GetItem(d, modelmod_name);
1905 if (model_module == NULL) {
1906 model_module = PyModule_New(MODULE_NAME ".model");
1907 if (model_module != NULL) {
1908 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1909 /* gives away the reference to model_module */
1910 PyModule_AddObject(m, "model", model_module);
1913 Py_DECREF(modelmod_name);
1914 if (errors_module == NULL || model_module == NULL)
1915 /* Don't core dump later! */
1916 return;
1918 #if XML_COMBINED_VERSION > 19505
1920 const XML_Feature *features = XML_GetFeatureList();
1921 PyObject *list = PyList_New(0);
1922 if (list == NULL)
1923 /* just ignore it */
1924 PyErr_Clear();
1925 else {
1926 int i = 0;
1927 for (; features[i].feature != XML_FEATURE_END; ++i) {
1928 int ok;
1929 PyObject *item = Py_BuildValue("si", features[i].name,
1930 features[i].value);
1931 if (item == NULL) {
1932 Py_DECREF(list);
1933 list = NULL;
1934 break;
1936 ok = PyList_Append(list, item);
1937 Py_DECREF(item);
1938 if (ok < 0) {
1939 PyErr_Clear();
1940 break;
1943 if (list != NULL)
1944 PyModule_AddObject(m, "features", list);
1947 #endif
1949 #define MYCONST(name) \
1950 PyModule_AddStringConstant(errors_module, #name, \
1951 (char*)XML_ErrorString(name))
1953 MYCONST(XML_ERROR_NO_MEMORY);
1954 MYCONST(XML_ERROR_SYNTAX);
1955 MYCONST(XML_ERROR_NO_ELEMENTS);
1956 MYCONST(XML_ERROR_INVALID_TOKEN);
1957 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1958 MYCONST(XML_ERROR_PARTIAL_CHAR);
1959 MYCONST(XML_ERROR_TAG_MISMATCH);
1960 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1961 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1962 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1963 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1964 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1965 MYCONST(XML_ERROR_ASYNC_ENTITY);
1966 MYCONST(XML_ERROR_BAD_CHAR_REF);
1967 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1968 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1969 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1970 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1971 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1972 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1973 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1974 MYCONST(XML_ERROR_NOT_STANDALONE);
1975 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1976 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1977 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1978 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1979 /* Added in Expat 1.95.7. */
1980 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1981 /* Added in Expat 1.95.8. */
1982 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1983 MYCONST(XML_ERROR_INCOMPLETE_PE);
1984 MYCONST(XML_ERROR_XML_DECL);
1985 MYCONST(XML_ERROR_TEXT_DECL);
1986 MYCONST(XML_ERROR_PUBLICID);
1987 MYCONST(XML_ERROR_SUSPENDED);
1988 MYCONST(XML_ERROR_NOT_SUSPENDED);
1989 MYCONST(XML_ERROR_ABORTED);
1990 MYCONST(XML_ERROR_FINISHED);
1991 MYCONST(XML_ERROR_SUSPEND_PE);
1993 PyModule_AddStringConstant(errors_module, "__doc__",
1994 "Constants used to describe error conditions.");
1996 #undef MYCONST
1998 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1999 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2000 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2001 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
2002 #undef MYCONST
2004 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2005 PyModule_AddStringConstant(model_module, "__doc__",
2006 "Constants used to interpret content model information.");
2008 MYCONST(XML_CTYPE_EMPTY);
2009 MYCONST(XML_CTYPE_ANY);
2010 MYCONST(XML_CTYPE_MIXED);
2011 MYCONST(XML_CTYPE_NAME);
2012 MYCONST(XML_CTYPE_CHOICE);
2013 MYCONST(XML_CTYPE_SEQ);
2015 MYCONST(XML_CQUANT_NONE);
2016 MYCONST(XML_CQUANT_OPT);
2017 MYCONST(XML_CQUANT_REP);
2018 MYCONST(XML_CQUANT_PLUS);
2019 #undef MYCONST
2021 /* initialize pyexpat dispatch table */
2022 capi.size = sizeof(capi);
2023 capi.magic = PyExpat_CAPI_MAGIC;
2024 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2025 capi.MINOR_VERSION = XML_MINOR_VERSION;
2026 capi.MICRO_VERSION = XML_MICRO_VERSION;
2027 capi.ErrorString = XML_ErrorString;
2028 capi.GetErrorCode = XML_GetErrorCode;
2029 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2030 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
2031 capi.Parse = XML_Parse;
2032 capi.ParserCreate_MM = XML_ParserCreate_MM;
2033 capi.ParserFree = XML_ParserFree;
2034 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2035 capi.SetCommentHandler = XML_SetCommentHandler;
2036 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2037 capi.SetElementHandler = XML_SetElementHandler;
2038 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2039 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2040 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2041 capi.SetUserData = XML_SetUserData;
2043 /* export as cobject */
2044 capi_object = PyCObject_FromVoidPtr(&capi, NULL);
2045 if (capi_object)
2046 PyModule_AddObject(m, "expat_CAPI", capi_object);
2049 static void
2050 clear_handlers(xmlparseobject *self, int initial)
2052 int i = 0;
2053 PyObject *temp;
2055 for (; handler_info[i].name != NULL; i++) {
2056 if (initial)
2057 self->handlers[i] = NULL;
2058 else {
2059 temp = self->handlers[i];
2060 self->handlers[i] = NULL;
2061 Py_XDECREF(temp);
2062 handler_info[i].setter(self->itself, NULL);
2067 static struct HandlerInfo handler_info[] = {
2068 {"StartElementHandler",
2069 (xmlhandlersetter)XML_SetStartElementHandler,
2070 (xmlhandler)my_StartElementHandler},
2071 {"EndElementHandler",
2072 (xmlhandlersetter)XML_SetEndElementHandler,
2073 (xmlhandler)my_EndElementHandler},
2074 {"ProcessingInstructionHandler",
2075 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2076 (xmlhandler)my_ProcessingInstructionHandler},
2077 {"CharacterDataHandler",
2078 (xmlhandlersetter)XML_SetCharacterDataHandler,
2079 (xmlhandler)my_CharacterDataHandler},
2080 {"UnparsedEntityDeclHandler",
2081 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
2082 (xmlhandler)my_UnparsedEntityDeclHandler},
2083 {"NotationDeclHandler",
2084 (xmlhandlersetter)XML_SetNotationDeclHandler,
2085 (xmlhandler)my_NotationDeclHandler},
2086 {"StartNamespaceDeclHandler",
2087 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
2088 (xmlhandler)my_StartNamespaceDeclHandler},
2089 {"EndNamespaceDeclHandler",
2090 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
2091 (xmlhandler)my_EndNamespaceDeclHandler},
2092 {"CommentHandler",
2093 (xmlhandlersetter)XML_SetCommentHandler,
2094 (xmlhandler)my_CommentHandler},
2095 {"StartCdataSectionHandler",
2096 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
2097 (xmlhandler)my_StartCdataSectionHandler},
2098 {"EndCdataSectionHandler",
2099 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
2100 (xmlhandler)my_EndCdataSectionHandler},
2101 {"DefaultHandler",
2102 (xmlhandlersetter)XML_SetDefaultHandler,
2103 (xmlhandler)my_DefaultHandler},
2104 {"DefaultHandlerExpand",
2105 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2106 (xmlhandler)my_DefaultHandlerExpandHandler},
2107 {"NotStandaloneHandler",
2108 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2109 (xmlhandler)my_NotStandaloneHandler},
2110 {"ExternalEntityRefHandler",
2111 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
2112 (xmlhandler)my_ExternalEntityRefHandler},
2113 {"StartDoctypeDeclHandler",
2114 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
2115 (xmlhandler)my_StartDoctypeDeclHandler},
2116 {"EndDoctypeDeclHandler",
2117 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
2118 (xmlhandler)my_EndDoctypeDeclHandler},
2119 {"EntityDeclHandler",
2120 (xmlhandlersetter)XML_SetEntityDeclHandler,
2121 (xmlhandler)my_EntityDeclHandler},
2122 {"XmlDeclHandler",
2123 (xmlhandlersetter)XML_SetXmlDeclHandler,
2124 (xmlhandler)my_XmlDeclHandler},
2125 {"ElementDeclHandler",
2126 (xmlhandlersetter)XML_SetElementDeclHandler,
2127 (xmlhandler)my_ElementDeclHandler},
2128 {"AttlistDeclHandler",
2129 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2130 (xmlhandler)my_AttlistDeclHandler},
2131 #if XML_COMBINED_VERSION >= 19504
2132 {"SkippedEntityHandler",
2133 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2134 (xmlhandler)my_SkippedEntityHandler},
2135 #endif
2137 {NULL, NULL, NULL} /* sentinel */