Removed defensive test in Handler.close
[python.git] / Modules / _elementtree.c
bloba43fe2ec33b9433e0dc565a1382559312a69c80b
1 /*
2 * ElementTree
3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
5 * elementtree accelerator
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
40 * info@pythonware.com
41 * http://www.pythonware.com
44 /* Licensed to PSF under a Contributor Agreement. */
45 /* See http://www.python.org/2.4/license for licensing details. */
47 #include "Python.h"
49 #define VERSION "1.0.5"
51 /* -------------------------------------------------------------------- */
52 /* configuration */
54 /* Leave defined to include the expat-based XMLParser type */
55 #define USE_EXPAT
57 /* Define to route all expat calls via pyexpat's embedded expat library */
58 /* #define USE_PYEXPAT_CAPI */
60 /* An element can hold this many children without extra memory
61 allocations. */
62 #define STATIC_CHILDREN 4
64 /* For best performance, choose a value so that 80-90% of all nodes
65 have no more than the given number of children. Set this to zero
66 to minimize the size of the element structure itself (this only
67 helps if you have lots of leaf nodes with attributes). */
69 /* Also note that pymalloc always allocates blocks in multiples of
70 eight bytes. For the current version of cElementTree, this means
71 that the number of children should be an even number, at least on
72 32-bit platforms. */
74 /* -------------------------------------------------------------------- */
/* Optional allocation tracing.  Flip the condition below to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; in the normal
   build both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   `type`; under MSVC it additionally requests inlining and the fastcall
   convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
/* compatibility macros: map newer API names onto what older
   interpreters provide */
#if PY_VERSION_HEX < 0x02040000
#define PyDict_CheckExact PyDict_Check
#if PY_VERSION_HEX < 0x02020000
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#if PY_VERSION_HEX >= 0x01060000
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif
#endif
#endif

#if PY_VERSION_HEX >= 0x02050000
#define PY_CONST const /* 2.5 adds const to some API:s */
#else
#define PY_CONST
#endif

/* fallback for interpreters whose headers lack Py_RETURN_NONE */
#ifndef Py_RETURN_NONE
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored value is a 'join' flag and the remaining bits are
   the object pointer.  Every use of text/tail as an object pointer must
   therefore go through JOIN_OBJ; see the comments in the ElementObject
   definition for more info.

   JOIN_GET(p)        -> the flag bit (0 or 1)
   JOIN_OBJ(p)        -> the object pointer with the flag masked off
   JOIN_SET(p, flag)  -> p's object pointer combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
124 /* glue functions (see the init function for details) */
125 static PyObject* elementtree_copyelement_obj;
126 static PyObject* elementtree_deepcopy_obj;
127 static PyObject* elementtree_getiterator_obj;
128 static PyObject* elementpath_obj;
130 /* helpers */
132 LOCAL(PyObject*)
133 deepcopy(PyObject* object, PyObject* memo)
135 /* do a deep copy of the given object */
137 PyObject* args;
138 PyObject* result;
140 if (!elementtree_deepcopy_obj) {
141 PyErr_SetString(
142 PyExc_RuntimeError,
143 "deepcopy helper not found"
145 return NULL;
148 args = PyTuple_New(2);
149 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
150 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
152 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
154 Py_DECREF(args);
156 return result;
159 LOCAL(PyObject*)
160 list_join(PyObject* list)
162 /* join list elements (destroying the list in the process) */
164 PyObject* joiner;
165 PyObject* function;
166 PyObject* args;
167 PyObject* result;
169 switch (PyList_GET_SIZE(list)) {
170 case 0:
171 Py_DECREF(list);
172 return PyString_FromString("");
173 case 1:
174 result = PyList_GET_ITEM(list, 0);
175 Py_INCREF(result);
176 Py_DECREF(list);
177 return result;
180 /* two or more elements: slice out a suitable separator from the
181 first member, and use that to join the entire list */
183 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
184 if (!joiner)
185 return NULL;
187 function = PyObject_GetAttrString(joiner, "join");
188 if (!function) {
189 Py_DECREF(joiner);
190 return NULL;
193 args = PyTuple_New(1);
194 PyTuple_SET_ITEM(args, 0, list);
196 result = PyObject_CallObject(function, args);
198 Py_DECREF(args); /* also removes list */
199 Py_DECREF(function);
200 Py_DECREF(joiner);
202 return result;
205 #if (PY_VERSION_HEX < 0x02020000)
206 LOCAL(int)
207 PyDict_Update(PyObject* dict, PyObject* other)
209 /* PyDict_Update emulation for 2.1 and earlier */
211 PyObject* res;
213 res = PyObject_CallMethod(dict, "update", "O", other);
214 if (!res)
215 return -1;
217 Py_DECREF(res);
218 return 0;
220 #endif
222 /* -------------------------------------------------------------------- */
223 /* the element type */
225 typedef struct {
227 /* attributes (a dictionary object), or None if no attributes */
228 PyObject* attrib;
230 /* child elements */
231 int length; /* actual number of items */
232 int allocated; /* allocated items */
234 /* this either points to _children or to a malloced buffer */
235 PyObject* *children;
237 PyObject* _children[STATIC_CHILDREN];
239 } ElementObjectExtra;
241 typedef struct {
242 PyObject_HEAD
244 /* element tag (a string). */
245 PyObject* tag;
247 /* text before first child. note that this is a tagged pointer;
248 use JOIN_OBJ to get the object pointer. the join flag is used
249 to distinguish lists created by the tree builder from lists
250 assigned to the attribute by application code; the former
251 should be joined before being returned to the user, the latter
252 should be left intact. */
253 PyObject* text;
255 /* text after this element, in parent. note that this is a tagged
256 pointer; use JOIN_OBJ to get the object pointer. */
257 PyObject* tail;
259 ElementObjectExtra* extra;
261 } ElementObject;
263 staticforward PyTypeObject Element_Type;
265 #define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
267 /* -------------------------------------------------------------------- */
268 /* element constructor and destructor */
270 LOCAL(int)
271 element_new_extra(ElementObject* self, PyObject* attrib)
273 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
274 if (!self->extra)
275 return -1;
277 if (!attrib)
278 attrib = Py_None;
280 Py_INCREF(attrib);
281 self->extra->attrib = attrib;
283 self->extra->length = 0;
284 self->extra->allocated = STATIC_CHILDREN;
285 self->extra->children = self->extra->_children;
287 return 0;
290 LOCAL(void)
291 element_dealloc_extra(ElementObject* self)
293 int i;
295 Py_DECREF(self->extra->attrib);
297 for (i = 0; i < self->extra->length; i++)
298 Py_DECREF(self->extra->children[i]);
300 if (self->extra->children != self->extra->_children)
301 PyObject_Free(self->extra->children);
303 PyObject_Free(self->extra);
306 LOCAL(PyObject*)
307 element_new(PyObject* tag, PyObject* attrib)
309 ElementObject* self;
311 self = PyObject_New(ElementObject, &Element_Type);
312 if (self == NULL)
313 return NULL;
315 /* use None for empty dictionaries */
316 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
317 attrib = Py_None;
319 self->extra = NULL;
321 if (attrib != Py_None) {
323 if (element_new_extra(self, attrib) < 0)
324 return NULL;
326 self->extra->length = 0;
327 self->extra->allocated = STATIC_CHILDREN;
328 self->extra->children = self->extra->_children;
332 Py_INCREF(tag);
333 self->tag = tag;
335 Py_INCREF(Py_None);
336 self->text = Py_None;
338 Py_INCREF(Py_None);
339 self->tail = Py_None;
341 ALLOC(sizeof(ElementObject), "create element");
343 return (PyObject*) self;
346 LOCAL(int)
347 element_resize(ElementObject* self, int extra)
349 int size;
350 PyObject* *children;
352 /* make sure self->children can hold the given number of extra
353 elements. set an exception and return -1 if allocation failed */
355 if (!self->extra)
356 element_new_extra(self, NULL);
358 size = self->extra->length + extra;
360 if (size > self->extra->allocated) {
361 /* use Python 2.4's list growth strategy */
362 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
363 if (self->extra->children != self->extra->_children) {
364 children = PyObject_Realloc(self->extra->children,
365 size * sizeof(PyObject*));
366 if (!children)
367 goto nomemory;
368 } else {
369 children = PyObject_Malloc(size * sizeof(PyObject*));
370 if (!children)
371 goto nomemory;
372 /* copy existing children from static area to malloc buffer */
373 memcpy(children, self->extra->children,
374 self->extra->length * sizeof(PyObject*));
376 self->extra->children = children;
377 self->extra->allocated = size;
380 return 0;
382 nomemory:
383 PyErr_NoMemory();
384 return -1;
387 LOCAL(int)
388 element_add_subelement(ElementObject* self, PyObject* element)
390 /* add a child element to a parent */
392 if (element_resize(self, 1) < 0)
393 return -1;
395 Py_INCREF(element);
396 self->extra->children[self->extra->length] = element;
398 self->extra->length++;
400 return 0;
403 LOCAL(PyObject*)
404 element_get_attrib(ElementObject* self)
406 /* return borrowed reference to attrib dictionary */
407 /* note: this function assumes that the extra section exists */
409 PyObject* res = self->extra->attrib;
411 if (res == Py_None) {
412 /* create missing dictionary */
413 res = PyDict_New();
414 if (!res)
415 return NULL;
416 self->extra->attrib = res;
419 return res;
422 LOCAL(PyObject*)
423 element_get_text(ElementObject* self)
425 /* return borrowed reference to text attribute */
427 PyObject* res = self->text;
429 if (JOIN_GET(res)) {
430 res = JOIN_OBJ(res);
431 if (PyList_CheckExact(res)) {
432 res = list_join(res);
433 if (!res)
434 return NULL;
435 self->text = res;
439 return res;
442 LOCAL(PyObject*)
443 element_get_tail(ElementObject* self)
445 /* return borrowed reference to text attribute */
447 PyObject* res = self->tail;
449 if (JOIN_GET(res)) {
450 res = JOIN_OBJ(res);
451 if (PyList_CheckExact(res)) {
452 res = list_join(res);
453 if (!res)
454 return NULL;
455 self->tail = res;
459 return res;
462 static PyObject*
463 element(PyObject* self, PyObject* args, PyObject* kw)
465 PyObject* elem;
467 PyObject* tag;
468 PyObject* attrib = NULL;
469 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
470 &PyDict_Type, &attrib))
471 return NULL;
473 if (attrib || kw) {
474 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
475 if (!attrib)
476 return NULL;
477 if (kw)
478 PyDict_Update(attrib, kw);
479 } else {
480 Py_INCREF(Py_None);
481 attrib = Py_None;
484 elem = element_new(tag, attrib);
486 Py_DECREF(attrib);
488 return elem;
491 static PyObject*
492 subelement(PyObject* self, PyObject* args, PyObject* kw)
494 PyObject* elem;
496 ElementObject* parent;
497 PyObject* tag;
498 PyObject* attrib = NULL;
499 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
500 &Element_Type, &parent, &tag,
501 &PyDict_Type, &attrib))
502 return NULL;
504 if (attrib || kw) {
505 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
506 if (!attrib)
507 return NULL;
508 if (kw)
509 PyDict_Update(attrib, kw);
510 } else {
511 Py_INCREF(Py_None);
512 attrib = Py_None;
515 elem = element_new(tag, attrib);
517 Py_DECREF(attrib);
519 if (element_add_subelement(parent, elem) < 0)
520 return NULL;
522 return elem;
525 static void
526 element_dealloc(ElementObject* self)
528 if (self->extra)
529 element_dealloc_extra(self);
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
536 RELEASE(sizeof(ElementObject), "destroy element");
538 PyObject_Del(self);
541 /* -------------------------------------------------------------------- */
542 /* methods (in alphabetical order) */
544 static PyObject*
545 element_append(ElementObject* self, PyObject* args)
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
554 Py_RETURN_NONE;
557 static PyObject*
558 element_clear(ElementObject* self, PyObject* args)
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
576 Py_RETURN_NONE;
579 static PyObject*
580 element_copy(ElementObject* self, PyObject* args)
582 int i;
583 ElementObject* element;
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
591 if (!element)
592 return NULL;
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
602 if (self->extra) {
604 if (element_resize(element, self->extra->length) < 0)
605 return NULL;
607 for (i = 0; i < self->extra->length; i++) {
608 Py_INCREF(self->extra->children[i]);
609 element->extra->children[i] = self->extra->children[i];
612 element->extra->length = self->extra->length;
616 return (PyObject*) element;
619 static PyObject*
620 element_deepcopy(ElementObject* self, PyObject* args)
622 int i;
623 ElementObject* element;
624 PyObject* tag;
625 PyObject* attrib;
626 PyObject* text;
627 PyObject* tail;
628 PyObject* id;
630 PyObject* memo;
631 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
632 return NULL;
634 tag = deepcopy(self->tag, memo);
635 if (!tag)
636 return NULL;
638 if (self->extra) {
639 attrib = deepcopy(self->extra->attrib, memo);
640 if (!attrib) {
641 Py_DECREF(tag);
642 return NULL;
644 } else {
645 Py_INCREF(Py_None);
646 attrib = Py_None;
649 element = (ElementObject*) element_new(tag, attrib);
651 Py_DECREF(tag);
652 Py_DECREF(attrib);
654 if (!element)
655 return NULL;
657 text = deepcopy(JOIN_OBJ(self->text), memo);
658 if (!text)
659 goto error;
660 Py_DECREF(element->text);
661 element->text = JOIN_SET(text, JOIN_GET(self->text));
663 tail = deepcopy(JOIN_OBJ(self->tail), memo);
664 if (!tail)
665 goto error;
666 Py_DECREF(element->tail);
667 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
669 if (self->extra) {
671 if (element_resize(element, self->extra->length) < 0)
672 goto error;
674 for (i = 0; i < self->extra->length; i++) {
675 PyObject* child = deepcopy(self->extra->children[i], memo);
676 if (!child) {
677 element->extra->length = i;
678 goto error;
680 element->extra->children[i] = child;
683 element->extra->length = self->extra->length;
687 /* add object to memo dictionary (so deepcopy won't visit it again) */
688 id = PyInt_FromLong((Py_uintptr_t) self);
690 i = PyDict_SetItem(memo, id, (PyObject*) element);
692 Py_DECREF(id);
694 if (i < 0)
695 goto error;
697 return (PyObject*) element;
699 error:
700 Py_DECREF(element);
701 return NULL;
704 LOCAL(int)
705 checkpath(PyObject* tag)
707 int i, check = 1;
709 /* check if a tag contains an xpath character */
711 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
713 #if defined(Py_USING_UNICODE)
714 if (PyUnicode_Check(tag)) {
715 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
716 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
717 if (p[i] == '{')
718 check = 0;
719 else if (p[i] == '}')
720 check = 1;
721 else if (check && PATHCHAR(p[i]))
722 return 1;
724 return 0;
726 #endif
727 if (PyString_Check(tag)) {
728 char *p = PyString_AS_STRING(tag);
729 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
730 if (p[i] == '{')
731 check = 0;
732 else if (p[i] == '}')
733 check = 1;
734 else if (check && PATHCHAR(p[i]))
735 return 1;
737 return 0;
740 return 1; /* unknown type; might be path expression */
743 static PyObject*
744 element_find(ElementObject* self, PyObject* args)
746 int i;
748 PyObject* tag;
749 if (!PyArg_ParseTuple(args, "O:find", &tag))
750 return NULL;
752 if (checkpath(tag))
753 return PyObject_CallMethod(
754 elementpath_obj, "find", "OO", self, tag
757 if (!self->extra)
758 Py_RETURN_NONE;
760 for (i = 0; i < self->extra->length; i++) {
761 PyObject* item = self->extra->children[i];
762 if (Element_CheckExact(item) &&
763 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
764 Py_INCREF(item);
765 return item;
769 Py_RETURN_NONE;
772 static PyObject*
773 element_findtext(ElementObject* self, PyObject* args)
775 int i;
777 PyObject* tag;
778 PyObject* default_value = Py_None;
779 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
780 return NULL;
782 if (checkpath(tag))
783 return PyObject_CallMethod(
784 elementpath_obj, "findtext", "OOO", self, tag, default_value
787 if (!self->extra) {
788 Py_INCREF(default_value);
789 return default_value;
792 for (i = 0; i < self->extra->length; i++) {
793 ElementObject* item = (ElementObject*) self->extra->children[i];
794 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
795 PyObject* text = element_get_text(item);
796 if (text == Py_None)
797 return PyString_FromString("");
798 Py_INCREF(text);
799 return text;
803 Py_INCREF(default_value);
804 return default_value;
807 static PyObject*
808 element_findall(ElementObject* self, PyObject* args)
810 int i;
811 PyObject* out;
813 PyObject* tag;
814 if (!PyArg_ParseTuple(args, "O:findall", &tag))
815 return NULL;
817 if (checkpath(tag))
818 return PyObject_CallMethod(
819 elementpath_obj, "findall", "OO", self, tag
822 out = PyList_New(0);
823 if (!out)
824 return NULL;
826 if (!self->extra)
827 return out;
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* item = self->extra->children[i];
831 if (Element_CheckExact(item) &&
832 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
833 if (PyList_Append(out, item) < 0) {
834 Py_DECREF(out);
835 return NULL;
840 return out;
843 static PyObject*
844 element_get(ElementObject* self, PyObject* args)
846 PyObject* value;
848 PyObject* key;
849 PyObject* default_value = Py_None;
850 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
851 return NULL;
853 if (!self->extra || self->extra->attrib == Py_None)
854 value = default_value;
855 else {
856 value = PyDict_GetItem(self->extra->attrib, key);
857 if (!value)
858 value = default_value;
861 Py_INCREF(value);
862 return value;
865 static PyObject*
866 element_getchildren(ElementObject* self, PyObject* args)
868 int i;
869 PyObject* list;
871 if (!PyArg_ParseTuple(args, ":getchildren"))
872 return NULL;
874 if (!self->extra)
875 return PyList_New(0);
877 list = PyList_New(self->extra->length);
878 if (!list)
879 return NULL;
881 for (i = 0; i < self->extra->length; i++) {
882 PyObject* item = self->extra->children[i];
883 Py_INCREF(item);
884 PyList_SET_ITEM(list, i, item);
887 return list;
890 static PyObject*
891 element_getiterator(ElementObject* self, PyObject* args)
893 PyObject* result;
895 PyObject* tag = Py_None;
896 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
897 return NULL;
899 if (!elementtree_getiterator_obj) {
900 PyErr_SetString(
901 PyExc_RuntimeError,
902 "getiterator helper not found"
904 return NULL;
907 args = PyTuple_New(2);
908 if (args == NULL)
909 return NULL;
911 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
912 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
914 result = PyObject_CallObject(elementtree_getiterator_obj, args);
916 Py_DECREF(args);
918 return result;
921 static PyObject*
922 element_getitem(ElementObject* self, int index)
924 if (!self->extra || index < 0 || index >= self->extra->length) {
925 PyErr_SetString(
926 PyExc_IndexError,
927 "child index out of range"
929 return NULL;
932 Py_INCREF(self->extra->children[index]);
933 return self->extra->children[index];
936 static PyObject*
937 element_getslice(ElementObject* self, int start, int end)
939 int i;
940 PyObject* list;
942 if (!self->extra)
943 return PyList_New(0);
945 /* standard clamping */
946 if (start < 0)
947 start = 0;
948 if (end < 0)
949 end = 0;
950 if (end > self->extra->length)
951 end = self->extra->length;
952 if (start > end)
953 start = end;
955 list = PyList_New(end - start);
956 if (!list)
957 return NULL;
959 for (i = start; i < end; i++) {
960 PyObject* item = self->extra->children[i];
961 Py_INCREF(item);
962 PyList_SET_ITEM(list, i - start, item);
965 return list;
968 static PyObject*
969 element_insert(ElementObject* self, PyObject* args)
971 int i;
973 int index;
974 PyObject* element;
975 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
976 &Element_Type, &element))
977 return NULL;
979 if (!self->extra)
980 element_new_extra(self, NULL);
982 if (index < 0)
983 index = 0;
984 if (index > self->extra->length)
985 index = self->extra->length;
987 if (element_resize(self, 1) < 0)
988 return NULL;
990 for (i = self->extra->length; i > index; i--)
991 self->extra->children[i] = self->extra->children[i-1];
993 Py_INCREF(element);
994 self->extra->children[index] = element;
996 self->extra->length++;
998 Py_RETURN_NONE;
1001 static PyObject*
1002 element_items(ElementObject* self, PyObject* args)
1004 if (!PyArg_ParseTuple(args, ":items"))
1005 return NULL;
1007 if (!self->extra || self->extra->attrib == Py_None)
1008 return PyList_New(0);
1010 return PyDict_Items(self->extra->attrib);
1013 static PyObject*
1014 element_keys(ElementObject* self, PyObject* args)
1016 if (!PyArg_ParseTuple(args, ":keys"))
1017 return NULL;
1019 if (!self->extra || self->extra->attrib == Py_None)
1020 return PyList_New(0);
1022 return PyDict_Keys(self->extra->attrib);
1025 static int
1026 element_length(ElementObject* self)
1028 if (!self->extra)
1029 return 0;
1031 return self->extra->length;
1034 static PyObject*
1035 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1037 PyObject* elem;
1039 PyObject* tag;
1040 PyObject* attrib;
1041 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1042 return NULL;
1044 attrib = PyDict_Copy(attrib);
1045 if (!attrib)
1046 return NULL;
1048 elem = element_new(tag, attrib);
1050 Py_DECREF(attrib);
1052 return elem;
1055 static PyObject*
1056 element_reduce(ElementObject* self, PyObject* args)
1058 if (!PyArg_ParseTuple(args, ":__reduce__"))
1059 return NULL;
1061 /* Hack alert: This method is used to work around a __copy__
1062 problem on certain 2.3 and 2.4 versions. To save time and
1063 simplify the code, we create the copy in here, and use a dummy
1064 copyelement helper to trick the copy module into doing the
1065 right thing. */
1067 if (!elementtree_copyelement_obj) {
1068 PyErr_SetString(
1069 PyExc_RuntimeError,
1070 "copyelement helper not found"
1072 return NULL;
1075 return Py_BuildValue(
1076 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1080 static PyObject*
1081 element_remove(ElementObject* self, PyObject* args)
1083 int i;
1085 PyObject* element;
1086 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1087 return NULL;
1089 if (!self->extra) {
1090 /* element has no children, so raise exception */
1091 PyErr_SetString(
1092 PyExc_ValueError,
1093 "list.remove(x): x not in list"
1095 return NULL;
1098 for (i = 0; i < self->extra->length; i++) {
1099 if (self->extra->children[i] == element)
1100 break;
1101 if (PyObject_Compare(self->extra->children[i], element) == 0)
1102 break;
1105 if (i == self->extra->length) {
1106 /* element is not in children, so raise exception */
1107 PyErr_SetString(
1108 PyExc_ValueError,
1109 "list.remove(x): x not in list"
1111 return NULL;
1114 Py_DECREF(self->extra->children[i]);
1116 self->extra->length--;
1118 for (; i < self->extra->length; i++)
1119 self->extra->children[i] = self->extra->children[i+1];
1121 Py_RETURN_NONE;
1124 static PyObject*
1125 element_repr(ElementObject* self)
1127 PyObject* repr;
1128 char buffer[100];
1130 repr = PyString_FromString("<Element ");
1132 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1134 sprintf(buffer, " at %p>", self);
1135 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1137 return repr;
1140 static PyObject*
1141 element_set(ElementObject* self, PyObject* args)
1143 PyObject* attrib;
1145 PyObject* key;
1146 PyObject* value;
1147 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1148 return NULL;
1150 if (!self->extra)
1151 element_new_extra(self, NULL);
1153 attrib = element_get_attrib(self);
1154 if (!attrib)
1155 return NULL;
1157 if (PyDict_SetItem(attrib, key, value) < 0)
1158 return NULL;
1160 Py_RETURN_NONE;
1163 static int
1164 element_setslice(ElementObject* self, int start, int end, PyObject* item)
1166 int i, new, old;
1167 PyObject* recycle = NULL;
1169 if (!self->extra)
1170 element_new_extra(self, NULL);
1172 /* standard clamping */
1173 if (start < 0)
1174 start = 0;
1175 if (end < 0)
1176 end = 0;
1177 if (end > self->extra->length)
1178 end = self->extra->length;
1179 if (start > end)
1180 start = end;
1182 old = end - start;
1184 if (item == NULL)
1185 new = 0;
1186 else if (PyList_CheckExact(item)) {
1187 new = PyList_GET_SIZE(item);
1188 } else {
1189 /* FIXME: support arbitrary sequences? */
1190 PyErr_Format(
1191 PyExc_TypeError,
1192 "expected list, not \"%.200s\"", item->ob_type->tp_name
1194 return -1;
1197 if (old > 0) {
1198 /* to avoid recursive calls to this method (via decref), move
1199 old items to the recycle bin here, and get rid of them when
1200 we're done modifying the element */
1201 recycle = PyList_New(old);
1202 for (i = 0; i < old; i++)
1203 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1206 if (new < old) {
1207 /* delete slice */
1208 for (i = end; i < self->extra->length; i++)
1209 self->extra->children[i + new - old] = self->extra->children[i];
1210 } else if (new > old) {
1211 /* insert slice */
1212 if (element_resize(self, new - old) < 0)
1213 return -1;
1214 for (i = self->extra->length-1; i >= end; i--)
1215 self->extra->children[i + new - old] = self->extra->children[i];
1218 /* replace the slice */
1219 for (i = 0; i < new; i++) {
1220 PyObject* element = PyList_GET_ITEM(item, i);
1221 Py_INCREF(element);
1222 self->extra->children[i + start] = element;
1225 self->extra->length += new - old;
1227 /* discard the recycle bin, and everything in it */
1228 Py_XDECREF(recycle);
1230 return 0;
1233 static int
1234 element_setitem(ElementObject* self, int index, PyObject* item)
1236 int i;
1237 PyObject* old;
1239 if (!self->extra || index < 0 || index >= self->extra->length) {
1240 PyErr_SetString(
1241 PyExc_IndexError,
1242 "child assignment index out of range");
1243 return -1;
1246 old = self->extra->children[index];
1248 if (item) {
1249 Py_INCREF(item);
1250 self->extra->children[index] = item;
1251 } else {
1252 self->extra->length--;
1253 for (i = index; i < self->extra->length; i++)
1254 self->extra->children[i] = self->extra->children[i+1];
1257 Py_DECREF(old);
1259 return 0;
1262 static PyMethodDef element_methods[] = {
1264 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1266 {"get", (PyCFunction) element_get, METH_VARARGS},
1267 {"set", (PyCFunction) element_set, METH_VARARGS},
1269 {"find", (PyCFunction) element_find, METH_VARARGS},
1270 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1271 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1273 {"append", (PyCFunction) element_append, METH_VARARGS},
1274 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1275 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1277 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1278 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1280 {"items", (PyCFunction) element_items, METH_VARARGS},
1281 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1283 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1285 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1286 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1288 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1289 C objects correctly, so we have to fake it using a __reduce__-
1290 based hack (see the element_reduce implementation above for
1291 details). */
1293 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1294 using a runtime test to figure out if we need to fake things
1295 or now (see the init code below). The following entry is
1296 enabled only if the hack is needed. */
1298 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1300 {NULL, NULL}
1303 static PyObject*
1304 element_getattr(ElementObject* self, char* name)
1306 PyObject* res;
1308 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1309 if (res)
1310 return res;
1312 PyErr_Clear();
1314 if (strcmp(name, "tag") == 0)
1315 res = self->tag;
1316 else if (strcmp(name, "text") == 0)
1317 res = element_get_text(self);
1318 else if (strcmp(name, "tail") == 0) {
1319 res = element_get_tail(self);
1320 } else if (strcmp(name, "attrib") == 0) {
1321 if (!self->extra)
1322 element_new_extra(self, NULL);
1323 res = element_get_attrib(self);
1324 } else {
1325 PyErr_SetString(PyExc_AttributeError, name);
1326 return NULL;
1329 if (!res)
1330 return NULL;
1332 Py_INCREF(res);
1333 return res;
1336 static int
1337 element_setattr(ElementObject* self, const char* name, PyObject* value)
1339 if (value == NULL) {
1340 PyErr_SetString(
1341 PyExc_AttributeError,
1342 "can't delete element attributes"
1344 return -1;
1347 if (strcmp(name, "tag") == 0) {
1348 Py_DECREF(self->tag);
1349 self->tag = value;
1350 Py_INCREF(self->tag);
1351 } else if (strcmp(name, "text") == 0) {
1352 Py_DECREF(JOIN_OBJ(self->text));
1353 self->text = value;
1354 Py_INCREF(self->text);
1355 } else if (strcmp(name, "tail") == 0) {
1356 Py_DECREF(JOIN_OBJ(self->tail));
1357 self->tail = value;
1358 Py_INCREF(self->tail);
1359 } else if (strcmp(name, "attrib") == 0) {
1360 if (!self->extra)
1361 element_new_extra(self, NULL);
1362 Py_DECREF(self->extra->attrib);
1363 self->extra->attrib = value;
1364 Py_INCREF(self->extra->attrib);
1365 } else {
1366 PyErr_SetString(PyExc_AttributeError, name);
1367 return -1;
1370 return 0;
1373 static PySequenceMethods element_as_sequence = {
1374 (inquiry) element_length,
1375 0, /* sq_concat */
1376 0, /* sq_repeat */
1377 (intargfunc) element_getitem,
1378 (intintargfunc) element_getslice,
1379 (intobjargproc) element_setitem,
1380 (intintobjargproc) element_setslice,
1383 statichere PyTypeObject Element_Type = {
1384 PyObject_HEAD_INIT(NULL)
1385 0, "Element", sizeof(ElementObject), 0,
1386 /* methods */
1387 (destructor)element_dealloc, /* tp_dealloc */
1388 0, /* tp_print */
1389 (getattrfunc)element_getattr, /* tp_getattr */
1390 (setattrfunc)element_setattr, /* tp_setattr */
1391 0, /* tp_compare */
1392 (reprfunc)element_repr, /* tp_repr */
1393 0, /* tp_as_number */
1394 &element_as_sequence, /* tp_as_sequence */
1397 /* ==================================================================== */
1398 /* the tree builder type */
1400 typedef struct {
1401 PyObject_HEAD
1403 PyObject* root; /* root node (first created node) */
1405 ElementObject* this; /* current node */
1406 ElementObject* last; /* most recently created node */
1408 PyObject* data; /* data collector (string or list), or NULL */
1410 PyObject* stack; /* element stack */
1411 int index; /* current stack size (0=empty) */
1413 /* element tracing */
1414 PyObject* events; /* list of events, or NULL if not collecting */
1415 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1416 PyObject* end_event_obj;
1417 PyObject* start_ns_event_obj;
1418 PyObject* end_ns_event_obj;
1420 } TreeBuilderObject;
1422 staticforward PyTypeObject TreeBuilder_Type;
1424 #define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1426 /* -------------------------------------------------------------------- */
1427 /* constructor and destructor */
1429 LOCAL(PyObject*)
1430 treebuilder_new(void)
1432 TreeBuilderObject* self;
1434 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1435 if (self == NULL)
1436 return NULL;
1438 self->root = NULL;
1440 Py_INCREF(Py_None);
1441 self->this = (ElementObject*) Py_None;
1443 Py_INCREF(Py_None);
1444 self->last = (ElementObject*) Py_None;
1446 self->data = NULL;
1448 self->stack = PyList_New(20);
1449 self->index = 0;
1451 self->events = NULL;
1452 self->start_event_obj = self->end_event_obj = NULL;
1453 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1455 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1457 return (PyObject*) self;
1460 static PyObject*
1461 treebuilder(PyObject* _self, PyObject* args)
1463 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1464 return NULL;
1466 return treebuilder_new();
1469 static void
1470 treebuilder_dealloc(TreeBuilderObject* self)
1472 Py_XDECREF(self->end_ns_event_obj);
1473 Py_XDECREF(self->start_ns_event_obj);
1474 Py_XDECREF(self->end_event_obj);
1475 Py_XDECREF(self->start_event_obj);
1476 Py_XDECREF(self->events);
1477 Py_DECREF(self->stack);
1478 Py_XDECREF(self->data);
1479 Py_DECREF(self->last);
1480 Py_DECREF(self->this);
1481 Py_XDECREF(self->root);
1483 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1485 PyObject_Del(self);
1488 /* -------------------------------------------------------------------- */
1489 /* handlers */
1491 LOCAL(PyObject*)
1492 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1493 PyObject* standalone)
1495 Py_RETURN_NONE;
1498 LOCAL(PyObject*)
1499 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1500 PyObject* attrib)
1502 PyObject* node;
1503 PyObject* this;
1505 if (self->data) {
1506 if (self->this == self->last) {
1507 Py_DECREF(self->last->text);
1508 self->last->text = JOIN_SET(
1509 self->data, PyList_CheckExact(self->data)
1511 } else {
1512 Py_DECREF(self->last->tail);
1513 self->last->tail = JOIN_SET(
1514 self->data, PyList_CheckExact(self->data)
1517 self->data = NULL;
1520 node = element_new(tag, attrib);
1521 if (!node)
1522 return NULL;
1524 this = (PyObject*) self->this;
1526 if (this != Py_None) {
1527 if (element_add_subelement((ElementObject*) this, node) < 0)
1528 return NULL;
1529 } else {
1530 if (self->root) {
1531 PyErr_SetString(
1532 PyExc_SyntaxError,
1533 "multiple elements on top level"
1535 return NULL;
1537 Py_INCREF(node);
1538 self->root = node;
1541 if (self->index < PyList_GET_SIZE(self->stack)) {
1542 if (PyList_SetItem(self->stack, self->index, this) < 0)
1543 return NULL;
1544 Py_INCREF(this);
1545 } else {
1546 if (PyList_Append(self->stack, this) < 0)
1547 return NULL;
1549 self->index++;
1551 Py_DECREF(this);
1552 Py_INCREF(node);
1553 self->this = (ElementObject*) node;
1555 Py_DECREF(self->last);
1556 Py_INCREF(node);
1557 self->last = (ElementObject*) node;
1559 if (self->start_event_obj) {
1560 PyObject* res;
1561 PyObject* action = self->start_event_obj;
1562 res = PyTuple_New(2);
1563 if (res) {
1564 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1565 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1566 PyList_Append(self->events, res);
1567 Py_DECREF(res);
1568 } else
1569 PyErr_Clear(); /* FIXME: propagate error */
1572 return node;
1575 LOCAL(PyObject*)
1576 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1578 if (!self->data) {
1579 /* store the first item as is */
1580 Py_INCREF(data); self->data = data;
1581 } else {
1582 /* more than one item; use a list to collect items */
1583 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1584 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1585 /* expat often generates single character data sections; handle
1586 the most common case by resizing the existing string... */
1587 int size = PyString_GET_SIZE(self->data);
1588 if (_PyString_Resize(&self->data, size + 1) < 0)
1589 return NULL;
1590 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1591 } else if (PyList_CheckExact(self->data)) {
1592 if (PyList_Append(self->data, data) < 0)
1593 return NULL;
1594 } else {
1595 PyObject* list = PyList_New(2);
1596 if (!list)
1597 return NULL;
1598 PyList_SET_ITEM(list, 0, self->data);
1599 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1600 self->data = list;
1604 Py_RETURN_NONE;
1607 LOCAL(PyObject*)
1608 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1610 PyObject* item;
1612 if (self->data) {
1613 if (self->this == self->last) {
1614 Py_DECREF(self->last->text);
1615 self->last->text = JOIN_SET(
1616 self->data, PyList_CheckExact(self->data)
1618 } else {
1619 Py_DECREF(self->last->tail);
1620 self->last->tail = JOIN_SET(
1621 self->data, PyList_CheckExact(self->data)
1624 self->data = NULL;
1627 if (self->index == 0) {
1628 PyErr_SetString(
1629 PyExc_IndexError,
1630 "pop from empty stack"
1632 return NULL;
1635 self->index--;
1637 item = PyList_GET_ITEM(self->stack, self->index);
1638 Py_INCREF(item);
1640 Py_DECREF(self->last);
1642 self->last = (ElementObject*) self->this;
1643 self->this = (ElementObject*) item;
1645 if (self->end_event_obj) {
1646 PyObject* res;
1647 PyObject* action = self->end_event_obj;
1648 PyObject* node = (PyObject*) self->last;
1649 res = PyTuple_New(2);
1650 if (res) {
1651 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1652 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1653 PyList_Append(self->events, res);
1654 Py_DECREF(res);
1655 } else
1656 PyErr_Clear(); /* FIXME: propagate error */
1659 Py_INCREF(self->last);
1660 return (PyObject*) self->last;
1663 LOCAL(void)
1664 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1665 const char* prefix, const char *uri)
1667 PyObject* res;
1668 PyObject* action;
1669 PyObject* parcel;
1671 if (!self->events)
1672 return;
1674 if (start) {
1675 if (!self->start_ns_event_obj)
1676 return;
1677 action = self->start_ns_event_obj;
1678 /* FIXME: prefix and uri use utf-8 encoding! */
1679 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1680 if (!parcel)
1681 return;
1682 Py_INCREF(action);
1683 } else {
1684 if (!self->end_ns_event_obj)
1685 return;
1686 action = self->end_ns_event_obj;
1687 Py_INCREF(action);
1688 parcel = Py_None;
1689 Py_INCREF(parcel);
1692 res = PyTuple_New(2);
1694 if (res) {
1695 PyTuple_SET_ITEM(res, 0, action);
1696 PyTuple_SET_ITEM(res, 1, parcel);
1697 PyList_Append(self->events, res);
1698 Py_DECREF(res);
1699 } else
1700 PyErr_Clear(); /* FIXME: propagate error */
1703 /* -------------------------------------------------------------------- */
1704 /* methods (in alphabetical order) */
1706 static PyObject*
1707 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1709 PyObject* data;
1710 if (!PyArg_ParseTuple(args, "O:data", &data))
1711 return NULL;
1713 return treebuilder_handle_data(self, data);
1716 static PyObject*
1717 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1719 PyObject* tag;
1720 if (!PyArg_ParseTuple(args, "O:end", &tag))
1721 return NULL;
1723 return treebuilder_handle_end(self, tag);
1726 LOCAL(PyObject*)
1727 treebuilder_done(TreeBuilderObject* self)
1729 PyObject* res;
1731 /* FIXME: check stack size? */
1733 if (self->root)
1734 res = self->root;
1735 else
1736 res = Py_None;
1738 Py_INCREF(res);
1739 return res;
1742 static PyObject*
1743 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1745 if (!PyArg_ParseTuple(args, ":close"))
1746 return NULL;
1748 return treebuilder_done(self);
1751 static PyObject*
1752 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1754 PyObject* tag;
1755 PyObject* attrib = Py_None;
1756 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1757 return NULL;
1759 return treebuilder_handle_start(self, tag, attrib);
1762 static PyObject*
1763 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1765 PyObject* encoding;
1766 PyObject* standalone;
1767 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1768 return NULL;
1770 return treebuilder_handle_xml(self, encoding, standalone);
1773 static PyMethodDef treebuilder_methods[] = {
1774 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1775 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1776 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1777 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1778 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1779 {NULL, NULL}
1782 static PyObject*
1783 treebuilder_getattr(TreeBuilderObject* self, char* name)
1785 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1788 statichere PyTypeObject TreeBuilder_Type = {
1789 PyObject_HEAD_INIT(NULL)
1790 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1791 /* methods */
1792 (destructor)treebuilder_dealloc, /* tp_dealloc */
1793 0, /* tp_print */
1794 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1797 /* ==================================================================== */
1798 /* the expat interface */
1800 #if defined(USE_EXPAT)
1802 #include "expat.h"
1804 #if defined(USE_PYEXPAT_CAPI)
1805 #include "pyexpat.h"
1806 static struct PyExpat_CAPI* expat_capi;
1807 #define EXPAT(func) (expat_capi->func)
1808 #else
1809 #define EXPAT(func) (XML_##func)
1810 #endif
1812 typedef struct {
1813 PyObject_HEAD
1815 XML_Parser parser;
1817 PyObject* target;
1818 PyObject* entity;
1820 PyObject* names;
1822 PyObject* handle_xml;
1823 PyObject* handle_start;
1824 PyObject* handle_data;
1825 PyObject* handle_end;
1827 PyObject* handle_comment;
1828 PyObject* handle_pi;
1830 } XMLParserObject;
1832 staticforward PyTypeObject XMLParser_Type;
1834 /* helpers */
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first 'size' bytes of 'string' has its high
   bit set (i.e. the data is not plain 7-bit ascii), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int remaining = size;

    while (remaining-- > 0) {
        if (*string++ & 0x80)
            return 1;
    }

    return 0;
}
#endif
1851 LOCAL(PyObject*)
1852 makestring(const char* string, int size)
1854 /* convert a UTF-8 string to either a 7-bit ascii string or a
1855 Unicode string */
1857 #if defined(Py_USING_UNICODE)
1858 if (checkstring(string, size))
1859 return PyUnicode_DecodeUTF8(string, size, "strict");
1860 #endif
1862 return PyString_FromStringAndSize(string, size);
1865 LOCAL(PyObject*)
1866 makeuniversal(XMLParserObject* self, const char* string)
1868 /* convert a UTF-8 tag/attribute name from the expat parser
1869 to a universal name string */
1871 int size = strlen(string);
1872 PyObject* key;
1873 PyObject* value;
1875 /* look the 'raw' name up in the names dictionary */
1876 key = PyString_FromStringAndSize(string, size);
1877 if (!key)
1878 return NULL;
1880 value = PyDict_GetItem(self->names, key);
1882 if (value) {
1883 Py_INCREF(value);
1884 } else {
1885 /* new name. convert to universal name, and decode as
1886 necessary */
1888 PyObject* tag;
1889 char* p;
1890 int i;
1892 /* look for namespace separator */
1893 for (i = 0; i < size; i++)
1894 if (string[i] == '}')
1895 break;
1896 if (i != size) {
1897 /* convert to universal name */
1898 tag = PyString_FromStringAndSize(NULL, size+1);
1899 p = PyString_AS_STRING(tag);
1900 p[0] = '{';
1901 memcpy(p+1, string, size);
1902 size++;
1903 } else {
1904 /* plain name; use key as tag */
1905 Py_INCREF(key);
1906 tag = key;
1909 /* decode universal name */
1910 #if defined(Py_USING_UNICODE)
1911 /* inline makestring, to avoid duplicating the source string if
1912 it's not an utf-8 string */
1913 p = PyString_AS_STRING(tag);
1914 if (checkstring(p, size)) {
1915 value = PyUnicode_DecodeUTF8(p, size, "strict");
1916 Py_DECREF(tag);
1917 if (!value) {
1918 Py_DECREF(key);
1919 return NULL;
1921 } else
1922 #endif
1923 value = tag; /* use tag as is */
1925 /* add to names dictionary */
1926 if (PyDict_SetItem(self->names, key, value) < 0) {
1927 Py_DECREF(key);
1928 Py_DECREF(value);
1929 return NULL;
1933 Py_DECREF(key);
1934 return value;
1937 /* -------------------------------------------------------------------- */
1938 /* handlers */
1940 static void
1941 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1942 int data_len)
1944 PyObject* key;
1945 PyObject* value;
1946 PyObject* res;
1948 if (data_len < 2 || data_in[0] != '&')
1949 return;
1951 key = makestring(data_in + 1, data_len - 2);
1952 if (!key)
1953 return;
1955 value = PyDict_GetItem(self->entity, key);
1957 if (value) {
1958 if (TreeBuilder_CheckExact(self->target))
1959 res = treebuilder_handle_data(
1960 (TreeBuilderObject*) self->target, value
1962 else if (self->handle_data)
1963 res = PyObject_CallFunction(self->handle_data, "O", value);
1964 else
1965 res = NULL;
1966 Py_XDECREF(res);
1967 } else {
1968 PyErr_Format(
1969 PyExc_SyntaxError, "undefined entity &%s;: line %d, column %d",
1970 PyString_AS_STRING(key),
1971 EXPAT(GetErrorLineNumber)(self->parser),
1972 EXPAT(GetErrorColumnNumber)(self->parser)
1976 Py_DECREF(key);
1979 static void
1980 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1981 const XML_Char **attrib_in)
1983 PyObject* res;
1984 PyObject* tag;
1985 PyObject* attrib;
1986 int ok;
1988 /* tag name */
1989 tag = makeuniversal(self, tag_in);
1990 if (!tag)
1991 return; /* parser will look for errors */
1993 /* attributes */
1994 if (attrib_in[0]) {
1995 attrib = PyDict_New();
1996 if (!attrib)
1997 return;
1998 while (attrib_in[0] && attrib_in[1]) {
1999 PyObject* key = makeuniversal(self, attrib_in[0]);
2000 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2001 if (!key || !value) {
2002 Py_XDECREF(value);
2003 Py_XDECREF(key);
2004 Py_DECREF(attrib);
2005 return;
2007 ok = PyDict_SetItem(attrib, key, value);
2008 Py_DECREF(value);
2009 Py_DECREF(key);
2010 if (ok < 0) {
2011 Py_DECREF(attrib);
2012 return;
2014 attrib_in += 2;
2016 } else {
2017 Py_INCREF(Py_None);
2018 attrib = Py_None;
2021 if (TreeBuilder_CheckExact(self->target))
2022 /* shortcut */
2023 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2024 tag, attrib);
2025 else if (self->handle_start)
2026 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2027 else
2028 res = NULL;
2030 Py_DECREF(tag);
2031 Py_DECREF(attrib);
2033 Py_XDECREF(res);
2036 static void
2037 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2038 int data_len)
2040 PyObject* data;
2041 PyObject* res;
2043 data = makestring(data_in, data_len);
2045 if (TreeBuilder_CheckExact(self->target))
2046 /* shortcut */
2047 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2048 else if (self->handle_data)
2049 res = PyObject_CallFunction(self->handle_data, "O", data);
2050 else
2051 res = NULL;
2053 Py_DECREF(data);
2055 Py_XDECREF(res);
2058 static void
2059 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2061 PyObject* tag;
2062 PyObject* res = NULL;
2064 if (TreeBuilder_CheckExact(self->target))
2065 /* shortcut */
2066 /* the standard tree builder doesn't look at the end tag */
2067 res = treebuilder_handle_end(
2068 (TreeBuilderObject*) self->target, Py_None
2070 else if (self->handle_end) {
2071 tag = makeuniversal(self, tag_in);
2072 if (tag) {
2073 res = PyObject_CallFunction(self->handle_end, "O", tag);
2074 Py_DECREF(tag);
2078 Py_XDECREF(res);
2081 static void
2082 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2083 const XML_Char *uri)
2085 treebuilder_handle_namespace(
2086 (TreeBuilderObject*) self->target, 1, prefix, uri
2090 static void
2091 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2093 treebuilder_handle_namespace(
2094 (TreeBuilderObject*) self->target, 0, NULL, NULL
2098 static void
2099 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2101 PyObject* comment;
2102 PyObject* res;
2104 if (self->handle_comment) {
2105 comment = makestring(comment_in, strlen(comment_in));
2106 if (comment) {
2107 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2108 Py_XDECREF(res);
2109 Py_DECREF(comment);
2114 static void
2115 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2116 const XML_Char* data_in)
2118 PyObject* target;
2119 PyObject* data;
2120 PyObject* res;
2122 if (self->handle_pi) {
2123 target = makestring(target_in, strlen(target_in));
2124 data = makestring(data_in, strlen(data_in));
2125 if (target && data) {
2126 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2127 Py_XDECREF(res);
2128 Py_DECREF(data);
2129 Py_DECREF(target);
2130 } else {
2131 Py_XDECREF(data);
2132 Py_XDECREF(target);
#if defined(Py_USING_UNICODE)
/* Unknown-encoding handler: builds expat's byte-to-character table by
   decoding all 256 byte values with the Python codec called 'name'.
   Bytes the codec cannot decode are mapped to -1.  Returns
   XML_STATUS_OK on success and XML_STATUS_ERROR otherwise. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte for every possible value */
    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    /* only encodings that yield exactly one character per byte fit
       the table */
    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2176 /* -------------------------------------------------------------------- */
2177 /* constructor and destructor */
2179 static PyObject*
2180 xmlparser(PyObject* _self, PyObject* args, PyObject* kw)
2182 XMLParserObject* self;
2183 /* FIXME: does this need to be static? */
2184 static XML_Memory_Handling_Suite memory_handler;
2186 PyObject* target = NULL;
2187 char* encoding = NULL;
2188 static PY_CONST char* kwlist[] = { "target", "encoding", NULL };
2189 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2190 &target, &encoding))
2191 return NULL;
2193 #if defined(USE_PYEXPAT_CAPI)
2194 if (!expat_capi) {
2195 PyErr_SetString(
2196 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2198 return NULL;
2200 #endif
2202 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2203 if (self == NULL)
2204 return NULL;
2206 self->entity = PyDict_New();
2207 if (!self->entity) {
2208 PyObject_Del(self);
2209 return NULL; /* FIXME: cleanup on error */
2212 self->names = PyDict_New();
2213 if (!self->names) {
2214 PyObject_Del(self);
2215 return NULL; /* FIXME: cleanup on error */
2218 memory_handler.malloc_fcn = PyObject_Malloc;
2219 memory_handler.realloc_fcn = PyObject_Realloc;
2220 memory_handler.free_fcn = PyObject_Free;
2222 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2223 if (!self->parser) {
2224 PyErr_NoMemory();
2225 return NULL; /* FIXME: cleanup on error */
2228 /* setup target handlers */
2229 if (!target) {
2230 target = treebuilder_new();
2231 if (!target) {
2232 PyObject_Del(self);
2233 return NULL; /* FIXME: cleanup on error */
2235 } else
2236 Py_INCREF(target);
2237 self->target = target;
2239 self->handle_xml = PyObject_GetAttrString(target, "xml");
2240 self->handle_start = PyObject_GetAttrString(target, "start");
2241 self->handle_data = PyObject_GetAttrString(target, "data");
2242 self->handle_end = PyObject_GetAttrString(target, "end");
2243 self->handle_comment = PyObject_GetAttrString(target, "comment");
2244 self->handle_pi = PyObject_GetAttrString(target, "pi");
2246 PyErr_Clear();
2248 /* configure parser */
2249 EXPAT(SetUserData)(self->parser, self);
2250 EXPAT(SetElementHandler)(
2251 self->parser,
2252 (XML_StartElementHandler) expat_start_handler,
2253 (XML_EndElementHandler) expat_end_handler
2255 EXPAT(SetDefaultHandlerExpand)(
2256 self->parser,
2257 (XML_DefaultHandler) expat_default_handler
2259 EXPAT(SetCharacterDataHandler)(
2260 self->parser,
2261 (XML_CharacterDataHandler) expat_data_handler
2263 if (self->handle_comment)
2264 EXPAT(SetCommentHandler)(
2265 self->parser,
2266 (XML_CommentHandler) expat_comment_handler
2268 if (self->handle_pi)
2269 EXPAT(SetProcessingInstructionHandler)(
2270 self->parser,
2271 (XML_ProcessingInstructionHandler) expat_pi_handler
2273 #if defined(Py_USING_UNICODE)
2274 EXPAT(SetUnknownEncodingHandler)(
2275 self->parser,
2276 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2278 #endif
2280 ALLOC(sizeof(XMLParserObject), "create expatparser");
2282 return (PyObject*) self;
2285 static void
2286 xmlparser_dealloc(XMLParserObject* self)
2288 EXPAT(ParserFree)(self->parser);
2290 Py_XDECREF(self->handle_pi);
2291 Py_XDECREF(self->handle_comment);
2292 Py_XDECREF(self->handle_end);
2293 Py_XDECREF(self->handle_data);
2294 Py_XDECREF(self->handle_start);
2295 Py_XDECREF(self->handle_xml);
2297 Py_DECREF(self->target);
2298 Py_DECREF(self->entity);
2299 Py_DECREF(self->names);
2301 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2303 PyObject_Del(self);
2306 /* -------------------------------------------------------------------- */
2307 /* methods (in alphabetical order) */
2309 LOCAL(PyObject*)
2310 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2312 int ok;
2314 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2316 if (PyErr_Occurred())
2317 return NULL;
2319 if (!ok) {
2320 PyErr_Format(
2321 PyExc_SyntaxError, "%s: line %d, column %d",
2322 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2323 EXPAT(GetErrorLineNumber)(self->parser),
2324 EXPAT(GetErrorColumnNumber)(self->parser)
2326 return NULL;
2329 Py_RETURN_NONE;
2332 static PyObject*
2333 xmlparser_close(XMLParserObject* self, PyObject* args)
2335 /* end feeding data to parser */
2337 PyObject* res;
2338 if (!PyArg_ParseTuple(args, ":close"))
2339 return NULL;
2341 res = expat_parse(self, "", 0, 1);
2343 if (res && TreeBuilder_CheckExact(self->target)) {
2344 Py_DECREF(res);
2345 return treebuilder_done((TreeBuilderObject*) self->target);
2348 return res;
2351 static PyObject*
2352 xmlparser_feed(XMLParserObject* self, PyObject* args)
2354 /* feed data to parser */
2356 char* data;
2357 int data_len;
2358 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2359 return NULL;
2361 return expat_parse(self, data, data_len, 0);
2364 static PyObject*
2365 xmlparser_parse(XMLParserObject* self, PyObject* args)
2367 /* (internal) parse until end of input stream */
2369 PyObject* reader;
2370 PyObject* buffer;
2371 PyObject* res;
2373 PyObject* fileobj;
2374 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2375 return NULL;
2377 reader = PyObject_GetAttrString(fileobj, "read");
2378 if (!reader)
2379 return NULL;
2381 /* read from open file object */
2382 for (;;) {
2384 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2386 if (!buffer) {
2387 /* read failed (e.g. due to KeyboardInterrupt) */
2388 Py_DECREF(reader);
2389 return NULL;
2392 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2393 Py_DECREF(buffer);
2394 break;
2397 res = expat_parse(
2398 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2401 Py_DECREF(buffer);
2403 if (!res) {
2404 Py_DECREF(reader);
2405 return NULL;
2407 Py_DECREF(res);
2411 Py_DECREF(reader);
2413 res = expat_parse(self, "", 0, 1);
2415 if (res && TreeBuilder_CheckExact(self->target)) {
2416 Py_DECREF(res);
2417 return treebuilder_done((TreeBuilderObject*) self->target);
2420 return res;
2423 static PyObject*
2424 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2426 /* activate element event reporting */
2428 int i;
2429 TreeBuilderObject* target;
2431 PyObject* events; /* event collector */
2432 PyObject* event_set = Py_None;
2433 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2434 &event_set))
2435 return NULL;
2437 if (!TreeBuilder_CheckExact(self->target)) {
2438 PyErr_SetString(
2439 PyExc_TypeError,
2440 "event handling only supported for cElementTree.Treebuilder "
2441 "targets"
2443 return NULL;
2446 target = (TreeBuilderObject*) self->target;
2448 Py_INCREF(events);
2449 Py_XDECREF(target->events);
2450 target->events = events;
2452 /* clear out existing events */
2453 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2454 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2455 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2456 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2458 if (event_set == Py_None) {
2459 /* default is "end" only */
2460 target->end_event_obj = PyString_FromString("end");
2461 Py_RETURN_NONE;
2464 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2465 goto error;
2467 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2468 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2469 char* event;
2470 if (!PyString_Check(item))
2471 goto error;
2472 event = PyString_AS_STRING(item);
2473 if (strcmp(event, "start") == 0) {
2474 Py_INCREF(item);
2475 target->start_event_obj = item;
2476 } else if (strcmp(event, "end") == 0) {
2477 Py_INCREF(item);
2478 Py_XDECREF(target->end_event_obj);
2479 target->end_event_obj = item;
2480 } else if (strcmp(event, "start-ns") == 0) {
2481 Py_INCREF(item);
2482 Py_XDECREF(target->start_ns_event_obj);
2483 target->start_ns_event_obj = item;
2484 EXPAT(SetNamespaceDeclHandler)(
2485 self->parser,
2486 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2487 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2489 } else if (strcmp(event, "end-ns") == 0) {
2490 Py_INCREF(item);
2491 Py_XDECREF(target->end_ns_event_obj);
2492 target->end_ns_event_obj = item;
2493 EXPAT(SetNamespaceDeclHandler)(
2494 self->parser,
2495 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2496 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2498 } else {
2499 PyErr_Format(
2500 PyExc_ValueError,
2501 "unknown event '%s'", event
2503 return NULL;
2507 Py_RETURN_NONE;
2509 error:
2510 PyErr_SetString(
2511 PyExc_TypeError,
2512 "invalid event tuple"
2514 return NULL;
2517 static PyMethodDef xmlparser_methods[] = {
2518 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2519 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2520 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2521 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2522 {NULL, NULL}
2525 static PyObject*
2526 xmlparser_getattr(XMLParserObject* self, char* name)
2528 PyObject* res;
2530 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2531 if (res)
2532 return res;
2534 PyErr_Clear();
2536 if (strcmp(name, "entity") == 0)
2537 res = self->entity;
2538 else if (strcmp(name, "target") == 0)
2539 res = self->target;
2540 else if (strcmp(name, "version") == 0) {
2541 char buffer[100];
2542 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2543 XML_MINOR_VERSION, XML_MICRO_VERSION);
2544 return PyString_FromString(buffer);
2545 } else {
2546 PyErr_SetString(PyExc_AttributeError, name);
2547 return NULL;
2550 Py_INCREF(res);
2551 return res;
2554 statichere PyTypeObject XMLParser_Type = {
2555 PyObject_HEAD_INIT(NULL)
2556 0, "XMLParser", sizeof(XMLParserObject), 0,
2557 /* methods */
2558 (destructor)xmlparser_dealloc, /* tp_dealloc */
2559 0, /* tp_print */
2560 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2563 #endif
2565 /* ==================================================================== */
2566 /* python module interface */
2568 static PyMethodDef _functions[] = {
2569 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2570 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2571 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2572 #if defined(USE_EXPAT)
2573 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2574 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2575 #endif
2576 {NULL, NULL}
2579 DL_EXPORT(void)
2580 init_elementtree(void)
2582 PyObject* m;
2583 PyObject* g;
2584 char* bootstrap;
2585 #if defined(USE_PYEXPAT_CAPI)
2586 struct PyExpat_CAPI* capi;
2587 #endif
2589 /* Patch object type */
2590 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2591 #if defined(USE_EXPAT)
2592 XMLParser_Type.ob_type = &PyType_Type;
2593 #endif
2595 m = Py_InitModule("_elementtree", _functions);
2596 if (m == NULL)
2597 return;
2599 /* python glue code */
2601 g = PyDict_New();
2602 if (g == NULL)
2603 return;
2605 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2607 bootstrap = (
2609 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2610 "from __future__ import generators\n" /* enable yield under 2.2 */
2611 #endif
2613 "from copy import copy, deepcopy\n"
2615 "try:\n"
2616 " from xml.etree import ElementTree\n"
2617 "except ImportError:\n"
2618 " import ElementTree\n"
2619 "ET = ElementTree\n"
2620 "del ElementTree\n"
2622 "import _elementtree as cElementTree\n"
2624 "try:\n" /* check if copy works as is */
2625 " copy(cElementTree.Element('x'))\n"
2626 "except:\n"
2627 " def copyelement(elem):\n"
2628 " return elem\n"
2630 "def Comment(text=None):\n" /* public */
2631 " element = cElementTree.Element(ET.Comment)\n"
2632 " element.text = text\n"
2633 " return element\n"
2634 "cElementTree.Comment = Comment\n"
2636 "class ElementTree(ET.ElementTree):\n" /* public */
2637 " def parse(self, source, parser=None):\n"
2638 " if not hasattr(source, 'read'):\n"
2639 " source = open(source, 'rb')\n"
2640 " if parser is not None:\n"
2641 " while 1:\n"
2642 " data = source.read(65536)\n"
2643 " if not data:\n"
2644 " break\n"
2645 " parser.feed(data)\n"
2646 " self._root = parser.close()\n"
2647 " else:\n"
2648 " parser = cElementTree.XMLParser()\n"
2649 " self._root = parser._parse(source)\n"
2650 " return self._root\n"
2651 "cElementTree.ElementTree = ElementTree\n"
2653 "def getiterator(node, tag=None):\n" /* helper */
2654 " if tag == '*':\n"
2655 " tag = None\n"
2656 #if (PY_VERSION_HEX < 0x02020000)
2657 " nodes = []\n" /* 2.1 doesn't have yield */
2658 " if tag is None or node.tag == tag:\n"
2659 " nodes.append(node)\n"
2660 " for node in node:\n"
2661 " nodes.extend(getiterator(node, tag))\n"
2662 " return nodes\n"
2663 #else
2664 " if tag is None or node.tag == tag:\n"
2665 " yield node\n"
2666 " for node in node:\n"
2667 " for node in getiterator(node, tag):\n"
2668 " yield node\n"
2669 #endif
2671 "def parse(source, parser=None):\n" /* public */
2672 " tree = ElementTree()\n"
2673 " tree.parse(source, parser)\n"
2674 " return tree\n"
2675 "cElementTree.parse = parse\n"
2677 #if (PY_VERSION_HEX < 0x02020000)
2678 "if hasattr(ET, 'iterparse'):\n"
2679 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2680 #else
2681 "class iterparse(object):\n"
2682 " root = None\n"
2683 " def __init__(self, file, events=None):\n"
2684 " if not hasattr(file, 'read'):\n"
2685 " file = open(file, 'rb')\n"
2686 " self._file = file\n"
2687 " self._events = events\n"
2688 " def __iter__(self):\n"
2689 " events = []\n"
2690 " b = cElementTree.TreeBuilder()\n"
2691 " p = cElementTree.XMLParser(b)\n"
2692 " p._setevents(events, self._events)\n"
2693 " while 1:\n"
2694 " data = self._file.read(16384)\n"
2695 " if not data:\n"
2696 " break\n"
2697 " p.feed(data)\n"
2698 " for event in events:\n"
2699 " yield event\n"
2700 " del events[:]\n"
2701 " root = p.close()\n"
2702 " for event in events:\n"
2703 " yield event\n"
2704 " self.root = root\n"
2705 "cElementTree.iterparse = iterparse\n"
2706 #endif
2708 "def PI(target, text=None):\n" /* public */
2709 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2710 " element.text = target\n"
2711 " if text:\n"
2712 " element.text = element.text + ' ' + text\n"
2713 " return element\n"
2715 " elem = cElementTree.Element(ET.PI)\n"
2716 " elem.text = text\n"
2717 " return elem\n"
2718 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2720 "def XML(text):\n" /* public */
2721 " parser = cElementTree.XMLParser()\n"
2722 " parser.feed(text)\n"
2723 " return parser.close()\n"
2724 "cElementTree.XML = cElementTree.fromstring = XML\n"
2726 "def XMLID(text):\n" /* public */
2727 " tree = XML(text)\n"
2728 " ids = {}\n"
2729 " for elem in tree.getiterator():\n"
2730 " id = elem.get('id')\n"
2731 " if id:\n"
2732 " ids[id] = elem\n"
2733 " return tree, ids\n"
2734 "cElementTree.XMLID = XMLID\n"
2736 "cElementTree.dump = ET.dump\n"
2737 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2738 "cElementTree.iselement = ET.iselement\n"
2739 "cElementTree.QName = ET.QName\n"
2740 "cElementTree.tostring = ET.tostring\n"
2741 "cElementTree.VERSION = '" VERSION "'\n"
2742 "cElementTree.__version__ = '" VERSION "'\n"
2743 "cElementTree.XMLParserError = SyntaxError\n"
2747 PyRun_String(bootstrap, Py_file_input, g, NULL);
2749 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2751 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2752 if (elementtree_copyelement_obj) {
2753 /* reduce hack needed; enable reduce method */
2754 PyMethodDef* mp;
2755 for (mp = element_methods; mp->ml_name; mp++)
2756 if (mp->ml_meth == (PyCFunction) element_reduce) {
2757 mp->ml_name = "__reduce__";
2758 break;
2760 } else
2761 PyErr_Clear();
2762 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2763 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2765 #if defined(USE_PYEXPAT_CAPI)
2766 /* link against pyexpat, if possible */
2767 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2768 if (capi &&
2769 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2770 capi->size <= sizeof(*expat_capi) &&
2771 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2772 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2773 capi->MICRO_VERSION == XML_MICRO_VERSION)
2774 expat_capi = capi;
2775 else
2776 expat_capi = NULL;
2777 #endif