Reword paragraph to clarify
[pytest.git] / Modules / _elementtree.c
blobb468e71b3d4cd8884aa054ea1001db19a5ce481f
1 /*
2 * ElementTree
3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
42 * info@pythonware.com
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
49 #include "Python.h"
51 #define VERSION "1.0.6-snapshot"
53 /* -------------------------------------------------------------------- */
54 /* configuration */
56 /* Leave defined to include the expat-based XMLParser type */
57 #define USE_EXPAT
/* Define to route all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
63 allocations. */
64 #define STATIC_CHILDREN 4
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
76 /* -------------------------------------------------------------------- */
/* Optional allocation tracing.  Change "#if 0" to "#if 1" to make
   ALLOC/RELEASE print a running byte count together with the given
   comment; otherwise both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (int) (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (int) (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   `type`; on MSVC the helper is additionally inlined and uses the
   fastcall convention */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
/* compatibility macros for older interpreters */

/* before 2.5: no Py_ssize_t, and sequence length uses the inquiry slot */
#if (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define lenfunc inquiry
#endif

/* before 2.4: fall back to the non-exact dictionary check */
#if (PY_VERSION_HEX < 0x02040000)
#define PyDict_CheckExact PyDict_Check
#endif

/* before 2.2: fall back to the non-exact list and string checks */
#if (PY_VERSION_HEX < 0x02020000)
#define PyList_CheckExact PyList_Check
#define PyString_CheckExact PyString_Check
#endif

#if (PY_VERSION_HEX >= 0x01060000) && (PY_VERSION_HEX < 0x02020000)
#define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
#endif

/* supply Py_RETURN_NONE when the headers do not define it */
#if !defined(Py_RETURN_NONE)
#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored value is a 'join' flag and the remaining bits are
   the object pointer.  Every use of text or tail as an object pointer
   must therefore go through JOIN_OBJ; see the comments in the
   ElementObject definition for what the flag means. */

/* strip the flag bit, giving the plain object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t) 1))
/* read the flag bit (0 or 1) */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & (Py_uintptr_t) 1)
/* combine the object pointer of p with the given flag value */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
125 /* glue functions (see the init function for details) */
126 static PyObject* elementtree_copyelement_obj;
127 static PyObject* elementtree_deepcopy_obj;
128 static PyObject* elementtree_getiterator_obj;
129 static PyObject* elementpath_obj;
131 /* helpers */
133 LOCAL(PyObject*)
134 deepcopy(PyObject* object, PyObject* memo)
136 /* do a deep copy of the given object */
138 PyObject* args;
139 PyObject* result;
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
146 return NULL;
149 args = PyTuple_New(2);
150 if (!args)
151 return NULL;
153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158 Py_DECREF(args);
160 return result;
163 LOCAL(PyObject*)
164 list_join(PyObject* list)
166 /* join list elements (destroying the list in the process) */
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
197 args = PyTuple_New(1);
198 if (!args)
199 return NULL;
201 PyTuple_SET_ITEM(args, 0, list);
203 result = PyObject_CallObject(function, args);
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
209 return result;
212 #if (PY_VERSION_HEX < 0x02020000)
213 LOCAL(int)
214 PyDict_Update(PyObject* dict, PyObject* other)
216 /* PyDict_Update emulation for 2.1 and earlier */
218 PyObject* res;
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
224 Py_DECREF(res);
225 return 0;
227 #endif
229 /* -------------------------------------------------------------------- */
230 /* the element type */
232 typedef struct {
234 /* attributes (a dictionary object), or None if no attributes */
235 PyObject* attrib;
237 /* child elements */
238 int length; /* actual number of items */
239 int allocated; /* allocated items */
241 /* this either points to _children or to a malloced buffer */
242 PyObject* *children;
244 PyObject* _children[STATIC_CHILDREN];
246 } ElementObjectExtra;
248 typedef struct {
249 PyObject_HEAD
251 /* element tag (a string). */
252 PyObject* tag;
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
260 PyObject* text;
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
264 PyObject* tail;
266 ElementObjectExtra* extra;
268 } ElementObject;
270 staticforward PyTypeObject Element_Type;
272 #define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
274 /* -------------------------------------------------------------------- */
275 /* element constructor and destructor */
277 LOCAL(int)
278 element_new_extra(ElementObject* self, PyObject* attrib)
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
284 if (!attrib)
285 attrib = Py_None;
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
294 return 0;
297 LOCAL(void)
298 element_dealloc_extra(ElementObject* self)
300 int i;
302 Py_DECREF(self->extra->attrib);
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
310 PyObject_Free(self->extra);
313 LOCAL(PyObject*)
314 element_new(PyObject* tag, PyObject* attrib)
316 ElementObject* self;
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
326 self->extra = NULL;
328 if (attrib != Py_None) {
330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
332 return NULL;
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
341 Py_INCREF(tag);
342 self->tag = tag;
344 Py_INCREF(Py_None);
345 self->text = Py_None;
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
350 ALLOC(sizeof(ElementObject), "create element");
352 return (PyObject*) self;
355 LOCAL(int)
356 element_resize(ElementObject* self, int extra)
358 int size;
359 PyObject* *children;
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
364 if (!self->extra)
365 element_new_extra(self, NULL);
367 size = self->extra->length + extra;
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
372 if (self->extra->children != self->extra->_children) {
373 children = PyObject_Realloc(self->extra->children,
374 size * sizeof(PyObject*));
375 if (!children)
376 goto nomemory;
377 } else {
378 children = PyObject_Malloc(size * sizeof(PyObject*));
379 if (!children)
380 goto nomemory;
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children, self->extra->children,
383 self->extra->length * sizeof(PyObject*));
385 self->extra->children = children;
386 self->extra->allocated = size;
389 return 0;
391 nomemory:
392 PyErr_NoMemory();
393 return -1;
396 LOCAL(int)
397 element_add_subelement(ElementObject* self, PyObject* element)
399 /* add a child element to a parent */
401 if (element_resize(self, 1) < 0)
402 return -1;
404 Py_INCREF(element);
405 self->extra->children[self->extra->length] = element;
407 self->extra->length++;
409 return 0;
412 LOCAL(PyObject*)
413 element_get_attrib(ElementObject* self)
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
418 PyObject* res = self->extra->attrib;
420 if (res == Py_None) {
421 /* create missing dictionary */
422 res = PyDict_New();
423 if (!res)
424 return NULL;
425 self->extra->attrib = res;
428 return res;
431 LOCAL(PyObject*)
432 element_get_text(ElementObject* self)
434 /* return borrowed reference to text attribute */
436 PyObject* res = self->text;
438 if (JOIN_GET(res)) {
439 res = JOIN_OBJ(res);
440 if (PyList_CheckExact(res)) {
441 res = list_join(res);
442 if (!res)
443 return NULL;
444 self->text = res;
448 return res;
451 LOCAL(PyObject*)
452 element_get_tail(ElementObject* self)
454 /* return borrowed reference to text attribute */
456 PyObject* res = self->tail;
458 if (JOIN_GET(res)) {
459 res = JOIN_OBJ(res);
460 if (PyList_CheckExact(res)) {
461 res = list_join(res);
462 if (!res)
463 return NULL;
464 self->tail = res;
468 return res;
471 static PyObject*
472 element(PyObject* self, PyObject* args, PyObject* kw)
474 PyObject* elem;
476 PyObject* tag;
477 PyObject* attrib = NULL;
478 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
479 &PyDict_Type, &attrib))
480 return NULL;
482 if (attrib || kw) {
483 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
484 if (!attrib)
485 return NULL;
486 if (kw)
487 PyDict_Update(attrib, kw);
488 } else {
489 Py_INCREF(Py_None);
490 attrib = Py_None;
493 elem = element_new(tag, attrib);
495 Py_DECREF(attrib);
497 return elem;
500 static PyObject*
501 subelement(PyObject* self, PyObject* args, PyObject* kw)
503 PyObject* elem;
505 ElementObject* parent;
506 PyObject* tag;
507 PyObject* attrib = NULL;
508 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
509 &Element_Type, &parent, &tag,
510 &PyDict_Type, &attrib))
511 return NULL;
513 if (attrib || kw) {
514 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
515 if (!attrib)
516 return NULL;
517 if (kw)
518 PyDict_Update(attrib, kw);
519 } else {
520 Py_INCREF(Py_None);
521 attrib = Py_None;
524 elem = element_new(tag, attrib);
526 Py_DECREF(attrib);
528 if (element_add_subelement(parent, elem) < 0) {
529 Py_DECREF(elem);
530 return NULL;
533 return elem;
536 static void
537 element_dealloc(ElementObject* self)
539 if (self->extra)
540 element_dealloc_extra(self);
542 /* discard attributes */
543 Py_DECREF(self->tag);
544 Py_DECREF(JOIN_OBJ(self->text));
545 Py_DECREF(JOIN_OBJ(self->tail));
547 RELEASE(sizeof(ElementObject), "destroy element");
549 PyObject_Del(self);
552 /* -------------------------------------------------------------------- */
553 /* methods (in alphabetical order) */
555 static PyObject*
556 element_append(ElementObject* self, PyObject* args)
558 PyObject* element;
559 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
560 return NULL;
562 if (element_add_subelement(self, element) < 0)
563 return NULL;
565 Py_RETURN_NONE;
568 static PyObject*
569 element_clear(ElementObject* self, PyObject* args)
571 if (!PyArg_ParseTuple(args, ":clear"))
572 return NULL;
574 if (self->extra) {
575 element_dealloc_extra(self);
576 self->extra = NULL;
579 Py_INCREF(Py_None);
580 Py_DECREF(JOIN_OBJ(self->text));
581 self->text = Py_None;
583 Py_INCREF(Py_None);
584 Py_DECREF(JOIN_OBJ(self->tail));
585 self->tail = Py_None;
587 Py_RETURN_NONE;
590 static PyObject*
591 element_copy(ElementObject* self, PyObject* args)
593 int i;
594 ElementObject* element;
596 if (!PyArg_ParseTuple(args, ":__copy__"))
597 return NULL;
599 element = (ElementObject*) element_new(
600 self->tag, (self->extra) ? self->extra->attrib : Py_None
602 if (!element)
603 return NULL;
605 Py_DECREF(JOIN_OBJ(element->text));
606 element->text = self->text;
607 Py_INCREF(JOIN_OBJ(element->text));
609 Py_DECREF(JOIN_OBJ(element->tail));
610 element->tail = self->tail;
611 Py_INCREF(JOIN_OBJ(element->tail));
613 if (self->extra) {
615 if (element_resize(element, self->extra->length) < 0) {
616 Py_DECREF(element);
617 return NULL;
620 for (i = 0; i < self->extra->length; i++) {
621 Py_INCREF(self->extra->children[i]);
622 element->extra->children[i] = self->extra->children[i];
625 element->extra->length = self->extra->length;
629 return (PyObject*) element;
632 static PyObject*
633 element_deepcopy(ElementObject* self, PyObject* args)
635 int i;
636 ElementObject* element;
637 PyObject* tag;
638 PyObject* attrib;
639 PyObject* text;
640 PyObject* tail;
641 PyObject* id;
643 PyObject* memo;
644 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
645 return NULL;
647 tag = deepcopy(self->tag, memo);
648 if (!tag)
649 return NULL;
651 if (self->extra) {
652 attrib = deepcopy(self->extra->attrib, memo);
653 if (!attrib) {
654 Py_DECREF(tag);
655 return NULL;
657 } else {
658 Py_INCREF(Py_None);
659 attrib = Py_None;
662 element = (ElementObject*) element_new(tag, attrib);
664 Py_DECREF(tag);
665 Py_DECREF(attrib);
667 if (!element)
668 return NULL;
670 text = deepcopy(JOIN_OBJ(self->text), memo);
671 if (!text)
672 goto error;
673 Py_DECREF(element->text);
674 element->text = JOIN_SET(text, JOIN_GET(self->text));
676 tail = deepcopy(JOIN_OBJ(self->tail), memo);
677 if (!tail)
678 goto error;
679 Py_DECREF(element->tail);
680 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
682 if (self->extra) {
684 if (element_resize(element, self->extra->length) < 0)
685 goto error;
687 for (i = 0; i < self->extra->length; i++) {
688 PyObject* child = deepcopy(self->extra->children[i], memo);
689 if (!child) {
690 element->extra->length = i;
691 goto error;
693 element->extra->children[i] = child;
696 element->extra->length = self->extra->length;
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id = PyInt_FromLong((Py_uintptr_t) self);
703 i = PyDict_SetItem(memo, id, (PyObject*) element);
705 Py_DECREF(id);
707 if (i < 0)
708 goto error;
710 return (PyObject*) element;
712 error:
713 Py_DECREF(element);
714 return NULL;
717 LOCAL(int)
718 checkpath(PyObject* tag)
720 Py_ssize_t i;
721 int check = 1;
723 /* check if a tag contains an xpath character */
725 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
727 #if defined(Py_USING_UNICODE)
728 if (PyUnicode_Check(tag)) {
729 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
730 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
738 return 0;
740 #endif
741 if (PyString_Check(tag)) {
742 char *p = PyString_AS_STRING(tag);
743 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
744 if (p[i] == '{')
745 check = 0;
746 else if (p[i] == '}')
747 check = 1;
748 else if (check && PATHCHAR(p[i]))
749 return 1;
751 return 0;
754 return 1; /* unknown type; might be path expression */
757 static PyObject*
758 element_find(ElementObject* self, PyObject* args)
760 int i;
762 PyObject* tag;
763 if (!PyArg_ParseTuple(args, "O:find", &tag))
764 return NULL;
766 if (checkpath(tag))
767 return PyObject_CallMethod(
768 elementpath_obj, "find", "OO", self, tag
771 if (!self->extra)
772 Py_RETURN_NONE;
774 for (i = 0; i < self->extra->length; i++) {
775 PyObject* item = self->extra->children[i];
776 if (Element_CheckExact(item) &&
777 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
778 Py_INCREF(item);
779 return item;
783 Py_RETURN_NONE;
786 static PyObject*
787 element_findtext(ElementObject* self, PyObject* args)
789 int i;
791 PyObject* tag;
792 PyObject* default_value = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
794 return NULL;
796 if (checkpath(tag))
797 return PyObject_CallMethod(
798 elementpath_obj, "findtext", "OOO", self, tag, default_value
801 if (!self->extra) {
802 Py_INCREF(default_value);
803 return default_value;
806 for (i = 0; i < self->extra->length; i++) {
807 ElementObject* item = (ElementObject*) self->extra->children[i];
808 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
809 PyObject* text = element_get_text(item);
810 if (text == Py_None)
811 return PyString_FromString("");
812 Py_INCREF(text);
813 return text;
817 Py_INCREF(default_value);
818 return default_value;
821 static PyObject*
822 element_findall(ElementObject* self, PyObject* args)
824 int i;
825 PyObject* out;
827 PyObject* tag;
828 if (!PyArg_ParseTuple(args, "O:findall", &tag))
829 return NULL;
831 if (checkpath(tag))
832 return PyObject_CallMethod(
833 elementpath_obj, "findall", "OO", self, tag
836 out = PyList_New(0);
837 if (!out)
838 return NULL;
840 if (!self->extra)
841 return out;
843 for (i = 0; i < self->extra->length; i++) {
844 PyObject* item = self->extra->children[i];
845 if (Element_CheckExact(item) &&
846 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
847 if (PyList_Append(out, item) < 0) {
848 Py_DECREF(out);
849 return NULL;
854 return out;
857 static PyObject*
858 element_get(ElementObject* self, PyObject* args)
860 PyObject* value;
862 PyObject* key;
863 PyObject* default_value = Py_None;
864 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
865 return NULL;
867 if (!self->extra || self->extra->attrib == Py_None)
868 value = default_value;
869 else {
870 value = PyDict_GetItem(self->extra->attrib, key);
871 if (!value)
872 value = default_value;
875 Py_INCREF(value);
876 return value;
879 static PyObject*
880 element_getchildren(ElementObject* self, PyObject* args)
882 int i;
883 PyObject* list;
885 if (!PyArg_ParseTuple(args, ":getchildren"))
886 return NULL;
888 if (!self->extra)
889 return PyList_New(0);
891 list = PyList_New(self->extra->length);
892 if (!list)
893 return NULL;
895 for (i = 0; i < self->extra->length; i++) {
896 PyObject* item = self->extra->children[i];
897 Py_INCREF(item);
898 PyList_SET_ITEM(list, i, item);
901 return list;
904 static PyObject*
905 element_getiterator(ElementObject* self, PyObject* args)
907 PyObject* result;
909 PyObject* tag = Py_None;
910 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
911 return NULL;
913 if (!elementtree_getiterator_obj) {
914 PyErr_SetString(
915 PyExc_RuntimeError,
916 "getiterator helper not found"
918 return NULL;
921 args = PyTuple_New(2);
922 if (!args)
923 return NULL;
925 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
926 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
928 result = PyObject_CallObject(elementtree_getiterator_obj, args);
930 Py_DECREF(args);
932 return result;
935 static PyObject*
936 element_getitem(PyObject* self_, Py_ssize_t index)
938 ElementObject* self = (ElementObject*) self_;
940 if (!self->extra || index < 0 || index >= self->extra->length) {
941 PyErr_SetString(
942 PyExc_IndexError,
943 "child index out of range"
945 return NULL;
948 Py_INCREF(self->extra->children[index]);
949 return self->extra->children[index];
952 static PyObject*
953 element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
955 ElementObject* self = (ElementObject*) self_;
956 Py_ssize_t i;
957 PyObject* list;
959 if (!self->extra)
960 return PyList_New(0);
962 /* standard clamping */
963 if (start < 0)
964 start = 0;
965 if (end < 0)
966 end = 0;
967 if (end > self->extra->length)
968 end = self->extra->length;
969 if (start > end)
970 start = end;
972 list = PyList_New(end - start);
973 if (!list)
974 return NULL;
976 for (i = start; i < end; i++) {
977 PyObject* item = self->extra->children[i];
978 Py_INCREF(item);
979 PyList_SET_ITEM(list, i - start, item);
982 return list;
985 static PyObject*
986 element_insert(ElementObject* self, PyObject* args)
988 int i;
990 int index;
991 PyObject* element;
992 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
993 &Element_Type, &element))
994 return NULL;
996 if (!self->extra)
997 element_new_extra(self, NULL);
999 if (index < 0)
1000 index = 0;
1001 if (index > self->extra->length)
1002 index = self->extra->length;
1004 if (element_resize(self, 1) < 0)
1005 return NULL;
1007 for (i = self->extra->length; i > index; i--)
1008 self->extra->children[i] = self->extra->children[i-1];
1010 Py_INCREF(element);
1011 self->extra->children[index] = element;
1013 self->extra->length++;
1015 Py_RETURN_NONE;
1018 static PyObject*
1019 element_items(ElementObject* self, PyObject* args)
1021 if (!PyArg_ParseTuple(args, ":items"))
1022 return NULL;
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1027 return PyDict_Items(self->extra->attrib);
1030 static PyObject*
1031 element_keys(ElementObject* self, PyObject* args)
1033 if (!PyArg_ParseTuple(args, ":keys"))
1034 return NULL;
1036 if (!self->extra || self->extra->attrib == Py_None)
1037 return PyList_New(0);
1039 return PyDict_Keys(self->extra->attrib);
1042 static Py_ssize_t
1043 element_length(ElementObject* self)
1045 if (!self->extra)
1046 return 0;
1048 return self->extra->length;
1051 static PyObject*
1052 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1054 PyObject* elem;
1056 PyObject* tag;
1057 PyObject* attrib;
1058 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1059 return NULL;
1061 attrib = PyDict_Copy(attrib);
1062 if (!attrib)
1063 return NULL;
1065 elem = element_new(tag, attrib);
1067 Py_DECREF(attrib);
1069 return elem;
1072 static PyObject*
1073 element_reduce(ElementObject* self, PyObject* args)
1075 if (!PyArg_ParseTuple(args, ":__reduce__"))
1076 return NULL;
1078 /* Hack alert: This method is used to work around a __copy__
1079 problem on certain 2.3 and 2.4 versions. To save time and
1080 simplify the code, we create the copy in here, and use a dummy
1081 copyelement helper to trick the copy module into doing the
1082 right thing. */
1084 if (!elementtree_copyelement_obj) {
1085 PyErr_SetString(
1086 PyExc_RuntimeError,
1087 "copyelement helper not found"
1089 return NULL;
1092 return Py_BuildValue(
1093 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1097 static PyObject*
1098 element_remove(ElementObject* self, PyObject* args)
1100 int i;
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1104 return NULL;
1106 if (!self->extra) {
1107 /* element has no children, so raise exception */
1108 PyErr_SetString(
1109 PyExc_ValueError,
1110 "list.remove(x): x not in list"
1112 return NULL;
1115 for (i = 0; i < self->extra->length; i++) {
1116 if (self->extra->children[i] == element)
1117 break;
1118 if (PyObject_Compare(self->extra->children[i], element) == 0)
1119 break;
1122 if (i == self->extra->length) {
1123 /* element is not in children, so raise exception */
1124 PyErr_SetString(
1125 PyExc_ValueError,
1126 "list.remove(x): x not in list"
1128 return NULL;
1131 Py_DECREF(self->extra->children[i]);
1133 self->extra->length--;
1135 for (; i < self->extra->length; i++)
1136 self->extra->children[i] = self->extra->children[i+1];
1138 Py_RETURN_NONE;
1141 static PyObject*
1142 element_repr(ElementObject* self)
1144 PyObject* repr;
1145 char buffer[100];
1147 repr = PyString_FromString("<Element ");
1149 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1151 sprintf(buffer, " at %p>", self);
1152 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1154 return repr;
1157 static PyObject*
1158 element_set(ElementObject* self, PyObject* args)
1160 PyObject* attrib;
1162 PyObject* key;
1163 PyObject* value;
1164 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1165 return NULL;
1167 if (!self->extra)
1168 element_new_extra(self, NULL);
1170 attrib = element_get_attrib(self);
1171 if (!attrib)
1172 return NULL;
1174 if (PyDict_SetItem(attrib, key, value) < 0)
1175 return NULL;
1177 Py_RETURN_NONE;
1180 static int
1181 element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
1183 ElementObject* self = (ElementObject*) self_;
1184 Py_ssize_t i, new, old;
1185 PyObject* recycle = NULL;
1187 if (!self->extra)
1188 element_new_extra(self, NULL);
1190 /* standard clamping */
1191 if (start < 0)
1192 start = 0;
1193 if (end < 0)
1194 end = 0;
1195 if (end > self->extra->length)
1196 end = self->extra->length;
1197 if (start > end)
1198 start = end;
1200 old = end - start;
1202 if (item == NULL)
1203 new = 0;
1204 else if (PyList_CheckExact(item)) {
1205 new = PyList_GET_SIZE(item);
1206 } else {
1207 /* FIXME: support arbitrary sequences? */
1208 PyErr_Format(
1209 PyExc_TypeError,
1210 "expected list, not \"%.200s\"", item->ob_type->tp_name
1212 return -1;
1215 if (old > 0) {
1216 /* to avoid recursive calls to this method (via decref), move
1217 old items to the recycle bin here, and get rid of them when
1218 we're done modifying the element */
1219 recycle = PyList_New(old);
1220 for (i = 0; i < old; i++)
1221 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1224 if (new < old) {
1225 /* delete slice */
1226 for (i = end; i < self->extra->length; i++)
1227 self->extra->children[i + new - old] = self->extra->children[i];
1228 } else if (new > old) {
1229 /* insert slice */
1230 if (element_resize(self, new - old) < 0)
1231 return -1;
1232 for (i = self->extra->length-1; i >= end; i--)
1233 self->extra->children[i + new - old] = self->extra->children[i];
1236 /* replace the slice */
1237 for (i = 0; i < new; i++) {
1238 PyObject* element = PyList_GET_ITEM(item, i);
1239 Py_INCREF(element);
1240 self->extra->children[i + start] = element;
1243 self->extra->length += new - old;
1245 /* discard the recycle bin, and everything in it */
1246 Py_XDECREF(recycle);
1248 return 0;
1251 static int
1252 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1254 ElementObject* self = (ElementObject*) self_;
1255 int i;
1256 PyObject* old;
1258 if (!self->extra || index < 0 || index >= self->extra->length) {
1259 PyErr_SetString(
1260 PyExc_IndexError,
1261 "child assignment index out of range");
1262 return -1;
1265 old = self->extra->children[index];
1267 if (item) {
1268 Py_INCREF(item);
1269 self->extra->children[index] = item;
1270 } else {
1271 self->extra->length--;
1272 for (i = index; i < self->extra->length; i++)
1273 self->extra->children[i] = self->extra->children[i+1];
1276 Py_DECREF(old);
1278 return 0;
1281 static PyMethodDef element_methods[] = {
1283 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1285 {"get", (PyCFunction) element_get, METH_VARARGS},
1286 {"set", (PyCFunction) element_set, METH_VARARGS},
1288 {"find", (PyCFunction) element_find, METH_VARARGS},
1289 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1290 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1292 {"append", (PyCFunction) element_append, METH_VARARGS},
1293 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1294 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1296 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1297 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1299 {"items", (PyCFunction) element_items, METH_VARARGS},
1300 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1302 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1304 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1305 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1307 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1308 C objects correctly, so we have to fake it using a __reduce__-
1309 based hack (see the element_reduce implementation above for
1310 details). */
1312 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1313 using a runtime test to figure out if we need to fake things
1314 or now (see the init code below). The following entry is
1315 enabled only if the hack is needed. */
1317 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1319 {NULL, NULL}
1322 static PyObject*
1323 element_getattr(ElementObject* self, char* name)
1325 PyObject* res;
1327 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1328 if (res)
1329 return res;
1331 PyErr_Clear();
1333 if (strcmp(name, "tag") == 0)
1334 res = self->tag;
1335 else if (strcmp(name, "text") == 0)
1336 res = element_get_text(self);
1337 else if (strcmp(name, "tail") == 0) {
1338 res = element_get_tail(self);
1339 } else if (strcmp(name, "attrib") == 0) {
1340 if (!self->extra)
1341 element_new_extra(self, NULL);
1342 res = element_get_attrib(self);
1343 } else {
1344 PyErr_SetString(PyExc_AttributeError, name);
1345 return NULL;
1348 if (!res)
1349 return NULL;
1351 Py_INCREF(res);
1352 return res;
1355 static int
1356 element_setattr(ElementObject* self, const char* name, PyObject* value)
1358 if (value == NULL) {
1359 PyErr_SetString(
1360 PyExc_AttributeError,
1361 "can't delete element attributes"
1363 return -1;
1366 if (strcmp(name, "tag") == 0) {
1367 Py_DECREF(self->tag);
1368 self->tag = value;
1369 Py_INCREF(self->tag);
1370 } else if (strcmp(name, "text") == 0) {
1371 Py_DECREF(JOIN_OBJ(self->text));
1372 self->text = value;
1373 Py_INCREF(self->text);
1374 } else if (strcmp(name, "tail") == 0) {
1375 Py_DECREF(JOIN_OBJ(self->tail));
1376 self->tail = value;
1377 Py_INCREF(self->tail);
1378 } else if (strcmp(name, "attrib") == 0) {
1379 if (!self->extra)
1380 element_new_extra(self, NULL);
1381 Py_DECREF(self->extra->attrib);
1382 self->extra->attrib = value;
1383 Py_INCREF(self->extra->attrib);
1384 } else {
1385 PyErr_SetString(PyExc_AttributeError, name);
1386 return -1;
1389 return 0;
1392 static PySequenceMethods element_as_sequence = {
1393 (lenfunc) element_length,
1394 0, /* sq_concat */
1395 0, /* sq_repeat */
1396 element_getitem,
1397 element_getslice,
1398 element_setitem,
1399 element_setslice,
1402 statichere PyTypeObject Element_Type = {
1403 PyObject_HEAD_INIT(NULL)
1404 0, "Element", sizeof(ElementObject), 0,
1405 /* methods */
1406 (destructor)element_dealloc, /* tp_dealloc */
1407 0, /* tp_print */
1408 (getattrfunc)element_getattr, /* tp_getattr */
1409 (setattrfunc)element_setattr, /* tp_setattr */
1410 0, /* tp_compare */
1411 (reprfunc)element_repr, /* tp_repr */
1412 0, /* tp_as_number */
1413 &element_as_sequence, /* tp_as_sequence */
1416 /* ==================================================================== */
1417 /* the tree builder type */
1419 typedef struct {
1420 PyObject_HEAD
1422 PyObject* root; /* root node (first created node) */
1424 ElementObject* this; /* current node */
1425 ElementObject* last; /* most recently created node */
1427 PyObject* data; /* data collector (string or list), or NULL */
1429 PyObject* stack; /* element stack */
1430 Py_ssize_t index; /* current stack size (0=empty) */
1432 /* element tracing */
1433 PyObject* events; /* list of events, or NULL if not collecting */
1434 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1435 PyObject* end_event_obj;
1436 PyObject* start_ns_event_obj;
1437 PyObject* end_ns_event_obj;
1439 } TreeBuilderObject;
1441 staticforward PyTypeObject TreeBuilder_Type;
1443 #define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1445 /* -------------------------------------------------------------------- */
1446 /* constructor and destructor */
1448 LOCAL(PyObject*)
1449 treebuilder_new(void)
1451 TreeBuilderObject* self;
1453 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1454 if (self == NULL)
1455 return NULL;
1457 self->root = NULL;
1459 Py_INCREF(Py_None);
1460 self->this = (ElementObject*) Py_None;
1462 Py_INCREF(Py_None);
1463 self->last = (ElementObject*) Py_None;
1465 self->data = NULL;
1467 self->stack = PyList_New(20);
1468 self->index = 0;
1470 self->events = NULL;
1471 self->start_event_obj = self->end_event_obj = NULL;
1472 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1474 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1476 return (PyObject*) self;
1479 static PyObject*
1480 treebuilder(PyObject* self_, PyObject* args)
1482 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1483 return NULL;
1485 return treebuilder_new();
1488 static void
1489 treebuilder_dealloc(TreeBuilderObject* self)
1491 Py_XDECREF(self->end_ns_event_obj);
1492 Py_XDECREF(self->start_ns_event_obj);
1493 Py_XDECREF(self->end_event_obj);
1494 Py_XDECREF(self->start_event_obj);
1495 Py_XDECREF(self->events);
1496 Py_DECREF(self->stack);
1497 Py_XDECREF(self->data);
1498 Py_DECREF(self->last);
1499 Py_DECREF(self->this);
1500 Py_XDECREF(self->root);
1502 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1504 PyObject_Del(self);
1507 /* -------------------------------------------------------------------- */
1508 /* handlers */
1510 LOCAL(PyObject*)
1511 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1512 PyObject* standalone)
1514 Py_RETURN_NONE;
1517 LOCAL(PyObject*)
1518 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1519 PyObject* attrib)
1521 PyObject* node;
1522 PyObject* this;
1524 if (self->data) {
1525 if (self->this == self->last) {
1526 Py_DECREF(JOIN_OBJ(self->last->text));
1527 self->last->text = JOIN_SET(
1528 self->data, PyList_CheckExact(self->data)
1530 } else {
1531 Py_DECREF(JOIN_OBJ(self->last->tail));
1532 self->last->tail = JOIN_SET(
1533 self->data, PyList_CheckExact(self->data)
1536 self->data = NULL;
1539 node = element_new(tag, attrib);
1540 if (!node)
1541 return NULL;
1543 this = (PyObject*) self->this;
1545 if (this != Py_None) {
1546 if (element_add_subelement((ElementObject*) this, node) < 0)
1547 goto error;
1548 } else {
1549 if (self->root) {
1550 PyErr_SetString(
1551 PyExc_SyntaxError,
1552 "multiple elements on top level"
1554 goto error;
1556 Py_INCREF(node);
1557 self->root = node;
1560 if (self->index < PyList_GET_SIZE(self->stack)) {
1561 if (PyList_SetItem(self->stack, self->index, this) < 0)
1562 goto error;
1563 Py_INCREF(this);
1564 } else {
1565 if (PyList_Append(self->stack, this) < 0)
1566 goto error;
1568 self->index++;
1570 Py_DECREF(this);
1571 Py_INCREF(node);
1572 self->this = (ElementObject*) node;
1574 Py_DECREF(self->last);
1575 Py_INCREF(node);
1576 self->last = (ElementObject*) node;
1578 if (self->start_event_obj) {
1579 PyObject* res;
1580 PyObject* action = self->start_event_obj;
1581 res = PyTuple_New(2);
1582 if (res) {
1583 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1584 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1585 PyList_Append(self->events, res);
1586 Py_DECREF(res);
1587 } else
1588 PyErr_Clear(); /* FIXME: propagate error */
1591 return node;
1593 error:
1594 Py_DECREF(node);
1595 return NULL;
1598 LOCAL(PyObject*)
1599 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1601 if (!self->data) {
1602 /* store the first item as is */
1603 Py_INCREF(data); self->data = data;
1604 } else {
1605 /* more than one item; use a list to collect items */
1606 if (PyString_CheckExact(self->data) && self->data->ob_refcnt == 1 &&
1607 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1608 /* expat often generates single character data sections; handle
1609 the most common case by resizing the existing string... */
1610 Py_ssize_t size = PyString_GET_SIZE(self->data);
1611 if (_PyString_Resize(&self->data, size + 1) < 0)
1612 return NULL;
1613 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1614 } else if (PyList_CheckExact(self->data)) {
1615 if (PyList_Append(self->data, data) < 0)
1616 return NULL;
1617 } else {
1618 PyObject* list = PyList_New(2);
1619 if (!list)
1620 return NULL;
1621 PyList_SET_ITEM(list, 0, self->data);
1622 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1623 self->data = list;
1627 Py_RETURN_NONE;
1630 LOCAL(PyObject*)
1631 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1633 PyObject* item;
1635 if (self->data) {
1636 if (self->this == self->last) {
1637 Py_DECREF(JOIN_OBJ(self->last->text));
1638 self->last->text = JOIN_SET(
1639 self->data, PyList_CheckExact(self->data)
1641 } else {
1642 Py_DECREF(JOIN_OBJ(self->last->tail));
1643 self->last->tail = JOIN_SET(
1644 self->data, PyList_CheckExact(self->data)
1647 self->data = NULL;
1650 if (self->index == 0) {
1651 PyErr_SetString(
1652 PyExc_IndexError,
1653 "pop from empty stack"
1655 return NULL;
1658 self->index--;
1660 item = PyList_GET_ITEM(self->stack, self->index);
1661 Py_INCREF(item);
1663 Py_DECREF(self->last);
1665 self->last = (ElementObject*) self->this;
1666 self->this = (ElementObject*) item;
1668 if (self->end_event_obj) {
1669 PyObject* res;
1670 PyObject* action = self->end_event_obj;
1671 PyObject* node = (PyObject*) self->last;
1672 res = PyTuple_New(2);
1673 if (res) {
1674 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1675 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1676 PyList_Append(self->events, res);
1677 Py_DECREF(res);
1678 } else
1679 PyErr_Clear(); /* FIXME: propagate error */
1682 Py_INCREF(self->last);
1683 return (PyObject*) self->last;
1686 LOCAL(void)
1687 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1688 const char* prefix, const char *uri)
1690 PyObject* res;
1691 PyObject* action;
1692 PyObject* parcel;
1694 if (!self->events)
1695 return;
1697 if (start) {
1698 if (!self->start_ns_event_obj)
1699 return;
1700 action = self->start_ns_event_obj;
1701 /* FIXME: prefix and uri use utf-8 encoding! */
1702 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1703 if (!parcel)
1704 return;
1705 Py_INCREF(action);
1706 } else {
1707 if (!self->end_ns_event_obj)
1708 return;
1709 action = self->end_ns_event_obj;
1710 Py_INCREF(action);
1711 parcel = Py_None;
1712 Py_INCREF(parcel);
1715 res = PyTuple_New(2);
1717 if (res) {
1718 PyTuple_SET_ITEM(res, 0, action);
1719 PyTuple_SET_ITEM(res, 1, parcel);
1720 PyList_Append(self->events, res);
1721 Py_DECREF(res);
1722 } else
1723 PyErr_Clear(); /* FIXME: propagate error */
1726 /* -------------------------------------------------------------------- */
1727 /* methods (in alphabetical order) */
1729 static PyObject*
1730 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1732 PyObject* data;
1733 if (!PyArg_ParseTuple(args, "O:data", &data))
1734 return NULL;
1736 return treebuilder_handle_data(self, data);
1739 static PyObject*
1740 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1742 PyObject* tag;
1743 if (!PyArg_ParseTuple(args, "O:end", &tag))
1744 return NULL;
1746 return treebuilder_handle_end(self, tag);
1749 LOCAL(PyObject*)
1750 treebuilder_done(TreeBuilderObject* self)
1752 PyObject* res;
1754 /* FIXME: check stack size? */
1756 if (self->root)
1757 res = self->root;
1758 else
1759 res = Py_None;
1761 Py_INCREF(res);
1762 return res;
1765 static PyObject*
1766 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1768 if (!PyArg_ParseTuple(args, ":close"))
1769 return NULL;
1771 return treebuilder_done(self);
1774 static PyObject*
1775 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1777 PyObject* tag;
1778 PyObject* attrib = Py_None;
1779 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1780 return NULL;
1782 return treebuilder_handle_start(self, tag, attrib);
1785 static PyObject*
1786 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1788 PyObject* encoding;
1789 PyObject* standalone;
1790 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1791 return NULL;
1793 return treebuilder_handle_xml(self, encoding, standalone);
1796 static PyMethodDef treebuilder_methods[] = {
1797 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1798 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1799 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1800 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1801 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1802 {NULL, NULL}
1805 static PyObject*
1806 treebuilder_getattr(TreeBuilderObject* self, char* name)
1808 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1811 statichere PyTypeObject TreeBuilder_Type = {
1812 PyObject_HEAD_INIT(NULL)
1813 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1814 /* methods */
1815 (destructor)treebuilder_dealloc, /* tp_dealloc */
1816 0, /* tp_print */
1817 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1820 /* ==================================================================== */
1821 /* the expat interface */
1823 #if defined(USE_EXPAT)
1825 #include "expat.h"
1827 #if defined(USE_PYEXPAT_CAPI)
1828 #include "pyexpat.h"
1829 static struct PyExpat_CAPI* expat_capi;
1830 #define EXPAT(func) (expat_capi->func)
1831 #else
1832 #define EXPAT(func) (XML_##func)
1833 #endif
1835 typedef struct {
1836 PyObject_HEAD
1838 XML_Parser parser;
1840 PyObject* target;
1841 PyObject* entity;
1843 PyObject* names;
1845 PyObject* handle_xml;
1846 PyObject* handle_start;
1847 PyObject* handle_data;
1848 PyObject* handle_end;
1850 PyObject* handle_comment;
1851 PyObject* handle_pi;
1853 } XMLParserObject;
1855 staticforward PyTypeObject XMLParser_Type;
1857 /* helpers */
1859 #if defined(Py_USING_UNICODE)
1860 LOCAL(int)
1861 checkstring(const char* string, int size)
1863 int i;
1865 /* check if an 8-bit string contains UTF-8 characters */
1866 for (i = 0; i < size; i++)
1867 if (string[i] & 0x80)
1868 return 1;
1870 return 0;
1872 #endif
1874 LOCAL(PyObject*)
1875 makestring(const char* string, int size)
1877 /* convert a UTF-8 string to either a 7-bit ascii string or a
1878 Unicode string */
1880 #if defined(Py_USING_UNICODE)
1881 if (checkstring(string, size))
1882 return PyUnicode_DecodeUTF8(string, size, "strict");
1883 #endif
1885 return PyString_FromStringAndSize(string, size);
1888 LOCAL(PyObject*)
1889 makeuniversal(XMLParserObject* self, const char* string)
1891 /* convert a UTF-8 tag/attribute name from the expat parser
1892 to a universal name string */
1894 int size = strlen(string);
1895 PyObject* key;
1896 PyObject* value;
1898 /* look the 'raw' name up in the names dictionary */
1899 key = PyString_FromStringAndSize(string, size);
1900 if (!key)
1901 return NULL;
1903 value = PyDict_GetItem(self->names, key);
1905 if (value) {
1906 Py_INCREF(value);
1907 } else {
1908 /* new name. convert to universal name, and decode as
1909 necessary */
1911 PyObject* tag;
1912 char* p;
1913 int i;
1915 /* look for namespace separator */
1916 for (i = 0; i < size; i++)
1917 if (string[i] == '}')
1918 break;
1919 if (i != size) {
1920 /* convert to universal name */
1921 tag = PyString_FromStringAndSize(NULL, size+1);
1922 p = PyString_AS_STRING(tag);
1923 p[0] = '{';
1924 memcpy(p+1, string, size);
1925 size++;
1926 } else {
1927 /* plain name; use key as tag */
1928 Py_INCREF(key);
1929 tag = key;
1932 /* decode universal name */
1933 #if defined(Py_USING_UNICODE)
1934 /* inline makestring, to avoid duplicating the source string if
1935 it's not an utf-8 string */
1936 p = PyString_AS_STRING(tag);
1937 if (checkstring(p, size)) {
1938 value = PyUnicode_DecodeUTF8(p, size, "strict");
1939 Py_DECREF(tag);
1940 if (!value) {
1941 Py_DECREF(key);
1942 return NULL;
1944 } else
1945 #endif
1946 value = tag; /* use tag as is */
1948 /* add to names dictionary */
1949 if (PyDict_SetItem(self->names, key, value) < 0) {
1950 Py_DECREF(key);
1951 Py_DECREF(value);
1952 return NULL;
1956 Py_DECREF(key);
1957 return value;
1960 /* -------------------------------------------------------------------- */
1961 /* handlers */
1963 static void
1964 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1965 int data_len)
1967 PyObject* key;
1968 PyObject* value;
1969 PyObject* res;
1971 if (data_len < 2 || data_in[0] != '&')
1972 return;
1974 key = makestring(data_in + 1, data_len - 2);
1975 if (!key)
1976 return;
1978 value = PyDict_GetItem(self->entity, key);
1980 if (value) {
1981 if (TreeBuilder_CheckExact(self->target))
1982 res = treebuilder_handle_data(
1983 (TreeBuilderObject*) self->target, value
1985 else if (self->handle_data)
1986 res = PyObject_CallFunction(self->handle_data, "O", value);
1987 else
1988 res = NULL;
1989 Py_XDECREF(res);
1990 } else {
1991 PyErr_Format(
1992 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
1993 PyString_AS_STRING(key),
1994 EXPAT(GetErrorLineNumber)(self->parser),
1995 EXPAT(GetErrorColumnNumber)(self->parser)
1999 Py_DECREF(key);
2002 static void
2003 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2004 const XML_Char **attrib_in)
2006 PyObject* res;
2007 PyObject* tag;
2008 PyObject* attrib;
2009 int ok;
2011 /* tag name */
2012 tag = makeuniversal(self, tag_in);
2013 if (!tag)
2014 return; /* parser will look for errors */
2016 /* attributes */
2017 if (attrib_in[0]) {
2018 attrib = PyDict_New();
2019 if (!attrib)
2020 return;
2021 while (attrib_in[0] && attrib_in[1]) {
2022 PyObject* key = makeuniversal(self, attrib_in[0]);
2023 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2024 if (!key || !value) {
2025 Py_XDECREF(value);
2026 Py_XDECREF(key);
2027 Py_DECREF(attrib);
2028 return;
2030 ok = PyDict_SetItem(attrib, key, value);
2031 Py_DECREF(value);
2032 Py_DECREF(key);
2033 if (ok < 0) {
2034 Py_DECREF(attrib);
2035 return;
2037 attrib_in += 2;
2039 } else {
2040 Py_INCREF(Py_None);
2041 attrib = Py_None;
2044 if (TreeBuilder_CheckExact(self->target))
2045 /* shortcut */
2046 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2047 tag, attrib);
2048 else if (self->handle_start)
2049 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2050 else
2051 res = NULL;
2053 Py_DECREF(tag);
2054 Py_DECREF(attrib);
2056 Py_XDECREF(res);
2059 static void
2060 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2061 int data_len)
2063 PyObject* data;
2064 PyObject* res;
2066 data = makestring(data_in, data_len);
2067 if (!data)
2068 return; /* parser will look for errors */
2070 if (TreeBuilder_CheckExact(self->target))
2071 /* shortcut */
2072 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2073 else if (self->handle_data)
2074 res = PyObject_CallFunction(self->handle_data, "O", data);
2075 else
2076 res = NULL;
2078 Py_DECREF(data);
2080 Py_XDECREF(res);
2083 static void
2084 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2086 PyObject* tag;
2087 PyObject* res = NULL;
2089 if (TreeBuilder_CheckExact(self->target))
2090 /* shortcut */
2091 /* the standard tree builder doesn't look at the end tag */
2092 res = treebuilder_handle_end(
2093 (TreeBuilderObject*) self->target, Py_None
2095 else if (self->handle_end) {
2096 tag = makeuniversal(self, tag_in);
2097 if (tag) {
2098 res = PyObject_CallFunction(self->handle_end, "O", tag);
2099 Py_DECREF(tag);
2103 Py_XDECREF(res);
2106 static void
2107 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2108 const XML_Char *uri)
2110 treebuilder_handle_namespace(
2111 (TreeBuilderObject*) self->target, 1, prefix, uri
2115 static void
2116 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2118 treebuilder_handle_namespace(
2119 (TreeBuilderObject*) self->target, 0, NULL, NULL
2123 static void
2124 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2126 PyObject* comment;
2127 PyObject* res;
2129 if (self->handle_comment) {
2130 comment = makestring(comment_in, strlen(comment_in));
2131 if (comment) {
2132 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2133 Py_XDECREF(res);
2134 Py_DECREF(comment);
2139 static void
2140 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2141 const XML_Char* data_in)
2143 PyObject* target;
2144 PyObject* data;
2145 PyObject* res;
2147 if (self->handle_pi) {
2148 target = makestring(target_in, strlen(target_in));
2149 data = makestring(data_in, strlen(data_in));
2150 if (target && data) {
2151 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2152 Py_XDECREF(res);
2153 Py_DECREF(data);
2154 Py_DECREF(target);
2155 } else {
2156 Py_XDECREF(data);
2157 Py_XDECREF(target);
2162 #if defined(Py_USING_UNICODE)
2163 static int
2164 expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2165 XML_Encoding *info)
2167 PyObject* u;
2168 Py_UNICODE* p;
2169 unsigned char s[256];
2170 int i;
2172 memset(info, 0, sizeof(XML_Encoding));
2174 for (i = 0; i < 256; i++)
2175 s[i] = i;
2177 u = PyUnicode_Decode((char*) s, 256, name, "replace");
2178 if (!u)
2179 return XML_STATUS_ERROR;
2181 if (PyUnicode_GET_SIZE(u) != 256) {
2182 Py_DECREF(u);
2183 return XML_STATUS_ERROR;
2186 p = PyUnicode_AS_UNICODE(u);
2188 for (i = 0; i < 256; i++) {
2189 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2190 info->map[i] = p[i];
2191 else
2192 info->map[i] = -1;
2195 Py_DECREF(u);
2197 return XML_STATUS_OK;
2199 #endif
2201 /* -------------------------------------------------------------------- */
2202 /* constructor and destructor */
2204 static PyObject*
2205 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
2207 XMLParserObject* self;
2208 /* FIXME: does this need to be static? */
2209 static XML_Memory_Handling_Suite memory_handler;
2211 PyObject* target = NULL;
2212 char* encoding = NULL;
2213 static char* kwlist[] = { "target", "encoding", NULL };
2214 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2215 &target, &encoding))
2216 return NULL;
2218 #if defined(USE_PYEXPAT_CAPI)
2219 if (!expat_capi) {
2220 PyErr_SetString(
2221 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2223 return NULL;
2225 #endif
2227 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2228 if (self == NULL)
2229 return NULL;
2231 self->entity = PyDict_New();
2232 if (!self->entity) {
2233 PyObject_Del(self);
2234 return NULL;
2237 self->names = PyDict_New();
2238 if (!self->names) {
2239 PyObject_Del(self->entity);
2240 PyObject_Del(self);
2241 return NULL;
2244 memory_handler.malloc_fcn = PyObject_Malloc;
2245 memory_handler.realloc_fcn = PyObject_Realloc;
2246 memory_handler.free_fcn = PyObject_Free;
2248 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2249 if (!self->parser) {
2250 PyObject_Del(self->names);
2251 PyObject_Del(self->entity);
2252 PyObject_Del(self);
2253 PyErr_NoMemory();
2254 return NULL;
2257 /* setup target handlers */
2258 if (!target) {
2259 target = treebuilder_new();
2260 if (!target) {
2261 EXPAT(ParserFree)(self->parser);
2262 PyObject_Del(self->names);
2263 PyObject_Del(self->entity);
2264 PyObject_Del(self);
2265 return NULL;
2267 } else
2268 Py_INCREF(target);
2269 self->target = target;
2271 self->handle_xml = PyObject_GetAttrString(target, "xml");
2272 self->handle_start = PyObject_GetAttrString(target, "start");
2273 self->handle_data = PyObject_GetAttrString(target, "data");
2274 self->handle_end = PyObject_GetAttrString(target, "end");
2275 self->handle_comment = PyObject_GetAttrString(target, "comment");
2276 self->handle_pi = PyObject_GetAttrString(target, "pi");
2278 PyErr_Clear();
2280 /* configure parser */
2281 EXPAT(SetUserData)(self->parser, self);
2282 EXPAT(SetElementHandler)(
2283 self->parser,
2284 (XML_StartElementHandler) expat_start_handler,
2285 (XML_EndElementHandler) expat_end_handler
2287 EXPAT(SetDefaultHandlerExpand)(
2288 self->parser,
2289 (XML_DefaultHandler) expat_default_handler
2291 EXPAT(SetCharacterDataHandler)(
2292 self->parser,
2293 (XML_CharacterDataHandler) expat_data_handler
2295 if (self->handle_comment)
2296 EXPAT(SetCommentHandler)(
2297 self->parser,
2298 (XML_CommentHandler) expat_comment_handler
2300 if (self->handle_pi)
2301 EXPAT(SetProcessingInstructionHandler)(
2302 self->parser,
2303 (XML_ProcessingInstructionHandler) expat_pi_handler
2305 #if defined(Py_USING_UNICODE)
2306 EXPAT(SetUnknownEncodingHandler)(
2307 self->parser,
2308 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2310 #endif
2312 ALLOC(sizeof(XMLParserObject), "create expatparser");
2314 return (PyObject*) self;
2317 static void
2318 xmlparser_dealloc(XMLParserObject* self)
2320 EXPAT(ParserFree)(self->parser);
2322 Py_XDECREF(self->handle_pi);
2323 Py_XDECREF(self->handle_comment);
2324 Py_XDECREF(self->handle_end);
2325 Py_XDECREF(self->handle_data);
2326 Py_XDECREF(self->handle_start);
2327 Py_XDECREF(self->handle_xml);
2329 Py_DECREF(self->target);
2330 Py_DECREF(self->entity);
2331 Py_DECREF(self->names);
2333 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2335 PyObject_Del(self);
2338 /* -------------------------------------------------------------------- */
2339 /* methods (in alphabetical order) */
2341 LOCAL(PyObject*)
2342 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2344 int ok;
2346 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2348 if (PyErr_Occurred())
2349 return NULL;
2351 if (!ok) {
2352 PyErr_Format(
2353 PyExc_SyntaxError, "%s: line %ld, column %ld",
2354 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2355 EXPAT(GetErrorLineNumber)(self->parser),
2356 EXPAT(GetErrorColumnNumber)(self->parser)
2358 return NULL;
2361 Py_RETURN_NONE;
2364 static PyObject*
2365 xmlparser_close(XMLParserObject* self, PyObject* args)
2367 /* end feeding data to parser */
2369 PyObject* res;
2370 if (!PyArg_ParseTuple(args, ":close"))
2371 return NULL;
2373 res = expat_parse(self, "", 0, 1);
2375 if (res && TreeBuilder_CheckExact(self->target)) {
2376 Py_DECREF(res);
2377 return treebuilder_done((TreeBuilderObject*) self->target);
2380 return res;
2383 static PyObject*
2384 xmlparser_feed(XMLParserObject* self, PyObject* args)
2386 /* feed data to parser */
2388 char* data;
2389 int data_len;
2390 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2391 return NULL;
2393 return expat_parse(self, data, data_len, 0);
2396 static PyObject*
2397 xmlparser_parse(XMLParserObject* self, PyObject* args)
2399 /* (internal) parse until end of input stream */
2401 PyObject* reader;
2402 PyObject* buffer;
2403 PyObject* res;
2405 PyObject* fileobj;
2406 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2407 return NULL;
2409 reader = PyObject_GetAttrString(fileobj, "read");
2410 if (!reader)
2411 return NULL;
2413 /* read from open file object */
2414 for (;;) {
2416 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2418 if (!buffer) {
2419 /* read failed (e.g. due to KeyboardInterrupt) */
2420 Py_DECREF(reader);
2421 return NULL;
2424 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2425 Py_DECREF(buffer);
2426 break;
2429 res = expat_parse(
2430 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2433 Py_DECREF(buffer);
2435 if (!res) {
2436 Py_DECREF(reader);
2437 return NULL;
2439 Py_DECREF(res);
2443 Py_DECREF(reader);
2445 res = expat_parse(self, "", 0, 1);
2447 if (res && TreeBuilder_CheckExact(self->target)) {
2448 Py_DECREF(res);
2449 return treebuilder_done((TreeBuilderObject*) self->target);
2452 return res;
2455 static PyObject*
2456 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2458 /* activate element event reporting */
2460 Py_ssize_t i;
2461 TreeBuilderObject* target;
2463 PyObject* events; /* event collector */
2464 PyObject* event_set = Py_None;
2465 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2466 &event_set))
2467 return NULL;
2469 if (!TreeBuilder_CheckExact(self->target)) {
2470 PyErr_SetString(
2471 PyExc_TypeError,
2472 "event handling only supported for cElementTree.Treebuilder "
2473 "targets"
2475 return NULL;
2478 target = (TreeBuilderObject*) self->target;
2480 Py_INCREF(events);
2481 Py_XDECREF(target->events);
2482 target->events = events;
2484 /* clear out existing events */
2485 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2486 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2487 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2488 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2490 if (event_set == Py_None) {
2491 /* default is "end" only */
2492 target->end_event_obj = PyString_FromString("end");
2493 Py_RETURN_NONE;
2496 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2497 goto error;
2499 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2500 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2501 char* event;
2502 if (!PyString_Check(item))
2503 goto error;
2504 event = PyString_AS_STRING(item);
2505 if (strcmp(event, "start") == 0) {
2506 Py_INCREF(item);
2507 target->start_event_obj = item;
2508 } else if (strcmp(event, "end") == 0) {
2509 Py_INCREF(item);
2510 Py_XDECREF(target->end_event_obj);
2511 target->end_event_obj = item;
2512 } else if (strcmp(event, "start-ns") == 0) {
2513 Py_INCREF(item);
2514 Py_XDECREF(target->start_ns_event_obj);
2515 target->start_ns_event_obj = item;
2516 EXPAT(SetNamespaceDeclHandler)(
2517 self->parser,
2518 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2519 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2521 } else if (strcmp(event, "end-ns") == 0) {
2522 Py_INCREF(item);
2523 Py_XDECREF(target->end_ns_event_obj);
2524 target->end_ns_event_obj = item;
2525 EXPAT(SetNamespaceDeclHandler)(
2526 self->parser,
2527 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2528 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2530 } else {
2531 PyErr_Format(
2532 PyExc_ValueError,
2533 "unknown event '%s'", event
2535 return NULL;
2539 Py_RETURN_NONE;
2541 error:
2542 PyErr_SetString(
2543 PyExc_TypeError,
2544 "invalid event tuple"
2546 return NULL;
2549 static PyMethodDef xmlparser_methods[] = {
2550 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2551 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2552 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2553 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2554 {NULL, NULL}
2557 static PyObject*
2558 xmlparser_getattr(XMLParserObject* self, char* name)
2560 PyObject* res;
2562 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2563 if (res)
2564 return res;
2566 PyErr_Clear();
2568 if (strcmp(name, "entity") == 0)
2569 res = self->entity;
2570 else if (strcmp(name, "target") == 0)
2571 res = self->target;
2572 else if (strcmp(name, "version") == 0) {
2573 char buffer[100];
2574 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2575 XML_MINOR_VERSION, XML_MICRO_VERSION);
2576 return PyString_FromString(buffer);
2577 } else {
2578 PyErr_SetString(PyExc_AttributeError, name);
2579 return NULL;
2582 Py_INCREF(res);
2583 return res;
2586 statichere PyTypeObject XMLParser_Type = {
2587 PyObject_HEAD_INIT(NULL)
2588 0, "XMLParser", sizeof(XMLParserObject), 0,
2589 /* methods */
2590 (destructor)xmlparser_dealloc, /* tp_dealloc */
2591 0, /* tp_print */
2592 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2595 #endif
2597 /* ==================================================================== */
2598 /* python module interface */
2600 static PyMethodDef _functions[] = {
2601 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2602 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2603 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2604 #if defined(USE_EXPAT)
2605 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2606 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2607 #endif
2608 {NULL, NULL}
/*
 * Module initialisation entry point for the "_elementtree" extension.
 *
 * Steps performed by the visible code:
 *   1. Set ob_type on the statically declared type objects.
 *   2. Create the module with Py_InitModule() using the _functions table.
 *   3. Create a fresh globals dictionary, install __builtins__ in it, and
 *      execute the embedded Python "bootstrap" source in that dictionary.
 *      The bootstrap defines the pure-Python helpers (Comment, PI, XML,
 *      XMLID, parse, iterparse, ElementTree, ...) and attaches them to the
 *      module, which it imports under the name cElementTree.
 *   4. Look up ElementPath, copyelement, deepcopy and getiterator in that
 *      dictionary and store them in variables declared outside this
 *      function for later use by the C code.
 *   5. When USE_PYEXPAT_CAPI is defined, import pyexpat's C API capsule,
 *      validate it, and store it in expat_capi (or NULL if unusable).
 *
 * The function returns nothing.  It returns early, without further
 * reporting, if the module or the globals dictionary cannot be created.
 */
2611 DL_EXPORT(void)
2612 init_elementtree(void)
2614 PyObject* m;
2615 PyObject* g;
2616 char* bootstrap;
2617 #if defined(USE_PYEXPAT_CAPI)
2618 struct PyExpat_CAPI* capi;
2619 #endif
2621 /* Patch object type */
2622 Element_Type.ob_type = TreeBuilder_Type.ob_type = &PyType_Type;
2623 #if defined(USE_EXPAT)
2624 XMLParser_Type.ob_type = &PyType_Type;
2625 #endif
2627 m = Py_InitModule("_elementtree", _functions);
2628 if (!m)
2629 return;
2631 /* python glue code */
/* g is the globals namespace in which the bootstrap source is run */
2633 g = PyDict_New();
2634 if (!g)
2635 return;
/* NOTE(review): the result of PyDict_SetItemString is not checked */
2637 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
/* The adjacent string literals below concatenate into one Python source
   text; the preprocessor conditionals select variants by Python version. */
2639 bootstrap = (
2641 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2642 "from __future__ import generators\n" /* enable yield under 2.2 */
2643 #endif
2645 "from copy import copy, deepcopy\n"
/* prefer the packaged xml.etree.ElementTree, fall back to a top-level
   ElementTree module; either way it is bound to the name ET */
2647 "try:\n"
2648 " from xml.etree import ElementTree\n"
2649 "except ImportError:\n"
2650 " import ElementTree\n"
2651 "ET = ElementTree\n"
2652 "del ElementTree\n"
2654 "import _elementtree as cElementTree\n"
/* copyelement is only defined when copy() of an Element raises; its
   presence in g is tested after the bootstrap runs (see below) */
2656 "try:\n" /* check if copy works as is */
2657 " copy(cElementTree.Element('x'))\n"
2658 "except:\n"
2659 " def copyelement(elem):\n"
2660 " return elem\n"
2662 "def Comment(text=None):\n" /* public */
2663 " element = cElementTree.Element(ET.Comment)\n"
2664 " element.text = text\n"
2665 " return element\n"
2666 "cElementTree.Comment = Comment\n"
/* ElementTree subclass whose parse() feeds an explicitly supplied parser
   in 65536-byte chunks, or otherwise calls _parse() on a new XMLParser.
   NOTE(review): a file opened here from a filename is not closed in the
   visible code. */
2668 "class ElementTree(ET.ElementTree):\n" /* public */
2669 " def parse(self, source, parser=None):\n"
2670 " if not hasattr(source, 'read'):\n"
2671 " source = open(source, 'rb')\n"
2672 " if parser is not None:\n"
2673 " while 1:\n"
2674 " data = source.read(65536)\n"
2675 " if not data:\n"
2676 " break\n"
2677 " parser.feed(data)\n"
2678 " self._root = parser.close()\n"
2679 " else:\n"
2680 " parser = cElementTree.XMLParser()\n"
2681 " self._root = parser._parse(source)\n"
2682 " return self._root\n"
2683 "cElementTree.ElementTree = ElementTree\n"
/* getiterator: walks node and its descendants, keeping those whose tag
   matches (tag '*' or None matches everything); a list is built before
   Python 2.2, a generator is used otherwise */
2685 "def getiterator(node, tag=None):\n" /* helper */
2686 " if tag == '*':\n"
2687 " tag = None\n"
2688 #if (PY_VERSION_HEX < 0x02020000)
2689 " nodes = []\n" /* 2.1 doesn't have yield */
2690 " if tag is None or node.tag == tag:\n"
2691 " nodes.append(node)\n"
2692 " for node in node:\n"
2693 " nodes.extend(getiterator(node, tag))\n"
2694 " return nodes\n"
2695 #else
2696 " if tag is None or node.tag == tag:\n"
2697 " yield node\n"
2698 " for node in node:\n"
2699 " for node in getiterator(node, tag):\n"
2700 " yield node\n"
2701 #endif
2703 "def parse(source, parser=None):\n" /* public */
2704 " tree = ElementTree()\n"
2705 " tree.parse(source, parser)\n"
2706 " return tree\n"
2707 "cElementTree.parse = parse\n"
/* iterparse: before Python 2.2 the ET implementation is reused if it
   exists; otherwise a class is defined that feeds the parser in
   16384-byte chunks and yields the events collected after each feed,
   then those produced by close(); the root is stored on the instance
   once iteration finishes.
   NOTE(review): a file opened here from a filename is not closed in the
   visible code. */
2709 #if (PY_VERSION_HEX < 0x02020000)
2710 "if hasattr(ET, 'iterparse'):\n"
2711 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2712 #else
2713 "class iterparse(object):\n"
2714 " root = None\n"
2715 " def __init__(self, file, events=None):\n"
2716 " if not hasattr(file, 'read'):\n"
2717 " file = open(file, 'rb')\n"
2718 " self._file = file\n"
2719 " self._events = events\n"
2720 " def __iter__(self):\n"
2721 " events = []\n"
2722 " b = cElementTree.TreeBuilder()\n"
2723 " p = cElementTree.XMLParser(b)\n"
2724 " p._setevents(events, self._events)\n"
2725 " while 1:\n"
2726 " data = self._file.read(16384)\n"
2727 " if not data:\n"
2728 " break\n"
2729 " p.feed(data)\n"
2730 " for event in events:\n"
2731 " yield event\n"
2732 " del events[:]\n"
2733 " root = p.close()\n"
2734 " for event in events:\n"
2735 " yield event\n"
2736 " self.root = root\n"
2737 "cElementTree.iterparse = iterparse\n"
2738 #endif
/* PI: the element text is the target, followed by a space and the text
   when text is non-empty */
2740 "def PI(target, text=None):\n" /* public */
2741 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2742 " element.text = target\n"
2743 " if text:\n"
2744 " element.text = element.text + ' ' + text\n"
2745 " return element\n"
/* NOTE(review): the next three statements follow the return above and
   appear to be unreachable leftovers -- confirm and consider removing */
2747 " elem = cElementTree.Element(ET.PI)\n"
2748 " elem.text = text\n"
2749 " return elem\n"
2750 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2752 "def XML(text):\n" /* public */
2753 " parser = cElementTree.XMLParser()\n"
2754 " parser.feed(text)\n"
2755 " return parser.close()\n"
2756 "cElementTree.XML = cElementTree.fromstring = XML\n"
/* XMLID: parses text and also returns a dict mapping each non-empty
   'id' attribute value to its element */
2758 "def XMLID(text):\n" /* public */
2759 " tree = XML(text)\n"
2760 " ids = {}\n"
2761 " for elem in tree.getiterator():\n"
2762 " id = elem.get('id')\n"
2763 " if id:\n"
2764 " ids[id] = elem\n"
2765 " return tree, ids\n"
2766 "cElementTree.XMLID = XMLID\n"
/* remaining public names are re-exported from ET or set to constants */
2768 "cElementTree.dump = ET.dump\n"
2769 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2770 "cElementTree.iselement = ET.iselement\n"
2771 "cElementTree.QName = ET.QName\n"
2772 "cElementTree.tostring = ET.tostring\n"
2773 "cElementTree.VERSION = '" VERSION "'\n"
2774 "cElementTree.__version__ = '" VERSION "'\n"
2775 "cElementTree.XMLParserError = SyntaxError\n"
/* Execute the bootstrap source with g as its globals.
   NOTE(review): the value returned by PyRun_String is discarded, so a
   failure of the bootstrap is not detected at this point and the result
   object is not released -- confirm whether this is intentional. */
2779 PyRun_String(bootstrap, Py_file_input, g, NULL);
/* Cache objects defined by the bootstrap.  NOTE(review): g is not
   released anywhere in the visible code; presumably these lookups rely
   on the dictionary staying alive -- confirm. */
2781 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2783 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2784 if (elementtree_copyelement_obj) {
2785 /* reduce hack needed; enable reduce method */
/* find the element_methods entry bound to element_reduce and rename it
   to "__reduce__"; the scan stops at the entry with a NULL name */
2786 PyMethodDef* mp;
2787 for (mp = element_methods; mp->ml_name; mp++)
2788 if (mp->ml_meth == (PyCFunction) element_reduce) {
2789 mp->ml_name = "__reduce__";
2790 break;
2792 } else
2793 PyErr_Clear();
2794 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2795 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2797 #if defined(USE_PYEXPAT_CAPI)
2798 /* link against pyexpat, if possible */
/* the imported table is accepted only if its magic string matches, its
   reported size does not exceed sizeof(*expat_capi), and its expat
   major/minor/micro version equals the XML_*_VERSION macros this file
   was compiled with; otherwise expat_capi is set to NULL */
2799 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2800 if (capi &&
2801 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2802 capi->size <= sizeof(*expat_capi) &&
2803 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2804 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2805 capi->MICRO_VERSION == XML_MICRO_VERSION)
2806 expat_capi = capi;
2807 else
2808 expat_capi = NULL;
2809 #endif