Change to flush and close logic to fix #1760556.
[python.git] / Modules / _elementtree.c
blobd8a28b4068eba83d56f5765d3d65aa775f01797b
1 /*
2 * ElementTree
3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
42 * info@pythonware.com
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
49 #include "Python.h"
51 #define VERSION "1.0.6"
53 /* -------------------------------------------------------------------- */
54 /* configuration */
56 /* Leave defined to include the expat-based XMLParser type */
57 #define USE_EXPAT
59 /* Define to route all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
63 allocations. */
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
76 /* -------------------------------------------------------------------- */
78 #if 0
79 static int memory = 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84 #else
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
87 #endif
89 /* compiler tweaks */
90 #if defined(_MSC_VER)
91 #define LOCAL(type) static __inline type __fastcall
92 #else
93 #define LOCAL(type) static type
94 #endif
96 /* compatibility macros */
97 #if (PY_VERSION_HEX < 0x02050000)
98 typedef int Py_ssize_t;
99 #define lenfunc inquiry
100 #endif
102 #if (PY_VERSION_HEX < 0x02040000)
103 #define PyDict_CheckExact PyDict_Check
104 #if (PY_VERSION_HEX < 0x02020000)
105 #define PyList_CheckExact PyList_Check
106 #define PyString_CheckExact PyString_Check
107 #if (PY_VERSION_HEX >= 0x01060000)
108 #define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
109 #endif
110 #endif
111 #endif
113 #if !defined(Py_RETURN_NONE)
114 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
115 #endif
117 /* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
120 info. */
121 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
125 /* glue functions (see the init function for details) */
126 static PyObject* elementtree_copyelement_obj;
127 static PyObject* elementtree_deepcopy_obj;
128 static PyObject* elementtree_getiterator_obj;
129 static PyObject* elementpath_obj;
131 /* helpers */
133 LOCAL(PyObject*)
134 deepcopy(PyObject* object, PyObject* memo)
136 /* do a deep copy of the given object */
138 PyObject* args;
139 PyObject* result;
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
146 return NULL;
149 args = PyTuple_New(2);
150 if (!args)
151 return NULL;
153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158 Py_DECREF(args);
160 return result;
163 LOCAL(PyObject*)
164 list_join(PyObject* list)
166 /* join list elements (destroying the list in the process) */
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
197 args = PyTuple_New(1);
198 if (!args)
199 return NULL;
201 PyTuple_SET_ITEM(args, 0, list);
203 result = PyObject_CallObject(function, args);
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
209 return result;
212 #if (PY_VERSION_HEX < 0x02020000)
213 LOCAL(int)
214 PyDict_Update(PyObject* dict, PyObject* other)
216 /* PyDict_Update emulation for 2.1 and earlier */
218 PyObject* res;
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
224 Py_DECREF(res);
225 return 0;
227 #endif
229 /* -------------------------------------------------------------------- */
230 /* the element type */
/* Optional per-element storage for the attribute dictionary and the
   child list.  It is allocated separately from the element itself (see
   element_new_extra) and referenced via ElementObject.extra. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    int length; /* actual number of items */
    int allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline buffer with room for STATIC_CHILDREN child pointers */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
/* The element object proper: tag, text and tail, plus a pointer to the
   separately allocated attribute/children section. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attribute/children section; NULL until one has been created
       (see element_new_extra) */
    ElementObjectExtra* extra;

} ElementObject;
270 staticforward PyTypeObject Element_Type;
272 #define Element_CheckExact(op) (Py_Type(op) == &Element_Type)
274 /* -------------------------------------------------------------------- */
275 /* element constructor and destructor */
277 LOCAL(int)
278 element_new_extra(ElementObject* self, PyObject* attrib)
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
284 if (!attrib)
285 attrib = Py_None;
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
294 return 0;
297 LOCAL(void)
298 element_dealloc_extra(ElementObject* self)
300 int i;
302 Py_DECREF(self->extra->attrib);
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
310 PyObject_Free(self->extra);
313 LOCAL(PyObject*)
314 element_new(PyObject* tag, PyObject* attrib)
316 ElementObject* self;
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
326 self->extra = NULL;
328 if (attrib != Py_None) {
330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
332 return NULL;
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
341 Py_INCREF(tag);
342 self->tag = tag;
344 Py_INCREF(Py_None);
345 self->text = Py_None;
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
350 ALLOC(sizeof(ElementObject), "create element");
352 return (PyObject*) self;
355 LOCAL(int)
356 element_resize(ElementObject* self, int extra)
358 int size;
359 PyObject* *children;
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
364 if (!self->extra)
365 element_new_extra(self, NULL);
367 size = self->extra->length + extra;
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
372 if (self->extra->children != self->extra->_children) {
373 children = PyObject_Realloc(self->extra->children,
374 size * sizeof(PyObject*));
375 if (!children)
376 goto nomemory;
377 } else {
378 children = PyObject_Malloc(size * sizeof(PyObject*));
379 if (!children)
380 goto nomemory;
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children, self->extra->children,
383 self->extra->length * sizeof(PyObject*));
385 self->extra->children = children;
386 self->extra->allocated = size;
389 return 0;
391 nomemory:
392 PyErr_NoMemory();
393 return -1;
396 LOCAL(int)
397 element_add_subelement(ElementObject* self, PyObject* element)
399 /* add a child element to a parent */
401 if (element_resize(self, 1) < 0)
402 return -1;
404 Py_INCREF(element);
405 self->extra->children[self->extra->length] = element;
407 self->extra->length++;
409 return 0;
412 LOCAL(PyObject*)
413 element_get_attrib(ElementObject* self)
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
418 PyObject* res = self->extra->attrib;
420 if (res == Py_None) {
421 /* create missing dictionary */
422 res = PyDict_New();
423 if (!res)
424 return NULL;
425 self->extra->attrib = res;
428 return res;
431 LOCAL(PyObject*)
432 element_get_text(ElementObject* self)
434 /* return borrowed reference to text attribute */
436 PyObject* res = self->text;
438 if (JOIN_GET(res)) {
439 res = JOIN_OBJ(res);
440 if (PyList_CheckExact(res)) {
441 res = list_join(res);
442 if (!res)
443 return NULL;
444 self->text = res;
448 return res;
451 LOCAL(PyObject*)
452 element_get_tail(ElementObject* self)
454 /* return borrowed reference to text attribute */
456 PyObject* res = self->tail;
458 if (JOIN_GET(res)) {
459 res = JOIN_OBJ(res);
460 if (PyList_CheckExact(res)) {
461 res = list_join(res);
462 if (!res)
463 return NULL;
464 self->tail = res;
468 return res;
471 static PyObject*
472 element(PyObject* self, PyObject* args, PyObject* kw)
474 PyObject* elem;
476 PyObject* tag;
477 PyObject* attrib = NULL;
478 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
479 &PyDict_Type, &attrib))
480 return NULL;
482 if (attrib || kw) {
483 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
484 if (!attrib)
485 return NULL;
486 if (kw)
487 PyDict_Update(attrib, kw);
488 } else {
489 Py_INCREF(Py_None);
490 attrib = Py_None;
493 elem = element_new(tag, attrib);
495 Py_DECREF(attrib);
497 return elem;
500 static PyObject*
501 subelement(PyObject* self, PyObject* args, PyObject* kw)
503 PyObject* elem;
505 ElementObject* parent;
506 PyObject* tag;
507 PyObject* attrib = NULL;
508 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
509 &Element_Type, &parent, &tag,
510 &PyDict_Type, &attrib))
511 return NULL;
513 if (attrib || kw) {
514 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
515 if (!attrib)
516 return NULL;
517 if (kw)
518 PyDict_Update(attrib, kw);
519 } else {
520 Py_INCREF(Py_None);
521 attrib = Py_None;
524 elem = element_new(tag, attrib);
526 Py_DECREF(attrib);
528 if (element_add_subelement(parent, elem) < 0) {
529 Py_DECREF(elem);
530 return NULL;
533 return elem;
536 static void
537 element_dealloc(ElementObject* self)
539 if (self->extra)
540 element_dealloc_extra(self);
542 /* discard attributes */
543 Py_DECREF(self->tag);
544 Py_DECREF(JOIN_OBJ(self->text));
545 Py_DECREF(JOIN_OBJ(self->tail));
547 RELEASE(sizeof(ElementObject), "destroy element");
549 PyObject_Del(self);
552 /* -------------------------------------------------------------------- */
553 /* methods (in alphabetical order) */
555 static PyObject*
556 element_append(ElementObject* self, PyObject* args)
558 PyObject* element;
559 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
560 return NULL;
562 if (element_add_subelement(self, element) < 0)
563 return NULL;
565 Py_RETURN_NONE;
568 static PyObject*
569 element_clear(ElementObject* self, PyObject* args)
571 if (!PyArg_ParseTuple(args, ":clear"))
572 return NULL;
574 if (self->extra) {
575 element_dealloc_extra(self);
576 self->extra = NULL;
579 Py_INCREF(Py_None);
580 Py_DECREF(JOIN_OBJ(self->text));
581 self->text = Py_None;
583 Py_INCREF(Py_None);
584 Py_DECREF(JOIN_OBJ(self->tail));
585 self->tail = Py_None;
587 Py_RETURN_NONE;
590 static PyObject*
591 element_copy(ElementObject* self, PyObject* args)
593 int i;
594 ElementObject* element;
596 if (!PyArg_ParseTuple(args, ":__copy__"))
597 return NULL;
599 element = (ElementObject*) element_new(
600 self->tag, (self->extra) ? self->extra->attrib : Py_None
602 if (!element)
603 return NULL;
605 Py_DECREF(JOIN_OBJ(element->text));
606 element->text = self->text;
607 Py_INCREF(JOIN_OBJ(element->text));
609 Py_DECREF(JOIN_OBJ(element->tail));
610 element->tail = self->tail;
611 Py_INCREF(JOIN_OBJ(element->tail));
613 if (self->extra) {
615 if (element_resize(element, self->extra->length) < 0) {
616 Py_DECREF(element);
617 return NULL;
620 for (i = 0; i < self->extra->length; i++) {
621 Py_INCREF(self->extra->children[i]);
622 element->extra->children[i] = self->extra->children[i];
625 element->extra->length = self->extra->length;
629 return (PyObject*) element;
632 static PyObject*
633 element_deepcopy(ElementObject* self, PyObject* args)
635 int i;
636 ElementObject* element;
637 PyObject* tag;
638 PyObject* attrib;
639 PyObject* text;
640 PyObject* tail;
641 PyObject* id;
643 PyObject* memo;
644 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
645 return NULL;
647 tag = deepcopy(self->tag, memo);
648 if (!tag)
649 return NULL;
651 if (self->extra) {
652 attrib = deepcopy(self->extra->attrib, memo);
653 if (!attrib) {
654 Py_DECREF(tag);
655 return NULL;
657 } else {
658 Py_INCREF(Py_None);
659 attrib = Py_None;
662 element = (ElementObject*) element_new(tag, attrib);
664 Py_DECREF(tag);
665 Py_DECREF(attrib);
667 if (!element)
668 return NULL;
670 text = deepcopy(JOIN_OBJ(self->text), memo);
671 if (!text)
672 goto error;
673 Py_DECREF(element->text);
674 element->text = JOIN_SET(text, JOIN_GET(self->text));
676 tail = deepcopy(JOIN_OBJ(self->tail), memo);
677 if (!tail)
678 goto error;
679 Py_DECREF(element->tail);
680 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
682 if (self->extra) {
684 if (element_resize(element, self->extra->length) < 0)
685 goto error;
687 for (i = 0; i < self->extra->length; i++) {
688 PyObject* child = deepcopy(self->extra->children[i], memo);
689 if (!child) {
690 element->extra->length = i;
691 goto error;
693 element->extra->children[i] = child;
696 element->extra->length = self->extra->length;
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id = PyInt_FromLong((Py_uintptr_t) self);
703 i = PyDict_SetItem(memo, id, (PyObject*) element);
705 Py_DECREF(id);
707 if (i < 0)
708 goto error;
710 return (PyObject*) element;
712 error:
713 Py_DECREF(element);
714 return NULL;
717 LOCAL(int)
718 checkpath(PyObject* tag)
720 Py_ssize_t i;
721 int check = 1;
723 /* check if a tag contains an xpath character */
725 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
727 #if defined(Py_USING_UNICODE)
728 if (PyUnicode_Check(tag)) {
729 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
730 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
738 return 0;
740 #endif
741 if (PyString_Check(tag)) {
742 char *p = PyString_AS_STRING(tag);
743 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
744 if (p[i] == '{')
745 check = 0;
746 else if (p[i] == '}')
747 check = 1;
748 else if (check && PATHCHAR(p[i]))
749 return 1;
751 return 0;
754 return 1; /* unknown type; might be path expression */
757 static PyObject*
758 element_find(ElementObject* self, PyObject* args)
760 int i;
762 PyObject* tag;
763 if (!PyArg_ParseTuple(args, "O:find", &tag))
764 return NULL;
766 if (checkpath(tag))
767 return PyObject_CallMethod(
768 elementpath_obj, "find", "OO", self, tag
771 if (!self->extra)
772 Py_RETURN_NONE;
774 for (i = 0; i < self->extra->length; i++) {
775 PyObject* item = self->extra->children[i];
776 if (Element_CheckExact(item) &&
777 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
778 Py_INCREF(item);
779 return item;
783 Py_RETURN_NONE;
786 static PyObject*
787 element_findtext(ElementObject* self, PyObject* args)
789 int i;
791 PyObject* tag;
792 PyObject* default_value = Py_None;
793 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
794 return NULL;
796 if (checkpath(tag))
797 return PyObject_CallMethod(
798 elementpath_obj, "findtext", "OOO", self, tag, default_value
801 if (!self->extra) {
802 Py_INCREF(default_value);
803 return default_value;
806 for (i = 0; i < self->extra->length; i++) {
807 ElementObject* item = (ElementObject*) self->extra->children[i];
808 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
809 PyObject* text = element_get_text(item);
810 if (text == Py_None)
811 return PyString_FromString("");
812 Py_XINCREF(text);
813 return text;
817 Py_INCREF(default_value);
818 return default_value;
821 static PyObject*
822 element_findall(ElementObject* self, PyObject* args)
824 int i;
825 PyObject* out;
827 PyObject* tag;
828 if (!PyArg_ParseTuple(args, "O:findall", &tag))
829 return NULL;
831 if (checkpath(tag))
832 return PyObject_CallMethod(
833 elementpath_obj, "findall", "OO", self, tag
836 out = PyList_New(0);
837 if (!out)
838 return NULL;
840 if (!self->extra)
841 return out;
843 for (i = 0; i < self->extra->length; i++) {
844 PyObject* item = self->extra->children[i];
845 if (Element_CheckExact(item) &&
846 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
847 if (PyList_Append(out, item) < 0) {
848 Py_DECREF(out);
849 return NULL;
854 return out;
857 static PyObject*
858 element_get(ElementObject* self, PyObject* args)
860 PyObject* value;
862 PyObject* key;
863 PyObject* default_value = Py_None;
864 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
865 return NULL;
867 if (!self->extra || self->extra->attrib == Py_None)
868 value = default_value;
869 else {
870 value = PyDict_GetItem(self->extra->attrib, key);
871 if (!value)
872 value = default_value;
875 Py_INCREF(value);
876 return value;
879 static PyObject*
880 element_getchildren(ElementObject* self, PyObject* args)
882 int i;
883 PyObject* list;
885 if (!PyArg_ParseTuple(args, ":getchildren"))
886 return NULL;
888 if (!self->extra)
889 return PyList_New(0);
891 list = PyList_New(self->extra->length);
892 if (!list)
893 return NULL;
895 for (i = 0; i < self->extra->length; i++) {
896 PyObject* item = self->extra->children[i];
897 Py_INCREF(item);
898 PyList_SET_ITEM(list, i, item);
901 return list;
904 static PyObject*
905 element_getiterator(ElementObject* self, PyObject* args)
907 PyObject* result;
909 PyObject* tag = Py_None;
910 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
911 return NULL;
913 if (!elementtree_getiterator_obj) {
914 PyErr_SetString(
915 PyExc_RuntimeError,
916 "getiterator helper not found"
918 return NULL;
921 args = PyTuple_New(2);
922 if (!args)
923 return NULL;
925 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
926 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
928 result = PyObject_CallObject(elementtree_getiterator_obj, args);
930 Py_DECREF(args);
932 return result;
935 static PyObject*
936 element_getitem(PyObject* self_, Py_ssize_t index)
938 ElementObject* self = (ElementObject*) self_;
940 if (!self->extra || index < 0 || index >= self->extra->length) {
941 PyErr_SetString(
942 PyExc_IndexError,
943 "child index out of range"
945 return NULL;
948 Py_INCREF(self->extra->children[index]);
949 return self->extra->children[index];
952 static PyObject*
953 element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
955 ElementObject* self = (ElementObject*) self_;
956 Py_ssize_t i;
957 PyObject* list;
959 if (!self->extra)
960 return PyList_New(0);
962 /* standard clamping */
963 if (start < 0)
964 start = 0;
965 if (end < 0)
966 end = 0;
967 if (end > self->extra->length)
968 end = self->extra->length;
969 if (start > end)
970 start = end;
972 list = PyList_New(end - start);
973 if (!list)
974 return NULL;
976 for (i = start; i < end; i++) {
977 PyObject* item = self->extra->children[i];
978 Py_INCREF(item);
979 PyList_SET_ITEM(list, i - start, item);
982 return list;
985 static PyObject*
986 element_insert(ElementObject* self, PyObject* args)
988 int i;
990 int index;
991 PyObject* element;
992 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
993 &Element_Type, &element))
994 return NULL;
996 if (!self->extra)
997 element_new_extra(self, NULL);
999 if (index < 0)
1000 index = 0;
1001 if (index > self->extra->length)
1002 index = self->extra->length;
1004 if (element_resize(self, 1) < 0)
1005 return NULL;
1007 for (i = self->extra->length; i > index; i--)
1008 self->extra->children[i] = self->extra->children[i-1];
1010 Py_INCREF(element);
1011 self->extra->children[index] = element;
1013 self->extra->length++;
1015 Py_RETURN_NONE;
1018 static PyObject*
1019 element_items(ElementObject* self, PyObject* args)
1021 if (!PyArg_ParseTuple(args, ":items"))
1022 return NULL;
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1027 return PyDict_Items(self->extra->attrib);
1030 static PyObject*
1031 element_keys(ElementObject* self, PyObject* args)
1033 if (!PyArg_ParseTuple(args, ":keys"))
1034 return NULL;
1036 if (!self->extra || self->extra->attrib == Py_None)
1037 return PyList_New(0);
1039 return PyDict_Keys(self->extra->attrib);
1042 static Py_ssize_t
1043 element_length(ElementObject* self)
1045 if (!self->extra)
1046 return 0;
1048 return self->extra->length;
1051 static PyObject*
1052 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1054 PyObject* elem;
1056 PyObject* tag;
1057 PyObject* attrib;
1058 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1059 return NULL;
1061 attrib = PyDict_Copy(attrib);
1062 if (!attrib)
1063 return NULL;
1065 elem = element_new(tag, attrib);
1067 Py_DECREF(attrib);
1069 return elem;
1072 static PyObject*
1073 element_reduce(ElementObject* self, PyObject* args)
1075 if (!PyArg_ParseTuple(args, ":__reduce__"))
1076 return NULL;
1078 /* Hack alert: This method is used to work around a __copy__
1079 problem on certain 2.3 and 2.4 versions. To save time and
1080 simplify the code, we create the copy in here, and use a dummy
1081 copyelement helper to trick the copy module into doing the
1082 right thing. */
1084 if (!elementtree_copyelement_obj) {
1085 PyErr_SetString(
1086 PyExc_RuntimeError,
1087 "copyelement helper not found"
1089 return NULL;
1092 return Py_BuildValue(
1093 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1097 static PyObject*
1098 element_remove(ElementObject* self, PyObject* args)
1100 int i;
1102 PyObject* element;
1103 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1104 return NULL;
1106 if (!self->extra) {
1107 /* element has no children, so raise exception */
1108 PyErr_SetString(
1109 PyExc_ValueError,
1110 "list.remove(x): x not in list"
1112 return NULL;
1115 for (i = 0; i < self->extra->length; i++) {
1116 if (self->extra->children[i] == element)
1117 break;
1118 if (PyObject_Compare(self->extra->children[i], element) == 0)
1119 break;
1122 if (i == self->extra->length) {
1123 /* element is not in children, so raise exception */
1124 PyErr_SetString(
1125 PyExc_ValueError,
1126 "list.remove(x): x not in list"
1128 return NULL;
1131 Py_DECREF(self->extra->children[i]);
1133 self->extra->length--;
1135 for (; i < self->extra->length; i++)
1136 self->extra->children[i] = self->extra->children[i+1];
1138 Py_RETURN_NONE;
1141 static PyObject*
1142 element_repr(ElementObject* self)
1144 PyObject* repr;
1145 char buffer[100];
1147 repr = PyString_FromString("<Element ");
1149 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1151 sprintf(buffer, " at %p>", self);
1152 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1154 return repr;
1157 static PyObject*
1158 element_set(ElementObject* self, PyObject* args)
1160 PyObject* attrib;
1162 PyObject* key;
1163 PyObject* value;
1164 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1165 return NULL;
1167 if (!self->extra)
1168 element_new_extra(self, NULL);
1170 attrib = element_get_attrib(self);
1171 if (!attrib)
1172 return NULL;
1174 if (PyDict_SetItem(attrib, key, value) < 0)
1175 return NULL;
1177 Py_RETURN_NONE;
1180 static int
1181 element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
1183 ElementObject* self = (ElementObject*) self_;
1184 Py_ssize_t i, new, old;
1185 PyObject* recycle = NULL;
1187 if (!self->extra)
1188 element_new_extra(self, NULL);
1190 /* standard clamping */
1191 if (start < 0)
1192 start = 0;
1193 if (end < 0)
1194 end = 0;
1195 if (end > self->extra->length)
1196 end = self->extra->length;
1197 if (start > end)
1198 start = end;
1200 old = end - start;
1202 if (item == NULL)
1203 new = 0;
1204 else if (PyList_CheckExact(item)) {
1205 new = PyList_GET_SIZE(item);
1206 } else {
1207 /* FIXME: support arbitrary sequences? */
1208 PyErr_Format(
1209 PyExc_TypeError,
1210 "expected list, not \"%.200s\"", Py_Type(item)->tp_name
1212 return -1;
1215 if (old > 0) {
1216 /* to avoid recursive calls to this method (via decref), move
1217 old items to the recycle bin here, and get rid of them when
1218 we're done modifying the element */
1219 recycle = PyList_New(old);
1220 for (i = 0; i < old; i++)
1221 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1224 if (new < old) {
1225 /* delete slice */
1226 for (i = end; i < self->extra->length; i++)
1227 self->extra->children[i + new - old] = self->extra->children[i];
1228 } else if (new > old) {
1229 /* insert slice */
1230 if (element_resize(self, new - old) < 0)
1231 return -1;
1232 for (i = self->extra->length-1; i >= end; i--)
1233 self->extra->children[i + new - old] = self->extra->children[i];
1236 /* replace the slice */
1237 for (i = 0; i < new; i++) {
1238 PyObject* element = PyList_GET_ITEM(item, i);
1239 Py_INCREF(element);
1240 self->extra->children[i + start] = element;
1243 self->extra->length += new - old;
1245 /* discard the recycle bin, and everything in it */
1246 Py_XDECREF(recycle);
1248 return 0;
1251 static int
1252 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1254 ElementObject* self = (ElementObject*) self_;
1255 int i;
1256 PyObject* old;
1258 if (!self->extra || index < 0 || index >= self->extra->length) {
1259 PyErr_SetString(
1260 PyExc_IndexError,
1261 "child assignment index out of range");
1262 return -1;
1265 old = self->extra->children[index];
1267 if (item) {
1268 Py_INCREF(item);
1269 self->extra->children[index] = item;
1270 } else {
1271 self->extra->length--;
1272 for (i = index; i < self->extra->length; i++)
1273 self->extra->children[i] = self->extra->children[i+1];
1276 Py_DECREF(old);
1278 return 0;
/* Method table for Element objects; element_getattr looks names up
   here via Py_FindMethod.  Every entry uses METH_VARARGS. */
static PyMethodDef element_methods[] = {

    {"clear", (PyCFunction) element_clear, METH_VARARGS},

    {"get", (PyCFunction) element_get, METH_VARARGS},
    {"set", (PyCFunction) element_set, METH_VARARGS},

    {"find", (PyCFunction) element_find, METH_VARARGS},
    {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
    {"findall", (PyCFunction) element_findall, METH_VARARGS},

    {"append", (PyCFunction) element_append, METH_VARARGS},
    {"insert", (PyCFunction) element_insert, METH_VARARGS},
    {"remove", (PyCFunction) element_remove, METH_VARARGS},

    {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
    {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},

    {"items", (PyCFunction) element_items, METH_VARARGS},
    {"keys", (PyCFunction) element_keys, METH_VARARGS},

    {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},

    {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
    {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},

    /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
       C objects correctly, so we have to fake it using a __reduce__-
       based hack (see the element_reduce implementation above for
       details). */

    /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
       using a runtime test to figure out if we need to fake things
       or not (see the init code below).  The following entry is
       enabled only if the hack is needed. */

    /* NOTE(review): the leading "!" presumably keeps this entry from
       matching a lookup of "__reduce__" until the init code enables
       it -- confirm against the init code */
    {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},

    {NULL, NULL}
};
1322 static PyObject*
1323 element_getattr(ElementObject* self, char* name)
1325 PyObject* res;
1327 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1328 if (res)
1329 return res;
1331 PyErr_Clear();
1333 if (strcmp(name, "tag") == 0)
1334 res = self->tag;
1335 else if (strcmp(name, "text") == 0)
1336 res = element_get_text(self);
1337 else if (strcmp(name, "tail") == 0) {
1338 res = element_get_tail(self);
1339 } else if (strcmp(name, "attrib") == 0) {
1340 if (!self->extra)
1341 element_new_extra(self, NULL);
1342 res = element_get_attrib(self);
1343 } else {
1344 PyErr_SetString(PyExc_AttributeError, name);
1345 return NULL;
1348 if (!res)
1349 return NULL;
1351 Py_INCREF(res);
1352 return res;
1355 static int
1356 element_setattr(ElementObject* self, const char* name, PyObject* value)
1358 if (value == NULL) {
1359 PyErr_SetString(
1360 PyExc_AttributeError,
1361 "can't delete element attributes"
1363 return -1;
1366 if (strcmp(name, "tag") == 0) {
1367 Py_DECREF(self->tag);
1368 self->tag = value;
1369 Py_INCREF(self->tag);
1370 } else if (strcmp(name, "text") == 0) {
1371 Py_DECREF(JOIN_OBJ(self->text));
1372 self->text = value;
1373 Py_INCREF(self->text);
1374 } else if (strcmp(name, "tail") == 0) {
1375 Py_DECREF(JOIN_OBJ(self->tail));
1376 self->tail = value;
1377 Py_INCREF(self->tail);
1378 } else if (strcmp(name, "attrib") == 0) {
1379 if (!self->extra)
1380 element_new_extra(self, NULL);
1381 Py_DECREF(self->extra->attrib);
1382 self->extra->attrib = value;
1383 Py_INCREF(self->extra->attrib);
1384 } else {
1385 PyErr_SetString(PyExc_AttributeError, name);
1386 return -1;
1389 return 0;
1392 static PySequenceMethods element_as_sequence = {
1393 (lenfunc) element_length,
1394 0, /* sq_concat */
1395 0, /* sq_repeat */
1396 element_getitem,
1397 element_getslice,
1398 element_setitem,
1399 element_setslice,
1402 statichere PyTypeObject Element_Type = {
1403 PyObject_HEAD_INIT(NULL)
1404 0, "Element", sizeof(ElementObject), 0,
1405 /* methods */
1406 (destructor)element_dealloc, /* tp_dealloc */
1407 0, /* tp_print */
1408 (getattrfunc)element_getattr, /* tp_getattr */
1409 (setattrfunc)element_setattr, /* tp_setattr */
1410 0, /* tp_compare */
1411 (reprfunc)element_repr, /* tp_repr */
1412 0, /* tp_as_number */
1413 &element_as_sequence, /* tp_as_sequence */
1416 /* ==================================================================== */
1417 /* the tree builder type */
1419 typedef struct {
1420 PyObject_HEAD
1422 PyObject* root; /* root node (first created node) */
1424 ElementObject* this; /* current node */
1425 ElementObject* last; /* most recently created node */
1427 PyObject* data; /* data collector (string or list), or NULL */
1429 PyObject* stack; /* element stack */
1430 Py_ssize_t index; /* current stack size (0=empty) */
1432 /* element tracing */
1433 PyObject* events; /* list of events, or NULL if not collecting */
1434 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1435 PyObject* end_event_obj;
1436 PyObject* start_ns_event_obj;
1437 PyObject* end_ns_event_obj;
1439 } TreeBuilderObject;
1441 staticforward PyTypeObject TreeBuilder_Type;
1443 #define TreeBuilder_CheckExact(op) (Py_Type(op) == &TreeBuilder_Type)
1445 /* -------------------------------------------------------------------- */
1446 /* constructor and destructor */
1448 LOCAL(PyObject*)
1449 treebuilder_new(void)
1451 TreeBuilderObject* self;
1453 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1454 if (self == NULL)
1455 return NULL;
1457 self->root = NULL;
1459 Py_INCREF(Py_None);
1460 self->this = (ElementObject*) Py_None;
1462 Py_INCREF(Py_None);
1463 self->last = (ElementObject*) Py_None;
1465 self->data = NULL;
1467 self->stack = PyList_New(20);
1468 self->index = 0;
1470 self->events = NULL;
1471 self->start_event_obj = self->end_event_obj = NULL;
1472 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1474 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1476 return (PyObject*) self;
1479 static PyObject*
1480 treebuilder(PyObject* self_, PyObject* args)
1482 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1483 return NULL;
1485 return treebuilder_new();
1488 static void
1489 treebuilder_dealloc(TreeBuilderObject* self)
1491 Py_XDECREF(self->end_ns_event_obj);
1492 Py_XDECREF(self->start_ns_event_obj);
1493 Py_XDECREF(self->end_event_obj);
1494 Py_XDECREF(self->start_event_obj);
1495 Py_XDECREF(self->events);
1496 Py_DECREF(self->stack);
1497 Py_XDECREF(self->data);
1498 Py_DECREF(self->last);
1499 Py_DECREF(self->this);
1500 Py_XDECREF(self->root);
1502 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1504 PyObject_Del(self);
1507 /* -------------------------------------------------------------------- */
1508 /* handlers */
1510 LOCAL(PyObject*)
1511 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1512 PyObject* standalone)
1514 Py_RETURN_NONE;
1517 LOCAL(PyObject*)
1518 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1519 PyObject* attrib)
1521 PyObject* node;
1522 PyObject* this;
1524 if (self->data) {
1525 if (self->this == self->last) {
1526 Py_DECREF(JOIN_OBJ(self->last->text));
1527 self->last->text = JOIN_SET(
1528 self->data, PyList_CheckExact(self->data)
1530 } else {
1531 Py_DECREF(JOIN_OBJ(self->last->tail));
1532 self->last->tail = JOIN_SET(
1533 self->data, PyList_CheckExact(self->data)
1536 self->data = NULL;
1539 node = element_new(tag, attrib);
1540 if (!node)
1541 return NULL;
1543 this = (PyObject*) self->this;
1545 if (this != Py_None) {
1546 if (element_add_subelement((ElementObject*) this, node) < 0)
1547 goto error;
1548 } else {
1549 if (self->root) {
1550 PyErr_SetString(
1551 PyExc_SyntaxError,
1552 "multiple elements on top level"
1554 goto error;
1556 Py_INCREF(node);
1557 self->root = node;
1560 if (self->index < PyList_GET_SIZE(self->stack)) {
1561 if (PyList_SetItem(self->stack, self->index, this) < 0)
1562 goto error;
1563 Py_INCREF(this);
1564 } else {
1565 if (PyList_Append(self->stack, this) < 0)
1566 goto error;
1568 self->index++;
1570 Py_DECREF(this);
1571 Py_INCREF(node);
1572 self->this = (ElementObject*) node;
1574 Py_DECREF(self->last);
1575 Py_INCREF(node);
1576 self->last = (ElementObject*) node;
1578 if (self->start_event_obj) {
1579 PyObject* res;
1580 PyObject* action = self->start_event_obj;
1581 res = PyTuple_New(2);
1582 if (res) {
1583 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1584 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1585 PyList_Append(self->events, res);
1586 Py_DECREF(res);
1587 } else
1588 PyErr_Clear(); /* FIXME: propagate error */
1591 return node;
1593 error:
1594 Py_DECREF(node);
1595 return NULL;
1598 LOCAL(PyObject*)
1599 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1601 if (!self->data) {
1602 if (self->last == (ElementObject*) Py_None) {
1603 /* ignore calls to data before the first call to start */
1604 Py_RETURN_NONE;
1606 /* store the first item as is */
1607 Py_INCREF(data); self->data = data;
1608 } else {
1609 /* more than one item; use a list to collect items */
1610 if (PyString_CheckExact(self->data) && Py_Refcnt(self->data) == 1 &&
1611 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1612 /* expat often generates single character data sections; handle
1613 the most common case by resizing the existing string... */
1614 Py_ssize_t size = PyString_GET_SIZE(self->data);
1615 if (_PyString_Resize(&self->data, size + 1) < 0)
1616 return NULL;
1617 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1618 } else if (PyList_CheckExact(self->data)) {
1619 if (PyList_Append(self->data, data) < 0)
1620 return NULL;
1621 } else {
1622 PyObject* list = PyList_New(2);
1623 if (!list)
1624 return NULL;
1625 PyList_SET_ITEM(list, 0, self->data);
1626 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1627 self->data = list;
1631 Py_RETURN_NONE;
1634 LOCAL(PyObject*)
1635 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1637 PyObject* item;
1639 if (self->data) {
1640 if (self->this == self->last) {
1641 Py_DECREF(JOIN_OBJ(self->last->text));
1642 self->last->text = JOIN_SET(
1643 self->data, PyList_CheckExact(self->data)
1645 } else {
1646 Py_DECREF(JOIN_OBJ(self->last->tail));
1647 self->last->tail = JOIN_SET(
1648 self->data, PyList_CheckExact(self->data)
1651 self->data = NULL;
1654 if (self->index == 0) {
1655 PyErr_SetString(
1656 PyExc_IndexError,
1657 "pop from empty stack"
1659 return NULL;
1662 self->index--;
1664 item = PyList_GET_ITEM(self->stack, self->index);
1665 Py_INCREF(item);
1667 Py_DECREF(self->last);
1669 self->last = (ElementObject*) self->this;
1670 self->this = (ElementObject*) item;
1672 if (self->end_event_obj) {
1673 PyObject* res;
1674 PyObject* action = self->end_event_obj;
1675 PyObject* node = (PyObject*) self->last;
1676 res = PyTuple_New(2);
1677 if (res) {
1678 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1679 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1680 PyList_Append(self->events, res);
1681 Py_DECREF(res);
1682 } else
1683 PyErr_Clear(); /* FIXME: propagate error */
1686 Py_INCREF(self->last);
1687 return (PyObject*) self->last;
1690 LOCAL(void)
1691 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1692 const char* prefix, const char *uri)
1694 PyObject* res;
1695 PyObject* action;
1696 PyObject* parcel;
1698 if (!self->events)
1699 return;
1701 if (start) {
1702 if (!self->start_ns_event_obj)
1703 return;
1704 action = self->start_ns_event_obj;
1705 /* FIXME: prefix and uri use utf-8 encoding! */
1706 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1707 if (!parcel)
1708 return;
1709 Py_INCREF(action);
1710 } else {
1711 if (!self->end_ns_event_obj)
1712 return;
1713 action = self->end_ns_event_obj;
1714 Py_INCREF(action);
1715 parcel = Py_None;
1716 Py_INCREF(parcel);
1719 res = PyTuple_New(2);
1721 if (res) {
1722 PyTuple_SET_ITEM(res, 0, action);
1723 PyTuple_SET_ITEM(res, 1, parcel);
1724 PyList_Append(self->events, res);
1725 Py_DECREF(res);
1726 } else
1727 PyErr_Clear(); /* FIXME: propagate error */
1730 /* -------------------------------------------------------------------- */
1731 /* methods (in alphabetical order) */
1733 static PyObject*
1734 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1736 PyObject* data;
1737 if (!PyArg_ParseTuple(args, "O:data", &data))
1738 return NULL;
1740 return treebuilder_handle_data(self, data);
1743 static PyObject*
1744 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1746 PyObject* tag;
1747 if (!PyArg_ParseTuple(args, "O:end", &tag))
1748 return NULL;
1750 return treebuilder_handle_end(self, tag);
1753 LOCAL(PyObject*)
1754 treebuilder_done(TreeBuilderObject* self)
1756 PyObject* res;
1758 /* FIXME: check stack size? */
1760 if (self->root)
1761 res = self->root;
1762 else
1763 res = Py_None;
1765 Py_INCREF(res);
1766 return res;
1769 static PyObject*
1770 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1772 if (!PyArg_ParseTuple(args, ":close"))
1773 return NULL;
1775 return treebuilder_done(self);
1778 static PyObject*
1779 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1781 PyObject* tag;
1782 PyObject* attrib = Py_None;
1783 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1784 return NULL;
1786 return treebuilder_handle_start(self, tag, attrib);
1789 static PyObject*
1790 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1792 PyObject* encoding;
1793 PyObject* standalone;
1794 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1795 return NULL;
1797 return treebuilder_handle_xml(self, encoding, standalone);
1800 static PyMethodDef treebuilder_methods[] = {
1801 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1802 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1803 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1804 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1805 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1806 {NULL, NULL}
1809 static PyObject*
1810 treebuilder_getattr(TreeBuilderObject* self, char* name)
1812 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1815 statichere PyTypeObject TreeBuilder_Type = {
1816 PyObject_HEAD_INIT(NULL)
1817 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1818 /* methods */
1819 (destructor)treebuilder_dealloc, /* tp_dealloc */
1820 0, /* tp_print */
1821 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1824 /* ==================================================================== */
1825 /* the expat interface */
1827 #if defined(USE_EXPAT)
1829 #include "expat.h"
1831 #if defined(USE_PYEXPAT_CAPI)
1832 #include "pyexpat.h"
1833 static struct PyExpat_CAPI* expat_capi;
1834 #define EXPAT(func) (expat_capi->func)
1835 #else
1836 #define EXPAT(func) (XML_##func)
1837 #endif
1839 typedef struct {
1840 PyObject_HEAD
1842 XML_Parser parser;
1844 PyObject* target;
1845 PyObject* entity;
1847 PyObject* names;
1849 PyObject* handle_xml;
1850 PyObject* handle_start;
1851 PyObject* handle_data;
1852 PyObject* handle_end;
1854 PyObject* handle_comment;
1855 PyObject* handle_pi;
1857 } XMLParserObject;
1859 staticforward PyTypeObject XMLParser_Type;
1861 /* helpers */
#if defined(Py_USING_UNICODE)
LOCAL(int)
checkstring(const char* string, int size)
{
    /* Return 1 if any of the first size bytes has its high bit set
       (i.e. the 8-bit string may contain UTF-8 encoded characters),
       0 if it is plain 7-bit data. */

    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
1878 LOCAL(PyObject*)
1879 makestring(const char* string, int size)
1881 /* convert a UTF-8 string to either a 7-bit ascii string or a
1882 Unicode string */
1884 #if defined(Py_USING_UNICODE)
1885 if (checkstring(string, size))
1886 return PyUnicode_DecodeUTF8(string, size, "strict");
1887 #endif
1889 return PyString_FromStringAndSize(string, size);
1892 LOCAL(PyObject*)
1893 makeuniversal(XMLParserObject* self, const char* string)
1895 /* convert a UTF-8 tag/attribute name from the expat parser
1896 to a universal name string */
1898 int size = strlen(string);
1899 PyObject* key;
1900 PyObject* value;
1902 /* look the 'raw' name up in the names dictionary */
1903 key = PyString_FromStringAndSize(string, size);
1904 if (!key)
1905 return NULL;
1907 value = PyDict_GetItem(self->names, key);
1909 if (value) {
1910 Py_INCREF(value);
1911 } else {
1912 /* new name. convert to universal name, and decode as
1913 necessary */
1915 PyObject* tag;
1916 char* p;
1917 int i;
1919 /* look for namespace separator */
1920 for (i = 0; i < size; i++)
1921 if (string[i] == '}')
1922 break;
1923 if (i != size) {
1924 /* convert to universal name */
1925 tag = PyString_FromStringAndSize(NULL, size+1);
1926 p = PyString_AS_STRING(tag);
1927 p[0] = '{';
1928 memcpy(p+1, string, size);
1929 size++;
1930 } else {
1931 /* plain name; use key as tag */
1932 Py_INCREF(key);
1933 tag = key;
1936 /* decode universal name */
1937 #if defined(Py_USING_UNICODE)
1938 /* inline makestring, to avoid duplicating the source string if
1939 it's not an utf-8 string */
1940 p = PyString_AS_STRING(tag);
1941 if (checkstring(p, size)) {
1942 value = PyUnicode_DecodeUTF8(p, size, "strict");
1943 Py_DECREF(tag);
1944 if (!value) {
1945 Py_DECREF(key);
1946 return NULL;
1948 } else
1949 #endif
1950 value = tag; /* use tag as is */
1952 /* add to names dictionary */
1953 if (PyDict_SetItem(self->names, key, value) < 0) {
1954 Py_DECREF(key);
1955 Py_DECREF(value);
1956 return NULL;
1960 Py_DECREF(key);
1961 return value;
1964 /* -------------------------------------------------------------------- */
1965 /* handlers */
1967 static void
1968 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1969 int data_len)
1971 PyObject* key;
1972 PyObject* value;
1973 PyObject* res;
1975 if (data_len < 2 || data_in[0] != '&')
1976 return;
1978 key = makestring(data_in + 1, data_len - 2);
1979 if (!key)
1980 return;
1982 value = PyDict_GetItem(self->entity, key);
1984 if (value) {
1985 if (TreeBuilder_CheckExact(self->target))
1986 res = treebuilder_handle_data(
1987 (TreeBuilderObject*) self->target, value
1989 else if (self->handle_data)
1990 res = PyObject_CallFunction(self->handle_data, "O", value);
1991 else
1992 res = NULL;
1993 Py_XDECREF(res);
1994 } else {
1995 PyErr_Format(
1996 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
1997 PyString_AS_STRING(key),
1998 EXPAT(GetErrorLineNumber)(self->parser),
1999 EXPAT(GetErrorColumnNumber)(self->parser)
2003 Py_DECREF(key);
2006 static void
2007 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2008 const XML_Char **attrib_in)
2010 PyObject* res;
2011 PyObject* tag;
2012 PyObject* attrib;
2013 int ok;
2015 /* tag name */
2016 tag = makeuniversal(self, tag_in);
2017 if (!tag)
2018 return; /* parser will look for errors */
2020 /* attributes */
2021 if (attrib_in[0]) {
2022 attrib = PyDict_New();
2023 if (!attrib)
2024 return;
2025 while (attrib_in[0] && attrib_in[1]) {
2026 PyObject* key = makeuniversal(self, attrib_in[0]);
2027 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2028 if (!key || !value) {
2029 Py_XDECREF(value);
2030 Py_XDECREF(key);
2031 Py_DECREF(attrib);
2032 return;
2034 ok = PyDict_SetItem(attrib, key, value);
2035 Py_DECREF(value);
2036 Py_DECREF(key);
2037 if (ok < 0) {
2038 Py_DECREF(attrib);
2039 return;
2041 attrib_in += 2;
2043 } else {
2044 Py_INCREF(Py_None);
2045 attrib = Py_None;
2048 if (TreeBuilder_CheckExact(self->target))
2049 /* shortcut */
2050 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2051 tag, attrib);
2052 else if (self->handle_start)
2053 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2054 else
2055 res = NULL;
2057 Py_DECREF(tag);
2058 Py_DECREF(attrib);
2060 Py_XDECREF(res);
2063 static void
2064 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2065 int data_len)
2067 PyObject* data;
2068 PyObject* res;
2070 data = makestring(data_in, data_len);
2071 if (!data)
2072 return; /* parser will look for errors */
2074 if (TreeBuilder_CheckExact(self->target))
2075 /* shortcut */
2076 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2077 else if (self->handle_data)
2078 res = PyObject_CallFunction(self->handle_data, "O", data);
2079 else
2080 res = NULL;
2082 Py_DECREF(data);
2084 Py_XDECREF(res);
2087 static void
2088 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2090 PyObject* tag;
2091 PyObject* res = NULL;
2093 if (TreeBuilder_CheckExact(self->target))
2094 /* shortcut */
2095 /* the standard tree builder doesn't look at the end tag */
2096 res = treebuilder_handle_end(
2097 (TreeBuilderObject*) self->target, Py_None
2099 else if (self->handle_end) {
2100 tag = makeuniversal(self, tag_in);
2101 if (tag) {
2102 res = PyObject_CallFunction(self->handle_end, "O", tag);
2103 Py_DECREF(tag);
2107 Py_XDECREF(res);
2110 static void
2111 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2112 const XML_Char *uri)
2114 treebuilder_handle_namespace(
2115 (TreeBuilderObject*) self->target, 1, prefix, uri
2119 static void
2120 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2122 treebuilder_handle_namespace(
2123 (TreeBuilderObject*) self->target, 0, NULL, NULL
2127 static void
2128 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2130 PyObject* comment;
2131 PyObject* res;
2133 if (self->handle_comment) {
2134 comment = makestring(comment_in, strlen(comment_in));
2135 if (comment) {
2136 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2137 Py_XDECREF(res);
2138 Py_DECREF(comment);
2143 static void
2144 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2145 const XML_Char* data_in)
2147 PyObject* target;
2148 PyObject* data;
2149 PyObject* res;
2151 if (self->handle_pi) {
2152 target = makestring(target_in, strlen(target_in));
2153 data = makestring(data_in, strlen(data_in));
2154 if (target && data) {
2155 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2156 Py_XDECREF(res);
2157 Py_DECREF(data);
2158 Py_DECREF(target);
2159 } else {
2160 Py_XDECREF(data);
2161 Py_XDECREF(target);
#if defined(Py_USING_UNICODE)
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    /* Build an expat byte-to-character map for an encoding expat does
       not know, by decoding all 256 byte values with the Python codec
       of the same name.  Bytes the codec cannot decode are mapped to
       -1.  Returns XML_STATUS_OK, or XML_STATUS_ERROR if the codec is
       unusable or does not produce one character per byte. */

    unsigned char bytes[256];
    PyObject* decoded;
    Py_UNICODE* chars;
    int ch;

    memset(info, 0, sizeof(XML_Encoding));

    /* every possible byte value, in order */
    for (ch = 0; ch < 256; ch++)
        bytes[ch] = ch;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (ch = 0; ch < 256; ch++) {
        if (chars[ch] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[ch] = -1;
        else
            info->map[ch] = chars[ch];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2205 /* -------------------------------------------------------------------- */
2206 /* constructor and destructor */
2208 static PyObject*
2209 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
2211 XMLParserObject* self;
2212 /* FIXME: does this need to be static? */
2213 static XML_Memory_Handling_Suite memory_handler;
2215 PyObject* target = NULL;
2216 char* encoding = NULL;
2217 static char* kwlist[] = { "target", "encoding", NULL };
2218 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2219 &target, &encoding))
2220 return NULL;
2222 #if defined(USE_PYEXPAT_CAPI)
2223 if (!expat_capi) {
2224 PyErr_SetString(
2225 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2227 return NULL;
2229 #endif
2231 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2232 if (self == NULL)
2233 return NULL;
2235 self->entity = PyDict_New();
2236 if (!self->entity) {
2237 PyObject_Del(self);
2238 return NULL;
2241 self->names = PyDict_New();
2242 if (!self->names) {
2243 PyObject_Del(self->entity);
2244 PyObject_Del(self);
2245 return NULL;
2248 memory_handler.malloc_fcn = PyObject_Malloc;
2249 memory_handler.realloc_fcn = PyObject_Realloc;
2250 memory_handler.free_fcn = PyObject_Free;
2252 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2253 if (!self->parser) {
2254 PyObject_Del(self->names);
2255 PyObject_Del(self->entity);
2256 PyObject_Del(self);
2257 PyErr_NoMemory();
2258 return NULL;
2261 /* setup target handlers */
2262 if (!target) {
2263 target = treebuilder_new();
2264 if (!target) {
2265 EXPAT(ParserFree)(self->parser);
2266 PyObject_Del(self->names);
2267 PyObject_Del(self->entity);
2268 PyObject_Del(self);
2269 return NULL;
2271 } else
2272 Py_INCREF(target);
2273 self->target = target;
2275 self->handle_xml = PyObject_GetAttrString(target, "xml");
2276 self->handle_start = PyObject_GetAttrString(target, "start");
2277 self->handle_data = PyObject_GetAttrString(target, "data");
2278 self->handle_end = PyObject_GetAttrString(target, "end");
2279 self->handle_comment = PyObject_GetAttrString(target, "comment");
2280 self->handle_pi = PyObject_GetAttrString(target, "pi");
2282 PyErr_Clear();
2284 /* configure parser */
2285 EXPAT(SetUserData)(self->parser, self);
2286 EXPAT(SetElementHandler)(
2287 self->parser,
2288 (XML_StartElementHandler) expat_start_handler,
2289 (XML_EndElementHandler) expat_end_handler
2291 EXPAT(SetDefaultHandlerExpand)(
2292 self->parser,
2293 (XML_DefaultHandler) expat_default_handler
2295 EXPAT(SetCharacterDataHandler)(
2296 self->parser,
2297 (XML_CharacterDataHandler) expat_data_handler
2299 if (self->handle_comment)
2300 EXPAT(SetCommentHandler)(
2301 self->parser,
2302 (XML_CommentHandler) expat_comment_handler
2304 if (self->handle_pi)
2305 EXPAT(SetProcessingInstructionHandler)(
2306 self->parser,
2307 (XML_ProcessingInstructionHandler) expat_pi_handler
2309 #if defined(Py_USING_UNICODE)
2310 EXPAT(SetUnknownEncodingHandler)(
2311 self->parser,
2312 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2314 #endif
2316 ALLOC(sizeof(XMLParserObject), "create expatparser");
2318 return (PyObject*) self;
2321 static void
2322 xmlparser_dealloc(XMLParserObject* self)
2324 EXPAT(ParserFree)(self->parser);
2326 Py_XDECREF(self->handle_pi);
2327 Py_XDECREF(self->handle_comment);
2328 Py_XDECREF(self->handle_end);
2329 Py_XDECREF(self->handle_data);
2330 Py_XDECREF(self->handle_start);
2331 Py_XDECREF(self->handle_xml);
2333 Py_DECREF(self->target);
2334 Py_DECREF(self->entity);
2335 Py_DECREF(self->names);
2337 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2339 PyObject_Del(self);
2342 /* -------------------------------------------------------------------- */
2343 /* methods (in alphabetical order) */
2345 LOCAL(PyObject*)
2346 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2348 int ok;
2350 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2352 if (PyErr_Occurred())
2353 return NULL;
2355 if (!ok) {
2356 PyErr_Format(
2357 PyExc_SyntaxError, "%s: line %ld, column %ld",
2358 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2359 EXPAT(GetErrorLineNumber)(self->parser),
2360 EXPAT(GetErrorColumnNumber)(self->parser)
2362 return NULL;
2365 Py_RETURN_NONE;
2368 static PyObject*
2369 xmlparser_close(XMLParserObject* self, PyObject* args)
2371 /* end feeding data to parser */
2373 PyObject* res;
2374 if (!PyArg_ParseTuple(args, ":close"))
2375 return NULL;
2377 res = expat_parse(self, "", 0, 1);
2379 if (res && TreeBuilder_CheckExact(self->target)) {
2380 Py_DECREF(res);
2381 return treebuilder_done((TreeBuilderObject*) self->target);
2384 return res;
2387 static PyObject*
2388 xmlparser_feed(XMLParserObject* self, PyObject* args)
2390 /* feed data to parser */
2392 char* data;
2393 int data_len;
2394 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2395 return NULL;
2397 return expat_parse(self, data, data_len, 0);
2400 static PyObject*
2401 xmlparser_parse(XMLParserObject* self, PyObject* args)
2403 /* (internal) parse until end of input stream */
2405 PyObject* reader;
2406 PyObject* buffer;
2407 PyObject* res;
2409 PyObject* fileobj;
2410 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2411 return NULL;
2413 reader = PyObject_GetAttrString(fileobj, "read");
2414 if (!reader)
2415 return NULL;
2417 /* read from open file object */
2418 for (;;) {
2420 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2422 if (!buffer) {
2423 /* read failed (e.g. due to KeyboardInterrupt) */
2424 Py_DECREF(reader);
2425 return NULL;
2428 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2429 Py_DECREF(buffer);
2430 break;
2433 res = expat_parse(
2434 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2437 Py_DECREF(buffer);
2439 if (!res) {
2440 Py_DECREF(reader);
2441 return NULL;
2443 Py_DECREF(res);
2447 Py_DECREF(reader);
2449 res = expat_parse(self, "", 0, 1);
2451 if (res && TreeBuilder_CheckExact(self->target)) {
2452 Py_DECREF(res);
2453 return treebuilder_done((TreeBuilderObject*) self->target);
2456 return res;
2459 static PyObject*
2460 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2462 /* activate element event reporting */
2464 Py_ssize_t i;
2465 TreeBuilderObject* target;
2467 PyObject* events; /* event collector */
2468 PyObject* event_set = Py_None;
2469 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2470 &event_set))
2471 return NULL;
2473 if (!TreeBuilder_CheckExact(self->target)) {
2474 PyErr_SetString(
2475 PyExc_TypeError,
2476 "event handling only supported for cElementTree.Treebuilder "
2477 "targets"
2479 return NULL;
2482 target = (TreeBuilderObject*) self->target;
2484 Py_INCREF(events);
2485 Py_XDECREF(target->events);
2486 target->events = events;
2488 /* clear out existing events */
2489 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2490 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2491 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2492 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2494 if (event_set == Py_None) {
2495 /* default is "end" only */
2496 target->end_event_obj = PyString_FromString("end");
2497 Py_RETURN_NONE;
2500 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2501 goto error;
2503 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2504 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2505 char* event;
2506 if (!PyString_Check(item))
2507 goto error;
2508 event = PyString_AS_STRING(item);
2509 if (strcmp(event, "start") == 0) {
2510 Py_INCREF(item);
2511 target->start_event_obj = item;
2512 } else if (strcmp(event, "end") == 0) {
2513 Py_INCREF(item);
2514 Py_XDECREF(target->end_event_obj);
2515 target->end_event_obj = item;
2516 } else if (strcmp(event, "start-ns") == 0) {
2517 Py_INCREF(item);
2518 Py_XDECREF(target->start_ns_event_obj);
2519 target->start_ns_event_obj = item;
2520 EXPAT(SetNamespaceDeclHandler)(
2521 self->parser,
2522 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2523 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2525 } else if (strcmp(event, "end-ns") == 0) {
2526 Py_INCREF(item);
2527 Py_XDECREF(target->end_ns_event_obj);
2528 target->end_ns_event_obj = item;
2529 EXPAT(SetNamespaceDeclHandler)(
2530 self->parser,
2531 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2532 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2534 } else {
2535 PyErr_Format(
2536 PyExc_ValueError,
2537 "unknown event '%s'", event
2539 return NULL;
2543 Py_RETURN_NONE;
2545 error:
2546 PyErr_SetString(
2547 PyExc_TypeError,
2548 "invalid event tuple"
2550 return NULL;
2553 static PyMethodDef xmlparser_methods[] = {
2554 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2555 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2556 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2557 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2558 {NULL, NULL}
2561 static PyObject*
2562 xmlparser_getattr(XMLParserObject* self, char* name)
2564 PyObject* res;
2566 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2567 if (res)
2568 return res;
2570 PyErr_Clear();
2572 if (strcmp(name, "entity") == 0)
2573 res = self->entity;
2574 else if (strcmp(name, "target") == 0)
2575 res = self->target;
2576 else if (strcmp(name, "version") == 0) {
2577 char buffer[100];
2578 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2579 XML_MINOR_VERSION, XML_MICRO_VERSION);
2580 return PyString_FromString(buffer);
2581 } else {
2582 PyErr_SetString(PyExc_AttributeError, name);
2583 return NULL;
2586 Py_INCREF(res);
2587 return res;
2590 statichere PyTypeObject XMLParser_Type = {
2591 PyObject_HEAD_INIT(NULL)
2592 0, "XMLParser", sizeof(XMLParserObject), 0,
2593 /* methods */
2594 (destructor)xmlparser_dealloc, /* tp_dealloc */
2595 0, /* tp_print */
2596 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2599 #endif
2601 /* ==================================================================== */
2602 /* python module interface */
2604 static PyMethodDef _functions[] = {
2605 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2606 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2607 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2608 #if defined(USE_EXPAT)
2609 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2610 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2611 #endif
2612 {NULL, NULL}
2615 DL_EXPORT(void)
2616 init_elementtree(void)
2618 PyObject* m;
2619 PyObject* g;
2620 char* bootstrap;
2621 #if defined(USE_PYEXPAT_CAPI)
2622 struct PyExpat_CAPI* capi;
2623 #endif
2625 /* Patch object type */
2626 Py_Type(&Element_Type) = Py_Type(&TreeBuilder_Type) = &PyType_Type;
2627 #if defined(USE_EXPAT)
2628 Py_Type(&XMLParser_Type) = &PyType_Type;
2629 #endif
2631 m = Py_InitModule("_elementtree", _functions);
2632 if (!m)
2633 return;
2635 /* python glue code */
2637 g = PyDict_New();
2638 if (!g)
2639 return;
2641 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2643 bootstrap = (
2645 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2646 "from __future__ import generators\n" /* enable yield under 2.2 */
2647 #endif
2649 "from copy import copy, deepcopy\n"
2651 "try:\n"
2652 " from xml.etree import ElementTree\n"
2653 "except ImportError:\n"
2654 " import ElementTree\n"
2655 "ET = ElementTree\n"
2656 "del ElementTree\n"
2658 "import _elementtree as cElementTree\n"
2660 "try:\n" /* check if copy works as is */
2661 " copy(cElementTree.Element('x'))\n"
2662 "except:\n"
2663 " def copyelement(elem):\n"
2664 " return elem\n"
2666 "def Comment(text=None):\n" /* public */
2667 " element = cElementTree.Element(ET.Comment)\n"
2668 " element.text = text\n"
2669 " return element\n"
2670 "cElementTree.Comment = Comment\n"
2672 "class ElementTree(ET.ElementTree):\n" /* public */
2673 " def parse(self, source, parser=None):\n"
2674 " if not hasattr(source, 'read'):\n"
2675 " source = open(source, 'rb')\n"
2676 " if parser is not None:\n"
2677 " while 1:\n"
2678 " data = source.read(65536)\n"
2679 " if not data:\n"
2680 " break\n"
2681 " parser.feed(data)\n"
2682 " self._root = parser.close()\n"
2683 " else:\n"
2684 " parser = cElementTree.XMLParser()\n"
2685 " self._root = parser._parse(source)\n"
2686 " return self._root\n"
2687 "cElementTree.ElementTree = ElementTree\n"
2689 "def getiterator(node, tag=None):\n" /* helper */
2690 " if tag == '*':\n"
2691 " tag = None\n"
2692 #if (PY_VERSION_HEX < 0x02020000)
2693 " nodes = []\n" /* 2.1 doesn't have yield */
2694 " if tag is None or node.tag == tag:\n"
2695 " nodes.append(node)\n"
2696 " for node in node:\n"
2697 " nodes.extend(getiterator(node, tag))\n"
2698 " return nodes\n"
2699 #else
2700 " if tag is None or node.tag == tag:\n"
2701 " yield node\n"
2702 " for node in node:\n"
2703 " for node in getiterator(node, tag):\n"
2704 " yield node\n"
2705 #endif
2707 "def parse(source, parser=None):\n" /* public */
2708 " tree = ElementTree()\n"
2709 " tree.parse(source, parser)\n"
2710 " return tree\n"
2711 "cElementTree.parse = parse\n"
2713 #if (PY_VERSION_HEX < 0x02020000)
2714 "if hasattr(ET, 'iterparse'):\n"
2715 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2716 #else
2717 "class iterparse(object):\n"
2718 " root = None\n"
2719 " def __init__(self, file, events=None):\n"
2720 " if not hasattr(file, 'read'):\n"
2721 " file = open(file, 'rb')\n"
2722 " self._file = file\n"
2723 " self._events = events\n"
2724 " def __iter__(self):\n"
2725 " events = []\n"
2726 " b = cElementTree.TreeBuilder()\n"
2727 " p = cElementTree.XMLParser(b)\n"
2728 " p._setevents(events, self._events)\n"
2729 " while 1:\n"
2730 " data = self._file.read(16384)\n"
2731 " if not data:\n"
2732 " break\n"
2733 " p.feed(data)\n"
2734 " for event in events:\n"
2735 " yield event\n"
2736 " del events[:]\n"
2737 " root = p.close()\n"
2738 " for event in events:\n"
2739 " yield event\n"
2740 " self.root = root\n"
2741 "cElementTree.iterparse = iterparse\n"
2742 #endif
2744 "def PI(target, text=None):\n" /* public */
2745 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2746 " element.text = target\n"
2747 " if text:\n"
2748 " element.text = element.text + ' ' + text\n"
2749 " return element\n"
2751 " elem = cElementTree.Element(ET.PI)\n"
2752 " elem.text = text\n"
2753 " return elem\n"
2754 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2756 "def XML(text):\n" /* public */
2757 " parser = cElementTree.XMLParser()\n"
2758 " parser.feed(text)\n"
2759 " return parser.close()\n"
2760 "cElementTree.XML = cElementTree.fromstring = XML\n"
2762 "def XMLID(text):\n" /* public */
2763 " tree = XML(text)\n"
2764 " ids = {}\n"
2765 " for elem in tree.getiterator():\n"
2766 " id = elem.get('id')\n"
2767 " if id:\n"
2768 " ids[id] = elem\n"
2769 " return tree, ids\n"
2770 "cElementTree.XMLID = XMLID\n"
2772 "cElementTree.dump = ET.dump\n"
2773 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2774 "cElementTree.iselement = ET.iselement\n"
2775 "cElementTree.QName = ET.QName\n"
2776 "cElementTree.tostring = ET.tostring\n"
2777 "cElementTree.VERSION = '" VERSION "'\n"
2778 "cElementTree.__version__ = '" VERSION "'\n"
2779 "cElementTree.XMLParserError = SyntaxError\n"
2783 PyRun_String(bootstrap, Py_file_input, g, NULL);
2785 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2787 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2788 if (elementtree_copyelement_obj) {
2789 /* reduce hack needed; enable reduce method */
2790 PyMethodDef* mp;
2791 for (mp = element_methods; mp->ml_name; mp++)
2792 if (mp->ml_meth == (PyCFunction) element_reduce) {
2793 mp->ml_name = "__reduce__";
2794 break;
2796 } else
2797 PyErr_Clear();
2798 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2799 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2801 #if defined(USE_PYEXPAT_CAPI)
2802 /* link against pyexpat, if possible */
2803 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2804 if (capi &&
2805 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2806 capi->size <= sizeof(*expat_capi) &&
2807 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2808 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2809 capi->MICRO_VERSION == XML_MICRO_VERSION)
2810 expat_capi = capi;
2811 else
2812 expat_capi = NULL;
2813 #endif