Add missing issue number in Misc/NEWS entry.
[python.git] / Modules / _elementtree.c
blob684081c7c926db489778003b5975cbfff3fcb32c
1 /*
2 * ElementTree
3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
42 * info@pythonware.com
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
49 #include "Python.h"
51 #define VERSION "1.0.6"
53 /* -------------------------------------------------------------------- */
54 /* configuration */
56 /* Leave defined to include the expat-based XMLParser type */
57 #define USE_EXPAT
59 /* Define to route all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
63 allocations. */
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
76 /* -------------------------------------------------------------------- */
78 #if 0
79 static int memory = 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84 #else
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
87 #endif
89 /* compiler tweaks */
90 #if defined(_MSC_VER)
91 #define LOCAL(type) static __inline type __fastcall
92 #else
93 #define LOCAL(type) static type
94 #endif
96 /* compatibility macros */
97 #if (PY_VERSION_HEX < 0x02050000)
98 typedef int Py_ssize_t;
99 #define lenfunc inquiry
100 #endif
102 #if (PY_VERSION_HEX < 0x02040000)
103 #define PyDict_CheckExact PyDict_Check
104 #if (PY_VERSION_HEX < 0x02020000)
105 #define PyList_CheckExact PyList_Check
106 #define PyString_CheckExact PyString_Check
107 #if (PY_VERSION_HEX >= 0x01060000)
108 #define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
109 #endif
110 #endif
111 #endif
113 #if !defined(Py_RETURN_NONE)
114 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
115 #endif
117 /* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
120 info. */
121 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
125 /* glue functions (see the init function for details) */
126 static PyObject* elementtree_copyelement_obj;
127 static PyObject* elementtree_deepcopy_obj;
128 static PyObject* elementtree_getiterator_obj;
129 static PyObject* elementpath_obj;
131 /* helpers */
133 LOCAL(PyObject*)
134 deepcopy(PyObject* object, PyObject* memo)
136 /* do a deep copy of the given object */
138 PyObject* args;
139 PyObject* result;
141 if (!elementtree_deepcopy_obj) {
142 PyErr_SetString(
143 PyExc_RuntimeError,
144 "deepcopy helper not found"
146 return NULL;
149 args = PyTuple_New(2);
150 if (!args)
151 return NULL;
153 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
154 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
156 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
158 Py_DECREF(args);
160 return result;
163 LOCAL(PyObject*)
164 list_join(PyObject* list)
166 /* join list elements (destroying the list in the process) */
168 PyObject* joiner;
169 PyObject* function;
170 PyObject* args;
171 PyObject* result;
173 switch (PyList_GET_SIZE(list)) {
174 case 0:
175 Py_DECREF(list);
176 return PyString_FromString("");
177 case 1:
178 result = PyList_GET_ITEM(list, 0);
179 Py_INCREF(result);
180 Py_DECREF(list);
181 return result;
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
187 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
188 if (!joiner)
189 return NULL;
191 function = PyObject_GetAttrString(joiner, "join");
192 if (!function) {
193 Py_DECREF(joiner);
194 return NULL;
197 args = PyTuple_New(1);
198 if (!args)
199 return NULL;
201 PyTuple_SET_ITEM(args, 0, list);
203 result = PyObject_CallObject(function, args);
205 Py_DECREF(args); /* also removes list */
206 Py_DECREF(function);
207 Py_DECREF(joiner);
209 return result;
212 #if (PY_VERSION_HEX < 0x02020000)
213 LOCAL(int)
214 PyDict_Update(PyObject* dict, PyObject* other)
216 /* PyDict_Update emulation for 2.1 and earlier */
218 PyObject* res;
220 res = PyObject_CallMethod(dict, "update", "O", other);
221 if (!res)
222 return -1;
224 Py_DECREF(res);
225 return 0;
227 #endif
229 /* -------------------------------------------------------------------- */
230 /* the element type */
232 typedef struct {
234 /* attributes (a dictionary object), or None if no attributes */
235 PyObject* attrib;
237 /* child elements */
238 int length; /* actual number of items */
239 int allocated; /* allocated items */
241 /* this either points to _children or to a malloced buffer */
242 PyObject* *children;
244 PyObject* _children[STATIC_CHILDREN];
246 } ElementObjectExtra;
248 typedef struct {
249 PyObject_HEAD
251 /* element tag (a string). */
252 PyObject* tag;
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
260 PyObject* text;
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
264 PyObject* tail;
266 ElementObjectExtra* extra;
268 } ElementObject;
270 staticforward PyTypeObject Element_Type;
272 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
274 /* -------------------------------------------------------------------- */
275 /* element constructor and destructor */
277 LOCAL(int)
278 element_new_extra(ElementObject* self, PyObject* attrib)
280 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
281 if (!self->extra)
282 return -1;
284 if (!attrib)
285 attrib = Py_None;
287 Py_INCREF(attrib);
288 self->extra->attrib = attrib;
290 self->extra->length = 0;
291 self->extra->allocated = STATIC_CHILDREN;
292 self->extra->children = self->extra->_children;
294 return 0;
297 LOCAL(void)
298 element_dealloc_extra(ElementObject* self)
300 int i;
302 Py_DECREF(self->extra->attrib);
304 for (i = 0; i < self->extra->length; i++)
305 Py_DECREF(self->extra->children[i]);
307 if (self->extra->children != self->extra->_children)
308 PyObject_Free(self->extra->children);
310 PyObject_Free(self->extra);
313 LOCAL(PyObject*)
314 element_new(PyObject* tag, PyObject* attrib)
316 ElementObject* self;
318 self = PyObject_New(ElementObject, &Element_Type);
319 if (self == NULL)
320 return NULL;
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
324 attrib = Py_None;
326 self->extra = NULL;
328 if (attrib != Py_None) {
330 if (element_new_extra(self, attrib) < 0) {
331 PyObject_Del(self);
332 return NULL;
335 self->extra->length = 0;
336 self->extra->allocated = STATIC_CHILDREN;
337 self->extra->children = self->extra->_children;
341 Py_INCREF(tag);
342 self->tag = tag;
344 Py_INCREF(Py_None);
345 self->text = Py_None;
347 Py_INCREF(Py_None);
348 self->tail = Py_None;
350 ALLOC(sizeof(ElementObject), "create element");
352 return (PyObject*) self;
355 LOCAL(int)
356 element_resize(ElementObject* self, int extra)
358 int size;
359 PyObject* *children;
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
364 if (!self->extra)
365 element_new_extra(self, NULL);
367 size = self->extra->length + extra;
369 if (size > self->extra->allocated) {
370 /* use Python 2.4's list growth strategy */
371 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
372 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
373 * which needs at least 4 bytes.
374 * Although it's a false alarm always assume at least one child to
375 * be safe.
377 size = size ? size : 1;
378 if (self->extra->children != self->extra->_children) {
379 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
380 * "children", which needs at least 4 bytes. Although it's a
381 * false alarm always assume at least one child to be safe.
383 children = PyObject_Realloc(self->extra->children,
384 size * sizeof(PyObject*));
385 if (!children)
386 goto nomemory;
387 } else {
388 children = PyObject_Malloc(size * sizeof(PyObject*));
389 if (!children)
390 goto nomemory;
391 /* copy existing children from static area to malloc buffer */
392 memcpy(children, self->extra->children,
393 self->extra->length * sizeof(PyObject*));
395 self->extra->children = children;
396 self->extra->allocated = size;
399 return 0;
401 nomemory:
402 PyErr_NoMemory();
403 return -1;
406 LOCAL(int)
407 element_add_subelement(ElementObject* self, PyObject* element)
409 /* add a child element to a parent */
411 if (element_resize(self, 1) < 0)
412 return -1;
414 Py_INCREF(element);
415 self->extra->children[self->extra->length] = element;
417 self->extra->length++;
419 return 0;
422 LOCAL(PyObject*)
423 element_get_attrib(ElementObject* self)
425 /* return borrowed reference to attrib dictionary */
426 /* note: this function assumes that the extra section exists */
428 PyObject* res = self->extra->attrib;
430 if (res == Py_None) {
431 /* create missing dictionary */
432 res = PyDict_New();
433 if (!res)
434 return NULL;
435 self->extra->attrib = res;
438 return res;
441 LOCAL(PyObject*)
442 element_get_text(ElementObject* self)
444 /* return borrowed reference to text attribute */
446 PyObject* res = self->text;
448 if (JOIN_GET(res)) {
449 res = JOIN_OBJ(res);
450 if (PyList_CheckExact(res)) {
451 res = list_join(res);
452 if (!res)
453 return NULL;
454 self->text = res;
458 return res;
461 LOCAL(PyObject*)
462 element_get_tail(ElementObject* self)
464 /* return borrowed reference to text attribute */
466 PyObject* res = self->tail;
468 if (JOIN_GET(res)) {
469 res = JOIN_OBJ(res);
470 if (PyList_CheckExact(res)) {
471 res = list_join(res);
472 if (!res)
473 return NULL;
474 self->tail = res;
478 return res;
481 static PyObject*
482 element(PyObject* self, PyObject* args, PyObject* kw)
484 PyObject* elem;
486 PyObject* tag;
487 PyObject* attrib = NULL;
488 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
489 &PyDict_Type, &attrib))
490 return NULL;
492 if (attrib || kw) {
493 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
494 if (!attrib)
495 return NULL;
496 if (kw)
497 PyDict_Update(attrib, kw);
498 } else {
499 Py_INCREF(Py_None);
500 attrib = Py_None;
503 elem = element_new(tag, attrib);
505 Py_DECREF(attrib);
507 return elem;
510 static PyObject*
511 subelement(PyObject* self, PyObject* args, PyObject* kw)
513 PyObject* elem;
515 ElementObject* parent;
516 PyObject* tag;
517 PyObject* attrib = NULL;
518 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
519 &Element_Type, &parent, &tag,
520 &PyDict_Type, &attrib))
521 return NULL;
523 if (attrib || kw) {
524 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
525 if (!attrib)
526 return NULL;
527 if (kw)
528 PyDict_Update(attrib, kw);
529 } else {
530 Py_INCREF(Py_None);
531 attrib = Py_None;
534 elem = element_new(tag, attrib);
536 Py_DECREF(attrib);
538 if (element_add_subelement(parent, elem) < 0) {
539 Py_DECREF(elem);
540 return NULL;
543 return elem;
546 static void
547 element_dealloc(ElementObject* self)
549 if (self->extra)
550 element_dealloc_extra(self);
552 /* discard attributes */
553 Py_DECREF(self->tag);
554 Py_DECREF(JOIN_OBJ(self->text));
555 Py_DECREF(JOIN_OBJ(self->tail));
557 RELEASE(sizeof(ElementObject), "destroy element");
559 PyObject_Del(self);
562 /* -------------------------------------------------------------------- */
563 /* methods (in alphabetical order) */
565 static PyObject*
566 element_append(ElementObject* self, PyObject* args)
568 PyObject* element;
569 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
570 return NULL;
572 if (element_add_subelement(self, element) < 0)
573 return NULL;
575 Py_RETURN_NONE;
578 static PyObject*
579 element_clear(ElementObject* self, PyObject* args)
581 if (!PyArg_ParseTuple(args, ":clear"))
582 return NULL;
584 if (self->extra) {
585 element_dealloc_extra(self);
586 self->extra = NULL;
589 Py_INCREF(Py_None);
590 Py_DECREF(JOIN_OBJ(self->text));
591 self->text = Py_None;
593 Py_INCREF(Py_None);
594 Py_DECREF(JOIN_OBJ(self->tail));
595 self->tail = Py_None;
597 Py_RETURN_NONE;
600 static PyObject*
601 element_copy(ElementObject* self, PyObject* args)
603 int i;
604 ElementObject* element;
606 if (!PyArg_ParseTuple(args, ":__copy__"))
607 return NULL;
609 element = (ElementObject*) element_new(
610 self->tag, (self->extra) ? self->extra->attrib : Py_None
612 if (!element)
613 return NULL;
615 Py_DECREF(JOIN_OBJ(element->text));
616 element->text = self->text;
617 Py_INCREF(JOIN_OBJ(element->text));
619 Py_DECREF(JOIN_OBJ(element->tail));
620 element->tail = self->tail;
621 Py_INCREF(JOIN_OBJ(element->tail));
623 if (self->extra) {
625 if (element_resize(element, self->extra->length) < 0) {
626 Py_DECREF(element);
627 return NULL;
630 for (i = 0; i < self->extra->length; i++) {
631 Py_INCREF(self->extra->children[i]);
632 element->extra->children[i] = self->extra->children[i];
635 element->extra->length = self->extra->length;
639 return (PyObject*) element;
642 static PyObject*
643 element_deepcopy(ElementObject* self, PyObject* args)
645 int i;
646 ElementObject* element;
647 PyObject* tag;
648 PyObject* attrib;
649 PyObject* text;
650 PyObject* tail;
651 PyObject* id;
653 PyObject* memo;
654 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
655 return NULL;
657 tag = deepcopy(self->tag, memo);
658 if (!tag)
659 return NULL;
661 if (self->extra) {
662 attrib = deepcopy(self->extra->attrib, memo);
663 if (!attrib) {
664 Py_DECREF(tag);
665 return NULL;
667 } else {
668 Py_INCREF(Py_None);
669 attrib = Py_None;
672 element = (ElementObject*) element_new(tag, attrib);
674 Py_DECREF(tag);
675 Py_DECREF(attrib);
677 if (!element)
678 return NULL;
680 text = deepcopy(JOIN_OBJ(self->text), memo);
681 if (!text)
682 goto error;
683 Py_DECREF(element->text);
684 element->text = JOIN_SET(text, JOIN_GET(self->text));
686 tail = deepcopy(JOIN_OBJ(self->tail), memo);
687 if (!tail)
688 goto error;
689 Py_DECREF(element->tail);
690 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
692 if (self->extra) {
694 if (element_resize(element, self->extra->length) < 0)
695 goto error;
697 for (i = 0; i < self->extra->length; i++) {
698 PyObject* child = deepcopy(self->extra->children[i], memo);
699 if (!child) {
700 element->extra->length = i;
701 goto error;
703 element->extra->children[i] = child;
706 element->extra->length = self->extra->length;
710 /* add object to memo dictionary (so deepcopy won't visit it again) */
711 id = PyInt_FromLong((Py_uintptr_t) self);
713 i = PyDict_SetItem(memo, id, (PyObject*) element);
715 Py_DECREF(id);
717 if (i < 0)
718 goto error;
720 return (PyObject*) element;
722 error:
723 Py_DECREF(element);
724 return NULL;
727 LOCAL(int)
728 checkpath(PyObject* tag)
730 Py_ssize_t i;
731 int check = 1;
733 /* check if a tag contains an xpath character */
735 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
737 #if defined(Py_USING_UNICODE)
738 if (PyUnicode_Check(tag)) {
739 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
740 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
741 if (p[i] == '{')
742 check = 0;
743 else if (p[i] == '}')
744 check = 1;
745 else if (check && PATHCHAR(p[i]))
746 return 1;
748 return 0;
750 #endif
751 if (PyString_Check(tag)) {
752 char *p = PyString_AS_STRING(tag);
753 for (i = 0; i < PyString_GET_SIZE(tag); i++) {
754 if (p[i] == '{')
755 check = 0;
756 else if (p[i] == '}')
757 check = 1;
758 else if (check && PATHCHAR(p[i]))
759 return 1;
761 return 0;
764 return 1; /* unknown type; might be path expression */
767 static PyObject*
768 element_find(ElementObject* self, PyObject* args)
770 int i;
772 PyObject* tag;
773 if (!PyArg_ParseTuple(args, "O:find", &tag))
774 return NULL;
776 if (checkpath(tag))
777 return PyObject_CallMethod(
778 elementpath_obj, "find", "OO", self, tag
781 if (!self->extra)
782 Py_RETURN_NONE;
784 for (i = 0; i < self->extra->length; i++) {
785 PyObject* item = self->extra->children[i];
786 if (Element_CheckExact(item) &&
787 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
788 Py_INCREF(item);
789 return item;
793 Py_RETURN_NONE;
796 static PyObject*
797 element_findtext(ElementObject* self, PyObject* args)
799 int i;
801 PyObject* tag;
802 PyObject* default_value = Py_None;
803 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
804 return NULL;
806 if (checkpath(tag))
807 return PyObject_CallMethod(
808 elementpath_obj, "findtext", "OOO", self, tag, default_value
811 if (!self->extra) {
812 Py_INCREF(default_value);
813 return default_value;
816 for (i = 0; i < self->extra->length; i++) {
817 ElementObject* item = (ElementObject*) self->extra->children[i];
818 if (Element_CheckExact(item) && !PyObject_Compare(item->tag, tag)) {
819 PyObject* text = element_get_text(item);
820 if (text == Py_None)
821 return PyString_FromString("");
822 Py_XINCREF(text);
823 return text;
827 Py_INCREF(default_value);
828 return default_value;
831 static PyObject*
832 element_findall(ElementObject* self, PyObject* args)
834 int i;
835 PyObject* out;
837 PyObject* tag;
838 if (!PyArg_ParseTuple(args, "O:findall", &tag))
839 return NULL;
841 if (checkpath(tag))
842 return PyObject_CallMethod(
843 elementpath_obj, "findall", "OO", self, tag
846 out = PyList_New(0);
847 if (!out)
848 return NULL;
850 if (!self->extra)
851 return out;
853 for (i = 0; i < self->extra->length; i++) {
854 PyObject* item = self->extra->children[i];
855 if (Element_CheckExact(item) &&
856 PyObject_Compare(((ElementObject*)item)->tag, tag) == 0) {
857 if (PyList_Append(out, item) < 0) {
858 Py_DECREF(out);
859 return NULL;
864 return out;
867 static PyObject*
868 element_get(ElementObject* self, PyObject* args)
870 PyObject* value;
872 PyObject* key;
873 PyObject* default_value = Py_None;
874 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
875 return NULL;
877 if (!self->extra || self->extra->attrib == Py_None)
878 value = default_value;
879 else {
880 value = PyDict_GetItem(self->extra->attrib, key);
881 if (!value)
882 value = default_value;
885 Py_INCREF(value);
886 return value;
889 static PyObject*
890 element_getchildren(ElementObject* self, PyObject* args)
892 int i;
893 PyObject* list;
895 if (!PyArg_ParseTuple(args, ":getchildren"))
896 return NULL;
898 if (!self->extra)
899 return PyList_New(0);
901 list = PyList_New(self->extra->length);
902 if (!list)
903 return NULL;
905 for (i = 0; i < self->extra->length; i++) {
906 PyObject* item = self->extra->children[i];
907 Py_INCREF(item);
908 PyList_SET_ITEM(list, i, item);
911 return list;
914 static PyObject*
915 element_getiterator(ElementObject* self, PyObject* args)
917 PyObject* result;
919 PyObject* tag = Py_None;
920 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
921 return NULL;
923 if (!elementtree_getiterator_obj) {
924 PyErr_SetString(
925 PyExc_RuntimeError,
926 "getiterator helper not found"
928 return NULL;
931 args = PyTuple_New(2);
932 if (!args)
933 return NULL;
935 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
936 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
938 result = PyObject_CallObject(elementtree_getiterator_obj, args);
940 Py_DECREF(args);
942 return result;
945 static PyObject*
946 element_getitem(PyObject* self_, Py_ssize_t index)
948 ElementObject* self = (ElementObject*) self_;
950 if (!self->extra || index < 0 || index >= self->extra->length) {
951 PyErr_SetString(
952 PyExc_IndexError,
953 "child index out of range"
955 return NULL;
958 Py_INCREF(self->extra->children[index]);
959 return self->extra->children[index];
962 static PyObject*
963 element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
965 ElementObject* self = (ElementObject*) self_;
966 Py_ssize_t i;
967 PyObject* list;
969 if (!self->extra)
970 return PyList_New(0);
972 /* standard clamping */
973 if (start < 0)
974 start = 0;
975 if (end < 0)
976 end = 0;
977 if (end > self->extra->length)
978 end = self->extra->length;
979 if (start > end)
980 start = end;
982 list = PyList_New(end - start);
983 if (!list)
984 return NULL;
986 for (i = start; i < end; i++) {
987 PyObject* item = self->extra->children[i];
988 Py_INCREF(item);
989 PyList_SET_ITEM(list, i - start, item);
992 return list;
995 static PyObject*
996 element_insert(ElementObject* self, PyObject* args)
998 int i;
1000 int index;
1001 PyObject* element;
1002 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1003 &Element_Type, &element))
1004 return NULL;
1006 if (!self->extra)
1007 element_new_extra(self, NULL);
1009 if (index < 0)
1010 index = 0;
1011 if (index > self->extra->length)
1012 index = self->extra->length;
1014 if (element_resize(self, 1) < 0)
1015 return NULL;
1017 for (i = self->extra->length; i > index; i--)
1018 self->extra->children[i] = self->extra->children[i-1];
1020 Py_INCREF(element);
1021 self->extra->children[index] = element;
1023 self->extra->length++;
1025 Py_RETURN_NONE;
1028 static PyObject*
1029 element_items(ElementObject* self, PyObject* args)
1031 if (!PyArg_ParseTuple(args, ":items"))
1032 return NULL;
1034 if (!self->extra || self->extra->attrib == Py_None)
1035 return PyList_New(0);
1037 return PyDict_Items(self->extra->attrib);
1040 static PyObject*
1041 element_keys(ElementObject* self, PyObject* args)
1043 if (!PyArg_ParseTuple(args, ":keys"))
1044 return NULL;
1046 if (!self->extra || self->extra->attrib == Py_None)
1047 return PyList_New(0);
1049 return PyDict_Keys(self->extra->attrib);
1052 static Py_ssize_t
1053 element_length(ElementObject* self)
1055 if (!self->extra)
1056 return 0;
1058 return self->extra->length;
1061 static PyObject*
1062 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1064 PyObject* elem;
1066 PyObject* tag;
1067 PyObject* attrib;
1068 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1069 return NULL;
1071 attrib = PyDict_Copy(attrib);
1072 if (!attrib)
1073 return NULL;
1075 elem = element_new(tag, attrib);
1077 Py_DECREF(attrib);
1079 return elem;
1082 static PyObject*
1083 element_reduce(ElementObject* self, PyObject* args)
1085 if (!PyArg_ParseTuple(args, ":__reduce__"))
1086 return NULL;
1088 /* Hack alert: This method is used to work around a __copy__
1089 problem on certain 2.3 and 2.4 versions. To save time and
1090 simplify the code, we create the copy in here, and use a dummy
1091 copyelement helper to trick the copy module into doing the
1092 right thing. */
1094 if (!elementtree_copyelement_obj) {
1095 PyErr_SetString(
1096 PyExc_RuntimeError,
1097 "copyelement helper not found"
1099 return NULL;
1102 return Py_BuildValue(
1103 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1107 static PyObject*
1108 element_remove(ElementObject* self, PyObject* args)
1110 int i;
1112 PyObject* element;
1113 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1114 return NULL;
1116 if (!self->extra) {
1117 /* element has no children, so raise exception */
1118 PyErr_SetString(
1119 PyExc_ValueError,
1120 "list.remove(x): x not in list"
1122 return NULL;
1125 for (i = 0; i < self->extra->length; i++) {
1126 if (self->extra->children[i] == element)
1127 break;
1128 if (PyObject_Compare(self->extra->children[i], element) == 0)
1129 break;
1132 if (i == self->extra->length) {
1133 /* element is not in children, so raise exception */
1134 PyErr_SetString(
1135 PyExc_ValueError,
1136 "list.remove(x): x not in list"
1138 return NULL;
1141 Py_DECREF(self->extra->children[i]);
1143 self->extra->length--;
1145 for (; i < self->extra->length; i++)
1146 self->extra->children[i] = self->extra->children[i+1];
1148 Py_RETURN_NONE;
1151 static PyObject*
1152 element_repr(ElementObject* self)
1154 PyObject* repr;
1155 char buffer[100];
1157 repr = PyString_FromString("<Element ");
1159 PyString_ConcatAndDel(&repr, PyObject_Repr(self->tag));
1161 sprintf(buffer, " at %p>", self);
1162 PyString_ConcatAndDel(&repr, PyString_FromString(buffer));
1164 return repr;
1167 static PyObject*
1168 element_set(ElementObject* self, PyObject* args)
1170 PyObject* attrib;
1172 PyObject* key;
1173 PyObject* value;
1174 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1175 return NULL;
1177 if (!self->extra)
1178 element_new_extra(self, NULL);
1180 attrib = element_get_attrib(self);
1181 if (!attrib)
1182 return NULL;
1184 if (PyDict_SetItem(attrib, key, value) < 0)
1185 return NULL;
1187 Py_RETURN_NONE;
1190 static int
1191 element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
1193 ElementObject* self = (ElementObject*) self_;
1194 Py_ssize_t i, new, old;
1195 PyObject* recycle = NULL;
1197 if (!self->extra)
1198 element_new_extra(self, NULL);
1200 /* standard clamping */
1201 if (start < 0)
1202 start = 0;
1203 if (end < 0)
1204 end = 0;
1205 if (end > self->extra->length)
1206 end = self->extra->length;
1207 if (start > end)
1208 start = end;
1210 old = end - start;
1212 if (item == NULL)
1213 new = 0;
1214 else if (PyList_CheckExact(item)) {
1215 new = PyList_GET_SIZE(item);
1216 } else {
1217 /* FIXME: support arbitrary sequences? */
1218 PyErr_Format(
1219 PyExc_TypeError,
1220 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
1222 return -1;
1225 if (old > 0) {
1226 /* to avoid recursive calls to this method (via decref), move
1227 old items to the recycle bin here, and get rid of them when
1228 we're done modifying the element */
1229 recycle = PyList_New(old);
1230 for (i = 0; i < old; i++)
1231 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1234 if (new < old) {
1235 /* delete slice */
1236 for (i = end; i < self->extra->length; i++)
1237 self->extra->children[i + new - old] = self->extra->children[i];
1238 } else if (new > old) {
1239 /* insert slice */
1240 if (element_resize(self, new - old) < 0)
1241 return -1;
1242 for (i = self->extra->length-1; i >= end; i--)
1243 self->extra->children[i + new - old] = self->extra->children[i];
1246 /* replace the slice */
1247 for (i = 0; i < new; i++) {
1248 PyObject* element = PyList_GET_ITEM(item, i);
1249 Py_INCREF(element);
1250 self->extra->children[i + start] = element;
1253 self->extra->length += new - old;
1255 /* discard the recycle bin, and everything in it */
1256 Py_XDECREF(recycle);
1258 return 0;
1261 static int
1262 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1264 ElementObject* self = (ElementObject*) self_;
1265 int i;
1266 PyObject* old;
1268 if (!self->extra || index < 0 || index >= self->extra->length) {
1269 PyErr_SetString(
1270 PyExc_IndexError,
1271 "child assignment index out of range");
1272 return -1;
1275 old = self->extra->children[index];
1277 if (item) {
1278 Py_INCREF(item);
1279 self->extra->children[index] = item;
1280 } else {
1281 self->extra->length--;
1282 for (i = index; i < self->extra->length; i++)
1283 self->extra->children[i] = self->extra->children[i+1];
1286 Py_DECREF(old);
1288 return 0;
1291 static PyMethodDef element_methods[] = {
1293 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1295 {"get", (PyCFunction) element_get, METH_VARARGS},
1296 {"set", (PyCFunction) element_set, METH_VARARGS},
1298 {"find", (PyCFunction) element_find, METH_VARARGS},
1299 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1300 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1302 {"append", (PyCFunction) element_append, METH_VARARGS},
1303 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1304 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1306 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1307 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1309 {"items", (PyCFunction) element_items, METH_VARARGS},
1310 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1312 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1314 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1315 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1317 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1318 C objects correctly, so we have to fake it using a __reduce__-
1319 based hack (see the element_reduce implementation above for
1320 details). */
1322 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1323 using a runtime test to figure out if we need to fake things
1324 or now (see the init code below). The following entry is
1325 enabled only if the hack is needed. */
1327 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1329 {NULL, NULL}
1332 static PyObject*
1333 element_getattr(ElementObject* self, char* name)
1335 PyObject* res;
1337 res = Py_FindMethod(element_methods, (PyObject*) self, name);
1338 if (res)
1339 return res;
1341 PyErr_Clear();
1343 if (strcmp(name, "tag") == 0)
1344 res = self->tag;
1345 else if (strcmp(name, "text") == 0)
1346 res = element_get_text(self);
1347 else if (strcmp(name, "tail") == 0) {
1348 res = element_get_tail(self);
1349 } else if (strcmp(name, "attrib") == 0) {
1350 if (!self->extra)
1351 element_new_extra(self, NULL);
1352 res = element_get_attrib(self);
1353 } else {
1354 PyErr_SetString(PyExc_AttributeError, name);
1355 return NULL;
1358 if (!res)
1359 return NULL;
1361 Py_INCREF(res);
1362 return res;
1365 static int
1366 element_setattr(ElementObject* self, const char* name, PyObject* value)
1368 if (value == NULL) {
1369 PyErr_SetString(
1370 PyExc_AttributeError,
1371 "can't delete element attributes"
1373 return -1;
1376 if (strcmp(name, "tag") == 0) {
1377 Py_DECREF(self->tag);
1378 self->tag = value;
1379 Py_INCREF(self->tag);
1380 } else if (strcmp(name, "text") == 0) {
1381 Py_DECREF(JOIN_OBJ(self->text));
1382 self->text = value;
1383 Py_INCREF(self->text);
1384 } else if (strcmp(name, "tail") == 0) {
1385 Py_DECREF(JOIN_OBJ(self->tail));
1386 self->tail = value;
1387 Py_INCREF(self->tail);
1388 } else if (strcmp(name, "attrib") == 0) {
1389 if (!self->extra)
1390 element_new_extra(self, NULL);
1391 Py_DECREF(self->extra->attrib);
1392 self->extra->attrib = value;
1393 Py_INCREF(self->extra->attrib);
1394 } else {
1395 PyErr_SetString(PyExc_AttributeError, name);
1396 return -1;
1399 return 0;
1402 static PySequenceMethods element_as_sequence = {
1403 (lenfunc) element_length,
1404 0, /* sq_concat */
1405 0, /* sq_repeat */
1406 element_getitem,
1407 element_getslice,
1408 element_setitem,
1409 element_setslice,
1412 statichere PyTypeObject Element_Type = {
1413 PyObject_HEAD_INIT(NULL)
1414 0, "Element", sizeof(ElementObject), 0,
1415 /* methods */
1416 (destructor)element_dealloc, /* tp_dealloc */
1417 0, /* tp_print */
1418 (getattrfunc)element_getattr, /* tp_getattr */
1419 (setattrfunc)element_setattr, /* tp_setattr */
1420 0, /* tp_compare */
1421 (reprfunc)element_repr, /* tp_repr */
1422 0, /* tp_as_number */
1423 &element_as_sequence, /* tp_as_sequence */
1426 /* ==================================================================== */
1427 /* the tree builder type */
1429 typedef struct {
1430 PyObject_HEAD
1432 PyObject* root; /* root node (first created node) */
1434 ElementObject* this; /* current node */
1435 ElementObject* last; /* most recently created node */
1437 PyObject* data; /* data collector (string or list), or NULL */
1439 PyObject* stack; /* element stack */
1440 Py_ssize_t index; /* current stack size (0=empty) */
1442 /* element tracing */
1443 PyObject* events; /* list of events, or NULL if not collecting */
1444 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1445 PyObject* end_event_obj;
1446 PyObject* start_ns_event_obj;
1447 PyObject* end_ns_event_obj;
1449 } TreeBuilderObject;
1451 staticforward PyTypeObject TreeBuilder_Type;
1453 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
1455 /* -------------------------------------------------------------------- */
1456 /* constructor and destructor */
1458 LOCAL(PyObject*)
1459 treebuilder_new(void)
1461 TreeBuilderObject* self;
1463 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1464 if (self == NULL)
1465 return NULL;
1467 self->root = NULL;
1469 Py_INCREF(Py_None);
1470 self->this = (ElementObject*) Py_None;
1472 Py_INCREF(Py_None);
1473 self->last = (ElementObject*) Py_None;
1475 self->data = NULL;
1477 self->stack = PyList_New(20);
1478 self->index = 0;
1480 self->events = NULL;
1481 self->start_event_obj = self->end_event_obj = NULL;
1482 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1484 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1486 return (PyObject*) self;
1489 static PyObject*
1490 treebuilder(PyObject* self_, PyObject* args)
1492 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1493 return NULL;
1495 return treebuilder_new();
1498 static void
1499 treebuilder_dealloc(TreeBuilderObject* self)
1501 Py_XDECREF(self->end_ns_event_obj);
1502 Py_XDECREF(self->start_ns_event_obj);
1503 Py_XDECREF(self->end_event_obj);
1504 Py_XDECREF(self->start_event_obj);
1505 Py_XDECREF(self->events);
1506 Py_DECREF(self->stack);
1507 Py_XDECREF(self->data);
1508 Py_DECREF(self->last);
1509 Py_DECREF(self->this);
1510 Py_XDECREF(self->root);
1512 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1514 PyObject_Del(self);
1517 /* -------------------------------------------------------------------- */
1518 /* handlers */
1520 LOCAL(PyObject*)
1521 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1522 PyObject* standalone)
1524 Py_RETURN_NONE;
1527 LOCAL(PyObject*)
1528 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1529 PyObject* attrib)
1531 PyObject* node;
1532 PyObject* this;
1534 if (self->data) {
1535 if (self->this == self->last) {
1536 Py_DECREF(JOIN_OBJ(self->last->text));
1537 self->last->text = JOIN_SET(
1538 self->data, PyList_CheckExact(self->data)
1540 } else {
1541 Py_DECREF(JOIN_OBJ(self->last->tail));
1542 self->last->tail = JOIN_SET(
1543 self->data, PyList_CheckExact(self->data)
1546 self->data = NULL;
1549 node = element_new(tag, attrib);
1550 if (!node)
1551 return NULL;
1553 this = (PyObject*) self->this;
1555 if (this != Py_None) {
1556 if (element_add_subelement((ElementObject*) this, node) < 0)
1557 goto error;
1558 } else {
1559 if (self->root) {
1560 PyErr_SetString(
1561 PyExc_SyntaxError,
1562 "multiple elements on top level"
1564 goto error;
1566 Py_INCREF(node);
1567 self->root = node;
1570 if (self->index < PyList_GET_SIZE(self->stack)) {
1571 if (PyList_SetItem(self->stack, self->index, this) < 0)
1572 goto error;
1573 Py_INCREF(this);
1574 } else {
1575 if (PyList_Append(self->stack, this) < 0)
1576 goto error;
1578 self->index++;
1580 Py_DECREF(this);
1581 Py_INCREF(node);
1582 self->this = (ElementObject*) node;
1584 Py_DECREF(self->last);
1585 Py_INCREF(node);
1586 self->last = (ElementObject*) node;
1588 if (self->start_event_obj) {
1589 PyObject* res;
1590 PyObject* action = self->start_event_obj;
1591 res = PyTuple_New(2);
1592 if (res) {
1593 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1594 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1595 PyList_Append(self->events, res);
1596 Py_DECREF(res);
1597 } else
1598 PyErr_Clear(); /* FIXME: propagate error */
1601 return node;
1603 error:
1604 Py_DECREF(node);
1605 return NULL;
1608 LOCAL(PyObject*)
1609 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1611 if (!self->data) {
1612 if (self->last == (ElementObject*) Py_None) {
1613 /* ignore calls to data before the first call to start */
1614 Py_RETURN_NONE;
1616 /* store the first item as is */
1617 Py_INCREF(data); self->data = data;
1618 } else {
1619 /* more than one item; use a list to collect items */
1620 if (PyString_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1621 PyString_CheckExact(data) && PyString_GET_SIZE(data) == 1) {
1622 /* expat often generates single character data sections; handle
1623 the most common case by resizing the existing string... */
1624 Py_ssize_t size = PyString_GET_SIZE(self->data);
1625 if (_PyString_Resize(&self->data, size + 1) < 0)
1626 return NULL;
1627 PyString_AS_STRING(self->data)[size] = PyString_AS_STRING(data)[0];
1628 } else if (PyList_CheckExact(self->data)) {
1629 if (PyList_Append(self->data, data) < 0)
1630 return NULL;
1631 } else {
1632 PyObject* list = PyList_New(2);
1633 if (!list)
1634 return NULL;
1635 PyList_SET_ITEM(list, 0, self->data);
1636 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1637 self->data = list;
1641 Py_RETURN_NONE;
1644 LOCAL(PyObject*)
1645 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1647 PyObject* item;
1649 if (self->data) {
1650 if (self->this == self->last) {
1651 Py_DECREF(JOIN_OBJ(self->last->text));
1652 self->last->text = JOIN_SET(
1653 self->data, PyList_CheckExact(self->data)
1655 } else {
1656 Py_DECREF(JOIN_OBJ(self->last->tail));
1657 self->last->tail = JOIN_SET(
1658 self->data, PyList_CheckExact(self->data)
1661 self->data = NULL;
1664 if (self->index == 0) {
1665 PyErr_SetString(
1666 PyExc_IndexError,
1667 "pop from empty stack"
1669 return NULL;
1672 self->index--;
1674 item = PyList_GET_ITEM(self->stack, self->index);
1675 Py_INCREF(item);
1677 Py_DECREF(self->last);
1679 self->last = (ElementObject*) self->this;
1680 self->this = (ElementObject*) item;
1682 if (self->end_event_obj) {
1683 PyObject* res;
1684 PyObject* action = self->end_event_obj;
1685 PyObject* node = (PyObject*) self->last;
1686 res = PyTuple_New(2);
1687 if (res) {
1688 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1689 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1690 PyList_Append(self->events, res);
1691 Py_DECREF(res);
1692 } else
1693 PyErr_Clear(); /* FIXME: propagate error */
1696 Py_INCREF(self->last);
1697 return (PyObject*) self->last;
1700 LOCAL(void)
1701 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1702 const char* prefix, const char *uri)
1704 PyObject* res;
1705 PyObject* action;
1706 PyObject* parcel;
1708 if (!self->events)
1709 return;
1711 if (start) {
1712 if (!self->start_ns_event_obj)
1713 return;
1714 action = self->start_ns_event_obj;
1715 /* FIXME: prefix and uri use utf-8 encoding! */
1716 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1717 if (!parcel)
1718 return;
1719 Py_INCREF(action);
1720 } else {
1721 if (!self->end_ns_event_obj)
1722 return;
1723 action = self->end_ns_event_obj;
1724 Py_INCREF(action);
1725 parcel = Py_None;
1726 Py_INCREF(parcel);
1729 res = PyTuple_New(2);
1731 if (res) {
1732 PyTuple_SET_ITEM(res, 0, action);
1733 PyTuple_SET_ITEM(res, 1, parcel);
1734 PyList_Append(self->events, res);
1735 Py_DECREF(res);
1736 } else
1737 PyErr_Clear(); /* FIXME: propagate error */
1740 /* -------------------------------------------------------------------- */
1741 /* methods (in alphabetical order) */
1743 static PyObject*
1744 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1746 PyObject* data;
1747 if (!PyArg_ParseTuple(args, "O:data", &data))
1748 return NULL;
1750 return treebuilder_handle_data(self, data);
1753 static PyObject*
1754 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1756 PyObject* tag;
1757 if (!PyArg_ParseTuple(args, "O:end", &tag))
1758 return NULL;
1760 return treebuilder_handle_end(self, tag);
1763 LOCAL(PyObject*)
1764 treebuilder_done(TreeBuilderObject* self)
1766 PyObject* res;
1768 /* FIXME: check stack size? */
1770 if (self->root)
1771 res = self->root;
1772 else
1773 res = Py_None;
1775 Py_INCREF(res);
1776 return res;
1779 static PyObject*
1780 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1782 if (!PyArg_ParseTuple(args, ":close"))
1783 return NULL;
1785 return treebuilder_done(self);
1788 static PyObject*
1789 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1791 PyObject* tag;
1792 PyObject* attrib = Py_None;
1793 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1794 return NULL;
1796 return treebuilder_handle_start(self, tag, attrib);
1799 static PyObject*
1800 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1802 PyObject* encoding;
1803 PyObject* standalone;
1804 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1805 return NULL;
1807 return treebuilder_handle_xml(self, encoding, standalone);
1810 static PyMethodDef treebuilder_methods[] = {
1811 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1812 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1813 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1814 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1815 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1816 {NULL, NULL}
1819 static PyObject*
1820 treebuilder_getattr(TreeBuilderObject* self, char* name)
1822 return Py_FindMethod(treebuilder_methods, (PyObject*) self, name);
1825 statichere PyTypeObject TreeBuilder_Type = {
1826 PyObject_HEAD_INIT(NULL)
1827 0, "TreeBuilder", sizeof(TreeBuilderObject), 0,
1828 /* methods */
1829 (destructor)treebuilder_dealloc, /* tp_dealloc */
1830 0, /* tp_print */
1831 (getattrfunc)treebuilder_getattr, /* tp_getattr */
1834 /* ==================================================================== */
1835 /* the expat interface */
1837 #if defined(USE_EXPAT)
1839 #include "expat.h"
1841 #if defined(USE_PYEXPAT_CAPI)
1842 #include "pyexpat.h"
1843 static struct PyExpat_CAPI* expat_capi;
1844 #define EXPAT(func) (expat_capi->func)
1845 #else
1846 #define EXPAT(func) (XML_##func)
1847 #endif
1849 typedef struct {
1850 PyObject_HEAD
1852 XML_Parser parser;
1854 PyObject* target;
1855 PyObject* entity;
1857 PyObject* names;
1859 PyObject* handle_xml;
1860 PyObject* handle_start;
1861 PyObject* handle_data;
1862 PyObject* handle_end;
1864 PyObject* handle_comment;
1865 PyObject* handle_pi;
1867 } XMLParserObject;
1869 staticforward PyTypeObject XMLParser_Type;
1871 /* helpers */
#if defined(Py_USING_UNICODE)
/* Return 1 if any of the first size bytes of string has its high bit
   set (i.e. the 8-bit string contains UTF-8 characters), else 0. */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if ((string[pos] & 0x80) != 0)
            return 1;
        pos++;
    }

    return 0;
}
#endif
1888 LOCAL(PyObject*)
1889 makestring(const char* string, int size)
1891 /* convert a UTF-8 string to either a 7-bit ascii string or a
1892 Unicode string */
1894 #if defined(Py_USING_UNICODE)
1895 if (checkstring(string, size))
1896 return PyUnicode_DecodeUTF8(string, size, "strict");
1897 #endif
1899 return PyString_FromStringAndSize(string, size);
1902 LOCAL(PyObject*)
1903 makeuniversal(XMLParserObject* self, const char* string)
1905 /* convert a UTF-8 tag/attribute name from the expat parser
1906 to a universal name string */
1908 int size = strlen(string);
1909 PyObject* key;
1910 PyObject* value;
1912 /* look the 'raw' name up in the names dictionary */
1913 key = PyString_FromStringAndSize(string, size);
1914 if (!key)
1915 return NULL;
1917 value = PyDict_GetItem(self->names, key);
1919 if (value) {
1920 Py_INCREF(value);
1921 } else {
1922 /* new name. convert to universal name, and decode as
1923 necessary */
1925 PyObject* tag;
1926 char* p;
1927 int i;
1929 /* look for namespace separator */
1930 for (i = 0; i < size; i++)
1931 if (string[i] == '}')
1932 break;
1933 if (i != size) {
1934 /* convert to universal name */
1935 tag = PyString_FromStringAndSize(NULL, size+1);
1936 p = PyString_AS_STRING(tag);
1937 p[0] = '{';
1938 memcpy(p+1, string, size);
1939 size++;
1940 } else {
1941 /* plain name; use key as tag */
1942 Py_INCREF(key);
1943 tag = key;
1946 /* decode universal name */
1947 #if defined(Py_USING_UNICODE)
1948 /* inline makestring, to avoid duplicating the source string if
1949 it's not an utf-8 string */
1950 p = PyString_AS_STRING(tag);
1951 if (checkstring(p, size)) {
1952 value = PyUnicode_DecodeUTF8(p, size, "strict");
1953 Py_DECREF(tag);
1954 if (!value) {
1955 Py_DECREF(key);
1956 return NULL;
1958 } else
1959 #endif
1960 value = tag; /* use tag as is */
1962 /* add to names dictionary */
1963 if (PyDict_SetItem(self->names, key, value) < 0) {
1964 Py_DECREF(key);
1965 Py_DECREF(value);
1966 return NULL;
1970 Py_DECREF(key);
1971 return value;
1974 /* -------------------------------------------------------------------- */
1975 /* handlers */
1977 static void
1978 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1979 int data_len)
1981 PyObject* key;
1982 PyObject* value;
1983 PyObject* res;
1985 if (data_len < 2 || data_in[0] != '&')
1986 return;
1988 key = makestring(data_in + 1, data_len - 2);
1989 if (!key)
1990 return;
1992 value = PyDict_GetItem(self->entity, key);
1994 if (value) {
1995 if (TreeBuilder_CheckExact(self->target))
1996 res = treebuilder_handle_data(
1997 (TreeBuilderObject*) self->target, value
1999 else if (self->handle_data)
2000 res = PyObject_CallFunction(self->handle_data, "O", value);
2001 else
2002 res = NULL;
2003 Py_XDECREF(res);
2004 } else {
2005 PyErr_Format(
2006 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
2007 PyString_AS_STRING(key),
2008 EXPAT(GetErrorLineNumber)(self->parser),
2009 EXPAT(GetErrorColumnNumber)(self->parser)
2013 Py_DECREF(key);
2016 static void
2017 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2018 const XML_Char **attrib_in)
2020 PyObject* res;
2021 PyObject* tag;
2022 PyObject* attrib;
2023 int ok;
2025 /* tag name */
2026 tag = makeuniversal(self, tag_in);
2027 if (!tag)
2028 return; /* parser will look for errors */
2030 /* attributes */
2031 if (attrib_in[0]) {
2032 attrib = PyDict_New();
2033 if (!attrib)
2034 return;
2035 while (attrib_in[0] && attrib_in[1]) {
2036 PyObject* key = makeuniversal(self, attrib_in[0]);
2037 PyObject* value = makestring(attrib_in[1], strlen(attrib_in[1]));
2038 if (!key || !value) {
2039 Py_XDECREF(value);
2040 Py_XDECREF(key);
2041 Py_DECREF(attrib);
2042 return;
2044 ok = PyDict_SetItem(attrib, key, value);
2045 Py_DECREF(value);
2046 Py_DECREF(key);
2047 if (ok < 0) {
2048 Py_DECREF(attrib);
2049 return;
2051 attrib_in += 2;
2053 } else {
2054 Py_INCREF(Py_None);
2055 attrib = Py_None;
2058 if (TreeBuilder_CheckExact(self->target))
2059 /* shortcut */
2060 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2061 tag, attrib);
2062 else if (self->handle_start)
2063 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2064 else
2065 res = NULL;
2067 Py_DECREF(tag);
2068 Py_DECREF(attrib);
2070 Py_XDECREF(res);
2073 static void
2074 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2075 int data_len)
2077 PyObject* data;
2078 PyObject* res;
2080 data = makestring(data_in, data_len);
2081 if (!data)
2082 return; /* parser will look for errors */
2084 if (TreeBuilder_CheckExact(self->target))
2085 /* shortcut */
2086 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2087 else if (self->handle_data)
2088 res = PyObject_CallFunction(self->handle_data, "O", data);
2089 else
2090 res = NULL;
2092 Py_DECREF(data);
2094 Py_XDECREF(res);
2097 static void
2098 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2100 PyObject* tag;
2101 PyObject* res = NULL;
2103 if (TreeBuilder_CheckExact(self->target))
2104 /* shortcut */
2105 /* the standard tree builder doesn't look at the end tag */
2106 res = treebuilder_handle_end(
2107 (TreeBuilderObject*) self->target, Py_None
2109 else if (self->handle_end) {
2110 tag = makeuniversal(self, tag_in);
2111 if (tag) {
2112 res = PyObject_CallFunction(self->handle_end, "O", tag);
2113 Py_DECREF(tag);
2117 Py_XDECREF(res);
2120 static void
2121 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2122 const XML_Char *uri)
2124 treebuilder_handle_namespace(
2125 (TreeBuilderObject*) self->target, 1, prefix, uri
2129 static void
2130 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2132 treebuilder_handle_namespace(
2133 (TreeBuilderObject*) self->target, 0, NULL, NULL
2137 static void
2138 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2140 PyObject* comment;
2141 PyObject* res;
2143 if (self->handle_comment) {
2144 comment = makestring(comment_in, strlen(comment_in));
2145 if (comment) {
2146 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2147 Py_XDECREF(res);
2148 Py_DECREF(comment);
2153 static void
2154 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2155 const XML_Char* data_in)
2157 PyObject* target;
2158 PyObject* data;
2159 PyObject* res;
2161 if (self->handle_pi) {
2162 target = makestring(target_in, strlen(target_in));
2163 data = makestring(data_in, strlen(data_in));
2164 if (target && data) {
2165 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2166 Py_XDECREF(res);
2167 Py_DECREF(data);
2168 Py_DECREF(target);
2169 } else {
2170 Py_XDECREF(data);
2171 Py_XDECREF(target);
#if defined(Py_USING_UNICODE)
/* Expat unknown-encoding handler.

   Builds the byte-to-character table for the encoding called name by
   decoding the 256 byte values 0..255 with Python's codec machinery.
   Bytes that decode to the replacement character are marked -1.

   Returns XML_STATUS_OK on success, XML_STATUS_ERROR if the bytes
   cannot be decoded or do not decode to exactly 256 characters. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    PyObject* decoded;
    Py_UNICODE* chars;
    unsigned char bytes[256];
    int code;

    memset(info, 0, sizeof(XML_Encoding));

    /* one byte string holding every possible byte value */
    for (code = 0; code < 256; code++)
        bytes[code] = code;

    decoded = PyUnicode_Decode((char*) bytes, 256, name, "replace");
    if (!decoded)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(decoded) != 256) {
        Py_DECREF(decoded);
        return XML_STATUS_ERROR;
    }

    chars = PyUnicode_AS_UNICODE(decoded);

    for (code = 0; code < 256; code++) {
        if (chars[code] == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[code] = -1;
        else
            info->map[code] = chars[code];
    }

    Py_DECREF(decoded);

    return XML_STATUS_OK;
}
#endif
2215 /* -------------------------------------------------------------------- */
2216 /* constructor and destructor */
2218 static PyObject*
2219 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
2221 XMLParserObject* self;
2222 /* FIXME: does this need to be static? */
2223 static XML_Memory_Handling_Suite memory_handler;
2225 PyObject* target = NULL;
2226 char* encoding = NULL;
2227 static char* kwlist[] = { "target", "encoding", NULL };
2228 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2229 &target, &encoding))
2230 return NULL;
2232 #if defined(USE_PYEXPAT_CAPI)
2233 if (!expat_capi) {
2234 PyErr_SetString(
2235 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2237 return NULL;
2239 #endif
2241 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2242 if (self == NULL)
2243 return NULL;
2245 self->entity = PyDict_New();
2246 if (!self->entity) {
2247 PyObject_Del(self);
2248 return NULL;
2251 self->names = PyDict_New();
2252 if (!self->names) {
2253 PyObject_Del(self->entity);
2254 PyObject_Del(self);
2255 return NULL;
2258 memory_handler.malloc_fcn = PyObject_Malloc;
2259 memory_handler.realloc_fcn = PyObject_Realloc;
2260 memory_handler.free_fcn = PyObject_Free;
2262 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2263 if (!self->parser) {
2264 PyObject_Del(self->names);
2265 PyObject_Del(self->entity);
2266 PyObject_Del(self);
2267 PyErr_NoMemory();
2268 return NULL;
2271 /* setup target handlers */
2272 if (!target) {
2273 target = treebuilder_new();
2274 if (!target) {
2275 EXPAT(ParserFree)(self->parser);
2276 PyObject_Del(self->names);
2277 PyObject_Del(self->entity);
2278 PyObject_Del(self);
2279 return NULL;
2281 } else
2282 Py_INCREF(target);
2283 self->target = target;
2285 self->handle_xml = PyObject_GetAttrString(target, "xml");
2286 self->handle_start = PyObject_GetAttrString(target, "start");
2287 self->handle_data = PyObject_GetAttrString(target, "data");
2288 self->handle_end = PyObject_GetAttrString(target, "end");
2289 self->handle_comment = PyObject_GetAttrString(target, "comment");
2290 self->handle_pi = PyObject_GetAttrString(target, "pi");
2292 PyErr_Clear();
2294 /* configure parser */
2295 EXPAT(SetUserData)(self->parser, self);
2296 EXPAT(SetElementHandler)(
2297 self->parser,
2298 (XML_StartElementHandler) expat_start_handler,
2299 (XML_EndElementHandler) expat_end_handler
2301 EXPAT(SetDefaultHandlerExpand)(
2302 self->parser,
2303 (XML_DefaultHandler) expat_default_handler
2305 EXPAT(SetCharacterDataHandler)(
2306 self->parser,
2307 (XML_CharacterDataHandler) expat_data_handler
2309 if (self->handle_comment)
2310 EXPAT(SetCommentHandler)(
2311 self->parser,
2312 (XML_CommentHandler) expat_comment_handler
2314 if (self->handle_pi)
2315 EXPAT(SetProcessingInstructionHandler)(
2316 self->parser,
2317 (XML_ProcessingInstructionHandler) expat_pi_handler
2319 #if defined(Py_USING_UNICODE)
2320 EXPAT(SetUnknownEncodingHandler)(
2321 self->parser,
2322 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2324 #endif
2326 ALLOC(sizeof(XMLParserObject), "create expatparser");
2328 return (PyObject*) self;
2331 static void
2332 xmlparser_dealloc(XMLParserObject* self)
2334 EXPAT(ParserFree)(self->parser);
2336 Py_XDECREF(self->handle_pi);
2337 Py_XDECREF(self->handle_comment);
2338 Py_XDECREF(self->handle_end);
2339 Py_XDECREF(self->handle_data);
2340 Py_XDECREF(self->handle_start);
2341 Py_XDECREF(self->handle_xml);
2343 Py_DECREF(self->target);
2344 Py_DECREF(self->entity);
2345 Py_DECREF(self->names);
2347 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2349 PyObject_Del(self);
2352 /* -------------------------------------------------------------------- */
2353 /* methods (in alphabetical order) */
2355 LOCAL(PyObject*)
2356 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2358 int ok;
2360 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2362 if (PyErr_Occurred())
2363 return NULL;
2365 if (!ok) {
2366 PyErr_Format(
2367 PyExc_SyntaxError, "%s: line %ld, column %ld",
2368 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2369 EXPAT(GetErrorLineNumber)(self->parser),
2370 EXPAT(GetErrorColumnNumber)(self->parser)
2372 return NULL;
2375 Py_RETURN_NONE;
2378 static PyObject*
2379 xmlparser_close(XMLParserObject* self, PyObject* args)
2381 /* end feeding data to parser */
2383 PyObject* res;
2384 if (!PyArg_ParseTuple(args, ":close"))
2385 return NULL;
2387 res = expat_parse(self, "", 0, 1);
2389 if (res && TreeBuilder_CheckExact(self->target)) {
2390 Py_DECREF(res);
2391 return treebuilder_done((TreeBuilderObject*) self->target);
2394 return res;
2397 static PyObject*
2398 xmlparser_feed(XMLParserObject* self, PyObject* args)
2400 /* feed data to parser */
2402 char* data;
2403 int data_len;
2404 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2405 return NULL;
2407 return expat_parse(self, data, data_len, 0);
2410 static PyObject*
2411 xmlparser_parse(XMLParserObject* self, PyObject* args)
2413 /* (internal) parse until end of input stream */
2415 PyObject* reader;
2416 PyObject* buffer;
2417 PyObject* res;
2419 PyObject* fileobj;
2420 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2421 return NULL;
2423 reader = PyObject_GetAttrString(fileobj, "read");
2424 if (!reader)
2425 return NULL;
2427 /* read from open file object */
2428 for (;;) {
2430 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2432 if (!buffer) {
2433 /* read failed (e.g. due to KeyboardInterrupt) */
2434 Py_DECREF(reader);
2435 return NULL;
2438 if (!PyString_CheckExact(buffer) || PyString_GET_SIZE(buffer) == 0) {
2439 Py_DECREF(buffer);
2440 break;
2443 res = expat_parse(
2444 self, PyString_AS_STRING(buffer), PyString_GET_SIZE(buffer), 0
2447 Py_DECREF(buffer);
2449 if (!res) {
2450 Py_DECREF(reader);
2451 return NULL;
2453 Py_DECREF(res);
2457 Py_DECREF(reader);
2459 res = expat_parse(self, "", 0, 1);
2461 if (res && TreeBuilder_CheckExact(self->target)) {
2462 Py_DECREF(res);
2463 return treebuilder_done((TreeBuilderObject*) self->target);
2466 return res;
2469 static PyObject*
2470 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2472 /* activate element event reporting */
2474 Py_ssize_t i;
2475 TreeBuilderObject* target;
2477 PyObject* events; /* event collector */
2478 PyObject* event_set = Py_None;
2479 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2480 &event_set))
2481 return NULL;
2483 if (!TreeBuilder_CheckExact(self->target)) {
2484 PyErr_SetString(
2485 PyExc_TypeError,
2486 "event handling only supported for cElementTree.Treebuilder "
2487 "targets"
2489 return NULL;
2492 target = (TreeBuilderObject*) self->target;
2494 Py_INCREF(events);
2495 Py_XDECREF(target->events);
2496 target->events = events;
2498 /* clear out existing events */
2499 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2500 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2501 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2502 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2504 if (event_set == Py_None) {
2505 /* default is "end" only */
2506 target->end_event_obj = PyString_FromString("end");
2507 Py_RETURN_NONE;
2510 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2511 goto error;
2513 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2514 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2515 char* event;
2516 if (!PyString_Check(item))
2517 goto error;
2518 event = PyString_AS_STRING(item);
2519 if (strcmp(event, "start") == 0) {
2520 Py_INCREF(item);
2521 target->start_event_obj = item;
2522 } else if (strcmp(event, "end") == 0) {
2523 Py_INCREF(item);
2524 Py_XDECREF(target->end_event_obj);
2525 target->end_event_obj = item;
2526 } else if (strcmp(event, "start-ns") == 0) {
2527 Py_INCREF(item);
2528 Py_XDECREF(target->start_ns_event_obj);
2529 target->start_ns_event_obj = item;
2530 EXPAT(SetNamespaceDeclHandler)(
2531 self->parser,
2532 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2533 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2535 } else if (strcmp(event, "end-ns") == 0) {
2536 Py_INCREF(item);
2537 Py_XDECREF(target->end_ns_event_obj);
2538 target->end_ns_event_obj = item;
2539 EXPAT(SetNamespaceDeclHandler)(
2540 self->parser,
2541 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2542 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2544 } else {
2545 PyErr_Format(
2546 PyExc_ValueError,
2547 "unknown event '%s'", event
2549 return NULL;
2553 Py_RETURN_NONE;
2555 error:
2556 PyErr_SetString(
2557 PyExc_TypeError,
2558 "invalid event tuple"
2560 return NULL;
2563 static PyMethodDef xmlparser_methods[] = {
2564 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2565 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2566 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2567 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2568 {NULL, NULL}
2571 static PyObject*
2572 xmlparser_getattr(XMLParserObject* self, char* name)
2574 PyObject* res;
2576 res = Py_FindMethod(xmlparser_methods, (PyObject*) self, name);
2577 if (res)
2578 return res;
2580 PyErr_Clear();
2582 if (strcmp(name, "entity") == 0)
2583 res = self->entity;
2584 else if (strcmp(name, "target") == 0)
2585 res = self->target;
2586 else if (strcmp(name, "version") == 0) {
2587 char buffer[100];
2588 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2589 XML_MINOR_VERSION, XML_MICRO_VERSION);
2590 return PyString_FromString(buffer);
2591 } else {
2592 PyErr_SetString(PyExc_AttributeError, name);
2593 return NULL;
2596 Py_INCREF(res);
2597 return res;
2600 statichere PyTypeObject XMLParser_Type = {
2601 PyObject_HEAD_INIT(NULL)
2602 0, "XMLParser", sizeof(XMLParserObject), 0,
2603 /* methods */
2604 (destructor)xmlparser_dealloc, /* tp_dealloc */
2605 0, /* tp_print */
2606 (getattrfunc)xmlparser_getattr, /* tp_getattr */
2609 #endif
2611 /* ==================================================================== */
2612 /* python module interface */
2614 static PyMethodDef _functions[] = {
2615 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2616 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2617 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2618 #if defined(USE_EXPAT)
2619 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2620 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2621 #endif
2622 {NULL, NULL}
/*
 * Module initialisation entry point for the "_elementtree" extension.
 *
 * Steps performed, in order:
 *   1. Assign &PyType_Type as the type of the static type objects
 *      (Element, TreeBuilder and, when USE_EXPAT is defined, XMLParser).
 *   2. Create the module with Py_InitModule() using the _functions table.
 *   3. Create a fresh globals dict, add __builtins__ to it and run the
 *      embedded Python "bootstrap" source in it.  The bootstrap imports
 *      this module as cElementTree and attaches the pure-Python helpers
 *      (Comment, ElementTree, parse, iterparse, PI, XML, XMLID, ...) and
 *      several names borrowed from the Python ElementTree module to it.
 *   4. Pick helper objects (ElementPath, copyelement, deepcopy,
 *      getiterator) out of that dict and store them in variables declared
 *      outside this function.
 *   5. When USE_PYEXPAT_CAPI is defined, import pyexpat's C API and keep
 *      it only if its magic string, size and Expat version pass the checks
 *      below; otherwise expat_capi is set to NULL.
 *
 * Returns nothing.  If the module or the globals dict cannot be created
 * the function simply returns early.
 */
2625 DL_EXPORT(void)
2626 init_elementtree(void)
2628 PyObject* m;
2629 PyObject* g;
2630 char* bootstrap;
2631 #if defined(USE_PYEXPAT_CAPI)
2632 struct PyExpat_CAPI* capi;
2633 #endif
2635 /* Patch object type */
2636 Py_TYPE(&Element_Type) = Py_TYPE(&TreeBuilder_Type) = &PyType_Type;
2637 #if defined(USE_EXPAT)
2638 Py_TYPE(&XMLParser_Type) = &PyType_Type;
2639 #endif
2641 m = Py_InitModule("_elementtree", _functions);
2642 if (!m)
2643 return;
2645 /* python glue code */
/* g is the globals dict the bootstrap source below is executed in */
2647 g = PyDict_New();
2648 if (!g)
2649 return;
/* NOTE(review): the return value of PyDict_SetItemString is not checked */
2651 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
/* The bootstrap is a single C string assembled from adjacent literals;
   the preprocessor conditionals select text for older Python versions. */
2653 bootstrap = (
2655 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2656 "from __future__ import generators\n" /* enable yield under 2.2 */
2657 #endif
2659 "from copy import copy, deepcopy\n"
/* prefer the xml.etree package, fall back to a top-level ElementTree */
2661 "try:\n"
2662 " from xml.etree import ElementTree\n"
2663 "except ImportError:\n"
2664 " import ElementTree\n"
2665 "ET = ElementTree\n"
2666 "del ElementTree\n"
2668 "import _elementtree as cElementTree\n"
/* copyelement is defined only when the copy() probe raises; its presence
   is tested after the bootstrap has run */
2670 "try:\n" /* check if copy works as is */
2671 " copy(cElementTree.Element('x'))\n"
2672 "except:\n"
2673 " def copyelement(elem):\n"
2674 " return elem\n"
2676 "def Comment(text=None):\n" /* public */
2677 " element = cElementTree.Element(ET.Comment)\n"
2678 " element.text = text\n"
2679 " return element\n"
2680 "cElementTree.Comment = Comment\n"
/* ElementTree.parse: with an explicit parser, feed it in 64k chunks;
   otherwise hand the source to a new XMLParser's _parse method */
2682 "class ElementTree(ET.ElementTree):\n" /* public */
2683 " def parse(self, source, parser=None):\n"
2684 " if not hasattr(source, 'read'):\n"
2685 " source = open(source, 'rb')\n"
2686 " if parser is not None:\n"
2687 " while 1:\n"
2688 " data = source.read(65536)\n"
2689 " if not data:\n"
2690 " break\n"
2691 " parser.feed(data)\n"
2692 " self._root = parser.close()\n"
2693 " else:\n"
2694 " parser = cElementTree.XMLParser()\n"
2695 " self._root = parser._parse(source)\n"
2696 " return self._root\n"
2697 "cElementTree.ElementTree = ElementTree\n"
/* getiterator is not attached to the module here; it is fetched from the
   globals dict after the bootstrap has run */
2699 "def getiterator(node, tag=None):\n" /* helper */
2700 " if tag == '*':\n"
2701 " tag = None\n"
2702 #if (PY_VERSION_HEX < 0x02020000)
2703 " nodes = []\n" /* 2.1 doesn't have yield */
2704 " if tag is None or node.tag == tag:\n"
2705 " nodes.append(node)\n"
2706 " for node in node:\n"
2707 " nodes.extend(getiterator(node, tag))\n"
2708 " return nodes\n"
2709 #else
2710 " if tag is None or node.tag == tag:\n"
2711 " yield node\n"
2712 " for node in node:\n"
2713 " for node in getiterator(node, tag):\n"
2714 " yield node\n"
2715 #endif
2717 "def parse(source, parser=None):\n" /* public */
2718 " tree = ElementTree()\n"
2719 " tree.parse(source, parser)\n"
2720 " return tree\n"
2721 "cElementTree.parse = parse\n"
2723 #if (PY_VERSION_HEX < 0x02020000)
2724 "if hasattr(ET, 'iterparse'):\n"
2725 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2726 #else
/* iterparse: reads the file in 16k chunks, feeds the parser, and yields
   the events the parser appended to the shared list after each feed */
2727 "class iterparse(object):\n"
2728 " root = None\n"
2729 " def __init__(self, file, events=None):\n"
2730 " if not hasattr(file, 'read'):\n"
2731 " file = open(file, 'rb')\n"
2732 " self._file = file\n"
2733 " self._events = events\n"
2734 " def __iter__(self):\n"
2735 " events = []\n"
2736 " b = cElementTree.TreeBuilder()\n"
2737 " p = cElementTree.XMLParser(b)\n"
2738 " p._setevents(events, self._events)\n"
2739 " while 1:\n"
2740 " data = self._file.read(16384)\n"
2741 " if not data:\n"
2742 " break\n"
2743 " p.feed(data)\n"
2744 " for event in events:\n"
2745 " yield event\n"
2746 " del events[:]\n"
2747 " root = p.close()\n"
2748 " for event in events:\n"
2749 " yield event\n"
2750 " self.root = root\n"
2751 "cElementTree.iterparse = iterparse\n"
2752 #endif
2754 "def PI(target, text=None):\n" /* public */
2755 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2756 " element.text = target\n"
2757 " if text:\n"
2758 " element.text = element.text + ' ' + text\n"
2759 " return element\n"
/* NOTE(review): the three "elem" lines below follow PI's "return element"
   and look unreachable (leftover code?) - confirm against the original
   indentation before removing */
2761 " elem = cElementTree.Element(ET.PI)\n"
2762 " elem.text = text\n"
2763 " return elem\n"
2764 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2766 "def XML(text):\n" /* public */
2767 " parser = cElementTree.XMLParser()\n"
2768 " parser.feed(text)\n"
2769 " return parser.close()\n"
2770 "cElementTree.XML = cElementTree.fromstring = XML\n"
/* XMLID: parse text and also return a dict of elements keyed by their
   non-empty 'id' attribute */
2772 "def XMLID(text):\n" /* public */
2773 " tree = XML(text)\n"
2774 " ids = {}\n"
2775 " for elem in tree.getiterator():\n"
2776 " id = elem.get('id')\n"
2777 " if id:\n"
2778 " ids[id] = elem\n"
2779 " return tree, ids\n"
2780 "cElementTree.XMLID = XMLID\n"
/* names re-exported unchanged from the Python ElementTree module, plus
   version strings built from the VERSION macro */
2782 "cElementTree.dump = ET.dump\n"
2783 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2784 "cElementTree.iselement = ET.iselement\n"
2785 "cElementTree.QName = ET.QName\n"
2786 "cElementTree.tostring = ET.tostring\n"
2787 "cElementTree.VERSION = '" VERSION "'\n"
2788 "cElementTree.__version__ = '" VERSION "'\n"
2789 "cElementTree.XMLParserError = SyntaxError\n"
/* NOTE(review): the result of PyRun_String is discarded - it is neither
   tested for NULL nor released.  Confirm whether a failing bootstrap
   should abort initialisation; as written, the lookups below just run
   against whatever the dict contains at that point. */
2793 PyRun_String(bootstrap, Py_file_input, g, NULL);
/* fetch the helpers the bootstrap left in the globals dict */
2795 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2797 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2798 if (elementtree_copyelement_obj) {
2799 /* reduce hack needed; enable reduce method */
/* rename the element_methods entry implemented by element_reduce to
   "__reduce__"; the search stops at the first match */
2800 PyMethodDef* mp;
2801 for (mp = element_methods; mp->ml_name; mp++)
2802 if (mp->ml_meth == (PyCFunction) element_reduce) {
2803 mp->ml_name = "__reduce__";
2804 break;
/* copyelement absent: clear any pending error state */
2806 } else
2807 PyErr_Clear();
2808 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2809 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2811 #if defined(USE_PYEXPAT_CAPI)
2812 /* link against pyexpat, if possible */
/* The imported C API is accepted only when the import succeeded, the
   magic string matches, the size test passes, and the Expat version it
   reports equals the XML_*_VERSION values this file was compiled with.
   NOTE(review): the size test accepts capi->size <= sizeof(*expat_capi);
   confirm that this is the intended direction of the comparison. */
2813 capi = PyCObject_Import("pyexpat", "expat_CAPI");
2814 if (capi &&
2815 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2816 capi->size <= sizeof(*expat_capi) &&
2817 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2818 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2819 capi->MICRO_VERSION == XML_MICRO_VERSION)
2820 expat_capi = capi;
2821 else
2822 expat_capi = NULL;
2823 #endif