Blocked revisions 79401 via svnmerge
[python/dscho.git] / Modules / _elementtree.c
blobae098932860320007bac7f7252b466f991f4b9cf
1 /*
2 * ElementTree
3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
7 * History:
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
42 * info@pythonware.com
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
49 #include "Python.h"
51 #define VERSION "1.0.6"
53 /* -------------------------------------------------------------------- */
54 /* configuration */
56 /* Leave defined to include the expat-based XMLParser type */
57 #define USE_EXPAT
59 /* Define to route all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
63 allocations. */
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
74 32-bit platforms. */
76 /* -------------------------------------------------------------------- */
78 #if 0
/* Debug-only accounting (compiled out by the "#if 0" above): ALLOC and
   RELEASE adjust a running byte counter and print the new total together
   with the given comment string. */
79 static int memory = 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
84 #else
/* Regular build: both macros expand to nothing. */
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
87 #endif
89 /* compiler tweaks */
/* LOCAL(type) introduces a file-private (static) function returning
   `type`; under MSVC it additionally asks for __inline and the __fastcall
   calling convention. */
90 #if defined(_MSC_VER)
91 #define LOCAL(type) static __inline type __fastcall
92 #else
93 #define LOCAL(type) static type
94 #endif
96 /* macros used to store 'join' flags in string object pointers. note
97 that all use of text and tail as object pointers must be wrapped in
98 JOIN_OBJ. see comments in the ElementObject definition for more
99 info. */
/* JOIN_GET(p): the join flag, i.e. the lowest bit of the tagged pointer. */
100 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET(p, flag): strip any existing flag from p, then OR in `flag`. */
101 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ(p): the real object pointer, with the lowest bit cleared. */
102 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
104 /* glue functions (see the init function for details) */
/* Callable placed in the tuple returned by element_reduce. */
105 static PyObject* elementtree_copyelement_obj;
/* Callable invoked as (object, memo) by the deepcopy() helper. */
106 static PyObject* elementtree_deepcopy_obj;
/* Callable invoked as (element, tag) by element_getiterator. */
107 static PyObject* elementtree_getiterator_obj;
/* Object whose find/findtext/findall methods handle path expressions. */
108 static PyObject* elementpath_obj;
110 /* helpers */
112 LOCAL(PyObject*)
113 deepcopy(PyObject* object, PyObject* memo)
115 /* do a deep copy of the given object */
117 PyObject* args;
118 PyObject* result;
120 if (!elementtree_deepcopy_obj) {
121 PyErr_SetString(
122 PyExc_RuntimeError,
123 "deepcopy helper not found"
125 return NULL;
128 args = PyTuple_New(2);
129 if (!args)
130 return NULL;
132 Py_INCREF(object); PyTuple_SET_ITEM(args, 0, (PyObject*) object);
133 Py_INCREF(memo); PyTuple_SET_ITEM(args, 1, (PyObject*) memo);
135 result = PyObject_CallObject(elementtree_deepcopy_obj, args);
137 Py_DECREF(args);
139 return result;
142 LOCAL(PyObject*)
143 list_join(PyObject* list)
145 /* join list elements (destroying the list in the process) */
147 PyObject* joiner;
148 PyObject* function;
149 PyObject* args;
150 PyObject* result;
152 switch (PyList_GET_SIZE(list)) {
153 case 0:
154 Py_DECREF(list);
155 return PyBytes_FromString("");
156 case 1:
157 result = PyList_GET_ITEM(list, 0);
158 Py_INCREF(result);
159 Py_DECREF(list);
160 return result;
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
166 joiner = PySequence_GetSlice(PyList_GET_ITEM(list, 0), 0, 0);
167 if (!joiner)
168 return NULL;
170 function = PyObject_GetAttrString(joiner, "join");
171 if (!function) {
172 Py_DECREF(joiner);
173 return NULL;
176 args = PyTuple_New(1);
177 if (!args)
178 return NULL;
180 PyTuple_SET_ITEM(args, 0, list);
182 result = PyObject_CallObject(function, args);
184 Py_DECREF(args); /* also removes list */
185 Py_DECREF(function);
186 Py_DECREF(joiner);
188 return result;
191 #if (PY_VERSION_HEX < 0x02020000)
192 LOCAL(int)
193 PyDict_Update(PyObject* dict, PyObject* other)
195 /* PyDict_Update emulation for 2.1 and earlier */
197 PyObject* res;
199 res = PyObject_CallMethod(dict, "update", "O", other);
200 if (!res)
201 return -1;
203 Py_DECREF(res);
204 return 0;
206 #endif
208 /* -------------------------------------------------------------------- */
209 /* the element type */
/* Optional per-element storage for attributes and children.  It is
   allocated by element_new_extra and released by element_dealloc_extra;
   an element without attributes or children has no extra block. */
211 typedef struct {
213 /* attributes (a dictionary object), or None if no attributes */
214 PyObject* attrib;
216 /* child elements */
217 int length; /* actual number of items */
218 int allocated; /* allocated items */
220 /* this either points to _children or to a malloced buffer */
221 PyObject* *children;
/* inline storage used until more than STATIC_CHILDREN slots are needed */
223 PyObject* _children[STATIC_CHILDREN];
225 } ElementObjectExtra;
/* The element object itself: tag, text and tail are always present, while
   attributes and children live in the optional `extra` block. */
227 typedef struct {
228 PyObject_HEAD
230 /* element tag (a string). */
231 PyObject* tag;
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
239 PyObject* text;
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
243 PyObject* tail;
/* attribute/children storage, or NULL when the element has neither */
245 ElementObjectExtra* extra;
247 } ElementObject;
/* Forward declaration of the element type object. */
249 static PyTypeObject Element_Type;
/* True only when op's type is exactly Element_Type. */
251 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
253 /* -------------------------------------------------------------------- */
254 /* element constructor and destructor */
256 LOCAL(int)
257 element_new_extra(ElementObject* self, PyObject* attrib)
259 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
260 if (!self->extra)
261 return -1;
263 if (!attrib)
264 attrib = Py_None;
266 Py_INCREF(attrib);
267 self->extra->attrib = attrib;
269 self->extra->length = 0;
270 self->extra->allocated = STATIC_CHILDREN;
271 self->extra->children = self->extra->_children;
273 return 0;
276 LOCAL(void)
277 element_dealloc_extra(ElementObject* self)
279 int i;
281 Py_DECREF(self->extra->attrib);
283 for (i = 0; i < self->extra->length; i++)
284 Py_DECREF(self->extra->children[i]);
286 if (self->extra->children != self->extra->_children)
287 PyObject_Free(self->extra->children);
289 PyObject_Free(self->extra);
292 LOCAL(PyObject*)
293 element_new(PyObject* tag, PyObject* attrib)
295 ElementObject* self;
297 self = PyObject_New(ElementObject, &Element_Type);
298 if (self == NULL)
299 return NULL;
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib) && !PyDict_Size(attrib))
303 attrib = Py_None;
305 self->extra = NULL;
307 if (attrib != Py_None) {
309 if (element_new_extra(self, attrib) < 0) {
310 PyObject_Del(self);
311 return NULL;
314 self->extra->length = 0;
315 self->extra->allocated = STATIC_CHILDREN;
316 self->extra->children = self->extra->_children;
320 Py_INCREF(tag);
321 self->tag = tag;
323 Py_INCREF(Py_None);
324 self->text = Py_None;
326 Py_INCREF(Py_None);
327 self->tail = Py_None;
329 ALLOC(sizeof(ElementObject), "create element");
331 return (PyObject*) self;
334 LOCAL(int)
335 element_resize(ElementObject* self, int extra)
337 int size;
338 PyObject* *children;
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
343 if (!self->extra)
344 element_new_extra(self, NULL);
346 size = self->extra->length + extra;
348 if (size > self->extra->allocated) {
349 /* use Python 2.4's list growth strategy */
350 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
351 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
352 * which needs at least 4 bytes.
353 * Although it's a false alarm always assume at least one child to
354 * be safe.
356 size = size ? size : 1;
357 if (self->extra->children != self->extra->_children) {
358 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
359 * "children", which needs at least 4 bytes. Although it's a
360 * false alarm always assume at least one child to be safe.
362 children = PyObject_Realloc(self->extra->children,
363 size * sizeof(PyObject*));
364 if (!children)
365 goto nomemory;
366 } else {
367 children = PyObject_Malloc(size * sizeof(PyObject*));
368 if (!children)
369 goto nomemory;
370 /* copy existing children from static area to malloc buffer */
371 memcpy(children, self->extra->children,
372 self->extra->length * sizeof(PyObject*));
374 self->extra->children = children;
375 self->extra->allocated = size;
378 return 0;
380 nomemory:
381 PyErr_NoMemory();
382 return -1;
385 LOCAL(int)
386 element_add_subelement(ElementObject* self, PyObject* element)
388 /* add a child element to a parent */
390 if (element_resize(self, 1) < 0)
391 return -1;
393 Py_INCREF(element);
394 self->extra->children[self->extra->length] = element;
396 self->extra->length++;
398 return 0;
401 LOCAL(PyObject*)
402 element_get_attrib(ElementObject* self)
404 /* return borrowed reference to attrib dictionary */
405 /* note: this function assumes that the extra section exists */
407 PyObject* res = self->extra->attrib;
409 if (res == Py_None) {
410 /* create missing dictionary */
411 res = PyDict_New();
412 if (!res)
413 return NULL;
414 self->extra->attrib = res;
417 return res;
420 LOCAL(PyObject*)
421 element_get_text(ElementObject* self)
423 /* return borrowed reference to text attribute */
425 PyObject* res = self->text;
427 if (JOIN_GET(res)) {
428 res = JOIN_OBJ(res);
429 if (PyList_CheckExact(res)) {
430 res = list_join(res);
431 if (!res)
432 return NULL;
433 self->text = res;
437 return res;
440 LOCAL(PyObject*)
441 element_get_tail(ElementObject* self)
443 /* return borrowed reference to text attribute */
445 PyObject* res = self->tail;
447 if (JOIN_GET(res)) {
448 res = JOIN_OBJ(res);
449 if (PyList_CheckExact(res)) {
450 res = list_join(res);
451 if (!res)
452 return NULL;
453 self->tail = res;
457 return res;
460 static PyObject*
461 element(PyObject* self, PyObject* args, PyObject* kw)
463 PyObject* elem;
465 PyObject* tag;
466 PyObject* attrib = NULL;
467 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
468 &PyDict_Type, &attrib))
469 return NULL;
471 if (attrib || kw) {
472 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
473 if (!attrib)
474 return NULL;
475 if (kw)
476 PyDict_Update(attrib, kw);
477 } else {
478 Py_INCREF(Py_None);
479 attrib = Py_None;
482 elem = element_new(tag, attrib);
484 Py_DECREF(attrib);
486 return elem;
489 static PyObject*
490 subelement(PyObject* self, PyObject* args, PyObject* kw)
492 PyObject* elem;
494 ElementObject* parent;
495 PyObject* tag;
496 PyObject* attrib = NULL;
497 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
498 &Element_Type, &parent, &tag,
499 &PyDict_Type, &attrib))
500 return NULL;
502 if (attrib || kw) {
503 attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
504 if (!attrib)
505 return NULL;
506 if (kw)
507 PyDict_Update(attrib, kw);
508 } else {
509 Py_INCREF(Py_None);
510 attrib = Py_None;
513 elem = element_new(tag, attrib);
515 Py_DECREF(attrib);
517 if (element_add_subelement(parent, elem) < 0) {
518 Py_DECREF(elem);
519 return NULL;
522 return elem;
525 static void
526 element_dealloc(ElementObject* self)
528 if (self->extra)
529 element_dealloc_extra(self);
531 /* discard attributes */
532 Py_DECREF(self->tag);
533 Py_DECREF(JOIN_OBJ(self->text));
534 Py_DECREF(JOIN_OBJ(self->tail));
536 RELEASE(sizeof(ElementObject), "destroy element");
538 PyObject_Del(self);
541 /* -------------------------------------------------------------------- */
542 /* methods (in alphabetical order) */
544 static PyObject*
545 element_append(ElementObject* self, PyObject* args)
547 PyObject* element;
548 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
549 return NULL;
551 if (element_add_subelement(self, element) < 0)
552 return NULL;
554 Py_RETURN_NONE;
557 static PyObject*
558 element_clear(ElementObject* self, PyObject* args)
560 if (!PyArg_ParseTuple(args, ":clear"))
561 return NULL;
563 if (self->extra) {
564 element_dealloc_extra(self);
565 self->extra = NULL;
568 Py_INCREF(Py_None);
569 Py_DECREF(JOIN_OBJ(self->text));
570 self->text = Py_None;
572 Py_INCREF(Py_None);
573 Py_DECREF(JOIN_OBJ(self->tail));
574 self->tail = Py_None;
576 Py_RETURN_NONE;
579 static PyObject*
580 element_copy(ElementObject* self, PyObject* args)
582 int i;
583 ElementObject* element;
585 if (!PyArg_ParseTuple(args, ":__copy__"))
586 return NULL;
588 element = (ElementObject*) element_new(
589 self->tag, (self->extra) ? self->extra->attrib : Py_None
591 if (!element)
592 return NULL;
594 Py_DECREF(JOIN_OBJ(element->text));
595 element->text = self->text;
596 Py_INCREF(JOIN_OBJ(element->text));
598 Py_DECREF(JOIN_OBJ(element->tail));
599 element->tail = self->tail;
600 Py_INCREF(JOIN_OBJ(element->tail));
602 if (self->extra) {
604 if (element_resize(element, self->extra->length) < 0) {
605 Py_DECREF(element);
606 return NULL;
609 for (i = 0; i < self->extra->length; i++) {
610 Py_INCREF(self->extra->children[i]);
611 element->extra->children[i] = self->extra->children[i];
614 element->extra->length = self->extra->length;
618 return (PyObject*) element;
621 static PyObject*
622 element_deepcopy(ElementObject* self, PyObject* args)
624 int i;
625 ElementObject* element;
626 PyObject* tag;
627 PyObject* attrib;
628 PyObject* text;
629 PyObject* tail;
630 PyObject* id;
632 PyObject* memo;
633 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
634 return NULL;
636 tag = deepcopy(self->tag, memo);
637 if (!tag)
638 return NULL;
640 if (self->extra) {
641 attrib = deepcopy(self->extra->attrib, memo);
642 if (!attrib) {
643 Py_DECREF(tag);
644 return NULL;
646 } else {
647 Py_INCREF(Py_None);
648 attrib = Py_None;
651 element = (ElementObject*) element_new(tag, attrib);
653 Py_DECREF(tag);
654 Py_DECREF(attrib);
656 if (!element)
657 return NULL;
659 text = deepcopy(JOIN_OBJ(self->text), memo);
660 if (!text)
661 goto error;
662 Py_DECREF(element->text);
663 element->text = JOIN_SET(text, JOIN_GET(self->text));
665 tail = deepcopy(JOIN_OBJ(self->tail), memo);
666 if (!tail)
667 goto error;
668 Py_DECREF(element->tail);
669 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
671 if (self->extra) {
673 if (element_resize(element, self->extra->length) < 0)
674 goto error;
676 for (i = 0; i < self->extra->length; i++) {
677 PyObject* child = deepcopy(self->extra->children[i], memo);
678 if (!child) {
679 element->extra->length = i;
680 goto error;
682 element->extra->children[i] = child;
685 element->extra->length = self->extra->length;
689 /* add object to memo dictionary (so deepcopy won't visit it again) */
690 id = PyLong_FromLong((Py_uintptr_t) self);
692 i = PyDict_SetItem(memo, id, (PyObject*) element);
694 Py_DECREF(id);
696 if (i < 0)
697 goto error;
699 return (PyObject*) element;
701 error:
702 Py_DECREF(element);
703 return NULL;
706 LOCAL(int)
707 checkpath(PyObject* tag)
709 Py_ssize_t i;
710 int check = 1;
712 /* check if a tag contains an xpath character */
714 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
716 if (PyUnicode_Check(tag)) {
717 Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
718 for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
719 if (p[i] == '{')
720 check = 0;
721 else if (p[i] == '}')
722 check = 1;
723 else if (check && PATHCHAR(p[i]))
724 return 1;
726 return 0;
728 if (PyBytes_Check(tag)) {
729 char *p = PyBytes_AS_STRING(tag);
730 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
731 if (p[i] == '{')
732 check = 0;
733 else if (p[i] == '}')
734 check = 1;
735 else if (check && PATHCHAR(p[i]))
736 return 1;
738 return 0;
741 return 1; /* unknown type; might be path expression */
744 static PyObject*
745 element_find(ElementObject* self, PyObject* args)
747 int i;
749 PyObject* tag;
750 if (!PyArg_ParseTuple(args, "O:find", &tag))
751 return NULL;
753 if (checkpath(tag))
754 return PyObject_CallMethod(
755 elementpath_obj, "find", "OO", self, tag
758 if (!self->extra)
759 Py_RETURN_NONE;
761 for (i = 0; i < self->extra->length; i++) {
762 PyObject* item = self->extra->children[i];
763 if (Element_CheckExact(item) &&
764 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
765 Py_INCREF(item);
766 return item;
770 Py_RETURN_NONE;
773 static PyObject*
774 element_findtext(ElementObject* self, PyObject* args)
776 int i;
778 PyObject* tag;
779 PyObject* default_value = Py_None;
780 if (!PyArg_ParseTuple(args, "O|O:findtext", &tag, &default_value))
781 return NULL;
783 if (checkpath(tag))
784 return PyObject_CallMethod(
785 elementpath_obj, "findtext", "OOO", self, tag, default_value
788 if (!self->extra) {
789 Py_INCREF(default_value);
790 return default_value;
793 for (i = 0; i < self->extra->length; i++) {
794 ElementObject* item = (ElementObject*) self->extra->children[i];
795 if (Element_CheckExact(item) && (PyObject_RichCompareBool(item->tag, tag, Py_EQ) == 1)) {
797 PyObject* text = element_get_text(item);
798 if (text == Py_None)
799 return PyBytes_FromString("");
800 Py_XINCREF(text);
801 return text;
805 Py_INCREF(default_value);
806 return default_value;
809 static PyObject*
810 element_findall(ElementObject* self, PyObject* args)
812 int i;
813 PyObject* out;
815 PyObject* tag;
816 if (!PyArg_ParseTuple(args, "O:findall", &tag))
817 return NULL;
819 if (checkpath(tag))
820 return PyObject_CallMethod(
821 elementpath_obj, "findall", "OO", self, tag
824 out = PyList_New(0);
825 if (!out)
826 return NULL;
828 if (!self->extra)
829 return out;
831 for (i = 0; i < self->extra->length; i++) {
832 PyObject* item = self->extra->children[i];
833 if (Element_CheckExact(item) &&
834 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
835 if (PyList_Append(out, item) < 0) {
836 Py_DECREF(out);
837 return NULL;
842 return out;
845 static PyObject*
846 element_get(ElementObject* self, PyObject* args)
848 PyObject* value;
850 PyObject* key;
851 PyObject* default_value = Py_None;
852 if (!PyArg_ParseTuple(args, "O|O:get", &key, &default_value))
853 return NULL;
855 if (!self->extra || self->extra->attrib == Py_None)
856 value = default_value;
857 else {
858 value = PyDict_GetItem(self->extra->attrib, key);
859 if (!value)
860 value = default_value;
863 Py_INCREF(value);
864 return value;
867 static PyObject*
868 element_getchildren(ElementObject* self, PyObject* args)
870 int i;
871 PyObject* list;
873 if (!PyArg_ParseTuple(args, ":getchildren"))
874 return NULL;
876 if (!self->extra)
877 return PyList_New(0);
879 list = PyList_New(self->extra->length);
880 if (!list)
881 return NULL;
883 for (i = 0; i < self->extra->length; i++) {
884 PyObject* item = self->extra->children[i];
885 Py_INCREF(item);
886 PyList_SET_ITEM(list, i, item);
889 return list;
892 static PyObject*
893 element_getiterator(ElementObject* self, PyObject* args)
895 PyObject* result;
897 PyObject* tag = Py_None;
898 if (!PyArg_ParseTuple(args, "|O:getiterator", &tag))
899 return NULL;
901 if (!elementtree_getiterator_obj) {
902 PyErr_SetString(
903 PyExc_RuntimeError,
904 "getiterator helper not found"
906 return NULL;
909 args = PyTuple_New(2);
910 if (!args)
911 return NULL;
913 Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
914 Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
916 result = PyObject_CallObject(elementtree_getiterator_obj, args);
918 Py_DECREF(args);
920 return result;
923 static PyObject*
924 element_getitem(PyObject* self_, Py_ssize_t index)
926 ElementObject* self = (ElementObject*) self_;
928 if (!self->extra || index < 0 || index >= self->extra->length) {
929 PyErr_SetString(
930 PyExc_IndexError,
931 "child index out of range"
933 return NULL;
936 Py_INCREF(self->extra->children[index]);
937 return self->extra->children[index];
940 static PyObject*
941 element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
943 ElementObject* self = (ElementObject*) self_;
944 Py_ssize_t i;
945 PyObject* list;
947 if (!self->extra)
948 return PyList_New(0);
950 /* standard clamping */
951 if (start < 0)
952 start = 0;
953 if (end < 0)
954 end = 0;
955 if (end > self->extra->length)
956 end = self->extra->length;
957 if (start > end)
958 start = end;
960 list = PyList_New(end - start);
961 if (!list)
962 return NULL;
964 for (i = start; i < end; i++) {
965 PyObject* item = self->extra->children[i];
966 Py_INCREF(item);
967 PyList_SET_ITEM(list, i - start, item);
970 return list;
973 static PyObject*
974 element_insert(ElementObject* self, PyObject* args)
976 int i;
978 int index;
979 PyObject* element;
980 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
981 &Element_Type, &element))
982 return NULL;
984 if (!self->extra)
985 element_new_extra(self, NULL);
987 if (index < 0)
988 index = 0;
989 if (index > self->extra->length)
990 index = self->extra->length;
992 if (element_resize(self, 1) < 0)
993 return NULL;
995 for (i = self->extra->length; i > index; i--)
996 self->extra->children[i] = self->extra->children[i-1];
998 Py_INCREF(element);
999 self->extra->children[index] = element;
1001 self->extra->length++;
1003 Py_RETURN_NONE;
1006 static PyObject*
1007 element_items(ElementObject* self, PyObject* args)
1009 if (!PyArg_ParseTuple(args, ":items"))
1010 return NULL;
1012 if (!self->extra || self->extra->attrib == Py_None)
1013 return PyList_New(0);
1015 return PyDict_Items(self->extra->attrib);
1018 static PyObject*
1019 element_keys(ElementObject* self, PyObject* args)
1021 if (!PyArg_ParseTuple(args, ":keys"))
1022 return NULL;
1024 if (!self->extra || self->extra->attrib == Py_None)
1025 return PyList_New(0);
1027 return PyDict_Keys(self->extra->attrib);
1030 static Py_ssize_t
1031 element_length(ElementObject* self)
1033 if (!self->extra)
1034 return 0;
1036 return self->extra->length;
1039 static PyObject*
1040 element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1042 PyObject* elem;
1044 PyObject* tag;
1045 PyObject* attrib;
1046 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1047 return NULL;
1049 attrib = PyDict_Copy(attrib);
1050 if (!attrib)
1051 return NULL;
1053 elem = element_new(tag, attrib);
1055 Py_DECREF(attrib);
1057 return elem;
1060 static PyObject*
1061 element_reduce(ElementObject* self, PyObject* args)
1063 if (!PyArg_ParseTuple(args, ":__reduce__"))
1064 return NULL;
1066 /* Hack alert: This method is used to work around a __copy__
1067 problem on certain 2.3 and 2.4 versions. To save time and
1068 simplify the code, we create the copy in here, and use a dummy
1069 copyelement helper to trick the copy module into doing the
1070 right thing. */
1072 if (!elementtree_copyelement_obj) {
1073 PyErr_SetString(
1074 PyExc_RuntimeError,
1075 "copyelement helper not found"
1077 return NULL;
1080 return Py_BuildValue(
1081 "O(N)", elementtree_copyelement_obj, element_copy(self, args)
1085 static PyObject*
1086 element_remove(ElementObject* self, PyObject* args)
1088 int i;
1090 PyObject* element;
1091 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1092 return NULL;
1094 if (!self->extra) {
1095 /* element has no children, so raise exception */
1096 PyErr_SetString(
1097 PyExc_ValueError,
1098 "list.remove(x): x not in list"
1100 return NULL;
1103 for (i = 0; i < self->extra->length; i++) {
1104 if (self->extra->children[i] == element)
1105 break;
1106 if (PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ) == 1)
1107 break;
1110 if (i == self->extra->length) {
1111 /* element is not in children, so raise exception */
1112 PyErr_SetString(
1113 PyExc_ValueError,
1114 "list.remove(x): x not in list"
1116 return NULL;
1119 Py_DECREF(self->extra->children[i]);
1121 self->extra->length--;
1123 for (; i < self->extra->length; i++)
1124 self->extra->children[i] = self->extra->children[i+1];
1126 Py_RETURN_NONE;
1129 static PyObject*
1130 element_repr(ElementObject* self)
1132 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1135 static PyObject*
1136 element_set(ElementObject* self, PyObject* args)
1138 PyObject* attrib;
1140 PyObject* key;
1141 PyObject* value;
1142 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1143 return NULL;
1145 if (!self->extra)
1146 element_new_extra(self, NULL);
1148 attrib = element_get_attrib(self);
1149 if (!attrib)
1150 return NULL;
1152 if (PyDict_SetItem(attrib, key, value) < 0)
1153 return NULL;
1155 Py_RETURN_NONE;
1158 static int
1159 element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
1161 ElementObject* self = (ElementObject*) self_;
1162 Py_ssize_t i, new, old;
1163 PyObject* recycle = NULL;
1165 if (!self->extra)
1166 element_new_extra(self, NULL);
1168 /* standard clamping */
1169 if (start < 0)
1170 start = 0;
1171 if (end < 0)
1172 end = 0;
1173 if (end > self->extra->length)
1174 end = self->extra->length;
1175 if (start > end)
1176 start = end;
1178 old = end - start;
1180 if (item == NULL)
1181 new = 0;
1182 else if (PyList_CheckExact(item)) {
1183 new = PyList_GET_SIZE(item);
1184 } else {
1185 /* FIXME: support arbitrary sequences? */
1186 PyErr_Format(
1187 PyExc_TypeError,
1188 "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
1190 return -1;
1193 if (old > 0) {
1194 /* to avoid recursive calls to this method (via decref), move
1195 old items to the recycle bin here, and get rid of them when
1196 we're done modifying the element */
1197 recycle = PyList_New(old);
1198 for (i = 0; i < old; i++)
1199 PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
1202 if (new < old) {
1203 /* delete slice */
1204 for (i = end; i < self->extra->length; i++)
1205 self->extra->children[i + new - old] = self->extra->children[i];
1206 } else if (new > old) {
1207 /* insert slice */
1208 if (element_resize(self, new - old) < 0)
1209 return -1;
1210 for (i = self->extra->length-1; i >= end; i--)
1211 self->extra->children[i + new - old] = self->extra->children[i];
1214 /* replace the slice */
1215 for (i = 0; i < new; i++) {
1216 PyObject* element = PyList_GET_ITEM(item, i);
1217 Py_INCREF(element);
1218 self->extra->children[i + start] = element;
1221 self->extra->length += new - old;
1223 /* discard the recycle bin, and everything in it */
1224 Py_XDECREF(recycle);
1226 return 0;
1229 static int
1230 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1232 ElementObject* self = (ElementObject*) self_;
1233 int i;
1234 PyObject* old;
1236 if (!self->extra || index < 0 || index >= self->extra->length) {
1237 PyErr_SetString(
1238 PyExc_IndexError,
1239 "child assignment index out of range");
1240 return -1;
1243 old = self->extra->children[index];
1245 if (item) {
1246 Py_INCREF(item);
1247 self->extra->children[index] = item;
1248 } else {
1249 self->extra->length--;
1250 for (i = index; i < self->extra->length; i++)
1251 self->extra->children[i] = self->extra->children[i+1];
1254 Py_DECREF(old);
1256 return 0;
/* Method table for the Element type; every entry takes positional
   arguments only (METH_VARARGS). */
1259 static PyMethodDef element_methods[] = {
1261 {"clear", (PyCFunction) element_clear, METH_VARARGS},
1263 {"get", (PyCFunction) element_get, METH_VARARGS},
1264 {"set", (PyCFunction) element_set, METH_VARARGS},
1266 {"find", (PyCFunction) element_find, METH_VARARGS},
1267 {"findtext", (PyCFunction) element_findtext, METH_VARARGS},
1268 {"findall", (PyCFunction) element_findall, METH_VARARGS},
1270 {"append", (PyCFunction) element_append, METH_VARARGS},
1271 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1272 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1274 {"getiterator", (PyCFunction) element_getiterator, METH_VARARGS},
1275 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1277 {"items", (PyCFunction) element_items, METH_VARARGS},
1278 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1280 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1282 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1283 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
1285 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1286 C objects correctly, so we have to fake it using a __reduce__-
1287 based hack (see the element_reduce implementation above for
1288 details). */
1290 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1291 using a runtime test to figure out if we need to fake things
1292 or not (see the init code below). The following entry is
1293 enabled only if the hack is needed. */
/* The leading "!" in the name below keeps the entry from being reachable
   as __reduce__ as written; NOTE(review): presumably the init code
   renames it when the hack is needed -- confirm there. */
1295 {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
1297 {NULL, NULL}
1300 static PyObject*
1301 element_getattro(ElementObject* self, PyObject* nameobj)
1303 PyObject* res;
1304 char *name = "";
1306 if (PyUnicode_Check(nameobj))
1307 name = _PyUnicode_AsString(nameobj);
1309 if (strcmp(name, "tag") == 0)
1310 res = self->tag;
1311 else if (strcmp(name, "text") == 0)
1312 res = element_get_text(self);
1313 else if (strcmp(name, "tail") == 0) {
1314 res = element_get_tail(self);
1315 } else if (strcmp(name, "attrib") == 0) {
1316 if (!self->extra)
1317 element_new_extra(self, NULL);
1318 res = element_get_attrib(self);
1319 } else {
1320 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
1323 Py_XINCREF(res);
1324 return res;
1327 static int
1328 element_setattr(ElementObject* self, const char* name, PyObject* value)
1330 if (value == NULL) {
1331 PyErr_SetString(
1332 PyExc_AttributeError,
1333 "can't delete element attributes"
1335 return -1;
1338 if (strcmp(name, "tag") == 0) {
1339 Py_DECREF(self->tag);
1340 self->tag = value;
1341 Py_INCREF(self->tag);
1342 } else if (strcmp(name, "text") == 0) {
1343 Py_DECREF(JOIN_OBJ(self->text));
1344 self->text = value;
1345 Py_INCREF(self->text);
1346 } else if (strcmp(name, "tail") == 0) {
1347 Py_DECREF(JOIN_OBJ(self->tail));
1348 self->tail = value;
1349 Py_INCREF(self->tail);
1350 } else if (strcmp(name, "attrib") == 0) {
1351 if (!self->extra)
1352 element_new_extra(self, NULL);
1353 Py_DECREF(self->extra->attrib);
1354 self->extra->attrib = value;
1355 Py_INCREF(self->extra->attrib);
1356 } else {
1357 PyErr_SetString(PyExc_AttributeError, name);
1358 return -1;
1361 return 0;
1364 static PySequenceMethods element_as_sequence = {
1365 (lenfunc) element_length,
1366 0, /* sq_concat */
1367 0, /* sq_repeat */
1368 element_getitem,
1369 element_getslice,
1370 element_setitem,
1371 element_setslice,
1374 static PyTypeObject Element_Type = {
1375 PyVarObject_HEAD_INIT(NULL, 0)
1376 "Element", sizeof(ElementObject), 0,
1377 /* methods */
1378 (destructor)element_dealloc, /* tp_dealloc */
1379 0, /* tp_print */
1380 0, /* tp_getattr */
1381 (setattrfunc)element_setattr, /* tp_setattr */
1382 0, /* tp_reserved */
1383 (reprfunc)element_repr, /* tp_repr */
1384 0, /* tp_as_number */
1385 &element_as_sequence, /* tp_as_sequence */
1386 0, /* tp_as_mapping */
1387 0, /* tp_hash */
1388 0, /* tp_call */
1389 0, /* tp_str */
1390 (getattrofunc)element_getattro, /* tp_getattro */
1391 0, /* tp_setattro */
1392 0, /* tp_as_buffer */
1393 Py_TPFLAGS_DEFAULT, /* tp_flags */
1394 0, /* tp_doc */
1395 0, /* tp_traverse */
1396 0, /* tp_clear */
1397 0, /* tp_richcompare */
1398 0, /* tp_weaklistoffset */
1399 0, /* tp_iter */
1400 0, /* tp_iternext */
1401 element_methods, /* tp_methods */
1402 0, /* tp_members */
1405 /* ==================================================================== */
1406 /* the tree builder type */
1408 typedef struct {
1409 PyObject_HEAD
1411 PyObject* root; /* root node (first created node) */
1413 ElementObject* this; /* current node */
1414 ElementObject* last; /* most recently created node */
1416 PyObject* data; /* data collector (string or list), or NULL */
1418 PyObject* stack; /* element stack */
1419 Py_ssize_t index; /* current stack size (0=empty) */
1421 /* element tracing */
1422 PyObject* events; /* list of events, or NULL if not collecting */
1423 PyObject* start_event_obj; /* event objects (NULL to ignore) */
1424 PyObject* end_event_obj;
1425 PyObject* start_ns_event_obj;
1426 PyObject* end_ns_event_obj;
1428 } TreeBuilderObject;
1430 static PyTypeObject TreeBuilder_Type;
1432 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
1434 /* -------------------------------------------------------------------- */
1435 /* constructor and destructor */
1437 LOCAL(PyObject*)
1438 treebuilder_new(void)
1440 TreeBuilderObject* self;
1442 self = PyObject_New(TreeBuilderObject, &TreeBuilder_Type);
1443 if (self == NULL)
1444 return NULL;
1446 self->root = NULL;
1448 Py_INCREF(Py_None);
1449 self->this = (ElementObject*) Py_None;
1451 Py_INCREF(Py_None);
1452 self->last = (ElementObject*) Py_None;
1454 self->data = NULL;
1456 self->stack = PyList_New(20);
1457 self->index = 0;
1459 self->events = NULL;
1460 self->start_event_obj = self->end_event_obj = NULL;
1461 self->start_ns_event_obj = self->end_ns_event_obj = NULL;
1463 ALLOC(sizeof(TreeBuilderObject), "create treebuilder");
1465 return (PyObject*) self;
1468 static PyObject*
1469 treebuilder(PyObject* self_, PyObject* args)
1471 if (!PyArg_ParseTuple(args, ":TreeBuilder"))
1472 return NULL;
1474 return treebuilder_new();
1477 static void
1478 treebuilder_dealloc(TreeBuilderObject* self)
1480 Py_XDECREF(self->end_ns_event_obj);
1481 Py_XDECREF(self->start_ns_event_obj);
1482 Py_XDECREF(self->end_event_obj);
1483 Py_XDECREF(self->start_event_obj);
1484 Py_XDECREF(self->events);
1485 Py_DECREF(self->stack);
1486 Py_XDECREF(self->data);
1487 Py_DECREF(self->last);
1488 Py_DECREF(self->this);
1489 Py_XDECREF(self->root);
1491 RELEASE(sizeof(TreeBuilderObject), "destroy treebuilder");
1493 PyObject_Del(self);
1496 /* -------------------------------------------------------------------- */
1497 /* handlers */
1499 LOCAL(PyObject*)
1500 treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
1501 PyObject* standalone)
1503 Py_RETURN_NONE;
1506 LOCAL(PyObject*)
1507 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
1508 PyObject* attrib)
1510 PyObject* node;
1511 PyObject* this;
1513 if (self->data) {
1514 if (self->this == self->last) {
1515 Py_DECREF(JOIN_OBJ(self->last->text));
1516 self->last->text = JOIN_SET(
1517 self->data, PyList_CheckExact(self->data)
1519 } else {
1520 Py_DECREF(JOIN_OBJ(self->last->tail));
1521 self->last->tail = JOIN_SET(
1522 self->data, PyList_CheckExact(self->data)
1525 self->data = NULL;
1528 node = element_new(tag, attrib);
1529 if (!node)
1530 return NULL;
1532 this = (PyObject*) self->this;
1534 if (this != Py_None) {
1535 if (element_add_subelement((ElementObject*) this, node) < 0)
1536 goto error;
1537 } else {
1538 if (self->root) {
1539 PyErr_SetString(
1540 PyExc_SyntaxError,
1541 "multiple elements on top level"
1543 goto error;
1545 Py_INCREF(node);
1546 self->root = node;
1549 if (self->index < PyList_GET_SIZE(self->stack)) {
1550 if (PyList_SetItem(self->stack, self->index, this) < 0)
1551 goto error;
1552 Py_INCREF(this);
1553 } else {
1554 if (PyList_Append(self->stack, this) < 0)
1555 goto error;
1557 self->index++;
1559 Py_DECREF(this);
1560 Py_INCREF(node);
1561 self->this = (ElementObject*) node;
1563 Py_DECREF(self->last);
1564 Py_INCREF(node);
1565 self->last = (ElementObject*) node;
1567 if (self->start_event_obj) {
1568 PyObject* res;
1569 PyObject* action = self->start_event_obj;
1570 res = PyTuple_New(2);
1571 if (res) {
1572 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1573 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1574 PyList_Append(self->events, res);
1575 Py_DECREF(res);
1576 } else
1577 PyErr_Clear(); /* FIXME: propagate error */
1580 return node;
1582 error:
1583 Py_DECREF(node);
1584 return NULL;
1587 LOCAL(PyObject*)
1588 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
1590 if (!self->data) {
1591 if (self->last == (ElementObject*) Py_None) {
1592 /* ignore calls to data before the first call to start */
1593 Py_RETURN_NONE;
1595 /* store the first item as is */
1596 Py_INCREF(data); self->data = data;
1597 } else {
1598 /* more than one item; use a list to collect items */
1599 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
1600 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
1601 /* expat often generates single character data sections; handle
1602 the most common case by resizing the existing string... */
1603 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
1604 if (_PyBytes_Resize(&self->data, size + 1) < 0)
1605 return NULL;
1606 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
1607 } else if (PyList_CheckExact(self->data)) {
1608 if (PyList_Append(self->data, data) < 0)
1609 return NULL;
1610 } else {
1611 PyObject* list = PyList_New(2);
1612 if (!list)
1613 return NULL;
1614 PyList_SET_ITEM(list, 0, self->data);
1615 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
1616 self->data = list;
1620 Py_RETURN_NONE;
1623 LOCAL(PyObject*)
1624 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
1626 PyObject* item;
1628 if (self->data) {
1629 if (self->this == self->last) {
1630 Py_DECREF(JOIN_OBJ(self->last->text));
1631 self->last->text = JOIN_SET(
1632 self->data, PyList_CheckExact(self->data)
1634 } else {
1635 Py_DECREF(JOIN_OBJ(self->last->tail));
1636 self->last->tail = JOIN_SET(
1637 self->data, PyList_CheckExact(self->data)
1640 self->data = NULL;
1643 if (self->index == 0) {
1644 PyErr_SetString(
1645 PyExc_IndexError,
1646 "pop from empty stack"
1648 return NULL;
1651 self->index--;
1653 item = PyList_GET_ITEM(self->stack, self->index);
1654 Py_INCREF(item);
1656 Py_DECREF(self->last);
1658 self->last = (ElementObject*) self->this;
1659 self->this = (ElementObject*) item;
1661 if (self->end_event_obj) {
1662 PyObject* res;
1663 PyObject* action = self->end_event_obj;
1664 PyObject* node = (PyObject*) self->last;
1665 res = PyTuple_New(2);
1666 if (res) {
1667 Py_INCREF(action); PyTuple_SET_ITEM(res, 0, (PyObject*) action);
1668 Py_INCREF(node); PyTuple_SET_ITEM(res, 1, (PyObject*) node);
1669 PyList_Append(self->events, res);
1670 Py_DECREF(res);
1671 } else
1672 PyErr_Clear(); /* FIXME: propagate error */
1675 Py_INCREF(self->last);
1676 return (PyObject*) self->last;
1679 LOCAL(void)
1680 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
1681 const char* prefix, const char *uri)
1683 PyObject* res;
1684 PyObject* action;
1685 PyObject* parcel;
1687 if (!self->events)
1688 return;
1690 if (start) {
1691 if (!self->start_ns_event_obj)
1692 return;
1693 action = self->start_ns_event_obj;
1694 /* FIXME: prefix and uri use utf-8 encoding! */
1695 parcel = Py_BuildValue("ss", (prefix) ? prefix : "", uri);
1696 if (!parcel)
1697 return;
1698 Py_INCREF(action);
1699 } else {
1700 if (!self->end_ns_event_obj)
1701 return;
1702 action = self->end_ns_event_obj;
1703 Py_INCREF(action);
1704 parcel = Py_None;
1705 Py_INCREF(parcel);
1708 res = PyTuple_New(2);
1710 if (res) {
1711 PyTuple_SET_ITEM(res, 0, action);
1712 PyTuple_SET_ITEM(res, 1, parcel);
1713 PyList_Append(self->events, res);
1714 Py_DECREF(res);
1715 } else
1716 PyErr_Clear(); /* FIXME: propagate error */
1719 /* -------------------------------------------------------------------- */
1720 /* methods (in alphabetical order) */
1722 static PyObject*
1723 treebuilder_data(TreeBuilderObject* self, PyObject* args)
1725 PyObject* data;
1726 if (!PyArg_ParseTuple(args, "O:data", &data))
1727 return NULL;
1729 return treebuilder_handle_data(self, data);
1732 static PyObject*
1733 treebuilder_end(TreeBuilderObject* self, PyObject* args)
1735 PyObject* tag;
1736 if (!PyArg_ParseTuple(args, "O:end", &tag))
1737 return NULL;
1739 return treebuilder_handle_end(self, tag);
1742 LOCAL(PyObject*)
1743 treebuilder_done(TreeBuilderObject* self)
1745 PyObject* res;
1747 /* FIXME: check stack size? */
1749 if (self->root)
1750 res = self->root;
1751 else
1752 res = Py_None;
1754 Py_INCREF(res);
1755 return res;
1758 static PyObject*
1759 treebuilder_close(TreeBuilderObject* self, PyObject* args)
1761 if (!PyArg_ParseTuple(args, ":close"))
1762 return NULL;
1764 return treebuilder_done(self);
1767 static PyObject*
1768 treebuilder_start(TreeBuilderObject* self, PyObject* args)
1770 PyObject* tag;
1771 PyObject* attrib = Py_None;
1772 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
1773 return NULL;
1775 return treebuilder_handle_start(self, tag, attrib);
1778 static PyObject*
1779 treebuilder_xml(TreeBuilderObject* self, PyObject* args)
1781 PyObject* encoding;
1782 PyObject* standalone;
1783 if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
1784 return NULL;
1786 return treebuilder_handle_xml(self, encoding, standalone);
1789 static PyMethodDef treebuilder_methods[] = {
1790 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
1791 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
1792 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
1793 {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
1794 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
1795 {NULL, NULL}
1798 static PyTypeObject TreeBuilder_Type = {
1799 PyVarObject_HEAD_INIT(NULL, 0)
1800 "TreeBuilder", sizeof(TreeBuilderObject), 0,
1801 /* methods */
1802 (destructor)treebuilder_dealloc, /* tp_dealloc */
1803 0, /* tp_print */
1804 0, /* tp_getattr */
1805 0, /* tp_setattr */
1806 0, /* tp_reserved */
1807 0, /* tp_repr */
1808 0, /* tp_as_number */
1809 0, /* tp_as_sequence */
1810 0, /* tp_as_mapping */
1811 0, /* tp_hash */
1812 0, /* tp_call */
1813 0, /* tp_str */
1814 0, /* tp_getattro */
1815 0, /* tp_setattro */
1816 0, /* tp_as_buffer */
1817 Py_TPFLAGS_DEFAULT, /* tp_flags */
1818 0, /* tp_doc */
1819 0, /* tp_traverse */
1820 0, /* tp_clear */
1821 0, /* tp_richcompare */
1822 0, /* tp_weaklistoffset */
1823 0, /* tp_iter */
1824 0, /* tp_iternext */
1825 treebuilder_methods, /* tp_methods */
1826 0, /* tp_members */
1829 /* ==================================================================== */
1830 /* the expat interface */
1832 #if defined(USE_EXPAT)
1834 #include "expat.h"
1836 #if defined(USE_PYEXPAT_CAPI)
1837 #include "pyexpat.h"
1838 static struct PyExpat_CAPI* expat_capi;
1839 #define EXPAT(func) (expat_capi->func)
1840 #else
1841 #define EXPAT(func) (XML_##func)
1842 #endif
1844 typedef struct {
1845 PyObject_HEAD
1847 XML_Parser parser;
1849 PyObject* target;
1850 PyObject* entity;
1852 PyObject* names;
1854 PyObject* handle_xml;
1855 PyObject* handle_start;
1856 PyObject* handle_data;
1857 PyObject* handle_end;
1859 PyObject* handle_comment;
1860 PyObject* handle_pi;
1862 } XMLParserObject;
1864 static PyTypeObject XMLParser_Type;
1866 /* helpers */
1868 LOCAL(PyObject*)
1869 makeuniversal(XMLParserObject* self, const char* string)
1871 /* convert a UTF-8 tag/attribute name from the expat parser
1872 to a universal name string */
1874 int size = strlen(string);
1875 PyObject* key;
1876 PyObject* value;
1878 /* look the 'raw' name up in the names dictionary */
1879 key = PyBytes_FromStringAndSize(string, size);
1880 if (!key)
1881 return NULL;
1883 value = PyDict_GetItem(self->names, key);
1885 if (value) {
1886 Py_INCREF(value);
1887 } else {
1888 /* new name. convert to universal name, and decode as
1889 necessary */
1891 PyObject* tag;
1892 char* p;
1893 int i;
1895 /* look for namespace separator */
1896 for (i = 0; i < size; i++)
1897 if (string[i] == '}')
1898 break;
1899 if (i != size) {
1900 /* convert to universal name */
1901 tag = PyBytes_FromStringAndSize(NULL, size+1);
1902 p = PyBytes_AS_STRING(tag);
1903 p[0] = '{';
1904 memcpy(p+1, string, size);
1905 size++;
1906 } else {
1907 /* plain name; use key as tag */
1908 Py_INCREF(key);
1909 tag = key;
1912 /* decode universal name */
1913 p = PyBytes_AS_STRING(tag);
1914 value = PyUnicode_DecodeUTF8(p, size, "strict");
1915 Py_DECREF(tag);
1916 if (!value) {
1917 Py_DECREF(key);
1918 return NULL;
1921 /* add to names dictionary */
1922 if (PyDict_SetItem(self->names, key, value) < 0) {
1923 Py_DECREF(key);
1924 Py_DECREF(value);
1925 return NULL;
1929 Py_DECREF(key);
1930 return value;
1933 /* -------------------------------------------------------------------- */
1934 /* handlers */
1936 static void
1937 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
1938 int data_len)
1940 PyObject* key;
1941 PyObject* value;
1942 PyObject* res;
1944 if (data_len < 2 || data_in[0] != '&')
1945 return;
1947 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
1948 if (!key)
1949 return;
1951 value = PyDict_GetItem(self->entity, key);
1953 if (value) {
1954 if (TreeBuilder_CheckExact(self->target))
1955 res = treebuilder_handle_data(
1956 (TreeBuilderObject*) self->target, value
1958 else if (self->handle_data)
1959 res = PyObject_CallFunction(self->handle_data, "O", value);
1960 else
1961 res = NULL;
1962 Py_XDECREF(res);
1963 } else {
1964 PyErr_Format(
1965 PyExc_SyntaxError, "undefined entity &%s;: line %ld, column %ld",
1966 PyBytes_AS_STRING(key),
1967 EXPAT(GetErrorLineNumber)(self->parser),
1968 EXPAT(GetErrorColumnNumber)(self->parser)
1972 Py_DECREF(key);
1975 static void
1976 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
1977 const XML_Char **attrib_in)
1979 PyObject* res;
1980 PyObject* tag;
1981 PyObject* attrib;
1982 int ok;
1984 /* tag name */
1985 tag = makeuniversal(self, tag_in);
1986 if (!tag)
1987 return; /* parser will look for errors */
1989 /* attributes */
1990 if (attrib_in[0]) {
1991 attrib = PyDict_New();
1992 if (!attrib)
1993 return;
1994 while (attrib_in[0] && attrib_in[1]) {
1995 PyObject* key = makeuniversal(self, attrib_in[0]);
1996 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
1997 if (!key || !value) {
1998 Py_XDECREF(value);
1999 Py_XDECREF(key);
2000 Py_DECREF(attrib);
2001 return;
2003 ok = PyDict_SetItem(attrib, key, value);
2004 Py_DECREF(value);
2005 Py_DECREF(key);
2006 if (ok < 0) {
2007 Py_DECREF(attrib);
2008 return;
2010 attrib_in += 2;
2012 } else {
2013 Py_INCREF(Py_None);
2014 attrib = Py_None;
2017 if (TreeBuilder_CheckExact(self->target))
2018 /* shortcut */
2019 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2020 tag, attrib);
2021 else if (self->handle_start)
2022 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
2023 else
2024 res = NULL;
2026 Py_DECREF(tag);
2027 Py_DECREF(attrib);
2029 Py_XDECREF(res);
2032 static void
2033 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2034 int data_len)
2036 PyObject* data;
2037 PyObject* res;
2039 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
2040 if (!data)
2041 return; /* parser will look for errors */
2043 if (TreeBuilder_CheckExact(self->target))
2044 /* shortcut */
2045 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2046 else if (self->handle_data)
2047 res = PyObject_CallFunction(self->handle_data, "O", data);
2048 else
2049 res = NULL;
2051 Py_DECREF(data);
2053 Py_XDECREF(res);
2056 static void
2057 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2059 PyObject* tag;
2060 PyObject* res = NULL;
2062 if (TreeBuilder_CheckExact(self->target))
2063 /* shortcut */
2064 /* the standard tree builder doesn't look at the end tag */
2065 res = treebuilder_handle_end(
2066 (TreeBuilderObject*) self->target, Py_None
2068 else if (self->handle_end) {
2069 tag = makeuniversal(self, tag_in);
2070 if (tag) {
2071 res = PyObject_CallFunction(self->handle_end, "O", tag);
2072 Py_DECREF(tag);
2076 Py_XDECREF(res);
2079 static void
2080 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
2081 const XML_Char *uri)
2083 treebuilder_handle_namespace(
2084 (TreeBuilderObject*) self->target, 1, prefix, uri
2088 static void
2089 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
2091 treebuilder_handle_namespace(
2092 (TreeBuilderObject*) self->target, 0, NULL, NULL
2096 static void
2097 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
2099 PyObject* comment;
2100 PyObject* res;
2102 if (self->handle_comment) {
2103 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
2104 if (comment) {
2105 res = PyObject_CallFunction(self->handle_comment, "O", comment);
2106 Py_XDECREF(res);
2107 Py_DECREF(comment);
2112 static void
2113 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
2114 const XML_Char* data_in)
2116 PyObject* target;
2117 PyObject* data;
2118 PyObject* res;
2120 if (self->handle_pi) {
2121 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
2122 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
2123 if (target && data) {
2124 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
2125 Py_XDECREF(res);
2126 Py_DECREF(data);
2127 Py_DECREF(target);
2128 } else {
2129 Py_XDECREF(data);
2130 Py_XDECREF(target);
2135 static int
2136 expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
2137 XML_Encoding *info)
2139 PyObject* u;
2140 Py_UNICODE* p;
2141 unsigned char s[256];
2142 int i;
2144 memset(info, 0, sizeof(XML_Encoding));
2146 for (i = 0; i < 256; i++)
2147 s[i] = i;
2149 u = PyUnicode_Decode((char*) s, 256, name, "replace");
2150 if (!u)
2151 return XML_STATUS_ERROR;
2153 if (PyUnicode_GET_SIZE(u) != 256) {
2154 Py_DECREF(u);
2155 return XML_STATUS_ERROR;
2158 p = PyUnicode_AS_UNICODE(u);
2160 for (i = 0; i < 256; i++) {
2161 if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
2162 info->map[i] = p[i];
2163 else
2164 info->map[i] = -1;
2167 Py_DECREF(u);
2169 return XML_STATUS_OK;
2172 /* -------------------------------------------------------------------- */
2173 /* constructor and destructor */
2175 static PyObject*
2176 xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
2178 XMLParserObject* self;
2179 /* FIXME: does this need to be static? */
2180 static XML_Memory_Handling_Suite memory_handler;
2182 PyObject* target = NULL;
2183 char* encoding = NULL;
2184 static char* kwlist[] = { "target", "encoding", NULL };
2185 if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
2186 &target, &encoding))
2187 return NULL;
2189 #if defined(USE_PYEXPAT_CAPI)
2190 if (!expat_capi) {
2191 PyErr_SetString(
2192 PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
2194 return NULL;
2196 #endif
2198 self = PyObject_New(XMLParserObject, &XMLParser_Type);
2199 if (self == NULL)
2200 return NULL;
2202 self->entity = PyDict_New();
2203 if (!self->entity) {
2204 PyObject_Del(self);
2205 return NULL;
2208 self->names = PyDict_New();
2209 if (!self->names) {
2210 PyObject_Del(self->entity);
2211 PyObject_Del(self);
2212 return NULL;
2215 memory_handler.malloc_fcn = PyObject_Malloc;
2216 memory_handler.realloc_fcn = PyObject_Realloc;
2217 memory_handler.free_fcn = PyObject_Free;
2219 self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
2220 if (!self->parser) {
2221 PyObject_Del(self->names);
2222 PyObject_Del(self->entity);
2223 PyObject_Del(self);
2224 PyErr_NoMemory();
2225 return NULL;
2228 /* setup target handlers */
2229 if (!target) {
2230 target = treebuilder_new();
2231 if (!target) {
2232 EXPAT(ParserFree)(self->parser);
2233 PyObject_Del(self->names);
2234 PyObject_Del(self->entity);
2235 PyObject_Del(self);
2236 return NULL;
2238 } else
2239 Py_INCREF(target);
2240 self->target = target;
2242 self->handle_xml = PyObject_GetAttrString(target, "xml");
2243 self->handle_start = PyObject_GetAttrString(target, "start");
2244 self->handle_data = PyObject_GetAttrString(target, "data");
2245 self->handle_end = PyObject_GetAttrString(target, "end");
2246 self->handle_comment = PyObject_GetAttrString(target, "comment");
2247 self->handle_pi = PyObject_GetAttrString(target, "pi");
2249 PyErr_Clear();
2251 /* configure parser */
2252 EXPAT(SetUserData)(self->parser, self);
2253 EXPAT(SetElementHandler)(
2254 self->parser,
2255 (XML_StartElementHandler) expat_start_handler,
2256 (XML_EndElementHandler) expat_end_handler
2258 EXPAT(SetDefaultHandlerExpand)(
2259 self->parser,
2260 (XML_DefaultHandler) expat_default_handler
2262 EXPAT(SetCharacterDataHandler)(
2263 self->parser,
2264 (XML_CharacterDataHandler) expat_data_handler
2266 if (self->handle_comment)
2267 EXPAT(SetCommentHandler)(
2268 self->parser,
2269 (XML_CommentHandler) expat_comment_handler
2271 if (self->handle_pi)
2272 EXPAT(SetProcessingInstructionHandler)(
2273 self->parser,
2274 (XML_ProcessingInstructionHandler) expat_pi_handler
2276 EXPAT(SetUnknownEncodingHandler)(
2277 self->parser,
2278 (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
2281 ALLOC(sizeof(XMLParserObject), "create expatparser");
2283 return (PyObject*) self;
2286 static void
2287 xmlparser_dealloc(XMLParserObject* self)
2289 EXPAT(ParserFree)(self->parser);
2291 Py_XDECREF(self->handle_pi);
2292 Py_XDECREF(self->handle_comment);
2293 Py_XDECREF(self->handle_end);
2294 Py_XDECREF(self->handle_data);
2295 Py_XDECREF(self->handle_start);
2296 Py_XDECREF(self->handle_xml);
2298 Py_DECREF(self->target);
2299 Py_DECREF(self->entity);
2300 Py_DECREF(self->names);
2302 RELEASE(sizeof(XMLParserObject), "destroy expatparser");
2304 PyObject_Del(self);
2307 /* -------------------------------------------------------------------- */
2308 /* methods (in alphabetical order) */
2310 LOCAL(PyObject*)
2311 expat_parse(XMLParserObject* self, char* data, int data_len, int final)
2313 int ok;
2315 ok = EXPAT(Parse)(self->parser, data, data_len, final);
2317 if (PyErr_Occurred())
2318 return NULL;
2320 if (!ok) {
2321 PyErr_Format(
2322 PyExc_SyntaxError, "%s: line %ld, column %ld",
2323 EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
2324 EXPAT(GetErrorLineNumber)(self->parser),
2325 EXPAT(GetErrorColumnNumber)(self->parser)
2327 return NULL;
2330 Py_RETURN_NONE;
2333 static PyObject*
2334 xmlparser_close(XMLParserObject* self, PyObject* args)
2336 /* end feeding data to parser */
2338 PyObject* res;
2339 if (!PyArg_ParseTuple(args, ":close"))
2340 return NULL;
2342 res = expat_parse(self, "", 0, 1);
2344 if (res && TreeBuilder_CheckExact(self->target)) {
2345 Py_DECREF(res);
2346 return treebuilder_done((TreeBuilderObject*) self->target);
2349 return res;
2352 static PyObject*
2353 xmlparser_feed(XMLParserObject* self, PyObject* args)
2355 /* feed data to parser */
2357 char* data;
2358 int data_len;
2359 if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
2360 return NULL;
2362 return expat_parse(self, data, data_len, 0);
2365 static PyObject*
2366 xmlparser_parse(XMLParserObject* self, PyObject* args)
2368 /* (internal) parse until end of input stream */
2370 PyObject* reader;
2371 PyObject* buffer;
2372 PyObject* res;
2374 PyObject* fileobj;
2375 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
2376 return NULL;
2378 reader = PyObject_GetAttrString(fileobj, "read");
2379 if (!reader)
2380 return NULL;
2382 /* read from open file object */
2383 for (;;) {
2385 buffer = PyObject_CallFunction(reader, "i", 64*1024);
2387 if (!buffer) {
2388 /* read failed (e.g. due to KeyboardInterrupt) */
2389 Py_DECREF(reader);
2390 return NULL;
2393 if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
2394 Py_DECREF(buffer);
2395 break;
2398 res = expat_parse(
2399 self, PyBytes_AS_STRING(buffer), PyBytes_GET_SIZE(buffer), 0
2402 Py_DECREF(buffer);
2404 if (!res) {
2405 Py_DECREF(reader);
2406 return NULL;
2408 Py_DECREF(res);
2412 Py_DECREF(reader);
2414 res = expat_parse(self, "", 0, 1);
2416 if (res && TreeBuilder_CheckExact(self->target)) {
2417 Py_DECREF(res);
2418 return treebuilder_done((TreeBuilderObject*) self->target);
2421 return res;
2424 static PyObject*
2425 xmlparser_setevents(XMLParserObject* self, PyObject* args)
2427 /* activate element event reporting */
2429 Py_ssize_t i;
2430 TreeBuilderObject* target;
2432 PyObject* events; /* event collector */
2433 PyObject* event_set = Py_None;
2434 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
2435 &event_set))
2436 return NULL;
2438 if (!TreeBuilder_CheckExact(self->target)) {
2439 PyErr_SetString(
2440 PyExc_TypeError,
2441 "event handling only supported for cElementTree.Treebuilder "
2442 "targets"
2444 return NULL;
2447 target = (TreeBuilderObject*) self->target;
2449 Py_INCREF(events);
2450 Py_XDECREF(target->events);
2451 target->events = events;
2453 /* clear out existing events */
2454 Py_XDECREF(target->start_event_obj); target->start_event_obj = NULL;
2455 Py_XDECREF(target->end_event_obj); target->end_event_obj = NULL;
2456 Py_XDECREF(target->start_ns_event_obj); target->start_ns_event_obj = NULL;
2457 Py_XDECREF(target->end_ns_event_obj); target->end_ns_event_obj = NULL;
2459 if (event_set == Py_None) {
2460 /* default is "end" only */
2461 target->end_event_obj = PyBytes_FromString("end");
2462 Py_RETURN_NONE;
2465 if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
2466 goto error;
2468 for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
2469 PyObject* item = PyTuple_GET_ITEM(event_set, i);
2470 char* event;
2471 if (!PyBytes_Check(item))
2472 goto error;
2473 event = PyBytes_AS_STRING(item);
2474 if (strcmp(event, "start") == 0) {
2475 Py_INCREF(item);
2476 target->start_event_obj = item;
2477 } else if (strcmp(event, "end") == 0) {
2478 Py_INCREF(item);
2479 Py_XDECREF(target->end_event_obj);
2480 target->end_event_obj = item;
2481 } else if (strcmp(event, "start-ns") == 0) {
2482 Py_INCREF(item);
2483 Py_XDECREF(target->start_ns_event_obj);
2484 target->start_ns_event_obj = item;
2485 EXPAT(SetNamespaceDeclHandler)(
2486 self->parser,
2487 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2488 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2490 } else if (strcmp(event, "end-ns") == 0) {
2491 Py_INCREF(item);
2492 Py_XDECREF(target->end_ns_event_obj);
2493 target->end_ns_event_obj = item;
2494 EXPAT(SetNamespaceDeclHandler)(
2495 self->parser,
2496 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
2497 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
2499 } else {
2500 PyErr_Format(
2501 PyExc_ValueError,
2502 "unknown event '%s'", event
2504 return NULL;
2508 Py_RETURN_NONE;
2510 error:
2511 PyErr_SetString(
2512 PyExc_TypeError,
2513 "invalid event tuple"
2515 return NULL;
2518 static PyMethodDef xmlparser_methods[] = {
2519 {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
2520 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
2521 {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
2522 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
2523 {NULL, NULL}
2526 static PyObject*
2527 xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
2529 PyObject* res;
2530 char *name = "";
2532 if (PyUnicode_Check(nameobj))
2533 name = _PyUnicode_AsString(nameobj);
2535 PyErr_Clear();
2537 if (strcmp(name, "entity") == 0)
2538 res = self->entity;
2539 else if (strcmp(name, "target") == 0)
2540 res = self->target;
2541 else if (strcmp(name, "version") == 0) {
2542 char buffer[100];
2543 sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION,
2544 XML_MINOR_VERSION, XML_MICRO_VERSION);
2545 return PyBytes_FromString(buffer);
2546 } else {
2547 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
2550 Py_INCREF(res);
2551 return res;
2554 static PyTypeObject XMLParser_Type = {
2555 PyVarObject_HEAD_INIT(NULL, 0)
2556 "XMLParser", sizeof(XMLParserObject), 0,
2557 /* methods */
2558 (destructor)xmlparser_dealloc, /* tp_dealloc */
2559 0, /* tp_print */
2560 0, /* tp_getattr */
2561 0, /* tp_setattr */
2562 0, /* tp_reserved */
2563 0, /* tp_repr */
2564 0, /* tp_as_number */
2565 0, /* tp_as_sequence */
2566 0, /* tp_as_mapping */
2567 0, /* tp_hash */
2568 0, /* tp_call */
2569 0, /* tp_str */
2570 (getattrofunc)xmlparser_getattro, /* tp_getattro */
2571 0, /* tp_setattro */
2572 0, /* tp_as_buffer */
2573 Py_TPFLAGS_DEFAULT, /* tp_flags */
2574 0, /* tp_doc */
2575 0, /* tp_traverse */
2576 0, /* tp_clear */
2577 0, /* tp_richcompare */
2578 0, /* tp_weaklistoffset */
2579 0, /* tp_iter */
2580 0, /* tp_iternext */
2581 xmlparser_methods, /* tp_methods */
2582 0, /* tp_members */
2585 #endif
2587 /* ==================================================================== */
2588 /* python module interface */
2590 static PyMethodDef _functions[] = {
2591 {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
2592 {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
2593 {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
2594 #if defined(USE_EXPAT)
2595 {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2596 {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
2597 #endif
2598 {NULL, NULL}
2602 static struct PyModuleDef _elementtreemodule = {
2603 PyModuleDef_HEAD_INIT,
2604 "_elementtree",
2605 NULL,
2607 _functions,
2608 NULL,
2609 NULL,
2610 NULL,
2611 NULL
2614 PyMODINIT_FUNC
2615 PyInit__elementtree(void)
2617 PyObject* m;
2618 PyObject* g;
2619 char* bootstrap;
2620 #if defined(USE_PYEXPAT_CAPI)
2621 struct PyExpat_CAPI* capi;
2622 #endif
2624 /* Initialize object types */
2625 if (PyType_Ready(&TreeBuilder_Type) < 0)
2626 return NULL;
2627 if (PyType_Ready(&Element_Type) < 0)
2628 return NULL;
2629 #if defined(USE_EXPAT)
2630 if (PyType_Ready(&XMLParser_Type) < 0)
2631 return NULL;
2632 #endif
2634 m = PyModule_Create(&_elementtreemodule);
2635 if (!m)
2636 return NULL;
2638 /* The code below requires that the module gets already added
2639 to sys.modules. */
2640 PyDict_SetItemString(PyImport_GetModuleDict(),
2641 _elementtreemodule.m_name,
2644 /* python glue code */
2646 g = PyDict_New();
2647 if (!g)
2648 return NULL;
2650 PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
2652 bootstrap = (
2654 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2655 "from __future__ import generators\n" /* enable yield under 2.2 */
2656 #endif
2658 "from copy import copy, deepcopy\n"
2660 "try:\n"
2661 " from xml.etree import ElementTree\n"
2662 "except ImportError:\n"
2663 " import ElementTree\n"
2664 "ET = ElementTree\n"
2665 "del ElementTree\n"
2667 "import _elementtree as cElementTree\n"
2669 "try:\n" /* check if copy works as is */
2670 " copy(cElementTree.Element('x'))\n"
2671 "except:\n"
2672 " def copyelement(elem):\n"
2673 " return elem\n"
2675 "def Comment(text=None):\n" /* public */
2676 " element = cElementTree.Element(ET.Comment)\n"
2677 " element.text = text\n"
2678 " return element\n"
2679 "cElementTree.Comment = Comment\n"
2681 "class ElementTree(ET.ElementTree):\n" /* public */
2682 " def parse(self, source, parser=None):\n"
2683 " if not hasattr(source, 'read'):\n"
2684 " source = open(source, 'rb')\n"
2685 " if parser is not None:\n"
2686 " while 1:\n"
2687 " data = source.read(65536)\n"
2688 " if not data:\n"
2689 " break\n"
2690 " parser.feed(data)\n"
2691 " self._root = parser.close()\n"
2692 " else:\n"
2693 " parser = cElementTree.XMLParser()\n"
2694 " self._root = parser._parse(source)\n"
2695 " return self._root\n"
2696 "cElementTree.ElementTree = ElementTree\n"
2698 "def getiterator(node, tag=None):\n" /* helper */
2699 " if tag == '*':\n"
2700 " tag = None\n"
2701 #if (PY_VERSION_HEX < 0x02020000)
2702 " nodes = []\n" /* 2.1 doesn't have yield */
2703 " if tag is None or node.tag == tag:\n"
2704 " nodes.append(node)\n"
2705 " for node in node:\n"
2706 " nodes.extend(getiterator(node, tag))\n"
2707 " return nodes\n"
2708 #else
2709 " if tag is None or node.tag == tag:\n"
2710 " yield node\n"
2711 " for node in node:\n"
2712 " for node in getiterator(node, tag):\n"
2713 " yield node\n"
2714 #endif
2716 "def parse(source, parser=None):\n" /* public */
2717 " tree = ElementTree()\n"
2718 " tree.parse(source, parser)\n"
2719 " return tree\n"
2720 "cElementTree.parse = parse\n"
2722 #if (PY_VERSION_HEX < 0x02020000)
2723 "if hasattr(ET, 'iterparse'):\n"
2724 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2725 #else
2726 "class iterparse(object):\n"
2727 " root = None\n"
2728 " def __init__(self, file, events=None):\n"
2729 " if not hasattr(file, 'read'):\n"
2730 " file = open(file, 'rb')\n"
2731 " self._file = file\n"
2732 " self._events = events\n"
2733 " def __iter__(self):\n"
2734 " events = []\n"
2735 " b = cElementTree.TreeBuilder()\n"
2736 " p = cElementTree.XMLParser(b)\n"
2737 " p._setevents(events, self._events)\n"
2738 " while 1:\n"
2739 " data = self._file.read(16384)\n"
2740 " if not data:\n"
2741 " break\n"
2742 " p.feed(data)\n"
2743 " for event in events:\n"
2744 " yield event\n"
2745 " del events[:]\n"
2746 " root = p.close()\n"
2747 " for event in events:\n"
2748 " yield event\n"
2749 " self.root = root\n"
2750 "cElementTree.iterparse = iterparse\n"
2751 #endif
2753 "def PI(target, text=None):\n" /* public */
2754 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2755 " element.text = target\n"
2756 " if text:\n"
2757 " element.text = element.text + ' ' + text\n"
2758 " return element\n"
2760 " elem = cElementTree.Element(ET.PI)\n"
2761 " elem.text = text\n"
2762 " return elem\n"
2763 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2765 "def XML(text):\n" /* public */
2766 " parser = cElementTree.XMLParser()\n"
2767 " parser.feed(text)\n"
2768 " return parser.close()\n"
2769 "cElementTree.XML = cElementTree.fromstring = XML\n"
2771 "def XMLID(text):\n" /* public */
2772 " tree = XML(text)\n"
2773 " ids = {}\n"
2774 " for elem in tree.getiterator():\n"
2775 " id = elem.get('id')\n"
2776 " if id:\n"
2777 " ids[id] = elem\n"
2778 " return tree, ids\n"
2779 "cElementTree.XMLID = XMLID\n"
2781 "cElementTree.dump = ET.dump\n"
2782 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2783 "cElementTree.iselement = ET.iselement\n"
2784 "cElementTree.QName = ET.QName\n"
2785 "cElementTree.tostring = ET.tostring\n"
2786 "cElementTree.VERSION = '" VERSION "'\n"
2787 "cElementTree.__version__ = '" VERSION "'\n"
2788 "cElementTree.XMLParserError = SyntaxError\n"
2792 PyRun_String(bootstrap, Py_file_input, g, NULL);
2794 elementpath_obj = PyDict_GetItemString(g, "ElementPath");
2796 elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
2797 if (elementtree_copyelement_obj) {
2798 /* reduce hack needed; enable reduce method */
2799 PyMethodDef* mp;
2800 for (mp = element_methods; mp->ml_name; mp++)
2801 if (mp->ml_meth == (PyCFunction) element_reduce) {
2802 mp->ml_name = "__reduce__";
2803 break;
2805 } else
2806 PyErr_Clear();
2807 elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
2808 elementtree_getiterator_obj = PyDict_GetItemString(g, "getiterator");
2810 #if defined(USE_PYEXPAT_CAPI)
2811 /* link against pyexpat, if possible */
2812 capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
2813 if (capi &&
2814 strcmp(capi->magic, PyExpat_CAPI_MAGIC) == 0 &&
2815 capi->size <= sizeof(*expat_capi) &&
2816 capi->MAJOR_VERSION == XML_MAJOR_VERSION &&
2817 capi->MINOR_VERSION == XML_MINOR_VERSION &&
2818 capi->MICRO_VERSION == XML_MICRO_VERSION)
2819 expat_capi = capi;
2820 else
2821 expat_capi = NULL;
2822 #endif
2823 return m;