3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
51 #define VERSION "1.0.6"
53 /* -------------------------------------------------------------------- */
56 /* Leave defined to include the expat-based XMLParser type */
59 /* Define to make all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
76 /* -------------------------------------------------------------------- */
79 static int memory
= 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
91 #define LOCAL(type) static __inline type __fastcall
93 #define LOCAL(type) static type
96 /* macros used to store 'join' flags in string object pointers. note
97 that all use of text and tail as object pointers must be wrapped in
98 JOIN_OBJ. see comments in the ElementObject definition for more
100 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
101 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
102 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
104 /* glue functions (see the init function for details) */
105 static PyObject
* elementtree_copyelement_obj
;
106 static PyObject
* elementtree_deepcopy_obj
;
107 static PyObject
* elementtree_getiterator_obj
;
108 static PyObject
* elementpath_obj
;
113 deepcopy(PyObject
* object
, PyObject
* memo
)
115 /* do a deep copy of the given object */
120 if (!elementtree_deepcopy_obj
) {
123 "deepcopy helper not found"
128 args
= PyTuple_New(2);
132 Py_INCREF(object
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) object
);
133 Py_INCREF(memo
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) memo
);
135 result
= PyObject_CallObject(elementtree_deepcopy_obj
, args
);
143 list_join(PyObject
* list
)
145 /* join list elements (destroying the list in the process) */
152 switch (PyList_GET_SIZE(list
)) {
155 return PyBytes_FromString("");
157 result
= PyList_GET_ITEM(list
, 0);
163 /* two or more elements: slice out a suitable separator from the
164 first member, and use that to join the entire list */
166 joiner
= PySequence_GetSlice(PyList_GET_ITEM(list
, 0), 0, 0);
170 function
= PyObject_GetAttrString(joiner
, "join");
176 args
= PyTuple_New(1);
180 PyTuple_SET_ITEM(args
, 0, list
);
182 result
= PyObject_CallObject(function
, args
);
184 Py_DECREF(args
); /* also removes list */
191 #if (PY_VERSION_HEX < 0x02020000)
193 PyDict_Update(PyObject
* dict
, PyObject
* other
)
195 /* PyDict_Update emulation for 2.1 and earlier */
199 res
= PyObject_CallMethod(dict
, "update", "O", other
);
208 /* -------------------------------------------------------------------- */
209 /* the element type */
213 /* attributes (a dictionary object), or None if no attributes */
217 int length
; /* actual number of items */
218 int allocated
; /* allocated items */
220 /* this either points to _children or to a malloced buffer */
223 PyObject
* _children
[STATIC_CHILDREN
];
225 } ElementObjectExtra
;
230 /* element tag (a string). */
233 /* text before first child. note that this is a tagged pointer;
234 use JOIN_OBJ to get the object pointer. the join flag is used
235 to distinguish lists created by the tree builder from lists
236 assigned to the attribute by application code; the former
237 should be joined before being returned to the user, the latter
238 should be left intact. */
241 /* text after this element, in parent. note that this is a tagged
242 pointer; use JOIN_OBJ to get the object pointer. */
245 ElementObjectExtra
* extra
;
249 static PyTypeObject Element_Type
;
251 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
253 /* -------------------------------------------------------------------- */
254 /* element constructor and destructor */
257 element_new_extra(ElementObject
* self
, PyObject
* attrib
)
259 self
->extra
= PyObject_Malloc(sizeof(ElementObjectExtra
));
267 self
->extra
->attrib
= attrib
;
269 self
->extra
->length
= 0;
270 self
->extra
->allocated
= STATIC_CHILDREN
;
271 self
->extra
->children
= self
->extra
->_children
;
277 element_dealloc_extra(ElementObject
* self
)
281 Py_DECREF(self
->extra
->attrib
);
283 for (i
= 0; i
< self
->extra
->length
; i
++)
284 Py_DECREF(self
->extra
->children
[i
]);
286 if (self
->extra
->children
!= self
->extra
->_children
)
287 PyObject_Free(self
->extra
->children
);
289 PyObject_Free(self
->extra
);
293 element_new(PyObject
* tag
, PyObject
* attrib
)
297 self
= PyObject_New(ElementObject
, &Element_Type
);
301 /* use None for empty dictionaries */
302 if (PyDict_CheckExact(attrib
) && !PyDict_Size(attrib
))
307 if (attrib
!= Py_None
) {
309 if (element_new_extra(self
, attrib
) < 0) {
314 self
->extra
->length
= 0;
315 self
->extra
->allocated
= STATIC_CHILDREN
;
316 self
->extra
->children
= self
->extra
->_children
;
324 self
->text
= Py_None
;
327 self
->tail
= Py_None
;
329 ALLOC(sizeof(ElementObject
), "create element");
331 return (PyObject
*) self
;
335 element_resize(ElementObject
* self
, int extra
)
340 /* make sure self->children can hold the given number of extra
341 elements. set an exception and return -1 if allocation failed */
344 element_new_extra(self
, NULL
);
346 size
= self
->extra
->length
+ extra
;
348 if (size
> self
->extra
->allocated
) {
349 /* use Python 2.4's list growth strategy */
350 size
= (size
>> 3) + (size
< 9 ? 3 : 6) + size
;
351 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
352 * which needs at least 4 bytes.
353 * Although it's a false alarm always assume at least one child to
356 size
= size
? size
: 1;
357 if (self
->extra
->children
!= self
->extra
->_children
) {
358 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
359 * "children", which needs at least 4 bytes. Although it's a
360 * false alarm always assume at least one child to be safe.
362 children
= PyObject_Realloc(self
->extra
->children
,
363 size
* sizeof(PyObject
*));
367 children
= PyObject_Malloc(size
* sizeof(PyObject
*));
370 /* copy existing children from static area to malloc buffer */
371 memcpy(children
, self
->extra
->children
,
372 self
->extra
->length
* sizeof(PyObject
*));
374 self
->extra
->children
= children
;
375 self
->extra
->allocated
= size
;
386 element_add_subelement(ElementObject
* self
, PyObject
* element
)
388 /* add a child element to a parent */
390 if (element_resize(self
, 1) < 0)
394 self
->extra
->children
[self
->extra
->length
] = element
;
396 self
->extra
->length
++;
402 element_get_attrib(ElementObject
* self
)
404 /* return borrowed reference to attrib dictionary */
405 /* note: this function assumes that the extra section exists */
407 PyObject
* res
= self
->extra
->attrib
;
409 if (res
== Py_None
) {
410 /* create missing dictionary */
414 self
->extra
->attrib
= res
;
421 element_get_text(ElementObject
* self
)
423 /* return borrowed reference to text attribute */
425 PyObject
* res
= self
->text
;
429 if (PyList_CheckExact(res
)) {
430 res
= list_join(res
);
441 element_get_tail(ElementObject
* self
)
443 /* return borrowed reference to tail attribute */
445 PyObject
* res
= self
->tail
;
449 if (PyList_CheckExact(res
)) {
450 res
= list_join(res
);
461 element(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
466 PyObject
* attrib
= NULL
;
467 if (!PyArg_ParseTuple(args
, "O|O!:Element", &tag
,
468 &PyDict_Type
, &attrib
))
472 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
476 PyDict_Update(attrib
, kw
);
482 elem
= element_new(tag
, attrib
);
490 subelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
494 ElementObject
* parent
;
496 PyObject
* attrib
= NULL
;
497 if (!PyArg_ParseTuple(args
, "O!O|O!:SubElement",
498 &Element_Type
, &parent
, &tag
,
499 &PyDict_Type
, &attrib
))
503 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
507 PyDict_Update(attrib
, kw
);
513 elem
= element_new(tag
, attrib
);
517 if (element_add_subelement(parent
, elem
) < 0) {
526 element_dealloc(ElementObject
* self
)
529 element_dealloc_extra(self
);
531 /* discard attributes */
532 Py_DECREF(self
->tag
);
533 Py_DECREF(JOIN_OBJ(self
->text
));
534 Py_DECREF(JOIN_OBJ(self
->tail
));
536 RELEASE(sizeof(ElementObject
), "destroy element");
541 /* -------------------------------------------------------------------- */
542 /* methods (in alphabetical order) */
545 element_append(ElementObject
* self
, PyObject
* args
)
548 if (!PyArg_ParseTuple(args
, "O!:append", &Element_Type
, &element
))
551 if (element_add_subelement(self
, element
) < 0)
558 element_clear(ElementObject
* self
, PyObject
* args
)
560 if (!PyArg_ParseTuple(args
, ":clear"))
564 element_dealloc_extra(self
);
569 Py_DECREF(JOIN_OBJ(self
->text
));
570 self
->text
= Py_None
;
573 Py_DECREF(JOIN_OBJ(self
->tail
));
574 self
->tail
= Py_None
;
580 element_copy(ElementObject
* self
, PyObject
* args
)
583 ElementObject
* element
;
585 if (!PyArg_ParseTuple(args
, ":__copy__"))
588 element
= (ElementObject
*) element_new(
589 self
->tag
, (self
->extra
) ? self
->extra
->attrib
: Py_None
594 Py_DECREF(JOIN_OBJ(element
->text
));
595 element
->text
= self
->text
;
596 Py_INCREF(JOIN_OBJ(element
->text
));
598 Py_DECREF(JOIN_OBJ(element
->tail
));
599 element
->tail
= self
->tail
;
600 Py_INCREF(JOIN_OBJ(element
->tail
));
604 if (element_resize(element
, self
->extra
->length
) < 0) {
609 for (i
= 0; i
< self
->extra
->length
; i
++) {
610 Py_INCREF(self
->extra
->children
[i
]);
611 element
->extra
->children
[i
] = self
->extra
->children
[i
];
614 element
->extra
->length
= self
->extra
->length
;
618 return (PyObject
*) element
;
622 element_deepcopy(ElementObject
* self
, PyObject
* args
)
625 ElementObject
* element
;
633 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
636 tag
= deepcopy(self
->tag
, memo
);
641 attrib
= deepcopy(self
->extra
->attrib
, memo
);
651 element
= (ElementObject
*) element_new(tag
, attrib
);
659 text
= deepcopy(JOIN_OBJ(self
->text
), memo
);
662 Py_DECREF(element
->text
);
663 element
->text
= JOIN_SET(text
, JOIN_GET(self
->text
));
665 tail
= deepcopy(JOIN_OBJ(self
->tail
), memo
);
668 Py_DECREF(element
->tail
);
669 element
->tail
= JOIN_SET(tail
, JOIN_GET(self
->tail
));
673 if (element_resize(element
, self
->extra
->length
) < 0)
676 for (i
= 0; i
< self
->extra
->length
; i
++) {
677 PyObject
* child
= deepcopy(self
->extra
->children
[i
], memo
);
679 element
->extra
->length
= i
;
682 element
->extra
->children
[i
] = child
;
685 element
->extra
->length
= self
->extra
->length
;
689 /* add object to memo dictionary (so deepcopy won't visit it again) */
690 id
= PyLong_FromLong((Py_uintptr_t
) self
);
692 i
= PyDict_SetItem(memo
, id
, (PyObject
*) element
);
699 return (PyObject
*) element
;
707 checkpath(PyObject
* tag
)
712 /* check if a tag contains an xpath character */
714 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
716 if (PyUnicode_Check(tag
)) {
717 Py_UNICODE
*p
= PyUnicode_AS_UNICODE(tag
);
718 for (i
= 0; i
< PyUnicode_GET_SIZE(tag
); i
++) {
721 else if (p
[i
] == '}')
723 else if (check
&& PATHCHAR(p
[i
]))
728 if (PyBytes_Check(tag
)) {
729 char *p
= PyBytes_AS_STRING(tag
);
730 for (i
= 0; i
< PyBytes_GET_SIZE(tag
); i
++) {
733 else if (p
[i
] == '}')
735 else if (check
&& PATHCHAR(p
[i
]))
741 return 1; /* unknown type; might be path expression */
745 element_find(ElementObject
* self
, PyObject
* args
)
750 if (!PyArg_ParseTuple(args
, "O:find", &tag
))
754 return PyObject_CallMethod(
755 elementpath_obj
, "find", "OO", self
, tag
761 for (i
= 0; i
< self
->extra
->length
; i
++) {
762 PyObject
* item
= self
->extra
->children
[i
];
763 if (Element_CheckExact(item
) &&
764 PyObject_RichCompareBool(((ElementObject
*)item
)->tag
, tag
, Py_EQ
) == 1) {
774 element_findtext(ElementObject
* self
, PyObject
* args
)
779 PyObject
* default_value
= Py_None
;
780 if (!PyArg_ParseTuple(args
, "O|O:findtext", &tag
, &default_value
))
784 return PyObject_CallMethod(
785 elementpath_obj
, "findtext", "OOO", self
, tag
, default_value
789 Py_INCREF(default_value
);
790 return default_value
;
793 for (i
= 0; i
< self
->extra
->length
; i
++) {
794 ElementObject
* item
= (ElementObject
*) self
->extra
->children
[i
];
795 if (Element_CheckExact(item
) && (PyObject_RichCompareBool(item
->tag
, tag
, Py_EQ
) == 1)) {
797 PyObject
* text
= element_get_text(item
);
799 return PyBytes_FromString("");
805 Py_INCREF(default_value
);
806 return default_value
;
810 element_findall(ElementObject
* self
, PyObject
* args
)
816 if (!PyArg_ParseTuple(args
, "O:findall", &tag
))
820 return PyObject_CallMethod(
821 elementpath_obj
, "findall", "OO", self
, tag
831 for (i
= 0; i
< self
->extra
->length
; i
++) {
832 PyObject
* item
= self
->extra
->children
[i
];
833 if (Element_CheckExact(item
) &&
834 PyObject_RichCompareBool(((ElementObject
*)item
)->tag
, tag
, Py_EQ
) == 1) {
835 if (PyList_Append(out
, item
) < 0) {
846 element_get(ElementObject
* self
, PyObject
* args
)
851 PyObject
* default_value
= Py_None
;
852 if (!PyArg_ParseTuple(args
, "O|O:get", &key
, &default_value
))
855 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
856 value
= default_value
;
858 value
= PyDict_GetItem(self
->extra
->attrib
, key
);
860 value
= default_value
;
868 element_getchildren(ElementObject
* self
, PyObject
* args
)
873 if (!PyArg_ParseTuple(args
, ":getchildren"))
877 return PyList_New(0);
879 list
= PyList_New(self
->extra
->length
);
883 for (i
= 0; i
< self
->extra
->length
; i
++) {
884 PyObject
* item
= self
->extra
->children
[i
];
886 PyList_SET_ITEM(list
, i
, item
);
893 element_getiterator(ElementObject
* self
, PyObject
* args
)
897 PyObject
* tag
= Py_None
;
898 if (!PyArg_ParseTuple(args
, "|O:getiterator", &tag
))
901 if (!elementtree_getiterator_obj
) {
904 "getiterator helper not found"
909 args
= PyTuple_New(2);
913 Py_INCREF(self
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) self
);
914 Py_INCREF(tag
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) tag
);
916 result
= PyObject_CallObject(elementtree_getiterator_obj
, args
);
924 element_getitem(PyObject
* self_
, Py_ssize_t index
)
926 ElementObject
* self
= (ElementObject
*) self_
;
928 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
931 "child index out of range"
936 Py_INCREF(self
->extra
->children
[index
]);
937 return self
->extra
->children
[index
];
941 element_getslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
)
943 ElementObject
* self
= (ElementObject
*) self_
;
948 return PyList_New(0);
950 /* standard clamping */
955 if (end
> self
->extra
->length
)
956 end
= self
->extra
->length
;
960 list
= PyList_New(end
- start
);
964 for (i
= start
; i
< end
; i
++) {
965 PyObject
* item
= self
->extra
->children
[i
];
967 PyList_SET_ITEM(list
, i
- start
, item
);
974 element_insert(ElementObject
* self
, PyObject
* args
)
980 if (!PyArg_ParseTuple(args
, "iO!:insert", &index
,
981 &Element_Type
, &element
))
985 element_new_extra(self
, NULL
);
989 if (index
> self
->extra
->length
)
990 index
= self
->extra
->length
;
992 if (element_resize(self
, 1) < 0)
995 for (i
= self
->extra
->length
; i
> index
; i
--)
996 self
->extra
->children
[i
] = self
->extra
->children
[i
-1];
999 self
->extra
->children
[index
] = element
;
1001 self
->extra
->length
++;
1007 element_items(ElementObject
* self
, PyObject
* args
)
1009 if (!PyArg_ParseTuple(args
, ":items"))
1012 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1013 return PyList_New(0);
1015 return PyDict_Items(self
->extra
->attrib
);
1019 element_keys(ElementObject
* self
, PyObject
* args
)
1021 if (!PyArg_ParseTuple(args
, ":keys"))
1024 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1025 return PyList_New(0);
1027 return PyDict_Keys(self
->extra
->attrib
);
1031 element_length(ElementObject
* self
)
1036 return self
->extra
->length
;
1040 element_makeelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
1046 if (!PyArg_ParseTuple(args
, "OO:makeelement", &tag
, &attrib
))
1049 attrib
= PyDict_Copy(attrib
);
1053 elem
= element_new(tag
, attrib
);
1061 element_reduce(ElementObject
* self
, PyObject
* args
)
1063 if (!PyArg_ParseTuple(args
, ":__reduce__"))
1066 /* Hack alert: This method is used to work around a __copy__
1067 problem on certain 2.3 and 2.4 versions. To save time and
1068 simplify the code, we create the copy in here, and use a dummy
1069 copyelement helper to trick the copy module into doing the
1072 if (!elementtree_copyelement_obj
) {
1075 "copyelement helper not found"
1080 return Py_BuildValue(
1081 "O(N)", elementtree_copyelement_obj
, element_copy(self
, args
)
1086 element_remove(ElementObject
* self
, PyObject
* args
)
1091 if (!PyArg_ParseTuple(args
, "O!:remove", &Element_Type
, &element
))
1095 /* element has no children, so raise exception */
1098 "list.remove(x): x not in list"
1103 for (i
= 0; i
< self
->extra
->length
; i
++) {
1104 if (self
->extra
->children
[i
] == element
)
1106 if (PyObject_RichCompareBool(self
->extra
->children
[i
], element
, Py_EQ
) == 1)
1110 if (i
== self
->extra
->length
) {
1111 /* element is not in children, so raise exception */
1114 "list.remove(x): x not in list"
1119 Py_DECREF(self
->extra
->children
[i
]);
1121 self
->extra
->length
--;
1123 for (; i
< self
->extra
->length
; i
++)
1124 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1130 element_repr(ElementObject
* self
)
1132 return PyUnicode_FromFormat("<Element %R at %p>", self
->tag
, self
);
1136 element_set(ElementObject
* self
, PyObject
* args
)
1142 if (!PyArg_ParseTuple(args
, "OO:set", &key
, &value
))
1146 element_new_extra(self
, NULL
);
1148 attrib
= element_get_attrib(self
);
1152 if (PyDict_SetItem(attrib
, key
, value
) < 0)
1159 element_setslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
, PyObject
* item
)
1161 ElementObject
* self
= (ElementObject
*) self_
;
1162 Py_ssize_t i
, new, old
;
1163 PyObject
* recycle
= NULL
;
1166 element_new_extra(self
, NULL
);
1168 /* standard clamping */
1173 if (end
> self
->extra
->length
)
1174 end
= self
->extra
->length
;
1182 else if (PyList_CheckExact(item
)) {
1183 new = PyList_GET_SIZE(item
);
1185 /* FIXME: support arbitrary sequences? */
1188 "expected list, not \"%.200s\"", Py_TYPE(item
)->tp_name
1194 /* to avoid recursive calls to this method (via decref), move
1195 old items to the recycle bin here, and get rid of them when
1196 we're done modifying the element */
1197 recycle
= PyList_New(old
);
1198 for (i
= 0; i
< old
; i
++)
1199 PyList_SET_ITEM(recycle
, i
, self
->extra
->children
[i
+ start
]);
1204 for (i
= end
; i
< self
->extra
->length
; i
++)
1205 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1206 } else if (new > old
) {
1208 if (element_resize(self
, new - old
) < 0)
1210 for (i
= self
->extra
->length
-1; i
>= end
; i
--)
1211 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1214 /* replace the slice */
1215 for (i
= 0; i
< new; i
++) {
1216 PyObject
* element
= PyList_GET_ITEM(item
, i
);
1218 self
->extra
->children
[i
+ start
] = element
;
1221 self
->extra
->length
+= new - old
;
1223 /* discard the recycle bin, and everything in it */
1224 Py_XDECREF(recycle
);
1230 element_setitem(PyObject
* self_
, Py_ssize_t index
, PyObject
* item
)
1232 ElementObject
* self
= (ElementObject
*) self_
;
1236 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
1239 "child assignment index out of range");
1243 old
= self
->extra
->children
[index
];
1247 self
->extra
->children
[index
] = item
;
1249 self
->extra
->length
--;
1250 for (i
= index
; i
< self
->extra
->length
; i
++)
1251 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1259 static PyMethodDef element_methods
[] = {
1261 {"clear", (PyCFunction
) element_clear
, METH_VARARGS
},
1263 {"get", (PyCFunction
) element_get
, METH_VARARGS
},
1264 {"set", (PyCFunction
) element_set
, METH_VARARGS
},
1266 {"find", (PyCFunction
) element_find
, METH_VARARGS
},
1267 {"findtext", (PyCFunction
) element_findtext
, METH_VARARGS
},
1268 {"findall", (PyCFunction
) element_findall
, METH_VARARGS
},
1270 {"append", (PyCFunction
) element_append
, METH_VARARGS
},
1271 {"insert", (PyCFunction
) element_insert
, METH_VARARGS
},
1272 {"remove", (PyCFunction
) element_remove
, METH_VARARGS
},
1274 {"getiterator", (PyCFunction
) element_getiterator
, METH_VARARGS
},
1275 {"getchildren", (PyCFunction
) element_getchildren
, METH_VARARGS
},
1277 {"items", (PyCFunction
) element_items
, METH_VARARGS
},
1278 {"keys", (PyCFunction
) element_keys
, METH_VARARGS
},
1280 {"makeelement", (PyCFunction
) element_makeelement
, METH_VARARGS
},
1282 {"__copy__", (PyCFunction
) element_copy
, METH_VARARGS
},
1283 {"__deepcopy__", (PyCFunction
) element_deepcopy
, METH_VARARGS
},
1285 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1286 C objects correctly, so we have to fake it using a __reduce__-
1287 based hack (see the element_reduce implementation above for
1290 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1291 using a runtime test to figure out if we need to fake things
1292 or not (see the init code below). The following entry is
1293 enabled only if the hack is needed. */
1295 {"!__reduce__", (PyCFunction
) element_reduce
, METH_VARARGS
},
1301 element_getattro(ElementObject
* self
, PyObject
* nameobj
)
1306 if (PyUnicode_Check(nameobj
))
1307 name
= _PyUnicode_AsString(nameobj
);
1309 if (strcmp(name
, "tag") == 0)
1311 else if (strcmp(name
, "text") == 0)
1312 res
= element_get_text(self
);
1313 else if (strcmp(name
, "tail") == 0) {
1314 res
= element_get_tail(self
);
1315 } else if (strcmp(name
, "attrib") == 0) {
1317 element_new_extra(self
, NULL
);
1318 res
= element_get_attrib(self
);
1320 return PyObject_GenericGetAttr((PyObject
*) self
, nameobj
);
1328 element_setattr(ElementObject
* self
, const char* name
, PyObject
* value
)
1330 if (value
== NULL
) {
1332 PyExc_AttributeError
,
1333 "can't delete element attributes"
1338 if (strcmp(name
, "tag") == 0) {
1339 Py_DECREF(self
->tag
);
1341 Py_INCREF(self
->tag
);
1342 } else if (strcmp(name
, "text") == 0) {
1343 Py_DECREF(JOIN_OBJ(self
->text
));
1345 Py_INCREF(self
->text
);
1346 } else if (strcmp(name
, "tail") == 0) {
1347 Py_DECREF(JOIN_OBJ(self
->tail
));
1349 Py_INCREF(self
->tail
);
1350 } else if (strcmp(name
, "attrib") == 0) {
1352 element_new_extra(self
, NULL
);
1353 Py_DECREF(self
->extra
->attrib
);
1354 self
->extra
->attrib
= value
;
1355 Py_INCREF(self
->extra
->attrib
);
1357 PyErr_SetString(PyExc_AttributeError
, name
);
1364 static PySequenceMethods element_as_sequence
= {
1365 (lenfunc
) element_length
,
1374 static PyTypeObject Element_Type
= {
1375 PyVarObject_HEAD_INIT(NULL
, 0)
1376 "Element", sizeof(ElementObject
), 0,
1378 (destructor
)element_dealloc
, /* tp_dealloc */
1381 (setattrfunc
)element_setattr
, /* tp_setattr */
1382 0, /* tp_reserved */
1383 (reprfunc
)element_repr
, /* tp_repr */
1384 0, /* tp_as_number */
1385 &element_as_sequence
, /* tp_as_sequence */
1386 0, /* tp_as_mapping */
1390 (getattrofunc
)element_getattro
, /* tp_getattro */
1391 0, /* tp_setattro */
1392 0, /* tp_as_buffer */
1393 Py_TPFLAGS_DEFAULT
, /* tp_flags */
1395 0, /* tp_traverse */
1397 0, /* tp_richcompare */
1398 0, /* tp_weaklistoffset */
1400 0, /* tp_iternext */
1401 element_methods
, /* tp_methods */
1405 /* ==================================================================== */
1406 /* the tree builder type */
1411 PyObject
* root
; /* root node (first created node) */
1413 ElementObject
* this; /* current node */
1414 ElementObject
* last
; /* most recently created node */
1416 PyObject
* data
; /* data collector (string or list), or NULL */
1418 PyObject
* stack
; /* element stack */
1419 Py_ssize_t index
; /* current stack size (0=empty) */
1421 /* element tracing */
1422 PyObject
* events
; /* list of events, or NULL if not collecting */
1423 PyObject
* start_event_obj
; /* event objects (NULL to ignore) */
1424 PyObject
* end_event_obj
;
1425 PyObject
* start_ns_event_obj
;
1426 PyObject
* end_ns_event_obj
;
1428 } TreeBuilderObject
;
1430 static PyTypeObject TreeBuilder_Type
;
1432 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
1434 /* -------------------------------------------------------------------- */
1435 /* constructor and destructor */
1438 treebuilder_new(void)
1440 TreeBuilderObject
* self
;
1442 self
= PyObject_New(TreeBuilderObject
, &TreeBuilder_Type
);
1449 self
->this = (ElementObject
*) Py_None
;
1452 self
->last
= (ElementObject
*) Py_None
;
1456 self
->stack
= PyList_New(20);
1459 self
->events
= NULL
;
1460 self
->start_event_obj
= self
->end_event_obj
= NULL
;
1461 self
->start_ns_event_obj
= self
->end_ns_event_obj
= NULL
;
1463 ALLOC(sizeof(TreeBuilderObject
), "create treebuilder");
1465 return (PyObject
*) self
;
1469 treebuilder(PyObject
* self_
, PyObject
* args
)
1471 if (!PyArg_ParseTuple(args
, ":TreeBuilder"))
1474 return treebuilder_new();
1478 treebuilder_dealloc(TreeBuilderObject
* self
)
1480 Py_XDECREF(self
->end_ns_event_obj
);
1481 Py_XDECREF(self
->start_ns_event_obj
);
1482 Py_XDECREF(self
->end_event_obj
);
1483 Py_XDECREF(self
->start_event_obj
);
1484 Py_XDECREF(self
->events
);
1485 Py_DECREF(self
->stack
);
1486 Py_XDECREF(self
->data
);
1487 Py_DECREF(self
->last
);
1488 Py_DECREF(self
->this);
1489 Py_XDECREF(self
->root
);
1491 RELEASE(sizeof(TreeBuilderObject
), "destroy treebuilder");
1496 /* -------------------------------------------------------------------- */
1500 treebuilder_handle_xml(TreeBuilderObject
* self
, PyObject
* encoding
,
1501 PyObject
* standalone
)
1507 treebuilder_handle_start(TreeBuilderObject
* self
, PyObject
* tag
,
1514 if (self
->this == self
->last
) {
1515 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1516 self
->last
->text
= JOIN_SET(
1517 self
->data
, PyList_CheckExact(self
->data
)
1520 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1521 self
->last
->tail
= JOIN_SET(
1522 self
->data
, PyList_CheckExact(self
->data
)
1528 node
= element_new(tag
, attrib
);
1532 this = (PyObject
*) self
->this;
1534 if (this != Py_None
) {
1535 if (element_add_subelement((ElementObject
*) this, node
) < 0)
1541 "multiple elements on top level"
1549 if (self
->index
< PyList_GET_SIZE(self
->stack
)) {
1550 if (PyList_SetItem(self
->stack
, self
->index
, this) < 0)
1554 if (PyList_Append(self
->stack
, this) < 0)
1561 self
->this = (ElementObject
*) node
;
1563 Py_DECREF(self
->last
);
1565 self
->last
= (ElementObject
*) node
;
1567 if (self
->start_event_obj
) {
1569 PyObject
* action
= self
->start_event_obj
;
1570 res
= PyTuple_New(2);
1572 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1573 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1574 PyList_Append(self
->events
, res
);
1577 PyErr_Clear(); /* FIXME: propagate error */
1588 treebuilder_handle_data(TreeBuilderObject
* self
, PyObject
* data
)
1591 if (self
->last
== (ElementObject
*) Py_None
) {
1592 /* ignore calls to data before the first call to start */
1595 /* store the first item as is */
1596 Py_INCREF(data
); self
->data
= data
;
1598 /* more than one item; use a list to collect items */
1599 if (PyBytes_CheckExact(self
->data
) && Py_REFCNT(self
->data
) == 1 &&
1600 PyBytes_CheckExact(data
) && PyBytes_GET_SIZE(data
) == 1) {
1601 /* expat often generates single character data sections; handle
1602 the most common case by resizing the existing string... */
1603 Py_ssize_t size
= PyBytes_GET_SIZE(self
->data
);
1604 if (_PyBytes_Resize(&self
->data
, size
+ 1) < 0)
1606 PyBytes_AS_STRING(self
->data
)[size
] = PyBytes_AS_STRING(data
)[0];
1607 } else if (PyList_CheckExact(self
->data
)) {
1608 if (PyList_Append(self
->data
, data
) < 0)
1611 PyObject
* list
= PyList_New(2);
1614 PyList_SET_ITEM(list
, 0, self
->data
);
1615 Py_INCREF(data
); PyList_SET_ITEM(list
, 1, data
);
1624 treebuilder_handle_end(TreeBuilderObject
* self
, PyObject
* tag
)
1629 if (self
->this == self
->last
) {
1630 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1631 self
->last
->text
= JOIN_SET(
1632 self
->data
, PyList_CheckExact(self
->data
)
1635 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1636 self
->last
->tail
= JOIN_SET(
1637 self
->data
, PyList_CheckExact(self
->data
)
1643 if (self
->index
== 0) {
1646 "pop from empty stack"
1653 item
= PyList_GET_ITEM(self
->stack
, self
->index
);
1656 Py_DECREF(self
->last
);
1658 self
->last
= (ElementObject
*) self
->this;
1659 self
->this = (ElementObject
*) item
;
1661 if (self
->end_event_obj
) {
1663 PyObject
* action
= self
->end_event_obj
;
1664 PyObject
* node
= (PyObject
*) self
->last
;
1665 res
= PyTuple_New(2);
1667 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1668 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1669 PyList_Append(self
->events
, res
);
1672 PyErr_Clear(); /* FIXME: propagate error */
1675 Py_INCREF(self
->last
);
1676 return (PyObject
*) self
->last
;
/* Queue a namespace event on self->events.
 *
 * From the statements shown: for a start event the action is
 * self->start_ns_event_obj and the payload ("parcel") is a 2-tuple built
 * from prefix (empty string when prefix is NULL) and uri; for an end event
 * the action is self->end_ns_event_obj.  The (action, parcel) pair is stored
 * in a new 2-tuple that is appended to self->events.
 *
 * NOTE(review): how the `start` flag selects between the two halves, and
 * what the parcel is for an end event, is on lines missing from this
 * listing -- confirm against the full file.
 */
1680 treebuilder_handle_namespace(TreeBuilderObject
* self
, int start
,
1681 const char* prefix
, const char *uri
)
/* start-ns half: guarded by the presence of a start-ns event object */
1691 if (!self
->start_ns_event_obj
)
1693 action
= self
->start_ns_event_obj
;
1694 /* FIXME: prefix and uri use utf-8 encoding! */
1695 parcel
= Py_BuildValue("ss", (prefix
) ? prefix
: "", uri
);
/* end-ns half: guarded by the presence of an end-ns event object */
1700 if (!self
->end_ns_event_obj
)
1702 action
= self
->end_ns_event_obj
;
/* build the (action, parcel) event tuple and queue it */
1708 res
= PyTuple_New(2);
1711 PyTuple_SET_ITEM(res
, 0, action
);
1712 PyTuple_SET_ITEM(res
, 1, parcel
);
/* NOTE(review): the return value of PyList_Append is not checked here */
1713 PyList_Append(self
->events
, res
);
1716 PyErr_Clear(); /* FIXME: propagate error */
1719 /* -------------------------------------------------------------------- */
1720 /* methods (in alphabetical order) */
/* Python-visible "data" method: parses exactly one object argument and
 * returns the result of treebuilder_handle_data(self, data).
 * NOTE(review): the declaration of `data` and the failure return after
 * PyArg_ParseTuple are not shown in this listing. */
1723 treebuilder_data(TreeBuilderObject
* self
, PyObject
* args
)
1726 if (!PyArg_ParseTuple(args
, "O:data", &data
))
1729 return treebuilder_handle_data(self
, data
);
/* Python-visible "end" method: parses exactly one object argument (the tag)
 * and returns the result of treebuilder_handle_end(self, tag).
 * NOTE(review): the declaration of `tag` and the failure return after
 * PyArg_ParseTuple are not shown in this listing. */
1733 treebuilder_end(TreeBuilderObject
* self
, PyObject
* args
)
1736 if (!PyArg_ParseTuple(args
, "O:end", &tag
))
1739 return treebuilder_handle_end(self
, tag
);
/* Finish building and produce the builder's result object.
 * NOTE(review): the body is not shown in this listing; the only grounded
 * facts are that it takes the builder and that treebuilder_close,
 * xmlparser_close and xmlparser_parse return its result directly. */
1743 treebuilder_done(TreeBuilderObject
* self
)
1747 /* FIXME: check stack size? */
/* Python-visible "close" method: accepts no arguments (format ":close")
 * and returns the result of treebuilder_done(self). */
1759 treebuilder_close(TreeBuilderObject
* self
, PyObject
* args
)
1761 if (!PyArg_ParseTuple(args
, ":close"))
1764 return treebuilder_done(self
);
/* Python-visible "start" method: takes a tag and an optional attrib object
 * and returns the result of treebuilder_handle_start(self, tag, attrib).
 * NOTE(review): the declaration of `tag` and the failure return after
 * PyArg_ParseTuple are not shown in this listing. */
1768 treebuilder_start(TreeBuilderObject
* self
, PyObject
* args
)
/* attrib keeps the value Py_None when the caller omits the second argument */
1771 PyObject
* attrib
= Py_None
;
1772 if (!PyArg_ParseTuple(args
, "O|O:start", &tag
, &attrib
))
1775 return treebuilder_handle_start(self
, tag
, attrib
);
/* Python-visible "xml" method: takes two required objects (encoding and
 * standalone) and returns the result of
 * treebuilder_handle_xml(self, encoding, standalone).
 * NOTE(review): the declaration of `encoding` and the failure return after
 * PyArg_ParseTuple are not shown in this listing. */
1779 treebuilder_xml(TreeBuilderObject
* self
, PyObject
* args
)
1782 PyObject
* standalone
;
1783 if (!PyArg_ParseTuple(args
, "OO:xml", &encoding
, &standalone
))
1786 return treebuilder_handle_xml(self
, encoding
, standalone
);
/* Method table for the tree builder type: maps the Python method names
 * data/start/end/xml/close to the C wrappers above.  Every entry uses the
 * METH_VARARGS calling convention.
 * NOTE(review): the terminating sentinel entry and closing brace are not
 * shown in this listing. */
1789 static PyMethodDef treebuilder_methods
[] = {
1790 {"data", (PyCFunction
) treebuilder_data
, METH_VARARGS
},
1791 {"start", (PyCFunction
) treebuilder_start
, METH_VARARGS
},
1792 {"end", (PyCFunction
) treebuilder_end
, METH_VARARGS
},
1793 {"xml", (PyCFunction
) treebuilder_xml
, METH_VARARGS
},
1794 {"close", (PyCFunction
) treebuilder_close
, METH_VARARGS
},
/* Type object for the tree builder.  Visible slots: type name
 * "TreeBuilder", instance size sizeof(TreeBuilderObject), destructor
 * treebuilder_dealloc, default type flags, and treebuilder_methods as the
 * method table; the remaining visible slots are zero.
 * NOTE(review): several slots between the ones shown are missing from this
 * listing, so slot positions cannot be verified from here. */
1798 static PyTypeObject TreeBuilder_Type
= {
1799 PyVarObject_HEAD_INIT(NULL
, 0)
1800 "TreeBuilder", sizeof(TreeBuilderObject
), 0,
1802 (destructor
)treebuilder_dealloc
, /* tp_dealloc */
1806 0, /* tp_reserved */
1808 0, /* tp_as_number */
1809 0, /* tp_as_sequence */
1810 0, /* tp_as_mapping */
1814 0, /* tp_getattro */
1815 0, /* tp_setattro */
1816 0, /* tp_as_buffer */
1817 Py_TPFLAGS_DEFAULT
, /* tp_flags */
1819 0, /* tp_traverse */
1821 0, /* tp_richcompare */
1822 0, /* tp_weaklistoffset */
1824 0, /* tp_iternext */
1825 treebuilder_methods
, /* tp_methods */
1829 /* ==================================================================== */
1830 /* the expat interface */
/* Expat binding selection.  Two definitions of EXPAT(func) are visible:
 * one that calls through the expat_capi dispatch table (declared under
 * USE_PYEXPAT_CAPI) and one that pastes the XML_ prefix onto the function
 * name for a direct call.
 * NOTE(review): the directive separating the two definitions (presumably
 * #else) and the closing #endif lines are not shown in this listing. */
1832 #if defined(USE_EXPAT)
1836 #if defined(USE_PYEXPAT_CAPI)
1837 #include "pyexpat.h"
1838 static struct PyExpat_CAPI
* expat_capi
;
1839 #define EXPAT(func) (expat_capi->func)
1841 #define EXPAT(func) (XML_##func)
/* Handler-callback fields.  NOTE(review): the enclosing struct header is not
 * shown in this listing; these names match the members that xmlparser()
 * assigns on an XMLParserObject (self->handle_xml ... self->handle_pi), each
 * filled from the target attribute of the corresponding name
 * ("xml", "start", "data", "end", "comment", "pi"). */
1854 PyObject
* handle_xml
;
1855 PyObject
* handle_start
;
1856 PyObject
* handle_data
;
1857 PyObject
* handle_end
;
1859 PyObject
* handle_comment
;
1860 PyObject
* handle_pi
;
/* Forward declaration: xmlparser() references &XMLParser_Type before the
 * initialized definition that appears later in the file. */
1864 static PyTypeObject XMLParser_Type
;
/* Map a raw name delivered by expat to a cached "universal name" object.
 *
 * From the statements shown: the raw bytes are used as the key into
 * self->names; on a miss the name is scanned for a '}' separator, a bytes
 * buffer one byte longer than the input is filled with the input copied at
 * offset 1, the result is decoded as strict UTF-8, and the decoded value is
 * stored in self->names under the raw key.
 *
 * NOTE(review): the branches around the cache hit, the byte written at
 * offset 0, and the error/return paths are on lines missing from this
 * listing -- confirm against the full file.
 */
1869 makeuniversal(XMLParserObject
* self
, const char* string
)
1871 /* convert a UTF-8 tag/attribute name from the expat parser
1872 to a universal name string */
/* NOTE(review): strlen returns size_t; storing it in an int narrows the
   value for very long names -- confirm whether Py_ssize_t is intended */
1874 int size
= strlen(string
);
1878 /* look the 'raw' name up in the names dictionary */
1879 key
= PyBytes_FromStringAndSize(string
, size
);
/* PyDict_GetItem returns a borrowed reference (NULL when the key is absent) */
1883 value
= PyDict_GetItem(self
->names
, key
);
1888 /* new name. convert to universal name, and decode as
1895 /* look for namespace separator */
1896 for (i
= 0; i
< size
; i
++)
1897 if (string
[i
] == '}')
1900 /* convert to universal name */
1901 tag
= PyBytes_FromStringAndSize(NULL
, size
+1);
1902 p
= PyBytes_AS_STRING(tag
);
/* copy the raw name after the first byte of the new buffer */
1904 memcpy(p
+1, string
, size
);
1907 /* plain name; use key as tag */
1912 /* decode universal name */
1913 p
= PyBytes_AS_STRING(tag
);
1914 value
= PyUnicode_DecodeUTF8(p
, size
, "strict");
1921 /* add to names dictionary */
1922 if (PyDict_SetItem(self
->names
, key
, value
) < 0) {
1933 /* -------------------------------------------------------------------- */
/* Expat default handler, used here to resolve entity references.
 *
 * From the statements shown: input that is shorter than two characters or
 * does not begin with '&' is filtered out; otherwise the text between the
 * first and last character is decoded as UTF-8 and looked up in
 * self->entity.  A found value is delivered as character data, either
 * straight to the built-in tree builder or through the target's data
 * callback.  An "undefined entity" SyntaxError message is formatted with
 * the parser's current error line and column.
 *
 * NOTE(review): the remaining parameters, the branch structure and the
 * error-raising call are on lines missing from this listing.
 */
1937 expat_default_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
/* only "&...;"-shaped input is of interest */
1944 if (data_len
< 2 || data_in
[0] != '&')
/* strip the leading '&' and the final character to obtain the entity name */
1947 key
= PyUnicode_DecodeUTF8(data_in
+ 1, data_len
- 2, "strict");
1951 value
= PyDict_GetItem(self
->entity
, key
);
/* fast path for the built-in builder, generic callback otherwise */
1954 if (TreeBuilder_CheckExact(self
->target
))
1955 res
= treebuilder_handle_data(
1956 (TreeBuilderObject
*) self
->target
, value
1958 else if (self
->handle_data
)
1959 res
= PyObject_CallFunction(self
->handle_data
, "O", value
);
/* NOTE(review): key was produced by PyUnicode_DecodeUTF8 above, yet it is
   passed to PyBytes_AS_STRING below -- looks like a str/bytes mismatch;
   confirm against the full file */
1965 PyExc_SyntaxError
, "undefined entity &%s;: line %ld, column %ld",
1966 PyBytes_AS_STRING(key
),
1967 EXPAT(GetErrorLineNumber
)(self
->parser
),
1968 EXPAT(GetErrorColumnNumber
)(self
->parser
)
/* Expat start-element handler.
 *
 * From the statements shown: the tag is converted with makeuniversal; the
 * attribute array is walked while both attrib_in[0] and attrib_in[1] are
 * non-NULL, converting each name with makeuniversal and decoding each value
 * as strict UTF-8 before storing the pair in a dict; the event is then
 * delivered to the built-in tree builder directly or to the target's start
 * callback with (tag, attrib).
 *
 * NOTE(review): the conditions around dict creation, the loop advance and
 * the cleanup paths are on lines missing from this listing.
 */
1976 expat_start_handler(XMLParserObject
* self
, const XML_Char
* tag_in
,
1977 const XML_Char
**attrib_in
)
1985 tag
= makeuniversal(self
, tag_in
);
1987 return; /* parser will look for errors */
1991 attrib
= PyDict_New();
/* each iteration sees one name (attrib_in[0]) and its value (attrib_in[1]) */
1994 while (attrib_in
[0] && attrib_in
[1]) {
1995 PyObject
* key
= makeuniversal(self
, attrib_in
[0]);
1996 PyObject
* value
= PyUnicode_DecodeUTF8(attrib_in
[1], strlen(attrib_in
[1]), "strict");
1997 if (!key
|| !value
) {
2003 ok
= PyDict_SetItem(attrib
, key
, value
);
/* fast path for the built-in builder, generic callback otherwise */
2017 if (TreeBuilder_CheckExact(self
->target
))
2019 res
= treebuilder_handle_start((TreeBuilderObject
*) self
->target
,
2021 else if (self
->handle_start
)
2022 res
= PyObject_CallFunction(self
->handle_start
, "OO", tag
, attrib
);
/* Expat character-data handler: decodes data_len bytes of data_in as strict
 * UTF-8 and delivers the resulting string to the built-in tree builder
 * directly, or to the target's data callback when one exists.
 * NOTE(review): the second parameter line, the NULL check before the early
 * return, and reference cleanup are on lines missing from this listing. */
2033 expat_data_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
2039 data
= PyUnicode_DecodeUTF8(data_in
, data_len
, "strict");
2041 return; /* parser will look for errors */
/* fast path for the built-in builder, generic callback otherwise */
2043 if (TreeBuilder_CheckExact(self
->target
))
2045 res
= treebuilder_handle_data((TreeBuilderObject
*) self
->target
, data
);
2046 else if (self
->handle_data
)
2047 res
= PyObject_CallFunction(self
->handle_data
, "O", data
);
/* Expat end-element handler: for the built-in tree builder the end event is
 * delivered with Py_None as the tag; for any other target that has an end
 * callback, the tag is converted with makeuniversal and passed to it.
 * NOTE(review): the declaration of `tag` and reference cleanup are on lines
 * missing from this listing. */
2057 expat_end_handler(XMLParserObject
* self
, const XML_Char
* tag_in
)
2060 PyObject
* res
= NULL
;
2062 if (TreeBuilder_CheckExact(self
->target
))
2064 /* the standard tree builder doesn't look at the end tag */
2065 res
= treebuilder_handle_end(
2066 (TreeBuilderObject
*) self
->target
, Py_None
/* custom targets receive the universal tag name */
2068 else if (self
->handle_end
) {
2069 tag
= makeuniversal(self
, tag_in
);
2071 res
= PyObject_CallFunction(self
->handle_end
, "O", tag
);
/* Expat start-namespace-declaration handler: forwards prefix and uri to
 * treebuilder_handle_namespace with the start flag set to 1.
 * NOTE(review): self->target is cast to TreeBuilderObject* with no type
 * check in the lines shown; xmlparser_setevents, which installs this
 * handler, checks TreeBuilder_CheckExact first -- confirm that is the only
 * installation path. */
2080 expat_start_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix
,
2081 const XML_Char
*uri
)
2083 treebuilder_handle_namespace(
2084 (TreeBuilderObject
*) self
->target
, 1, prefix
, uri
/* Expat end-namespace-declaration handler: calls
 * treebuilder_handle_namespace with the start flag set to 0 and NULL for
 * both prefix and uri.  prefix_in is not used in the lines shown.
 * NOTE(review): self->target is cast to TreeBuilderObject* with no type
 * check in the lines shown. */
2089 expat_end_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix_in
)
2091 treebuilder_handle_namespace(
2092 (TreeBuilderObject
*) self
->target
, 0, NULL
, NULL
/* Expat comment handler: when the target has a comment callback, decodes
 * the NUL-terminated comment text as strict UTF-8 and calls the callback
 * with it.
 * NOTE(review): local declarations, the NULL check on the decoded string
 * and reference cleanup are on lines missing from this listing. */
2097 expat_comment_handler(XMLParserObject
* self
, const XML_Char
* comment_in
)
2102 if (self
->handle_comment
) {
2103 comment
= PyUnicode_DecodeUTF8(comment_in
, strlen(comment_in
), "strict");
2105 res
= PyObject_CallFunction(self
->handle_comment
, "O", comment
);
/* Expat processing-instruction handler: when the target has a pi callback,
 * decodes the NUL-terminated target and data strings as strict UTF-8 and,
 * if both decodes succeeded, calls the callback with (target, data).
 * NOTE(review): local declarations and reference cleanup are on lines
 * missing from this listing. */
2113 expat_pi_handler(XMLParserObject
* self
, const XML_Char
* target_in
,
2114 const XML_Char
* data_in
)
2120 if (self
->handle_pi
) {
2121 target
= PyUnicode_DecodeUTF8(target_in
, strlen(target_in
), "strict");
2122 data
= PyUnicode_DecodeUTF8(data_in
, strlen(data_in
), "strict");
/* the callback is invoked only when both strings decoded successfully */
2123 if (target
&& data
) {
2124 res
= PyObject_CallFunction(self
->handle_pi
, "OO", target
, data
);
/* Expat unknown-encoding handler: builds a byte-to-code-point table for the
 * encoding called `name` using Python's codec machinery.
 *
 * From the statements shown: the XML_Encoding record is zeroed, a 256-byte
 * buffer is decoded with the named codec using the "replace" error policy,
 * the decoded string must be exactly 256 code units long, and every
 * position whose result is not the Unicode replacement character is copied
 * into info->map.  Returns XML_STATUS_OK on success and XML_STATUS_ERROR on
 * the failure paths shown.
 *
 * NOTE(review): the `info` parameter line, the body of the first loop
 * (presumably filling s with the byte values 0..255) and the handling of
 * replacement characters are on lines missing from this listing.
 */
2136 expat_unknown_encoding_handler(XMLParserObject
*self
, const XML_Char
*name
,
2141 unsigned char s
[256];
2144 memset(info
, 0, sizeof(XML_Encoding
));
2146 for (i
= 0; i
< 256; i
++)
/* undecodable bytes become replacement characters instead of raising */
2149 u
= PyUnicode_Decode((char*) s
, 256, name
, "replace");
2151 return XML_STATUS_ERROR
;
/* a result of any other length cannot be indexed byte-for-byte below */
2153 if (PyUnicode_GET_SIZE(u
) != 256) {
2155 return XML_STATUS_ERROR
;
2158 p
= PyUnicode_AS_UNICODE(u
);
/* keep only the bytes that decoded to a real character */
2160 for (i
= 0; i
< 256; i
++) {
2161 if (p
[i
] != Py_UNICODE_REPLACEMENT_CHARACTER
)
2162 info
->map
[i
] = p
[i
];
2169 return XML_STATUS_OK
;
2172 /* -------------------------------------------------------------------- */
2173 /* constructor and destructor */
/* Factory for parser objects (exposed as both "XMLParser" and
 * "XMLTreeBuilder" in the module function table).
 *
 * Accepts optional `target` and `encoding` arguments.  From the statements
 * shown: allocates an XMLParserObject, creates the entity and names
 * dictionaries, creates an expat parser that allocates through the Python
 * object allocator and uses "}" as the third ParserCreate_MM argument,
 * obtains a target from treebuilder_new(), looks up the target's
 * xml/start/data/end/comment/pi attributes, and registers the expat
 * callbacks.  Returns the new parser object.
 *
 * NOTE(review): many lines (NULL checks, error returns, the condition under
 * which treebuilder_new() is called, braces) are missing from this listing;
 * only the visible statements are described.
 */
2176 xmlparser(PyObject
* self_
, PyObject
* args
, PyObject
* kw
)
2178 XMLParserObject
* self
;
2179 /* FIXME: does this need to be static? */
2180 static XML_Memory_Handling_Suite memory_handler
;
2182 PyObject
* target
= NULL
;
2183 char* encoding
= NULL
;
2184 static char* kwlist
[] = { "target", "encoding", NULL
};
/* both arguments are optional; "z" lets encoding be given as None */
2185 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|Oz:XMLParser", kwlist
,
2186 &target
, &encoding
))
2189 #if defined(USE_PYEXPAT_CAPI)
2192 PyExc_RuntimeError
, "cannot load dispatch table from pyexpat"
2198 self
= PyObject_New(XMLParserObject
, &XMLParser_Type
);
2202 self
->entity
= PyDict_New();
2203 if (!self
->entity
) {
2208 self
->names
= PyDict_New();
/* NOTE(review): self->entity is a dict created by PyDict_New; releasing it
   with PyObject_Del rather than Py_DECREF bypasses the dict's own
   deallocator -- looks wrong, confirm (same pattern repeats below) */
2210 PyObject_Del(self
->entity
);
/* route expat's allocations through the Python object allocator */
2215 memory_handler
.malloc_fcn
= PyObject_Malloc
;
2216 memory_handler
.realloc_fcn
= PyObject_Realloc
;
2217 memory_handler
.free_fcn
= PyObject_Free
;
2219 self
->parser
= EXPAT(ParserCreate_MM
)(encoding
, &memory_handler
, "}");
2220 if (!self
->parser
) {
2221 PyObject_Del(self
->names
);
2222 PyObject_Del(self
->entity
);
2228 /* setup target handlers */
2230 target
= treebuilder_new();
2232 EXPAT(ParserFree
)(self
->parser
);
2233 PyObject_Del(self
->names
);
2234 PyObject_Del(self
->entity
);
2240 self
->target
= target
;
/* a lookup that fails leaves the field NULL; the handlers and the
   registration code below test these fields before using them */
2242 self
->handle_xml
= PyObject_GetAttrString(target
, "xml");
2243 self
->handle_start
= PyObject_GetAttrString(target
, "start");
2244 self
->handle_data
= PyObject_GetAttrString(target
, "data");
2245 self
->handle_end
= PyObject_GetAttrString(target
, "end");
2246 self
->handle_comment
= PyObject_GetAttrString(target
, "comment");
2247 self
->handle_pi
= PyObject_GetAttrString(target
, "pi");
2251 /* configure parser */
/* the parser object itself is the user data handed to every callback */
2252 EXPAT(SetUserData
)(self
->parser
, self
);
2253 EXPAT(SetElementHandler
)(
2255 (XML_StartElementHandler
) expat_start_handler
,
2256 (XML_EndElementHandler
) expat_end_handler
2258 EXPAT(SetDefaultHandlerExpand
)(
2260 (XML_DefaultHandler
) expat_default_handler
2262 EXPAT(SetCharacterDataHandler
)(
2264 (XML_CharacterDataHandler
) expat_data_handler
/* comment and PI callbacks are registered only if the target supports them */
2266 if (self
->handle_comment
)
2267 EXPAT(SetCommentHandler
)(
2269 (XML_CommentHandler
) expat_comment_handler
2271 if (self
->handle_pi
)
2272 EXPAT(SetProcessingInstructionHandler
)(
2274 (XML_ProcessingInstructionHandler
) expat_pi_handler
2276 EXPAT(SetUnknownEncodingHandler
)(
2278 (XML_UnknownEncodingHandler
) expat_unknown_encoding_handler
, NULL
2281 ALLOC(sizeof(XMLParserObject
), "create expatparser");
2283 return (PyObject
*) self
;
/* Destructor for parser objects: frees the expat parser, then drops the
 * references held in the handler fields (which may be NULL, hence
 * Py_XDECREF) and in target, entity and names.
 * NOTE(review): the final release of the object's own memory is on a line
 * missing from this listing. */
2287 xmlparser_dealloc(XMLParserObject
* self
)
2289 EXPAT(ParserFree
)(self
->parser
);
/* handler fields are optional, so the NULL-tolerant macro is used */
2291 Py_XDECREF(self
->handle_pi
);
2292 Py_XDECREF(self
->handle_comment
);
2293 Py_XDECREF(self
->handle_end
);
2294 Py_XDECREF(self
->handle_data
);
2295 Py_XDECREF(self
->handle_start
);
2296 Py_XDECREF(self
->handle_xml
);
2298 Py_DECREF(self
->target
);
2299 Py_DECREF(self
->entity
);
2300 Py_DECREF(self
->names
);
2302 RELEASE(sizeof(XMLParserObject
), "destroy expatparser");
2307 /* -------------------------------------------------------------------- */
2308 /* methods (in alphabetical order) */
/* Feed data_len bytes of data to the expat parser; `final` is passed
 * through to expat's Parse call.
 *
 * From the statements shown: after parsing, a pending Python exception is
 * checked for first; separately, a SyntaxError message of the form
 * "<expat error text>: line L, column C" is formatted from the parser's
 * error code and position.
 *
 * NOTE(review): the tests on `ok`, the exception-raising call and the
 * return statements are on lines missing from this listing.
 */
2311 expat_parse(XMLParserObject
* self
, char* data
, int data_len
, int final
)
2315 ok
= EXPAT(Parse
)(self
->parser
, data
, data_len
, final
);
/* an exception raised inside one of the handler callbacks shows up here */
2317 if (PyErr_Occurred())
2322 PyExc_SyntaxError
, "%s: line %ld, column %ld",
2323 EXPAT(ErrorString
)(EXPAT(GetErrorCode
)(self
->parser
)),
2324 EXPAT(GetErrorLineNumber
)(self
->parser
),
2325 EXPAT(GetErrorColumnNumber
)(self
->parser
)
/* Python-visible "close" method: takes no arguments, makes a final
 * zero-length parse call (final flag 1) and, when that succeeded and the
 * target is the built-in tree builder, returns treebuilder_done(target).
 * NOTE(review): what is returned for other targets or on failure, and the
 * handling of the `res` reference, are on lines missing from this
 * listing. */
2334 xmlparser_close(XMLParserObject
* self
, PyObject
* args
)
2336 /* end feeding data to parser */
2339 if (!PyArg_ParseTuple(args
, ":close"))
/* empty buffer with final=1 tells expat the document is complete */
2342 res
= expat_parse(self
, "", 0, 1);
2344 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2346 return treebuilder_done((TreeBuilderObject
*) self
->target
);
/* Python-visible "feed" method: parses one string/buffer argument with the
 * "s#" format and passes it to expat_parse with the final flag set to 0,
 * returning that call's result.
 * NOTE(review): the declarations of `data` and `data_len` are not shown in
 * this listing, so their types cannot be checked against "s#" from here. */
2353 xmlparser_feed(XMLParserObject
* self
, PyObject
* args
)
2355 /* feed data to parser */
2359 if (!PyArg_ParseTuple(args
, "s#:feed", &data
, &data_len
))
2362 return expat_parse(self
, data
, data_len
, 0);
/* Python-visible "_parse" method: pulls input from a file-like object.
 *
 * From the statements shown: looks up the object's "read" attribute, calls
 * it with a chunk size of 64*1024, treats a result that is not exactly a
 * bytes object or that is empty as a distinct case, feeds non-empty chunks
 * to expat_parse with final=0, then makes a final zero-length parse call
 * and, for the built-in tree builder, returns treebuilder_done(target).
 *
 * NOTE(review): the loop construct, reference cleanup and the remaining
 * return paths are on lines missing from this listing.
 */
2366 xmlparser_parse(XMLParserObject
* self
, PyObject
* args
)
2368 /* (internal) parse until end of input stream */
2375 if (!PyArg_ParseTuple(args
, "O:_parse", &fileobj
))
2378 reader
= PyObject_GetAttrString(fileobj
, "read");
2382 /* read from open file object */
2385 buffer
= PyObject_CallFunction(reader
, "i", 64*1024);
2388 /* read failed (e.g. due to KeyboardInterrupt) */
/* anything other than a non-empty exact bytes object is not fed to expat */
2393 if (!PyBytes_CheckExact(buffer
) || PyBytes_GET_SIZE(buffer
) == 0) {
2399 self
, PyBytes_AS_STRING(buffer
), PyBytes_GET_SIZE(buffer
), 0
/* empty buffer with final=1 tells expat the document is complete */
2414 res
= expat_parse(self
, "", 0, 1);
2416 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2418 return treebuilder_done((TreeBuilderObject
*) self
->target
);
/* Python-visible "_setevents" method: selects which parse events are
 * recorded into a caller-supplied list.
 *
 * Arguments: a list (type-checked via "O!") that collects events, and an
 * optional event_set.  From the statements shown: the target must be
 * exactly the built-in tree builder; the target's events list is replaced
 * and its four event objects are reset to NULL; with event_set left as
 * None only "end" is enabled; otherwise event_set must be a tuple whose
 * items are bytes objects naming "start", "end", "start-ns" or "end-ns".
 * Either namespace event also installs the expat namespace-declaration
 * handlers.  Unrecognised names and non-bytes items lead to the
 * "unknown event" / "invalid event tuple" messages.
 *
 * NOTE(review): reference-count adjustments for `events` and `item`, the
 * exception-raising calls and the return statements are on lines missing
 * from this listing.
 */
2425 xmlparser_setevents(XMLParserObject
* self
, PyObject
* args
)
2427 /* activate element event reporting */
2430 TreeBuilderObject
* target
;
2432 PyObject
* events
; /* event collector */
2433 PyObject
* event_set
= Py_None
;
2434 if (!PyArg_ParseTuple(args
, "O!|O:_setevents", &PyList_Type
, &events
,
/* event recording is implemented by the built-in builder only */
2438 if (!TreeBuilder_CheckExact(self
->target
)) {
2441 "event handling only supported for cElementTree.Treebuilder "
2447 target
= (TreeBuilderObject
*) self
->target
;
/* swap in the new collector list */
2450 Py_XDECREF(target
->events
);
2451 target
->events
= events
;
2453 /* clear out existing events */
2454 Py_XDECREF(target
->start_event_obj
); target
->start_event_obj
= NULL
;
2455 Py_XDECREF(target
->end_event_obj
); target
->end_event_obj
= NULL
;
2456 Py_XDECREF(target
->start_ns_event_obj
); target
->start_ns_event_obj
= NULL
;
2457 Py_XDECREF(target
->end_ns_event_obj
); target
->end_ns_event_obj
= NULL
;
2459 if (event_set
== Py_None
) {
2460 /* default is "end" only */
2461 target
->end_event_obj
= PyBytes_FromString("end");
2465 if (!PyTuple_Check(event_set
)) /* FIXME: handle arbitrary sequences */
/* each tuple item names one event; the item object itself is stored and
   later used as the first element of the reported event tuples */
2468 for (i
= 0; i
< PyTuple_GET_SIZE(event_set
); i
++) {
2469 PyObject
* item
= PyTuple_GET_ITEM(event_set
, i
);
2471 if (!PyBytes_Check(item
))
2473 event
= PyBytes_AS_STRING(item
);
2474 if (strcmp(event
, "start") == 0) {
2476 target
->start_event_obj
= item
;
2477 } else if (strcmp(event
, "end") == 0) {
2479 Py_XDECREF(target
->end_event_obj
);
2480 target
->end_event_obj
= item
;
2481 } else if (strcmp(event
, "start-ns") == 0) {
2483 Py_XDECREF(target
->start_ns_event_obj
);
2484 target
->start_ns_event_obj
= item
;
/* namespace events need the expat namespace-declaration callbacks */
2485 EXPAT(SetNamespaceDeclHandler
)(
2487 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2488 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2490 } else if (strcmp(event
, "end-ns") == 0) {
2492 Py_XDECREF(target
->end_ns_event_obj
);
2493 target
->end_ns_event_obj
= item
;
2494 EXPAT(SetNamespaceDeclHandler
)(
2496 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2497 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2502 "unknown event '%s'", event
2513 "invalid event tuple"
/* Method table for parser objects: feed and close form the public
 * interface; _parse and _setevents carry a leading underscore and are used
 * by the Python glue code in this module.  Every entry uses METH_VARARGS.
 * NOTE(review): the terminating sentinel entry and closing brace are not
 * shown in this listing. */
2518 static PyMethodDef xmlparser_methods
[] = {
2519 {"feed", (PyCFunction
) xmlparser_feed
, METH_VARARGS
},
2520 {"close", (PyCFunction
) xmlparser_close
, METH_VARARGS
},
2521 {"_parse", (PyCFunction
) xmlparser_parse
, METH_VARARGS
},
2522 {"_setevents", (PyCFunction
) xmlparser_setevents
, METH_VARARGS
},
/* Attribute lookup for parser objects.
 *
 * From the statements shown: a unicode attribute name is converted to a C
 * string and compared with "entity", "target" and "version"; "version"
 * yields a bytes object of the form "Expat <major>.<minor>.<micro>" built
 * from the compile-time expat version macros; generic attribute lookup is
 * used as the fallback.
 *
 * NOTE(review): the bodies of the "entity" and "target" branches, the
 * handling of non-unicode names and the declaration of `buffer` are on
 * lines missing from this listing.
 */
2527 xmlparser_getattro(XMLParserObject
* self
, PyObject
* nameobj
)
2532 if (PyUnicode_Check(nameobj
))
2533 name
= _PyUnicode_AsString(nameobj
);
2537 if (strcmp(name
, "entity") == 0)
2539 else if (strcmp(name
, "target") == 0)
2541 else if (strcmp(name
, "version") == 0) {
/* NOTE(review): sprintf is unbounded and the size of buffer is not visible
   here -- confirm it is large enough for three formatted integers */
2543 sprintf(buffer
, "Expat %d.%d.%d", XML_MAJOR_VERSION
,
2544 XML_MINOR_VERSION
, XML_MICRO_VERSION
);
2545 return PyBytes_FromString(buffer
);
/* everything else (including the methods) goes through the generic path */
2547 return PyObject_GenericGetAttr((PyObject
*) self
, nameobj
);
/* Type object for parser objects.  Visible slots: type name "XMLParser",
 * instance size sizeof(XMLParserObject), destructor xmlparser_dealloc,
 * custom attribute lookup xmlparser_getattro, default type flags, and
 * xmlparser_methods as the method table; the remaining visible slots are
 * zero.
 * NOTE(review): several slots between the ones shown are missing from this
 * listing, so slot positions cannot be verified from here. */
2554 static PyTypeObject XMLParser_Type
= {
2555 PyVarObject_HEAD_INIT(NULL
, 0)
2556 "XMLParser", sizeof(XMLParserObject
), 0,
2558 (destructor
)xmlparser_dealloc
, /* tp_dealloc */
2562 0, /* tp_reserved */
2564 0, /* tp_as_number */
2565 0, /* tp_as_sequence */
2566 0, /* tp_as_mapping */
2570 (getattrofunc
)xmlparser_getattro
, /* tp_getattro */
2571 0, /* tp_setattro */
2572 0, /* tp_as_buffer */
2573 Py_TPFLAGS_DEFAULT
, /* tp_flags */
2575 0, /* tp_traverse */
2577 0, /* tp_richcompare */
2578 0, /* tp_weaklistoffset */
2580 0, /* tp_iternext */
2581 xmlparser_methods
, /* tp_methods */
2587 /* ==================================================================== */
2588 /* python module interface */
/* Module-level function table.  Element, SubElement and TreeBuilder are
 * always present; when USE_EXPAT is defined, XMLParser and XMLTreeBuilder
 * are added as two names for the same factory function (xmlparser).
 * NOTE(review): the closing #endif, sentinel entry and closing brace are
 * not shown in this listing. */
2590 static PyMethodDef _functions
[] = {
2591 {"Element", (PyCFunction
) element
, METH_VARARGS
|METH_KEYWORDS
},
2592 {"SubElement", (PyCFunction
) subelement
, METH_VARARGS
|METH_KEYWORDS
},
2593 {"TreeBuilder", (PyCFunction
) treebuilder
, METH_VARARGS
},
2594 #if defined(USE_EXPAT)
2595 {"XMLParser", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2596 {"XMLTreeBuilder", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
/* Module definition record passed to PyModule_Create in the init function;
 * its m_name field is also used there as the key under which the module is
 * registered.
 * NOTE(review): every field after the head initializer is missing from this
 * listing. */
2602 static struct PyModuleDef _elementtreemodule
= {
2603 PyModuleDef_HEAD_INIT
,
2615 PyInit__elementtree(void)
2620 #if defined(USE_PYEXPAT_CAPI)
2621 struct PyExpat_CAPI
* capi
;
2624 /* Initialize object types */
2625 if (PyType_Ready(&TreeBuilder_Type
) < 0)
2627 if (PyType_Ready(&Element_Type
) < 0)
2629 #if defined(USE_EXPAT)
2630 if (PyType_Ready(&XMLParser_Type
) < 0)
2634 m
= PyModule_Create(&_elementtreemodule
);
2638 /* The code below requires that the module gets already added
2640 PyDict_SetItemString(PyImport_GetModuleDict(),
2641 _elementtreemodule
.m_name
,
2644 /* python glue code */
2650 PyDict_SetItemString(g
, "__builtins__", PyEval_GetBuiltins());
2654 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2655 "from __future__ import generators\n" /* enable yield under 2.2 */
2658 "from copy import copy, deepcopy\n"
2661 " from xml.etree import ElementTree\n"
2662 "except ImportError:\n"
2663 " import ElementTree\n"
2664 "ET = ElementTree\n"
2667 "import _elementtree as cElementTree\n"
2669 "try:\n" /* check if copy works as is */
2670 " copy(cElementTree.Element('x'))\n"
2672 " def copyelement(elem):\n"
2675 "def Comment(text=None):\n" /* public */
2676 " element = cElementTree.Element(ET.Comment)\n"
2677 " element.text = text\n"
2679 "cElementTree.Comment = Comment\n"
2681 "class ElementTree(ET.ElementTree):\n" /* public */
2682 " def parse(self, source, parser=None):\n"
2683 " if not hasattr(source, 'read'):\n"
2684 " source = open(source, 'rb')\n"
2685 " if parser is not None:\n"
2687 " data = source.read(65536)\n"
2690 " parser.feed(data)\n"
2691 " self._root = parser.close()\n"
2693 " parser = cElementTree.XMLParser()\n"
2694 " self._root = parser._parse(source)\n"
2695 " return self._root\n"
2696 "cElementTree.ElementTree = ElementTree\n"
2698 "def getiterator(node, tag=None):\n" /* helper */
2701 #if (PY_VERSION_HEX < 0x02020000)
2702 " nodes = []\n" /* 2.1 doesn't have yield */
2703 " if tag is None or node.tag == tag:\n"
2704 " nodes.append(node)\n"
2705 " for node in node:\n"
2706 " nodes.extend(getiterator(node, tag))\n"
2709 " if tag is None or node.tag == tag:\n"
2711 " for node in node:\n"
2712 " for node in getiterator(node, tag):\n"
2716 "def parse(source, parser=None):\n" /* public */
2717 " tree = ElementTree()\n"
2718 " tree.parse(source, parser)\n"
2720 "cElementTree.parse = parse\n"
2722 #if (PY_VERSION_HEX < 0x02020000)
2723 "if hasattr(ET, 'iterparse'):\n"
2724 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2726 "class iterparse(object):\n"
2728 " def __init__(self, file, events=None):\n"
2729 " if not hasattr(file, 'read'):\n"
2730 " file = open(file, 'rb')\n"
2731 " self._file = file\n"
2732 " self._events = events\n"
2733 " def __iter__(self):\n"
2735 " b = cElementTree.TreeBuilder()\n"
2736 " p = cElementTree.XMLParser(b)\n"
2737 " p._setevents(events, self._events)\n"
2739 " data = self._file.read(16384)\n"
2743 " for event in events:\n"
2746 " root = p.close()\n"
2747 " for event in events:\n"
2749 " self.root = root\n"
2750 "cElementTree.iterparse = iterparse\n"
2753 "def PI(target, text=None):\n" /* public */
2754 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2755 " element.text = target\n"
2757 " element.text = element.text + ' ' + text\n"
2760 " elem = cElementTree.Element(ET.PI)\n"
2761 " elem.text = text\n"
2763 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2765 "def XML(text):\n" /* public */
2766 " parser = cElementTree.XMLParser()\n"
2767 " parser.feed(text)\n"
2768 " return parser.close()\n"
2769 "cElementTree.XML = cElementTree.fromstring = XML\n"
2771 "def XMLID(text):\n" /* public */
2772 " tree = XML(text)\n"
2774 " for elem in tree.getiterator():\n"
2775 " id = elem.get('id')\n"
2778 " return tree, ids\n"
2779 "cElementTree.XMLID = XMLID\n"
2781 "cElementTree.dump = ET.dump\n"
2782 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2783 "cElementTree.iselement = ET.iselement\n"
2784 "cElementTree.QName = ET.QName\n"
2785 "cElementTree.tostring = ET.tostring\n"
2786 "cElementTree.VERSION = '" VERSION
"'\n"
2787 "cElementTree.__version__ = '" VERSION
"'\n"
2788 "cElementTree.XMLParserError = SyntaxError\n"
2792 PyRun_String(bootstrap
, Py_file_input
, g
, NULL
);
2794 elementpath_obj
= PyDict_GetItemString(g
, "ElementPath");
2796 elementtree_copyelement_obj
= PyDict_GetItemString(g
, "copyelement");
2797 if (elementtree_copyelement_obj
) {
2798 /* reduce hack needed; enable reduce method */
2800 for (mp
= element_methods
; mp
->ml_name
; mp
++)
2801 if (mp
->ml_meth
== (PyCFunction
) element_reduce
) {
2802 mp
->ml_name
= "__reduce__";
2807 elementtree_deepcopy_obj
= PyDict_GetItemString(g
, "deepcopy");
2808 elementtree_getiterator_obj
= PyDict_GetItemString(g
, "getiterator");
2810 #if defined(USE_PYEXPAT_CAPI)
2811 /* link against pyexpat, if possible */
2812 capi
= PyCapsule_Import(PyExpat_CAPSULE_NAME
, 0);
2814 strcmp(capi
->magic
, PyExpat_CAPI_MAGIC
) == 0 &&
2815 capi
->size
<= sizeof(*expat_capi
) &&
2816 capi
->MAJOR_VERSION
== XML_MAJOR_VERSION
&&
2817 capi
->MINOR_VERSION
== XML_MINOR_VERSION
&&
2818 capi
->MICRO_VERSION
== XML_MICRO_VERSION
)