3 * $Id: /work/modules/celementtree/cElementTree.c 1128 2005-12-16T21:57:13.668520Z Fredrik $
5 * elementtree accelerator
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
37 * Copyright (c) 1999-2005 by Secret Labs AB. All rights reserved.
38 * Copyright (c) 1999-2005 by Fredrik Lundh.
41 * http://www.pythonware.com
44 /* Licensed to PSF under a Contributor Agreement. */
45 /* See http://www.python.org/2.4/license for licensing details. */
49 #define VERSION "1.0.5"
51 /* -------------------------------------------------------------------- */
54 /* Leave defined to include the expat-based XMLParser type */
57 /* Define to do all expat calls via pyexpat's embedded expat library */
58 /* #define USE_PYEXPAT_CAPI */
60 /* An element can hold this many children without extra memory
62 #define STATIC_CHILDREN 4
64 /* For best performance, choose a value so that 80-90% of all nodes
65 have no more than the given number of children. Set this to zero
66 to minimize the size of the element structure itself (this only
67 helps if you have lots of leaf nodes with attributes). */
69 /* Also note that pymalloc always allocates blocks in multiples of
70 eight bytes. For the current version of cElementTree, this means
71 that the number of children should be an even number, at least on
74 /* -------------------------------------------------------------------- */
77 static int memory
= 0;
78 #define ALLOC(size, comment)\
79 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
80 #define RELEASE(size, comment)\
81 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
83 #define ALLOC(size, comment)
84 #define RELEASE(size, comment)
89 #define LOCAL(type) static __inline type __fastcall
91 #define LOCAL(type) static type
94 /* compatibility macros */
95 #if (PY_VERSION_HEX < 0x02040000)
96 #define PyDict_CheckExact PyDict_Check
97 #if (PY_VERSION_HEX < 0x02020000)
98 #define PyList_CheckExact PyList_Check
99 #define PyString_CheckExact PyString_Check
100 #if (PY_VERSION_HEX >= 0x01060000)
101 #define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
106 #if (PY_VERSION_HEX >= 0x02050000)
107 #define PY_CONST const /* 2.5 adds const to some API:s */
112 #if !defined(Py_RETURN_NONE)
113 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
116 /* macros used to store 'join' flags in string object pointers. note
117 that all use of text and tail as object pointers must be wrapped in
118 JOIN_OBJ. see comments in the ElementObject definition for more
120 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
121 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
122 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
124 /* glue functions (see the init function for details) */
125 static PyObject
* elementtree_copyelement_obj
;
126 static PyObject
* elementtree_deepcopy_obj
;
127 static PyObject
* elementtree_getiterator_obj
;
128 static PyObject
* elementpath_obj
;
133 deepcopy(PyObject
* object
, PyObject
* memo
)
135 /* do a deep copy of the given object */
140 if (!elementtree_deepcopy_obj
) {
143 "deepcopy helper not found"
148 args
= PyTuple_New(2);
149 Py_INCREF(object
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) object
);
150 Py_INCREF(memo
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) memo
);
152 result
= PyObject_CallObject(elementtree_deepcopy_obj
, args
);
160 list_join(PyObject
* list
)
162 /* join list elements (destroying the list in the process) */
169 switch (PyList_GET_SIZE(list
)) {
172 return PyString_FromString("");
174 result
= PyList_GET_ITEM(list
, 0);
180 /* two or more elements: slice out a suitable separator from the
181 first member, and use that to join the entire list */
183 joiner
= PySequence_GetSlice(PyList_GET_ITEM(list
, 0), 0, 0);
187 function
= PyObject_GetAttrString(joiner
, "join");
193 args
= PyTuple_New(1);
194 PyTuple_SET_ITEM(args
, 0, list
);
196 result
= PyObject_CallObject(function
, args
);
198 Py_DECREF(args
); /* also removes list */
205 #if (PY_VERSION_HEX < 0x02020000)
207 PyDict_Update(PyObject
* dict
, PyObject
* other
)
209 /* PyDict_Update emulation for 2.1 and earlier */
213 res
= PyObject_CallMethod(dict
, "update", "O", other
);
222 /* -------------------------------------------------------------------- */
223 /* the element type */
227 /* attributes (a dictionary object), or None if no attributes */
231 int length
; /* actual number of items */
232 int allocated
; /* allocated items */
234 /* this either points to _children or to a malloced buffer */
237 PyObject
* _children
[STATIC_CHILDREN
];
239 } ElementObjectExtra
;
244 /* element tag (a string). */
247 /* text before first child. note that this is a tagged pointer;
248 use JOIN_OBJ to get the object pointer. the join flag is used
249 to distinguish lists created by the tree builder from lists
250 assigned to the attribute by application code; the former
251 should be joined before being returned to the user, the latter
252 should be left intact. */
255 /* text after this element, in parent. note that this is a tagged
256 pointer; use JOIN_OBJ to get the object pointer. */
259 ElementObjectExtra
* extra
;
263 staticforward PyTypeObject Element_Type
;
265 #define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
267 /* -------------------------------------------------------------------- */
268 /* element constructor and destructor */
271 element_new_extra(ElementObject
* self
, PyObject
* attrib
)
273 self
->extra
= PyObject_Malloc(sizeof(ElementObjectExtra
));
281 self
->extra
->attrib
= attrib
;
283 self
->extra
->length
= 0;
284 self
->extra
->allocated
= STATIC_CHILDREN
;
285 self
->extra
->children
= self
->extra
->_children
;
291 element_dealloc_extra(ElementObject
* self
)
295 Py_DECREF(self
->extra
->attrib
);
297 for (i
= 0; i
< self
->extra
->length
; i
++)
298 Py_DECREF(self
->extra
->children
[i
]);
300 if (self
->extra
->children
!= self
->extra
->_children
)
301 PyObject_Free(self
->extra
->children
);
303 PyObject_Free(self
->extra
);
307 element_new(PyObject
* tag
, PyObject
* attrib
)
311 self
= PyObject_New(ElementObject
, &Element_Type
);
315 /* use None for empty dictionaries */
316 if (PyDict_CheckExact(attrib
) && !PyDict_Size(attrib
))
321 if (attrib
!= Py_None
) {
323 if (element_new_extra(self
, attrib
) < 0)
326 self
->extra
->length
= 0;
327 self
->extra
->allocated
= STATIC_CHILDREN
;
328 self
->extra
->children
= self
->extra
->_children
;
336 self
->text
= Py_None
;
339 self
->tail
= Py_None
;
341 ALLOC(sizeof(ElementObject
), "create element");
343 return (PyObject
*) self
;
347 element_resize(ElementObject
* self
, int extra
)
352 /* make sure self->children can hold the given number of extra
353 elements. set an exception and return -1 if allocation failed */
356 element_new_extra(self
, NULL
);
358 size
= self
->extra
->length
+ extra
;
360 if (size
> self
->extra
->allocated
) {
361 /* use Python 2.4's list growth strategy */
362 size
= (size
>> 3) + (size
< 9 ? 3 : 6) + size
;
363 if (self
->extra
->children
!= self
->extra
->_children
) {
364 children
= PyObject_Realloc(self
->extra
->children
,
365 size
* sizeof(PyObject
*));
369 children
= PyObject_Malloc(size
* sizeof(PyObject
*));
372 /* copy existing children from static area to malloc buffer */
373 memcpy(children
, self
->extra
->children
,
374 self
->extra
->length
* sizeof(PyObject
*));
376 self
->extra
->children
= children
;
377 self
->extra
->allocated
= size
;
388 element_add_subelement(ElementObject
* self
, PyObject
* element
)
390 /* add a child element to a parent */
392 if (element_resize(self
, 1) < 0)
396 self
->extra
->children
[self
->extra
->length
] = element
;
398 self
->extra
->length
++;
404 element_get_attrib(ElementObject
* self
)
406 /* return borrowed reference to attrib dictionary */
407 /* note: this function assumes that the extra section exists */
409 PyObject
* res
= self
->extra
->attrib
;
411 if (res
== Py_None
) {
412 /* create missing dictionary */
416 self
->extra
->attrib
= res
;
423 element_get_text(ElementObject
* self
)
425 /* return borrowed reference to text attribute */
427 PyObject
* res
= self
->text
;
431 if (PyList_CheckExact(res
)) {
432 res
= list_join(res
);
443 element_get_tail(ElementObject
* self
)
445 /* return borrowed reference to tail attribute */
447 PyObject
* res
= self
->tail
;
451 if (PyList_CheckExact(res
)) {
452 res
= list_join(res
);
463 element(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
468 PyObject
* attrib
= NULL
;
469 if (!PyArg_ParseTuple(args
, "O|O!:Element", &tag
,
470 &PyDict_Type
, &attrib
))
474 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
478 PyDict_Update(attrib
, kw
);
484 elem
= element_new(tag
, attrib
);
492 subelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
496 ElementObject
* parent
;
498 PyObject
* attrib
= NULL
;
499 if (!PyArg_ParseTuple(args
, "O!O|O!:SubElement",
500 &Element_Type
, &parent
, &tag
,
501 &PyDict_Type
, &attrib
))
505 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
509 PyDict_Update(attrib
, kw
);
515 elem
= element_new(tag
, attrib
);
519 if (element_add_subelement(parent
, elem
) < 0)
526 element_dealloc(ElementObject
* self
)
529 element_dealloc_extra(self
);
531 /* discard attributes */
532 Py_DECREF(self
->tag
);
533 Py_DECREF(JOIN_OBJ(self
->text
));
534 Py_DECREF(JOIN_OBJ(self
->tail
));
536 RELEASE(sizeof(ElementObject
), "destroy element");
541 /* -------------------------------------------------------------------- */
542 /* methods (in alphabetical order) */
545 element_append(ElementObject
* self
, PyObject
* args
)
548 if (!PyArg_ParseTuple(args
, "O!:append", &Element_Type
, &element
))
551 if (element_add_subelement(self
, element
) < 0)
558 element_clear(ElementObject
* self
, PyObject
* args
)
560 if (!PyArg_ParseTuple(args
, ":clear"))
564 element_dealloc_extra(self
);
569 Py_DECREF(JOIN_OBJ(self
->text
));
570 self
->text
= Py_None
;
573 Py_DECREF(JOIN_OBJ(self
->tail
));
574 self
->tail
= Py_None
;
580 element_copy(ElementObject
* self
, PyObject
* args
)
583 ElementObject
* element
;
585 if (!PyArg_ParseTuple(args
, ":__copy__"))
588 element
= (ElementObject
*) element_new(
589 self
->tag
, (self
->extra
) ? self
->extra
->attrib
: Py_None
594 Py_DECREF(JOIN_OBJ(element
->text
));
595 element
->text
= self
->text
;
596 Py_INCREF(JOIN_OBJ(element
->text
));
598 Py_DECREF(JOIN_OBJ(element
->tail
));
599 element
->tail
= self
->tail
;
600 Py_INCREF(JOIN_OBJ(element
->tail
));
604 if (element_resize(element
, self
->extra
->length
) < 0)
607 for (i
= 0; i
< self
->extra
->length
; i
++) {
608 Py_INCREF(self
->extra
->children
[i
]);
609 element
->extra
->children
[i
] = self
->extra
->children
[i
];
612 element
->extra
->length
= self
->extra
->length
;
616 return (PyObject
*) element
;
620 element_deepcopy(ElementObject
* self
, PyObject
* args
)
623 ElementObject
* element
;
631 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
634 tag
= deepcopy(self
->tag
, memo
);
639 attrib
= deepcopy(self
->extra
->attrib
, memo
);
649 element
= (ElementObject
*) element_new(tag
, attrib
);
657 text
= deepcopy(JOIN_OBJ(self
->text
), memo
);
660 Py_DECREF(element
->text
);
661 element
->text
= JOIN_SET(text
, JOIN_GET(self
->text
));
663 tail
= deepcopy(JOIN_OBJ(self
->tail
), memo
);
666 Py_DECREF(element
->tail
);
667 element
->tail
= JOIN_SET(tail
, JOIN_GET(self
->tail
));
671 if (element_resize(element
, self
->extra
->length
) < 0)
674 for (i
= 0; i
< self
->extra
->length
; i
++) {
675 PyObject
* child
= deepcopy(self
->extra
->children
[i
], memo
);
677 element
->extra
->length
= i
;
680 element
->extra
->children
[i
] = child
;
683 element
->extra
->length
= self
->extra
->length
;
687 /* add object to memo dictionary (so deepcopy won't visit it again) */
688 id
= PyInt_FromLong((Py_uintptr_t
) self
);
690 i
= PyDict_SetItem(memo
, id
, (PyObject
*) element
);
697 return (PyObject
*) element
;
705 checkpath(PyObject
* tag
)
709 /* check if a tag contains an xpath character */
711 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
713 #if defined(Py_USING_UNICODE)
714 if (PyUnicode_Check(tag
)) {
715 Py_UNICODE
*p
= PyUnicode_AS_UNICODE(tag
);
716 for (i
= 0; i
< PyUnicode_GET_SIZE(tag
); i
++) {
719 else if (p
[i
] == '}')
721 else if (check
&& PATHCHAR(p
[i
]))
727 if (PyString_Check(tag
)) {
728 char *p
= PyString_AS_STRING(tag
);
729 for (i
= 0; i
< PyString_GET_SIZE(tag
); i
++) {
732 else if (p
[i
] == '}')
734 else if (check
&& PATHCHAR(p
[i
]))
740 return 1; /* unknown type; might be path expression */
744 element_find(ElementObject
* self
, PyObject
* args
)
749 if (!PyArg_ParseTuple(args
, "O:find", &tag
))
753 return PyObject_CallMethod(
754 elementpath_obj
, "find", "OO", self
, tag
760 for (i
= 0; i
< self
->extra
->length
; i
++) {
761 PyObject
* item
= self
->extra
->children
[i
];
762 if (Element_CheckExact(item
) &&
763 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
773 element_findtext(ElementObject
* self
, PyObject
* args
)
778 PyObject
* default_value
= Py_None
;
779 if (!PyArg_ParseTuple(args
, "O|O:findtext", &tag
, &default_value
))
783 return PyObject_CallMethod(
784 elementpath_obj
, "findtext", "OOO", self
, tag
, default_value
788 Py_INCREF(default_value
);
789 return default_value
;
792 for (i
= 0; i
< self
->extra
->length
; i
++) {
793 ElementObject
* item
= (ElementObject
*) self
->extra
->children
[i
];
794 if (Element_CheckExact(item
) && !PyObject_Compare(item
->tag
, tag
)) {
795 PyObject
* text
= element_get_text(item
);
797 return PyString_FromString("");
803 Py_INCREF(default_value
);
804 return default_value
;
808 element_findall(ElementObject
* self
, PyObject
* args
)
814 if (!PyArg_ParseTuple(args
, "O:findall", &tag
))
818 return PyObject_CallMethod(
819 elementpath_obj
, "findall", "OO", self
, tag
829 for (i
= 0; i
< self
->extra
->length
; i
++) {
830 PyObject
* item
= self
->extra
->children
[i
];
831 if (Element_CheckExact(item
) &&
832 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
833 if (PyList_Append(out
, item
) < 0) {
844 element_get(ElementObject
* self
, PyObject
* args
)
849 PyObject
* default_value
= Py_None
;
850 if (!PyArg_ParseTuple(args
, "O|O:get", &key
, &default_value
))
853 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
854 value
= default_value
;
856 value
= PyDict_GetItem(self
->extra
->attrib
, key
);
858 value
= default_value
;
866 element_getchildren(ElementObject
* self
, PyObject
* args
)
871 if (!PyArg_ParseTuple(args
, ":getchildren"))
875 return PyList_New(0);
877 list
= PyList_New(self
->extra
->length
);
881 for (i
= 0; i
< self
->extra
->length
; i
++) {
882 PyObject
* item
= self
->extra
->children
[i
];
884 PyList_SET_ITEM(list
, i
, item
);
891 element_getiterator(ElementObject
* self
, PyObject
* args
)
895 PyObject
* tag
= Py_None
;
896 if (!PyArg_ParseTuple(args
, "|O:getiterator", &tag
))
899 if (!elementtree_getiterator_obj
) {
902 "getiterator helper not found"
907 args
= PyTuple_New(2);
911 Py_INCREF(self
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) self
);
912 Py_INCREF(tag
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) tag
);
914 result
= PyObject_CallObject(elementtree_getiterator_obj
, args
);
922 element_getitem(ElementObject
* self
, int index
)
924 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
927 "child index out of range"
932 Py_INCREF(self
->extra
->children
[index
]);
933 return self
->extra
->children
[index
];
937 element_getslice(ElementObject
* self
, int start
, int end
)
943 return PyList_New(0);
945 /* standard clamping */
950 if (end
> self
->extra
->length
)
951 end
= self
->extra
->length
;
955 list
= PyList_New(end
- start
);
959 for (i
= start
; i
< end
; i
++) {
960 PyObject
* item
= self
->extra
->children
[i
];
962 PyList_SET_ITEM(list
, i
- start
, item
);
969 element_insert(ElementObject
* self
, PyObject
* args
)
975 if (!PyArg_ParseTuple(args
, "iO!:insert", &index
,
976 &Element_Type
, &element
))
980 element_new_extra(self
, NULL
);
984 if (index
> self
->extra
->length
)
985 index
= self
->extra
->length
;
987 if (element_resize(self
, 1) < 0)
990 for (i
= self
->extra
->length
; i
> index
; i
--)
991 self
->extra
->children
[i
] = self
->extra
->children
[i
-1];
994 self
->extra
->children
[index
] = element
;
996 self
->extra
->length
++;
1002 element_items(ElementObject
* self
, PyObject
* args
)
1004 if (!PyArg_ParseTuple(args
, ":items"))
1007 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1008 return PyList_New(0);
1010 return PyDict_Items(self
->extra
->attrib
);
1014 element_keys(ElementObject
* self
, PyObject
* args
)
1016 if (!PyArg_ParseTuple(args
, ":keys"))
1019 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1020 return PyList_New(0);
1022 return PyDict_Keys(self
->extra
->attrib
);
1026 element_length(ElementObject
* self
)
1031 return self
->extra
->length
;
1035 element_makeelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
1041 if (!PyArg_ParseTuple(args
, "OO:makeelement", &tag
, &attrib
))
1044 attrib
= PyDict_Copy(attrib
);
1048 elem
= element_new(tag
, attrib
);
1056 element_reduce(ElementObject
* self
, PyObject
* args
)
1058 if (!PyArg_ParseTuple(args
, ":__reduce__"))
1061 /* Hack alert: This method is used to work around a __copy__
1062 problem on certain 2.3 and 2.4 versions. To save time and
1063 simplify the code, we create the copy in here, and use a dummy
1064 copyelement helper to trick the copy module into doing the
1067 if (!elementtree_copyelement_obj
) {
1070 "copyelement helper not found"
1075 return Py_BuildValue(
1076 "O(N)", elementtree_copyelement_obj
, element_copy(self
, args
)
1081 element_remove(ElementObject
* self
, PyObject
* args
)
1086 if (!PyArg_ParseTuple(args
, "O!:remove", &Element_Type
, &element
))
1090 /* element has no children, so raise exception */
1093 "list.remove(x): x not in list"
1098 for (i
= 0; i
< self
->extra
->length
; i
++) {
1099 if (self
->extra
->children
[i
] == element
)
1101 if (PyObject_Compare(self
->extra
->children
[i
], element
) == 0)
1105 if (i
== self
->extra
->length
) {
1106 /* element is not in children, so raise exception */
1109 "list.remove(x): x not in list"
1114 Py_DECREF(self
->extra
->children
[i
]);
1116 self
->extra
->length
--;
1118 for (; i
< self
->extra
->length
; i
++)
1119 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1125 element_repr(ElementObject
* self
)
1130 repr
= PyString_FromString("<Element ");
1132 PyString_ConcatAndDel(&repr
, PyObject_Repr(self
->tag
));
1134 sprintf(buffer
, " at %p>", self
);
1135 PyString_ConcatAndDel(&repr
, PyString_FromString(buffer
));
1141 element_set(ElementObject
* self
, PyObject
* args
)
1147 if (!PyArg_ParseTuple(args
, "OO:set", &key
, &value
))
1151 element_new_extra(self
, NULL
);
1153 attrib
= element_get_attrib(self
);
1157 if (PyDict_SetItem(attrib
, key
, value
) < 0)
1164 element_setslice(ElementObject
* self
, int start
, int end
, PyObject
* item
)
1167 PyObject
* recycle
= NULL
;
1170 element_new_extra(self
, NULL
);
1172 /* standard clamping */
1177 if (end
> self
->extra
->length
)
1178 end
= self
->extra
->length
;
1186 else if (PyList_CheckExact(item
)) {
1187 new = PyList_GET_SIZE(item
);
1189 /* FIXME: support arbitrary sequences? */
1192 "expected list, not \"%.200s\"", item
->ob_type
->tp_name
1198 /* to avoid recursive calls to this method (via decref), move
1199 old items to the recycle bin here, and get rid of them when
1200 we're done modifying the element */
1201 recycle
= PyList_New(old
);
1202 for (i
= 0; i
< old
; i
++)
1203 PyList_SET_ITEM(recycle
, i
, self
->extra
->children
[i
+ start
]);
1208 for (i
= end
; i
< self
->extra
->length
; i
++)
1209 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1210 } else if (new > old
) {
1212 if (element_resize(self
, new - old
) < 0)
1214 for (i
= self
->extra
->length
-1; i
>= end
; i
--)
1215 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1218 /* replace the slice */
1219 for (i
= 0; i
< new; i
++) {
1220 PyObject
* element
= PyList_GET_ITEM(item
, i
);
1222 self
->extra
->children
[i
+ start
] = element
;
1225 self
->extra
->length
+= new - old
;
1227 /* discard the recycle bin, and everything in it */
1228 Py_XDECREF(recycle
);
1234 element_setitem(ElementObject
* self
, int index
, PyObject
* item
)
1239 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
1242 "child assignment index out of range");
1246 old
= self
->extra
->children
[index
];
1250 self
->extra
->children
[index
] = item
;
1252 self
->extra
->length
--;
1253 for (i
= index
; i
< self
->extra
->length
; i
++)
1254 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1262 static PyMethodDef element_methods
[] = {
1264 {"clear", (PyCFunction
) element_clear
, METH_VARARGS
},
1266 {"get", (PyCFunction
) element_get
, METH_VARARGS
},
1267 {"set", (PyCFunction
) element_set
, METH_VARARGS
},
1269 {"find", (PyCFunction
) element_find
, METH_VARARGS
},
1270 {"findtext", (PyCFunction
) element_findtext
, METH_VARARGS
},
1271 {"findall", (PyCFunction
) element_findall
, METH_VARARGS
},
1273 {"append", (PyCFunction
) element_append
, METH_VARARGS
},
1274 {"insert", (PyCFunction
) element_insert
, METH_VARARGS
},
1275 {"remove", (PyCFunction
) element_remove
, METH_VARARGS
},
1277 {"getiterator", (PyCFunction
) element_getiterator
, METH_VARARGS
},
1278 {"getchildren", (PyCFunction
) element_getchildren
, METH_VARARGS
},
1280 {"items", (PyCFunction
) element_items
, METH_VARARGS
},
1281 {"keys", (PyCFunction
) element_keys
, METH_VARARGS
},
1283 {"makeelement", (PyCFunction
) element_makeelement
, METH_VARARGS
},
1285 {"__copy__", (PyCFunction
) element_copy
, METH_VARARGS
},
1286 {"__deepcopy__", (PyCFunction
) element_deepcopy
, METH_VARARGS
},
1288 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1289 C objects correctly, so we have to fake it using a __reduce__-
1290 based hack (see the element_reduce implementation above for
1293 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1294 using a runtime test to figure out if we need to fake things
1295 or not (see the init code below). The following entry is
1296 enabled only if the hack is needed. */
1298 {"!__reduce__", (PyCFunction
) element_reduce
, METH_VARARGS
},
1304 element_getattr(ElementObject
* self
, char* name
)
1308 res
= Py_FindMethod(element_methods
, (PyObject
*) self
, name
);
1314 if (strcmp(name
, "tag") == 0)
1316 else if (strcmp(name
, "text") == 0)
1317 res
= element_get_text(self
);
1318 else if (strcmp(name
, "tail") == 0) {
1319 res
= element_get_tail(self
);
1320 } else if (strcmp(name
, "attrib") == 0) {
1322 element_new_extra(self
, NULL
);
1323 res
= element_get_attrib(self
);
1325 PyErr_SetString(PyExc_AttributeError
, name
);
1337 element_setattr(ElementObject
* self
, const char* name
, PyObject
* value
)
1339 if (value
== NULL
) {
1341 PyExc_AttributeError
,
1342 "can't delete element attributes"
1347 if (strcmp(name
, "tag") == 0) {
1348 Py_DECREF(self
->tag
);
1350 Py_INCREF(self
->tag
);
1351 } else if (strcmp(name
, "text") == 0) {
1352 Py_DECREF(JOIN_OBJ(self
->text
));
1354 Py_INCREF(self
->text
);
1355 } else if (strcmp(name
, "tail") == 0) {
1356 Py_DECREF(JOIN_OBJ(self
->tail
));
1358 Py_INCREF(self
->tail
);
1359 } else if (strcmp(name
, "attrib") == 0) {
1361 element_new_extra(self
, NULL
);
1362 Py_DECREF(self
->extra
->attrib
);
1363 self
->extra
->attrib
= value
;
1364 Py_INCREF(self
->extra
->attrib
);
1366 PyErr_SetString(PyExc_AttributeError
, name
);
1373 static PySequenceMethods element_as_sequence
= {
1374 (inquiry
) element_length
,
1377 (intargfunc
) element_getitem
,
1378 (intintargfunc
) element_getslice
,
1379 (intobjargproc
) element_setitem
,
1380 (intintobjargproc
) element_setslice
,
1383 statichere PyTypeObject Element_Type
= {
1384 PyObject_HEAD_INIT(NULL
)
1385 0, "Element", sizeof(ElementObject
), 0,
1387 (destructor
)element_dealloc
, /* tp_dealloc */
1389 (getattrfunc
)element_getattr
, /* tp_getattr */
1390 (setattrfunc
)element_setattr
, /* tp_setattr */
1392 (reprfunc
)element_repr
, /* tp_repr */
1393 0, /* tp_as_number */
1394 &element_as_sequence
, /* tp_as_sequence */
1397 /* ==================================================================== */
1398 /* the tree builder type */
1403 PyObject
* root
; /* root node (first created node) */
1405 ElementObject
* this; /* current node */
1406 ElementObject
* last
; /* most recently created node */
1408 PyObject
* data
; /* data collector (string or list), or NULL */
1410 PyObject
* stack
; /* element stack */
1411 int index
; /* current stack size (0=empty) */
1413 /* element tracing */
1414 PyObject
* events
; /* list of events, or NULL if not collecting */
1415 PyObject
* start_event_obj
; /* event objects (NULL to ignore) */
1416 PyObject
* end_event_obj
;
1417 PyObject
* start_ns_event_obj
;
1418 PyObject
* end_ns_event_obj
;
1420 } TreeBuilderObject
;
1422 staticforward PyTypeObject TreeBuilder_Type
;
1424 #define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1426 /* -------------------------------------------------------------------- */
1427 /* constructor and destructor */
1430 treebuilder_new(void)
1432 TreeBuilderObject
* self
;
1434 self
= PyObject_New(TreeBuilderObject
, &TreeBuilder_Type
);
1441 self
->this = (ElementObject
*) Py_None
;
1444 self
->last
= (ElementObject
*) Py_None
;
1448 self
->stack
= PyList_New(20);
1451 self
->events
= NULL
;
1452 self
->start_event_obj
= self
->end_event_obj
= NULL
;
1453 self
->start_ns_event_obj
= self
->end_ns_event_obj
= NULL
;
1455 ALLOC(sizeof(TreeBuilderObject
), "create treebuilder");
1457 return (PyObject
*) self
;
1461 treebuilder(PyObject
* _self
, PyObject
* args
)
1463 if (!PyArg_ParseTuple(args
, ":TreeBuilder"))
1466 return treebuilder_new();
1470 treebuilder_dealloc(TreeBuilderObject
* self
)
1472 Py_XDECREF(self
->end_ns_event_obj
);
1473 Py_XDECREF(self
->start_ns_event_obj
);
1474 Py_XDECREF(self
->end_event_obj
);
1475 Py_XDECREF(self
->start_event_obj
);
1476 Py_XDECREF(self
->events
);
1477 Py_DECREF(self
->stack
);
1478 Py_XDECREF(self
->data
);
1479 Py_DECREF(self
->last
);
1480 Py_DECREF(self
->this);
1481 Py_XDECREF(self
->root
);
1483 RELEASE(sizeof(TreeBuilderObject
), "destroy treebuilder");
1488 /* -------------------------------------------------------------------- */
1492 treebuilder_handle_xml(TreeBuilderObject
* self
, PyObject
* encoding
,
1493 PyObject
* standalone
)
1499 treebuilder_handle_start(TreeBuilderObject
* self
, PyObject
* tag
,
1506 if (self
->this == self
->last
) {
1507 Py_DECREF(self
->last
->text
);
1508 self
->last
->text
= JOIN_SET(
1509 self
->data
, PyList_CheckExact(self
->data
)
1512 Py_DECREF(self
->last
->tail
);
1513 self
->last
->tail
= JOIN_SET(
1514 self
->data
, PyList_CheckExact(self
->data
)
1520 node
= element_new(tag
, attrib
);
1524 this = (PyObject
*) self
->this;
1526 if (this != Py_None
) {
1527 if (element_add_subelement((ElementObject
*) this, node
) < 0)
1533 "multiple elements on top level"
1541 if (self
->index
< PyList_GET_SIZE(self
->stack
)) {
1542 if (PyList_SetItem(self
->stack
, self
->index
, this) < 0)
1546 if (PyList_Append(self
->stack
, this) < 0)
1553 self
->this = (ElementObject
*) node
;
1555 Py_DECREF(self
->last
);
1557 self
->last
= (ElementObject
*) node
;
1559 if (self
->start_event_obj
) {
1561 PyObject
* action
= self
->start_event_obj
;
1562 res
= PyTuple_New(2);
1564 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1565 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1566 PyList_Append(self
->events
, res
);
1569 PyErr_Clear(); /* FIXME: propagate error */
1576 treebuilder_handle_data(TreeBuilderObject
* self
, PyObject
* data
)
1579 /* store the first item as is */
1580 Py_INCREF(data
); self
->data
= data
;
1582 /* more than one item; use a list to collect items */
1583 if (PyString_CheckExact(self
->data
) && self
->data
->ob_refcnt
== 1 &&
1584 PyString_CheckExact(data
) && PyString_GET_SIZE(data
) == 1) {
1585 /* expat often generates single character data sections; handle
1586 the most common case by resizing the existing string... */
1587 int size
= PyString_GET_SIZE(self
->data
);
1588 if (_PyString_Resize(&self
->data
, size
+ 1) < 0)
1590 PyString_AS_STRING(self
->data
)[size
] = PyString_AS_STRING(data
)[0];
1591 } else if (PyList_CheckExact(self
->data
)) {
1592 if (PyList_Append(self
->data
, data
) < 0)
1595 PyObject
* list
= PyList_New(2);
1598 PyList_SET_ITEM(list
, 0, self
->data
);
1599 Py_INCREF(data
); PyList_SET_ITEM(list
, 1, data
);
1608 treebuilder_handle_end(TreeBuilderObject
* self
, PyObject
* tag
)
1613 if (self
->this == self
->last
) {
1614 Py_DECREF(self
->last
->text
);
1615 self
->last
->text
= JOIN_SET(
1616 self
->data
, PyList_CheckExact(self
->data
)
1619 Py_DECREF(self
->last
->tail
);
1620 self
->last
->tail
= JOIN_SET(
1621 self
->data
, PyList_CheckExact(self
->data
)
1627 if (self
->index
== 0) {
1630 "pop from empty stack"
1637 item
= PyList_GET_ITEM(self
->stack
, self
->index
);
1640 Py_DECREF(self
->last
);
1642 self
->last
= (ElementObject
*) self
->this;
1643 self
->this = (ElementObject
*) item
;
1645 if (self
->end_event_obj
) {
1647 PyObject
* action
= self
->end_event_obj
;
1648 PyObject
* node
= (PyObject
*) self
->last
;
1649 res
= PyTuple_New(2);
1651 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1652 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1653 PyList_Append(self
->events
, res
);
1656 PyErr_Clear(); /* FIXME: propagate error */
1659 Py_INCREF(self
->last
);
1660 return (PyObject
*) self
->last
;
1664 treebuilder_handle_namespace(TreeBuilderObject
* self
, int start
,
1665 const char* prefix
, const char *uri
)
1675 if (!self
->start_ns_event_obj
)
1677 action
= self
->start_ns_event_obj
;
1678 /* FIXME: prefix and uri use utf-8 encoding! */
1679 parcel
= Py_BuildValue("ss", (prefix
) ? prefix
: "", uri
);
1684 if (!self
->end_ns_event_obj
)
1686 action
= self
->end_ns_event_obj
;
1692 res
= PyTuple_New(2);
1695 PyTuple_SET_ITEM(res
, 0, action
);
1696 PyTuple_SET_ITEM(res
, 1, parcel
);
1697 PyList_Append(self
->events
, res
);
1700 PyErr_Clear(); /* FIXME: propagate error */
1703 /* -------------------------------------------------------------------- */
1704 /* methods (in alphabetical order) */
1707 treebuilder_data(TreeBuilderObject
* self
, PyObject
* args
)
1710 if (!PyArg_ParseTuple(args
, "O:data", &data
))
1713 return treebuilder_handle_data(self
, data
);
1717 treebuilder_end(TreeBuilderObject
* self
, PyObject
* args
)
1720 if (!PyArg_ParseTuple(args
, "O:end", &tag
))
1723 return treebuilder_handle_end(self
, tag
);
1727 treebuilder_done(TreeBuilderObject
* self
)
1731 /* FIXME: check stack size? */
1743 treebuilder_close(TreeBuilderObject
* self
, PyObject
* args
)
1745 if (!PyArg_ParseTuple(args
, ":close"))
1748 return treebuilder_done(self
);
1752 treebuilder_start(TreeBuilderObject
* self
, PyObject
* args
)
1755 PyObject
* attrib
= Py_None
;
1756 if (!PyArg_ParseTuple(args
, "O|O:start", &tag
, &attrib
))
1759 return treebuilder_handle_start(self
, tag
, attrib
);
1763 treebuilder_xml(TreeBuilderObject
* self
, PyObject
* args
)
1766 PyObject
* standalone
;
1767 if (!PyArg_ParseTuple(args
, "OO:xml", &encoding
, &standalone
))
1770 return treebuilder_handle_xml(self
, encoding
, standalone
);
1773 static PyMethodDef treebuilder_methods
[] = {
1774 {"data", (PyCFunction
) treebuilder_data
, METH_VARARGS
},
1775 {"start", (PyCFunction
) treebuilder_start
, METH_VARARGS
},
1776 {"end", (PyCFunction
) treebuilder_end
, METH_VARARGS
},
1777 {"xml", (PyCFunction
) treebuilder_xml
, METH_VARARGS
},
1778 {"close", (PyCFunction
) treebuilder_close
, METH_VARARGS
},
1783 treebuilder_getattr(TreeBuilderObject
* self
, char* name
)
1785 return Py_FindMethod(treebuilder_methods
, (PyObject
*) self
, name
);
1788 statichere PyTypeObject TreeBuilder_Type
= {
1789 PyObject_HEAD_INIT(NULL
)
1790 0, "TreeBuilder", sizeof(TreeBuilderObject
), 0,
1792 (destructor
)treebuilder_dealloc
, /* tp_dealloc */
1794 (getattrfunc
)treebuilder_getattr
, /* tp_getattr */
1797 /* ==================================================================== */
1798 /* the expat interface */
1800 #if defined(USE_EXPAT)
1804 #if defined(USE_PYEXPAT_CAPI)
1805 #include "pyexpat.h"
1806 static struct PyExpat_CAPI
* expat_capi
;
1807 #define EXPAT(func) (expat_capi->func)
1809 #define EXPAT(func) (XML_##func)
1822 PyObject
* handle_xml
;
1823 PyObject
* handle_start
;
1824 PyObject
* handle_data
;
1825 PyObject
* handle_end
;
1827 PyObject
* handle_comment
;
1828 PyObject
* handle_pi
;
1832 staticforward PyTypeObject XMLParser_Type
;
1836 #if defined(Py_USING_UNICODE)
1838 checkstring(const char* string
, int size
)
1842 /* check if an 8-bit string contains UTF-8 characters */
1843 for (i
= 0; i
< size
; i
++)
1844 if (string
[i
] & 0x80)
1852 makestring(const char* string
, int size
)
1854 /* convert a UTF-8 string to either a 7-bit ascii string or a
1857 #if defined(Py_USING_UNICODE)
1858 if (checkstring(string
, size
))
1859 return PyUnicode_DecodeUTF8(string
, size
, "strict");
1862 return PyString_FromStringAndSize(string
, size
);
1866 makeuniversal(XMLParserObject
* self
, const char* string
)
1868 /* convert a UTF-8 tag/attribute name from the expat parser
1869 to a universal name string */
1871 int size
= strlen(string
);
1875 /* look the 'raw' name up in the names dictionary */
1876 key
= PyString_FromStringAndSize(string
, size
);
1880 value
= PyDict_GetItem(self
->names
, key
);
1885 /* new name. convert to universal name, and decode as
1892 /* look for namespace separator */
1893 for (i
= 0; i
< size
; i
++)
1894 if (string
[i
] == '}')
1897 /* convert to universal name */
1898 tag
= PyString_FromStringAndSize(NULL
, size
+1);
1899 p
= PyString_AS_STRING(tag
);
1901 memcpy(p
+1, string
, size
);
1904 /* plain name; use key as tag */
1909 /* decode universal name */
1910 #if defined(Py_USING_UNICODE)
1911 /* inline makestring, to avoid duplicating the source string if
1912 it's not an utf-8 string */
1913 p
= PyString_AS_STRING(tag
);
1914 if (checkstring(p
, size
)) {
1915 value
= PyUnicode_DecodeUTF8(p
, size
, "strict");
1923 value
= tag
; /* use tag as is */
1925 /* add to names dictionary */
1926 if (PyDict_SetItem(self
->names
, key
, value
) < 0) {
1937 /* -------------------------------------------------------------------- */
1941 expat_default_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
1948 if (data_len
< 2 || data_in
[0] != '&')
1951 key
= makestring(data_in
+ 1, data_len
- 2);
1955 value
= PyDict_GetItem(self
->entity
, key
);
1958 if (TreeBuilder_CheckExact(self
->target
))
1959 res
= treebuilder_handle_data(
1960 (TreeBuilderObject
*) self
->target
, value
1962 else if (self
->handle_data
)
1963 res
= PyObject_CallFunction(self
->handle_data
, "O", value
);
1969 PyExc_SyntaxError
, "undefined entity &%s;: line %d, column %d",
1970 PyString_AS_STRING(key
),
1971 EXPAT(GetErrorLineNumber
)(self
->parser
),
1972 EXPAT(GetErrorColumnNumber
)(self
->parser
)
1980 expat_start_handler(XMLParserObject
* self
, const XML_Char
* tag_in
,
1981 const XML_Char
**attrib_in
)
1989 tag
= makeuniversal(self
, tag_in
);
1991 return; /* parser will look for errors */
1995 attrib
= PyDict_New();
1998 while (attrib_in
[0] && attrib_in
[1]) {
1999 PyObject
* key
= makeuniversal(self
, attrib_in
[0]);
2000 PyObject
* value
= makestring(attrib_in
[1], strlen(attrib_in
[1]));
2001 if (!key
|| !value
) {
2007 ok
= PyDict_SetItem(attrib
, key
, value
);
2021 if (TreeBuilder_CheckExact(self
->target
))
2023 res
= treebuilder_handle_start((TreeBuilderObject
*) self
->target
,
2025 else if (self
->handle_start
)
2026 res
= PyObject_CallFunction(self
->handle_start
, "OO", tag
, attrib
);
2037 expat_data_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
2043 data
= makestring(data_in
, data_len
);
2045 if (TreeBuilder_CheckExact(self
->target
))
2047 res
= treebuilder_handle_data((TreeBuilderObject
*) self
->target
, data
);
2048 else if (self
->handle_data
)
2049 res
= PyObject_CallFunction(self
->handle_data
, "O", data
);
2059 expat_end_handler(XMLParserObject
* self
, const XML_Char
* tag_in
)
2062 PyObject
* res
= NULL
;
2064 if (TreeBuilder_CheckExact(self
->target
))
2066 /* the standard tree builder doesn't look at the end tag */
2067 res
= treebuilder_handle_end(
2068 (TreeBuilderObject
*) self
->target
, Py_None
2070 else if (self
->handle_end
) {
2071 tag
= makeuniversal(self
, tag_in
);
2073 res
= PyObject_CallFunction(self
->handle_end
, "O", tag
);
2082 expat_start_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix
,
2083 const XML_Char
*uri
)
2085 treebuilder_handle_namespace(
2086 (TreeBuilderObject
*) self
->target
, 1, prefix
, uri
2091 expat_end_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix_in
)
2093 treebuilder_handle_namespace(
2094 (TreeBuilderObject
*) self
->target
, 0, NULL
, NULL
2099 expat_comment_handler(XMLParserObject
* self
, const XML_Char
* comment_in
)
2104 if (self
->handle_comment
) {
2105 comment
= makestring(comment_in
, strlen(comment_in
));
2107 res
= PyObject_CallFunction(self
->handle_comment
, "O", comment
);
2115 expat_pi_handler(XMLParserObject
* self
, const XML_Char
* target_in
,
2116 const XML_Char
* data_in
)
2122 if (self
->handle_pi
) {
2123 target
= makestring(target_in
, strlen(target_in
));
2124 data
= makestring(data_in
, strlen(data_in
));
2125 if (target
&& data
) {
2126 res
= PyObject_CallFunction(self
->handle_pi
, "OO", target
, data
);
2137 #if defined(Py_USING_UNICODE)
2139 expat_unknown_encoding_handler(XMLParserObject
*self
, const XML_Char
*name
,
2144 unsigned char s
[256];
2147 memset(info
, 0, sizeof(XML_Encoding
));
2149 for (i
= 0; i
< 256; i
++)
2152 u
= PyUnicode_Decode((char*) s
, 256, name
, "replace");
2154 return XML_STATUS_ERROR
;
2156 if (PyUnicode_GET_SIZE(u
) != 256) {
2158 return XML_STATUS_ERROR
;
2161 p
= PyUnicode_AS_UNICODE(u
);
2163 for (i
= 0; i
< 256; i
++) {
2164 if (p
[i
] != Py_UNICODE_REPLACEMENT_CHARACTER
)
2165 info
->map
[i
] = p
[i
];
2172 return XML_STATUS_OK
;
2176 /* -------------------------------------------------------------------- */
2177 /* constructor and destructor */
2180 xmlparser(PyObject
* _self
, PyObject
* args
, PyObject
* kw
)
2182 XMLParserObject
* self
;
2183 /* FIXME: does this need to be static? */
2184 static XML_Memory_Handling_Suite memory_handler
;
2186 PyObject
* target
= NULL
;
2187 char* encoding
= NULL
;
2188 static PY_CONST
char* kwlist
[] = { "target", "encoding", NULL
};
2189 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|Oz:XMLParser", kwlist
,
2190 &target
, &encoding
))
2193 #if defined(USE_PYEXPAT_CAPI)
2196 PyExc_RuntimeError
, "cannot load dispatch table from pyexpat"
2202 self
= PyObject_New(XMLParserObject
, &XMLParser_Type
);
2206 self
->entity
= PyDict_New();
2207 if (!self
->entity
) {
2209 return NULL
; /* FIXME: cleanup on error */
2212 self
->names
= PyDict_New();
2215 return NULL
; /* FIXME: cleanup on error */
2218 memory_handler
.malloc_fcn
= PyObject_Malloc
;
2219 memory_handler
.realloc_fcn
= PyObject_Realloc
;
2220 memory_handler
.free_fcn
= PyObject_Free
;
2222 self
->parser
= EXPAT(ParserCreate_MM
)(encoding
, &memory_handler
, "}");
2223 if (!self
->parser
) {
2225 return NULL
; /* FIXME: cleanup on error */
2228 /* setup target handlers */
2230 target
= treebuilder_new();
2233 return NULL
; /* FIXME: cleanup on error */
2237 self
->target
= target
;
2239 self
->handle_xml
= PyObject_GetAttrString(target
, "xml");
2240 self
->handle_start
= PyObject_GetAttrString(target
, "start");
2241 self
->handle_data
= PyObject_GetAttrString(target
, "data");
2242 self
->handle_end
= PyObject_GetAttrString(target
, "end");
2243 self
->handle_comment
= PyObject_GetAttrString(target
, "comment");
2244 self
->handle_pi
= PyObject_GetAttrString(target
, "pi");
2248 /* configure parser */
2249 EXPAT(SetUserData
)(self
->parser
, self
);
2250 EXPAT(SetElementHandler
)(
2252 (XML_StartElementHandler
) expat_start_handler
,
2253 (XML_EndElementHandler
) expat_end_handler
2255 EXPAT(SetDefaultHandlerExpand
)(
2257 (XML_DefaultHandler
) expat_default_handler
2259 EXPAT(SetCharacterDataHandler
)(
2261 (XML_CharacterDataHandler
) expat_data_handler
2263 if (self
->handle_comment
)
2264 EXPAT(SetCommentHandler
)(
2266 (XML_CommentHandler
) expat_comment_handler
2268 if (self
->handle_pi
)
2269 EXPAT(SetProcessingInstructionHandler
)(
2271 (XML_ProcessingInstructionHandler
) expat_pi_handler
2273 #if defined(Py_USING_UNICODE)
2274 EXPAT(SetUnknownEncodingHandler
)(
2276 (XML_UnknownEncodingHandler
) expat_unknown_encoding_handler
, NULL
2280 ALLOC(sizeof(XMLParserObject
), "create expatparser");
2282 return (PyObject
*) self
;
2286 xmlparser_dealloc(XMLParserObject
* self
)
2288 EXPAT(ParserFree
)(self
->parser
);
2290 Py_XDECREF(self
->handle_pi
);
2291 Py_XDECREF(self
->handle_comment
);
2292 Py_XDECREF(self
->handle_end
);
2293 Py_XDECREF(self
->handle_data
);
2294 Py_XDECREF(self
->handle_start
);
2295 Py_XDECREF(self
->handle_xml
);
2297 Py_DECREF(self
->target
);
2298 Py_DECREF(self
->entity
);
2299 Py_DECREF(self
->names
);
2301 RELEASE(sizeof(XMLParserObject
), "destroy expatparser");
2306 /* -------------------------------------------------------------------- */
2307 /* methods (in alphabetical order) */
2310 expat_parse(XMLParserObject
* self
, char* data
, int data_len
, int final
)
2314 ok
= EXPAT(Parse
)(self
->parser
, data
, data_len
, final
);
2316 if (PyErr_Occurred())
2321 PyExc_SyntaxError
, "%s: line %d, column %d",
2322 EXPAT(ErrorString
)(EXPAT(GetErrorCode
)(self
->parser
)),
2323 EXPAT(GetErrorLineNumber
)(self
->parser
),
2324 EXPAT(GetErrorColumnNumber
)(self
->parser
)
2333 xmlparser_close(XMLParserObject
* self
, PyObject
* args
)
2335 /* end feeding data to parser */
2338 if (!PyArg_ParseTuple(args
, ":close"))
2341 res
= expat_parse(self
, "", 0, 1);
2343 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2345 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2352 xmlparser_feed(XMLParserObject
* self
, PyObject
* args
)
2354 /* feed data to parser */
2358 if (!PyArg_ParseTuple(args
, "s#:feed", &data
, &data_len
))
2361 return expat_parse(self
, data
, data_len
, 0);
2365 xmlparser_parse(XMLParserObject
* self
, PyObject
* args
)
2367 /* (internal) parse until end of input stream */
2374 if (!PyArg_ParseTuple(args
, "O:_parse", &fileobj
))
2377 reader
= PyObject_GetAttrString(fileobj
, "read");
2381 /* read from open file object */
2384 buffer
= PyObject_CallFunction(reader
, "i", 64*1024);
2387 /* read failed (e.g. due to KeyboardInterrupt) */
2392 if (!PyString_CheckExact(buffer
) || PyString_GET_SIZE(buffer
) == 0) {
2398 self
, PyString_AS_STRING(buffer
), PyString_GET_SIZE(buffer
), 0
2413 res
= expat_parse(self
, "", 0, 1);
2415 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2417 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2424 xmlparser_setevents(XMLParserObject
* self
, PyObject
* args
)
2426 /* activate element event reporting */
2429 TreeBuilderObject
* target
;
2431 PyObject
* events
; /* event collector */
2432 PyObject
* event_set
= Py_None
;
2433 if (!PyArg_ParseTuple(args
, "O!|O:_setevents", &PyList_Type
, &events
,
2437 if (!TreeBuilder_CheckExact(self
->target
)) {
2440 "event handling only supported for cElementTree.Treebuilder "
2446 target
= (TreeBuilderObject
*) self
->target
;
2449 Py_XDECREF(target
->events
);
2450 target
->events
= events
;
2452 /* clear out existing events */
2453 Py_XDECREF(target
->start_event_obj
); target
->start_event_obj
= NULL
;
2454 Py_XDECREF(target
->end_event_obj
); target
->end_event_obj
= NULL
;
2455 Py_XDECREF(target
->start_ns_event_obj
); target
->start_ns_event_obj
= NULL
;
2456 Py_XDECREF(target
->end_ns_event_obj
); target
->end_ns_event_obj
= NULL
;
2458 if (event_set
== Py_None
) {
2459 /* default is "end" only */
2460 target
->end_event_obj
= PyString_FromString("end");
2464 if (!PyTuple_Check(event_set
)) /* FIXME: handle arbitrary sequences */
2467 for (i
= 0; i
< PyTuple_GET_SIZE(event_set
); i
++) {
2468 PyObject
* item
= PyTuple_GET_ITEM(event_set
, i
);
2470 if (!PyString_Check(item
))
2472 event
= PyString_AS_STRING(item
);
2473 if (strcmp(event
, "start") == 0) {
2475 target
->start_event_obj
= item
;
2476 } else if (strcmp(event
, "end") == 0) {
2478 Py_XDECREF(target
->end_event_obj
);
2479 target
->end_event_obj
= item
;
2480 } else if (strcmp(event
, "start-ns") == 0) {
2482 Py_XDECREF(target
->start_ns_event_obj
);
2483 target
->start_ns_event_obj
= item
;
2484 EXPAT(SetNamespaceDeclHandler
)(
2486 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2487 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2489 } else if (strcmp(event
, "end-ns") == 0) {
2491 Py_XDECREF(target
->end_ns_event_obj
);
2492 target
->end_ns_event_obj
= item
;
2493 EXPAT(SetNamespaceDeclHandler
)(
2495 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2496 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2501 "unknown event '%s'", event
2512 "invalid event tuple"
2517 static PyMethodDef xmlparser_methods
[] = {
2518 {"feed", (PyCFunction
) xmlparser_feed
, METH_VARARGS
},
2519 {"close", (PyCFunction
) xmlparser_close
, METH_VARARGS
},
2520 {"_parse", (PyCFunction
) xmlparser_parse
, METH_VARARGS
},
2521 {"_setevents", (PyCFunction
) xmlparser_setevents
, METH_VARARGS
},
2526 xmlparser_getattr(XMLParserObject
* self
, char* name
)
2530 res
= Py_FindMethod(xmlparser_methods
, (PyObject
*) self
, name
);
2536 if (strcmp(name
, "entity") == 0)
2538 else if (strcmp(name
, "target") == 0)
2540 else if (strcmp(name
, "version") == 0) {
2542 sprintf(buffer
, "Expat %d.%d.%d", XML_MAJOR_VERSION
,
2543 XML_MINOR_VERSION
, XML_MICRO_VERSION
);
2544 return PyString_FromString(buffer
);
2546 PyErr_SetString(PyExc_AttributeError
, name
);
2554 statichere PyTypeObject XMLParser_Type
= {
2555 PyObject_HEAD_INIT(NULL
)
2556 0, "XMLParser", sizeof(XMLParserObject
), 0,
2558 (destructor
)xmlparser_dealloc
, /* tp_dealloc */
2560 (getattrfunc
)xmlparser_getattr
, /* tp_getattr */
2565 /* ==================================================================== */
2566 /* python module interface */
2568 static PyMethodDef _functions
[] = {
2569 {"Element", (PyCFunction
) element
, METH_VARARGS
|METH_KEYWORDS
},
2570 {"SubElement", (PyCFunction
) subelement
, METH_VARARGS
|METH_KEYWORDS
},
2571 {"TreeBuilder", (PyCFunction
) treebuilder
, METH_VARARGS
},
2572 #if defined(USE_EXPAT)
2573 {"XMLParser", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2574 {"XMLTreeBuilder", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2580 init_elementtree(void)
2585 #if defined(USE_PYEXPAT_CAPI)
2586 struct PyExpat_CAPI
* capi
;
2589 /* Patch object type */
2590 Element_Type
.ob_type
= TreeBuilder_Type
.ob_type
= &PyType_Type
;
2591 #if defined(USE_EXPAT)
2592 XMLParser_Type
.ob_type
= &PyType_Type
;
2595 m
= Py_InitModule("_elementtree", _functions
);
2599 /* python glue code */
2605 PyDict_SetItemString(g
, "__builtins__", PyEval_GetBuiltins());
2609 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2610 "from __future__ import generators\n" /* enable yield under 2.2 */
2613 "from copy import copy, deepcopy\n"
2616 " from xml.etree import ElementTree\n"
2617 "except ImportError:\n"
2618 " import ElementTree\n"
2619 "ET = ElementTree\n"
2622 "import _elementtree as cElementTree\n"
2624 "try:\n" /* check if copy works as is */
2625 " copy(cElementTree.Element('x'))\n"
2627 " def copyelement(elem):\n"
2630 "def Comment(text=None):\n" /* public */
2631 " element = cElementTree.Element(ET.Comment)\n"
2632 " element.text = text\n"
2634 "cElementTree.Comment = Comment\n"
2636 "class ElementTree(ET.ElementTree):\n" /* public */
2637 " def parse(self, source, parser=None):\n"
2638 " if not hasattr(source, 'read'):\n"
2639 " source = open(source, 'rb')\n"
2640 " if parser is not None:\n"
2642 " data = source.read(65536)\n"
2645 " parser.feed(data)\n"
2646 " self._root = parser.close()\n"
2648 " parser = cElementTree.XMLParser()\n"
2649 " self._root = parser._parse(source)\n"
2650 " return self._root\n"
2651 "cElementTree.ElementTree = ElementTree\n"
2653 "def getiterator(node, tag=None):\n" /* helper */
2656 #if (PY_VERSION_HEX < 0x02020000)
2657 " nodes = []\n" /* 2.1 doesn't have yield */
2658 " if tag is None or node.tag == tag:\n"
2659 " nodes.append(node)\n"
2660 " for node in node:\n"
2661 " nodes.extend(getiterator(node, tag))\n"
2664 " if tag is None or node.tag == tag:\n"
2666 " for node in node:\n"
2667 " for node in getiterator(node, tag):\n"
2671 "def parse(source, parser=None):\n" /* public */
2672 " tree = ElementTree()\n"
2673 " tree.parse(source, parser)\n"
2675 "cElementTree.parse = parse\n"
2677 #if (PY_VERSION_HEX < 0x02020000)
2678 "if hasattr(ET, 'iterparse'):\n"
2679 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2681 "class iterparse(object):\n"
2683 " def __init__(self, file, events=None):\n"
2684 " if not hasattr(file, 'read'):\n"
2685 " file = open(file, 'rb')\n"
2686 " self._file = file\n"
2687 " self._events = events\n"
2688 " def __iter__(self):\n"
2690 " b = cElementTree.TreeBuilder()\n"
2691 " p = cElementTree.XMLParser(b)\n"
2692 " p._setevents(events, self._events)\n"
2694 " data = self._file.read(16384)\n"
2698 " for event in events:\n"
2701 " root = p.close()\n"
2702 " for event in events:\n"
2704 " self.root = root\n"
2705 "cElementTree.iterparse = iterparse\n"
2708 "def PI(target, text=None):\n" /* public */
2709 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2710 " element.text = target\n"
2712 " element.text = element.text + ' ' + text\n"
2715 " elem = cElementTree.Element(ET.PI)\n"
2716 " elem.text = text\n"
2718 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2720 "def XML(text):\n" /* public */
2721 " parser = cElementTree.XMLParser()\n"
2722 " parser.feed(text)\n"
2723 " return parser.close()\n"
2724 "cElementTree.XML = cElementTree.fromstring = XML\n"
2726 "def XMLID(text):\n" /* public */
2727 " tree = XML(text)\n"
2729 " for elem in tree.getiterator():\n"
2730 " id = elem.get('id')\n"
2733 " return tree, ids\n"
2734 "cElementTree.XMLID = XMLID\n"
2736 "cElementTree.dump = ET.dump\n"
2737 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2738 "cElementTree.iselement = ET.iselement\n"
2739 "cElementTree.QName = ET.QName\n"
2740 "cElementTree.tostring = ET.tostring\n"
2741 "cElementTree.VERSION = '" VERSION
"'\n"
2742 "cElementTree.__version__ = '" VERSION
"'\n"
2743 "cElementTree.XMLParserError = SyntaxError\n"
2747 PyRun_String(bootstrap
, Py_file_input
, g
, NULL
);
2749 elementpath_obj
= PyDict_GetItemString(g
, "ElementPath");
2751 elementtree_copyelement_obj
= PyDict_GetItemString(g
, "copyelement");
2752 if (elementtree_copyelement_obj
) {
2753 /* reduce hack needed; enable reduce method */
2755 for (mp
= element_methods
; mp
->ml_name
; mp
++)
2756 if (mp
->ml_meth
== (PyCFunction
) element_reduce
) {
2757 mp
->ml_name
= "__reduce__";
2762 elementtree_deepcopy_obj
= PyDict_GetItemString(g
, "deepcopy");
2763 elementtree_getiterator_obj
= PyDict_GetItemString(g
, "getiterator");
2765 #if defined(USE_PYEXPAT_CAPI)
2766 /* link against pyexpat, if possible */
2767 capi
= PyCObject_Import("pyexpat", "expat_CAPI");
2769 strcmp(capi
->magic
, PyExpat_CAPI_MAGIC
) == 0 &&
2770 capi
->size
<= sizeof(*expat_capi
) &&
2771 capi
->MAJOR_VERSION
== XML_MAJOR_VERSION
&&
2772 capi
->MINOR_VERSION
== XML_MINOR_VERSION
&&
2773 capi
->MICRO_VERSION
== XML_MICRO_VERSION
)