3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
51 #define VERSION "1.0.6"
53 /* -------------------------------------------------------------------- */
56 /* Leave defined to include the expat-based XMLParser type */
59 /* Define to route all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory allocations. */
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on 32-bit platforms. */
76 /* -------------------------------------------------------------------- */
79 static int memory
= 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
91 #define LOCAL(type) static __inline type __fastcall
93 #define LOCAL(type) static type
96 /* compatibility macros */
97 #if (PY_VERSION_HEX < 0x02050000)
98 typedef int Py_ssize_t
;
99 #define lenfunc inquiry
102 #if (PY_VERSION_HEX < 0x02040000)
103 #define PyDict_CheckExact PyDict_Check
104 #if (PY_VERSION_HEX < 0x02020000)
105 #define PyList_CheckExact PyList_Check
106 #define PyString_CheckExact PyString_Check
107 #if (PY_VERSION_HEX >= 0x01060000)
108 #define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
113 #if !defined(Py_RETURN_NONE)
114 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
117 /* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more details. */
121 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
125 /* glue functions (see the init function for details) */
126 static PyObject
* elementtree_copyelement_obj
;
127 static PyObject
* elementtree_deepcopy_obj
;
128 static PyObject
* elementtree_getiterator_obj
;
129 static PyObject
* elementpath_obj
;
134 deepcopy(PyObject
* object
, PyObject
* memo
)
136 /* do a deep copy of the given object */
141 if (!elementtree_deepcopy_obj
) {
144 "deepcopy helper not found"
149 args
= PyTuple_New(2);
153 Py_INCREF(object
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) object
);
154 Py_INCREF(memo
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) memo
);
156 result
= PyObject_CallObject(elementtree_deepcopy_obj
, args
);
164 list_join(PyObject
* list
)
166 /* join list elements (destroying the list in the process) */
173 switch (PyList_GET_SIZE(list
)) {
176 return PyString_FromString("");
178 result
= PyList_GET_ITEM(list
, 0);
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
187 joiner
= PySequence_GetSlice(PyList_GET_ITEM(list
, 0), 0, 0);
191 function
= PyObject_GetAttrString(joiner
, "join");
197 args
= PyTuple_New(1);
201 PyTuple_SET_ITEM(args
, 0, list
);
203 result
= PyObject_CallObject(function
, args
);
205 Py_DECREF(args
); /* also removes list */
212 #if (PY_VERSION_HEX < 0x02020000)
214 PyDict_Update(PyObject
* dict
, PyObject
* other
)
216 /* PyDict_Update emulation for 2.1 and earlier */
220 res
= PyObject_CallMethod(dict
, "update", "O", other
);
229 /* -------------------------------------------------------------------- */
230 /* the element type */
234 /* attributes (a dictionary object), or None if no attributes */
238 int length
; /* actual number of items */
239 int allocated
; /* allocated items */
241 /* this either points to _children or to a malloced buffer */
244 PyObject
* _children
[STATIC_CHILDREN
];
246 } ElementObjectExtra
;
251 /* element tag (a string). */
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
266 ElementObjectExtra
* extra
;
270 staticforward PyTypeObject Element_Type
;
272 #define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
274 /* -------------------------------------------------------------------- */
275 /* element constructor and destructor */
278 element_new_extra(ElementObject
* self
, PyObject
* attrib
)
280 self
->extra
= PyObject_Malloc(sizeof(ElementObjectExtra
));
288 self
->extra
->attrib
= attrib
;
290 self
->extra
->length
= 0;
291 self
->extra
->allocated
= STATIC_CHILDREN
;
292 self
->extra
->children
= self
->extra
->_children
;
298 element_dealloc_extra(ElementObject
* self
)
302 Py_DECREF(self
->extra
->attrib
);
304 for (i
= 0; i
< self
->extra
->length
; i
++)
305 Py_DECREF(self
->extra
->children
[i
]);
307 if (self
->extra
->children
!= self
->extra
->_children
)
308 PyObject_Free(self
->extra
->children
);
310 PyObject_Free(self
->extra
);
314 element_new(PyObject
* tag
, PyObject
* attrib
)
318 self
= PyObject_New(ElementObject
, &Element_Type
);
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib
) && !PyDict_Size(attrib
))
328 if (attrib
!= Py_None
) {
330 if (element_new_extra(self
, attrib
) < 0) {
335 self
->extra
->length
= 0;
336 self
->extra
->allocated
= STATIC_CHILDREN
;
337 self
->extra
->children
= self
->extra
->_children
;
345 self
->text
= Py_None
;
348 self
->tail
= Py_None
;
350 ALLOC(sizeof(ElementObject
), "create element");
352 return (PyObject
*) self
;
356 element_resize(ElementObject
* self
, int extra
)
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
365 element_new_extra(self
, NULL
);
367 size
= self
->extra
->length
+ extra
;
369 if (size
> self
->extra
->allocated
) {
370 /* use Python 2.4's list growth strategy */
371 size
= (size
>> 3) + (size
< 9 ? 3 : 6) + size
;
372 if (self
->extra
->children
!= self
->extra
->_children
) {
373 children
= PyObject_Realloc(self
->extra
->children
,
374 size
* sizeof(PyObject
*));
378 children
= PyObject_Malloc(size
* sizeof(PyObject
*));
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children
, self
->extra
->children
,
383 self
->extra
->length
* sizeof(PyObject
*));
385 self
->extra
->children
= children
;
386 self
->extra
->allocated
= size
;
397 element_add_subelement(ElementObject
* self
, PyObject
* element
)
399 /* add a child element to a parent */
401 if (element_resize(self
, 1) < 0)
405 self
->extra
->children
[self
->extra
->length
] = element
;
407 self
->extra
->length
++;
413 element_get_attrib(ElementObject
* self
)
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
418 PyObject
* res
= self
->extra
->attrib
;
420 if (res
== Py_None
) {
421 /* create missing dictionary */
425 self
->extra
->attrib
= res
;
432 element_get_text(ElementObject
* self
)
434 /* return borrowed reference to text attribute */
436 PyObject
* res
= self
->text
;
440 if (PyList_CheckExact(res
)) {
441 res
= list_join(res
);
452 element_get_tail(ElementObject
* self
)
454 /* return borrowed reference to tail attribute */
456 PyObject
* res
= self
->tail
;
460 if (PyList_CheckExact(res
)) {
461 res
= list_join(res
);
472 element(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
477 PyObject
* attrib
= NULL
;
478 if (!PyArg_ParseTuple(args
, "O|O!:Element", &tag
,
479 &PyDict_Type
, &attrib
))
483 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
487 PyDict_Update(attrib
, kw
);
493 elem
= element_new(tag
, attrib
);
501 subelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
505 ElementObject
* parent
;
507 PyObject
* attrib
= NULL
;
508 if (!PyArg_ParseTuple(args
, "O!O|O!:SubElement",
509 &Element_Type
, &parent
, &tag
,
510 &PyDict_Type
, &attrib
))
514 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
518 PyDict_Update(attrib
, kw
);
524 elem
= element_new(tag
, attrib
);
528 if (element_add_subelement(parent
, elem
) < 0) {
537 element_dealloc(ElementObject
* self
)
540 element_dealloc_extra(self
);
542 /* discard attributes */
543 Py_DECREF(self
->tag
);
544 Py_DECREF(JOIN_OBJ(self
->text
));
545 Py_DECREF(JOIN_OBJ(self
->tail
));
547 RELEASE(sizeof(ElementObject
), "destroy element");
552 /* -------------------------------------------------------------------- */
553 /* methods (in alphabetical order) */
556 element_append(ElementObject
* self
, PyObject
* args
)
559 if (!PyArg_ParseTuple(args
, "O!:append", &Element_Type
, &element
))
562 if (element_add_subelement(self
, element
) < 0)
569 element_clear(ElementObject
* self
, PyObject
* args
)
571 if (!PyArg_ParseTuple(args
, ":clear"))
575 element_dealloc_extra(self
);
580 Py_DECREF(JOIN_OBJ(self
->text
));
581 self
->text
= Py_None
;
584 Py_DECREF(JOIN_OBJ(self
->tail
));
585 self
->tail
= Py_None
;
591 element_copy(ElementObject
* self
, PyObject
* args
)
594 ElementObject
* element
;
596 if (!PyArg_ParseTuple(args
, ":__copy__"))
599 element
= (ElementObject
*) element_new(
600 self
->tag
, (self
->extra
) ? self
->extra
->attrib
: Py_None
605 Py_DECREF(JOIN_OBJ(element
->text
));
606 element
->text
= self
->text
;
607 Py_INCREF(JOIN_OBJ(element
->text
));
609 Py_DECREF(JOIN_OBJ(element
->tail
));
610 element
->tail
= self
->tail
;
611 Py_INCREF(JOIN_OBJ(element
->tail
));
615 if (element_resize(element
, self
->extra
->length
) < 0) {
620 for (i
= 0; i
< self
->extra
->length
; i
++) {
621 Py_INCREF(self
->extra
->children
[i
]);
622 element
->extra
->children
[i
] = self
->extra
->children
[i
];
625 element
->extra
->length
= self
->extra
->length
;
629 return (PyObject
*) element
;
633 element_deepcopy(ElementObject
* self
, PyObject
* args
)
636 ElementObject
* element
;
644 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
647 tag
= deepcopy(self
->tag
, memo
);
652 attrib
= deepcopy(self
->extra
->attrib
, memo
);
662 element
= (ElementObject
*) element_new(tag
, attrib
);
670 text
= deepcopy(JOIN_OBJ(self
->text
), memo
);
673 Py_DECREF(element
->text
);
674 element
->text
= JOIN_SET(text
, JOIN_GET(self
->text
));
676 tail
= deepcopy(JOIN_OBJ(self
->tail
), memo
);
679 Py_DECREF(element
->tail
);
680 element
->tail
= JOIN_SET(tail
, JOIN_GET(self
->tail
));
684 if (element_resize(element
, self
->extra
->length
) < 0)
687 for (i
= 0; i
< self
->extra
->length
; i
++) {
688 PyObject
* child
= deepcopy(self
->extra
->children
[i
], memo
);
690 element
->extra
->length
= i
;
693 element
->extra
->children
[i
] = child
;
696 element
->extra
->length
= self
->extra
->length
;
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id
= PyInt_FromLong((Py_uintptr_t
) self
);
703 i
= PyDict_SetItem(memo
, id
, (PyObject
*) element
);
710 return (PyObject
*) element
;
718 checkpath(PyObject
* tag
)
723 /* check if a tag contains an xpath character */
725 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
727 #if defined(Py_USING_UNICODE)
728 if (PyUnicode_Check(tag
)) {
729 Py_UNICODE
*p
= PyUnicode_AS_UNICODE(tag
);
730 for (i
= 0; i
< PyUnicode_GET_SIZE(tag
); i
++) {
733 else if (p
[i
] == '}')
735 else if (check
&& PATHCHAR(p
[i
]))
741 if (PyString_Check(tag
)) {
742 char *p
= PyString_AS_STRING(tag
);
743 for (i
= 0; i
< PyString_GET_SIZE(tag
); i
++) {
746 else if (p
[i
] == '}')
748 else if (check
&& PATHCHAR(p
[i
]))
754 return 1; /* unknown type; might be path expression */
758 element_find(ElementObject
* self
, PyObject
* args
)
763 if (!PyArg_ParseTuple(args
, "O:find", &tag
))
767 return PyObject_CallMethod(
768 elementpath_obj
, "find", "OO", self
, tag
774 for (i
= 0; i
< self
->extra
->length
; i
++) {
775 PyObject
* item
= self
->extra
->children
[i
];
776 if (Element_CheckExact(item
) &&
777 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
787 element_findtext(ElementObject
* self
, PyObject
* args
)
792 PyObject
* default_value
= Py_None
;
793 if (!PyArg_ParseTuple(args
, "O|O:findtext", &tag
, &default_value
))
797 return PyObject_CallMethod(
798 elementpath_obj
, "findtext", "OOO", self
, tag
, default_value
802 Py_INCREF(default_value
);
803 return default_value
;
806 for (i
= 0; i
< self
->extra
->length
; i
++) {
807 ElementObject
* item
= (ElementObject
*) self
->extra
->children
[i
];
808 if (Element_CheckExact(item
) && !PyObject_Compare(item
->tag
, tag
)) {
809 PyObject
* text
= element_get_text(item
);
811 return PyString_FromString("");
817 Py_INCREF(default_value
);
818 return default_value
;
822 element_findall(ElementObject
* self
, PyObject
* args
)
828 if (!PyArg_ParseTuple(args
, "O:findall", &tag
))
832 return PyObject_CallMethod(
833 elementpath_obj
, "findall", "OO", self
, tag
843 for (i
= 0; i
< self
->extra
->length
; i
++) {
844 PyObject
* item
= self
->extra
->children
[i
];
845 if (Element_CheckExact(item
) &&
846 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
847 if (PyList_Append(out
, item
) < 0) {
858 element_get(ElementObject
* self
, PyObject
* args
)
863 PyObject
* default_value
= Py_None
;
864 if (!PyArg_ParseTuple(args
, "O|O:get", &key
, &default_value
))
867 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
868 value
= default_value
;
870 value
= PyDict_GetItem(self
->extra
->attrib
, key
);
872 value
= default_value
;
880 element_getchildren(ElementObject
* self
, PyObject
* args
)
885 if (!PyArg_ParseTuple(args
, ":getchildren"))
889 return PyList_New(0);
891 list
= PyList_New(self
->extra
->length
);
895 for (i
= 0; i
< self
->extra
->length
; i
++) {
896 PyObject
* item
= self
->extra
->children
[i
];
898 PyList_SET_ITEM(list
, i
, item
);
905 element_getiterator(ElementObject
* self
, PyObject
* args
)
909 PyObject
* tag
= Py_None
;
910 if (!PyArg_ParseTuple(args
, "|O:getiterator", &tag
))
913 if (!elementtree_getiterator_obj
) {
916 "getiterator helper not found"
921 args
= PyTuple_New(2);
925 Py_INCREF(self
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) self
);
926 Py_INCREF(tag
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) tag
);
928 result
= PyObject_CallObject(elementtree_getiterator_obj
, args
);
936 element_getitem(PyObject
* self_
, Py_ssize_t index
)
938 ElementObject
* self
= (ElementObject
*) self_
;
940 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
943 "child index out of range"
948 Py_INCREF(self
->extra
->children
[index
]);
949 return self
->extra
->children
[index
];
953 element_getslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
)
955 ElementObject
* self
= (ElementObject
*) self_
;
960 return PyList_New(0);
962 /* standard clamping */
967 if (end
> self
->extra
->length
)
968 end
= self
->extra
->length
;
972 list
= PyList_New(end
- start
);
976 for (i
= start
; i
< end
; i
++) {
977 PyObject
* item
= self
->extra
->children
[i
];
979 PyList_SET_ITEM(list
, i
- start
, item
);
986 element_insert(ElementObject
* self
, PyObject
* args
)
992 if (!PyArg_ParseTuple(args
, "iO!:insert", &index
,
993 &Element_Type
, &element
))
997 element_new_extra(self
, NULL
);
1001 if (index
> self
->extra
->length
)
1002 index
= self
->extra
->length
;
1004 if (element_resize(self
, 1) < 0)
1007 for (i
= self
->extra
->length
; i
> index
; i
--)
1008 self
->extra
->children
[i
] = self
->extra
->children
[i
-1];
1011 self
->extra
->children
[index
] = element
;
1013 self
->extra
->length
++;
1019 element_items(ElementObject
* self
, PyObject
* args
)
1021 if (!PyArg_ParseTuple(args
, ":items"))
1024 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1025 return PyList_New(0);
1027 return PyDict_Items(self
->extra
->attrib
);
1031 element_keys(ElementObject
* self
, PyObject
* args
)
1033 if (!PyArg_ParseTuple(args
, ":keys"))
1036 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1037 return PyList_New(0);
1039 return PyDict_Keys(self
->extra
->attrib
);
1043 element_length(ElementObject
* self
)
1048 return self
->extra
->length
;
1052 element_makeelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
1058 if (!PyArg_ParseTuple(args
, "OO:makeelement", &tag
, &attrib
))
1061 attrib
= PyDict_Copy(attrib
);
1065 elem
= element_new(tag
, attrib
);
1073 element_reduce(ElementObject
* self
, PyObject
* args
)
1075 if (!PyArg_ParseTuple(args
, ":__reduce__"))
1078 /* Hack alert: This method is used to work around a __copy__
1079 problem on certain 2.3 and 2.4 versions. To save time and
1080 simplify the code, we create the copy in here, and use a dummy
1081 copyelement helper to trick the copy module into doing the right thing. */
1084 if (!elementtree_copyelement_obj
) {
1087 "copyelement helper not found"
1092 return Py_BuildValue(
1093 "O(N)", elementtree_copyelement_obj
, element_copy(self
, args
)
1098 element_remove(ElementObject
* self
, PyObject
* args
)
1103 if (!PyArg_ParseTuple(args
, "O!:remove", &Element_Type
, &element
))
1107 /* element has no children, so raise exception */
1110 "list.remove(x): x not in list"
1115 for (i
= 0; i
< self
->extra
->length
; i
++) {
1116 if (self
->extra
->children
[i
] == element
)
1118 if (PyObject_Compare(self
->extra
->children
[i
], element
) == 0)
1122 if (i
== self
->extra
->length
) {
1123 /* element is not in children, so raise exception */
1126 "list.remove(x): x not in list"
1131 Py_DECREF(self
->extra
->children
[i
]);
1133 self
->extra
->length
--;
1135 for (; i
< self
->extra
->length
; i
++)
1136 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1142 element_repr(ElementObject
* self
)
1147 repr
= PyString_FromString("<Element ");
1149 PyString_ConcatAndDel(&repr
, PyObject_Repr(self
->tag
));
1151 sprintf(buffer
, " at %p>", self
);
1152 PyString_ConcatAndDel(&repr
, PyString_FromString(buffer
));
1158 element_set(ElementObject
* self
, PyObject
* args
)
1164 if (!PyArg_ParseTuple(args
, "OO:set", &key
, &value
))
1168 element_new_extra(self
, NULL
);
1170 attrib
= element_get_attrib(self
);
1174 if (PyDict_SetItem(attrib
, key
, value
) < 0)
1181 element_setslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
, PyObject
* item
)
1183 ElementObject
* self
= (ElementObject
*) self_
;
1184 Py_ssize_t i
, new, old
;
1185 PyObject
* recycle
= NULL
;
1188 element_new_extra(self
, NULL
);
1190 /* standard clamping */
1195 if (end
> self
->extra
->length
)
1196 end
= self
->extra
->length
;
1204 else if (PyList_CheckExact(item
)) {
1205 new = PyList_GET_SIZE(item
);
1207 /* FIXME: support arbitrary sequences? */
1210 "expected list, not \"%.200s\"", item
->ob_type
->tp_name
1216 /* to avoid recursive calls to this method (via decref), move
1217 old items to the recycle bin here, and get rid of them when
1218 we're done modifying the element */
1219 recycle
= PyList_New(old
);
1220 for (i
= 0; i
< old
; i
++)
1221 PyList_SET_ITEM(recycle
, i
, self
->extra
->children
[i
+ start
]);
1226 for (i
= end
; i
< self
->extra
->length
; i
++)
1227 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1228 } else if (new > old
) {
1230 if (element_resize(self
, new - old
) < 0)
1232 for (i
= self
->extra
->length
-1; i
>= end
; i
--)
1233 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1236 /* replace the slice */
1237 for (i
= 0; i
< new; i
++) {
1238 PyObject
* element
= PyList_GET_ITEM(item
, i
);
1240 self
->extra
->children
[i
+ start
] = element
;
1243 self
->extra
->length
+= new - old
;
1245 /* discard the recycle bin, and everything in it */
1246 Py_XDECREF(recycle
);
1252 element_setitem(PyObject
* self_
, Py_ssize_t index
, PyObject
* item
)
1254 ElementObject
* self
= (ElementObject
*) self_
;
1258 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
1261 "child assignment index out of range");
1265 old
= self
->extra
->children
[index
];
1269 self
->extra
->children
[index
] = item
;
1271 self
->extra
->length
--;
1272 for (i
= index
; i
< self
->extra
->length
; i
++)
1273 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1281 static PyMethodDef element_methods
[] = {
1283 {"clear", (PyCFunction
) element_clear
, METH_VARARGS
},
1285 {"get", (PyCFunction
) element_get
, METH_VARARGS
},
1286 {"set", (PyCFunction
) element_set
, METH_VARARGS
},
1288 {"find", (PyCFunction
) element_find
, METH_VARARGS
},
1289 {"findtext", (PyCFunction
) element_findtext
, METH_VARARGS
},
1290 {"findall", (PyCFunction
) element_findall
, METH_VARARGS
},
1292 {"append", (PyCFunction
) element_append
, METH_VARARGS
},
1293 {"insert", (PyCFunction
) element_insert
, METH_VARARGS
},
1294 {"remove", (PyCFunction
) element_remove
, METH_VARARGS
},
1296 {"getiterator", (PyCFunction
) element_getiterator
, METH_VARARGS
},
1297 {"getchildren", (PyCFunction
) element_getchildren
, METH_VARARGS
},
1299 {"items", (PyCFunction
) element_items
, METH_VARARGS
},
1300 {"keys", (PyCFunction
) element_keys
, METH_VARARGS
},
1302 {"makeelement", (PyCFunction
) element_makeelement
, METH_VARARGS
},
1304 {"__copy__", (PyCFunction
) element_copy
, METH_VARARGS
},
1305 {"__deepcopy__", (PyCFunction
) element_deepcopy
, METH_VARARGS
},
1307 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1308 C objects correctly, so we have to fake it using a __reduce__-
1309 based hack (see the element_reduce implementation above for details). */
1312 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1313 using a runtime test to figure out if we need to fake things
1314 or not (see the init code below). The following entry is
1315 enabled only if the hack is needed. */
1317 {"!__reduce__", (PyCFunction
) element_reduce
, METH_VARARGS
},
1323 element_getattr(ElementObject
* self
, char* name
)
1327 res
= Py_FindMethod(element_methods
, (PyObject
*) self
, name
);
1333 if (strcmp(name
, "tag") == 0)
1335 else if (strcmp(name
, "text") == 0)
1336 res
= element_get_text(self
);
1337 else if (strcmp(name
, "tail") == 0) {
1338 res
= element_get_tail(self
);
1339 } else if (strcmp(name
, "attrib") == 0) {
1341 element_new_extra(self
, NULL
);
1342 res
= element_get_attrib(self
);
1344 PyErr_SetString(PyExc_AttributeError
, name
);
1356 element_setattr(ElementObject
* self
, const char* name
, PyObject
* value
)
1358 if (value
== NULL
) {
1360 PyExc_AttributeError
,
1361 "can't delete element attributes"
1366 if (strcmp(name
, "tag") == 0) {
1367 Py_DECREF(self
->tag
);
1369 Py_INCREF(self
->tag
);
1370 } else if (strcmp(name
, "text") == 0) {
1371 Py_DECREF(JOIN_OBJ(self
->text
));
1373 Py_INCREF(self
->text
);
1374 } else if (strcmp(name
, "tail") == 0) {
1375 Py_DECREF(JOIN_OBJ(self
->tail
));
1377 Py_INCREF(self
->tail
);
1378 } else if (strcmp(name
, "attrib") == 0) {
1380 element_new_extra(self
, NULL
);
1381 Py_DECREF(self
->extra
->attrib
);
1382 self
->extra
->attrib
= value
;
1383 Py_INCREF(self
->extra
->attrib
);
1385 PyErr_SetString(PyExc_AttributeError
, name
);
1392 static PySequenceMethods element_as_sequence
= {
1393 (lenfunc
) element_length
,
1402 statichere PyTypeObject Element_Type
= {
1403 PyObject_HEAD_INIT(NULL
)
1404 0, "Element", sizeof(ElementObject
), 0,
1406 (destructor
)element_dealloc
, /* tp_dealloc */
1408 (getattrfunc
)element_getattr
, /* tp_getattr */
1409 (setattrfunc
)element_setattr
, /* tp_setattr */
1411 (reprfunc
)element_repr
, /* tp_repr */
1412 0, /* tp_as_number */
1413 &element_as_sequence
, /* tp_as_sequence */
1416 /* ==================================================================== */
1417 /* the tree builder type */
1422 PyObject
* root
; /* root node (first created node) */
1424 ElementObject
* this; /* current node */
1425 ElementObject
* last
; /* most recently created node */
1427 PyObject
* data
; /* data collector (string or list), or NULL */
1429 PyObject
* stack
; /* element stack */
1430 Py_ssize_t index
; /* current stack size (0=empty) */
1432 /* element tracing */
1433 PyObject
* events
; /* list of events, or NULL if not collecting */
1434 PyObject
* start_event_obj
; /* event objects (NULL to ignore) */
1435 PyObject
* end_event_obj
;
1436 PyObject
* start_ns_event_obj
;
1437 PyObject
* end_ns_event_obj
;
1439 } TreeBuilderObject
;
1441 staticforward PyTypeObject TreeBuilder_Type
;
1443 #define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1445 /* -------------------------------------------------------------------- */
1446 /* constructor and destructor */
1449 treebuilder_new(void)
1451 TreeBuilderObject
* self
;
1453 self
= PyObject_New(TreeBuilderObject
, &TreeBuilder_Type
);
1460 self
->this = (ElementObject
*) Py_None
;
1463 self
->last
= (ElementObject
*) Py_None
;
1467 self
->stack
= PyList_New(20);
1470 self
->events
= NULL
;
1471 self
->start_event_obj
= self
->end_event_obj
= NULL
;
1472 self
->start_ns_event_obj
= self
->end_ns_event_obj
= NULL
;
1474 ALLOC(sizeof(TreeBuilderObject
), "create treebuilder");
1476 return (PyObject
*) self
;
1480 treebuilder(PyObject
* self_
, PyObject
* args
)
1482 if (!PyArg_ParseTuple(args
, ":TreeBuilder"))
1485 return treebuilder_new();
1489 treebuilder_dealloc(TreeBuilderObject
* self
)
1491 Py_XDECREF(self
->end_ns_event_obj
);
1492 Py_XDECREF(self
->start_ns_event_obj
);
1493 Py_XDECREF(self
->end_event_obj
);
1494 Py_XDECREF(self
->start_event_obj
);
1495 Py_XDECREF(self
->events
);
1496 Py_DECREF(self
->stack
);
1497 Py_XDECREF(self
->data
);
1498 Py_DECREF(self
->last
);
1499 Py_DECREF(self
->this);
1500 Py_XDECREF(self
->root
);
1502 RELEASE(sizeof(TreeBuilderObject
), "destroy treebuilder");
1507 /* -------------------------------------------------------------------- */
1511 treebuilder_handle_xml(TreeBuilderObject
* self
, PyObject
* encoding
,
1512 PyObject
* standalone
)
1518 treebuilder_handle_start(TreeBuilderObject
* self
, PyObject
* tag
,
1525 if (self
->this == self
->last
) {
1526 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1527 self
->last
->text
= JOIN_SET(
1528 self
->data
, PyList_CheckExact(self
->data
)
1531 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1532 self
->last
->tail
= JOIN_SET(
1533 self
->data
, PyList_CheckExact(self
->data
)
1539 node
= element_new(tag
, attrib
);
1543 this = (PyObject
*) self
->this;
1545 if (this != Py_None
) {
1546 if (element_add_subelement((ElementObject
*) this, node
) < 0)
1552 "multiple elements on top level"
1560 if (self
->index
< PyList_GET_SIZE(self
->stack
)) {
1561 if (PyList_SetItem(self
->stack
, self
->index
, this) < 0)
1565 if (PyList_Append(self
->stack
, this) < 0)
1572 self
->this = (ElementObject
*) node
;
1574 Py_DECREF(self
->last
);
1576 self
->last
= (ElementObject
*) node
;
1578 if (self
->start_event_obj
) {
1580 PyObject
* action
= self
->start_event_obj
;
1581 res
= PyTuple_New(2);
1583 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1584 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1585 PyList_Append(self
->events
, res
);
1588 PyErr_Clear(); /* FIXME: propagate error */
1599 treebuilder_handle_data(TreeBuilderObject
* self
, PyObject
* data
)
1602 if (self
->last
== (ElementObject
*) Py_None
) {
1603 /* ignore calls to data before the first call to start */
1606 /* store the first item as is */
1607 Py_INCREF(data
); self
->data
= data
;
1609 /* more than one item; use a list to collect items */
1610 if (PyString_CheckExact(self
->data
) && self
->data
->ob_refcnt
== 1 &&
1611 PyString_CheckExact(data
) && PyString_GET_SIZE(data
) == 1) {
1612 /* expat often generates single character data sections; handle
1613 the most common case by resizing the existing string... */
1614 Py_ssize_t size
= PyString_GET_SIZE(self
->data
);
1615 if (_PyString_Resize(&self
->data
, size
+ 1) < 0)
1617 PyString_AS_STRING(self
->data
)[size
] = PyString_AS_STRING(data
)[0];
1618 } else if (PyList_CheckExact(self
->data
)) {
1619 if (PyList_Append(self
->data
, data
) < 0)
1622 PyObject
* list
= PyList_New(2);
1625 PyList_SET_ITEM(list
, 0, self
->data
);
1626 Py_INCREF(data
); PyList_SET_ITEM(list
, 1, data
);
1635 treebuilder_handle_end(TreeBuilderObject
* self
, PyObject
* tag
)
1640 if (self
->this == self
->last
) {
1641 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1642 self
->last
->text
= JOIN_SET(
1643 self
->data
, PyList_CheckExact(self
->data
)
1646 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1647 self
->last
->tail
= JOIN_SET(
1648 self
->data
, PyList_CheckExact(self
->data
)
1654 if (self
->index
== 0) {
1657 "pop from empty stack"
1664 item
= PyList_GET_ITEM(self
->stack
, self
->index
);
1667 Py_DECREF(self
->last
);
1669 self
->last
= (ElementObject
*) self
->this;
1670 self
->this = (ElementObject
*) item
;
1672 if (self
->end_event_obj
) {
1674 PyObject
* action
= self
->end_event_obj
;
1675 PyObject
* node
= (PyObject
*) self
->last
;
1676 res
= PyTuple_New(2);
1678 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1679 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1680 PyList_Append(self
->events
, res
);
1683 PyErr_Clear(); /* FIXME: propagate error */
1686 Py_INCREF(self
->last
);
1687 return (PyObject
*) self
->last
;
1691 treebuilder_handle_namespace(TreeBuilderObject
* self
, int start
,
1692 const char* prefix
, const char *uri
)
1702 if (!self
->start_ns_event_obj
)
1704 action
= self
->start_ns_event_obj
;
1705 /* FIXME: prefix and uri use utf-8 encoding! */
1706 parcel
= Py_BuildValue("ss", (prefix
) ? prefix
: "", uri
);
1711 if (!self
->end_ns_event_obj
)
1713 action
= self
->end_ns_event_obj
;
1719 res
= PyTuple_New(2);
1722 PyTuple_SET_ITEM(res
, 0, action
);
1723 PyTuple_SET_ITEM(res
, 1, parcel
);
1724 PyList_Append(self
->events
, res
);
1727 PyErr_Clear(); /* FIXME: propagate error */
1730 /* -------------------------------------------------------------------- */
1731 /* methods (in alphabetical order) */
1734 treebuilder_data(TreeBuilderObject
* self
, PyObject
* args
)
1737 if (!PyArg_ParseTuple(args
, "O:data", &data
))
1740 return treebuilder_handle_data(self
, data
);
1744 treebuilder_end(TreeBuilderObject
* self
, PyObject
* args
)
1747 if (!PyArg_ParseTuple(args
, "O:end", &tag
))
1750 return treebuilder_handle_end(self
, tag
);
/* Produce the builder's final result.  treebuilder_close() returns this
   value directly, and the parser's close/_parse methods return it when
   the parser target is an exact TreeBuilder. */
1754 treebuilder_done(TreeBuilderObject
* self
)
1758 /* FIXME: check stack size? */
1770 treebuilder_close(TreeBuilderObject
* self
, PyObject
* args
)
1772 if (!PyArg_ParseTuple(args
, ":close"))
1775 return treebuilder_done(self
);
1779 treebuilder_start(TreeBuilderObject
* self
, PyObject
* args
)
1782 PyObject
* attrib
= Py_None
;
1783 if (!PyArg_ParseTuple(args
, "O|O:start", &tag
, &attrib
))
1786 return treebuilder_handle_start(self
, tag
, attrib
);
1790 treebuilder_xml(TreeBuilderObject
* self
, PyObject
* args
)
1793 PyObject
* standalone
;
1794 if (!PyArg_ParseTuple(args
, "OO:xml", &encoding
, &standalone
))
1797 return treebuilder_handle_xml(self
, encoding
, standalone
);
1800 static PyMethodDef treebuilder_methods
[] = {
1801 {"data", (PyCFunction
) treebuilder_data
, METH_VARARGS
},
1802 {"start", (PyCFunction
) treebuilder_start
, METH_VARARGS
},
1803 {"end", (PyCFunction
) treebuilder_end
, METH_VARARGS
},
1804 {"xml", (PyCFunction
) treebuilder_xml
, METH_VARARGS
},
1805 {"close", (PyCFunction
) treebuilder_close
, METH_VARARGS
},
1810 treebuilder_getattr(TreeBuilderObject
* self
, char* name
)
1812 return Py_FindMethod(treebuilder_methods
, (PyObject
*) self
, name
);
/* Type object for TreeBuilder instances: names the type, gives the
   instance size, and installs the deallocator and the tp_getattr hook
   (which looks names up in treebuilder_methods). */
1815 statichere PyTypeObject TreeBuilder_Type
= {
1816 PyObject_HEAD_INIT(NULL
)
1817 0, "TreeBuilder", sizeof(TreeBuilderObject
), 0,
1819 (destructor
)treebuilder_dealloc
, /* tp_dealloc */
1821 (getattrfunc
)treebuilder_getattr
, /* tp_getattr */
1824 /* ==================================================================== */
1825 /* the expat interface */
1827 #if defined(USE_EXPAT)
/* EXPAT(func) names an expat entry point.  With USE_PYEXPAT_CAPI the
   call is routed through the expat_capi dispatch table; the second
   definition resolves straight to the XML_-prefixed function. */
1831 #if defined(USE_PYEXPAT_CAPI)
1832 #include "pyexpat.h"
1833 static struct PyExpat_CAPI
* expat_capi
;
1834 #define EXPAT(func) (expat_capi->func)
1836 #define EXPAT(func) (XML_##func)
/* Handler callables looked up on the target by xmlparser() via
   PyObject_GetAttrString ("xml", "start", "data", "end", "comment",
   "pi").  The handlers test them for NULL before calling, and
   xmlparser_dealloc releases them with Py_XDECREF. */
1849 PyObject
* handle_xml
;
1850 PyObject
* handle_start
;
1851 PyObject
* handle_data
;
1852 PyObject
* handle_end
;
1854 PyObject
* handle_comment
;
1855 PyObject
* handle_pi
;
1859 staticforward PyTypeObject XMLParser_Type
;
#if defined(Py_USING_UNICODE)
/* Report whether any of the first `size` bytes of `string` has its high
   bit set, i.e. whether the 8-bit string contains UTF-8 characters.
   Returns 1 if so, 0 otherwise (including for size <= 0). */
LOCAL(int)
checkstring(const char* string, int size)
{
    int pos = 0;

    while (pos < size) {
        if (string[pos] & 0x80)
            return 1;
        pos++;
    }

    return 0;
}
#endif
1879 makestring(const char* string
, int size
)
1881 /* convert a UTF-8 string to either a 7-bit ascii string or a
1884 #if defined(Py_USING_UNICODE)
1885 if (checkstring(string
, size
))
1886 return PyUnicode_DecodeUTF8(string
, size
, "strict");
1889 return PyString_FromStringAndSize(string
, size
);
1893 makeuniversal(XMLParserObject
* self
, const char* string
)
1895 /* convert a UTF-8 tag/attribute name from the expat parser
1896 to a universal name string */
1898 int size
= strlen(string
);
1902 /* look the 'raw' name up in the names dictionary */
1903 key
= PyString_FromStringAndSize(string
, size
);
1907 value
= PyDict_GetItem(self
->names
, key
);
1912 /* new name. convert to universal name, and decode as
1919 /* look for namespace separator */
1920 for (i
= 0; i
< size
; i
++)
1921 if (string
[i
] == '}')
1924 /* convert to universal name */
1925 tag
= PyString_FromStringAndSize(NULL
, size
+1);
1926 p
= PyString_AS_STRING(tag
);
1928 memcpy(p
+1, string
, size
);
1931 /* plain name; use key as tag */
1936 /* decode universal name */
1937 #if defined(Py_USING_UNICODE)
1938 /* inline makestring, to avoid duplicating the source string if
1939 it's not an utf-8 string */
1940 p
= PyString_AS_STRING(tag
);
1941 if (checkstring(p
, size
)) {
1942 value
= PyUnicode_DecodeUTF8(p
, size
, "strict");
1950 value
= tag
; /* use tag as is */
1952 /* add to names dictionary */
1953 if (PyDict_SetItem(self
->names
, key
, value
) < 0) {
1964 /* -------------------------------------------------------------------- */
1968 expat_default_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
1975 if (data_len
< 2 || data_in
[0] != '&')
1978 key
= makestring(data_in
+ 1, data_len
- 2);
1982 value
= PyDict_GetItem(self
->entity
, key
);
1985 if (TreeBuilder_CheckExact(self
->target
))
1986 res
= treebuilder_handle_data(
1987 (TreeBuilderObject
*) self
->target
, value
1989 else if (self
->handle_data
)
1990 res
= PyObject_CallFunction(self
->handle_data
, "O", value
);
1996 PyExc_SyntaxError
, "undefined entity &%s;: line %ld, column %ld",
1997 PyString_AS_STRING(key
),
1998 EXPAT(GetErrorLineNumber
)(self
->parser
),
1999 EXPAT(GetErrorColumnNumber
)(self
->parser
)
/* Expat start-element handler: converts the tag name and the
   attribute name/value pairs to Python objects and forwards them to the
   target -- directly to treebuilder_handle_start for an exact
   TreeBuilder, otherwise through the target's "start" callable. */
2007 expat_start_handler(XMLParserObject
* self
, const XML_Char
* tag_in
,
2008 const XML_Char
**attrib_in
)
/* tag name, converted through the names cache */
2016 tag
= makeuniversal(self
, tag_in
);
2018 return; /* parser will look for errors */
/* attribute dictionary; attrib_in is walked as name/value pairs */
2022 attrib
= PyDict_New();
2025 while (attrib_in
[0] && attrib_in
[1]) {
2026 PyObject
* key
= makeuniversal(self
, attrib_in
[0]);
2027 PyObject
* value
= makestring(attrib_in
[1], strlen(attrib_in
[1]));
2028 if (!key
|| !value
) {
2034 ok
= PyDict_SetItem(attrib
, key
, value
);
/* dispatch to the target */
2048 if (TreeBuilder_CheckExact(self
->target
))
2050 res
= treebuilder_handle_start((TreeBuilderObject
*) self
->target
,
2052 else if (self
->handle_start
)
2053 res
= PyObject_CallFunction(self
->handle_start
, "OO", tag
, attrib
);
2064 expat_data_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
2070 data
= makestring(data_in
, data_len
);
2072 return; /* parser will look for errors */
2074 if (TreeBuilder_CheckExact(self
->target
))
2076 res
= treebuilder_handle_data((TreeBuilderObject
*) self
->target
, data
);
2077 else if (self
->handle_data
)
2078 res
= PyObject_CallFunction(self
->handle_data
, "O", data
);
2088 expat_end_handler(XMLParserObject
* self
, const XML_Char
* tag_in
)
2091 PyObject
* res
= NULL
;
2093 if (TreeBuilder_CheckExact(self
->target
))
2095 /* the standard tree builder doesn't look at the end tag */
2096 res
= treebuilder_handle_end(
2097 (TreeBuilderObject
*) self
->target
, Py_None
2099 else if (self
->handle_end
) {
2100 tag
= makeuniversal(self
, tag_in
);
2102 res
= PyObject_CallFunction(self
->handle_end
, "O", tag
);
2111 expat_start_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix
,
2112 const XML_Char
*uri
)
2114 treebuilder_handle_namespace(
2115 (TreeBuilderObject
*) self
->target
, 1, prefix
, uri
2120 expat_end_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix_in
)
2122 treebuilder_handle_namespace(
2123 (TreeBuilderObject
*) self
->target
, 0, NULL
, NULL
2128 expat_comment_handler(XMLParserObject
* self
, const XML_Char
* comment_in
)
2133 if (self
->handle_comment
) {
2134 comment
= makestring(comment_in
, strlen(comment_in
));
2136 res
= PyObject_CallFunction(self
->handle_comment
, "O", comment
);
2144 expat_pi_handler(XMLParserObject
* self
, const XML_Char
* target_in
,
2145 const XML_Char
* data_in
)
2151 if (self
->handle_pi
) {
2152 target
= makestring(target_in
, strlen(target_in
));
2153 data
= makestring(data_in
, strlen(data_in
));
2154 if (target
&& data
) {
2155 res
= PyObject_CallFunction(self
->handle_pi
, "OO", target
, data
);
/* Expat unknown-encoding handler: builds a byte-to-code-point map for
   the encoding `name` by decoding a 256-byte probe buffer with Python's
   codec machinery.  Returns XML_STATUS_ERROR when decoding fails or does
   not yield exactly 256 characters, XML_STATUS_OK otherwise. */
2166 #if defined(Py_USING_UNICODE)
2168 expat_unknown_encoding_handler(XMLParserObject
*self
, const XML_Char
*name
,
2173 unsigned char s
[256];
2176 memset(info
, 0, sizeof(XML_Encoding
));
2178 for (i
= 0; i
< 256; i
++)
/* undecodable bytes come back as the replacement character */
2181 u
= PyUnicode_Decode((char*) s
, 256, name
, "replace");
2183 return XML_STATUS_ERROR
;
2185 if (PyUnicode_GET_SIZE(u
) != 256) {
2187 return XML_STATUS_ERROR
;
2190 p
= PyUnicode_AS_UNICODE(u
);
/* copy every successfully decoded character into the expat map */
2192 for (i
= 0; i
< 256; i
++) {
2193 if (p
[i
] != Py_UNICODE_REPLACEMENT_CHARACTER
)
2194 info
->map
[i
] = p
[i
];
2201 return XML_STATUS_OK
;
2205 /* -------------------------------------------------------------------- */
2206 /* constructor and destructor */
2209 xmlparser(PyObject
* self_
, PyObject
* args
, PyObject
* kw
)
2211 XMLParserObject
* self
;
2212 /* FIXME: does this need to be static? */
2213 static XML_Memory_Handling_Suite memory_handler
;
2215 PyObject
* target
= NULL
;
2216 char* encoding
= NULL
;
2217 static char* kwlist
[] = { "target", "encoding", NULL
};
2218 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|Oz:XMLParser", kwlist
,
2219 &target
, &encoding
))
2222 #if defined(USE_PYEXPAT_CAPI)
2225 PyExc_RuntimeError
, "cannot load dispatch table from pyexpat"
2231 self
= PyObject_New(XMLParserObject
, &XMLParser_Type
);
2235 self
->entity
= PyDict_New();
2236 if (!self
->entity
) {
2241 self
->names
= PyDict_New();
2243 PyObject_Del(self
->entity
);
2248 memory_handler
.malloc_fcn
= PyObject_Malloc
;
2249 memory_handler
.realloc_fcn
= PyObject_Realloc
;
2250 memory_handler
.free_fcn
= PyObject_Free
;
2252 self
->parser
= EXPAT(ParserCreate_MM
)(encoding
, &memory_handler
, "}");
2253 if (!self
->parser
) {
2254 PyObject_Del(self
->names
);
2255 PyObject_Del(self
->entity
);
2261 /* setup target handlers */
2263 target
= treebuilder_new();
2265 EXPAT(ParserFree
)(self
->parser
);
2266 PyObject_Del(self
->names
);
2267 PyObject_Del(self
->entity
);
2273 self
->target
= target
;
2275 self
->handle_xml
= PyObject_GetAttrString(target
, "xml");
2276 self
->handle_start
= PyObject_GetAttrString(target
, "start");
2277 self
->handle_data
= PyObject_GetAttrString(target
, "data");
2278 self
->handle_end
= PyObject_GetAttrString(target
, "end");
2279 self
->handle_comment
= PyObject_GetAttrString(target
, "comment");
2280 self
->handle_pi
= PyObject_GetAttrString(target
, "pi");
2284 /* configure parser */
2285 EXPAT(SetUserData
)(self
->parser
, self
);
2286 EXPAT(SetElementHandler
)(
2288 (XML_StartElementHandler
) expat_start_handler
,
2289 (XML_EndElementHandler
) expat_end_handler
2291 EXPAT(SetDefaultHandlerExpand
)(
2293 (XML_DefaultHandler
) expat_default_handler
2295 EXPAT(SetCharacterDataHandler
)(
2297 (XML_CharacterDataHandler
) expat_data_handler
2299 if (self
->handle_comment
)
2300 EXPAT(SetCommentHandler
)(
2302 (XML_CommentHandler
) expat_comment_handler
2304 if (self
->handle_pi
)
2305 EXPAT(SetProcessingInstructionHandler
)(
2307 (XML_ProcessingInstructionHandler
) expat_pi_handler
2309 #if defined(Py_USING_UNICODE)
2310 EXPAT(SetUnknownEncodingHandler
)(
2312 (XML_UnknownEncodingHandler
) expat_unknown_encoding_handler
, NULL
2316 ALLOC(sizeof(XMLParserObject
), "create expatparser");
2318 return (PyObject
*) self
;
2322 xmlparser_dealloc(XMLParserObject
* self
)
2324 EXPAT(ParserFree
)(self
->parser
);
2326 Py_XDECREF(self
->handle_pi
);
2327 Py_XDECREF(self
->handle_comment
);
2328 Py_XDECREF(self
->handle_end
);
2329 Py_XDECREF(self
->handle_data
);
2330 Py_XDECREF(self
->handle_start
);
2331 Py_XDECREF(self
->handle_xml
);
2333 Py_DECREF(self
->target
);
2334 Py_DECREF(self
->entity
);
2335 Py_DECREF(self
->names
);
2337 RELEASE(sizeof(XMLParserObject
), "destroy expatparser");
2342 /* -------------------------------------------------------------------- */
2343 /* methods (in alphabetical order) */
2346 expat_parse(XMLParserObject
* self
, char* data
, int data_len
, int final
)
2350 ok
= EXPAT(Parse
)(self
->parser
, data
, data_len
, final
);
2352 if (PyErr_Occurred())
2357 PyExc_SyntaxError
, "%s: line %ld, column %ld",
2358 EXPAT(ErrorString
)(EXPAT(GetErrorCode
)(self
->parser
)),
2359 EXPAT(GetErrorLineNumber
)(self
->parser
),
2360 EXPAT(GetErrorColumnNumber
)(self
->parser
)
2369 xmlparser_close(XMLParserObject
* self
, PyObject
* args
)
2371 /* end feeding data to parser */
2374 if (!PyArg_ParseTuple(args
, ":close"))
2377 res
= expat_parse(self
, "", 0, 1);
2379 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2381 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2388 xmlparser_feed(XMLParserObject
* self
, PyObject
* args
)
2390 /* feed data to parser */
2394 if (!PyArg_ParseTuple(args
, "s#:feed", &data
, &data_len
))
2397 return expat_parse(self
, data
, data_len
, 0);
/* XMLParser._parse(fileobj): read the file-like object through its
   "read" method in 64 KiB requests, feeding each chunk to expat, then
   finish the parse.  With an exact TreeBuilder target the builder's
   final result is returned. */
2401 xmlparser_parse(XMLParserObject
* self
, PyObject
* args
)
2403 /* (internal) parse until end of input stream */
2410 if (!PyArg_ParseTuple(args
, "O:_parse", &fileobj
))
2413 reader
= PyObject_GetAttrString(fileobj
, "read");
2417 /* read from open file object */
2420 buffer
= PyObject_CallFunction(reader
, "i", 64*1024);
2423 /* read failed (e.g. due to KeyboardInterrupt) */
/* anything that is not an exact string, or an empty string, is tested
   for here */
2428 if (!PyString_CheckExact(buffer
) || PyString_GET_SIZE(buffer
) == 0) {
2434 self
, PyString_AS_STRING(buffer
), PyString_GET_SIZE(buffer
), 0
/* empty final chunk tells expat the document is complete */
2449 res
= expat_parse(self
, "", 0, 1);
2451 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2453 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2460 xmlparser_setevents(XMLParserObject
* self
, PyObject
* args
)
2462 /* activate element event reporting */
2465 TreeBuilderObject
* target
;
2467 PyObject
* events
; /* event collector */
2468 PyObject
* event_set
= Py_None
;
2469 if (!PyArg_ParseTuple(args
, "O!|O:_setevents", &PyList_Type
, &events
,
2473 if (!TreeBuilder_CheckExact(self
->target
)) {
2476 "event handling only supported for cElementTree.Treebuilder "
2482 target
= (TreeBuilderObject
*) self
->target
;
2485 Py_XDECREF(target
->events
);
2486 target
->events
= events
;
2488 /* clear out existing events */
2489 Py_XDECREF(target
->start_event_obj
); target
->start_event_obj
= NULL
;
2490 Py_XDECREF(target
->end_event_obj
); target
->end_event_obj
= NULL
;
2491 Py_XDECREF(target
->start_ns_event_obj
); target
->start_ns_event_obj
= NULL
;
2492 Py_XDECREF(target
->end_ns_event_obj
); target
->end_ns_event_obj
= NULL
;
2494 if (event_set
== Py_None
) {
2495 /* default is "end" only */
2496 target
->end_event_obj
= PyString_FromString("end");
2500 if (!PyTuple_Check(event_set
)) /* FIXME: handle arbitrary sequences */
2503 for (i
= 0; i
< PyTuple_GET_SIZE(event_set
); i
++) {
2504 PyObject
* item
= PyTuple_GET_ITEM(event_set
, i
);
2506 if (!PyString_Check(item
))
2508 event
= PyString_AS_STRING(item
);
2509 if (strcmp(event
, "start") == 0) {
2511 target
->start_event_obj
= item
;
2512 } else if (strcmp(event
, "end") == 0) {
2514 Py_XDECREF(target
->end_event_obj
);
2515 target
->end_event_obj
= item
;
2516 } else if (strcmp(event
, "start-ns") == 0) {
2518 Py_XDECREF(target
->start_ns_event_obj
);
2519 target
->start_ns_event_obj
= item
;
2520 EXPAT(SetNamespaceDeclHandler
)(
2522 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2523 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2525 } else if (strcmp(event
, "end-ns") == 0) {
2527 Py_XDECREF(target
->end_ns_event_obj
);
2528 target
->end_ns_event_obj
= item
;
2529 EXPAT(SetNamespaceDeclHandler
)(
2531 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2532 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2537 "unknown event '%s'", event
2548 "invalid event tuple"
2553 static PyMethodDef xmlparser_methods
[] = {
2554 {"feed", (PyCFunction
) xmlparser_feed
, METH_VARARGS
},
2555 {"close", (PyCFunction
) xmlparser_close
, METH_VARARGS
},
2556 {"_parse", (PyCFunction
) xmlparser_parse
, METH_VARARGS
},
2557 {"_setevents", (PyCFunction
) xmlparser_setevents
, METH_VARARGS
},
2562 xmlparser_getattr(XMLParserObject
* self
, char* name
)
2566 res
= Py_FindMethod(xmlparser_methods
, (PyObject
*) self
, name
);
2572 if (strcmp(name
, "entity") == 0)
2574 else if (strcmp(name
, "target") == 0)
2576 else if (strcmp(name
, "version") == 0) {
2578 sprintf(buffer
, "Expat %d.%d.%d", XML_MAJOR_VERSION
,
2579 XML_MINOR_VERSION
, XML_MICRO_VERSION
);
2580 return PyString_FromString(buffer
);
2582 PyErr_SetString(PyExc_AttributeError
, name
);
/* Type object for parser instances: names the type, gives the instance
   size, and installs the deallocator and the tp_getattr hook. */
2590 statichere PyTypeObject XMLParser_Type
= {
2591 PyObject_HEAD_INIT(NULL
)
2592 0, "XMLParser", sizeof(XMLParserObject
), 0,
2594 (destructor
)xmlparser_dealloc
, /* tp_dealloc */
2596 (getattrfunc
)xmlparser_getattr
, /* tp_getattr */
2601 /* ==================================================================== */
2602 /* python module interface */
2604 static PyMethodDef _functions
[] = {
2605 {"Element", (PyCFunction
) element
, METH_VARARGS
|METH_KEYWORDS
},
2606 {"SubElement", (PyCFunction
) subelement
, METH_VARARGS
|METH_KEYWORDS
},
2607 {"TreeBuilder", (PyCFunction
) treebuilder
, METH_VARARGS
},
2608 #if defined(USE_EXPAT)
2609 {"XMLParser", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2610 {"XMLTreeBuilder", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2616 init_elementtree(void)
2621 #if defined(USE_PYEXPAT_CAPI)
2622 struct PyExpat_CAPI
* capi
;
2625 /* Patch object type */
2626 Element_Type
.ob_type
= TreeBuilder_Type
.ob_type
= &PyType_Type
;
2627 #if defined(USE_EXPAT)
2628 XMLParser_Type
.ob_type
= &PyType_Type
;
2631 m
= Py_InitModule("_elementtree", _functions
);
2635 /* python glue code */
2641 PyDict_SetItemString(g
, "__builtins__", PyEval_GetBuiltins());
2645 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2646 "from __future__ import generators\n" /* enable yield under 2.2 */
2649 "from copy import copy, deepcopy\n"
2652 " from xml.etree import ElementTree\n"
2653 "except ImportError:\n"
2654 " import ElementTree\n"
2655 "ET = ElementTree\n"
2658 "import _elementtree as cElementTree\n"
2660 "try:\n" /* check if copy works as is */
2661 " copy(cElementTree.Element('x'))\n"
2663 " def copyelement(elem):\n"
2666 "def Comment(text=None):\n" /* public */
2667 " element = cElementTree.Element(ET.Comment)\n"
2668 " element.text = text\n"
2670 "cElementTree.Comment = Comment\n"
2672 "class ElementTree(ET.ElementTree):\n" /* public */
2673 " def parse(self, source, parser=None):\n"
2674 " if not hasattr(source, 'read'):\n"
2675 " source = open(source, 'rb')\n"
2676 " if parser is not None:\n"
2678 " data = source.read(65536)\n"
2681 " parser.feed(data)\n"
2682 " self._root = parser.close()\n"
2684 " parser = cElementTree.XMLParser()\n"
2685 " self._root = parser._parse(source)\n"
2686 " return self._root\n"
2687 "cElementTree.ElementTree = ElementTree\n"
2689 "def getiterator(node, tag=None):\n" /* helper */
2692 #if (PY_VERSION_HEX < 0x02020000)
2693 " nodes = []\n" /* 2.1 doesn't have yield */
2694 " if tag is None or node.tag == tag:\n"
2695 " nodes.append(node)\n"
2696 " for node in node:\n"
2697 " nodes.extend(getiterator(node, tag))\n"
2700 " if tag is None or node.tag == tag:\n"
2702 " for node in node:\n"
2703 " for node in getiterator(node, tag):\n"
2707 "def parse(source, parser=None):\n" /* public */
2708 " tree = ElementTree()\n"
2709 " tree.parse(source, parser)\n"
2711 "cElementTree.parse = parse\n"
2713 #if (PY_VERSION_HEX < 0x02020000)
2714 "if hasattr(ET, 'iterparse'):\n"
2715 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2717 "class iterparse(object):\n"
2719 " def __init__(self, file, events=None):\n"
2720 " if not hasattr(file, 'read'):\n"
2721 " file = open(file, 'rb')\n"
2722 " self._file = file\n"
2723 " self._events = events\n"
2724 " def __iter__(self):\n"
2726 " b = cElementTree.TreeBuilder()\n"
2727 " p = cElementTree.XMLParser(b)\n"
2728 " p._setevents(events, self._events)\n"
2730 " data = self._file.read(16384)\n"
2734 " for event in events:\n"
2737 " root = p.close()\n"
2738 " for event in events:\n"
2740 " self.root = root\n"
2741 "cElementTree.iterparse = iterparse\n"
2744 "def PI(target, text=None):\n" /* public */
2745 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2746 " element.text = target\n"
2748 " element.text = element.text + ' ' + text\n"
2751 " elem = cElementTree.Element(ET.PI)\n"
2752 " elem.text = text\n"
2754 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2756 "def XML(text):\n" /* public */
2757 " parser = cElementTree.XMLParser()\n"
2758 " parser.feed(text)\n"
2759 " return parser.close()\n"
2760 "cElementTree.XML = cElementTree.fromstring = XML\n"
2762 "def XMLID(text):\n" /* public */
2763 " tree = XML(text)\n"
2765 " for elem in tree.getiterator():\n"
2766 " id = elem.get('id')\n"
2769 " return tree, ids\n"
2770 "cElementTree.XMLID = XMLID\n"
2772 "cElementTree.dump = ET.dump\n"
2773 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2774 "cElementTree.iselement = ET.iselement\n"
2775 "cElementTree.QName = ET.QName\n"
2776 "cElementTree.tostring = ET.tostring\n"
2777 "cElementTree.VERSION = '" VERSION
"'\n"
2778 "cElementTree.__version__ = '" VERSION
"'\n"
2779 "cElementTree.XMLParserError = SyntaxError\n"
2783 PyRun_String(bootstrap
, Py_file_input
, g
, NULL
);
2785 elementpath_obj
= PyDict_GetItemString(g
, "ElementPath");
2787 elementtree_copyelement_obj
= PyDict_GetItemString(g
, "copyelement");
2788 if (elementtree_copyelement_obj
) {
2789 /* reduce hack needed; enable reduce method */
2791 for (mp
= element_methods
; mp
->ml_name
; mp
++)
2792 if (mp
->ml_meth
== (PyCFunction
) element_reduce
) {
2793 mp
->ml_name
= "__reduce__";
2798 elementtree_deepcopy_obj
= PyDict_GetItemString(g
, "deepcopy");
2799 elementtree_getiterator_obj
= PyDict_GetItemString(g
, "getiterator");
2801 #if defined(USE_PYEXPAT_CAPI)
2802 /* link against pyexpat, if possible */
2803 capi
= PyCObject_Import("pyexpat", "expat_CAPI");
2805 strcmp(capi
->magic
, PyExpat_CAPI_MAGIC
) == 0 &&
2806 capi
->size
<= sizeof(*expat_capi
) &&
2807 capi
->MAJOR_VERSION
== XML_MAJOR_VERSION
&&
2808 capi
->MINOR_VERSION
== XML_MINOR_VERSION
&&
2809 capi
->MICRO_VERSION
== XML_MICRO_VERSION
)