3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
51 #define VERSION "1.0.6-snapshot"
53 /* -------------------------------------------------------------------- */
56 /* Leave defined to include the expat-based XMLParser type */
59 /* Define to do all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
76 /* -------------------------------------------------------------------- */
79 static int memory
= 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
91 #define LOCAL(type) static __inline type __fastcall
93 #define LOCAL(type) static type
96 /* compatibility macros */
97 #if (PY_VERSION_HEX < 0x02050000)
98 typedef int Py_ssize_t
;
99 #define lenfunc inquiry
102 #if (PY_VERSION_HEX < 0x02040000)
103 #define PyDict_CheckExact PyDict_Check
104 #if (PY_VERSION_HEX < 0x02020000)
105 #define PyList_CheckExact PyList_Check
106 #define PyString_CheckExact PyString_Check
107 #if (PY_VERSION_HEX >= 0x01060000)
108 #define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
113 #if !defined(Py_RETURN_NONE)
114 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
117 /* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
121 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
125 /* glue functions (see the init function for details) */
126 static PyObject
* elementtree_copyelement_obj
;
127 static PyObject
* elementtree_deepcopy_obj
;
128 static PyObject
* elementtree_getiterator_obj
;
129 static PyObject
* elementpath_obj
;
134 deepcopy(PyObject
* object
, PyObject
* memo
)
136 /* do a deep copy of the given object */
141 if (!elementtree_deepcopy_obj
) {
144 "deepcopy helper not found"
149 args
= PyTuple_New(2);
153 Py_INCREF(object
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) object
);
154 Py_INCREF(memo
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) memo
);
156 result
= PyObject_CallObject(elementtree_deepcopy_obj
, args
);
164 list_join(PyObject
* list
)
166 /* join list elements (destroying the list in the process) */
173 switch (PyList_GET_SIZE(list
)) {
176 return PyString_FromString("");
178 result
= PyList_GET_ITEM(list
, 0);
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
187 joiner
= PySequence_GetSlice(PyList_GET_ITEM(list
, 0), 0, 0);
191 function
= PyObject_GetAttrString(joiner
, "join");
197 args
= PyTuple_New(1);
201 PyTuple_SET_ITEM(args
, 0, list
);
203 result
= PyObject_CallObject(function
, args
);
205 Py_DECREF(args
); /* also removes list */
212 #if (PY_VERSION_HEX < 0x02020000)
214 PyDict_Update(PyObject
* dict
, PyObject
* other
)
216 /* PyDict_Update emulation for 2.1 and earlier */
220 res
= PyObject_CallMethod(dict
, "update", "O", other
);
229 /* -------------------------------------------------------------------- */
230 /* the element type */
234 /* attributes (a dictionary object), or None if no attributes */
238 int length
; /* actual number of items */
239 int allocated
; /* allocated items */
241 /* this either points to _children or to a malloced buffer */
244 PyObject
* _children
[STATIC_CHILDREN
];
246 } ElementObjectExtra
;
251 /* element tag (a string). */
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
266 ElementObjectExtra
* extra
;
270 staticforward PyTypeObject Element_Type
;
272 #define Element_CheckExact(op) ((op)->ob_type == &Element_Type)
274 /* -------------------------------------------------------------------- */
275 /* element constructor and destructor */
278 element_new_extra(ElementObject
* self
, PyObject
* attrib
)
280 self
->extra
= PyObject_Malloc(sizeof(ElementObjectExtra
));
288 self
->extra
->attrib
= attrib
;
290 self
->extra
->length
= 0;
291 self
->extra
->allocated
= STATIC_CHILDREN
;
292 self
->extra
->children
= self
->extra
->_children
;
298 element_dealloc_extra(ElementObject
* self
)
302 Py_DECREF(self
->extra
->attrib
);
304 for (i
= 0; i
< self
->extra
->length
; i
++)
305 Py_DECREF(self
->extra
->children
[i
]);
307 if (self
->extra
->children
!= self
->extra
->_children
)
308 PyObject_Free(self
->extra
->children
);
310 PyObject_Free(self
->extra
);
314 element_new(PyObject
* tag
, PyObject
* attrib
)
318 self
= PyObject_New(ElementObject
, &Element_Type
);
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib
) && !PyDict_Size(attrib
))
328 if (attrib
!= Py_None
) {
330 if (element_new_extra(self
, attrib
) < 0) {
335 self
->extra
->length
= 0;
336 self
->extra
->allocated
= STATIC_CHILDREN
;
337 self
->extra
->children
= self
->extra
->_children
;
345 self
->text
= Py_None
;
348 self
->tail
= Py_None
;
350 ALLOC(sizeof(ElementObject
), "create element");
352 return (PyObject
*) self
;
356 element_resize(ElementObject
* self
, int extra
)
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
365 element_new_extra(self
, NULL
);
367 size
= self
->extra
->length
+ extra
;
369 if (size
> self
->extra
->allocated
) {
370 /* use Python 2.4's list growth strategy */
371 size
= (size
>> 3) + (size
< 9 ? 3 : 6) + size
;
372 if (self
->extra
->children
!= self
->extra
->_children
) {
373 children
= PyObject_Realloc(self
->extra
->children
,
374 size
* sizeof(PyObject
*));
378 children
= PyObject_Malloc(size
* sizeof(PyObject
*));
381 /* copy existing children from static area to malloc buffer */
382 memcpy(children
, self
->extra
->children
,
383 self
->extra
->length
* sizeof(PyObject
*));
385 self
->extra
->children
= children
;
386 self
->extra
->allocated
= size
;
397 element_add_subelement(ElementObject
* self
, PyObject
* element
)
399 /* add a child element to a parent */
401 if (element_resize(self
, 1) < 0)
405 self
->extra
->children
[self
->extra
->length
] = element
;
407 self
->extra
->length
++;
413 element_get_attrib(ElementObject
* self
)
415 /* return borrowed reference to attrib dictionary */
416 /* note: this function assumes that the extra section exists */
418 PyObject
* res
= self
->extra
->attrib
;
420 if (res
== Py_None
) {
421 /* create missing dictionary */
425 self
->extra
->attrib
= res
;
432 element_get_text(ElementObject
* self
)
434 /* return borrowed reference to text attribute */
436 PyObject
* res
= self
->text
;
440 if (PyList_CheckExact(res
)) {
441 res
= list_join(res
);
452 element_get_tail(ElementObject
* self
)
454 /* return borrowed reference to tail attribute */
456 PyObject
* res
= self
->tail
;
460 if (PyList_CheckExact(res
)) {
461 res
= list_join(res
);
472 element(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
477 PyObject
* attrib
= NULL
;
478 if (!PyArg_ParseTuple(args
, "O|O!:Element", &tag
,
479 &PyDict_Type
, &attrib
))
483 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
487 PyDict_Update(attrib
, kw
);
493 elem
= element_new(tag
, attrib
);
501 subelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
505 ElementObject
* parent
;
507 PyObject
* attrib
= NULL
;
508 if (!PyArg_ParseTuple(args
, "O!O|O!:SubElement",
509 &Element_Type
, &parent
, &tag
,
510 &PyDict_Type
, &attrib
))
514 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
518 PyDict_Update(attrib
, kw
);
524 elem
= element_new(tag
, attrib
);
528 if (element_add_subelement(parent
, elem
) < 0) {
537 element_dealloc(ElementObject
* self
)
540 element_dealloc_extra(self
);
542 /* discard attributes */
543 Py_DECREF(self
->tag
);
544 Py_DECREF(JOIN_OBJ(self
->text
));
545 Py_DECREF(JOIN_OBJ(self
->tail
));
547 RELEASE(sizeof(ElementObject
), "destroy element");
552 /* -------------------------------------------------------------------- */
553 /* methods (in alphabetical order) */
556 element_append(ElementObject
* self
, PyObject
* args
)
559 if (!PyArg_ParseTuple(args
, "O!:append", &Element_Type
, &element
))
562 if (element_add_subelement(self
, element
) < 0)
569 element_clear(ElementObject
* self
, PyObject
* args
)
571 if (!PyArg_ParseTuple(args
, ":clear"))
575 element_dealloc_extra(self
);
580 Py_DECREF(JOIN_OBJ(self
->text
));
581 self
->text
= Py_None
;
584 Py_DECREF(JOIN_OBJ(self
->tail
));
585 self
->tail
= Py_None
;
591 element_copy(ElementObject
* self
, PyObject
* args
)
594 ElementObject
* element
;
596 if (!PyArg_ParseTuple(args
, ":__copy__"))
599 element
= (ElementObject
*) element_new(
600 self
->tag
, (self
->extra
) ? self
->extra
->attrib
: Py_None
605 Py_DECREF(JOIN_OBJ(element
->text
));
606 element
->text
= self
->text
;
607 Py_INCREF(JOIN_OBJ(element
->text
));
609 Py_DECREF(JOIN_OBJ(element
->tail
));
610 element
->tail
= self
->tail
;
611 Py_INCREF(JOIN_OBJ(element
->tail
));
615 if (element_resize(element
, self
->extra
->length
) < 0) {
620 for (i
= 0; i
< self
->extra
->length
; i
++) {
621 Py_INCREF(self
->extra
->children
[i
]);
622 element
->extra
->children
[i
] = self
->extra
->children
[i
];
625 element
->extra
->length
= self
->extra
->length
;
629 return (PyObject
*) element
;
633 element_deepcopy(ElementObject
* self
, PyObject
* args
)
636 ElementObject
* element
;
644 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
647 tag
= deepcopy(self
->tag
, memo
);
652 attrib
= deepcopy(self
->extra
->attrib
, memo
);
662 element
= (ElementObject
*) element_new(tag
, attrib
);
670 text
= deepcopy(JOIN_OBJ(self
->text
), memo
);
673 Py_DECREF(element
->text
);
674 element
->text
= JOIN_SET(text
, JOIN_GET(self
->text
));
676 tail
= deepcopy(JOIN_OBJ(self
->tail
), memo
);
679 Py_DECREF(element
->tail
);
680 element
->tail
= JOIN_SET(tail
, JOIN_GET(self
->tail
));
684 if (element_resize(element
, self
->extra
->length
) < 0)
687 for (i
= 0; i
< self
->extra
->length
; i
++) {
688 PyObject
* child
= deepcopy(self
->extra
->children
[i
], memo
);
690 element
->extra
->length
= i
;
693 element
->extra
->children
[i
] = child
;
696 element
->extra
->length
= self
->extra
->length
;
700 /* add object to memo dictionary (so deepcopy won't visit it again) */
701 id
= PyInt_FromLong((Py_uintptr_t
) self
);
703 i
= PyDict_SetItem(memo
, id
, (PyObject
*) element
);
710 return (PyObject
*) element
;
718 checkpath(PyObject
* tag
)
723 /* check if a tag contains an xpath character */
725 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
727 #if defined(Py_USING_UNICODE)
728 if (PyUnicode_Check(tag
)) {
729 Py_UNICODE
*p
= PyUnicode_AS_UNICODE(tag
);
730 for (i
= 0; i
< PyUnicode_GET_SIZE(tag
); i
++) {
733 else if (p
[i
] == '}')
735 else if (check
&& PATHCHAR(p
[i
]))
741 if (PyString_Check(tag
)) {
742 char *p
= PyString_AS_STRING(tag
);
743 for (i
= 0; i
< PyString_GET_SIZE(tag
); i
++) {
746 else if (p
[i
] == '}')
748 else if (check
&& PATHCHAR(p
[i
]))
754 return 1; /* unknown type; might be path expression */
758 element_find(ElementObject
* self
, PyObject
* args
)
763 if (!PyArg_ParseTuple(args
, "O:find", &tag
))
767 return PyObject_CallMethod(
768 elementpath_obj
, "find", "OO", self
, tag
774 for (i
= 0; i
< self
->extra
->length
; i
++) {
775 PyObject
* item
= self
->extra
->children
[i
];
776 if (Element_CheckExact(item
) &&
777 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
787 element_findtext(ElementObject
* self
, PyObject
* args
)
792 PyObject
* default_value
= Py_None
;
793 if (!PyArg_ParseTuple(args
, "O|O:findtext", &tag
, &default_value
))
797 return PyObject_CallMethod(
798 elementpath_obj
, "findtext", "OOO", self
, tag
, default_value
802 Py_INCREF(default_value
);
803 return default_value
;
806 for (i
= 0; i
< self
->extra
->length
; i
++) {
807 ElementObject
* item
= (ElementObject
*) self
->extra
->children
[i
];
808 if (Element_CheckExact(item
) && !PyObject_Compare(item
->tag
, tag
)) {
809 PyObject
* text
= element_get_text(item
);
811 return PyString_FromString("");
817 Py_INCREF(default_value
);
818 return default_value
;
822 element_findall(ElementObject
* self
, PyObject
* args
)
828 if (!PyArg_ParseTuple(args
, "O:findall", &tag
))
832 return PyObject_CallMethod(
833 elementpath_obj
, "findall", "OO", self
, tag
843 for (i
= 0; i
< self
->extra
->length
; i
++) {
844 PyObject
* item
= self
->extra
->children
[i
];
845 if (Element_CheckExact(item
) &&
846 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
847 if (PyList_Append(out
, item
) < 0) {
858 element_get(ElementObject
* self
, PyObject
* args
)
863 PyObject
* default_value
= Py_None
;
864 if (!PyArg_ParseTuple(args
, "O|O:get", &key
, &default_value
))
867 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
868 value
= default_value
;
870 value
= PyDict_GetItem(self
->extra
->attrib
, key
);
872 value
= default_value
;
880 element_getchildren(ElementObject
* self
, PyObject
* args
)
885 if (!PyArg_ParseTuple(args
, ":getchildren"))
889 return PyList_New(0);
891 list
= PyList_New(self
->extra
->length
);
895 for (i
= 0; i
< self
->extra
->length
; i
++) {
896 PyObject
* item
= self
->extra
->children
[i
];
898 PyList_SET_ITEM(list
, i
, item
);
905 element_getiterator(ElementObject
* self
, PyObject
* args
)
909 PyObject
* tag
= Py_None
;
910 if (!PyArg_ParseTuple(args
, "|O:getiterator", &tag
))
913 if (!elementtree_getiterator_obj
) {
916 "getiterator helper not found"
921 args
= PyTuple_New(2);
925 Py_INCREF(self
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) self
);
926 Py_INCREF(tag
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) tag
);
928 result
= PyObject_CallObject(elementtree_getiterator_obj
, args
);
936 element_getitem(PyObject
* self_
, Py_ssize_t index
)
938 ElementObject
* self
= (ElementObject
*) self_
;
940 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
943 "child index out of range"
948 Py_INCREF(self
->extra
->children
[index
]);
949 return self
->extra
->children
[index
];
953 element_getslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
)
955 ElementObject
* self
= (ElementObject
*) self_
;
960 return PyList_New(0);
962 /* standard clamping */
967 if (end
> self
->extra
->length
)
968 end
= self
->extra
->length
;
972 list
= PyList_New(end
- start
);
976 for (i
= start
; i
< end
; i
++) {
977 PyObject
* item
= self
->extra
->children
[i
];
979 PyList_SET_ITEM(list
, i
- start
, item
);
986 element_insert(ElementObject
* self
, PyObject
* args
)
992 if (!PyArg_ParseTuple(args
, "iO!:insert", &index
,
993 &Element_Type
, &element
))
997 element_new_extra(self
, NULL
);
1001 if (index
> self
->extra
->length
)
1002 index
= self
->extra
->length
;
1004 if (element_resize(self
, 1) < 0)
1007 for (i
= self
->extra
->length
; i
> index
; i
--)
1008 self
->extra
->children
[i
] = self
->extra
->children
[i
-1];
1011 self
->extra
->children
[index
] = element
;
1013 self
->extra
->length
++;
1019 element_items(ElementObject
* self
, PyObject
* args
)
1021 if (!PyArg_ParseTuple(args
, ":items"))
1024 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1025 return PyList_New(0);
1027 return PyDict_Items(self
->extra
->attrib
);
1031 element_keys(ElementObject
* self
, PyObject
* args
)
1033 if (!PyArg_ParseTuple(args
, ":keys"))
1036 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1037 return PyList_New(0);
1039 return PyDict_Keys(self
->extra
->attrib
);
1043 element_length(ElementObject
* self
)
1048 return self
->extra
->length
;
1052 element_makeelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
1058 if (!PyArg_ParseTuple(args
, "OO:makeelement", &tag
, &attrib
))
1061 attrib
= PyDict_Copy(attrib
);
1065 elem
= element_new(tag
, attrib
);
1073 element_reduce(ElementObject
* self
, PyObject
* args
)
1075 if (!PyArg_ParseTuple(args
, ":__reduce__"))
1078 /* Hack alert: This method is used to work around a __copy__
1079 problem on certain 2.3 and 2.4 versions. To save time and
1080 simplify the code, we create the copy in here, and use a dummy
1081 copyelement helper to trick the copy module into doing the
1084 if (!elementtree_copyelement_obj
) {
1087 "copyelement helper not found"
1092 return Py_BuildValue(
1093 "O(N)", elementtree_copyelement_obj
, element_copy(self
, args
)
1098 element_remove(ElementObject
* self
, PyObject
* args
)
1103 if (!PyArg_ParseTuple(args
, "O!:remove", &Element_Type
, &element
))
1107 /* element has no children, so raise exception */
1110 "list.remove(x): x not in list"
1115 for (i
= 0; i
< self
->extra
->length
; i
++) {
1116 if (self
->extra
->children
[i
] == element
)
1118 if (PyObject_Compare(self
->extra
->children
[i
], element
) == 0)
1122 if (i
== self
->extra
->length
) {
1123 /* element is not in children, so raise exception */
1126 "list.remove(x): x not in list"
1131 Py_DECREF(self
->extra
->children
[i
]);
1133 self
->extra
->length
--;
1135 for (; i
< self
->extra
->length
; i
++)
1136 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1142 element_repr(ElementObject
* self
)
1147 repr
= PyString_FromString("<Element ");
1149 PyString_ConcatAndDel(&repr
, PyObject_Repr(self
->tag
));
1151 sprintf(buffer
, " at %p>", self
);
1152 PyString_ConcatAndDel(&repr
, PyString_FromString(buffer
));
1158 element_set(ElementObject
* self
, PyObject
* args
)
1164 if (!PyArg_ParseTuple(args
, "OO:set", &key
, &value
))
1168 element_new_extra(self
, NULL
);
1170 attrib
= element_get_attrib(self
);
1174 if (PyDict_SetItem(attrib
, key
, value
) < 0)
1181 element_setslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
, PyObject
* item
)
1183 ElementObject
* self
= (ElementObject
*) self_
;
1184 Py_ssize_t i
, new, old
;
1185 PyObject
* recycle
= NULL
;
1188 element_new_extra(self
, NULL
);
1190 /* standard clamping */
1195 if (end
> self
->extra
->length
)
1196 end
= self
->extra
->length
;
1204 else if (PyList_CheckExact(item
)) {
1205 new = PyList_GET_SIZE(item
);
1207 /* FIXME: support arbitrary sequences? */
1210 "expected list, not \"%.200s\"", item
->ob_type
->tp_name
1216 /* to avoid recursive calls to this method (via decref), move
1217 old items to the recycle bin here, and get rid of them when
1218 we're done modifying the element */
1219 recycle
= PyList_New(old
);
1220 for (i
= 0; i
< old
; i
++)
1221 PyList_SET_ITEM(recycle
, i
, self
->extra
->children
[i
+ start
]);
1226 for (i
= end
; i
< self
->extra
->length
; i
++)
1227 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1228 } else if (new > old
) {
1230 if (element_resize(self
, new - old
) < 0)
1232 for (i
= self
->extra
->length
-1; i
>= end
; i
--)
1233 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1236 /* replace the slice */
1237 for (i
= 0; i
< new; i
++) {
1238 PyObject
* element
= PyList_GET_ITEM(item
, i
);
1240 self
->extra
->children
[i
+ start
] = element
;
1243 self
->extra
->length
+= new - old
;
1245 /* discard the recycle bin, and everything in it */
1246 Py_XDECREF(recycle
);
1252 element_setitem(PyObject
* self_
, Py_ssize_t index
, PyObject
* item
)
1254 ElementObject
* self
= (ElementObject
*) self_
;
1258 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
1261 "child assignment index out of range");
1265 old
= self
->extra
->children
[index
];
1269 self
->extra
->children
[index
] = item
;
1271 self
->extra
->length
--;
1272 for (i
= index
; i
< self
->extra
->length
; i
++)
1273 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1281 static PyMethodDef element_methods
[] = {
1283 {"clear", (PyCFunction
) element_clear
, METH_VARARGS
},
1285 {"get", (PyCFunction
) element_get
, METH_VARARGS
},
1286 {"set", (PyCFunction
) element_set
, METH_VARARGS
},
1288 {"find", (PyCFunction
) element_find
, METH_VARARGS
},
1289 {"findtext", (PyCFunction
) element_findtext
, METH_VARARGS
},
1290 {"findall", (PyCFunction
) element_findall
, METH_VARARGS
},
1292 {"append", (PyCFunction
) element_append
, METH_VARARGS
},
1293 {"insert", (PyCFunction
) element_insert
, METH_VARARGS
},
1294 {"remove", (PyCFunction
) element_remove
, METH_VARARGS
},
1296 {"getiterator", (PyCFunction
) element_getiterator
, METH_VARARGS
},
1297 {"getchildren", (PyCFunction
) element_getchildren
, METH_VARARGS
},
1299 {"items", (PyCFunction
) element_items
, METH_VARARGS
},
1300 {"keys", (PyCFunction
) element_keys
, METH_VARARGS
},
1302 {"makeelement", (PyCFunction
) element_makeelement
, METH_VARARGS
},
1304 {"__copy__", (PyCFunction
) element_copy
, METH_VARARGS
},
1305 {"__deepcopy__", (PyCFunction
) element_deepcopy
, METH_VARARGS
},
1307 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1308 C objects correctly, so we have to fake it using a __reduce__-
1309 based hack (see the element_reduce implementation above for
1312 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1313 using a runtime test to figure out if we need to fake things
1314 or not (see the init code below). The following entry is
1315 enabled only if the hack is needed. */
1317 {"!__reduce__", (PyCFunction
) element_reduce
, METH_VARARGS
},
1323 element_getattr(ElementObject
* self
, char* name
)
1327 res
= Py_FindMethod(element_methods
, (PyObject
*) self
, name
);
1333 if (strcmp(name
, "tag") == 0)
1335 else if (strcmp(name
, "text") == 0)
1336 res
= element_get_text(self
);
1337 else if (strcmp(name
, "tail") == 0) {
1338 res
= element_get_tail(self
);
1339 } else if (strcmp(name
, "attrib") == 0) {
1341 element_new_extra(self
, NULL
);
1342 res
= element_get_attrib(self
);
1344 PyErr_SetString(PyExc_AttributeError
, name
);
1356 element_setattr(ElementObject
* self
, const char* name
, PyObject
* value
)
1358 if (value
== NULL
) {
1360 PyExc_AttributeError
,
1361 "can't delete element attributes"
1366 if (strcmp(name
, "tag") == 0) {
1367 Py_DECREF(self
->tag
);
1369 Py_INCREF(self
->tag
);
1370 } else if (strcmp(name
, "text") == 0) {
1371 Py_DECREF(JOIN_OBJ(self
->text
));
1373 Py_INCREF(self
->text
);
1374 } else if (strcmp(name
, "tail") == 0) {
1375 Py_DECREF(JOIN_OBJ(self
->tail
));
1377 Py_INCREF(self
->tail
);
1378 } else if (strcmp(name
, "attrib") == 0) {
1380 element_new_extra(self
, NULL
);
1381 Py_DECREF(self
->extra
->attrib
);
1382 self
->extra
->attrib
= value
;
1383 Py_INCREF(self
->extra
->attrib
);
1385 PyErr_SetString(PyExc_AttributeError
, name
);
1392 static PySequenceMethods element_as_sequence
= {
1393 (lenfunc
) element_length
,
1402 statichere PyTypeObject Element_Type
= {
1403 PyObject_HEAD_INIT(NULL
)
1404 0, "Element", sizeof(ElementObject
), 0,
1406 (destructor
)element_dealloc
, /* tp_dealloc */
1408 (getattrfunc
)element_getattr
, /* tp_getattr */
1409 (setattrfunc
)element_setattr
, /* tp_setattr */
1411 (reprfunc
)element_repr
, /* tp_repr */
1412 0, /* tp_as_number */
1413 &element_as_sequence
, /* tp_as_sequence */
1416 /* ==================================================================== */
1417 /* the tree builder type */
1422 PyObject
* root
; /* root node (first created node) */
1424 ElementObject
* this; /* current node */
1425 ElementObject
* last
; /* most recently created node */
1427 PyObject
* data
; /* data collector (string or list), or NULL */
1429 PyObject
* stack
; /* element stack */
1430 Py_ssize_t index
; /* current stack size (0=empty) */
1432 /* element tracing */
1433 PyObject
* events
; /* list of events, or NULL if not collecting */
1434 PyObject
* start_event_obj
; /* event objects (NULL to ignore) */
1435 PyObject
* end_event_obj
;
1436 PyObject
* start_ns_event_obj
;
1437 PyObject
* end_ns_event_obj
;
1439 } TreeBuilderObject
;
1441 staticforward PyTypeObject TreeBuilder_Type
;
1443 #define TreeBuilder_CheckExact(op) ((op)->ob_type == &TreeBuilder_Type)
1445 /* -------------------------------------------------------------------- */
1446 /* constructor and destructor */
1449 treebuilder_new(void)
1451 TreeBuilderObject
* self
;
1453 self
= PyObject_New(TreeBuilderObject
, &TreeBuilder_Type
);
1460 self
->this = (ElementObject
*) Py_None
;
1463 self
->last
= (ElementObject
*) Py_None
;
1467 self
->stack
= PyList_New(20);
1470 self
->events
= NULL
;
1471 self
->start_event_obj
= self
->end_event_obj
= NULL
;
1472 self
->start_ns_event_obj
= self
->end_ns_event_obj
= NULL
;
1474 ALLOC(sizeof(TreeBuilderObject
), "create treebuilder");
1476 return (PyObject
*) self
;
1480 treebuilder(PyObject
* self_
, PyObject
* args
)
1482 if (!PyArg_ParseTuple(args
, ":TreeBuilder"))
1485 return treebuilder_new();
1489 treebuilder_dealloc(TreeBuilderObject
* self
)
1491 Py_XDECREF(self
->end_ns_event_obj
);
1492 Py_XDECREF(self
->start_ns_event_obj
);
1493 Py_XDECREF(self
->end_event_obj
);
1494 Py_XDECREF(self
->start_event_obj
);
1495 Py_XDECREF(self
->events
);
1496 Py_DECREF(self
->stack
);
1497 Py_XDECREF(self
->data
);
1498 Py_DECREF(self
->last
);
1499 Py_DECREF(self
->this);
1500 Py_XDECREF(self
->root
);
1502 RELEASE(sizeof(TreeBuilderObject
), "destroy treebuilder");
1507 /* -------------------------------------------------------------------- */
1511 treebuilder_handle_xml(TreeBuilderObject
* self
, PyObject
* encoding
,
1512 PyObject
* standalone
)
1518 treebuilder_handle_start(TreeBuilderObject
* self
, PyObject
* tag
,
1525 if (self
->this == self
->last
) {
1526 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1527 self
->last
->text
= JOIN_SET(
1528 self
->data
, PyList_CheckExact(self
->data
)
1531 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1532 self
->last
->tail
= JOIN_SET(
1533 self
->data
, PyList_CheckExact(self
->data
)
1539 node
= element_new(tag
, attrib
);
1543 this = (PyObject
*) self
->this;
1545 if (this != Py_None
) {
1546 if (element_add_subelement((ElementObject
*) this, node
) < 0)
1552 "multiple elements on top level"
1560 if (self
->index
< PyList_GET_SIZE(self
->stack
)) {
1561 if (PyList_SetItem(self
->stack
, self
->index
, this) < 0)
1565 if (PyList_Append(self
->stack
, this) < 0)
1572 self
->this = (ElementObject
*) node
;
1574 Py_DECREF(self
->last
);
1576 self
->last
= (ElementObject
*) node
;
1578 if (self
->start_event_obj
) {
1580 PyObject
* action
= self
->start_event_obj
;
1581 res
= PyTuple_New(2);
1583 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1584 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1585 PyList_Append(self
->events
, res
);
1588 PyErr_Clear(); /* FIXME: propagate error */
1599 treebuilder_handle_data(TreeBuilderObject
* self
, PyObject
* data
)
1602 /* store the first item as is */
1603 Py_INCREF(data
); self
->data
= data
;
1605 /* more than one item; use a list to collect items */
1606 if (PyString_CheckExact(self
->data
) && self
->data
->ob_refcnt
== 1 &&
1607 PyString_CheckExact(data
) && PyString_GET_SIZE(data
) == 1) {
1608 /* expat often generates single character data sections; handle
1609 the most common case by resizing the existing string... */
1610 Py_ssize_t size
= PyString_GET_SIZE(self
->data
);
1611 if (_PyString_Resize(&self
->data
, size
+ 1) < 0)
1613 PyString_AS_STRING(self
->data
)[size
] = PyString_AS_STRING(data
)[0];
1614 } else if (PyList_CheckExact(self
->data
)) {
1615 if (PyList_Append(self
->data
, data
) < 0)
1618 PyObject
* list
= PyList_New(2);
1621 PyList_SET_ITEM(list
, 0, self
->data
);
1622 Py_INCREF(data
); PyList_SET_ITEM(list
, 1, data
);
1631 treebuilder_handle_end(TreeBuilderObject
* self
, PyObject
* tag
)
1636 if (self
->this == self
->last
) {
1637 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1638 self
->last
->text
= JOIN_SET(
1639 self
->data
, PyList_CheckExact(self
->data
)
1642 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1643 self
->last
->tail
= JOIN_SET(
1644 self
->data
, PyList_CheckExact(self
->data
)
1650 if (self
->index
== 0) {
1653 "pop from empty stack"
1660 item
= PyList_GET_ITEM(self
->stack
, self
->index
);
1663 Py_DECREF(self
->last
);
1665 self
->last
= (ElementObject
*) self
->this;
1666 self
->this = (ElementObject
*) item
;
1668 if (self
->end_event_obj
) {
1670 PyObject
* action
= self
->end_event_obj
;
1671 PyObject
* node
= (PyObject
*) self
->last
;
1672 res
= PyTuple_New(2);
1674 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1675 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1676 PyList_Append(self
->events
, res
);
1679 PyErr_Clear(); /* FIXME: propagate error */
1682 Py_INCREF(self
->last
);
1683 return (PyObject
*) self
->last
;
1687 treebuilder_handle_namespace(TreeBuilderObject
* self
, int start
,
1688 const char* prefix
, const char *uri
)
1698 if (!self
->start_ns_event_obj
)
1700 action
= self
->start_ns_event_obj
;
1701 /* FIXME: prefix and uri use utf-8 encoding! */
1702 parcel
= Py_BuildValue("ss", (prefix
) ? prefix
: "", uri
);
1707 if (!self
->end_ns_event_obj
)
1709 action
= self
->end_ns_event_obj
;
1715 res
= PyTuple_New(2);
1718 PyTuple_SET_ITEM(res
, 0, action
);
1719 PyTuple_SET_ITEM(res
, 1, parcel
);
1720 PyList_Append(self
->events
, res
);
1723 PyErr_Clear(); /* FIXME: propagate error */
1726 /* -------------------------------------------------------------------- */
1727 /* methods (in alphabetical order) */
1730 treebuilder_data(TreeBuilderObject
* self
, PyObject
* args
)
1733 if (!PyArg_ParseTuple(args
, "O:data", &data
))
1736 return treebuilder_handle_data(self
, data
);
1740 treebuilder_end(TreeBuilderObject
* self
, PyObject
* args
)
1743 if (!PyArg_ParseTuple(args
, "O:end", &tag
))
1746 return treebuilder_handle_end(self
, tag
);
/* treebuilder_done(self): finish building.  Only the signature and the
   FIXME below are visible in this listing.  Its result is returned
   unchanged by treebuilder_close and by xmlparser_close/xmlparser_parse.
   NOTE(review): presumably it hands back the root of the finished
   tree -- confirm against the full body. */
1750 treebuilder_done(TreeBuilderObject
* self
)
1754 /* FIXME: check stack size? */
1766 treebuilder_close(TreeBuilderObject
* self
, PyObject
* args
)
1768 if (!PyArg_ParseTuple(args
, ":close"))
1771 return treebuilder_done(self
);
1775 treebuilder_start(TreeBuilderObject
* self
, PyObject
* args
)
1778 PyObject
* attrib
= Py_None
;
1779 if (!PyArg_ParseTuple(args
, "O|O:start", &tag
, &attrib
))
1782 return treebuilder_handle_start(self
, tag
, attrib
);
1786 treebuilder_xml(TreeBuilderObject
* self
, PyObject
* args
)
1789 PyObject
* standalone
;
1790 if (!PyArg_ParseTuple(args
, "OO:xml", &encoding
, &standalone
))
1793 return treebuilder_handle_xml(self
, encoding
, standalone
);
1796 static PyMethodDef treebuilder_methods
[] = {
1797 {"data", (PyCFunction
) treebuilder_data
, METH_VARARGS
},
1798 {"start", (PyCFunction
) treebuilder_start
, METH_VARARGS
},
1799 {"end", (PyCFunction
) treebuilder_end
, METH_VARARGS
},
1800 {"xml", (PyCFunction
) treebuilder_xml
, METH_VARARGS
},
1801 {"close", (PyCFunction
) treebuilder_close
, METH_VARARGS
},
1806 treebuilder_getattr(TreeBuilderObject
* self
, char* name
)
1808 return Py_FindMethod(treebuilder_methods
, (PyObject
*) self
, name
);
1811 statichere PyTypeObject TreeBuilder_Type
= {
1812 PyObject_HEAD_INIT(NULL
)
1813 0, "TreeBuilder", sizeof(TreeBuilderObject
), 0,
1815 (destructor
)treebuilder_dealloc
, /* tp_dealloc */
1817 (getattrfunc
)treebuilder_getattr
, /* tp_getattr */
1820 /* ==================================================================== */
1821 /* the expat interface */
1823 #if defined(USE_EXPAT)
/* EXPAT(func) names an expat entry point.  Exactly one definition must
   be active, hence the #else between the two variants. */
#if defined(USE_PYEXPAT_CAPI)
/* go through the dispatch table exported by the pyexpat module */
#include "pyexpat.h"
static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (expat_capi->func)
#else
/* call the XML_* functions of the expat library directly */
#define EXPAT(func) (XML_##func)
#endif
/* Handler fields of the parser object (the start of the struct is not
   part of this listing).  xmlparser() fills each one with
   PyObject_GetAttrString(target, "xml" / "start" / "data" / "end" /
   "comment" / "pi"); a field is NULL when the target has no such
   attribute, which is why the expat callbacks test it before calling
   and xmlparser_dealloc releases it with Py_XDECREF. */
1845 PyObject
* handle_xml
;
1846 PyObject
* handle_start
;
1847 PyObject
* handle_data
;
1848 PyObject
* handle_end
;
/* optional handlers: the matching expat callbacks are only registered
   when these are non-NULL */
1850 PyObject
* handle_comment
;
1851 PyObject
* handle_pi
;
1855 staticforward PyTypeObject XMLParser_Type
;
1859 #if defined(Py_USING_UNICODE)
/* Report whether the first size bytes of string contain any byte with
   the high bit set, i.e. whether the data is not plain 7-bit ASCII.
   Returns 1 if such a byte exists, 0 otherwise (including size <= 0).
   makestring and makeuniversal use the result to decide whether UTF-8
   decoding is needed. */
static int
checkstring(const char* string, int size)
{
    int i;

    /* check if an 8-bit string contains UTF-8 characters */
    for (i = 0; i < size; i++)
        if (string[i] & 0x80)
            return 1;

    return 0;
}
1875 makestring(const char* string
, int size
)
1877 /* convert a UTF-8 string to either a 7-bit ascii string or a
1880 #if defined(Py_USING_UNICODE)
1881 if (checkstring(string
, size
))
1882 return PyUnicode_DecodeUTF8(string
, size
, "strict");
1885 return PyString_FromStringAndSize(string
, size
);
1889 makeuniversal(XMLParserObject
* self
, const char* string
)
1891 /* convert a UTF-8 tag/attribute name from the expat parser
1892 to a universal name string */
1894 int size
= strlen(string
);
1898 /* look the 'raw' name up in the names dictionary */
1899 key
= PyString_FromStringAndSize(string
, size
);
1903 value
= PyDict_GetItem(self
->names
, key
);
1908 /* new name. convert to universal name, and decode as
1915 /* look for namespace separator */
1916 for (i
= 0; i
< size
; i
++)
1917 if (string
[i
] == '}')
1920 /* convert to universal name */
1921 tag
= PyString_FromStringAndSize(NULL
, size
+1);
1922 p
= PyString_AS_STRING(tag
);
1924 memcpy(p
+1, string
, size
);
1927 /* plain name; use key as tag */
1932 /* decode universal name */
1933 #if defined(Py_USING_UNICODE)
1934 /* inline makestring, to avoid duplicating the source string if
1935 it's not an utf-8 string */
1936 p
= PyString_AS_STRING(tag
);
1937 if (checkstring(p
, size
)) {
1938 value
= PyUnicode_DecodeUTF8(p
, size
, "strict");
1946 value
= tag
; /* use tag as is */
1948 /* add to names dictionary */
1949 if (PyDict_SetItem(self
->names
, key
, value
) < 0) {
1960 /* -------------------------------------------------------------------- */
1964 expat_default_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
1971 if (data_len
< 2 || data_in
[0] != '&')
1974 key
= makestring(data_in
+ 1, data_len
- 2);
1978 value
= PyDict_GetItem(self
->entity
, key
);
1981 if (TreeBuilder_CheckExact(self
->target
))
1982 res
= treebuilder_handle_data(
1983 (TreeBuilderObject
*) self
->target
, value
1985 else if (self
->handle_data
)
1986 res
= PyObject_CallFunction(self
->handle_data
, "O", value
);
1992 PyExc_SyntaxError
, "undefined entity &%s;: line %ld, column %ld",
1993 PyString_AS_STRING(key
),
1994 EXPAT(GetErrorLineNumber
)(self
->parser
),
1995 EXPAT(GetErrorColumnNumber
)(self
->parser
)
2003 expat_start_handler(XMLParserObject
* self
, const XML_Char
* tag_in
,
2004 const XML_Char
**attrib_in
)
2012 tag
= makeuniversal(self
, tag_in
);
2014 return; /* parser will look for errors */
2018 attrib
= PyDict_New();
2021 while (attrib_in
[0] && attrib_in
[1]) {
2022 PyObject
* key
= makeuniversal(self
, attrib_in
[0]);
2023 PyObject
* value
= makestring(attrib_in
[1], strlen(attrib_in
[1]));
2024 if (!key
|| !value
) {
2030 ok
= PyDict_SetItem(attrib
, key
, value
);
2044 if (TreeBuilder_CheckExact(self
->target
))
2046 res
= treebuilder_handle_start((TreeBuilderObject
*) self
->target
,
2048 else if (self
->handle_start
)
2049 res
= PyObject_CallFunction(self
->handle_start
, "OO", tag
, attrib
);
2060 expat_data_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
2066 data
= makestring(data_in
, data_len
);
2068 return; /* parser will look for errors */
2070 if (TreeBuilder_CheckExact(self
->target
))
2072 res
= treebuilder_handle_data((TreeBuilderObject
*) self
->target
, data
);
2073 else if (self
->handle_data
)
2074 res
= PyObject_CallFunction(self
->handle_data
, "O", data
);
2084 expat_end_handler(XMLParserObject
* self
, const XML_Char
* tag_in
)
2087 PyObject
* res
= NULL
;
2089 if (TreeBuilder_CheckExact(self
->target
))
2091 /* the standard tree builder doesn't look at the end tag */
2092 res
= treebuilder_handle_end(
2093 (TreeBuilderObject
*) self
->target
, Py_None
2095 else if (self
->handle_end
) {
2096 tag
= makeuniversal(self
, tag_in
);
2098 res
= PyObject_CallFunction(self
->handle_end
, "O", tag
);
2107 expat_start_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix
,
2108 const XML_Char
*uri
)
2110 treebuilder_handle_namespace(
2111 (TreeBuilderObject
*) self
->target
, 1, prefix
, uri
2116 expat_end_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix_in
)
2118 treebuilder_handle_namespace(
2119 (TreeBuilderObject
*) self
->target
, 0, NULL
, NULL
2124 expat_comment_handler(XMLParserObject
* self
, const XML_Char
* comment_in
)
2129 if (self
->handle_comment
) {
2130 comment
= makestring(comment_in
, strlen(comment_in
));
2132 res
= PyObject_CallFunction(self
->handle_comment
, "O", comment
);
2140 expat_pi_handler(XMLParserObject
* self
, const XML_Char
* target_in
,
2141 const XML_Char
* data_in
)
2147 if (self
->handle_pi
) {
2148 target
= makestring(target_in
, strlen(target_in
));
2149 data
= makestring(data_in
, strlen(data_in
));
2150 if (target
&& data
) {
2151 res
= PyObject_CallFunction(self
->handle_pi
, "OO", target
, data
);
#if defined(Py_USING_UNICODE)
/* expat unknown-encoding handler: build a byte -> code point table for
 * an encoding expat does not know itself.
 *
 * All 256 byte values are decoded in one call with Python's codec for
 * name.  Bytes the codec cannot map (decoded to the replacement
 * character) are marked invalid with -1.  Only encodings that yield
 * exactly one character per byte are accepted.
 *
 * Returns XML_STATUS_OK on success, XML_STATUS_ERROR otherwise. */
static int
expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
                               XML_Encoding *info)
{
    PyObject* u;
    Py_UNICODE* p;
    unsigned char s[256];
    int i;

    memset(info, 0, sizeof(XML_Encoding));

    for (i = 0; i < 256; i++)
        s[i] = i;

    u = PyUnicode_Decode((char*) s, 256, name, "replace");
    if (!u)
        return XML_STATUS_ERROR;

    if (PyUnicode_GET_SIZE(u) != 256) {
        /* not a one-byte-per-character encoding */
        Py_DECREF(u);
        return XML_STATUS_ERROR;
    }

    p = PyUnicode_AS_UNICODE(u);

    for (i = 0; i < 256; i++) {
        if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[i] = p[i];
        else
            info->map[i] = -1;
    }

    Py_DECREF(u);

    return XML_STATUS_OK;
}
#endif
2201 /* -------------------------------------------------------------------- */
2202 /* constructor and destructor */
2205 xmlparser(PyObject
* self_
, PyObject
* args
, PyObject
* kw
)
2207 XMLParserObject
* self
;
2208 /* FIXME: does this need to be static? */
2209 static XML_Memory_Handling_Suite memory_handler
;
2211 PyObject
* target
= NULL
;
2212 char* encoding
= NULL
;
2213 static char* kwlist
[] = { "target", "encoding", NULL
};
2214 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|Oz:XMLParser", kwlist
,
2215 &target
, &encoding
))
2218 #if defined(USE_PYEXPAT_CAPI)
2221 PyExc_RuntimeError
, "cannot load dispatch table from pyexpat"
2227 self
= PyObject_New(XMLParserObject
, &XMLParser_Type
);
2231 self
->entity
= PyDict_New();
2232 if (!self
->entity
) {
2237 self
->names
= PyDict_New();
2239 PyObject_Del(self
->entity
);
2244 memory_handler
.malloc_fcn
= PyObject_Malloc
;
2245 memory_handler
.realloc_fcn
= PyObject_Realloc
;
2246 memory_handler
.free_fcn
= PyObject_Free
;
2248 self
->parser
= EXPAT(ParserCreate_MM
)(encoding
, &memory_handler
, "}");
2249 if (!self
->parser
) {
2250 PyObject_Del(self
->names
);
2251 PyObject_Del(self
->entity
);
2257 /* setup target handlers */
2259 target
= treebuilder_new();
2261 EXPAT(ParserFree
)(self
->parser
);
2262 PyObject_Del(self
->names
);
2263 PyObject_Del(self
->entity
);
2269 self
->target
= target
;
2271 self
->handle_xml
= PyObject_GetAttrString(target
, "xml");
2272 self
->handle_start
= PyObject_GetAttrString(target
, "start");
2273 self
->handle_data
= PyObject_GetAttrString(target
, "data");
2274 self
->handle_end
= PyObject_GetAttrString(target
, "end");
2275 self
->handle_comment
= PyObject_GetAttrString(target
, "comment");
2276 self
->handle_pi
= PyObject_GetAttrString(target
, "pi");
2280 /* configure parser */
2281 EXPAT(SetUserData
)(self
->parser
, self
);
2282 EXPAT(SetElementHandler
)(
2284 (XML_StartElementHandler
) expat_start_handler
,
2285 (XML_EndElementHandler
) expat_end_handler
2287 EXPAT(SetDefaultHandlerExpand
)(
2289 (XML_DefaultHandler
) expat_default_handler
2291 EXPAT(SetCharacterDataHandler
)(
2293 (XML_CharacterDataHandler
) expat_data_handler
2295 if (self
->handle_comment
)
2296 EXPAT(SetCommentHandler
)(
2298 (XML_CommentHandler
) expat_comment_handler
2300 if (self
->handle_pi
)
2301 EXPAT(SetProcessingInstructionHandler
)(
2303 (XML_ProcessingInstructionHandler
) expat_pi_handler
2305 #if defined(Py_USING_UNICODE)
2306 EXPAT(SetUnknownEncodingHandler
)(
2308 (XML_UnknownEncodingHandler
) expat_unknown_encoding_handler
, NULL
2312 ALLOC(sizeof(XMLParserObject
), "create expatparser");
2314 return (PyObject
*) self
;
2318 xmlparser_dealloc(XMLParserObject
* self
)
2320 EXPAT(ParserFree
)(self
->parser
);
2322 Py_XDECREF(self
->handle_pi
);
2323 Py_XDECREF(self
->handle_comment
);
2324 Py_XDECREF(self
->handle_end
);
2325 Py_XDECREF(self
->handle_data
);
2326 Py_XDECREF(self
->handle_start
);
2327 Py_XDECREF(self
->handle_xml
);
2329 Py_DECREF(self
->target
);
2330 Py_DECREF(self
->entity
);
2331 Py_DECREF(self
->names
);
2333 RELEASE(sizeof(XMLParserObject
), "destroy expatparser");
2338 /* -------------------------------------------------------------------- */
2339 /* methods (in alphabetical order) */
2342 expat_parse(XMLParserObject
* self
, char* data
, int data_len
, int final
)
2346 ok
= EXPAT(Parse
)(self
->parser
, data
, data_len
, final
);
2348 if (PyErr_Occurred())
2353 PyExc_SyntaxError
, "%s: line %ld, column %ld",
2354 EXPAT(ErrorString
)(EXPAT(GetErrorCode
)(self
->parser
)),
2355 EXPAT(GetErrorLineNumber
)(self
->parser
),
2356 EXPAT(GetErrorColumnNumber
)(self
->parser
)
2365 xmlparser_close(XMLParserObject
* self
, PyObject
* args
)
2367 /* end feeding data to parser */
2370 if (!PyArg_ParseTuple(args
, ":close"))
2373 res
= expat_parse(self
, "", 0, 1);
2375 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2377 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2384 xmlparser_feed(XMLParserObject
* self
, PyObject
* args
)
2386 /* feed data to parser */
2390 if (!PyArg_ParseTuple(args
, "s#:feed", &data
, &data_len
))
2393 return expat_parse(self
, data
, data_len
, 0);
2397 xmlparser_parse(XMLParserObject
* self
, PyObject
* args
)
2399 /* (internal) parse until end of input stream */
2406 if (!PyArg_ParseTuple(args
, "O:_parse", &fileobj
))
2409 reader
= PyObject_GetAttrString(fileobj
, "read");
2413 /* read from open file object */
2416 buffer
= PyObject_CallFunction(reader
, "i", 64*1024);
2419 /* read failed (e.g. due to KeyboardInterrupt) */
2424 if (!PyString_CheckExact(buffer
) || PyString_GET_SIZE(buffer
) == 0) {
2430 self
, PyString_AS_STRING(buffer
), PyString_GET_SIZE(buffer
), 0
2445 res
= expat_parse(self
, "", 0, 1);
2447 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2449 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2456 xmlparser_setevents(XMLParserObject
* self
, PyObject
* args
)
2458 /* activate element event reporting */
2461 TreeBuilderObject
* target
;
2463 PyObject
* events
; /* event collector */
2464 PyObject
* event_set
= Py_None
;
2465 if (!PyArg_ParseTuple(args
, "O!|O:_setevents", &PyList_Type
, &events
,
2469 if (!TreeBuilder_CheckExact(self
->target
)) {
2472 "event handling only supported for cElementTree.Treebuilder "
2478 target
= (TreeBuilderObject
*) self
->target
;
2481 Py_XDECREF(target
->events
);
2482 target
->events
= events
;
2484 /* clear out existing events */
2485 Py_XDECREF(target
->start_event_obj
); target
->start_event_obj
= NULL
;
2486 Py_XDECREF(target
->end_event_obj
); target
->end_event_obj
= NULL
;
2487 Py_XDECREF(target
->start_ns_event_obj
); target
->start_ns_event_obj
= NULL
;
2488 Py_XDECREF(target
->end_ns_event_obj
); target
->end_ns_event_obj
= NULL
;
2490 if (event_set
== Py_None
) {
2491 /* default is "end" only */
2492 target
->end_event_obj
= PyString_FromString("end");
2496 if (!PyTuple_Check(event_set
)) /* FIXME: handle arbitrary sequences */
2499 for (i
= 0; i
< PyTuple_GET_SIZE(event_set
); i
++) {
2500 PyObject
* item
= PyTuple_GET_ITEM(event_set
, i
);
2502 if (!PyString_Check(item
))
2504 event
= PyString_AS_STRING(item
);
2505 if (strcmp(event
, "start") == 0) {
2507 target
->start_event_obj
= item
;
2508 } else if (strcmp(event
, "end") == 0) {
2510 Py_XDECREF(target
->end_event_obj
);
2511 target
->end_event_obj
= item
;
2512 } else if (strcmp(event
, "start-ns") == 0) {
2514 Py_XDECREF(target
->start_ns_event_obj
);
2515 target
->start_ns_event_obj
= item
;
2516 EXPAT(SetNamespaceDeclHandler
)(
2518 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2519 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2521 } else if (strcmp(event
, "end-ns") == 0) {
2523 Py_XDECREF(target
->end_ns_event_obj
);
2524 target
->end_ns_event_obj
= item
;
2525 EXPAT(SetNamespaceDeclHandler
)(
2527 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2528 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2533 "unknown event '%s'", event
2544 "invalid event tuple"
2549 static PyMethodDef xmlparser_methods
[] = {
2550 {"feed", (PyCFunction
) xmlparser_feed
, METH_VARARGS
},
2551 {"close", (PyCFunction
) xmlparser_close
, METH_VARARGS
},
2552 {"_parse", (PyCFunction
) xmlparser_parse
, METH_VARARGS
},
2553 {"_setevents", (PyCFunction
) xmlparser_setevents
, METH_VARARGS
},
2558 xmlparser_getattr(XMLParserObject
* self
, char* name
)
2562 res
= Py_FindMethod(xmlparser_methods
, (PyObject
*) self
, name
);
2568 if (strcmp(name
, "entity") == 0)
2570 else if (strcmp(name
, "target") == 0)
2572 else if (strcmp(name
, "version") == 0) {
2574 sprintf(buffer
, "Expat %d.%d.%d", XML_MAJOR_VERSION
,
2575 XML_MINOR_VERSION
, XML_MICRO_VERSION
);
2576 return PyString_FromString(buffer
);
2578 PyErr_SetString(PyExc_AttributeError
, name
);
2586 statichere PyTypeObject XMLParser_Type
= {
2587 PyObject_HEAD_INIT(NULL
)
2588 0, "XMLParser", sizeof(XMLParserObject
), 0,
2590 (destructor
)xmlparser_dealloc
, /* tp_dealloc */
2592 (getattrfunc
)xmlparser_getattr
, /* tp_getattr */
2597 /* ==================================================================== */
2598 /* python module interface */
2600 static PyMethodDef _functions
[] = {
2601 {"Element", (PyCFunction
) element
, METH_VARARGS
|METH_KEYWORDS
},
2602 {"SubElement", (PyCFunction
) subelement
, METH_VARARGS
|METH_KEYWORDS
},
2603 {"TreeBuilder", (PyCFunction
) treebuilder
, METH_VARARGS
},
2604 #if defined(USE_EXPAT)
2605 {"XMLParser", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2606 {"XMLTreeBuilder", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2612 init_elementtree(void)
2617 #if defined(USE_PYEXPAT_CAPI)
2618 struct PyExpat_CAPI
* capi
;
2621 /* Patch object type */
2622 Element_Type
.ob_type
= TreeBuilder_Type
.ob_type
= &PyType_Type
;
2623 #if defined(USE_EXPAT)
2624 XMLParser_Type
.ob_type
= &PyType_Type
;
2627 m
= Py_InitModule("_elementtree", _functions
);
2631 /* python glue code */
2637 PyDict_SetItemString(g
, "__builtins__", PyEval_GetBuiltins());
2641 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2642 "from __future__ import generators\n" /* enable yield under 2.2 */
2645 "from copy import copy, deepcopy\n"
2648 " from xml.etree import ElementTree\n"
2649 "except ImportError:\n"
2650 " import ElementTree\n"
2651 "ET = ElementTree\n"
2654 "import _elementtree as cElementTree\n"
2656 "try:\n" /* check if copy works as is */
2657 " copy(cElementTree.Element('x'))\n"
2659 " def copyelement(elem):\n"
2662 "def Comment(text=None):\n" /* public */
2663 " element = cElementTree.Element(ET.Comment)\n"
2664 " element.text = text\n"
2666 "cElementTree.Comment = Comment\n"
2668 "class ElementTree(ET.ElementTree):\n" /* public */
2669 " def parse(self, source, parser=None):\n"
2670 " if not hasattr(source, 'read'):\n"
2671 " source = open(source, 'rb')\n"
2672 " if parser is not None:\n"
2674 " data = source.read(65536)\n"
2677 " parser.feed(data)\n"
2678 " self._root = parser.close()\n"
2680 " parser = cElementTree.XMLParser()\n"
2681 " self._root = parser._parse(source)\n"
2682 " return self._root\n"
2683 "cElementTree.ElementTree = ElementTree\n"
2685 "def getiterator(node, tag=None):\n" /* helper */
2688 #if (PY_VERSION_HEX < 0x02020000)
2689 " nodes = []\n" /* 2.1 doesn't have yield */
2690 " if tag is None or node.tag == tag:\n"
2691 " nodes.append(node)\n"
2692 " for node in node:\n"
2693 " nodes.extend(getiterator(node, tag))\n"
2696 " if tag is None or node.tag == tag:\n"
2698 " for node in node:\n"
2699 " for node in getiterator(node, tag):\n"
2703 "def parse(source, parser=None):\n" /* public */
2704 " tree = ElementTree()\n"
2705 " tree.parse(source, parser)\n"
2707 "cElementTree.parse = parse\n"
2709 #if (PY_VERSION_HEX < 0x02020000)
2710 "if hasattr(ET, 'iterparse'):\n"
2711 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2713 "class iterparse(object):\n"
2715 " def __init__(self, file, events=None):\n"
2716 " if not hasattr(file, 'read'):\n"
2717 " file = open(file, 'rb')\n"
2718 " self._file = file\n"
2719 " self._events = events\n"
2720 " def __iter__(self):\n"
2722 " b = cElementTree.TreeBuilder()\n"
2723 " p = cElementTree.XMLParser(b)\n"
2724 " p._setevents(events, self._events)\n"
2726 " data = self._file.read(16384)\n"
2730 " for event in events:\n"
2733 " root = p.close()\n"
2734 " for event in events:\n"
2736 " self.root = root\n"
2737 "cElementTree.iterparse = iterparse\n"
2740 "def PI(target, text=None):\n" /* public */
2741 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2742 " element.text = target\n"
2744 " element.text = element.text + ' ' + text\n"
2747 " elem = cElementTree.Element(ET.PI)\n"
2748 " elem.text = text\n"
2750 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2752 "def XML(text):\n" /* public */
2753 " parser = cElementTree.XMLParser()\n"
2754 " parser.feed(text)\n"
2755 " return parser.close()\n"
2756 "cElementTree.XML = cElementTree.fromstring = XML\n"
2758 "def XMLID(text):\n" /* public */
2759 " tree = XML(text)\n"
2761 " for elem in tree.getiterator():\n"
2762 " id = elem.get('id')\n"
2765 " return tree, ids\n"
2766 "cElementTree.XMLID = XMLID\n"
2768 "cElementTree.dump = ET.dump\n"
2769 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2770 "cElementTree.iselement = ET.iselement\n"
2771 "cElementTree.QName = ET.QName\n"
2772 "cElementTree.tostring = ET.tostring\n"
2773 "cElementTree.VERSION = '" VERSION
"'\n"
2774 "cElementTree.__version__ = '" VERSION
"'\n"
2775 "cElementTree.XMLParserError = SyntaxError\n"
2779 PyRun_String(bootstrap
, Py_file_input
, g
, NULL
);
2781 elementpath_obj
= PyDict_GetItemString(g
, "ElementPath");
2783 elementtree_copyelement_obj
= PyDict_GetItemString(g
, "copyelement");
2784 if (elementtree_copyelement_obj
) {
2785 /* reduce hack needed; enable reduce method */
2787 for (mp
= element_methods
; mp
->ml_name
; mp
++)
2788 if (mp
->ml_meth
== (PyCFunction
) element_reduce
) {
2789 mp
->ml_name
= "__reduce__";
2794 elementtree_deepcopy_obj
= PyDict_GetItemString(g
, "deepcopy");
2795 elementtree_getiterator_obj
= PyDict_GetItemString(g
, "getiterator");
2797 #if defined(USE_PYEXPAT_CAPI)
2798 /* link against pyexpat, if possible */
2799 capi
= PyCObject_Import("pyexpat", "expat_CAPI");
2801 strcmp(capi
->magic
, PyExpat_CAPI_MAGIC
) == 0 &&
2802 capi
->size
<= sizeof(*expat_capi
) &&
2803 capi
->MAJOR_VERSION
== XML_MAJOR_VERSION
&&
2804 capi
->MINOR_VERSION
== XML_MINOR_VERSION
&&
2805 capi
->MICRO_VERSION
== XML_MICRO_VERSION
)