3 * $Id: _elementtree.c 2657 2006-03-12 20:50:32Z fredrik $
5 * elementtree accelerator
8 * 1999-06-20 fl created (as part of sgmlop)
9 * 2001-05-29 fl effdom edition
10 * 2001-06-05 fl backported to unix; fixed bogus free in clear
11 * 2001-07-10 fl added findall helper
12 * 2003-02-27 fl elementtree edition (alpha)
13 * 2004-06-03 fl updates for elementtree 1.2
14 * 2005-01-05 fl added universal name cache, Element/SubElement factories
15 * 2005-01-06 fl moved python helpers into C module; removed 1.5.2 support
16 * 2005-01-07 fl added 2.1 support; work around broken __copy__ in 2.3
17 * 2005-01-08 fl added makeelement method; fixed path support
18 * 2005-01-10 fl optimized memory usage
19 * 2005-01-11 fl first public release (cElementTree 0.8)
20 * 2005-01-12 fl split element object into base and extras
21 * 2005-01-13 fl use tagged pointers for tail/text (cElementTree 0.9)
22 * 2005-01-17 fl added treebuilder close method
23 * 2005-01-17 fl fixed crash in getchildren
24 * 2005-01-18 fl removed observer api, added iterparse (cElementTree 0.9.3)
25 * 2005-01-23 fl revised iterparse api; added namespace event support (0.9.8)
26 * 2005-01-26 fl added VERSION module property (cElementTree 1.0)
27 * 2005-01-28 fl added remove method (1.0.1)
28 * 2005-03-01 fl added iselement function; fixed makeelement aliasing (1.0.2)
29 * 2005-03-13 fl export Comment and ProcessingInstruction/PI helpers
30 * 2005-03-26 fl added Comment and PI support to XMLParser
31 * 2005-03-27 fl event optimizations; complain about bogus events
32 * 2005-08-08 fl fixed read error handling in parse
33 * 2005-08-11 fl added runtime test for copy workaround (1.0.3)
34 * 2005-12-13 fl added expat_capi support (for xml.etree) (1.0.4)
35 * 2005-12-16 fl added support for non-standard encodings
36 * 2006-03-08 fl fixed a couple of potential null-refs and leaks
37 * 2006-03-12 fl merge in 2.5 ssize_t changes
39 * Copyright (c) 1999-2006 by Secret Labs AB. All rights reserved.
40 * Copyright (c) 1999-2006 by Fredrik Lundh.
43 * http://www.pythonware.com
46 /* Licensed to PSF under a Contributor Agreement. */
47 /* See http://www.python.org/2.4/license for licensing details. */
51 #define VERSION "1.0.6"
53 /* -------------------------------------------------------------------- */
56 /* Leave defined to include the expat-based XMLParser type */
59 /* Define to do all expat calls via pyexpat's embedded expat library */
60 /* #define USE_PYEXPAT_CAPI */
62 /* An element can hold this many children without extra memory
64 #define STATIC_CHILDREN 4
66 /* For best performance, choose a value so that 80-90% of all nodes
67 have no more than the given number of children. Set this to zero
68 to minimize the size of the element structure itself (this only
69 helps if you have lots of leaf nodes with attributes). */
71 /* Also note that pymalloc always allocates blocks in multiples of
72 eight bytes. For the current version of cElementTree, this means
73 that the number of children should be an even number, at least on
76 /* -------------------------------------------------------------------- */
79 static int memory
= 0;
80 #define ALLOC(size, comment)\
81 do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
82 #define RELEASE(size, comment)\
83 do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
85 #define ALLOC(size, comment)
86 #define RELEASE(size, comment)
91 #define LOCAL(type) static __inline type __fastcall
93 #define LOCAL(type) static type
96 /* compatibility macros */
97 #if (PY_VERSION_HEX < 0x02050000)
98 typedef int Py_ssize_t
;
99 #define lenfunc inquiry
102 #if (PY_VERSION_HEX < 0x02040000)
103 #define PyDict_CheckExact PyDict_Check
104 #if (PY_VERSION_HEX < 0x02020000)
105 #define PyList_CheckExact PyList_Check
106 #define PyString_CheckExact PyString_Check
107 #if (PY_VERSION_HEX >= 0x01060000)
108 #define Py_USING_UNICODE /* always enabled for 2.0 and 2.1 */
113 #if !defined(Py_RETURN_NONE)
114 #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
117 /* macros used to store 'join' flags in string object pointers. note
118 that all use of text and tail as object pointers must be wrapped in
119 JOIN_OBJ. see comments in the ElementObject definition for more
121 #define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
122 #define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
123 #define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~1))
125 /* glue functions (see the init function for details) */
126 static PyObject
* elementtree_copyelement_obj
;
127 static PyObject
* elementtree_deepcopy_obj
;
128 static PyObject
* elementtree_getiterator_obj
;
129 static PyObject
* elementpath_obj
;
134 deepcopy(PyObject
* object
, PyObject
* memo
)
136 /* do a deep copy of the given object */
141 if (!elementtree_deepcopy_obj
) {
144 "deepcopy helper not found"
149 args
= PyTuple_New(2);
153 Py_INCREF(object
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) object
);
154 Py_INCREF(memo
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) memo
);
156 result
= PyObject_CallObject(elementtree_deepcopy_obj
, args
);
164 list_join(PyObject
* list
)
166 /* join list elements (destroying the list in the process) */
173 switch (PyList_GET_SIZE(list
)) {
176 return PyString_FromString("");
178 result
= PyList_GET_ITEM(list
, 0);
184 /* two or more elements: slice out a suitable separator from the
185 first member, and use that to join the entire list */
187 joiner
= PySequence_GetSlice(PyList_GET_ITEM(list
, 0), 0, 0);
191 function
= PyObject_GetAttrString(joiner
, "join");
197 args
= PyTuple_New(1);
201 PyTuple_SET_ITEM(args
, 0, list
);
203 result
= PyObject_CallObject(function
, args
);
205 Py_DECREF(args
); /* also removes list */
212 #if (PY_VERSION_HEX < 0x02020000)
214 PyDict_Update(PyObject
* dict
, PyObject
* other
)
216 /* PyDict_Update emulation for 2.1 and earlier */
220 res
= PyObject_CallMethod(dict
, "update", "O", other
);
229 /* -------------------------------------------------------------------- */
230 /* the element type */
234 /* attributes (a dictionary object), or None if no attributes */
238 int length
; /* actual number of items */
239 int allocated
; /* allocated items */
241 /* this either points to _children or to a malloced buffer */
244 PyObject
* _children
[STATIC_CHILDREN
];
246 } ElementObjectExtra
;
251 /* element tag (a string). */
254 /* text before first child. note that this is a tagged pointer;
255 use JOIN_OBJ to get the object pointer. the join flag is used
256 to distinguish lists created by the tree builder from lists
257 assigned to the attribute by application code; the former
258 should be joined before being returned to the user, the latter
259 should be left intact. */
262 /* text after this element, in parent. note that this is a tagged
263 pointer; use JOIN_OBJ to get the object pointer. */
266 ElementObjectExtra
* extra
;
270 staticforward PyTypeObject Element_Type
;
272 #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
274 /* -------------------------------------------------------------------- */
275 /* element constructor and destructor */
278 element_new_extra(ElementObject
* self
, PyObject
* attrib
)
280 self
->extra
= PyObject_Malloc(sizeof(ElementObjectExtra
));
288 self
->extra
->attrib
= attrib
;
290 self
->extra
->length
= 0;
291 self
->extra
->allocated
= STATIC_CHILDREN
;
292 self
->extra
->children
= self
->extra
->_children
;
298 element_dealloc_extra(ElementObject
* self
)
302 Py_DECREF(self
->extra
->attrib
);
304 for (i
= 0; i
< self
->extra
->length
; i
++)
305 Py_DECREF(self
->extra
->children
[i
]);
307 if (self
->extra
->children
!= self
->extra
->_children
)
308 PyObject_Free(self
->extra
->children
);
310 PyObject_Free(self
->extra
);
314 element_new(PyObject
* tag
, PyObject
* attrib
)
318 self
= PyObject_New(ElementObject
, &Element_Type
);
322 /* use None for empty dictionaries */
323 if (PyDict_CheckExact(attrib
) && !PyDict_Size(attrib
))
328 if (attrib
!= Py_None
) {
330 if (element_new_extra(self
, attrib
) < 0) {
335 self
->extra
->length
= 0;
336 self
->extra
->allocated
= STATIC_CHILDREN
;
337 self
->extra
->children
= self
->extra
->_children
;
345 self
->text
= Py_None
;
348 self
->tail
= Py_None
;
350 ALLOC(sizeof(ElementObject
), "create element");
352 return (PyObject
*) self
;
356 element_resize(ElementObject
* self
, int extra
)
361 /* make sure self->children can hold the given number of extra
362 elements. set an exception and return -1 if allocation failed */
365 element_new_extra(self
, NULL
);
367 size
= self
->extra
->length
+ extra
;
369 if (size
> self
->extra
->allocated
) {
370 /* use Python 2.4's list growth strategy */
371 size
= (size
>> 3) + (size
< 9 ? 3 : 6) + size
;
372 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
373 * which needs at least 4 bytes.
374 * Although it's a false alarm always assume at least one child to
377 size
= size
? size
: 1;
378 if (self
->extra
->children
!= self
->extra
->_children
) {
379 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
380 * "children", which needs at least 4 bytes. Although it's a
381 * false alarm always assume at least one child to be safe.
383 children
= PyObject_Realloc(self
->extra
->children
,
384 size
* sizeof(PyObject
*));
388 children
= PyObject_Malloc(size
* sizeof(PyObject
*));
391 /* copy existing children from static area to malloc buffer */
392 memcpy(children
, self
->extra
->children
,
393 self
->extra
->length
* sizeof(PyObject
*));
395 self
->extra
->children
= children
;
396 self
->extra
->allocated
= size
;
407 element_add_subelement(ElementObject
* self
, PyObject
* element
)
409 /* add a child element to a parent */
411 if (element_resize(self
, 1) < 0)
415 self
->extra
->children
[self
->extra
->length
] = element
;
417 self
->extra
->length
++;
423 element_get_attrib(ElementObject
* self
)
425 /* return borrowed reference to attrib dictionary */
426 /* note: this function assumes that the extra section exists */
428 PyObject
* res
= self
->extra
->attrib
;
430 if (res
== Py_None
) {
431 /* create missing dictionary */
435 self
->extra
->attrib
= res
;
442 element_get_text(ElementObject
* self
)
444 /* return borrowed reference to text attribute */
446 PyObject
* res
= self
->text
;
450 if (PyList_CheckExact(res
)) {
451 res
= list_join(res
);
462 element_get_tail(ElementObject
* self
)
464 /* return borrowed reference to tail attribute */
466 PyObject
* res
= self
->tail
;
470 if (PyList_CheckExact(res
)) {
471 res
= list_join(res
);
482 element(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
487 PyObject
* attrib
= NULL
;
488 if (!PyArg_ParseTuple(args
, "O|O!:Element", &tag
,
489 &PyDict_Type
, &attrib
))
493 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
497 PyDict_Update(attrib
, kw
);
503 elem
= element_new(tag
, attrib
);
511 subelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
515 ElementObject
* parent
;
517 PyObject
* attrib
= NULL
;
518 if (!PyArg_ParseTuple(args
, "O!O|O!:SubElement",
519 &Element_Type
, &parent
, &tag
,
520 &PyDict_Type
, &attrib
))
524 attrib
= (attrib
) ? PyDict_Copy(attrib
) : PyDict_New();
528 PyDict_Update(attrib
, kw
);
534 elem
= element_new(tag
, attrib
);
538 if (element_add_subelement(parent
, elem
) < 0) {
547 element_dealloc(ElementObject
* self
)
550 element_dealloc_extra(self
);
552 /* discard attributes */
553 Py_DECREF(self
->tag
);
554 Py_DECREF(JOIN_OBJ(self
->text
));
555 Py_DECREF(JOIN_OBJ(self
->tail
));
557 RELEASE(sizeof(ElementObject
), "destroy element");
562 /* -------------------------------------------------------------------- */
563 /* methods (in alphabetical order) */
566 element_append(ElementObject
* self
, PyObject
* args
)
569 if (!PyArg_ParseTuple(args
, "O!:append", &Element_Type
, &element
))
572 if (element_add_subelement(self
, element
) < 0)
579 element_clear(ElementObject
* self
, PyObject
* args
)
581 if (!PyArg_ParseTuple(args
, ":clear"))
585 element_dealloc_extra(self
);
590 Py_DECREF(JOIN_OBJ(self
->text
));
591 self
->text
= Py_None
;
594 Py_DECREF(JOIN_OBJ(self
->tail
));
595 self
->tail
= Py_None
;
601 element_copy(ElementObject
* self
, PyObject
* args
)
604 ElementObject
* element
;
606 if (!PyArg_ParseTuple(args
, ":__copy__"))
609 element
= (ElementObject
*) element_new(
610 self
->tag
, (self
->extra
) ? self
->extra
->attrib
: Py_None
615 Py_DECREF(JOIN_OBJ(element
->text
));
616 element
->text
= self
->text
;
617 Py_INCREF(JOIN_OBJ(element
->text
));
619 Py_DECREF(JOIN_OBJ(element
->tail
));
620 element
->tail
= self
->tail
;
621 Py_INCREF(JOIN_OBJ(element
->tail
));
625 if (element_resize(element
, self
->extra
->length
) < 0) {
630 for (i
= 0; i
< self
->extra
->length
; i
++) {
631 Py_INCREF(self
->extra
->children
[i
]);
632 element
->extra
->children
[i
] = self
->extra
->children
[i
];
635 element
->extra
->length
= self
->extra
->length
;
639 return (PyObject
*) element
;
643 element_deepcopy(ElementObject
* self
, PyObject
* args
)
646 ElementObject
* element
;
654 if (!PyArg_ParseTuple(args
, "O:__deepcopy__", &memo
))
657 tag
= deepcopy(self
->tag
, memo
);
662 attrib
= deepcopy(self
->extra
->attrib
, memo
);
672 element
= (ElementObject
*) element_new(tag
, attrib
);
680 text
= deepcopy(JOIN_OBJ(self
->text
), memo
);
683 Py_DECREF(element
->text
);
684 element
->text
= JOIN_SET(text
, JOIN_GET(self
->text
));
686 tail
= deepcopy(JOIN_OBJ(self
->tail
), memo
);
689 Py_DECREF(element
->tail
);
690 element
->tail
= JOIN_SET(tail
, JOIN_GET(self
->tail
));
694 if (element_resize(element
, self
->extra
->length
) < 0)
697 for (i
= 0; i
< self
->extra
->length
; i
++) {
698 PyObject
* child
= deepcopy(self
->extra
->children
[i
], memo
);
700 element
->extra
->length
= i
;
703 element
->extra
->children
[i
] = child
;
706 element
->extra
->length
= self
->extra
->length
;
710 /* add object to memo dictionary (so deepcopy won't visit it again) */
711 id
= PyInt_FromLong((Py_uintptr_t
) self
);
713 i
= PyDict_SetItem(memo
, id
, (PyObject
*) element
);
720 return (PyObject
*) element
;
728 checkpath(PyObject
* tag
)
733 /* check if a tag contains an xpath character */
735 #define PATHCHAR(ch) (ch == '/' || ch == '*' || ch == '[' || ch == '@')
737 #if defined(Py_USING_UNICODE)
738 if (PyUnicode_Check(tag
)) {
739 Py_UNICODE
*p
= PyUnicode_AS_UNICODE(tag
);
740 for (i
= 0; i
< PyUnicode_GET_SIZE(tag
); i
++) {
743 else if (p
[i
] == '}')
745 else if (check
&& PATHCHAR(p
[i
]))
751 if (PyString_Check(tag
)) {
752 char *p
= PyString_AS_STRING(tag
);
753 for (i
= 0; i
< PyString_GET_SIZE(tag
); i
++) {
756 else if (p
[i
] == '}')
758 else if (check
&& PATHCHAR(p
[i
]))
764 return 1; /* unknown type; might be path expression */
768 element_find(ElementObject
* self
, PyObject
* args
)
773 if (!PyArg_ParseTuple(args
, "O:find", &tag
))
777 return PyObject_CallMethod(
778 elementpath_obj
, "find", "OO", self
, tag
784 for (i
= 0; i
< self
->extra
->length
; i
++) {
785 PyObject
* item
= self
->extra
->children
[i
];
786 if (Element_CheckExact(item
) &&
787 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
797 element_findtext(ElementObject
* self
, PyObject
* args
)
802 PyObject
* default_value
= Py_None
;
803 if (!PyArg_ParseTuple(args
, "O|O:findtext", &tag
, &default_value
))
807 return PyObject_CallMethod(
808 elementpath_obj
, "findtext", "OOO", self
, tag
, default_value
812 Py_INCREF(default_value
);
813 return default_value
;
816 for (i
= 0; i
< self
->extra
->length
; i
++) {
817 ElementObject
* item
= (ElementObject
*) self
->extra
->children
[i
];
818 if (Element_CheckExact(item
) && !PyObject_Compare(item
->tag
, tag
)) {
819 PyObject
* text
= element_get_text(item
);
821 return PyString_FromString("");
827 Py_INCREF(default_value
);
828 return default_value
;
832 element_findall(ElementObject
* self
, PyObject
* args
)
838 if (!PyArg_ParseTuple(args
, "O:findall", &tag
))
842 return PyObject_CallMethod(
843 elementpath_obj
, "findall", "OO", self
, tag
853 for (i
= 0; i
< self
->extra
->length
; i
++) {
854 PyObject
* item
= self
->extra
->children
[i
];
855 if (Element_CheckExact(item
) &&
856 PyObject_Compare(((ElementObject
*)item
)->tag
, tag
) == 0) {
857 if (PyList_Append(out
, item
) < 0) {
868 element_get(ElementObject
* self
, PyObject
* args
)
873 PyObject
* default_value
= Py_None
;
874 if (!PyArg_ParseTuple(args
, "O|O:get", &key
, &default_value
))
877 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
878 value
= default_value
;
880 value
= PyDict_GetItem(self
->extra
->attrib
, key
);
882 value
= default_value
;
890 element_getchildren(ElementObject
* self
, PyObject
* args
)
895 if (!PyArg_ParseTuple(args
, ":getchildren"))
899 return PyList_New(0);
901 list
= PyList_New(self
->extra
->length
);
905 for (i
= 0; i
< self
->extra
->length
; i
++) {
906 PyObject
* item
= self
->extra
->children
[i
];
908 PyList_SET_ITEM(list
, i
, item
);
915 element_getiterator(ElementObject
* self
, PyObject
* args
)
919 PyObject
* tag
= Py_None
;
920 if (!PyArg_ParseTuple(args
, "|O:getiterator", &tag
))
923 if (!elementtree_getiterator_obj
) {
926 "getiterator helper not found"
931 args
= PyTuple_New(2);
935 Py_INCREF(self
); PyTuple_SET_ITEM(args
, 0, (PyObject
*) self
);
936 Py_INCREF(tag
); PyTuple_SET_ITEM(args
, 1, (PyObject
*) tag
);
938 result
= PyObject_CallObject(elementtree_getiterator_obj
, args
);
946 element_getitem(PyObject
* self_
, Py_ssize_t index
)
948 ElementObject
* self
= (ElementObject
*) self_
;
950 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
953 "child index out of range"
958 Py_INCREF(self
->extra
->children
[index
]);
959 return self
->extra
->children
[index
];
963 element_getslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
)
965 ElementObject
* self
= (ElementObject
*) self_
;
970 return PyList_New(0);
972 /* standard clamping */
977 if (end
> self
->extra
->length
)
978 end
= self
->extra
->length
;
982 list
= PyList_New(end
- start
);
986 for (i
= start
; i
< end
; i
++) {
987 PyObject
* item
= self
->extra
->children
[i
];
989 PyList_SET_ITEM(list
, i
- start
, item
);
996 element_insert(ElementObject
* self
, PyObject
* args
)
1002 if (!PyArg_ParseTuple(args
, "iO!:insert", &index
,
1003 &Element_Type
, &element
))
1007 element_new_extra(self
, NULL
);
1011 if (index
> self
->extra
->length
)
1012 index
= self
->extra
->length
;
1014 if (element_resize(self
, 1) < 0)
1017 for (i
= self
->extra
->length
; i
> index
; i
--)
1018 self
->extra
->children
[i
] = self
->extra
->children
[i
-1];
1021 self
->extra
->children
[index
] = element
;
1023 self
->extra
->length
++;
1029 element_items(ElementObject
* self
, PyObject
* args
)
1031 if (!PyArg_ParseTuple(args
, ":items"))
1034 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1035 return PyList_New(0);
1037 return PyDict_Items(self
->extra
->attrib
);
1041 element_keys(ElementObject
* self
, PyObject
* args
)
1043 if (!PyArg_ParseTuple(args
, ":keys"))
1046 if (!self
->extra
|| self
->extra
->attrib
== Py_None
)
1047 return PyList_New(0);
1049 return PyDict_Keys(self
->extra
->attrib
);
1053 element_length(ElementObject
* self
)
1058 return self
->extra
->length
;
1062 element_makeelement(PyObject
* self
, PyObject
* args
, PyObject
* kw
)
1068 if (!PyArg_ParseTuple(args
, "OO:makeelement", &tag
, &attrib
))
1071 attrib
= PyDict_Copy(attrib
);
1075 elem
= element_new(tag
, attrib
);
1083 element_reduce(ElementObject
* self
, PyObject
* args
)
1085 if (!PyArg_ParseTuple(args
, ":__reduce__"))
1088 /* Hack alert: This method is used to work around a __copy__
1089 problem on certain 2.3 and 2.4 versions. To save time and
1090 simplify the code, we create the copy in here, and use a dummy
1091 copyelement helper to trick the copy module into doing the
1094 if (!elementtree_copyelement_obj
) {
1097 "copyelement helper not found"
1102 return Py_BuildValue(
1103 "O(N)", elementtree_copyelement_obj
, element_copy(self
, args
)
1108 element_remove(ElementObject
* self
, PyObject
* args
)
1113 if (!PyArg_ParseTuple(args
, "O!:remove", &Element_Type
, &element
))
1117 /* element has no children, so raise exception */
1120 "list.remove(x): x not in list"
1125 for (i
= 0; i
< self
->extra
->length
; i
++) {
1126 if (self
->extra
->children
[i
] == element
)
1128 if (PyObject_Compare(self
->extra
->children
[i
], element
) == 0)
1132 if (i
== self
->extra
->length
) {
1133 /* element is not in children, so raise exception */
1136 "list.remove(x): x not in list"
1141 Py_DECREF(self
->extra
->children
[i
]);
1143 self
->extra
->length
--;
1145 for (; i
< self
->extra
->length
; i
++)
1146 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1152 element_repr(ElementObject
* self
)
1157 repr
= PyString_FromString("<Element ");
1159 PyString_ConcatAndDel(&repr
, PyObject_Repr(self
->tag
));
1161 sprintf(buffer
, " at %p>", self
);
1162 PyString_ConcatAndDel(&repr
, PyString_FromString(buffer
));
1168 element_set(ElementObject
* self
, PyObject
* args
)
1174 if (!PyArg_ParseTuple(args
, "OO:set", &key
, &value
))
1178 element_new_extra(self
, NULL
);
1180 attrib
= element_get_attrib(self
);
1184 if (PyDict_SetItem(attrib
, key
, value
) < 0)
1191 element_setslice(PyObject
* self_
, Py_ssize_t start
, Py_ssize_t end
, PyObject
* item
)
1193 ElementObject
* self
= (ElementObject
*) self_
;
1194 Py_ssize_t i
, new, old
;
1195 PyObject
* recycle
= NULL
;
1198 element_new_extra(self
, NULL
);
1200 /* standard clamping */
1205 if (end
> self
->extra
->length
)
1206 end
= self
->extra
->length
;
1214 else if (PyList_CheckExact(item
)) {
1215 new = PyList_GET_SIZE(item
);
1217 /* FIXME: support arbitrary sequences? */
1220 "expected list, not \"%.200s\"", Py_TYPE(item
)->tp_name
1226 /* to avoid recursive calls to this method (via decref), move
1227 old items to the recycle bin here, and get rid of them when
1228 we're done modifying the element */
1229 recycle
= PyList_New(old
);
1230 for (i
= 0; i
< old
; i
++)
1231 PyList_SET_ITEM(recycle
, i
, self
->extra
->children
[i
+ start
]);
1236 for (i
= end
; i
< self
->extra
->length
; i
++)
1237 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1238 } else if (new > old
) {
1240 if (element_resize(self
, new - old
) < 0)
1242 for (i
= self
->extra
->length
-1; i
>= end
; i
--)
1243 self
->extra
->children
[i
+ new - old
] = self
->extra
->children
[i
];
1246 /* replace the slice */
1247 for (i
= 0; i
< new; i
++) {
1248 PyObject
* element
= PyList_GET_ITEM(item
, i
);
1250 self
->extra
->children
[i
+ start
] = element
;
1253 self
->extra
->length
+= new - old
;
1255 /* discard the recycle bin, and everything in it */
1256 Py_XDECREF(recycle
);
1262 element_setitem(PyObject
* self_
, Py_ssize_t index
, PyObject
* item
)
1264 ElementObject
* self
= (ElementObject
*) self_
;
1268 if (!self
->extra
|| index
< 0 || index
>= self
->extra
->length
) {
1271 "child assignment index out of range");
1275 old
= self
->extra
->children
[index
];
1279 self
->extra
->children
[index
] = item
;
1281 self
->extra
->length
--;
1282 for (i
= index
; i
< self
->extra
->length
; i
++)
1283 self
->extra
->children
[i
] = self
->extra
->children
[i
+1];
1291 static PyMethodDef element_methods
[] = {
1293 {"clear", (PyCFunction
) element_clear
, METH_VARARGS
},
1295 {"get", (PyCFunction
) element_get
, METH_VARARGS
},
1296 {"set", (PyCFunction
) element_set
, METH_VARARGS
},
1298 {"find", (PyCFunction
) element_find
, METH_VARARGS
},
1299 {"findtext", (PyCFunction
) element_findtext
, METH_VARARGS
},
1300 {"findall", (PyCFunction
) element_findall
, METH_VARARGS
},
1302 {"append", (PyCFunction
) element_append
, METH_VARARGS
},
1303 {"insert", (PyCFunction
) element_insert
, METH_VARARGS
},
1304 {"remove", (PyCFunction
) element_remove
, METH_VARARGS
},
1306 {"getiterator", (PyCFunction
) element_getiterator
, METH_VARARGS
},
1307 {"getchildren", (PyCFunction
) element_getchildren
, METH_VARARGS
},
1309 {"items", (PyCFunction
) element_items
, METH_VARARGS
},
1310 {"keys", (PyCFunction
) element_keys
, METH_VARARGS
},
1312 {"makeelement", (PyCFunction
) element_makeelement
, METH_VARARGS
},
1314 {"__copy__", (PyCFunction
) element_copy
, METH_VARARGS
},
1315 {"__deepcopy__", (PyCFunction
) element_deepcopy
, METH_VARARGS
},
1317 /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
1318 C objects correctly, so we have to fake it using a __reduce__-
1319 based hack (see the element_reduce implementation above for
1322 /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
1323 using a runtime test to figure out if we need to fake things
1324 or not (see the init code below). The following entry is
1325 enabled only if the hack is needed. */
1327 {"!__reduce__", (PyCFunction
) element_reduce
, METH_VARARGS
},
1333 element_getattr(ElementObject
* self
, char* name
)
1337 res
= Py_FindMethod(element_methods
, (PyObject
*) self
, name
);
1343 if (strcmp(name
, "tag") == 0)
1345 else if (strcmp(name
, "text") == 0)
1346 res
= element_get_text(self
);
1347 else if (strcmp(name
, "tail") == 0) {
1348 res
= element_get_tail(self
);
1349 } else if (strcmp(name
, "attrib") == 0) {
1351 element_new_extra(self
, NULL
);
1352 res
= element_get_attrib(self
);
1354 PyErr_SetString(PyExc_AttributeError
, name
);
1366 element_setattr(ElementObject
* self
, const char* name
, PyObject
* value
)
1368 if (value
== NULL
) {
1370 PyExc_AttributeError
,
1371 "can't delete element attributes"
1376 if (strcmp(name
, "tag") == 0) {
1377 Py_DECREF(self
->tag
);
1379 Py_INCREF(self
->tag
);
1380 } else if (strcmp(name
, "text") == 0) {
1381 Py_DECREF(JOIN_OBJ(self
->text
));
1383 Py_INCREF(self
->text
);
1384 } else if (strcmp(name
, "tail") == 0) {
1385 Py_DECREF(JOIN_OBJ(self
->tail
));
1387 Py_INCREF(self
->tail
);
1388 } else if (strcmp(name
, "attrib") == 0) {
1390 element_new_extra(self
, NULL
);
1391 Py_DECREF(self
->extra
->attrib
);
1392 self
->extra
->attrib
= value
;
1393 Py_INCREF(self
->extra
->attrib
);
1395 PyErr_SetString(PyExc_AttributeError
, name
);
1402 static PySequenceMethods element_as_sequence
= {
1403 (lenfunc
) element_length
,
1412 statichere PyTypeObject Element_Type
= {
1413 PyObject_HEAD_INIT(NULL
)
1414 0, "Element", sizeof(ElementObject
), 0,
1416 (destructor
)element_dealloc
, /* tp_dealloc */
1418 (getattrfunc
)element_getattr
, /* tp_getattr */
1419 (setattrfunc
)element_setattr
, /* tp_setattr */
1421 (reprfunc
)element_repr
, /* tp_repr */
1422 0, /* tp_as_number */
1423 &element_as_sequence
, /* tp_as_sequence */
1426 /* ==================================================================== */
1427 /* the tree builder type */
1432 PyObject
* root
; /* root node (first created node) */
1434 ElementObject
* this; /* current node */
1435 ElementObject
* last
; /* most recently created node */
1437 PyObject
* data
; /* data collector (string or list), or NULL */
1439 PyObject
* stack
; /* element stack */
1440 Py_ssize_t index
; /* current stack size (0=empty) */
1442 /* element tracing */
1443 PyObject
* events
; /* list of events, or NULL if not collecting */
1444 PyObject
* start_event_obj
; /* event objects (NULL to ignore) */
1445 PyObject
* end_event_obj
;
1446 PyObject
* start_ns_event_obj
;
1447 PyObject
* end_ns_event_obj
;
1449 } TreeBuilderObject
;
1451 staticforward PyTypeObject TreeBuilder_Type
;
1453 #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
1455 /* -------------------------------------------------------------------- */
1456 /* constructor and destructor */
1459 treebuilder_new(void)
1461 TreeBuilderObject
* self
;
1463 self
= PyObject_New(TreeBuilderObject
, &TreeBuilder_Type
);
1470 self
->this = (ElementObject
*) Py_None
;
1473 self
->last
= (ElementObject
*) Py_None
;
1477 self
->stack
= PyList_New(20);
1480 self
->events
= NULL
;
1481 self
->start_event_obj
= self
->end_event_obj
= NULL
;
1482 self
->start_ns_event_obj
= self
->end_ns_event_obj
= NULL
;
1484 ALLOC(sizeof(TreeBuilderObject
), "create treebuilder");
1486 return (PyObject
*) self
;
1490 treebuilder(PyObject
* self_
, PyObject
* args
)
1492 if (!PyArg_ParseTuple(args
, ":TreeBuilder"))
1495 return treebuilder_new();
1499 treebuilder_dealloc(TreeBuilderObject
* self
)
1501 Py_XDECREF(self
->end_ns_event_obj
);
1502 Py_XDECREF(self
->start_ns_event_obj
);
1503 Py_XDECREF(self
->end_event_obj
);
1504 Py_XDECREF(self
->start_event_obj
);
1505 Py_XDECREF(self
->events
);
1506 Py_DECREF(self
->stack
);
1507 Py_XDECREF(self
->data
);
1508 Py_DECREF(self
->last
);
1509 Py_DECREF(self
->this);
1510 Py_XDECREF(self
->root
);
1512 RELEASE(sizeof(TreeBuilderObject
), "destroy treebuilder");
1517 /* -------------------------------------------------------------------- */
1521 treebuilder_handle_xml(TreeBuilderObject
* self
, PyObject
* encoding
,
1522 PyObject
* standalone
)
1528 treebuilder_handle_start(TreeBuilderObject
* self
, PyObject
* tag
,
1535 if (self
->this == self
->last
) {
1536 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1537 self
->last
->text
= JOIN_SET(
1538 self
->data
, PyList_CheckExact(self
->data
)
1541 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1542 self
->last
->tail
= JOIN_SET(
1543 self
->data
, PyList_CheckExact(self
->data
)
1549 node
= element_new(tag
, attrib
);
1553 this = (PyObject
*) self
->this;
1555 if (this != Py_None
) {
1556 if (element_add_subelement((ElementObject
*) this, node
) < 0)
1562 "multiple elements on top level"
1570 if (self
->index
< PyList_GET_SIZE(self
->stack
)) {
1571 if (PyList_SetItem(self
->stack
, self
->index
, this) < 0)
1575 if (PyList_Append(self
->stack
, this) < 0)
1582 self
->this = (ElementObject
*) node
;
1584 Py_DECREF(self
->last
);
1586 self
->last
= (ElementObject
*) node
;
1588 if (self
->start_event_obj
) {
1590 PyObject
* action
= self
->start_event_obj
;
1591 res
= PyTuple_New(2);
1593 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1594 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1595 PyList_Append(self
->events
, res
);
1598 PyErr_Clear(); /* FIXME: propagate error */
1609 treebuilder_handle_data(TreeBuilderObject
* self
, PyObject
* data
)
1612 if (self
->last
== (ElementObject
*) Py_None
) {
1613 /* ignore calls to data before the first call to start */
1616 /* store the first item as is */
1617 Py_INCREF(data
); self
->data
= data
;
1619 /* more than one item; use a list to collect items */
1620 if (PyString_CheckExact(self
->data
) && Py_REFCNT(self
->data
) == 1 &&
1621 PyString_CheckExact(data
) && PyString_GET_SIZE(data
) == 1) {
1622 /* expat often generates single character data sections; handle
1623 the most common case by resizing the existing string... */
1624 Py_ssize_t size
= PyString_GET_SIZE(self
->data
);
1625 if (_PyString_Resize(&self
->data
, size
+ 1) < 0)
1627 PyString_AS_STRING(self
->data
)[size
] = PyString_AS_STRING(data
)[0];
1628 } else if (PyList_CheckExact(self
->data
)) {
1629 if (PyList_Append(self
->data
, data
) < 0)
1632 PyObject
* list
= PyList_New(2);
1635 PyList_SET_ITEM(list
, 0, self
->data
);
1636 Py_INCREF(data
); PyList_SET_ITEM(list
, 1, data
);
1645 treebuilder_handle_end(TreeBuilderObject
* self
, PyObject
* tag
)
1650 if (self
->this == self
->last
) {
1651 Py_DECREF(JOIN_OBJ(self
->last
->text
));
1652 self
->last
->text
= JOIN_SET(
1653 self
->data
, PyList_CheckExact(self
->data
)
1656 Py_DECREF(JOIN_OBJ(self
->last
->tail
));
1657 self
->last
->tail
= JOIN_SET(
1658 self
->data
, PyList_CheckExact(self
->data
)
1664 if (self
->index
== 0) {
1667 "pop from empty stack"
1674 item
= PyList_GET_ITEM(self
->stack
, self
->index
);
1677 Py_DECREF(self
->last
);
1679 self
->last
= (ElementObject
*) self
->this;
1680 self
->this = (ElementObject
*) item
;
1682 if (self
->end_event_obj
) {
1684 PyObject
* action
= self
->end_event_obj
;
1685 PyObject
* node
= (PyObject
*) self
->last
;
1686 res
= PyTuple_New(2);
1688 Py_INCREF(action
); PyTuple_SET_ITEM(res
, 0, (PyObject
*) action
);
1689 Py_INCREF(node
); PyTuple_SET_ITEM(res
, 1, (PyObject
*) node
);
1690 PyList_Append(self
->events
, res
);
1693 PyErr_Clear(); /* FIXME: propagate error */
1696 Py_INCREF(self
->last
);
1697 return (PyObject
*) self
->last
;
1701 treebuilder_handle_namespace(TreeBuilderObject
* self
, int start
,
1702 const char* prefix
, const char *uri
)
1712 if (!self
->start_ns_event_obj
)
1714 action
= self
->start_ns_event_obj
;
1715 /* FIXME: prefix and uri use utf-8 encoding! */
1716 parcel
= Py_BuildValue("ss", (prefix
) ? prefix
: "", uri
);
1721 if (!self
->end_ns_event_obj
)
1723 action
= self
->end_ns_event_obj
;
1729 res
= PyTuple_New(2);
1732 PyTuple_SET_ITEM(res
, 0, action
);
1733 PyTuple_SET_ITEM(res
, 1, parcel
);
1734 PyList_Append(self
->events
, res
);
1737 PyErr_Clear(); /* FIXME: propagate error */
1740 /* -------------------------------------------------------------------- */
1741 /* methods (in alphabetical order) */
1744 treebuilder_data(TreeBuilderObject
* self
, PyObject
* args
)
1747 if (!PyArg_ParseTuple(args
, "O:data", &data
))
1750 return treebuilder_handle_data(self
, data
);
1754 treebuilder_end(TreeBuilderObject
* self
, PyObject
* args
)
1757 if (!PyArg_ParseTuple(args
, "O:end", &tag
))
1760 return treebuilder_handle_end(self
, tag
);
1764 treebuilder_done(TreeBuilderObject
* self
)
1768 /* FIXME: check stack size? */
1780 treebuilder_close(TreeBuilderObject
* self
, PyObject
* args
)
1782 if (!PyArg_ParseTuple(args
, ":close"))
1785 return treebuilder_done(self
);
1789 treebuilder_start(TreeBuilderObject
* self
, PyObject
* args
)
1792 PyObject
* attrib
= Py_None
;
1793 if (!PyArg_ParseTuple(args
, "O|O:start", &tag
, &attrib
))
1796 return treebuilder_handle_start(self
, tag
, attrib
);
1800 treebuilder_xml(TreeBuilderObject
* self
, PyObject
* args
)
1803 PyObject
* standalone
;
1804 if (!PyArg_ParseTuple(args
, "OO:xml", &encoding
, &standalone
))
1807 return treebuilder_handle_xml(self
, encoding
, standalone
);
1810 static PyMethodDef treebuilder_methods
[] = {
1811 {"data", (PyCFunction
) treebuilder_data
, METH_VARARGS
},
1812 {"start", (PyCFunction
) treebuilder_start
, METH_VARARGS
},
1813 {"end", (PyCFunction
) treebuilder_end
, METH_VARARGS
},
1814 {"xml", (PyCFunction
) treebuilder_xml
, METH_VARARGS
},
1815 {"close", (PyCFunction
) treebuilder_close
, METH_VARARGS
},
1820 treebuilder_getattr(TreeBuilderObject
* self
, char* name
)
1822 return Py_FindMethod(treebuilder_methods
, (PyObject
*) self
, name
);
1825 statichere PyTypeObject TreeBuilder_Type
= {
1826 PyObject_HEAD_INIT(NULL
)
1827 0, "TreeBuilder", sizeof(TreeBuilderObject
), 0,
1829 (destructor
)treebuilder_dealloc
, /* tp_dealloc */
1831 (getattrfunc
)treebuilder_getattr
, /* tp_getattr */
1834 /* ==================================================================== */
1835 /* the expat interface */
1837 #if defined(USE_EXPAT)
1841 #if defined(USE_PYEXPAT_CAPI)
1842 #include "pyexpat.h"
1843 static struct PyExpat_CAPI
* expat_capi
;
1844 #define EXPAT(func) (expat_capi->func)
1846 #define EXPAT(func) (XML_##func)
1859 PyObject
* handle_xml
;
1860 PyObject
* handle_start
;
1861 PyObject
* handle_data
;
1862 PyObject
* handle_end
;
1864 PyObject
* handle_comment
;
1865 PyObject
* handle_pi
;
1869 staticforward PyTypeObject XMLParser_Type
;
1873 #if defined(Py_USING_UNICODE)
1875 checkstring(const char* string
, int size
)
1879 /* check if an 8-bit string contains UTF-8 characters */
1880 for (i
= 0; i
< size
; i
++)
1881 if (string
[i
] & 0x80)
1889 makestring(const char* string
, int size
)
1891 /* convert a UTF-8 string to either a 7-bit ascii string or a
1894 #if defined(Py_USING_UNICODE)
1895 if (checkstring(string
, size
))
1896 return PyUnicode_DecodeUTF8(string
, size
, "strict");
1899 return PyString_FromStringAndSize(string
, size
);
1903 makeuniversal(XMLParserObject
* self
, const char* string
)
1905 /* convert a UTF-8 tag/attribute name from the expat parser
1906 to a universal name string */
1908 int size
= strlen(string
);
1912 /* look the 'raw' name up in the names dictionary */
1913 key
= PyString_FromStringAndSize(string
, size
);
1917 value
= PyDict_GetItem(self
->names
, key
);
1922 /* new name. convert to universal name, and decode as
1929 /* look for namespace separator */
1930 for (i
= 0; i
< size
; i
++)
1931 if (string
[i
] == '}')
1934 /* convert to universal name */
1935 tag
= PyString_FromStringAndSize(NULL
, size
+1);
1936 p
= PyString_AS_STRING(tag
);
1938 memcpy(p
+1, string
, size
);
1941 /* plain name; use key as tag */
1946 /* decode universal name */
1947 #if defined(Py_USING_UNICODE)
1948 /* inline makestring, to avoid duplicating the source string if
1949 it's not an utf-8 string */
1950 p
= PyString_AS_STRING(tag
);
1951 if (checkstring(p
, size
)) {
1952 value
= PyUnicode_DecodeUTF8(p
, size
, "strict");
1960 value
= tag
; /* use tag as is */
1962 /* add to names dictionary */
1963 if (PyDict_SetItem(self
->names
, key
, value
) < 0) {
1974 /* -------------------------------------------------------------------- */
1978 expat_default_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
1985 if (data_len
< 2 || data_in
[0] != '&')
1988 key
= makestring(data_in
+ 1, data_len
- 2);
1992 value
= PyDict_GetItem(self
->entity
, key
);
1995 if (TreeBuilder_CheckExact(self
->target
))
1996 res
= treebuilder_handle_data(
1997 (TreeBuilderObject
*) self
->target
, value
1999 else if (self
->handle_data
)
2000 res
= PyObject_CallFunction(self
->handle_data
, "O", value
);
2006 PyExc_SyntaxError
, "undefined entity &%s;: line %ld, column %ld",
2007 PyString_AS_STRING(key
),
2008 EXPAT(GetErrorLineNumber
)(self
->parser
),
2009 EXPAT(GetErrorColumnNumber
)(self
->parser
)
2017 expat_start_handler(XMLParserObject
* self
, const XML_Char
* tag_in
,
2018 const XML_Char
**attrib_in
)
2026 tag
= makeuniversal(self
, tag_in
);
2028 return; /* parser will look for errors */
2032 attrib
= PyDict_New();
2035 while (attrib_in
[0] && attrib_in
[1]) {
2036 PyObject
* key
= makeuniversal(self
, attrib_in
[0]);
2037 PyObject
* value
= makestring(attrib_in
[1], strlen(attrib_in
[1]));
2038 if (!key
|| !value
) {
2044 ok
= PyDict_SetItem(attrib
, key
, value
);
2058 if (TreeBuilder_CheckExact(self
->target
))
2060 res
= treebuilder_handle_start((TreeBuilderObject
*) self
->target
,
2062 else if (self
->handle_start
)
2063 res
= PyObject_CallFunction(self
->handle_start
, "OO", tag
, attrib
);
2074 expat_data_handler(XMLParserObject
* self
, const XML_Char
* data_in
,
2080 data
= makestring(data_in
, data_len
);
2082 return; /* parser will look for errors */
2084 if (TreeBuilder_CheckExact(self
->target
))
2086 res
= treebuilder_handle_data((TreeBuilderObject
*) self
->target
, data
);
2087 else if (self
->handle_data
)
2088 res
= PyObject_CallFunction(self
->handle_data
, "O", data
);
2098 expat_end_handler(XMLParserObject
* self
, const XML_Char
* tag_in
)
2101 PyObject
* res
= NULL
;
2103 if (TreeBuilder_CheckExact(self
->target
))
2105 /* the standard tree builder doesn't look at the end tag */
2106 res
= treebuilder_handle_end(
2107 (TreeBuilderObject
*) self
->target
, Py_None
2109 else if (self
->handle_end
) {
2110 tag
= makeuniversal(self
, tag_in
);
2112 res
= PyObject_CallFunction(self
->handle_end
, "O", tag
);
2121 expat_start_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix
,
2122 const XML_Char
*uri
)
2124 treebuilder_handle_namespace(
2125 (TreeBuilderObject
*) self
->target
, 1, prefix
, uri
2130 expat_end_ns_handler(XMLParserObject
* self
, const XML_Char
* prefix_in
)
2132 treebuilder_handle_namespace(
2133 (TreeBuilderObject
*) self
->target
, 0, NULL
, NULL
2138 expat_comment_handler(XMLParserObject
* self
, const XML_Char
* comment_in
)
2143 if (self
->handle_comment
) {
2144 comment
= makestring(comment_in
, strlen(comment_in
));
2146 res
= PyObject_CallFunction(self
->handle_comment
, "O", comment
);
2154 expat_pi_handler(XMLParserObject
* self
, const XML_Char
* target_in
,
2155 const XML_Char
* data_in
)
2161 if (self
->handle_pi
) {
2162 target
= makestring(target_in
, strlen(target_in
));
2163 data
= makestring(data_in
, strlen(data_in
));
2164 if (target
&& data
) {
2165 res
= PyObject_CallFunction(self
->handle_pi
, "OO", target
, data
);
2176 #if defined(Py_USING_UNICODE)
2178 expat_unknown_encoding_handler(XMLParserObject
*self
, const XML_Char
*name
,
2183 unsigned char s
[256];
2186 memset(info
, 0, sizeof(XML_Encoding
));
2188 for (i
= 0; i
< 256; i
++)
2191 u
= PyUnicode_Decode((char*) s
, 256, name
, "replace");
2193 return XML_STATUS_ERROR
;
2195 if (PyUnicode_GET_SIZE(u
) != 256) {
2197 return XML_STATUS_ERROR
;
2200 p
= PyUnicode_AS_UNICODE(u
);
2202 for (i
= 0; i
< 256; i
++) {
2203 if (p
[i
] != Py_UNICODE_REPLACEMENT_CHARACTER
)
2204 info
->map
[i
] = p
[i
];
2211 return XML_STATUS_OK
;
2215 /* -------------------------------------------------------------------- */
2216 /* constructor and destructor */
2219 xmlparser(PyObject
* self_
, PyObject
* args
, PyObject
* kw
)
2221 XMLParserObject
* self
;
2222 /* FIXME: does this need to be static? */
2223 static XML_Memory_Handling_Suite memory_handler
;
2225 PyObject
* target
= NULL
;
2226 char* encoding
= NULL
;
2227 static char* kwlist
[] = { "target", "encoding", NULL
};
2228 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|Oz:XMLParser", kwlist
,
2229 &target
, &encoding
))
2232 #if defined(USE_PYEXPAT_CAPI)
2235 PyExc_RuntimeError
, "cannot load dispatch table from pyexpat"
2241 self
= PyObject_New(XMLParserObject
, &XMLParser_Type
);
2245 self
->entity
= PyDict_New();
2246 if (!self
->entity
) {
2251 self
->names
= PyDict_New();
2253 PyObject_Del(self
->entity
);
2258 memory_handler
.malloc_fcn
= PyObject_Malloc
;
2259 memory_handler
.realloc_fcn
= PyObject_Realloc
;
2260 memory_handler
.free_fcn
= PyObject_Free
;
2262 self
->parser
= EXPAT(ParserCreate_MM
)(encoding
, &memory_handler
, "}");
2263 if (!self
->parser
) {
2264 PyObject_Del(self
->names
);
2265 PyObject_Del(self
->entity
);
2271 /* setup target handlers */
2273 target
= treebuilder_new();
2275 EXPAT(ParserFree
)(self
->parser
);
2276 PyObject_Del(self
->names
);
2277 PyObject_Del(self
->entity
);
2283 self
->target
= target
;
2285 self
->handle_xml
= PyObject_GetAttrString(target
, "xml");
2286 self
->handle_start
= PyObject_GetAttrString(target
, "start");
2287 self
->handle_data
= PyObject_GetAttrString(target
, "data");
2288 self
->handle_end
= PyObject_GetAttrString(target
, "end");
2289 self
->handle_comment
= PyObject_GetAttrString(target
, "comment");
2290 self
->handle_pi
= PyObject_GetAttrString(target
, "pi");
2294 /* configure parser */
2295 EXPAT(SetUserData
)(self
->parser
, self
);
2296 EXPAT(SetElementHandler
)(
2298 (XML_StartElementHandler
) expat_start_handler
,
2299 (XML_EndElementHandler
) expat_end_handler
2301 EXPAT(SetDefaultHandlerExpand
)(
2303 (XML_DefaultHandler
) expat_default_handler
2305 EXPAT(SetCharacterDataHandler
)(
2307 (XML_CharacterDataHandler
) expat_data_handler
2309 if (self
->handle_comment
)
2310 EXPAT(SetCommentHandler
)(
2312 (XML_CommentHandler
) expat_comment_handler
2314 if (self
->handle_pi
)
2315 EXPAT(SetProcessingInstructionHandler
)(
2317 (XML_ProcessingInstructionHandler
) expat_pi_handler
2319 #if defined(Py_USING_UNICODE)
2320 EXPAT(SetUnknownEncodingHandler
)(
2322 (XML_UnknownEncodingHandler
) expat_unknown_encoding_handler
, NULL
2326 ALLOC(sizeof(XMLParserObject
), "create expatparser");
2328 return (PyObject
*) self
;
2332 xmlparser_dealloc(XMLParserObject
* self
)
2334 EXPAT(ParserFree
)(self
->parser
);
2336 Py_XDECREF(self
->handle_pi
);
2337 Py_XDECREF(self
->handle_comment
);
2338 Py_XDECREF(self
->handle_end
);
2339 Py_XDECREF(self
->handle_data
);
2340 Py_XDECREF(self
->handle_start
);
2341 Py_XDECREF(self
->handle_xml
);
2343 Py_DECREF(self
->target
);
2344 Py_DECREF(self
->entity
);
2345 Py_DECREF(self
->names
);
2347 RELEASE(sizeof(XMLParserObject
), "destroy expatparser");
2352 /* -------------------------------------------------------------------- */
2353 /* methods (in alphabetical order) */
2356 expat_parse(XMLParserObject
* self
, char* data
, int data_len
, int final
)
2360 ok
= EXPAT(Parse
)(self
->parser
, data
, data_len
, final
);
2362 if (PyErr_Occurred())
2367 PyExc_SyntaxError
, "%s: line %ld, column %ld",
2368 EXPAT(ErrorString
)(EXPAT(GetErrorCode
)(self
->parser
)),
2369 EXPAT(GetErrorLineNumber
)(self
->parser
),
2370 EXPAT(GetErrorColumnNumber
)(self
->parser
)
2379 xmlparser_close(XMLParserObject
* self
, PyObject
* args
)
2381 /* end feeding data to parser */
2384 if (!PyArg_ParseTuple(args
, ":close"))
2387 res
= expat_parse(self
, "", 0, 1);
2389 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2391 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2398 xmlparser_feed(XMLParserObject
* self
, PyObject
* args
)
2400 /* feed data to parser */
2404 if (!PyArg_ParseTuple(args
, "s#:feed", &data
, &data_len
))
2407 return expat_parse(self
, data
, data_len
, 0);
2411 xmlparser_parse(XMLParserObject
* self
, PyObject
* args
)
2413 /* (internal) parse until end of input stream */
2420 if (!PyArg_ParseTuple(args
, "O:_parse", &fileobj
))
2423 reader
= PyObject_GetAttrString(fileobj
, "read");
2427 /* read from open file object */
2430 buffer
= PyObject_CallFunction(reader
, "i", 64*1024);
2433 /* read failed (e.g. due to KeyboardInterrupt) */
2438 if (!PyString_CheckExact(buffer
) || PyString_GET_SIZE(buffer
) == 0) {
2444 self
, PyString_AS_STRING(buffer
), PyString_GET_SIZE(buffer
), 0
2459 res
= expat_parse(self
, "", 0, 1);
2461 if (res
&& TreeBuilder_CheckExact(self
->target
)) {
2463 return treebuilder_done((TreeBuilderObject
*) self
->target
);
2470 xmlparser_setevents(XMLParserObject
* self
, PyObject
* args
)
2472 /* activate element event reporting */
2475 TreeBuilderObject
* target
;
2477 PyObject
* events
; /* event collector */
2478 PyObject
* event_set
= Py_None
;
2479 if (!PyArg_ParseTuple(args
, "O!|O:_setevents", &PyList_Type
, &events
,
2483 if (!TreeBuilder_CheckExact(self
->target
)) {
2486 "event handling only supported for cElementTree.Treebuilder "
2492 target
= (TreeBuilderObject
*) self
->target
;
2495 Py_XDECREF(target
->events
);
2496 target
->events
= events
;
2498 /* clear out existing events */
2499 Py_XDECREF(target
->start_event_obj
); target
->start_event_obj
= NULL
;
2500 Py_XDECREF(target
->end_event_obj
); target
->end_event_obj
= NULL
;
2501 Py_XDECREF(target
->start_ns_event_obj
); target
->start_ns_event_obj
= NULL
;
2502 Py_XDECREF(target
->end_ns_event_obj
); target
->end_ns_event_obj
= NULL
;
2504 if (event_set
== Py_None
) {
2505 /* default is "end" only */
2506 target
->end_event_obj
= PyString_FromString("end");
2510 if (!PyTuple_Check(event_set
)) /* FIXME: handle arbitrary sequences */
2513 for (i
= 0; i
< PyTuple_GET_SIZE(event_set
); i
++) {
2514 PyObject
* item
= PyTuple_GET_ITEM(event_set
, i
);
2516 if (!PyString_Check(item
))
2518 event
= PyString_AS_STRING(item
);
2519 if (strcmp(event
, "start") == 0) {
2521 target
->start_event_obj
= item
;
2522 } else if (strcmp(event
, "end") == 0) {
2524 Py_XDECREF(target
->end_event_obj
);
2525 target
->end_event_obj
= item
;
2526 } else if (strcmp(event
, "start-ns") == 0) {
2528 Py_XDECREF(target
->start_ns_event_obj
);
2529 target
->start_ns_event_obj
= item
;
2530 EXPAT(SetNamespaceDeclHandler
)(
2532 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2533 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2535 } else if (strcmp(event
, "end-ns") == 0) {
2537 Py_XDECREF(target
->end_ns_event_obj
);
2538 target
->end_ns_event_obj
= item
;
2539 EXPAT(SetNamespaceDeclHandler
)(
2541 (XML_StartNamespaceDeclHandler
) expat_start_ns_handler
,
2542 (XML_EndNamespaceDeclHandler
) expat_end_ns_handler
2547 "unknown event '%s'", event
2558 "invalid event tuple"
2563 static PyMethodDef xmlparser_methods
[] = {
2564 {"feed", (PyCFunction
) xmlparser_feed
, METH_VARARGS
},
2565 {"close", (PyCFunction
) xmlparser_close
, METH_VARARGS
},
2566 {"_parse", (PyCFunction
) xmlparser_parse
, METH_VARARGS
},
2567 {"_setevents", (PyCFunction
) xmlparser_setevents
, METH_VARARGS
},
2572 xmlparser_getattr(XMLParserObject
* self
, char* name
)
2576 res
= Py_FindMethod(xmlparser_methods
, (PyObject
*) self
, name
);
2582 if (strcmp(name
, "entity") == 0)
2584 else if (strcmp(name
, "target") == 0)
2586 else if (strcmp(name
, "version") == 0) {
2588 sprintf(buffer
, "Expat %d.%d.%d", XML_MAJOR_VERSION
,
2589 XML_MINOR_VERSION
, XML_MICRO_VERSION
);
2590 return PyString_FromString(buffer
);
2592 PyErr_SetString(PyExc_AttributeError
, name
);
2600 statichere PyTypeObject XMLParser_Type
= {
2601 PyObject_HEAD_INIT(NULL
)
2602 0, "XMLParser", sizeof(XMLParserObject
), 0,
2604 (destructor
)xmlparser_dealloc
, /* tp_dealloc */
2606 (getattrfunc
)xmlparser_getattr
, /* tp_getattr */
2611 /* ==================================================================== */
2612 /* python module interface */
2614 static PyMethodDef _functions
[] = {
2615 {"Element", (PyCFunction
) element
, METH_VARARGS
|METH_KEYWORDS
},
2616 {"SubElement", (PyCFunction
) subelement
, METH_VARARGS
|METH_KEYWORDS
},
2617 {"TreeBuilder", (PyCFunction
) treebuilder
, METH_VARARGS
},
2618 #if defined(USE_EXPAT)
2619 {"XMLParser", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2620 {"XMLTreeBuilder", (PyCFunction
) xmlparser
, METH_VARARGS
|METH_KEYWORDS
},
2626 init_elementtree(void)
2631 #if defined(USE_PYEXPAT_CAPI)
2632 struct PyExpat_CAPI
* capi
;
2635 /* Patch object type */
2636 Py_TYPE(&Element_Type
) = Py_TYPE(&TreeBuilder_Type
) = &PyType_Type
;
2637 #if defined(USE_EXPAT)
2638 Py_TYPE(&XMLParser_Type
) = &PyType_Type
;
2641 m
= Py_InitModule("_elementtree", _functions
);
2645 /* python glue code */
2651 PyDict_SetItemString(g
, "__builtins__", PyEval_GetBuiltins());
2655 #if (PY_VERSION_HEX >= 0x02020000 && PY_VERSION_HEX < 0x02030000)
2656 "from __future__ import generators\n" /* enable yield under 2.2 */
2659 "from copy import copy, deepcopy\n"
2662 " from xml.etree import ElementTree\n"
2663 "except ImportError:\n"
2664 " import ElementTree\n"
2665 "ET = ElementTree\n"
2668 "import _elementtree as cElementTree\n"
2670 "try:\n" /* check if copy works as is */
2671 " copy(cElementTree.Element('x'))\n"
2673 " def copyelement(elem):\n"
2676 "def Comment(text=None):\n" /* public */
2677 " element = cElementTree.Element(ET.Comment)\n"
2678 " element.text = text\n"
2680 "cElementTree.Comment = Comment\n"
2682 "class ElementTree(ET.ElementTree):\n" /* public */
2683 " def parse(self, source, parser=None):\n"
2684 " if not hasattr(source, 'read'):\n"
2685 " source = open(source, 'rb')\n"
2686 " if parser is not None:\n"
2688 " data = source.read(65536)\n"
2691 " parser.feed(data)\n"
2692 " self._root = parser.close()\n"
2694 " parser = cElementTree.XMLParser()\n"
2695 " self._root = parser._parse(source)\n"
2696 " return self._root\n"
2697 "cElementTree.ElementTree = ElementTree\n"
2699 "def getiterator(node, tag=None):\n" /* helper */
2702 #if (PY_VERSION_HEX < 0x02020000)
2703 " nodes = []\n" /* 2.1 doesn't have yield */
2704 " if tag is None or node.tag == tag:\n"
2705 " nodes.append(node)\n"
2706 " for node in node:\n"
2707 " nodes.extend(getiterator(node, tag))\n"
2710 " if tag is None or node.tag == tag:\n"
2712 " for node in node:\n"
2713 " for node in getiterator(node, tag):\n"
2717 "def parse(source, parser=None):\n" /* public */
2718 " tree = ElementTree()\n"
2719 " tree.parse(source, parser)\n"
2721 "cElementTree.parse = parse\n"
2723 #if (PY_VERSION_HEX < 0x02020000)
2724 "if hasattr(ET, 'iterparse'):\n"
2725 " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
2727 "class iterparse(object):\n"
2729 " def __init__(self, file, events=None):\n"
2730 " if not hasattr(file, 'read'):\n"
2731 " file = open(file, 'rb')\n"
2732 " self._file = file\n"
2733 " self._events = events\n"
2734 " def __iter__(self):\n"
2736 " b = cElementTree.TreeBuilder()\n"
2737 " p = cElementTree.XMLParser(b)\n"
2738 " p._setevents(events, self._events)\n"
2740 " data = self._file.read(16384)\n"
2744 " for event in events:\n"
2747 " root = p.close()\n"
2748 " for event in events:\n"
2750 " self.root = root\n"
2751 "cElementTree.iterparse = iterparse\n"
2754 "def PI(target, text=None):\n" /* public */
2755 " element = cElementTree.Element(ET.ProcessingInstruction)\n"
2756 " element.text = target\n"
2758 " element.text = element.text + ' ' + text\n"
2761 " elem = cElementTree.Element(ET.PI)\n"
2762 " elem.text = text\n"
2764 "cElementTree.PI = cElementTree.ProcessingInstruction = PI\n"
2766 "def XML(text):\n" /* public */
2767 " parser = cElementTree.XMLParser()\n"
2768 " parser.feed(text)\n"
2769 " return parser.close()\n"
2770 "cElementTree.XML = cElementTree.fromstring = XML\n"
2772 "def XMLID(text):\n" /* public */
2773 " tree = XML(text)\n"
2775 " for elem in tree.getiterator():\n"
2776 " id = elem.get('id')\n"
2779 " return tree, ids\n"
2780 "cElementTree.XMLID = XMLID\n"
2782 "cElementTree.dump = ET.dump\n"
2783 "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
2784 "cElementTree.iselement = ET.iselement\n"
2785 "cElementTree.QName = ET.QName\n"
2786 "cElementTree.tostring = ET.tostring\n"
2787 "cElementTree.VERSION = '" VERSION
"'\n"
2788 "cElementTree.__version__ = '" VERSION
"'\n"
2789 "cElementTree.XMLParserError = SyntaxError\n"
2793 PyRun_String(bootstrap
, Py_file_input
, g
, NULL
);
2795 elementpath_obj
= PyDict_GetItemString(g
, "ElementPath");
2797 elementtree_copyelement_obj
= PyDict_GetItemString(g
, "copyelement");
2798 if (elementtree_copyelement_obj
) {
2799 /* reduce hack needed; enable reduce method */
2801 for (mp
= element_methods
; mp
->ml_name
; mp
++)
2802 if (mp
->ml_meth
== (PyCFunction
) element_reduce
) {
2803 mp
->ml_name
= "__reduce__";
2808 elementtree_deepcopy_obj
= PyDict_GetItemString(g
, "deepcopy");
2809 elementtree_getiterator_obj
= PyDict_GetItemString(g
, "getiterator");
2811 #if defined(USE_PYEXPAT_CAPI)
2812 /* link against pyexpat, if possible */
2813 capi
= PyCObject_Import("pyexpat", "expat_CAPI");
2815 strcmp(capi
->magic
, PyExpat_CAPI_MAGIC
) == 0 &&
2816 capi
->size
<= sizeof(*expat_capi
) &&
2817 capi
->MAJOR_VERSION
== XML_MAJOR_VERSION
&&
2818 capi
->MINOR_VERSION
== XML_MINOR_VERSION
&&
2819 capi
->MICRO_VERSION
== XML_MICRO_VERSION
)