4 #include "frameobject.h"
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
35 ProcessingInstruction
,
54 #if XML_COMBINED_VERSION >= 19504
60 static PyObject
*ErrorObject
;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
70 int returns_unicode
; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes
; /* Return attributes as a list. */
73 int specified_attributes
; /* Report only specified attributes. */
74 int in_callback
; /* Is a callback active? */
75 int ns_prefixes
; /* Namespace-triplets mode? */
76 XML_Char
*buffer
; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size
; /* Size of buffer, in XML_Char units */
79 int buffer_used
; /* Buffer units in use */
80 PyObject
*intern
; /* Dictionary to intern strings */
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype
;
88 typedef void (*xmlhandlersetter
)(XML_Parser self
, void *meth
);
89 typedef void* xmlhandler
;
93 xmlhandlersetter setter
;
95 PyCodeObject
*tb_code
;
99 static struct HandlerInfo handler_info
[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
105 set_error_attr(PyObject
*err
, char *name
, int value
)
107 PyObject
*v
= PyInt_FromLong(value
);
109 if (v
== NULL
|| PyObject_SetAttrString(err
, name
, v
) == -1) {
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
121 set_error(xmlparseobject
*self
, enum XML_Error code
)
125 XML_Parser parser
= self
->itself
;
126 int lineno
= XML_GetErrorLineNumber(parser
);
127 int column
= XML_GetErrorColumnNumber(parser
);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer
, "%.200s: line %i, column %i",
132 XML_ErrorString(code
), lineno
, column
);
133 err
= PyObject_CallFunction(ErrorObject
, "s", buffer
);
135 && set_error_attr(err
, "code", code
)
136 && set_error_attr(err
, "offset", column
)
137 && set_error_attr(err
, "lineno", lineno
)) {
138 PyErr_SetObject(ErrorObject
, err
);
145 have_handler(xmlparseobject
*self
, int type
)
147 PyObject
*handler
= self
->handlers
[type
];
148 return handler
!= NULL
;
152 get_handler_name(struct HandlerInfo
*hinfo
)
154 PyObject
*name
= hinfo
->nameobj
;
156 name
= PyString_FromString(hinfo
->name
);
157 hinfo
->nameobj
= name
;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
169 conv_string_to_unicode(const XML_Char
*str
)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
178 return PyUnicode_DecodeUTF8(str
, strlen(str
), "strict");
182 conv_string_len_to_unicode(const XML_Char
*str
, int len
)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
191 return PyUnicode_DecodeUTF8((const char *)str
, len
, "strict");
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
199 conv_string_to_utf8(const XML_Char
*str
)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
208 return PyString_FromString(str
);
212 conv_string_len_to_utf8(const XML_Char
*str
, int len
)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
221 return PyString_FromStringAndSize((const char *)str
, len
);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject
*self
, int initial
);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
232 error_external_entity_ref_handler(XML_Parser parser
,
233 const XML_Char
*context
,
234 const XML_Char
*base
,
235 const XML_Char
*systemId
,
236 const XML_Char
*publicId
)
241 /* Dummy character data handler used when an error (exception) has
242 been detected, and the actual parsing can be terminated early.
243 This is needed since character data handler can't be safely removed
244 from within the character data handler, but can be replaced. It is
245 used only from the character data handler trampoline, and must be
246 used right after `flag_error()` is called. */
248 noop_character_data_handler(void *userData
, const XML_Char
*data
, int len
)
254 flag_error(xmlparseobject
*self
)
256 clear_handlers(self
, 0);
257 XML_SetExternalEntityRefHandler(self
->itself
,
258 error_external_entity_ref_handler
);
262 getcode(enum HandlerTypes slot
, char* func_name
, int lineno
)
264 PyObject
*code
= NULL
;
265 PyObject
*name
= NULL
;
266 PyObject
*nulltuple
= NULL
;
267 PyObject
*filename
= NULL
;
269 if (handler_info
[slot
].tb_code
== NULL
) {
270 code
= PyString_FromString("");
273 name
= PyString_FromString(func_name
);
276 nulltuple
= PyTuple_New(0);
277 if (nulltuple
== NULL
)
279 filename
= PyString_FromString(__FILE__
);
280 handler_info
[slot
].tb_code
=
281 PyCode_New(0, /* argcount */
286 nulltuple
, /* consts */
287 nulltuple
, /* names */
288 nulltuple
, /* varnames */
289 #if PYTHON_API_VERSION >= 1010
290 nulltuple
, /* freevars */
291 nulltuple
, /* cellvars */
293 filename
, /* filename */
295 lineno
, /* firstlineno */
298 if (handler_info
[slot
].tb_code
== NULL
)
301 Py_DECREF(nulltuple
);
305 return handler_info
[slot
].tb_code
;
314 trace_frame(PyThreadState
*tstate
, PyFrameObject
*f
, int code
, PyObject
*val
)
317 if (!tstate
->use_tracing
|| tstate
->tracing
)
319 if (tstate
->c_profilefunc
!= NULL
) {
321 result
= tstate
->c_profilefunc(tstate
->c_profileobj
,
323 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
324 || (tstate
->c_profilefunc
!= NULL
));
329 if (tstate
->c_tracefunc
!= NULL
) {
331 result
= tstate
->c_tracefunc(tstate
->c_traceobj
,
333 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
334 || (tstate
->c_profilefunc
!= NULL
));
341 trace_frame_exc(PyThreadState
*tstate
, PyFrameObject
*f
)
343 PyObject
*type
, *value
, *traceback
, *arg
;
346 if (tstate
->c_tracefunc
== NULL
)
349 PyErr_Fetch(&type
, &value
, &traceback
);
354 #if PY_VERSION_HEX < 0x02040000
355 arg
= Py_BuildValue("(OOO)", type
, value
, traceback
);
357 arg
= PyTuple_Pack(3, type
, value
, traceback
);
360 PyErr_Restore(type
, value
, traceback
);
363 err
= trace_frame(tstate
, f
, PyTrace_EXCEPTION
, arg
);
366 PyErr_Restore(type
, value
, traceback
);
370 Py_XDECREF(traceback
);
377 call_with_frame(PyCodeObject
*c
, PyObject
* func
, PyObject
* args
,
378 xmlparseobject
*self
)
380 PyThreadState
*tstate
= PyThreadState_GET();
387 f
= PyFrame_New(tstate
, c
, PyEval_GetGlobals(), NULL
);
392 if (trace_frame(tstate
, f
, PyTrace_CALL
, Py_None
) < 0) {
396 res
= PyEval_CallObject(func
, args
);
398 if (tstate
->curexc_traceback
== NULL
)
400 XML_StopParser(self
->itself
, XML_FALSE
);
402 if (trace_frame_exc(tstate
, f
) < 0) {
407 if (trace_frame(tstate
, f
, PyTrace_RETURN
, res
) < 0) {
415 tstate
->frame
= f
->f_back
;
420 #ifndef Py_USING_UNICODE
421 #define STRING_CONV_FUNC conv_string_to_utf8
423 /* Python 2.0 and later versions, when built with Unicode support */
424 #define STRING_CONV_FUNC (self->returns_unicode \
425 ? conv_string_to_unicode : conv_string_to_utf8)
429 string_intern(xmlparseobject
*self
, const char* str
)
431 PyObject
*result
= STRING_CONV_FUNC(str
);
433 /* result can be NULL if the unicode conversion failed. */
438 value
= PyDict_GetItem(self
->intern
, result
);
440 if (PyDict_SetItem(self
->intern
, result
, result
) == 0)
450 /* Return 0 on success, -1 on exception.
451 * flag_error() will be called before return if needed.
454 call_character_handler(xmlparseobject
*self
, const XML_Char
*buffer
, int len
)
459 args
= PyTuple_New(1);
462 #ifdef Py_USING_UNICODE
463 temp
= (self
->returns_unicode
464 ? conv_string_len_to_unicode(buffer
, len
)
465 : conv_string_len_to_utf8(buffer
, len
));
467 temp
= conv_string_len_to_utf8(buffer
, len
);
472 XML_SetCharacterDataHandler(self
->itself
,
473 noop_character_data_handler
);
476 PyTuple_SET_ITEM(args
, 0, temp
);
477 /* temp is now a borrowed reference; consider it unused. */
478 self
->in_callback
= 1;
479 temp
= call_with_frame(getcode(CharacterData
, "CharacterData", __LINE__
),
480 self
->handlers
[CharacterData
], args
, self
);
481 /* temp is an owned reference again, or NULL */
482 self
->in_callback
= 0;
486 XML_SetCharacterDataHandler(self
->itself
,
487 noop_character_data_handler
);
495 flush_character_buffer(xmlparseobject
*self
)
498 if (self
->buffer
== NULL
|| self
->buffer_used
== 0)
500 rc
= call_character_handler(self
, self
->buffer
, self
->buffer_used
);
501 self
->buffer_used
= 0;
506 my_CharacterDataHandler(void *userData
, const XML_Char
*data
, int len
)
508 xmlparseobject
*self
= (xmlparseobject
*) userData
;
509 if (self
->buffer
== NULL
)
510 call_character_handler(self
, data
, len
);
512 if ((self
->buffer_used
+ len
) > self
->buffer_size
) {
513 if (flush_character_buffer(self
) < 0)
515 /* handler might have changed; drop the rest on the floor
516 * if there isn't a handler anymore
518 if (!have_handler(self
, CharacterData
))
521 if (len
> self
->buffer_size
) {
522 call_character_handler(self
, data
, len
);
523 self
->buffer_used
= 0;
526 memcpy(self
->buffer
+ self
->buffer_used
,
527 data
, len
* sizeof(XML_Char
));
528 self
->buffer_used
+= len
;
534 my_StartElementHandler(void *userData
,
535 const XML_Char
*name
, const XML_Char
*atts
[])
537 xmlparseobject
*self
= (xmlparseobject
*)userData
;
539 if (have_handler(self
, StartElement
)) {
540 PyObject
*container
, *rv
, *args
;
543 if (flush_character_buffer(self
) < 0)
545 /* Set max to the number of slots filled in atts[]; max/2 is
546 * the number of attributes we need to process.
548 if (self
->specified_attributes
) {
549 max
= XML_GetSpecifiedAttributeCount(self
->itself
);
553 while (atts
[max
] != NULL
)
556 /* Build the container. */
557 if (self
->ordered_attributes
)
558 container
= PyList_New(max
);
560 container
= PyDict_New();
561 if (container
== NULL
) {
565 for (i
= 0; i
< max
; i
+= 2) {
566 PyObject
*n
= string_intern(self
, (XML_Char
*) atts
[i
]);
570 Py_DECREF(container
);
573 v
= STRING_CONV_FUNC((XML_Char
*) atts
[i
+1]);
576 Py_DECREF(container
);
580 if (self
->ordered_attributes
) {
581 PyList_SET_ITEM(container
, i
, n
);
582 PyList_SET_ITEM(container
, i
+1, v
);
584 else if (PyDict_SetItem(container
, n
, v
)) {
595 args
= string_intern(self
, name
);
597 args
= Py_BuildValue("(NN)", args
, container
);
599 Py_DECREF(container
);
602 /* Container is now a borrowed reference; ignore it. */
603 self
->in_callback
= 1;
604 rv
= call_with_frame(getcode(StartElement
, "StartElement", __LINE__
),
605 self
->handlers
[StartElement
], args
, self
);
606 self
->in_callback
= 0;
616 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
617 RETURN, GETUSERDATA) \
619 my_##NAME##Handler PARAMS {\
620 xmlparseobject *self = GETUSERDATA ; \
621 PyObject *args = NULL; \
622 PyObject *rv = NULL; \
625 if (have_handler(self, NAME)) { \
626 if (flush_character_buffer(self) < 0) \
628 args = Py_BuildValue PARAM_FORMAT ;\
629 if (!args) { flag_error(self); return RETURN;} \
630 self->in_callback = 1; \
631 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
632 self->handlers[NAME], args, self); \
633 self->in_callback = 0; \
645 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
646 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
647 (xmlparseobject *)userData)
649 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
650 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
651 rc = PyInt_AsLong(rv);, rc, \
652 (xmlparseobject *)userData)
654 VOID_HANDLER(EndElement
,
655 (void *userData
, const XML_Char
*name
),
656 ("(N)", string_intern(self
, name
)))
658 VOID_HANDLER(ProcessingInstruction
,
660 const XML_Char
*target
,
661 const XML_Char
*data
),
662 ("(NO&)", string_intern(self
, target
), STRING_CONV_FUNC
,data
))
664 VOID_HANDLER(UnparsedEntityDecl
,
666 const XML_Char
*entityName
,
667 const XML_Char
*base
,
668 const XML_Char
*systemId
,
669 const XML_Char
*publicId
,
670 const XML_Char
*notationName
),
672 string_intern(self
, entityName
), string_intern(self
, base
),
673 string_intern(self
, systemId
), string_intern(self
, publicId
),
674 string_intern(self
, notationName
)))
676 #ifndef Py_USING_UNICODE
677 VOID_HANDLER(EntityDecl
,
679 const XML_Char
*entityName
,
680 int is_parameter_entity
,
681 const XML_Char
*value
,
683 const XML_Char
*base
,
684 const XML_Char
*systemId
,
685 const XML_Char
*publicId
,
686 const XML_Char
*notationName
),
688 string_intern(self
, entityName
), is_parameter_entity
,
689 conv_string_len_to_utf8(value
, value_length
),
690 string_intern(self
, base
), string_intern(self
, systemId
),
691 string_intern(self
, publicId
),
692 string_intern(self
, notationName
)))
694 VOID_HANDLER(EntityDecl
,
696 const XML_Char
*entityName
,
697 int is_parameter_entity
,
698 const XML_Char
*value
,
700 const XML_Char
*base
,
701 const XML_Char
*systemId
,
702 const XML_Char
*publicId
,
703 const XML_Char
*notationName
),
705 string_intern(self
, entityName
), is_parameter_entity
,
706 (self
->returns_unicode
707 ? conv_string_len_to_unicode(value
, value_length
)
708 : conv_string_len_to_utf8(value
, value_length
)),
709 string_intern(self
, base
), string_intern(self
, systemId
),
710 string_intern(self
, publicId
),
711 string_intern(self
, notationName
)))
714 VOID_HANDLER(XmlDecl
,
716 const XML_Char
*version
,
717 const XML_Char
*encoding
,
720 STRING_CONV_FUNC
,version
, STRING_CONV_FUNC
,encoding
,
724 conv_content_model(XML_Content
* const model
,
725 PyObject
*(*conv_string
)(const XML_Char
*))
727 PyObject
*result
= NULL
;
728 PyObject
*children
= PyTuple_New(model
->numchildren
);
731 if (children
!= NULL
) {
732 assert(model
->numchildren
< INT_MAX
);
733 for (i
= 0; i
< (int)model
->numchildren
; ++i
) {
734 PyObject
*child
= conv_content_model(&model
->children
[i
],
737 Py_XDECREF(children
);
740 PyTuple_SET_ITEM(children
, i
, child
);
742 result
= Py_BuildValue("(iiO&N)",
743 model
->type
, model
->quant
,
744 conv_string
,model
->name
, children
);
750 my_ElementDeclHandler(void *userData
,
751 const XML_Char
*name
,
754 xmlparseobject
*self
= (xmlparseobject
*)userData
;
755 PyObject
*args
= NULL
;
757 if (have_handler(self
, ElementDecl
)) {
759 PyObject
*modelobj
, *nameobj
;
761 if (flush_character_buffer(self
) < 0)
763 #ifdef Py_USING_UNICODE
764 modelobj
= conv_content_model(model
,
765 (self
->returns_unicode
766 ? conv_string_to_unicode
767 : conv_string_to_utf8
));
769 modelobj
= conv_content_model(model
, conv_string_to_utf8
);
771 if (modelobj
== NULL
) {
775 nameobj
= string_intern(self
, name
);
776 if (nameobj
== NULL
) {
781 args
= Py_BuildValue("NN", nameobj
, modelobj
);
787 self
->in_callback
= 1;
788 rv
= call_with_frame(getcode(ElementDecl
, "ElementDecl", __LINE__
),
789 self
->handlers
[ElementDecl
], args
, self
);
790 self
->in_callback
= 0;
799 XML_FreeContentModel(self
->itself
, model
);
803 VOID_HANDLER(AttlistDecl
,
805 const XML_Char
*elname
,
806 const XML_Char
*attname
,
807 const XML_Char
*att_type
,
808 const XML_Char
*dflt
,
811 string_intern(self
, elname
), string_intern(self
, attname
),
812 STRING_CONV_FUNC
,att_type
, STRING_CONV_FUNC
,dflt
,
815 #if XML_COMBINED_VERSION >= 19504
816 VOID_HANDLER(SkippedEntity
,
818 const XML_Char
*entityName
,
819 int is_parameter_entity
),
821 string_intern(self
, entityName
), is_parameter_entity
))
824 VOID_HANDLER(NotationDecl
,
826 const XML_Char
*notationName
,
827 const XML_Char
*base
,
828 const XML_Char
*systemId
,
829 const XML_Char
*publicId
),
831 string_intern(self
, notationName
), string_intern(self
, base
),
832 string_intern(self
, systemId
), string_intern(self
, publicId
)))
834 VOID_HANDLER(StartNamespaceDecl
,
836 const XML_Char
*prefix
,
837 const XML_Char
*uri
),
839 string_intern(self
, prefix
), string_intern(self
, uri
)))
841 VOID_HANDLER(EndNamespaceDecl
,
843 const XML_Char
*prefix
),
844 ("(N)", string_intern(self
, prefix
)))
846 VOID_HANDLER(Comment
,
847 (void *userData
, const XML_Char
*data
),
848 ("(O&)", STRING_CONV_FUNC
,data
))
850 VOID_HANDLER(StartCdataSection
,
854 VOID_HANDLER(EndCdataSection
,
858 #ifndef Py_USING_UNICODE
859 VOID_HANDLER(Default
,
860 (void *userData
, const XML_Char
*s
, int len
),
861 ("(N)", conv_string_len_to_utf8(s
,len
)))
863 VOID_HANDLER(DefaultHandlerExpand
,
864 (void *userData
, const XML_Char
*s
, int len
),
865 ("(N)", conv_string_len_to_utf8(s
,len
)))
867 VOID_HANDLER(Default
,
868 (void *userData
, const XML_Char
*s
, int len
),
869 ("(N)", (self
->returns_unicode
870 ? conv_string_len_to_unicode(s
,len
)
871 : conv_string_len_to_utf8(s
,len
))))
873 VOID_HANDLER(DefaultHandlerExpand
,
874 (void *userData
, const XML_Char
*s
, int len
),
875 ("(N)", (self
->returns_unicode
876 ? conv_string_len_to_unicode(s
,len
)
877 : conv_string_len_to_utf8(s
,len
))))
880 INT_HANDLER(NotStandalone
,
884 RC_HANDLER(int, ExternalEntityRef
,
886 const XML_Char
*context
,
887 const XML_Char
*base
,
888 const XML_Char
*systemId
,
889 const XML_Char
*publicId
),
892 STRING_CONV_FUNC
,context
, string_intern(self
, base
),
893 string_intern(self
, systemId
), string_intern(self
, publicId
)),
894 rc
= PyInt_AsLong(rv
);, rc
,
895 XML_GetUserData(parser
))
897 /* XXX UnknownEncodingHandler */
899 VOID_HANDLER(StartDoctypeDecl
,
900 (void *userData
, const XML_Char
*doctypeName
,
901 const XML_Char
*sysid
, const XML_Char
*pubid
,
902 int has_internal_subset
),
903 ("(NNNi)", string_intern(self
, doctypeName
),
904 string_intern(self
, sysid
), string_intern(self
, pubid
),
905 has_internal_subset
))
907 VOID_HANDLER(EndDoctypeDecl
, (void *userData
), ("()"))
909 /* ---------------------------------------------------------------- */
912 get_parse_result(xmlparseobject
*self
, int rv
)
914 if (PyErr_Occurred()) {
918 return set_error(self
, XML_GetErrorCode(self
->itself
));
920 if (flush_character_buffer(self
) < 0) {
923 return PyInt_FromLong(rv
);
926 PyDoc_STRVAR(xmlparse_Parse__doc__
,
927 "Parse(data[, isfinal])\n\
928 Parse XML data. `isfinal' should be true at end of input.");
931 xmlparse_Parse(xmlparseobject
*self
, PyObject
*args
)
937 if (!PyArg_ParseTuple(args
, "s#|i:Parse", &s
, &slen
, &isFinal
))
940 return get_parse_result(self
, XML_Parse(self
->itself
, s
, slen
, isFinal
));
943 /* File reading copied from cPickle */
945 #define BUF_SIZE 2048
948 readinst(char *buf
, int buf_size
, PyObject
*meth
)
950 PyObject
*arg
= NULL
;
951 PyObject
*bytes
= NULL
;
952 PyObject
*str
= NULL
;
955 if ((bytes
= PyInt_FromLong(buf_size
)) == NULL
)
958 if ((arg
= PyTuple_New(1)) == NULL
) {
963 PyTuple_SET_ITEM(arg
, 0, bytes
);
965 #if PY_VERSION_HEX < 0x02020000
966 str
= PyObject_CallObject(meth
, arg
);
968 str
= PyObject_Call(meth
, arg
, NULL
);
973 /* XXX what to do if it returns a Unicode string? */
974 if (!PyString_Check(str
)) {
975 PyErr_Format(PyExc_TypeError
,
976 "read() did not return a string object (type=%.400s)",
977 Py_TYPE(str
)->tp_name
);
980 len
= PyString_GET_SIZE(str
);
981 if (len
> buf_size
) {
982 PyErr_Format(PyExc_ValueError
,
983 "read() returned too much data: "
984 "%i bytes requested, %i returned",
988 memcpy(buf
, PyString_AsString(str
), len
);
995 PyDoc_STRVAR(xmlparse_ParseFile__doc__
,
997 Parse XML data from file-like object.");
1000 xmlparse_ParseFile(xmlparseobject
*self
, PyObject
*f
)
1004 PyObject
*readmethod
= NULL
;
1006 if (PyFile_Check(f
)) {
1007 fp
= PyFile_AsFile(f
);
1011 readmethod
= PyObject_GetAttrString(f
, "read");
1012 if (readmethod
== NULL
) {
1014 PyErr_SetString(PyExc_TypeError
,
1015 "argument must have 'read' attribute");
1021 void *buf
= XML_GetBuffer(self
->itself
, BUF_SIZE
);
1023 Py_XDECREF(readmethod
);
1024 return PyErr_NoMemory();
1028 bytes_read
= fread(buf
, sizeof(char), BUF_SIZE
, fp
);
1029 if (bytes_read
< 0) {
1030 PyErr_SetFromErrno(PyExc_IOError
);
1035 bytes_read
= readinst(buf
, BUF_SIZE
, readmethod
);
1036 if (bytes_read
< 0) {
1037 Py_DECREF(readmethod
);
1041 rv
= XML_ParseBuffer(self
->itself
, bytes_read
, bytes_read
== 0);
1042 if (PyErr_Occurred()) {
1043 Py_XDECREF(readmethod
);
1047 if (!rv
|| bytes_read
== 0)
1050 Py_XDECREF(readmethod
);
1051 return get_parse_result(self
, rv
);
1054 PyDoc_STRVAR(xmlparse_SetBase__doc__
,
1055 "SetBase(base_url)\n\
1056 Set the base URL for the parser.");
1059 xmlparse_SetBase(xmlparseobject
*self
, PyObject
*args
)
1063 if (!PyArg_ParseTuple(args
, "s:SetBase", &base
))
1065 if (!XML_SetBase(self
->itself
, base
)) {
1066 return PyErr_NoMemory();
1072 PyDoc_STRVAR(xmlparse_GetBase__doc__
,
1073 "GetBase() -> url\n\
1074 Return base URL string for the parser.");
1077 xmlparse_GetBase(xmlparseobject
*self
, PyObject
*unused
)
1079 return Py_BuildValue("z", XML_GetBase(self
->itself
));
1082 PyDoc_STRVAR(xmlparse_GetInputContext__doc__
,
1083 "GetInputContext() -> string\n\
1084 Return the untranslated text of the input that caused the current event.\n\
1085 If the event was generated by a large amount of text (such as a start tag\n\
1086 for an element with many attributes), not all of the text may be available.");
1089 xmlparse_GetInputContext(xmlparseobject
*self
, PyObject
*unused
)
1091 if (self
->in_callback
) {
1094 = XML_GetInputContext(self
->itself
, &offset
, &size
);
1097 return PyString_FromStringAndSize(buffer
+ offset
,
1106 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__
,
1107 "ExternalEntityParserCreate(context[, encoding])\n\
1108 Create a parser for parsing an external entity based on the\n\
1109 information passed to the ExternalEntityRefHandler.");
1112 xmlparse_ExternalEntityParserCreate(xmlparseobject
*self
, PyObject
*args
)
1115 char *encoding
= NULL
;
1116 xmlparseobject
*new_parser
;
1119 if (!PyArg_ParseTuple(args
, "z|s:ExternalEntityParserCreate",
1120 &context
, &encoding
)) {
1124 #ifndef Py_TPFLAGS_HAVE_GC
1125 /* Python versions 2.0 and 2.1 */
1126 new_parser
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1128 /* Python versions 2.2 and later */
1129 new_parser
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1132 if (new_parser
== NULL
)
1134 new_parser
->buffer_size
= self
->buffer_size
;
1135 new_parser
->buffer_used
= 0;
1136 if (self
->buffer
!= NULL
) {
1137 new_parser
->buffer
= malloc(new_parser
->buffer_size
);
1138 if (new_parser
->buffer
== NULL
) {
1139 #ifndef Py_TPFLAGS_HAVE_GC
1140 /* Code for versions 2.0 and 2.1 */
1141 PyObject_Del(new_parser
);
1143 /* Code for versions 2.2 and later. */
1144 PyObject_GC_Del(new_parser
);
1146 return PyErr_NoMemory();
1150 new_parser
->buffer
= NULL
;
1151 new_parser
->returns_unicode
= self
->returns_unicode
;
1152 new_parser
->ordered_attributes
= self
->ordered_attributes
;
1153 new_parser
->specified_attributes
= self
->specified_attributes
;
1154 new_parser
->in_callback
= 0;
1155 new_parser
->ns_prefixes
= self
->ns_prefixes
;
1156 new_parser
->itself
= XML_ExternalEntityParserCreate(self
->itself
, context
,
1158 new_parser
->handlers
= 0;
1159 new_parser
->intern
= self
->intern
;
1160 Py_XINCREF(new_parser
->intern
);
1161 #ifdef Py_TPFLAGS_HAVE_GC
1162 PyObject_GC_Track(new_parser
);
1164 PyObject_GC_Init(new_parser
);
1167 if (!new_parser
->itself
) {
1168 Py_DECREF(new_parser
);
1169 return PyErr_NoMemory();
1172 XML_SetUserData(new_parser
->itself
, (void *)new_parser
);
1174 /* allocate and clear handlers first */
1175 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1178 new_parser
->handlers
= malloc(sizeof(PyObject
*) * i
);
1179 if (!new_parser
->handlers
) {
1180 Py_DECREF(new_parser
);
1181 return PyErr_NoMemory();
1183 clear_handlers(new_parser
, 1);
1185 /* then copy handlers from self */
1186 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1187 PyObject
*handler
= self
->handlers
[i
];
1188 if (handler
!= NULL
) {
1190 new_parser
->handlers
[i
] = handler
;
1191 handler_info
[i
].setter(new_parser
->itself
,
1192 handler_info
[i
].handler
);
1195 return (PyObject
*)new_parser
;
1198 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__
,
1199 "SetParamEntityParsing(flag) -> success\n\
1200 Controls parsing of parameter entities (including the external DTD\n\
1201 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1202 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1203 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1207 xmlparse_SetParamEntityParsing(xmlparseobject
*p
, PyObject
* args
)
1210 if (!PyArg_ParseTuple(args
, "i", &flag
))
1212 flag
= XML_SetParamEntityParsing(p
->itself
, flag
);
1213 return PyInt_FromLong(flag
);
1217 #if XML_COMBINED_VERSION >= 19505
1218 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__
,
1219 "UseForeignDTD([flag])\n\
1220 Allows the application to provide an artificial external subset if one is\n\
1221 not specified as part of the document instance. This readily allows the\n\
1222 use of a 'default' document type controlled by the application, while still\n\
1223 getting the advantage of providing document type information to the parser.\n\
1224 'flag' defaults to True if not provided.");
1227 xmlparse_UseForeignDTD(xmlparseobject
*self
, PyObject
*args
)
1229 PyObject
*flagobj
= NULL
;
1230 XML_Bool flag
= XML_TRUE
;
1232 if (!PyArg_UnpackTuple(args
, "UseForeignDTD", 0, 1, &flagobj
))
1234 if (flagobj
!= NULL
)
1235 flag
= PyObject_IsTrue(flagobj
) ? XML_TRUE
: XML_FALSE
;
1236 rc
= XML_UseForeignDTD(self
->itself
, flag
);
1237 if (rc
!= XML_ERROR_NONE
) {
1238 return set_error(self
, rc
);
1245 static struct PyMethodDef xmlparse_methods
[] = {
1246 {"Parse", (PyCFunction
)xmlparse_Parse
,
1247 METH_VARARGS
, xmlparse_Parse__doc__
},
1248 {"ParseFile", (PyCFunction
)xmlparse_ParseFile
,
1249 METH_O
, xmlparse_ParseFile__doc__
},
1250 {"SetBase", (PyCFunction
)xmlparse_SetBase
,
1251 METH_VARARGS
, xmlparse_SetBase__doc__
},
1252 {"GetBase", (PyCFunction
)xmlparse_GetBase
,
1253 METH_NOARGS
, xmlparse_GetBase__doc__
},
1254 {"ExternalEntityParserCreate", (PyCFunction
)xmlparse_ExternalEntityParserCreate
,
1255 METH_VARARGS
, xmlparse_ExternalEntityParserCreate__doc__
},
1256 {"SetParamEntityParsing", (PyCFunction
)xmlparse_SetParamEntityParsing
,
1257 METH_VARARGS
, xmlparse_SetParamEntityParsing__doc__
},
1258 {"GetInputContext", (PyCFunction
)xmlparse_GetInputContext
,
1259 METH_NOARGS
, xmlparse_GetInputContext__doc__
},
1260 #if XML_COMBINED_VERSION >= 19505
1261 {"UseForeignDTD", (PyCFunction
)xmlparse_UseForeignDTD
,
1262 METH_VARARGS
, xmlparse_UseForeignDTD__doc__
},
1264 {NULL
, NULL
} /* sentinel */
1270 #ifdef Py_USING_UNICODE
1272 /* pyexpat international encoding support.
1273 Make it as simple as possible.
1276 static char template_buffer
[257];
1277 PyObject
*template_string
= NULL
;
1280 init_template_buffer(void)
1283 for (i
= 0; i
< 256; i
++) {
1284 template_buffer
[i
] = i
;
1286 template_buffer
[256] = 0;
1290 PyUnknownEncodingHandler(void *encodingHandlerData
,
1291 const XML_Char
*name
,
1294 PyUnicodeObject
*_u_string
= NULL
;
1298 /* Yes, supports only 8bit encodings */
1299 _u_string
= (PyUnicodeObject
*)
1300 PyUnicode_Decode(template_buffer
, 256, name
, "replace");
1302 if (_u_string
== NULL
)
1305 for (i
= 0; i
< 256; i
++) {
1306 /* Stupid to access directly, but fast */
1307 Py_UNICODE c
= _u_string
->str
[i
];
1308 if (c
== Py_UNICODE_REPLACEMENT_CHARACTER
)
1314 info
->convert
= NULL
;
1315 info
->release
= NULL
;
1317 Py_DECREF(_u_string
);
1324 newxmlparseobject(char *encoding
, char *namespace_separator
, PyObject
*intern
)
1327 xmlparseobject
*self
;
1329 #ifdef Py_TPFLAGS_HAVE_GC
1330 /* Code for versions 2.2 and later */
1331 self
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1333 self
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1338 #ifdef Py_USING_UNICODE
1339 self
->returns_unicode
= 1;
1341 self
->returns_unicode
= 0;
1344 self
->buffer
= NULL
;
1345 self
->buffer_size
= CHARACTER_DATA_BUFFER_SIZE
;
1346 self
->buffer_used
= 0;
1347 self
->ordered_attributes
= 0;
1348 self
->specified_attributes
= 0;
1349 self
->in_callback
= 0;
1350 self
->ns_prefixes
= 0;
1351 self
->handlers
= NULL
;
1352 if (namespace_separator
!= NULL
) {
1353 self
->itself
= XML_ParserCreateNS(encoding
, *namespace_separator
);
1356 self
->itself
= XML_ParserCreate(encoding
);
1358 self
->intern
= intern
;
1359 Py_XINCREF(self
->intern
);
1360 #ifdef Py_TPFLAGS_HAVE_GC
1361 PyObject_GC_Track(self
);
1363 PyObject_GC_Init(self
);
1365 if (self
->itself
== NULL
) {
1366 PyErr_SetString(PyExc_RuntimeError
,
1367 "XML_ParserCreate failed");
1371 XML_SetUserData(self
->itself
, (void *)self
);
1372 #ifdef Py_USING_UNICODE
1373 XML_SetUnknownEncodingHandler(self
->itself
,
1374 (XML_UnknownEncodingHandler
) PyUnknownEncodingHandler
, NULL
);
1377 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1380 self
->handlers
= malloc(sizeof(PyObject
*) * i
);
1381 if (!self
->handlers
) {
1383 return PyErr_NoMemory();
1385 clear_handlers(self
, 1);
1387 return (PyObject
*)self
;
1392 xmlparse_dealloc(xmlparseobject
*self
)
1395 #ifdef Py_TPFLAGS_HAVE_GC
1396 PyObject_GC_UnTrack(self
);
1398 PyObject_GC_Fini(self
);
1400 if (self
->itself
!= NULL
)
1401 XML_ParserFree(self
->itself
);
1402 self
->itself
= NULL
;
1404 if (self
->handlers
!= NULL
) {
1406 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1407 temp
= self
->handlers
[i
];
1408 self
->handlers
[i
] = NULL
;
1411 free(self
->handlers
);
1412 self
->handlers
= NULL
;
1414 if (self
->buffer
!= NULL
) {
1416 self
->buffer
= NULL
;
1418 Py_XDECREF(self
->intern
);
1419 #ifndef Py_TPFLAGS_HAVE_GC
1420 /* Code for versions 2.0 and 2.1 */
1423 /* Code for versions 2.2 and later. */
1424 PyObject_GC_Del(self
);
1429 handlername2int(const char *name
)
1432 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1433 if (strcmp(name
, handler_info
[i
].name
) == 0) {
1441 get_pybool(int istrue
)
1443 PyObject
*result
= istrue
? Py_True
: Py_False
;
1449 xmlparse_getattr(xmlparseobject
*self
, char *name
)
1451 int handlernum
= handlername2int(name
);
1453 if (handlernum
!= -1) {
1454 PyObject
*result
= self
->handlers
[handlernum
];
1460 if (name
[0] == 'E') {
1461 if (strcmp(name
, "ErrorCode") == 0)
1462 return PyInt_FromLong((long)
1463 XML_GetErrorCode(self
->itself
));
1464 if (strcmp(name
, "ErrorLineNumber") == 0)
1465 return PyInt_FromLong((long)
1466 XML_GetErrorLineNumber(self
->itself
));
1467 if (strcmp(name
, "ErrorColumnNumber") == 0)
1468 return PyInt_FromLong((long)
1469 XML_GetErrorColumnNumber(self
->itself
));
1470 if (strcmp(name
, "ErrorByteIndex") == 0)
1471 return PyInt_FromLong((long)
1472 XML_GetErrorByteIndex(self
->itself
));
1474 if (name
[0] == 'C') {
1475 if (strcmp(name
, "CurrentLineNumber") == 0)
1476 return PyInt_FromLong((long)
1477 XML_GetCurrentLineNumber(self
->itself
));
1478 if (strcmp(name
, "CurrentColumnNumber") == 0)
1479 return PyInt_FromLong((long)
1480 XML_GetCurrentColumnNumber(self
->itself
));
1481 if (strcmp(name
, "CurrentByteIndex") == 0)
1482 return PyInt_FromLong((long)
1483 XML_GetCurrentByteIndex(self
->itself
));
1485 if (name
[0] == 'b') {
1486 if (strcmp(name
, "buffer_size") == 0)
1487 return PyInt_FromLong((long) self
->buffer_size
);
1488 if (strcmp(name
, "buffer_text") == 0)
1489 return get_pybool(self
->buffer
!= NULL
);
1490 if (strcmp(name
, "buffer_used") == 0)
1491 return PyInt_FromLong((long) self
->buffer_used
);
1493 if (strcmp(name
, "namespace_prefixes") == 0)
1494 return get_pybool(self
->ns_prefixes
);
1495 if (strcmp(name
, "ordered_attributes") == 0)
1496 return get_pybool(self
->ordered_attributes
);
1497 if (strcmp(name
, "returns_unicode") == 0)
1498 return get_pybool((long) self
->returns_unicode
);
1499 if (strcmp(name
, "specified_attributes") == 0)
1500 return get_pybool((long) self
->specified_attributes
);
1501 if (strcmp(name
, "intern") == 0) {
1502 if (self
->intern
== NULL
) {
1507 Py_INCREF(self
->intern
);
1508 return self
->intern
;
1512 #define APPEND(list, str) \
1514 PyObject *o = PyString_FromString(str); \
1516 PyList_Append(list, o); \
1520 if (strcmp(name
, "__members__") == 0) {
1522 PyObject
*rc
= PyList_New(0);
1525 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1526 PyObject
*o
= get_handler_name(&handler_info
[i
]);
1528 PyList_Append(rc
, o
);
1531 APPEND(rc
, "ErrorCode");
1532 APPEND(rc
, "ErrorLineNumber");
1533 APPEND(rc
, "ErrorColumnNumber");
1534 APPEND(rc
, "ErrorByteIndex");
1535 APPEND(rc
, "CurrentLineNumber");
1536 APPEND(rc
, "CurrentColumnNumber");
1537 APPEND(rc
, "CurrentByteIndex");
1538 APPEND(rc
, "buffer_size");
1539 APPEND(rc
, "buffer_text");
1540 APPEND(rc
, "buffer_used");
1541 APPEND(rc
, "namespace_prefixes");
1542 APPEND(rc
, "ordered_attributes");
1543 APPEND(rc
, "returns_unicode");
1544 APPEND(rc
, "specified_attributes");
1545 APPEND(rc
, "intern");
1550 return Py_FindMethod(xmlparse_methods
, (PyObject
*)self
, name
);
1554 sethandler(xmlparseobject
*self
, const char *name
, PyObject
* v
)
1556 int handlernum
= handlername2int(name
);
1557 if (handlernum
>= 0) {
1558 xmlhandler c_handler
= NULL
;
1559 PyObject
*temp
= self
->handlers
[handlernum
];
1562 /* If this is the character data handler, and a character
1563 data handler is already active, we need to be more
1564 careful. What we can safely do is replace the existing
1565 character data handler callback function with a no-op
1566 function that will refuse to call Python. The downside
1567 is that this doesn't completely remove the character
1568 data handler from the C layer if there's any callback
1569 active, so Expat does a little more work than it
1570 otherwise would, but that's really an odd case. A more
1571 elaborate system of handlers and state could remove the
1572 C handler more effectively. */
1573 if (handlernum
== CharacterData
&& self
->in_callback
)
1574 c_handler
= noop_character_data_handler
;
1577 else if (v
!= NULL
) {
1579 c_handler
= handler_info
[handlernum
].handler
;
1581 self
->handlers
[handlernum
] = v
;
1583 handler_info
[handlernum
].setter(self
->itself
, c_handler
);
1590 xmlparse_setattr(xmlparseobject
*self
, char *name
, PyObject
*v
)
1592 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1594 PyErr_SetString(PyExc_RuntimeError
, "Cannot delete attribute");
1597 if (strcmp(name
, "buffer_text") == 0) {
1598 if (PyObject_IsTrue(v
)) {
1599 if (self
->buffer
== NULL
) {
1600 self
->buffer
= malloc(self
->buffer_size
);
1601 if (self
->buffer
== NULL
) {
1605 self
->buffer_used
= 0;
1608 else if (self
->buffer
!= NULL
) {
1609 if (flush_character_buffer(self
) < 0)
1612 self
->buffer
= NULL
;
1616 if (strcmp(name
, "namespace_prefixes") == 0) {
1617 if (PyObject_IsTrue(v
))
1618 self
->ns_prefixes
= 1;
1620 self
->ns_prefixes
= 0;
1621 XML_SetReturnNSTriplet(self
->itself
, self
->ns_prefixes
);
1624 if (strcmp(name
, "ordered_attributes") == 0) {
1625 if (PyObject_IsTrue(v
))
1626 self
->ordered_attributes
= 1;
1628 self
->ordered_attributes
= 0;
1631 if (strcmp(name
, "returns_unicode") == 0) {
1632 if (PyObject_IsTrue(v
)) {
1633 #ifndef Py_USING_UNICODE
1634 PyErr_SetString(PyExc_ValueError
,
1635 "Unicode support not available");
1638 self
->returns_unicode
= 1;
1642 self
->returns_unicode
= 0;
1645 if (strcmp(name
, "specified_attributes") == 0) {
1646 if (PyObject_IsTrue(v
))
1647 self
->specified_attributes
= 1;
1649 self
->specified_attributes
= 0;
1653 if (strcmp(name
, "buffer_size") == 0) {
1654 long new_buffer_size
;
1655 if (!PyInt_Check(v
)) {
1656 PyErr_SetString(PyExc_TypeError
, "buffer_size must be an integer");
1660 new_buffer_size
=PyInt_AS_LONG(v
);
1661 /* trivial case -- no change */
1662 if (new_buffer_size
== self
->buffer_size
) {
1666 if (new_buffer_size
<= 0) {
1667 PyErr_SetString(PyExc_ValueError
, "buffer_size must be greater than zero");
1672 if (new_buffer_size
> INT_MAX
) {
1674 sprintf(errmsg
, "buffer_size must not be greater than %i", INT_MAX
);
1675 PyErr_SetString(PyExc_ValueError
, errmsg
);
1679 if (self
->buffer
!= NULL
) {
1680 /* there is already a buffer */
1681 if (self
->buffer_used
!= 0) {
1682 flush_character_buffer(self
);
1684 /* free existing buffer */
1687 self
->buffer
= malloc(new_buffer_size
);
1688 if (self
->buffer
== NULL
) {
1692 self
->buffer_size
= new_buffer_size
;
1696 if (strcmp(name
, "CharacterDataHandler") == 0) {
1697 /* If we're changing the character data handler, flush all
1698 * cached data with the old handler. Not sure there's a
1699 * "right" thing to do, though, but this probably won't
1702 if (flush_character_buffer(self
) < 0)
1705 if (sethandler(self
, name
, v
)) {
1708 PyErr_SetString(PyExc_AttributeError
, name
);
#ifdef WITH_CYCLE_GC
/* tp_traverse: report every object stored in the handler table to the
 * garbage collector's visit callback.
 */
static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
{
    int slot = 0;

    while (handler_info[slot].name != NULL) {
        Py_VISIT(op->handlers[slot]);
        slot++;
    }
    return 0;
}

/* tp_clear: break reference cycles by dropping all handlers and the
 * intern dictionary.
 */
static int
xmlparse_clear(xmlparseobject *op)
{
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}
#endif
1731 PyDoc_STRVAR(Xmlparsetype__doc__
, "XML parser");
1733 static PyTypeObject Xmlparsetype
= {
1734 PyVarObject_HEAD_INIT(NULL
, 0)
1735 "pyexpat.xmlparser", /*tp_name*/
1736 sizeof(xmlparseobject
) + PyGC_HEAD_SIZE
,/*tp_basicsize*/
1739 (destructor
)xmlparse_dealloc
, /*tp_dealloc*/
1740 (printfunc
)0, /*tp_print*/
1741 (getattrfunc
)xmlparse_getattr
, /*tp_getattr*/
1742 (setattrfunc
)xmlparse_setattr
, /*tp_setattr*/
1743 (cmpfunc
)0, /*tp_compare*/
1744 (reprfunc
)0, /*tp_repr*/
1746 0, /*tp_as_sequence*/
1747 0, /*tp_as_mapping*/
1748 (hashfunc
)0, /*tp_hash*/
1749 (ternaryfunc
)0, /*tp_call*/
1750 (reprfunc
)0, /*tp_str*/
1751 0, /* tp_getattro */
1752 0, /* tp_setattro */
1753 0, /* tp_as_buffer */
1754 #ifdef Py_TPFLAGS_HAVE_GC
1755 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1757 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_GC
, /*tp_flags*/
1759 Xmlparsetype__doc__
, /* tp_doc - Documentation string */
1760 #ifdef WITH_CYCLE_GC
1761 (traverseproc
)xmlparse_traverse
, /* tp_traverse */
1762 (inquiry
)xmlparse_clear
/* tp_clear */
1768 /* End of code for xmlparser objects */
1769 /* -------------------------------------------------------- */
1771 PyDoc_STRVAR(pyexpat_ParserCreate__doc__
,
1772 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1773 Return a new XML parser object.");
1776 pyexpat_ParserCreate(PyObject
*notused
, PyObject
*args
, PyObject
*kw
)
1778 char *encoding
= NULL
;
1779 char *namespace_separator
= NULL
;
1780 PyObject
*intern
= NULL
;
1782 int intern_decref
= 0;
1783 static char *kwlist
[] = {"encoding", "namespace_separator",
1786 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|zzO:ParserCreate", kwlist
,
1787 &encoding
, &namespace_separator
, &intern
))
1789 if (namespace_separator
!= NULL
1790 && strlen(namespace_separator
) > 1) {
1791 PyErr_SetString(PyExc_ValueError
,
1792 "namespace_separator must be at most one"
1793 " character, omitted, or None");
1796 /* Explicitly passing None means no interning is desired.
1797 Not passing anything means that a new dictionary is used. */
1798 if (intern
== Py_None
)
1800 else if (intern
== NULL
) {
1801 intern
= PyDict_New();
1806 else if (!PyDict_Check(intern
)) {
1807 PyErr_SetString(PyExc_TypeError
, "intern must be a dictionary");
1811 result
= newxmlparseobject(encoding
, namespace_separator
, intern
);
1812 if (intern_decref
) {
1818 PyDoc_STRVAR(pyexpat_ErrorString__doc__
,
1819 "ErrorString(errno) -> string\n\
1820 Returns string error for given number.");
1823 pyexpat_ErrorString(PyObject
*self
, PyObject
*args
)
1827 if (!PyArg_ParseTuple(args
, "l:ErrorString", &code
))
1829 return Py_BuildValue("z", XML_ErrorString((int)code
));
1832 /* List of methods defined in the module */
1834 static struct PyMethodDef pyexpat_methods
[] = {
1835 {"ParserCreate", (PyCFunction
)pyexpat_ParserCreate
,
1836 METH_VARARGS
|METH_KEYWORDS
, pyexpat_ParserCreate__doc__
},
1837 {"ErrorString", (PyCFunction
)pyexpat_ErrorString
,
1838 METH_VARARGS
, pyexpat_ErrorString__doc__
},
1840 {NULL
, (PyCFunction
)NULL
, 0, NULL
} /* sentinel */
1843 /* Module docstring */
1845 PyDoc_STRVAR(pyexpat_module_documentation
,
1846 "Python wrapper for Expat parser.");
1848 /* Return a Python string that represents the version number without the
1849 * extra cruft added by revision control, even if the right options were
1850 * given to the "cvs export" command to make it not include the extra
1854 get_version_string(void)
1856 static char *rcsid
= "$Revision$";
1860 while (!isdigit(Py_CHARMASK(*rev
)))
1862 while (rev
[i
] != ' ' && rev
[i
] != '\0')
1865 return PyString_FromStringAndSize(rev
, i
);
1868 /* Initialization function for the module */
1871 #define MODULE_NAME "pyexpat"
1874 #ifndef MODULE_INITFUNC
1875 #define MODULE_INITFUNC initpyexpat
1878 #ifndef PyMODINIT_FUNC
1880 # define PyMODINIT_FUNC __declspec(dllexport) void
1882 # define PyMODINIT_FUNC void
1886 PyMODINIT_FUNC
MODULE_INITFUNC(void); /* avoid compiler warnings */
1889 MODULE_INITFUNC(void)
1892 PyObject
*errmod_name
= PyString_FromString(MODULE_NAME
".errors");
1893 PyObject
*errors_module
;
1894 PyObject
*modelmod_name
;
1895 PyObject
*model_module
;
1896 PyObject
*sys_modules
;
1897 static struct PyExpat_CAPI capi
;
1898 PyObject
* capi_object
;
1900 if (errmod_name
== NULL
)
1902 modelmod_name
= PyString_FromString(MODULE_NAME
".model");
1903 if (modelmod_name
== NULL
)
1906 Py_TYPE(&Xmlparsetype
) = &PyType_Type
;
1908 /* Create the module and add the functions */
1909 m
= Py_InitModule3(MODULE_NAME
, pyexpat_methods
,
1910 pyexpat_module_documentation
);
1914 /* Add some symbolic constants to the module */
1915 if (ErrorObject
== NULL
) {
1916 ErrorObject
= PyErr_NewException("xml.parsers.expat.ExpatError",
1918 if (ErrorObject
== NULL
)
1921 Py_INCREF(ErrorObject
);
1922 PyModule_AddObject(m
, "error", ErrorObject
);
1923 Py_INCREF(ErrorObject
);
1924 PyModule_AddObject(m
, "ExpatError", ErrorObject
);
1925 Py_INCREF(&Xmlparsetype
);
1926 PyModule_AddObject(m
, "XMLParserType", (PyObject
*) &Xmlparsetype
);
1928 PyModule_AddObject(m
, "__version__", get_version_string());
1929 PyModule_AddStringConstant(m
, "EXPAT_VERSION",
1930 (char *) XML_ExpatVersion());
1932 XML_Expat_Version info
= XML_ExpatVersionInfo();
1933 PyModule_AddObject(m
, "version_info",
1934 Py_BuildValue("(iii)", info
.major
,
1935 info
.minor
, info
.micro
));
1937 #ifdef Py_USING_UNICODE
1938 init_template_buffer();
1940 /* XXX When Expat supports some way of figuring out how it was
1941 compiled, this should check and set native_encoding
1944 PyModule_AddStringConstant(m
, "native_encoding", "UTF-8");
1946 sys_modules
= PySys_GetObject("modules");
1947 d
= PyModule_GetDict(m
);
1948 errors_module
= PyDict_GetItem(d
, errmod_name
);
1949 if (errors_module
== NULL
) {
1950 errors_module
= PyModule_New(MODULE_NAME
".errors");
1951 if (errors_module
!= NULL
) {
1952 PyDict_SetItem(sys_modules
, errmod_name
, errors_module
);
1953 /* gives away the reference to errors_module */
1954 PyModule_AddObject(m
, "errors", errors_module
);
1957 Py_DECREF(errmod_name
);
1958 model_module
= PyDict_GetItem(d
, modelmod_name
);
1959 if (model_module
== NULL
) {
1960 model_module
= PyModule_New(MODULE_NAME
".model");
1961 if (model_module
!= NULL
) {
1962 PyDict_SetItem(sys_modules
, modelmod_name
, model_module
);
1963 /* gives away the reference to model_module */
1964 PyModule_AddObject(m
, "model", model_module
);
1967 Py_DECREF(modelmod_name
);
1968 if (errors_module
== NULL
|| model_module
== NULL
)
1969 /* Don't core dump later! */
1972 #if XML_COMBINED_VERSION > 19505
1974 const XML_Feature
*features
= XML_GetFeatureList();
1975 PyObject
*list
= PyList_New(0);
1977 /* just ignore it */
1981 for (; features
[i
].feature
!= XML_FEATURE_END
; ++i
) {
1983 PyObject
*item
= Py_BuildValue("si", features
[i
].name
,
1990 ok
= PyList_Append(list
, item
);
1998 PyModule_AddObject(m
, "features", list
);
2003 #define MYCONST(name) \
2004 PyModule_AddStringConstant(errors_module, #name, \
2005 (char*)XML_ErrorString(name))
2007 MYCONST(XML_ERROR_NO_MEMORY
);
2008 MYCONST(XML_ERROR_SYNTAX
);
2009 MYCONST(XML_ERROR_NO_ELEMENTS
);
2010 MYCONST(XML_ERROR_INVALID_TOKEN
);
2011 MYCONST(XML_ERROR_UNCLOSED_TOKEN
);
2012 MYCONST(XML_ERROR_PARTIAL_CHAR
);
2013 MYCONST(XML_ERROR_TAG_MISMATCH
);
2014 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE
);
2015 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
2016 MYCONST(XML_ERROR_PARAM_ENTITY_REF
);
2017 MYCONST(XML_ERROR_UNDEFINED_ENTITY
);
2018 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF
);
2019 MYCONST(XML_ERROR_ASYNC_ENTITY
);
2020 MYCONST(XML_ERROR_BAD_CHAR_REF
);
2021 MYCONST(XML_ERROR_BINARY_ENTITY_REF
);
2022 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
2023 MYCONST(XML_ERROR_MISPLACED_XML_PI
);
2024 MYCONST(XML_ERROR_UNKNOWN_ENCODING
);
2025 MYCONST(XML_ERROR_INCORRECT_ENCODING
);
2026 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION
);
2027 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
2028 MYCONST(XML_ERROR_NOT_STANDALONE
);
2029 MYCONST(XML_ERROR_UNEXPECTED_STATE
);
2030 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE
);
2031 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD
);
2032 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
);
2033 /* Added in Expat 1.95.7. */
2034 MYCONST(XML_ERROR_UNBOUND_PREFIX
);
2035 /* Added in Expat 1.95.8. */
2036 MYCONST(XML_ERROR_UNDECLARING_PREFIX
);
2037 MYCONST(XML_ERROR_INCOMPLETE_PE
);
2038 MYCONST(XML_ERROR_XML_DECL
);
2039 MYCONST(XML_ERROR_TEXT_DECL
);
2040 MYCONST(XML_ERROR_PUBLICID
);
2041 MYCONST(XML_ERROR_SUSPENDED
);
2042 MYCONST(XML_ERROR_NOT_SUSPENDED
);
2043 MYCONST(XML_ERROR_ABORTED
);
2044 MYCONST(XML_ERROR_FINISHED
);
2045 MYCONST(XML_ERROR_SUSPEND_PE
);
2047 PyModule_AddStringConstant(errors_module
, "__doc__",
2048 "Constants used to describe error conditions.");
2052 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
2053 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER
);
2054 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
);
2055 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS
);
2058 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2059 PyModule_AddStringConstant(model_module
, "__doc__",
2060 "Constants used to interpret content model information.");
2062 MYCONST(XML_CTYPE_EMPTY
);
2063 MYCONST(XML_CTYPE_ANY
);
2064 MYCONST(XML_CTYPE_MIXED
);
2065 MYCONST(XML_CTYPE_NAME
);
2066 MYCONST(XML_CTYPE_CHOICE
);
2067 MYCONST(XML_CTYPE_SEQ
);
2069 MYCONST(XML_CQUANT_NONE
);
2070 MYCONST(XML_CQUANT_OPT
);
2071 MYCONST(XML_CQUANT_REP
);
2072 MYCONST(XML_CQUANT_PLUS
);
2075 /* initialize pyexpat dispatch table */
2076 capi
.size
= sizeof(capi
);
2077 capi
.magic
= PyExpat_CAPI_MAGIC
;
2078 capi
.MAJOR_VERSION
= XML_MAJOR_VERSION
;
2079 capi
.MINOR_VERSION
= XML_MINOR_VERSION
;
2080 capi
.MICRO_VERSION
= XML_MICRO_VERSION
;
2081 capi
.ErrorString
= XML_ErrorString
;
2082 capi
.GetErrorCode
= XML_GetErrorCode
;
2083 capi
.GetErrorColumnNumber
= XML_GetErrorColumnNumber
;
2084 capi
.GetErrorLineNumber
= XML_GetErrorLineNumber
;
2085 capi
.Parse
= XML_Parse
;
2086 capi
.ParserCreate_MM
= XML_ParserCreate_MM
;
2087 capi
.ParserFree
= XML_ParserFree
;
2088 capi
.SetCharacterDataHandler
= XML_SetCharacterDataHandler
;
2089 capi
.SetCommentHandler
= XML_SetCommentHandler
;
2090 capi
.SetDefaultHandlerExpand
= XML_SetDefaultHandlerExpand
;
2091 capi
.SetElementHandler
= XML_SetElementHandler
;
2092 capi
.SetNamespaceDeclHandler
= XML_SetNamespaceDeclHandler
;
2093 capi
.SetProcessingInstructionHandler
= XML_SetProcessingInstructionHandler
;
2094 capi
.SetUnknownEncodingHandler
= XML_SetUnknownEncodingHandler
;
2095 capi
.SetUserData
= XML_SetUserData
;
2097 /* export as cobject */
2098 capi_object
= PyCObject_FromVoidPtr(&capi
, NULL
);
2100 PyModule_AddObject(m
, "expat_CAPI", capi_object
);
2104 clear_handlers(xmlparseobject
*self
, int initial
)
2109 for (; handler_info
[i
].name
!= NULL
; i
++) {
2111 self
->handlers
[i
] = NULL
;
2113 temp
= self
->handlers
[i
];
2114 self
->handlers
[i
] = NULL
;
2116 handler_info
[i
].setter(self
->itself
, NULL
);
2121 static struct HandlerInfo handler_info
[] = {
2122 {"StartElementHandler",
2123 (xmlhandlersetter
)XML_SetStartElementHandler
,
2124 (xmlhandler
)my_StartElementHandler
},
2125 {"EndElementHandler",
2126 (xmlhandlersetter
)XML_SetEndElementHandler
,
2127 (xmlhandler
)my_EndElementHandler
},
2128 {"ProcessingInstructionHandler",
2129 (xmlhandlersetter
)XML_SetProcessingInstructionHandler
,
2130 (xmlhandler
)my_ProcessingInstructionHandler
},
2131 {"CharacterDataHandler",
2132 (xmlhandlersetter
)XML_SetCharacterDataHandler
,
2133 (xmlhandler
)my_CharacterDataHandler
},
2134 {"UnparsedEntityDeclHandler",
2135 (xmlhandlersetter
)XML_SetUnparsedEntityDeclHandler
,
2136 (xmlhandler
)my_UnparsedEntityDeclHandler
},
2137 {"NotationDeclHandler",
2138 (xmlhandlersetter
)XML_SetNotationDeclHandler
,
2139 (xmlhandler
)my_NotationDeclHandler
},
2140 {"StartNamespaceDeclHandler",
2141 (xmlhandlersetter
)XML_SetStartNamespaceDeclHandler
,
2142 (xmlhandler
)my_StartNamespaceDeclHandler
},
2143 {"EndNamespaceDeclHandler",
2144 (xmlhandlersetter
)XML_SetEndNamespaceDeclHandler
,
2145 (xmlhandler
)my_EndNamespaceDeclHandler
},
2147 (xmlhandlersetter
)XML_SetCommentHandler
,
2148 (xmlhandler
)my_CommentHandler
},
2149 {"StartCdataSectionHandler",
2150 (xmlhandlersetter
)XML_SetStartCdataSectionHandler
,
2151 (xmlhandler
)my_StartCdataSectionHandler
},
2152 {"EndCdataSectionHandler",
2153 (xmlhandlersetter
)XML_SetEndCdataSectionHandler
,
2154 (xmlhandler
)my_EndCdataSectionHandler
},
2156 (xmlhandlersetter
)XML_SetDefaultHandler
,
2157 (xmlhandler
)my_DefaultHandler
},
2158 {"DefaultHandlerExpand",
2159 (xmlhandlersetter
)XML_SetDefaultHandlerExpand
,
2160 (xmlhandler
)my_DefaultHandlerExpandHandler
},
2161 {"NotStandaloneHandler",
2162 (xmlhandlersetter
)XML_SetNotStandaloneHandler
,
2163 (xmlhandler
)my_NotStandaloneHandler
},
2164 {"ExternalEntityRefHandler",
2165 (xmlhandlersetter
)XML_SetExternalEntityRefHandler
,
2166 (xmlhandler
)my_ExternalEntityRefHandler
},
2167 {"StartDoctypeDeclHandler",
2168 (xmlhandlersetter
)XML_SetStartDoctypeDeclHandler
,
2169 (xmlhandler
)my_StartDoctypeDeclHandler
},
2170 {"EndDoctypeDeclHandler",
2171 (xmlhandlersetter
)XML_SetEndDoctypeDeclHandler
,
2172 (xmlhandler
)my_EndDoctypeDeclHandler
},
2173 {"EntityDeclHandler",
2174 (xmlhandlersetter
)XML_SetEntityDeclHandler
,
2175 (xmlhandler
)my_EntityDeclHandler
},
2177 (xmlhandlersetter
)XML_SetXmlDeclHandler
,
2178 (xmlhandler
)my_XmlDeclHandler
},
2179 {"ElementDeclHandler",
2180 (xmlhandlersetter
)XML_SetElementDeclHandler
,
2181 (xmlhandler
)my_ElementDeclHandler
},
2182 {"AttlistDeclHandler",
2183 (xmlhandlersetter
)XML_SetAttlistDeclHandler
,
2184 (xmlhandler
)my_AttlistDeclHandler
},
2185 #if XML_COMBINED_VERSION >= 19504
2186 {"SkippedEntityHandler",
2187 (xmlhandlersetter
)XML_SetSkippedEntityHandler
,
2188 (xmlhandler
)my_SkippedEntityHandler
},
2191 {NULL
, NULL
, NULL
} /* sentinel */