4 #include "frameobject.h"
/* Expat version folded into a single comparable integer:
 * major*10000 + minor*100 + micro (this file compares it against 19504 and 19505). */
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
/*
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
 */
/* Docstring helper macros:
 *   PyDoc_STR(str)         - expands to the literal unchanged (no parentheses).
 *   PyDoc_VAR(name)        - declares a static char array called `name`.
 *   PyDoc_STRVAR(name,str) - declares `name` and initializes it with `str`. */
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
35 ProcessingInstruction
,
54 #if XML_COMBINED_VERSION >= 19504
60 static PyObject
*ErrorObject
;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
70 int returns_unicode
; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes
; /* Return attributes as a list. */
73 int specified_attributes
; /* Report only specified attributes. */
74 int in_callback
; /* Is a callback active? */
75 int ns_prefixes
; /* Namespace-triplets mode? */
76 XML_Char
*buffer
; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size
; /* Size of buffer, in XML_Char units */
79 int buffer_used
; /* Buffer units in use */
80 PyObject
*intern
; /* Dictionary to intern strings */
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype
;
88 typedef void (*xmlhandlersetter
)(XML_Parser self
, void *meth
);
89 typedef void* xmlhandler
;
93 xmlhandlersetter setter
;
95 PyCodeObject
*tb_code
;
99 static struct HandlerInfo handler_info
[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
 */
/* set_error_attr: wraps `value` in a Python int (PyInt_FromLong) and stores
 * it on `err` under attribute `name` with PyObject_SetAttrString().
 * NOTE(review): the return type, the braces and everything after the failure
 * test are not visible in this extract. */
105 set_error_attr(PyObject
*err
, char *name
, int value
)
107 PyObject
*v
= PyInt_FromLong(value
);
/* Failure branch: the int object exists but the attribute store returned -1. */
109 if (v
!= NULL
&& PyObject_SetAttrString(err
, name
, v
) == -1) {
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
 */
/* set_error: formats "<expat message>: line L, column C" for `code`, creates
 * an ErrorObject instance from that text, attaches the integer attributes
 * "code", "offset" (the column) and "lineno", and raises it with
 * PyErr_SetObject().
 * NOTE(review): the declarations of `buffer` and `err` and the start of the
 * condition that ends at "lineno" are not visible in this extract. */
121 set_error(xmlparseobject
*self
, enum XML_Error code
)
125 XML_Parser parser
= self
->itself
;
/* Position of the error as reported by Expat. */
126 int lineno
= XML_GetErrorLineNumber(parser
);
127 int column
= XML_GetErrorColumnNumber(parser
);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer
, "%.200s: line %i, column %i",
132 XML_ErrorString(code
), lineno
, column
);
133 err
= PyObject_CallFunction(ErrorObject
, "s", buffer
);
/* All three attribute stores must succeed before the exception is raised. */
135 && set_error_attr(err
, "code", code
)
136 && set_error_attr(err
, "offset", column
)
137 && set_error_attr(err
, "lineno", lineno
)) {
138 PyErr_SetObject(ErrorObject
, err
);
/* have_handler: reports whether slot `type` of self->handlers holds a
 * non-NULL handler object.  `type` is used directly as the array index. */
145 have_handler(xmlparseobject
*self
, int type
)
147 PyObject
*handler
= self
->handlers
[type
];
148 return handler
!= NULL
;
/* get_handler_name: yields the Python string form of a handler's name.
 * The string built from hinfo->name is stored back into hinfo->nameobj.
 * NOTE(review): the test guarding the creation and the return statement are
 * not visible in this extract; presumably the string is built only when
 * nameobj is still NULL - confirm. */
152 get_handler_name(struct HandlerInfo
*hinfo
)
154 PyObject
*name
= hinfo
->nameobj
;
156 name
= PyString_FromString(hinfo
->name
);
/* Remember the freshly built string on the HandlerInfo entry. */
157 hinfo
->nameobj
= name
;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
/* conv_string_to_unicode: decodes the NUL-terminated string `str` as UTF-8
 * ("strict" error mode) into a Python unicode object; the length comes from
 * strlen(str).
 * NOTE(review): the handling of a NULL `str` is not visible in this extract. */
169 conv_string_to_unicode(const XML_Char
*str
)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
178 return PyUnicode_DecodeUTF8(str
, strlen(str
), "strict");
/* conv_string_len_to_unicode: decodes exactly `len` units of `str` as UTF-8
 * ("strict" error mode) into a Python unicode object.  Unlike the
 * strlen-based variant, `str` does not need to be NUL-terminated.
 * NOTE(review): the handling of a NULL `str` is not visible in this extract. */
182 conv_string_len_to_unicode(const XML_Char
*str
, int len
)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
191 return PyUnicode_DecodeUTF8((const char *)str
, len
, "strict");
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
/* conv_string_to_utf8: copies the NUL-terminated string `str` into a new
 * 8-bit Python string without re-encoding it.
 * NOTE(review): the handling of a NULL `str` is not visible in this extract. */
199 conv_string_to_utf8(const XML_Char
*str
)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
208 return PyString_FromString(str
);
/* conv_string_len_to_utf8: copies exactly `len` units of `str` into a new
 * 8-bit Python string without re-encoding them.
 * NOTE(review): the handling of a NULL `str` is not visible in this extract. */
212 conv_string_len_to_utf8(const XML_Char
*str
, int len
)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
221 return PyString_FromStringAndSize((const char *)str
, len
);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject
*self
, int initial
);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
/* error_external_entity_ref_handler: external-entity-reference callback that
 * flag_error() installs on the parser once an error has been detected.
 * NOTE(review): only the parameter list is visible in this extract; the body
 * and return value cannot be confirmed from here. */
232 error_external_entity_ref_handler(XML_Parser parser
,
233 const XML_Char
*context
,
234 const XML_Char
*base
,
235 const XML_Char
*systemId
,
236 const XML_Char
*publicId
)
/* flag_error: puts the parser into its "an error happened" state.  It calls
 * clear_handlers(self, 0) and then installs
 * error_external_entity_ref_handler as the external entity reference
 * handler on the underlying Expat parser. */
242 flag_error(xmlparseobject
*self
)
244 clear_handlers(self
, 0);
245 XML_SetExternalEntityRefHandler(self
->itself
,
246 error_external_entity_ref_handler
);
/* getcode: returns the code object cached in handler_info[slot].tb_code,
 * building it on first use.  The code object is created with PyCode_New()
 * from an empty code string, `func_name`, this file's name (__FILE__),
 * `lineno` as first line number and one shared empty tuple for every tuple
 * argument.
 * NOTE(review): several PyCode_New arguments, the error checks after most
 * allocations and the cleanup path are not visible in this extract. */
250 getcode(enum HandlerTypes slot
, char* func_name
, int lineno
)
252 PyObject
*code
= NULL
;
253 PyObject
*name
= NULL
;
254 PyObject
*nulltuple
= NULL
;
255 PyObject
*filename
= NULL
;
/* Only build the code object the first time this slot is requested. */
257 if (handler_info
[slot
].tb_code
== NULL
) {
258 code
= PyString_FromString("");
261 name
= PyString_FromString(func_name
);
264 nulltuple
= PyTuple_New(0);
265 if (nulltuple
== NULL
)
267 filename
= PyString_FromString(__FILE__
);
268 handler_info
[slot
].tb_code
=
269 PyCode_New(0, /* argcount */
274 nulltuple
, /* consts */
275 nulltuple
, /* names */
276 nulltuple
, /* varnames */
277 #if PYTHON_API_VERSION >= 1010
278 nulltuple
, /* freevars */
279 nulltuple
, /* cellvars */
281 filename
, /* filename */
283 lineno
, /* firstlineno */
286 if (handler_info
[slot
].tb_code
== NULL
)
289 Py_DECREF(nulltuple
);
293 return handler_info
[slot
].tb_code
;
/* trace_frame: forwards an event for frame `f` to the thread's C-level
 * profile function and trace function, when they are set.  Nothing is
 * dispatched when tracing is disabled or already in progress.  After each
 * call, tstate->use_tracing is recomputed from whether either hook is still
 * installed.
 * NOTE(review): the remaining call arguments, the handling of `result` and
 * the return statements are not visible in this extract. */
302 trace_frame(PyThreadState
*tstate
, PyFrameObject
*f
, int code
, PyObject
*val
)
305 if (!tstate
->use_tracing
|| tstate
->tracing
)
/* Profile hook. */
307 if (tstate
->c_profilefunc
!= NULL
) {
309 result
= tstate
->c_profilefunc(tstate
->c_profileobj
,
311 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
312 || (tstate
->c_profilefunc
!= NULL
));
/* Trace hook. */
317 if (tstate
->c_tracefunc
!= NULL
) {
319 result
= tstate
->c_tracefunc(tstate
->c_traceobj
,
321 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
322 || (tstate
->c_profilefunc
!= NULL
));
/* trace_frame_exc: reports the pending exception to the trace function as a
 * PyTrace_EXCEPTION event.  The exception is fetched, packed into a
 * (type, value, traceback) tuple (Py_BuildValue before Python 2.4,
 * PyTuple_Pack otherwise), passed to trace_frame(), and restored with
 * PyErr_Restore().
 * NOTE(review): the early return, the branch structure around the two
 * PyErr_Restore calls and the remaining cleanup are not visible in this
 * extract. */
329 trace_frame_exc(PyThreadState
*tstate
, PyFrameObject
*f
)
331 PyObject
*type
, *value
, *traceback
, *arg
;
334 if (tstate
->c_tracefunc
== NULL
)
337 PyErr_Fetch(&type
, &value
, &traceback
);
342 #if PY_VERSION_HEX < 0x02040000
343 arg
= Py_BuildValue("(OOO)", type
, value
, traceback
);
345 arg
= PyTuple_Pack(3, type
, value
, traceback
);
348 PyErr_Restore(type
, value
, traceback
);
351 err
= trace_frame(tstate
, f
, PyTrace_EXCEPTION
, arg
);
354 PyErr_Restore(type
, value
, traceback
);
358 Py_XDECREF(traceback
);
/* call_with_frame: calls `func` with `args` while a frame built from code
 * object `c` is available for tracing.  Visible steps: create the frame with
 * the current globals, emit a PyTrace_CALL event, call the function with
 * PyEval_CallObject(), stop the Expat parser (XML_StopParser with XML_FALSE,
 * i.e. not resumable) and emit the exception event on the failure path, emit
 * PyTrace_RETURN otherwise, and finally reset tstate->frame to f->f_back.
 * NOTE(review): the error returns, the statement guarded by the
 * curexc_traceback test and the frame's reference handling are not visible
 * in this extract. */
365 call_with_frame(PyCodeObject
*c
, PyObject
* func
, PyObject
* args
,
366 xmlparseobject
*self
)
368 PyThreadState
*tstate
= PyThreadState_GET();
375 f
= PyFrame_New(tstate
, c
, PyEval_GetGlobals(), NULL
);
380 if (trace_frame(tstate
, f
, PyTrace_CALL
, Py_None
) < 0) {
/* The actual callback invocation. */
384 res
= PyEval_CallObject(func
, args
);
386 if (tstate
->curexc_traceback
== NULL
)
388 XML_StopParser(self
->itself
, XML_FALSE
);
390 if (trace_frame_exc(tstate
, f
) < 0) {
395 if (trace_frame(tstate
, f
, PyTrace_RETURN
, res
) < 0) {
/* Pop the synthetic frame again. */
403 tstate
->frame
= f
->f_back
;
/* STRING_CONV_FUNC selects the XML_Char -> Python string converter.  Without
 * Unicode support it is always conv_string_to_utf8; otherwise it expands to
 * an expression that reads `self->returns_unicode`, so a variable named
 * `self` must be in scope wherever the macro is used. */
408 #ifndef Py_USING_UNICODE
409 #define STRING_CONV_FUNC conv_string_to_utf8
411 /* Python 2.0 and later versions, when built with Unicode support */
412 #define STRING_CONV_FUNC (self->returns_unicode \
413 ? conv_string_to_unicode : conv_string_to_utf8)
/* string_intern: converts `str` with STRING_CONV_FUNC and looks the result
 * up in the self->intern dictionary.  When it is not yet present, the new
 * object is inserted as both key and value.
 * NOTE(review): the NULL checks, the branch taken when a value is found and
 * the return statements are not visible in this extract. */
417 string_intern(xmlparseobject
*self
, const char* str
)
419 PyObject
*result
= STRING_CONV_FUNC(str
);
421 /* result can be NULL if the unicode conversion failed. */
426 value
= PyDict_GetItem(self
->intern
, result
);
428 if (PyDict_SetItem(self
->intern
, result
, result
) == 0)
438 /* Return 0 on success, -1 on exception.
439 * flag_error() will be called before return if needed.
 */
/* call_character_handler: delivers `len` units of character data to the
 * Python CharacterData handler.  The data is converted to unicode or to a
 * UTF-8 string depending on self->returns_unicode (always UTF-8 without
 * Unicode support) and placed in a 1-tuple.  The handler is then called
 * through call_with_frame() with self->in_callback set for the duration.
 * NOTE(review): the error checks and the return statements are not visible
 * in this extract. */
442 call_character_handler(xmlparseobject
*self
, const XML_Char
*buffer
, int len
)
447 args
= PyTuple_New(1);
450 #ifdef Py_USING_UNICODE
451 temp
= (self
->returns_unicode
452 ? conv_string_len_to_unicode(buffer
, len
)
453 : conv_string_len_to_utf8(buffer
, len
));
455 temp
= conv_string_len_to_utf8(buffer
, len
);
462 PyTuple_SET_ITEM(args
, 0, temp
);
463 /* temp is now a borrowed reference; consider it unused. */
464 self
->in_callback
= 1;
465 temp
= call_with_frame(getcode(CharacterData
, "CharacterData", __LINE__
),
466 self
->handlers
[CharacterData
], args
, self
);
467 /* temp is an owned reference again, or NULL */
468 self
->in_callback
= 0;
/* flush_character_buffer: hands any text accumulated in self->buffer to
 * call_character_handler() and marks the buffer empty.  Nothing is delivered
 * when buffering is disabled (buffer == NULL) or the buffer is empty.
 * NOTE(review): the return statements are not visible in this extract;
 * callers test the result with `< 0`. */
479 flush_character_buffer(xmlparseobject
*self
)
482 if (self
->buffer
== NULL
|| self
->buffer_used
== 0)
484 rc
= call_character_handler(self
, self
->buffer
, self
->buffer_used
);
/* The buffer is considered consumed whatever the handler returned. */
485 self
->buffer_used
= 0;
/* my_CharacterDataHandler: Expat character-data callback.
 *   - Buffering disabled (self->buffer == NULL): forward the data directly.
 *   - Otherwise: flush the buffer first when the new data would not fit;
 *     data longer than the whole buffer is forwarded directly, and anything
 *     else is appended to self->buffer with memcpy.
 * NOTE(review): the else/return structure connecting these steps is not
 * visible in this extract. */
490 my_CharacterDataHandler(void *userData
, const XML_Char
*data
, int len
)
492 xmlparseobject
*self
= (xmlparseobject
*) userData
;
493 if (self
->buffer
== NULL
)
494 call_character_handler(self
, data
, len
);
/* Would the new data overflow the accumulation buffer? */
496 if ((self
->buffer_used
+ len
) > self
->buffer_size
) {
497 if (flush_character_buffer(self
) < 0)
499 /* handler might have changed; drop the rest on the floor
500 * if there isn't a handler anymore
502 if (!have_handler(self
, CharacterData
))
505 if (len
> self
->buffer_size
) {
506 call_character_handler(self
, data
, len
);
507 self
->buffer_used
= 0;
510 memcpy(self
->buffer
+ self
->buffer_used
,
511 data
, len
* sizeof(XML_Char
));
512 self
->buffer_used
+= len
;
/* my_StartElementHandler: Expat start-element callback.  When a Python
 * StartElement handler is installed, buffered character data is flushed
 * first.  The attributes are collected into a list
 * [name0, value0, name1, value1, ...] when self->ordered_attributes is set,
 * or into a dict otherwise; attribute names go through string_intern() and
 * values through STRING_CONV_FUNC.  The handler is called with
 * (interned element name, container) through call_with_frame().
 * With self->specified_attributes set, the slot count comes from
 * XML_GetSpecifiedAttributeCount() instead of scanning atts[] for NULL.
 * NOTE(review): the error-path returns, the loop that counts atts[] and the
 * handling of `rv` are not visible in this extract. */
518 my_StartElementHandler(void *userData
,
519 const XML_Char
*name
, const XML_Char
*atts
[])
521 xmlparseobject
*self
= (xmlparseobject
*)userData
;
523 if (have_handler(self
, StartElement
)) {
524 PyObject
*container
, *rv
, *args
;
527 if (flush_character_buffer(self
) < 0)
529 /* Set max to the number of slots filled in atts[]; max/2 is
530 * the number of attributes we need to process.
532 if (self
->specified_attributes
) {
533 max
= XML_GetSpecifiedAttributeCount(self
->itself
);
537 while (atts
[max
] != NULL
)
540 /* Build the container. */
541 if (self
->ordered_attributes
)
542 container
= PyList_New(max
);
544 container
= PyDict_New();
545 if (container
== NULL
) {
/* atts[] holds name/value pairs, hence the step of two. */
549 for (i
= 0; i
< max
; i
+= 2) {
550 PyObject
*n
= string_intern(self
, (XML_Char
*) atts
[i
]);
554 Py_DECREF(container
);
557 v
= STRING_CONV_FUNC((XML_Char
*) atts
[i
+1]);
560 Py_DECREF(container
);
564 if (self
->ordered_attributes
) {
565 PyList_SET_ITEM(container
, i
, n
);
566 PyList_SET_ITEM(container
, i
+1, v
);
568 else if (PyDict_SetItem(container
, n
, v
)) {
579 args
= string_intern(self
, name
);
581 args
= Py_BuildValue("(NN)", args
, container
);
583 Py_DECREF(container
);
586 /* Container is now a borrowed reference; ignore it. */
587 self
->in_callback
= 1;
588 rv
= call_with_frame(getcode(StartElement
, "StartElement", __LINE__
),
589 self
->handlers
[StartElement
], args
, self
);
590 self
->in_callback
= 0;
/* Callback generator macros.
 * RC_HANDLER defines a function my_<NAME>Handler taking PARAMS.  When a
 * Python handler is installed for slot NAME it flushes buffered character
 * data, builds the argument tuple with Py_BuildValue PARAM_FORMAT (calling
 * flag_error() and returning RETURN when that fails) and calls the handler
 * through call_with_frame() with self->in_callback set around the call.
 * GETUSERDATA is the expression that yields `self`.
 * VOID_HANDLER is the variant with no return value; INT_HANDLER returns an
 * int `rc` taken from PyInt_AsLong(rv).  Both obtain `self` from `userData`.
 * NOTE(review): several lines of the RC_HANDLER body (use of INIT and
 * CONVERSION, handling of `rv`, closing braces) are not visible in this
 * extract. */
600 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
601 RETURN, GETUSERDATA) \
603 my_##NAME##Handler PARAMS {\
604 xmlparseobject *self = GETUSERDATA ; \
605 PyObject *args = NULL; \
606 PyObject *rv = NULL; \
609 if (have_handler(self, NAME)) { \
610 if (flush_character_buffer(self) < 0) \
612 args = Py_BuildValue PARAM_FORMAT ;\
613 if (!args) { flag_error(self); return RETURN;} \
614 self->in_callback = 1; \
615 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
616 self->handlers[NAME], args, self); \
617 self->in_callback = 0; \
629 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
630 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
631 (xmlparseobject *)userData)
633 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
634 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
635 rc = PyInt_AsLong(rv);, rc, \
636 (xmlparseobject *)userData)
/* Generates my_EndElementHandler: the Python handler receives the interned
 * element name as its single argument. */
638 VOID_HANDLER(EndElement
,
639 (void *userData
, const XML_Char
*name
),
640 ("(N)", string_intern(self
, name
)))
/* Generates my_ProcessingInstructionHandler: the Python handler receives
 * (interned target, data converted with STRING_CONV_FUNC).
 * NOTE(review): the first line of the parameter list is not visible in this
 * extract. */
642 VOID_HANDLER(ProcessingInstruction
,
644 const XML_Char
*target
,
645 const XML_Char
*data
),
646 ("(NO&)", string_intern(self
, target
), STRING_CONV_FUNC
,data
))
/* Generates my_UnparsedEntityDeclHandler: all five strings (entity name,
 * base, system id, public id, notation name) are passed to the Python
 * handler as interned strings.
 * NOTE(review): the first parameter line and the format string are not
 * visible in this extract. */
648 VOID_HANDLER(UnparsedEntityDecl
,
650 const XML_Char
*entityName
,
651 const XML_Char
*base
,
652 const XML_Char
*systemId
,
653 const XML_Char
*publicId
,
654 const XML_Char
*notationName
),
656 string_intern(self
, entityName
), string_intern(self
, base
),
657 string_intern(self
, systemId
), string_intern(self
, publicId
),
658 string_intern(self
, notationName
)))
/* Generates my_EntityDeclHandler in two build variants.  Both pass the
 * interned entity name, the is_parameter_entity flag, the entity value
 * converted with an explicit length, and the interned base, system id,
 * public id and notation name.  Without Unicode support the value is always
 * a UTF-8 string; with it, the conversion follows self->returns_unicode.
 * NOTE(review): the first parameter line, the `value_length` parameter
 * declaration, the format strings and the #else/#endif lines are not
 * visible in this extract. */
660 #ifndef Py_USING_UNICODE
661 VOID_HANDLER(EntityDecl
,
663 const XML_Char
*entityName
,
664 int is_parameter_entity
,
665 const XML_Char
*value
,
667 const XML_Char
*base
,
668 const XML_Char
*systemId
,
669 const XML_Char
*publicId
,
670 const XML_Char
*notationName
),
672 string_intern(self
, entityName
), is_parameter_entity
,
673 conv_string_len_to_utf8(value
, value_length
),
674 string_intern(self
, base
), string_intern(self
, systemId
),
675 string_intern(self
, publicId
),
676 string_intern(self
, notationName
)))
678 VOID_HANDLER(EntityDecl
,
680 const XML_Char
*entityName
,
681 int is_parameter_entity
,
682 const XML_Char
*value
,
684 const XML_Char
*base
,
685 const XML_Char
*systemId
,
686 const XML_Char
*publicId
,
687 const XML_Char
*notationName
),
689 string_intern(self
, entityName
), is_parameter_entity
,
690 (self
->returns_unicode
691 ? conv_string_len_to_unicode(value
, value_length
)
692 : conv_string_len_to_utf8(value
, value_length
)),
693 string_intern(self
, base
), string_intern(self
, systemId
),
694 string_intern(self
, publicId
),
695 string_intern(self
, notationName
)))
/* Generates my_XmlDeclHandler: `version` and `encoding` are converted with
 * STRING_CONV_FUNC (not interned) before being passed to the Python handler.
 * NOTE(review): the first parameter line, any further parameters and the
 * format string are not visible in this extract. */
698 VOID_HANDLER(XmlDecl
,
700 const XML_Char
*version
,
701 const XML_Char
*encoding
,
704 STRING_CONV_FUNC
,version
, STRING_CONV_FUNC
,encoding
,
/* conv_content_model: recursively converts an Expat XML_Content tree into
 * nested Python tuples of the form (type, quant, name, children), where
 * `children` is a tuple of the converted child models and `name` is
 * converted with the supplied `conv_string` function.
 * NOTE(review): the second argument of the recursive call, the failure
 * handling inside the loop and the return statement are not visible in this
 * extract. */
708 conv_content_model(XML_Content
* const model
,
709 PyObject
*(*conv_string
)(const XML_Char
*))
711 PyObject
*result
= NULL
;
712 PyObject
*children
= PyTuple_New(model
->numchildren
);
715 if (children
!= NULL
) {
/* numchildren is cast to int for the loop bound below. */
716 assert(model
->numchildren
< INT_MAX
);
717 for (i
= 0; i
< (int)model
->numchildren
; ++i
) {
718 PyObject
*child
= conv_content_model(&model
->children
[i
],
721 Py_XDECREF(children
);
724 PyTuple_SET_ITEM(children
, i
, child
);
726 result
= Py_BuildValue("(iiO&N)",
727 model
->type
, model
->quant
,
728 conv_string
,model
->name
, children
);
/* my_ElementDeclHandler: Expat element-declaration callback.  When a Python
 * ElementDecl handler is installed, buffered character data is flushed, the
 * content model is converted with conv_content_model() (the string converter
 * follows self->returns_unicode when Unicode is available), and the handler
 * is called with (interned element name, model tuple).  The Expat content
 * model is released with XML_FreeContentModel().
 * NOTE(review): the `model` parameter declaration, the error-path gotos or
 * returns and the handling of `rv` are not visible in this extract. */
734 my_ElementDeclHandler(void *userData
,
735 const XML_Char
*name
,
738 xmlparseobject
*self
= (xmlparseobject
*)userData
;
739 PyObject
*args
= NULL
;
741 if (have_handler(self
, ElementDecl
)) {
743 PyObject
*modelobj
, *nameobj
;
745 if (flush_character_buffer(self
) < 0)
747 #ifdef Py_USING_UNICODE
748 modelobj
= conv_content_model(model
,
749 (self
->returns_unicode
750 ? conv_string_to_unicode
751 : conv_string_to_utf8
));
753 modelobj
= conv_content_model(model
, conv_string_to_utf8
);
755 if (modelobj
== NULL
) {
759 nameobj
= string_intern(self
, name
);
760 if (nameobj
== NULL
) {
765 args
= Py_BuildValue("NN", nameobj
, modelobj
);
771 self
->in_callback
= 1;
772 rv
= call_with_frame(getcode(ElementDecl
, "ElementDecl", __LINE__
),
773 self
->handlers
[ElementDecl
], args
, self
);
774 self
->in_callback
= 0;
/* The content model was allocated by Expat and must be freed through it. */
783 XML_FreeContentModel(self
->itself
, model
);
/* Generates my_AttlistDeclHandler: element and attribute names are interned;
 * the attribute type and default value are converted with STRING_CONV_FUNC.
 * NOTE(review): the first parameter line, any further parameters and the
 * format string are not visible in this extract. */
787 VOID_HANDLER(AttlistDecl
,
789 const XML_Char
*elname
,
790 const XML_Char
*attname
,
791 const XML_Char
*att_type
,
792 const XML_Char
*dflt
,
795 string_intern(self
, elname
), string_intern(self
, attname
),
796 STRING_CONV_FUNC
,att_type
, STRING_CONV_FUNC
,dflt
,
/* Generates my_SkippedEntityHandler (only for Expat >= 1.95.4): the Python
 * handler receives the interned entity name and the is_parameter_entity
 * flag.
 * NOTE(review): the first parameter line and the format string are not
 * visible in this extract. */
799 #if XML_COMBINED_VERSION >= 19504
800 VOID_HANDLER(SkippedEntity
,
802 const XML_Char
*entityName
,
803 int is_parameter_entity
),
805 string_intern(self
, entityName
), is_parameter_entity
))
/* Generates my_NotationDeclHandler: notation name, base, system id and
 * public id are all passed to the Python handler as interned strings.
 * NOTE(review): the first parameter line and the format string are not
 * visible in this extract. */
808 VOID_HANDLER(NotationDecl
,
810 const XML_Char
*notationName
,
811 const XML_Char
*base
,
812 const XML_Char
*systemId
,
813 const XML_Char
*publicId
),
815 string_intern(self
, notationName
), string_intern(self
, base
),
816 string_intern(self
, systemId
), string_intern(self
, publicId
)))
/* Generates my_StartNamespaceDeclHandler: the Python handler receives the
 * interned prefix and the interned namespace URI.
 * NOTE(review): the first parameter line and the format string are not
 * visible in this extract. */
818 VOID_HANDLER(StartNamespaceDecl
,
820 const XML_Char
*prefix
,
821 const XML_Char
*uri
),
823 string_intern(self
, prefix
), string_intern(self
, uri
)))
/* Generates my_EndNamespaceDeclHandler: the Python handler receives the
 * interned prefix as its single argument.
 * NOTE(review): the first parameter line is not visible in this extract. */
825 VOID_HANDLER(EndNamespaceDecl
,
827 const XML_Char
*prefix
),
828 ("(N)", string_intern(self
, prefix
)))
/* Generates my_CommentHandler: the comment text is converted with
 * STRING_CONV_FUNC (not interned) and passed as the single argument. */
830 VOID_HANDLER(Comment
,
831 (void *userData
, const XML_Char
*data
),
832 ("(O&)", STRING_CONV_FUNC
,data
))
/* Openings of the VOID_HANDLER instantiations for the StartCdataSection and
 * EndCdataSection callbacks.
 * NOTE(review): their parameter lists and format strings are not visible in
 * this extract. */
834 VOID_HANDLER(StartCdataSection
,
838 VOID_HANDLER(EndCdataSection
,
/* Generates my_DefaultHandler and my_DefaultHandlerExpandHandler in two
 * build variants.  Each passes `len` units of `s` as a single string
 * argument: always UTF-8 without Unicode support, otherwise unicode or UTF-8
 * depending on self->returns_unicode.
 * NOTE(review): the #else/#endif lines separating the variants are not
 * visible in this extract. */
842 #ifndef Py_USING_UNICODE
843 VOID_HANDLER(Default
,
844 (void *userData
, const XML_Char
*s
, int len
),
845 ("(N)", conv_string_len_to_utf8(s
,len
)))
847 VOID_HANDLER(DefaultHandlerExpand
,
848 (void *userData
, const XML_Char
*s
, int len
),
849 ("(N)", conv_string_len_to_utf8(s
,len
)))
851 VOID_HANDLER(Default
,
852 (void *userData
, const XML_Char
*s
, int len
),
853 ("(N)", (self
->returns_unicode
854 ? conv_string_len_to_unicode(s
,len
)
855 : conv_string_len_to_utf8(s
,len
))))
857 VOID_HANDLER(DefaultHandlerExpand
,
858 (void *userData
, const XML_Char
*s
, int len
),
859 ("(N)", (self
->returns_unicode
860 ? conv_string_len_to_unicode(s
,len
)
861 : conv_string_len_to_utf8(s
,len
))))
/* Opening of the INT_HANDLER instantiation for the NotStandalone callback;
 * INT_HANDLER makes the generated function return an int taken from the
 * Python handler's result.
 * NOTE(review): the parameter list and format string are not visible in
 * this extract. */
864 INT_HANDLER(NotStandalone
,
/* Generates my_ExternalEntityRefHandler directly from RC_HANDLER because
 * this callback receives the XML_Parser rather than the user data: `self`
 * is obtained with XML_GetUserData(parser).  `context` is converted with
 * STRING_CONV_FUNC, while base, system id and public id are interned.  The
 * int result comes from PyInt_AsLong(rv).
 * NOTE(review): the first parameter line, the INIT argument and the format
 * string are not visible in this extract. */
868 RC_HANDLER(int, ExternalEntityRef
,
870 const XML_Char
*context
,
871 const XML_Char
*base
,
872 const XML_Char
*systemId
,
873 const XML_Char
*publicId
),
876 STRING_CONV_FUNC
,context
, string_intern(self
, base
),
877 string_intern(self
, systemId
), string_intern(self
, publicId
)),
878 rc
= PyInt_AsLong(rv
);, rc
,
879 XML_GetUserData(parser
))
881 /* XXX UnknownEncodingHandler */
/* Generates my_StartDoctypeDeclHandler: the Python handler receives the
 * interned doctype name, system id and public id, followed by the
 * has_internal_subset flag as an int. */
883 VOID_HANDLER(StartDoctypeDecl
,
884 (void *userData
, const XML_Char
*doctypeName
,
885 const XML_Char
*sysid
, const XML_Char
*pubid
,
886 int has_internal_subset
),
887 ("(NNNi)", string_intern(self
, doctypeName
),
888 string_intern(self
, sysid
), string_intern(self
, pubid
),
889 has_internal_subset
))
/* Generates my_EndDoctypeDeclHandler: the Python handler is called with no
 * arguments. */
891 VOID_HANDLER(EndDoctypeDecl
, (void *userData
), ("()"))
893 /* ---------------------------------------------------------------- */
/* get_parse_result: turns the status `rv` of an Expat parse call into the
 * Python-level result.  A pending Python exception is checked first, an
 * Expat error is reported through set_error(), remaining buffered character
 * data is flushed, and on success `rv` is returned as a Python int.
 * NOTE(review): the conditions and returns between these steps are not
 * visible in this extract. */
896 get_parse_result(xmlparseobject
*self
, int rv
)
898 if (PyErr_Occurred()) {
902 return set_error(self
, XML_GetErrorCode(self
->itself
));
904 if (flush_character_buffer(self
) < 0) {
907 return PyInt_FromLong(rv
);
/* xmlparse_Parse: Python method Parse(data[, isfinal]).  Parses the
 * arguments as a string with length plus an optional int, feeds the bytes to
 * XML_Parse() and maps the outcome through get_parse_result().
 * NOTE(review): the local declarations (including the default for
 * `isFinal`) and the argument-error return are not visible in this
 * extract. */
910 PyDoc_STRVAR(xmlparse_Parse__doc__
,
911 "Parse(data[, isfinal])\n\
912 Parse XML data. `isfinal' should be true at end of input.");
915 xmlparse_Parse(xmlparseobject
*self
, PyObject
*args
)
921 if (!PyArg_ParseTuple(args
, "s#|i:Parse", &s
, &slen
, &isFinal
))
924 return get_parse_result(self
, XML_Parse(self
->itself
, s
, slen
, isFinal
));
927 /* File reading copied from cPickle */
929 #define BUF_SIZE 2048
/* readinst: reads up to `buf_size` bytes by calling the Python callable
 * `meth` (a file-like object's read method) with buf_size as its only
 * argument, then copies the returned string into `buf`.
 * A TypeError is raised when the call does not return a string object, and
 * a ValueError when it returns more than buf_size bytes.
 * NOTE(review): the cleanup path and the return statements are not visible
 * in this extract; the caller treats a negative result as failure. */
932 readinst(char *buf
, int buf_size
, PyObject
*meth
)
934 PyObject
*arg
= NULL
;
935 PyObject
*bytes
= NULL
;
936 PyObject
*str
= NULL
;
/* Build the 1-tuple (buf_size,) used as the call arguments. */
939 if ((bytes
= PyInt_FromLong(buf_size
)) == NULL
)
942 if ((arg
= PyTuple_New(1)) == NULL
) {
947 PyTuple_SET_ITEM(arg
, 0, bytes
);
949 #if PY_VERSION_HEX < 0x02020000
950 str
= PyObject_CallObject(meth
, arg
);
952 str
= PyObject_Call(meth
, arg
, NULL
);
957 /* XXX what to do if it returns a Unicode string? */
958 if (!PyString_Check(str
)) {
959 PyErr_Format(PyExc_TypeError
,
960 "read() did not return a string object (type=%.400s)",
961 str
->ob_type
->tp_name
);
964 len
= PyString_GET_SIZE(str
);
/* Never copy more than the caller's buffer can hold. */
965 if (len
> buf_size
) {
966 PyErr_Format(PyExc_ValueError
,
967 "read() returned too much data: "
968 "%i bytes requested, %i returned",
972 memcpy(buf
, PyString_AsString(str
), len
);
979 PyDoc_STRVAR(xmlparse_ParseFile__doc__
,
981 Parse XML data from file-like object.");
/* xmlparse_ParseFile: Python method ParseFile(file).  Accepts either a real
 * file object (read with fread through its FILE*) or any object with a
 * `read` attribute (read through readinst()).  Data is pulled in BUF_SIZE
 * chunks into a buffer obtained from XML_GetBuffer() and parsed with
 * XML_ParseBuffer(); a zero-byte read marks the final chunk.  The loop ends
 * on a parse failure or at end of input, and the outcome is mapped through
 * get_parse_result().
 * NOTE(review): fread() returns a size_t, so the `bytes_read < 0` test can
 * never observe a negative value from the call itself; read errors would
 * need ferror(fp).  The declaration of bytes_read is not visible in this
 * extract - confirm.
 * NOTE(review): the loop header, the else branches and several returns are
 * not visible in this extract. */
984 xmlparse_ParseFile(xmlparseobject
*self
, PyObject
*args
)
989 PyObject
*readmethod
= NULL
;
991 if (!PyArg_ParseTuple(args
, "O:ParseFile", &f
))
994 if (PyFile_Check(f
)) {
995 fp
= PyFile_AsFile(f
);
999 readmethod
= PyObject_GetAttrString(f
, "read");
1000 if (readmethod
== NULL
) {
1002 PyErr_SetString(PyExc_TypeError
,
1003 "argument must have 'read' attribute");
/* Expat owns this buffer; it is handed back via XML_ParseBuffer below. */
1009 void *buf
= XML_GetBuffer(self
->itself
, BUF_SIZE
);
1011 Py_XDECREF(readmethod
);
1012 return PyErr_NoMemory();
1016 bytes_read
= fread(buf
, sizeof(char), BUF_SIZE
, fp
);
1017 if (bytes_read
< 0) {
1018 PyErr_SetFromErrno(PyExc_IOError
);
1023 bytes_read
= readinst(buf
, BUF_SIZE
, readmethod
);
1024 if (bytes_read
< 0) {
1025 Py_DECREF(readmethod
);
/* A zero-length read is passed to Expat as the "final" flag. */
1029 rv
= XML_ParseBuffer(self
->itself
, bytes_read
, bytes_read
== 0);
1030 if (PyErr_Occurred()) {
1031 Py_XDECREF(readmethod
);
1035 if (!rv
|| bytes_read
== 0)
1038 Py_XDECREF(readmethod
);
1039 return get_parse_result(self
, rv
);
/* xmlparse_SetBase: Python method SetBase(base_url).  Parses one string
 * argument and passes it to XML_SetBase(); a false result from Expat is
 * reported as MemoryError.
 * NOTE(review): the declaration of `base`, the argument-error return and
 * the success return are not visible in this extract. */
1042 PyDoc_STRVAR(xmlparse_SetBase__doc__
,
1043 "SetBase(base_url)\n\
1044 Set the base URL for the parser.");
1047 xmlparse_SetBase(xmlparseobject
*self
, PyObject
*args
)
1051 if (!PyArg_ParseTuple(args
, "s:SetBase", &base
))
1053 if (!XML_SetBase(self
->itself
, base
)) {
1054 return PyErr_NoMemory();
/* xmlparse_GetBase: Python method GetBase().  Takes no arguments and returns
 * XML_GetBase() converted with the "z" format, so a NULL base becomes None.
 * NOTE(review): the argument-error return is not visible in this extract. */
1060 PyDoc_STRVAR(xmlparse_GetBase__doc__
,
1061 "GetBase() -> url\n\
1062 Return base URL string for the parser.");
1065 xmlparse_GetBase(xmlparseobject
*self
, PyObject
*args
)
1067 if (!PyArg_ParseTuple(args
, ":GetBase"))
1070 return Py_BuildValue("z", XML_GetBase(self
->itself
));
/* xmlparse_GetInputContext: Python method GetInputContext().  Only consults
 * Expat while a callback is active (self->in_callback).  The returned string
 * is built from the bytes between `offset` and `size` of the buffer reported
 * by XML_GetInputContext().
 * NOTE(review): the local declarations, the branch taken outside a callback
 * and the return statement are not visible in this extract. */
1073 PyDoc_STRVAR(xmlparse_GetInputContext__doc__
,
1074 "GetInputContext() -> string\n\
1075 Return the untranslated text of the input that caused the current event.\n\
1076 If the event was generated by a large amount of text (such as a start tag\n\
1077 for an element with many attributes), not all of the text may be available.");
1080 xmlparse_GetInputContext(xmlparseobject
*self
, PyObject
*args
)
1082 PyObject
*result
= NULL
;
1084 if (PyArg_ParseTuple(args
, ":GetInputContext")) {
1085 if (self
->in_callback
) {
1088 = XML_GetInputContext(self
->itself
, &offset
, &size
);
1091 result
= PyString_FromStringAndSize(buffer
+ offset
, size
- offset
);
/* xmlparse_ExternalEntityParserCreate: Python method
 * ExternalEntityParserCreate(context[, encoding]).  Allocates a new
 * xmlparseobject (GC-aware allocation where Py_TPFLAGS_HAVE_GC exists),
 * copies the parent's buffering and mode settings, shares the parent's
 * intern dictionary, creates the Expat sub-parser with
 * XML_ExternalEntityParserCreate(), and duplicates every handler that is set
 * on the parent, re-registering the matching C callback on the new parser.
 * NOTE(review): buffer_size is documented on the struct as a count of
 * XML_Char units, but malloc() below is given buffer_size bytes; that is
 * only equivalent while XML_Char is one byte wide - confirm.
 * NOTE(review): the else branches, the reference-count handling of copied
 * handlers and several declarations are not visible in this extract. */
1105 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__
,
1106 "ExternalEntityParserCreate(context[, encoding])\n\
1107 Create a parser for parsing an external entity based on the\n\
1108 information passed to the ExternalEntityRefHandler.");
1111 xmlparse_ExternalEntityParserCreate(xmlparseobject
*self
, PyObject
*args
)
1114 char *encoding
= NULL
;
1115 xmlparseobject
*new_parser
;
1118 if (!PyArg_ParseTuple(args
, "z|s:ExternalEntityParserCreate",
1119 &context
, &encoding
)) {
1123 #ifndef Py_TPFLAGS_HAVE_GC
1124 /* Python versions 2.0 and 2.1 */
1125 new_parser
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1127 /* Python versions 2.2 and later */
1128 new_parser
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1131 if (new_parser
== NULL
)
/* Text buffering is inherited: same size, and enabled only if the parent has a buffer. */
1133 new_parser
->buffer_size
= self
->buffer_size
;
1134 new_parser
->buffer_used
= 0;
1135 if (self
->buffer
!= NULL
) {
1136 new_parser
->buffer
= malloc(new_parser
->buffer_size
);
1137 if (new_parser
->buffer
== NULL
) {
1138 #ifndef Py_TPFLAGS_HAVE_GC
1139 /* Code for versions 2.0 and 2.1 */
1140 PyObject_Del(new_parser
);
1142 /* Code for versions 2.2 and later. */
1143 PyObject_GC_Del(new_parser
);
1145 return PyErr_NoMemory();
1149 new_parser
->buffer
= NULL
;
1150 new_parser
->returns_unicode
= self
->returns_unicode
;
1151 new_parser
->ordered_attributes
= self
->ordered_attributes
;
1152 new_parser
->specified_attributes
= self
->specified_attributes
;
1153 new_parser
->in_callback
= 0;
1154 new_parser
->ns_prefixes
= self
->ns_prefixes
;
1155 new_parser
->itself
= XML_ExternalEntityParserCreate(self
->itself
, context
,
1157 new_parser
->handlers
= 0;
/* The intern dictionary is shared with the parent parser. */
1158 new_parser
->intern
= self
->intern
;
1159 Py_XINCREF(new_parser
->intern
);
1160 #ifdef Py_TPFLAGS_HAVE_GC
1161 PyObject_GC_Track(new_parser
);
1163 PyObject_GC_Init(new_parser
);
1166 if (!new_parser
->itself
) {
1167 Py_DECREF(new_parser
);
1168 return PyErr_NoMemory();
1171 XML_SetUserData(new_parser
->itself
, (void *)new_parser
);
1173 /* allocate and clear handlers first */
1174 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1177 new_parser
->handlers
= malloc(sizeof(PyObject
*) * i
);
1178 if (!new_parser
->handlers
) {
1179 Py_DECREF(new_parser
);
1180 return PyErr_NoMemory();
1182 clear_handlers(new_parser
, 1);
1184 /* then copy handlers from self */
1185 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1186 PyObject
*handler
= self
->handlers
[i
];
1187 if (handler
!= NULL
) {
1189 new_parser
->handlers
[i
] = handler
;
1190 handler_info
[i
].setter(new_parser
->itself
,
1191 handler_info
[i
].handler
);
1194 return (PyObject
*)new_parser
;
1197 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__
,
1198 "SetParamEntityParsing(flag) -> success\n\
1199 Controls parsing of parameter entities (including the external DTD\n\
1200 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1201 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1202 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
/* xmlparse_SetParamEntityParsing: Python method SetParamEntityParsing(flag).
 * Parses one int, passes it to XML_SetParamEntityParsing() and returns
 * Expat's result as a Python int.
 * NOTE(review): the declaration of `flag` and the argument-error return are
 * not visible in this extract. */
1206 xmlparse_SetParamEntityParsing(xmlparseobject
*p
, PyObject
* args
)
1209 if (!PyArg_ParseTuple(args
, "i", &flag
))
/* `flag` is reused to hold Expat's success indicator. */
1211 flag
= XML_SetParamEntityParsing(p
->itself
, flag
);
1212 return PyInt_FromLong(flag
);
1216 #if XML_COMBINED_VERSION >= 19505
/* xmlparse_UseForeignDTD: Python method UseForeignDTD([flag]).  The flag
 * defaults to XML_TRUE; when an argument is given its truth value is used.
 * The flag is passed to XML_UseForeignDTD() and any result other than
 * XML_ERROR_NONE is raised through set_error().
 * NOTE(review): PyObject_IsTrue() returns -1 on error, which the conditional
 * expression below would treat as true - confirm whether an explicit check
 * is wanted.
 * NOTE(review): the declaration of `rc` and the success return are not
 * visible in this extract. */
1217 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__
,
1218 "UseForeignDTD([flag])\n\
1219 Allows the application to provide an artificial external subset if one is\n\
1220 not specified as part of the document instance. This readily allows the\n\
1221 use of a 'default' document type controlled by the application, while still\n\
1222 getting the advantage of providing document type information to the parser.\n\
1223 'flag' defaults to True if not provided.");
1226 xmlparse_UseForeignDTD(xmlparseobject
*self
, PyObject
*args
)
1228 PyObject
*flagobj
= NULL
;
1229 XML_Bool flag
= XML_TRUE
;
1231 if (!PyArg_ParseTuple(args
, "|O:UseForeignDTD", &flagobj
))
1233 if (flagobj
!= NULL
)
1234 flag
= PyObject_IsTrue(flagobj
) ? XML_TRUE
: XML_FALSE
;
1235 rc
= XML_UseForeignDTD(self
->itself
, flag
);
1236 if (rc
!= XML_ERROR_NONE
) {
1237 return set_error(self
, rc
);
/* Method table of the parser object.  Every entry uses METH_VARARGS and
 * pairs the Python-visible name with its C implementation and docstring.
 * UseForeignDTD is only listed for Expat >= 1.95.5.  The table ends with a
 * NULL sentinel entry. */
1244 static struct PyMethodDef xmlparse_methods
[] = {
1245 {"Parse", (PyCFunction
)xmlparse_Parse
,
1246 METH_VARARGS
, xmlparse_Parse__doc__
},
1247 {"ParseFile", (PyCFunction
)xmlparse_ParseFile
,
1248 METH_VARARGS
, xmlparse_ParseFile__doc__
},
1249 {"SetBase", (PyCFunction
)xmlparse_SetBase
,
1250 METH_VARARGS
, xmlparse_SetBase__doc__
},
1251 {"GetBase", (PyCFunction
)xmlparse_GetBase
,
1252 METH_VARARGS
, xmlparse_GetBase__doc__
},
1253 {"ExternalEntityParserCreate", (PyCFunction
)xmlparse_ExternalEntityParserCreate
,
1254 METH_VARARGS
, xmlparse_ExternalEntityParserCreate__doc__
},
1255 {"SetParamEntityParsing", (PyCFunction
)xmlparse_SetParamEntityParsing
,
1256 METH_VARARGS
, xmlparse_SetParamEntityParsing__doc__
},
1257 {"GetInputContext", (PyCFunction
)xmlparse_GetInputContext
,
1258 METH_VARARGS
, xmlparse_GetInputContext__doc__
},
1259 #if XML_COMBINED_VERSION >= 19505
1260 {"UseForeignDTD", (PyCFunction
)xmlparse_UseForeignDTD
,
1261 METH_VARARGS
, xmlparse_UseForeignDTD__doc__
},
1263 {NULL
, NULL
} /* sentinel */
1269 #ifdef Py_USING_UNICODE
1271 /* pyexpat international encoding support.
1272 Make it as simple as possible.
 */
/* template_buffer holds the 256 byte values 0..255 in order, followed by a
 * terminating 0; init_template_buffer() fills it in.  The buffer is later
 * decoded with a named codec to discover that codec's byte -> code point
 * mapping.
 * NOTE(review): the use of template_string is not visible in this extract. */
1275 static char template_buffer
[257];
1276 PyObject
*template_string
= NULL
;
1279 init_template_buffer(void)
1282 for (i
= 0; i
< 256; i
++) {
1283 template_buffer
[i
] = i
;
1285 template_buffer
[256] = 0;
/* PyUnknownEncodingHandler: Expat callback for encodings it does not know.
 * It decodes the 256-byte template buffer with the Python codec called
 * `name` (error mode "replace") and walks the 256 resulting characters,
 * treating the Unicode replacement character as a special case.  No convert
 * or release hooks are installed on `info`.
 * NOTE(review): the `info` parameter declaration, the assignments into the
 * encoding map and the return values are not visible in this extract. */
1289 PyUnknownEncodingHandler(void *encodingHandlerData
,
1290 const XML_Char
*name
,
1293 PyUnicodeObject
*_u_string
= NULL
;
1297 /* Yes, supports only 8bit encodings */
1298 _u_string
= (PyUnicodeObject
*)
1299 PyUnicode_Decode(template_buffer
, 256, name
, "replace");
1301 if (_u_string
== NULL
)
1304 for (i
= 0; i
< 256; i
++) {
1305 /* Stupid to access directly, but fast */
1306 Py_UNICODE c
= _u_string
->str
[i
];
1307 if (c
== Py_UNICODE_REPLACEMENT_CHARACTER
)
1313 info
->convert
= NULL
;
1314 info
->release
= NULL
;
1316 Py_DECREF(_u_string
);
/* newxmlparseobject: allocates and initializes a parser object.
 * Defaults: returns_unicode is 1 when Unicode support is compiled in (0
 * otherwise), text buffering is off (buffer == NULL) with buffer_size set
 * to CHARACTER_DATA_BUFFER_SIZE, and all mode flags are 0.
 * The Expat parser is created with XML_ParserCreateNS() when a namespace
 * separator is given (its first character is used) and XML_ParserCreate()
 * otherwise.  `intern` is stored with an extra reference.  The handler
 * table is sized by counting handler_info entries up to the NULL-name
 * sentinel and then cleared with clear_handlers(self, 1).
 * NOTE(review): the allocation-failure return, the cleanup on errors and
 * the #else/#endif lines are not visible in this extract. */
1323 newxmlparseobject(char *encoding
, char *namespace_separator
, PyObject
*intern
)
1326 xmlparseobject
*self
;
1328 #ifdef Py_TPFLAGS_HAVE_GC
1329 /* Code for versions 2.2 and later */
1330 self
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1332 self
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1337 #ifdef Py_USING_UNICODE
1338 self
->returns_unicode
= 1;
1340 self
->returns_unicode
= 0;
1343 self
->buffer
= NULL
;
1344 self
->buffer_size
= CHARACTER_DATA_BUFFER_SIZE
;
1345 self
->buffer_used
= 0;
1346 self
->ordered_attributes
= 0;
1347 self
->specified_attributes
= 0;
1348 self
->in_callback
= 0;
1349 self
->ns_prefixes
= 0;
1350 self
->handlers
= NULL
;
1351 if (namespace_separator
!= NULL
) {
1352 self
->itself
= XML_ParserCreateNS(encoding
, *namespace_separator
);
1355 self
->itself
= XML_ParserCreate(encoding
);
1357 self
->intern
= intern
;
1358 Py_XINCREF(self
->intern
);
1359 #ifdef Py_TPFLAGS_HAVE_GC
1360 PyObject_GC_Track(self
);
1362 PyObject_GC_Init(self
);
1364 if (self
->itself
== NULL
) {
1365 PyErr_SetString(PyExc_RuntimeError
,
1366 "XML_ParserCreate failed");
1370 XML_SetUserData(self
->itself
, (void *)self
);
1371 #ifdef Py_USING_UNICODE
1372 XML_SetUnknownEncodingHandler(self
->itself
,
1373 (XML_UnknownEncodingHandler
) PyUnknownEncodingHandler
, NULL
);
/* Count the handler_info entries to size the per-parser handler table. */
1376 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1379 self
->handlers
= malloc(sizeof(PyObject
*) * i
);
1380 if (!self
->handlers
) {
1382 return PyErr_NoMemory();
1384 clear_handlers(self
, 1);
1386 return (PyObject
*)self
;
/* xmlparse_dealloc: destructor of the parser object.  It removes the object
 * from GC tracking, frees the Expat parser, clears every slot of the handler
 * table before freeing the table itself, resets the text buffer pointer,
 * drops the reference to the intern dictionary and finally releases the
 * object's memory.
 * NOTE(review): the release of each `temp` handler, the freeing of
 * self->buffer and the non-GC deallocation call are not visible in this
 * extract. */
1391 xmlparse_dealloc(xmlparseobject
*self
)
1394 #ifdef Py_TPFLAGS_HAVE_GC
1395 PyObject_GC_UnTrack(self
);
1397 PyObject_GC_Fini(self
);
1399 if (self
->itself
!= NULL
)
1400 XML_ParserFree(self
->itself
);
1401 self
->itself
= NULL
;
1403 if (self
->handlers
!= NULL
) {
/* Each slot is set to NULL after its old value has been saved in temp. */
1405 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1406 temp
= self
->handlers
[i
];
1407 self
->handlers
[i
] = NULL
;
1410 free(self
->handlers
);
1411 self
->handlers
= NULL
;
1413 if (self
->buffer
!= NULL
) {
1415 self
->buffer
= NULL
;
1417 Py_XDECREF(self
->intern
);
1418 #ifndef Py_TPFLAGS_HAVE_GC
1419 /* Code for versions 2.0 and 2.1 */
1422 /* Code for versions 2.2 and later. */
1423 PyObject_GC_Del(self
);
/* handlername2int: looks up `name` in handler_info by exact string
 * comparison, scanning up to the entry whose name is NULL.
 * NOTE(review): the return statements are not visible in this extract;
 * callers compare the result against -1 and test `>= 0`, so presumably the
 * matching index is returned and -1 means "not a handler name". */
1428 handlername2int(const char *name
)
1431 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1432 if (strcmp(name
, handler_info
[i
].name
) == 0) {
/* get_pybool: maps a C truth value onto Py_True or Py_False.
 * NOTE(review): the reference-count increment and the return statement are
 * not visible in this extract. */
1440 get_pybool(int istrue
)
1442 PyObject
*result
= istrue
? Py_True
: Py_False
;
/* xmlparse_getattr: attribute lookup for parser objects.  The name is tried
 * in this order:
 *   1. a handler name            -> the entry in self->handlers;
 *   2. Error* / Current* values  -> queried from Expat, returned as ints;
 *   3. buffer_size / buffer_text / buffer_used;
 *   4. namespace_prefixes, ordered_attributes, returns_unicode and
 *      specified_attributes as booleans, and `intern` (the dictionary);
 *   5. "__members__"             -> a list of every name above;
 *   6. anything else             -> Py_FindMethod() on xmlparse_methods.
 * The first character of `name` is tested before the strcmp chains for the
 * E, C and b groups.
 * NOTE(review): the handling of an unset handler or a NULL intern
 * dictionary, the body of the APPEND macro and the return of the
 * __members__ list are not visible in this extract. */
1448 xmlparse_getattr(xmlparseobject
*self
, char *name
)
1450 int handlernum
= handlername2int(name
);
1452 if (handlernum
!= -1) {
1453 PyObject
*result
= self
->handlers
[handlernum
];
1459 if (name
[0] == 'E') {
1460 if (strcmp(name
, "ErrorCode") == 0)
1461 return PyInt_FromLong((long)
1462 XML_GetErrorCode(self
->itself
));
1463 if (strcmp(name
, "ErrorLineNumber") == 0)
1464 return PyInt_FromLong((long)
1465 XML_GetErrorLineNumber(self
->itself
));
1466 if (strcmp(name
, "ErrorColumnNumber") == 0)
1467 return PyInt_FromLong((long)
1468 XML_GetErrorColumnNumber(self
->itself
));
1469 if (strcmp(name
, "ErrorByteIndex") == 0)
1470 return PyInt_FromLong((long)
1471 XML_GetErrorByteIndex(self
->itself
));
1473 if (name
[0] == 'C') {
1474 if (strcmp(name
, "CurrentLineNumber") == 0)
1475 return PyInt_FromLong((long)
1476 XML_GetCurrentLineNumber(self
->itself
));
1477 if (strcmp(name
, "CurrentColumnNumber") == 0)
1478 return PyInt_FromLong((long)
1479 XML_GetCurrentColumnNumber(self
->itself
));
1480 if (strcmp(name
, "CurrentByteIndex") == 0)
1481 return PyInt_FromLong((long)
1482 XML_GetCurrentByteIndex(self
->itself
));
1484 if (name
[0] == 'b') {
1485 if (strcmp(name
, "buffer_size") == 0)
1486 return PyInt_FromLong((long) self
->buffer_size
);
1487 if (strcmp(name
, "buffer_text") == 0)
1488 return get_pybool(self
->buffer
!= NULL
);
1489 if (strcmp(name
, "buffer_used") == 0)
1490 return PyInt_FromLong((long) self
->buffer_used
);
1492 if (strcmp(name
, "namespace_prefixes") == 0)
1493 return get_pybool(self
->ns_prefixes
);
1494 if (strcmp(name
, "ordered_attributes") == 0)
1495 return get_pybool(self
->ordered_attributes
);
1496 if (strcmp(name
, "returns_unicode") == 0)
1497 return get_pybool((long) self
->returns_unicode
);
1498 if (strcmp(name
, "specified_attributes") == 0)
1499 return get_pybool((long) self
->specified_attributes
);
1500 if (strcmp(name
, "intern") == 0) {
1501 if (self
->intern
== NULL
) {
1506 Py_INCREF(self
->intern
);
1507 return self
->intern
;
1511 #define APPEND(list, str) \
1513 PyObject *o = PyString_FromString(str); \
1515 PyList_Append(list, o); \
1519 if (strcmp(name
, "__members__") == 0) {
1521 PyObject
*rc
= PyList_New(0);
1522 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1523 PyObject
*o
= get_handler_name(&handler_info
[i
]);
1525 PyList_Append(rc
, o
);
1528 APPEND(rc
, "ErrorCode");
1529 APPEND(rc
, "ErrorLineNumber");
1530 APPEND(rc
, "ErrorColumnNumber");
1531 APPEND(rc
, "ErrorByteIndex");
1532 APPEND(rc
, "CurrentLineNumber");
1533 APPEND(rc
, "CurrentColumnNumber");
1534 APPEND(rc
, "CurrentByteIndex");
1535 APPEND(rc
, "buffer_size");
1536 APPEND(rc
, "buffer_text");
1537 APPEND(rc
, "buffer_used");
1538 APPEND(rc
, "namespace_prefixes");
1539 APPEND(rc
, "ordered_attributes");
1540 APPEND(rc
, "returns_unicode");
1541 APPEND(rc
, "specified_attributes");
1542 APPEND(rc
, "intern");
1547 return Py_FindMethod(xmlparse_methods
, (PyObject
*)self
, name
);
1551 sethandler(xmlparseobject
*self
, const char *name
, PyObject
* v
)
1553 int handlernum
= handlername2int(name
);
1554 if (handlernum
>= 0) {
1555 xmlhandler c_handler
= NULL
;
1556 PyObject
*temp
= self
->handlers
[handlernum
];
1560 else if (v
!= NULL
) {
1562 c_handler
= handler_info
[handlernum
].handler
;
1564 self
->handlers
[handlernum
] = v
;
1566 handler_info
[handlernum
].setter(self
->itself
, c_handler
);
1573 xmlparse_setattr(xmlparseobject
*self
, char *name
, PyObject
*v
)
1575 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1577 PyErr_SetString(PyExc_RuntimeError
, "Cannot delete attribute");
1580 if (strcmp(name
, "buffer_text") == 0) {
1581 if (PyObject_IsTrue(v
)) {
1582 if (self
->buffer
== NULL
) {
1583 self
->buffer
= malloc(self
->buffer_size
);
1584 if (self
->buffer
== NULL
) {
1588 self
->buffer_used
= 0;
1591 else if (self
->buffer
!= NULL
) {
1592 if (flush_character_buffer(self
) < 0)
1595 self
->buffer
= NULL
;
1599 if (strcmp(name
, "namespace_prefixes") == 0) {
1600 if (PyObject_IsTrue(v
))
1601 self
->ns_prefixes
= 1;
1603 self
->ns_prefixes
= 0;
1604 XML_SetReturnNSTriplet(self
->itself
, self
->ns_prefixes
);
1607 if (strcmp(name
, "ordered_attributes") == 0) {
1608 if (PyObject_IsTrue(v
))
1609 self
->ordered_attributes
= 1;
1611 self
->ordered_attributes
= 0;
1614 if (strcmp(name
, "returns_unicode") == 0) {
1615 if (PyObject_IsTrue(v
)) {
1616 #ifndef Py_USING_UNICODE
1617 PyErr_SetString(PyExc_ValueError
,
1618 "Unicode support not available");
1621 self
->returns_unicode
= 1;
1625 self
->returns_unicode
= 0;
1628 if (strcmp(name
, "specified_attributes") == 0) {
1629 if (PyObject_IsTrue(v
))
1630 self
->specified_attributes
= 1;
1632 self
->specified_attributes
= 0;
1635 if (strcmp(name
, "CharacterDataHandler") == 0) {
1636 /* If we're changing the character data handler, flush all
1637 * cached data with the old handler. Not sure there's a
1638 * "right" thing to do, though, but this probably won't
1641 if (flush_character_buffer(self
) < 0)
1644 if (sethandler(self
, name
, v
)) {
1647 PyErr_SetString(PyExc_AttributeError
, name
);
#ifdef WITH_CYCLE_GC
/* tp_traverse: report every installed handler object to the cycle
 * collector.  Stops at, and returns, the first non-zero result from
 * `visit`; returns 0 otherwise.
 */
static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
{
    int i;

    for (i = 0; handler_info[i].name != NULL; i++) {
        if (op->handlers[i] != NULL) {
            int err = visit(op->handlers[i], arg);
            if (err)
                return err;
        }
    }
    return 0;
}

/* tp_clear: drop the references that can take part in a cycle -- all
 * handlers and the interning dictionary.  Always returns 0.
 */
static int
xmlparse_clear(xmlparseobject *op)
{
    PyObject *intern;

    clear_handlers(op, 0);
    /* Detach the field before releasing the reference (the same pattern
     * clear_handlers() uses for handlers) so the object never holds a
     * pointer to something it no longer owns. */
    intern = op->intern;
    op->intern = NULL;
    Py_XDECREF(intern);
    return 0;
}
#endif
1676 PyDoc_STRVAR(Xmlparsetype__doc__
, "XML parser");
1678 static PyTypeObject Xmlparsetype
= {
1679 PyObject_HEAD_INIT(NULL
)
1681 "pyexpat.xmlparser", /*tp_name*/
1682 sizeof(xmlparseobject
) + PyGC_HEAD_SIZE
,/*tp_basicsize*/
1685 (destructor
)xmlparse_dealloc
, /*tp_dealloc*/
1686 (printfunc
)0, /*tp_print*/
1687 (getattrfunc
)xmlparse_getattr
, /*tp_getattr*/
1688 (setattrfunc
)xmlparse_setattr
, /*tp_setattr*/
1689 (cmpfunc
)0, /*tp_compare*/
1690 (reprfunc
)0, /*tp_repr*/
1692 0, /*tp_as_sequence*/
1693 0, /*tp_as_mapping*/
1694 (hashfunc
)0, /*tp_hash*/
1695 (ternaryfunc
)0, /*tp_call*/
1696 (reprfunc
)0, /*tp_str*/
1697 0, /* tp_getattro */
1698 0, /* tp_setattro */
1699 0, /* tp_as_buffer */
1700 #ifdef Py_TPFLAGS_HAVE_GC
1701 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1703 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_GC
, /*tp_flags*/
1705 Xmlparsetype__doc__
, /* tp_doc - Documentation string */
1706 #ifdef WITH_CYCLE_GC
1707 (traverseproc
)xmlparse_traverse
, /* tp_traverse */
1708 (inquiry
)xmlparse_clear
/* tp_clear */
1714 /* End of code for xmlparser objects */
1715 /* -------------------------------------------------------- */
1717 PyDoc_STRVAR(pyexpat_ParserCreate__doc__
,
1718 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1719 Return a new XML parser object.");
1722 pyexpat_ParserCreate(PyObject
*notused
, PyObject
*args
, PyObject
*kw
)
1724 char *encoding
= NULL
;
1725 char *namespace_separator
= NULL
;
1726 PyObject
*intern
= NULL
;
1728 int intern_decref
= 0;
1729 static const char *kwlist
[] = {"encoding", "namespace_separator",
1732 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|zzO:ParserCreate", kwlist
,
1733 &encoding
, &namespace_separator
, &intern
))
1735 if (namespace_separator
!= NULL
1736 && strlen(namespace_separator
) > 1) {
1737 PyErr_SetString(PyExc_ValueError
,
1738 "namespace_separator must be at most one"
1739 " character, omitted, or None");
1742 /* Explicitly passing None means no interning is desired.
1743 Not passing anything means that a new dictionary is used. */
1744 if (intern
== Py_None
)
1746 else if (intern
== NULL
) {
1747 intern
= PyDict_New();
1752 else if (!PyDict_Check(intern
)) {
1753 PyErr_SetString(PyExc_TypeError
, "intern must be a dictionary");
1757 result
= newxmlparseobject(encoding
, namespace_separator
, intern
);
1758 if (intern_decref
) {
1764 PyDoc_STRVAR(pyexpat_ErrorString__doc__
,
1765 "ErrorString(errno) -> string\n\
1766 Returns string error for given number.");
1769 pyexpat_ErrorString(PyObject
*self
, PyObject
*args
)
1773 if (!PyArg_ParseTuple(args
, "l:ErrorString", &code
))
1775 return Py_BuildValue("z", XML_ErrorString((int)code
));
1778 /* List of methods defined in the module */
1780 static struct PyMethodDef pyexpat_methods
[] = {
1781 {"ParserCreate", (PyCFunction
)pyexpat_ParserCreate
,
1782 METH_VARARGS
|METH_KEYWORDS
, pyexpat_ParserCreate__doc__
},
1783 {"ErrorString", (PyCFunction
)pyexpat_ErrorString
,
1784 METH_VARARGS
, pyexpat_ErrorString__doc__
},
1786 {NULL
, (PyCFunction
)NULL
, 0, NULL
} /* sentinel */
1789 /* Module docstring */
1791 PyDoc_STRVAR(pyexpat_module_documentation
,
1792 "Python wrapper for Expat parser.");
1794 /* Return a Python string that represents the version number without the
1795 * extra cruft added by revision control, even if the right options were
1796 * given to the "cvs export" command to make it not include the extra
1800 get_version_string(void)
1802 static char *rcsid
= "$Revision$";
1806 while (!isdigit(Py_CHARMASK(*rev
)))
1808 while (rev
[i
] != ' ' && rev
[i
] != '\0')
1811 return PyString_FromStringAndSize(rev
, i
);
1814 /* Initialization function for the module */
1817 #define MODULE_NAME "pyexpat"
1820 #ifndef MODULE_INITFUNC
1821 #define MODULE_INITFUNC initpyexpat
1824 #ifndef PyMODINIT_FUNC
1826 # define PyMODINIT_FUNC __declspec(dllexport) void
1828 # define PyMODINIT_FUNC void
1832 PyMODINIT_FUNC
MODULE_INITFUNC(void); /* avoid compiler warnings */
1835 MODULE_INITFUNC(void)
1838 PyObject
*errmod_name
= PyString_FromString(MODULE_NAME
".errors");
1839 PyObject
*errors_module
;
1840 PyObject
*modelmod_name
;
1841 PyObject
*model_module
;
1842 PyObject
*sys_modules
;
1843 static struct PyExpat_CAPI capi
;
1844 PyObject
* capi_object
;
1846 if (errmod_name
== NULL
)
1848 modelmod_name
= PyString_FromString(MODULE_NAME
".model");
1849 if (modelmod_name
== NULL
)
1852 Xmlparsetype
.ob_type
= &PyType_Type
;
1854 /* Create the module and add the functions */
1855 m
= Py_InitModule3(MODULE_NAME
, pyexpat_methods
,
1856 pyexpat_module_documentation
);
1860 /* Add some symbolic constants to the module */
1861 if (ErrorObject
== NULL
) {
1862 ErrorObject
= PyErr_NewException("xml.parsers.expat.ExpatError",
1864 if (ErrorObject
== NULL
)
1867 Py_INCREF(ErrorObject
);
1868 PyModule_AddObject(m
, "error", ErrorObject
);
1869 Py_INCREF(ErrorObject
);
1870 PyModule_AddObject(m
, "ExpatError", ErrorObject
);
1871 Py_INCREF(&Xmlparsetype
);
1872 PyModule_AddObject(m
, "XMLParserType", (PyObject
*) &Xmlparsetype
);
1874 PyModule_AddObject(m
, "__version__", get_version_string());
1875 PyModule_AddStringConstant(m
, "EXPAT_VERSION",
1876 (char *) XML_ExpatVersion());
1878 XML_Expat_Version info
= XML_ExpatVersionInfo();
1879 PyModule_AddObject(m
, "version_info",
1880 Py_BuildValue("(iii)", info
.major
,
1881 info
.minor
, info
.micro
));
1883 #ifdef Py_USING_UNICODE
1884 init_template_buffer();
1886 /* XXX When Expat supports some way of figuring out how it was
1887 compiled, this should check and set native_encoding
1890 PyModule_AddStringConstant(m
, "native_encoding", "UTF-8");
1892 sys_modules
= PySys_GetObject("modules");
1893 d
= PyModule_GetDict(m
);
1894 errors_module
= PyDict_GetItem(d
, errmod_name
);
1895 if (errors_module
== NULL
) {
1896 errors_module
= PyModule_New(MODULE_NAME
".errors");
1897 if (errors_module
!= NULL
) {
1898 PyDict_SetItem(sys_modules
, errmod_name
, errors_module
);
1899 /* gives away the reference to errors_module */
1900 PyModule_AddObject(m
, "errors", errors_module
);
1903 Py_DECREF(errmod_name
);
1904 model_module
= PyDict_GetItem(d
, modelmod_name
);
1905 if (model_module
== NULL
) {
1906 model_module
= PyModule_New(MODULE_NAME
".model");
1907 if (model_module
!= NULL
) {
1908 PyDict_SetItem(sys_modules
, modelmod_name
, model_module
);
1909 /* gives away the reference to model_module */
1910 PyModule_AddObject(m
, "model", model_module
);
1913 Py_DECREF(modelmod_name
);
1914 if (errors_module
== NULL
|| model_module
== NULL
)
1915 /* Don't core dump later! */
1918 #if XML_COMBINED_VERSION > 19505
1920 const XML_Feature
*features
= XML_GetFeatureList();
1921 PyObject
*list
= PyList_New(0);
1923 /* just ignore it */
1927 for (; features
[i
].feature
!= XML_FEATURE_END
; ++i
) {
1929 PyObject
*item
= Py_BuildValue("si", features
[i
].name
,
1936 ok
= PyList_Append(list
, item
);
1944 PyModule_AddObject(m
, "features", list
);
1949 #define MYCONST(name) \
1950 PyModule_AddStringConstant(errors_module, #name, \
1951 (char*)XML_ErrorString(name))
1953 MYCONST(XML_ERROR_NO_MEMORY
);
1954 MYCONST(XML_ERROR_SYNTAX
);
1955 MYCONST(XML_ERROR_NO_ELEMENTS
);
1956 MYCONST(XML_ERROR_INVALID_TOKEN
);
1957 MYCONST(XML_ERROR_UNCLOSED_TOKEN
);
1958 MYCONST(XML_ERROR_PARTIAL_CHAR
);
1959 MYCONST(XML_ERROR_TAG_MISMATCH
);
1960 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE
);
1961 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
1962 MYCONST(XML_ERROR_PARAM_ENTITY_REF
);
1963 MYCONST(XML_ERROR_UNDEFINED_ENTITY
);
1964 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF
);
1965 MYCONST(XML_ERROR_ASYNC_ENTITY
);
1966 MYCONST(XML_ERROR_BAD_CHAR_REF
);
1967 MYCONST(XML_ERROR_BINARY_ENTITY_REF
);
1968 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
1969 MYCONST(XML_ERROR_MISPLACED_XML_PI
);
1970 MYCONST(XML_ERROR_UNKNOWN_ENCODING
);
1971 MYCONST(XML_ERROR_INCORRECT_ENCODING
);
1972 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION
);
1973 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
1974 MYCONST(XML_ERROR_NOT_STANDALONE
);
1975 MYCONST(XML_ERROR_UNEXPECTED_STATE
);
1976 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE
);
1977 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD
);
1978 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
);
1979 /* Added in Expat 1.95.7. */
1980 MYCONST(XML_ERROR_UNBOUND_PREFIX
);
1981 /* Added in Expat 1.95.8. */
1982 MYCONST(XML_ERROR_UNDECLARING_PREFIX
);
1983 MYCONST(XML_ERROR_INCOMPLETE_PE
);
1984 MYCONST(XML_ERROR_XML_DECL
);
1985 MYCONST(XML_ERROR_TEXT_DECL
);
1986 MYCONST(XML_ERROR_PUBLICID
);
1987 MYCONST(XML_ERROR_SUSPENDED
);
1988 MYCONST(XML_ERROR_NOT_SUSPENDED
);
1989 MYCONST(XML_ERROR_ABORTED
);
1990 MYCONST(XML_ERROR_FINISHED
);
1991 MYCONST(XML_ERROR_SUSPEND_PE
);
1993 PyModule_AddStringConstant(errors_module
, "__doc__",
1994 "Constants used to describe error conditions.");
1998 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1999 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER
);
2000 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
);
2001 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS
);
2004 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2005 PyModule_AddStringConstant(model_module
, "__doc__",
2006 "Constants used to interpret content model information.");
2008 MYCONST(XML_CTYPE_EMPTY
);
2009 MYCONST(XML_CTYPE_ANY
);
2010 MYCONST(XML_CTYPE_MIXED
);
2011 MYCONST(XML_CTYPE_NAME
);
2012 MYCONST(XML_CTYPE_CHOICE
);
2013 MYCONST(XML_CTYPE_SEQ
);
2015 MYCONST(XML_CQUANT_NONE
);
2016 MYCONST(XML_CQUANT_OPT
);
2017 MYCONST(XML_CQUANT_REP
);
2018 MYCONST(XML_CQUANT_PLUS
);
2021 /* initialize pyexpat dispatch table */
2022 capi
.size
= sizeof(capi
);
2023 capi
.magic
= PyExpat_CAPI_MAGIC
;
2024 capi
.MAJOR_VERSION
= XML_MAJOR_VERSION
;
2025 capi
.MINOR_VERSION
= XML_MINOR_VERSION
;
2026 capi
.MICRO_VERSION
= XML_MICRO_VERSION
;
2027 capi
.ErrorString
= XML_ErrorString
;
2028 capi
.GetErrorCode
= XML_GetErrorCode
;
2029 capi
.GetErrorColumnNumber
= XML_GetErrorColumnNumber
;
2030 capi
.GetErrorLineNumber
= XML_GetErrorLineNumber
;
2031 capi
.Parse
= XML_Parse
;
2032 capi
.ParserCreate_MM
= XML_ParserCreate_MM
;
2033 capi
.ParserFree
= XML_ParserFree
;
2034 capi
.SetCharacterDataHandler
= XML_SetCharacterDataHandler
;
2035 capi
.SetCommentHandler
= XML_SetCommentHandler
;
2036 capi
.SetDefaultHandlerExpand
= XML_SetDefaultHandlerExpand
;
2037 capi
.SetElementHandler
= XML_SetElementHandler
;
2038 capi
.SetNamespaceDeclHandler
= XML_SetNamespaceDeclHandler
;
2039 capi
.SetProcessingInstructionHandler
= XML_SetProcessingInstructionHandler
;
2040 capi
.SetUnknownEncodingHandler
= XML_SetUnknownEncodingHandler
;
2041 capi
.SetUserData
= XML_SetUserData
;
2043 /* export as cobject */
2044 capi_object
= PyCObject_FromVoidPtr(&capi
, NULL
);
2046 PyModule_AddObject(m
, "expat_CAPI", capi_object
);
2050 clear_handlers(xmlparseobject
*self
, int initial
)
2055 for (; handler_info
[i
].name
!= NULL
; i
++) {
2057 self
->handlers
[i
] = NULL
;
2059 temp
= self
->handlers
[i
];
2060 self
->handlers
[i
] = NULL
;
2062 handler_info
[i
].setter(self
->itself
, NULL
);
2067 static struct HandlerInfo handler_info
[] = {
2068 {"StartElementHandler",
2069 (xmlhandlersetter
)XML_SetStartElementHandler
,
2070 (xmlhandler
)my_StartElementHandler
},
2071 {"EndElementHandler",
2072 (xmlhandlersetter
)XML_SetEndElementHandler
,
2073 (xmlhandler
)my_EndElementHandler
},
2074 {"ProcessingInstructionHandler",
2075 (xmlhandlersetter
)XML_SetProcessingInstructionHandler
,
2076 (xmlhandler
)my_ProcessingInstructionHandler
},
2077 {"CharacterDataHandler",
2078 (xmlhandlersetter
)XML_SetCharacterDataHandler
,
2079 (xmlhandler
)my_CharacterDataHandler
},
2080 {"UnparsedEntityDeclHandler",
2081 (xmlhandlersetter
)XML_SetUnparsedEntityDeclHandler
,
2082 (xmlhandler
)my_UnparsedEntityDeclHandler
},
2083 {"NotationDeclHandler",
2084 (xmlhandlersetter
)XML_SetNotationDeclHandler
,
2085 (xmlhandler
)my_NotationDeclHandler
},
2086 {"StartNamespaceDeclHandler",
2087 (xmlhandlersetter
)XML_SetStartNamespaceDeclHandler
,
2088 (xmlhandler
)my_StartNamespaceDeclHandler
},
2089 {"EndNamespaceDeclHandler",
2090 (xmlhandlersetter
)XML_SetEndNamespaceDeclHandler
,
2091 (xmlhandler
)my_EndNamespaceDeclHandler
},
2093 (xmlhandlersetter
)XML_SetCommentHandler
,
2094 (xmlhandler
)my_CommentHandler
},
2095 {"StartCdataSectionHandler",
2096 (xmlhandlersetter
)XML_SetStartCdataSectionHandler
,
2097 (xmlhandler
)my_StartCdataSectionHandler
},
2098 {"EndCdataSectionHandler",
2099 (xmlhandlersetter
)XML_SetEndCdataSectionHandler
,
2100 (xmlhandler
)my_EndCdataSectionHandler
},
2102 (xmlhandlersetter
)XML_SetDefaultHandler
,
2103 (xmlhandler
)my_DefaultHandler
},
2104 {"DefaultHandlerExpand",
2105 (xmlhandlersetter
)XML_SetDefaultHandlerExpand
,
2106 (xmlhandler
)my_DefaultHandlerExpandHandler
},
2107 {"NotStandaloneHandler",
2108 (xmlhandlersetter
)XML_SetNotStandaloneHandler
,
2109 (xmlhandler
)my_NotStandaloneHandler
},
2110 {"ExternalEntityRefHandler",
2111 (xmlhandlersetter
)XML_SetExternalEntityRefHandler
,
2112 (xmlhandler
)my_ExternalEntityRefHandler
},
2113 {"StartDoctypeDeclHandler",
2114 (xmlhandlersetter
)XML_SetStartDoctypeDeclHandler
,
2115 (xmlhandler
)my_StartDoctypeDeclHandler
},
2116 {"EndDoctypeDeclHandler",
2117 (xmlhandlersetter
)XML_SetEndDoctypeDeclHandler
,
2118 (xmlhandler
)my_EndDoctypeDeclHandler
},
2119 {"EntityDeclHandler",
2120 (xmlhandlersetter
)XML_SetEntityDeclHandler
,
2121 (xmlhandler
)my_EntityDeclHandler
},
2123 (xmlhandlersetter
)XML_SetXmlDeclHandler
,
2124 (xmlhandler
)my_XmlDeclHandler
},
2125 {"ElementDeclHandler",
2126 (xmlhandlersetter
)XML_SetElementDeclHandler
,
2127 (xmlhandler
)my_ElementDeclHandler
},
2128 {"AttlistDeclHandler",
2129 (xmlhandlersetter
)XML_SetAttlistDeclHandler
,
2130 (xmlhandler
)my_AttlistDeclHandler
},
2131 #if XML_COMBINED_VERSION >= 19504
2132 {"SkippedEntityHandler",
2133 (xmlhandlersetter
)XML_SetSkippedEntityHandler
,
2134 (xmlhandler
)my_SkippedEntityHandler
},
2137 {NULL
, NULL
, NULL
} /* sentinel */