4 #include "frameobject.h"
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
35 ProcessingInstruction
,
54 #if XML_COMBINED_VERSION >= 19504
60 static PyObject
*ErrorObject
;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
70 int returns_unicode
; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes
; /* Return attributes as a list. */
73 int specified_attributes
; /* Report only specified attributes. */
74 int in_callback
; /* Is a callback active? */
75 int ns_prefixes
; /* Namespace-triplets mode? */
76 XML_Char
*buffer
; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size
; /* Size of buffer, in XML_Char units */
79 int buffer_used
; /* Buffer units in use */
80 PyObject
*intern
; /* Dictionary to intern strings */
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype
;
88 typedef void (*xmlhandlersetter
)(XML_Parser self
, void *meth
);
89 typedef void* xmlhandler
;
93 xmlhandlersetter setter
;
95 PyCodeObject
*tb_code
;
99 static struct HandlerInfo handler_info
[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
105 set_error_attr(PyObject
*err
, char *name
, int value
)
107 PyObject
*v
= PyInt_FromLong(value
);
109 if (v
== NULL
|| PyObject_SetAttrString(err
, name
, v
) == -1) {
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
121 set_error(xmlparseobject
*self
, enum XML_Error code
)
125 XML_Parser parser
= self
->itself
;
126 int lineno
= XML_GetErrorLineNumber(parser
);
127 int column
= XML_GetErrorColumnNumber(parser
);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer
, "%.200s: line %i, column %i",
132 XML_ErrorString(code
), lineno
, column
);
133 err
= PyObject_CallFunction(ErrorObject
, "s", buffer
);
135 && set_error_attr(err
, "code", code
)
136 && set_error_attr(err
, "offset", column
)
137 && set_error_attr(err
, "lineno", lineno
)) {
138 PyErr_SetObject(ErrorObject
, err
);
145 have_handler(xmlparseobject
*self
, int type
)
147 PyObject
*handler
= self
->handlers
[type
];
148 return handler
!= NULL
;
152 get_handler_name(struct HandlerInfo
*hinfo
)
154 PyObject
*name
= hinfo
->nameobj
;
156 name
= PyString_FromString(hinfo
->name
);
157 hinfo
->nameobj
= name
;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
169 conv_string_to_unicode(const XML_Char
*str
)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
178 return PyUnicode_DecodeUTF8(str
, strlen(str
), "strict");
182 conv_string_len_to_unicode(const XML_Char
*str
, int len
)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
191 return PyUnicode_DecodeUTF8((const char *)str
, len
, "strict");
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
199 conv_string_to_utf8(const XML_Char
*str
)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
208 return PyString_FromString(str
);
212 conv_string_len_to_utf8(const XML_Char
*str
, int len
)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
221 return PyString_FromStringAndSize((const char *)str
, len
);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject
*self
, int initial
);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
232 error_external_entity_ref_handler(XML_Parser parser
,
233 const XML_Char
*context
,
234 const XML_Char
*base
,
235 const XML_Char
*systemId
,
236 const XML_Char
*publicId
)
242 flag_error(xmlparseobject
*self
)
244 clear_handlers(self
, 0);
245 XML_SetExternalEntityRefHandler(self
->itself
,
246 error_external_entity_ref_handler
);
250 getcode(enum HandlerTypes slot
, char* func_name
, int lineno
)
252 PyObject
*code
= NULL
;
253 PyObject
*name
= NULL
;
254 PyObject
*nulltuple
= NULL
;
255 PyObject
*filename
= NULL
;
257 if (handler_info
[slot
].tb_code
== NULL
) {
258 code
= PyString_FromString("");
261 name
= PyString_FromString(func_name
);
264 nulltuple
= PyTuple_New(0);
265 if (nulltuple
== NULL
)
267 filename
= PyString_FromString(__FILE__
);
268 handler_info
[slot
].tb_code
=
269 PyCode_New(0, /* argcount */
274 nulltuple
, /* consts */
275 nulltuple
, /* names */
276 nulltuple
, /* varnames */
277 #if PYTHON_API_VERSION >= 1010
278 nulltuple
, /* freevars */
279 nulltuple
, /* cellvars */
281 filename
, /* filename */
283 lineno
, /* firstlineno */
286 if (handler_info
[slot
].tb_code
== NULL
)
289 Py_DECREF(nulltuple
);
293 return handler_info
[slot
].tb_code
;
302 trace_frame(PyThreadState
*tstate
, PyFrameObject
*f
, int code
, PyObject
*val
)
305 if (!tstate
->use_tracing
|| tstate
->tracing
)
307 if (tstate
->c_profilefunc
!= NULL
) {
309 result
= tstate
->c_profilefunc(tstate
->c_profileobj
,
311 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
312 || (tstate
->c_profilefunc
!= NULL
));
317 if (tstate
->c_tracefunc
!= NULL
) {
319 result
= tstate
->c_tracefunc(tstate
->c_traceobj
,
321 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
322 || (tstate
->c_profilefunc
!= NULL
));
329 trace_frame_exc(PyThreadState
*tstate
, PyFrameObject
*f
)
331 PyObject
*type
, *value
, *traceback
, *arg
;
334 if (tstate
->c_tracefunc
== NULL
)
337 PyErr_Fetch(&type
, &value
, &traceback
);
342 #if PY_VERSION_HEX < 0x02040000
343 arg
= Py_BuildValue("(OOO)", type
, value
, traceback
);
345 arg
= PyTuple_Pack(3, type
, value
, traceback
);
348 PyErr_Restore(type
, value
, traceback
);
351 err
= trace_frame(tstate
, f
, PyTrace_EXCEPTION
, arg
);
354 PyErr_Restore(type
, value
, traceback
);
358 Py_XDECREF(traceback
);
365 call_with_frame(PyCodeObject
*c
, PyObject
* func
, PyObject
* args
,
366 xmlparseobject
*self
)
368 PyThreadState
*tstate
= PyThreadState_GET();
375 f
= PyFrame_New(tstate
, c
, PyEval_GetGlobals(), NULL
);
380 if (trace_frame(tstate
, f
, PyTrace_CALL
, Py_None
) < 0) {
384 res
= PyEval_CallObject(func
, args
);
386 if (tstate
->curexc_traceback
== NULL
)
388 XML_StopParser(self
->itself
, XML_FALSE
);
390 if (trace_frame_exc(tstate
, f
) < 0) {
395 if (trace_frame(tstate
, f
, PyTrace_RETURN
, res
) < 0) {
403 tstate
->frame
= f
->f_back
;
408 #ifndef Py_USING_UNICODE
409 #define STRING_CONV_FUNC conv_string_to_utf8
411 /* Python 2.0 and later versions, when built with Unicode support */
412 #define STRING_CONV_FUNC (self->returns_unicode \
413 ? conv_string_to_unicode : conv_string_to_utf8)
417 string_intern(xmlparseobject
*self
, const char* str
)
419 PyObject
*result
= STRING_CONV_FUNC(str
);
421 /* result can be NULL if the unicode conversion failed. */
426 value
= PyDict_GetItem(self
->intern
, result
);
428 if (PyDict_SetItem(self
->intern
, result
, result
) == 0)
438 /* Return 0 on success, -1 on exception.
439 * flag_error() will be called before return if needed.
442 call_character_handler(xmlparseobject
*self
, const XML_Char
*buffer
, int len
)
447 args
= PyTuple_New(1);
450 #ifdef Py_USING_UNICODE
451 temp
= (self
->returns_unicode
452 ? conv_string_len_to_unicode(buffer
, len
)
453 : conv_string_len_to_utf8(buffer
, len
));
455 temp
= conv_string_len_to_utf8(buffer
, len
);
462 PyTuple_SET_ITEM(args
, 0, temp
);
463 /* temp is now a borrowed reference; consider it unused. */
464 self
->in_callback
= 1;
465 temp
= call_with_frame(getcode(CharacterData
, "CharacterData", __LINE__
),
466 self
->handlers
[CharacterData
], args
, self
);
467 /* temp is an owned reference again, or NULL */
468 self
->in_callback
= 0;
479 flush_character_buffer(xmlparseobject
*self
)
482 if (self
->buffer
== NULL
|| self
->buffer_used
== 0)
484 rc
= call_character_handler(self
, self
->buffer
, self
->buffer_used
);
485 self
->buffer_used
= 0;
490 my_CharacterDataHandler(void *userData
, const XML_Char
*data
, int len
)
492 xmlparseobject
*self
= (xmlparseobject
*) userData
;
493 if (self
->buffer
== NULL
)
494 call_character_handler(self
, data
, len
);
496 if ((self
->buffer_used
+ len
) > self
->buffer_size
) {
497 if (flush_character_buffer(self
) < 0)
499 /* handler might have changed; drop the rest on the floor
500 * if there isn't a handler anymore
502 if (!have_handler(self
, CharacterData
))
505 if (len
> self
->buffer_size
) {
506 call_character_handler(self
, data
, len
);
507 self
->buffer_used
= 0;
510 memcpy(self
->buffer
+ self
->buffer_used
,
511 data
, len
* sizeof(XML_Char
));
512 self
->buffer_used
+= len
;
518 my_StartElementHandler(void *userData
,
519 const XML_Char
*name
, const XML_Char
*atts
[])
521 xmlparseobject
*self
= (xmlparseobject
*)userData
;
523 if (have_handler(self
, StartElement
)) {
524 PyObject
*container
, *rv
, *args
;
527 if (flush_character_buffer(self
) < 0)
529 /* Set max to the number of slots filled in atts[]; max/2 is
530 * the number of attributes we need to process.
532 if (self
->specified_attributes
) {
533 max
= XML_GetSpecifiedAttributeCount(self
->itself
);
537 while (atts
[max
] != NULL
)
540 /* Build the container. */
541 if (self
->ordered_attributes
)
542 container
= PyList_New(max
);
544 container
= PyDict_New();
545 if (container
== NULL
) {
549 for (i
= 0; i
< max
; i
+= 2) {
550 PyObject
*n
= string_intern(self
, (XML_Char
*) atts
[i
]);
554 Py_DECREF(container
);
557 v
= STRING_CONV_FUNC((XML_Char
*) atts
[i
+1]);
560 Py_DECREF(container
);
564 if (self
->ordered_attributes
) {
565 PyList_SET_ITEM(container
, i
, n
);
566 PyList_SET_ITEM(container
, i
+1, v
);
568 else if (PyDict_SetItem(container
, n
, v
)) {
579 args
= string_intern(self
, name
);
581 args
= Py_BuildValue("(NN)", args
, container
);
583 Py_DECREF(container
);
586 /* Container is now a borrowed reference; ignore it. */
587 self
->in_callback
= 1;
588 rv
= call_with_frame(getcode(StartElement
, "StartElement", __LINE__
),
589 self
->handlers
[StartElement
], args
, self
);
590 self
->in_callback
= 0;
600 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
601 RETURN, GETUSERDATA) \
603 my_##NAME##Handler PARAMS {\
604 xmlparseobject *self = GETUSERDATA ; \
605 PyObject *args = NULL; \
606 PyObject *rv = NULL; \
609 if (have_handler(self, NAME)) { \
610 if (flush_character_buffer(self) < 0) \
612 args = Py_BuildValue PARAM_FORMAT ;\
613 if (!args) { flag_error(self); return RETURN;} \
614 self->in_callback = 1; \
615 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
616 self->handlers[NAME], args, self); \
617 self->in_callback = 0; \
629 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
630 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
631 (xmlparseobject *)userData)
633 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
634 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
635 rc = PyInt_AsLong(rv);, rc, \
636 (xmlparseobject *)userData)
638 VOID_HANDLER(EndElement
,
639 (void *userData
, const XML_Char
*name
),
640 ("(N)", string_intern(self
, name
)))
642 VOID_HANDLER(ProcessingInstruction
,
644 const XML_Char
*target
,
645 const XML_Char
*data
),
646 ("(NO&)", string_intern(self
, target
), STRING_CONV_FUNC
,data
))
648 VOID_HANDLER(UnparsedEntityDecl
,
650 const XML_Char
*entityName
,
651 const XML_Char
*base
,
652 const XML_Char
*systemId
,
653 const XML_Char
*publicId
,
654 const XML_Char
*notationName
),
656 string_intern(self
, entityName
), string_intern(self
, base
),
657 string_intern(self
, systemId
), string_intern(self
, publicId
),
658 string_intern(self
, notationName
)))
660 #ifndef Py_USING_UNICODE
661 VOID_HANDLER(EntityDecl
,
663 const XML_Char
*entityName
,
664 int is_parameter_entity
,
665 const XML_Char
*value
,
667 const XML_Char
*base
,
668 const XML_Char
*systemId
,
669 const XML_Char
*publicId
,
670 const XML_Char
*notationName
),
672 string_intern(self
, entityName
), is_parameter_entity
,
673 conv_string_len_to_utf8(value
, value_length
),
674 string_intern(self
, base
), string_intern(self
, systemId
),
675 string_intern(self
, publicId
),
676 string_intern(self
, notationName
)))
678 VOID_HANDLER(EntityDecl
,
680 const XML_Char
*entityName
,
681 int is_parameter_entity
,
682 const XML_Char
*value
,
684 const XML_Char
*base
,
685 const XML_Char
*systemId
,
686 const XML_Char
*publicId
,
687 const XML_Char
*notationName
),
689 string_intern(self
, entityName
), is_parameter_entity
,
690 (self
->returns_unicode
691 ? conv_string_len_to_unicode(value
, value_length
)
692 : conv_string_len_to_utf8(value
, value_length
)),
693 string_intern(self
, base
), string_intern(self
, systemId
),
694 string_intern(self
, publicId
),
695 string_intern(self
, notationName
)))
698 VOID_HANDLER(XmlDecl
,
700 const XML_Char
*version
,
701 const XML_Char
*encoding
,
704 STRING_CONV_FUNC
,version
, STRING_CONV_FUNC
,encoding
,
708 conv_content_model(XML_Content
* const model
,
709 PyObject
*(*conv_string
)(const XML_Char
*))
711 PyObject
*result
= NULL
;
712 PyObject
*children
= PyTuple_New(model
->numchildren
);
715 if (children
!= NULL
) {
716 assert(model
->numchildren
< INT_MAX
);
717 for (i
= 0; i
< (int)model
->numchildren
; ++i
) {
718 PyObject
*child
= conv_content_model(&model
->children
[i
],
721 Py_XDECREF(children
);
724 PyTuple_SET_ITEM(children
, i
, child
);
726 result
= Py_BuildValue("(iiO&N)",
727 model
->type
, model
->quant
,
728 conv_string
,model
->name
, children
);
734 my_ElementDeclHandler(void *userData
,
735 const XML_Char
*name
,
738 xmlparseobject
*self
= (xmlparseobject
*)userData
;
739 PyObject
*args
= NULL
;
741 if (have_handler(self
, ElementDecl
)) {
743 PyObject
*modelobj
, *nameobj
;
745 if (flush_character_buffer(self
) < 0)
747 #ifdef Py_USING_UNICODE
748 modelobj
= conv_content_model(model
,
749 (self
->returns_unicode
750 ? conv_string_to_unicode
751 : conv_string_to_utf8
));
753 modelobj
= conv_content_model(model
, conv_string_to_utf8
);
755 if (modelobj
== NULL
) {
759 nameobj
= string_intern(self
, name
);
760 if (nameobj
== NULL
) {
765 args
= Py_BuildValue("NN", nameobj
, modelobj
);
771 self
->in_callback
= 1;
772 rv
= call_with_frame(getcode(ElementDecl
, "ElementDecl", __LINE__
),
773 self
->handlers
[ElementDecl
], args
, self
);
774 self
->in_callback
= 0;
783 XML_FreeContentModel(self
->itself
, model
);
787 VOID_HANDLER(AttlistDecl
,
789 const XML_Char
*elname
,
790 const XML_Char
*attname
,
791 const XML_Char
*att_type
,
792 const XML_Char
*dflt
,
795 string_intern(self
, elname
), string_intern(self
, attname
),
796 STRING_CONV_FUNC
,att_type
, STRING_CONV_FUNC
,dflt
,
799 #if XML_COMBINED_VERSION >= 19504
800 VOID_HANDLER(SkippedEntity
,
802 const XML_Char
*entityName
,
803 int is_parameter_entity
),
805 string_intern(self
, entityName
), is_parameter_entity
))
808 VOID_HANDLER(NotationDecl
,
810 const XML_Char
*notationName
,
811 const XML_Char
*base
,
812 const XML_Char
*systemId
,
813 const XML_Char
*publicId
),
815 string_intern(self
, notationName
), string_intern(self
, base
),
816 string_intern(self
, systemId
), string_intern(self
, publicId
)))
818 VOID_HANDLER(StartNamespaceDecl
,
820 const XML_Char
*prefix
,
821 const XML_Char
*uri
),
823 string_intern(self
, prefix
), string_intern(self
, uri
)))
825 VOID_HANDLER(EndNamespaceDecl
,
827 const XML_Char
*prefix
),
828 ("(N)", string_intern(self
, prefix
)))
830 VOID_HANDLER(Comment
,
831 (void *userData
, const XML_Char
*data
),
832 ("(O&)", STRING_CONV_FUNC
,data
))
834 VOID_HANDLER(StartCdataSection
,
838 VOID_HANDLER(EndCdataSection
,
842 #ifndef Py_USING_UNICODE
843 VOID_HANDLER(Default
,
844 (void *userData
, const XML_Char
*s
, int len
),
845 ("(N)", conv_string_len_to_utf8(s
,len
)))
847 VOID_HANDLER(DefaultHandlerExpand
,
848 (void *userData
, const XML_Char
*s
, int len
),
849 ("(N)", conv_string_len_to_utf8(s
,len
)))
851 VOID_HANDLER(Default
,
852 (void *userData
, const XML_Char
*s
, int len
),
853 ("(N)", (self
->returns_unicode
854 ? conv_string_len_to_unicode(s
,len
)
855 : conv_string_len_to_utf8(s
,len
))))
857 VOID_HANDLER(DefaultHandlerExpand
,
858 (void *userData
, const XML_Char
*s
, int len
),
859 ("(N)", (self
->returns_unicode
860 ? conv_string_len_to_unicode(s
,len
)
861 : conv_string_len_to_utf8(s
,len
))))
864 INT_HANDLER(NotStandalone
,
868 RC_HANDLER(int, ExternalEntityRef
,
870 const XML_Char
*context
,
871 const XML_Char
*base
,
872 const XML_Char
*systemId
,
873 const XML_Char
*publicId
),
876 STRING_CONV_FUNC
,context
, string_intern(self
, base
),
877 string_intern(self
, systemId
), string_intern(self
, publicId
)),
878 rc
= PyInt_AsLong(rv
);, rc
,
879 XML_GetUserData(parser
))
881 /* XXX UnknownEncodingHandler */
883 VOID_HANDLER(StartDoctypeDecl
,
884 (void *userData
, const XML_Char
*doctypeName
,
885 const XML_Char
*sysid
, const XML_Char
*pubid
,
886 int has_internal_subset
),
887 ("(NNNi)", string_intern(self
, doctypeName
),
888 string_intern(self
, sysid
), string_intern(self
, pubid
),
889 has_internal_subset
))
891 VOID_HANDLER(EndDoctypeDecl
, (void *userData
), ("()"))
893 /* ---------------------------------------------------------------- */
896 get_parse_result(xmlparseobject
*self
, int rv
)
898 if (PyErr_Occurred()) {
902 return set_error(self
, XML_GetErrorCode(self
->itself
));
904 if (flush_character_buffer(self
) < 0) {
907 return PyInt_FromLong(rv
);
910 PyDoc_STRVAR(xmlparse_Parse__doc__
,
911 "Parse(data[, isfinal])\n\
912 Parse XML data. `isfinal' should be true at end of input.");
915 xmlparse_Parse(xmlparseobject
*self
, PyObject
*args
)
921 if (!PyArg_ParseTuple(args
, "s#|i:Parse", &s
, &slen
, &isFinal
))
924 return get_parse_result(self
, XML_Parse(self
->itself
, s
, slen
, isFinal
));
927 /* File reading copied from cPickle */
929 #define BUF_SIZE 2048
932 readinst(char *buf
, int buf_size
, PyObject
*meth
)
934 PyObject
*arg
= NULL
;
935 PyObject
*bytes
= NULL
;
936 PyObject
*str
= NULL
;
939 if ((bytes
= PyInt_FromLong(buf_size
)) == NULL
)
942 if ((arg
= PyTuple_New(1)) == NULL
) {
947 PyTuple_SET_ITEM(arg
, 0, bytes
);
949 #if PY_VERSION_HEX < 0x02020000
950 str
= PyObject_CallObject(meth
, arg
);
952 str
= PyObject_Call(meth
, arg
, NULL
);
957 /* XXX what to do if it returns a Unicode string? */
958 if (!PyString_Check(str
)) {
959 PyErr_Format(PyExc_TypeError
,
960 "read() did not return a string object (type=%.400s)",
961 str
->ob_type
->tp_name
);
964 len
= PyString_GET_SIZE(str
);
965 if (len
> buf_size
) {
966 PyErr_Format(PyExc_ValueError
,
967 "read() returned too much data: "
968 "%i bytes requested, %i returned",
972 memcpy(buf
, PyString_AsString(str
), len
);
979 PyDoc_STRVAR(xmlparse_ParseFile__doc__
,
981 Parse XML data from file-like object.");
984 xmlparse_ParseFile(xmlparseobject
*self
, PyObject
*f
)
988 PyObject
*readmethod
= NULL
;
990 if (PyFile_Check(f
)) {
991 fp
= PyFile_AsFile(f
);
995 readmethod
= PyObject_GetAttrString(f
, "read");
996 if (readmethod
== NULL
) {
998 PyErr_SetString(PyExc_TypeError
,
999 "argument must have 'read' attribute");
1005 void *buf
= XML_GetBuffer(self
->itself
, BUF_SIZE
);
1007 Py_XDECREF(readmethod
);
1008 return PyErr_NoMemory();
1012 bytes_read
= fread(buf
, sizeof(char), BUF_SIZE
, fp
);
1013 if (bytes_read
< 0) {
1014 PyErr_SetFromErrno(PyExc_IOError
);
1019 bytes_read
= readinst(buf
, BUF_SIZE
, readmethod
);
1020 if (bytes_read
< 0) {
1021 Py_DECREF(readmethod
);
1025 rv
= XML_ParseBuffer(self
->itself
, bytes_read
, bytes_read
== 0);
1026 if (PyErr_Occurred()) {
1027 Py_XDECREF(readmethod
);
1031 if (!rv
|| bytes_read
== 0)
1034 Py_XDECREF(readmethod
);
1035 return get_parse_result(self
, rv
);
1038 PyDoc_STRVAR(xmlparse_SetBase__doc__
,
1039 "SetBase(base_url)\n\
1040 Set the base URL for the parser.");
1043 xmlparse_SetBase(xmlparseobject
*self
, PyObject
*args
)
1047 if (!PyArg_ParseTuple(args
, "s:SetBase", &base
))
1049 if (!XML_SetBase(self
->itself
, base
)) {
1050 return PyErr_NoMemory();
1056 PyDoc_STRVAR(xmlparse_GetBase__doc__
,
1057 "GetBase() -> url\n\
1058 Return base URL string for the parser.");
1061 xmlparse_GetBase(xmlparseobject
*self
, PyObject
*unused
)
1063 return Py_BuildValue("z", XML_GetBase(self
->itself
));
1066 PyDoc_STRVAR(xmlparse_GetInputContext__doc__
,
1067 "GetInputContext() -> string\n\
1068 Return the untranslated text of the input that caused the current event.\n\
1069 If the event was generated by a large amount of text (such as a start tag\n\
1070 for an element with many attributes), not all of the text may be available.");
1073 xmlparse_GetInputContext(xmlparseobject
*self
, PyObject
*unused
)
1075 if (self
->in_callback
) {
1078 = XML_GetInputContext(self
->itself
, &offset
, &size
);
1081 return PyString_FromStringAndSize(buffer
+ offset
,
1090 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__
,
1091 "ExternalEntityParserCreate(context[, encoding])\n\
1092 Create a parser for parsing an external entity based on the\n\
1093 information passed to the ExternalEntityRefHandler.");
1096 xmlparse_ExternalEntityParserCreate(xmlparseobject
*self
, PyObject
*args
)
1099 char *encoding
= NULL
;
1100 xmlparseobject
*new_parser
;
1103 if (!PyArg_ParseTuple(args
, "z|s:ExternalEntityParserCreate",
1104 &context
, &encoding
)) {
1108 #ifndef Py_TPFLAGS_HAVE_GC
1109 /* Python versions 2.0 and 2.1 */
1110 new_parser
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1112 /* Python versions 2.2 and later */
1113 new_parser
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1116 if (new_parser
== NULL
)
1118 new_parser
->buffer_size
= self
->buffer_size
;
1119 new_parser
->buffer_used
= 0;
1120 if (self
->buffer
!= NULL
) {
1121 new_parser
->buffer
= malloc(new_parser
->buffer_size
);
1122 if (new_parser
->buffer
== NULL
) {
1123 #ifndef Py_TPFLAGS_HAVE_GC
1124 /* Code for versions 2.0 and 2.1 */
1125 PyObject_Del(new_parser
);
1127 /* Code for versions 2.2 and later. */
1128 PyObject_GC_Del(new_parser
);
1130 return PyErr_NoMemory();
1134 new_parser
->buffer
= NULL
;
1135 new_parser
->returns_unicode
= self
->returns_unicode
;
1136 new_parser
->ordered_attributes
= self
->ordered_attributes
;
1137 new_parser
->specified_attributes
= self
->specified_attributes
;
1138 new_parser
->in_callback
= 0;
1139 new_parser
->ns_prefixes
= self
->ns_prefixes
;
1140 new_parser
->itself
= XML_ExternalEntityParserCreate(self
->itself
, context
,
1142 new_parser
->handlers
= 0;
1143 new_parser
->intern
= self
->intern
;
1144 Py_XINCREF(new_parser
->intern
);
1145 #ifdef Py_TPFLAGS_HAVE_GC
1146 PyObject_GC_Track(new_parser
);
1148 PyObject_GC_Init(new_parser
);
1151 if (!new_parser
->itself
) {
1152 Py_DECREF(new_parser
);
1153 return PyErr_NoMemory();
1156 XML_SetUserData(new_parser
->itself
, (void *)new_parser
);
1158 /* allocate and clear handlers first */
1159 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1162 new_parser
->handlers
= malloc(sizeof(PyObject
*) * i
);
1163 if (!new_parser
->handlers
) {
1164 Py_DECREF(new_parser
);
1165 return PyErr_NoMemory();
1167 clear_handlers(new_parser
, 1);
1169 /* then copy handlers from self */
1170 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1171 PyObject
*handler
= self
->handlers
[i
];
1172 if (handler
!= NULL
) {
1174 new_parser
->handlers
[i
] = handler
;
1175 handler_info
[i
].setter(new_parser
->itself
,
1176 handler_info
[i
].handler
);
1179 return (PyObject
*)new_parser
;
1182 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__
,
1183 "SetParamEntityParsing(flag) -> success\n\
1184 Controls parsing of parameter entities (including the external DTD\n\
1185 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1186 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1187 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1191 xmlparse_SetParamEntityParsing(xmlparseobject
*p
, PyObject
* args
)
1194 if (!PyArg_ParseTuple(args
, "i", &flag
))
1196 flag
= XML_SetParamEntityParsing(p
->itself
, flag
);
1197 return PyInt_FromLong(flag
);
1201 #if XML_COMBINED_VERSION >= 19505
1202 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__
,
1203 "UseForeignDTD([flag])\n\
1204 Allows the application to provide an artificial external subset if one is\n\
1205 not specified as part of the document instance. This readily allows the\n\
1206 use of a 'default' document type controlled by the application, while still\n\
1207 getting the advantage of providing document type information to the parser.\n\
1208 'flag' defaults to True if not provided.");
1211 xmlparse_UseForeignDTD(xmlparseobject
*self
, PyObject
*args
)
1213 PyObject
*flagobj
= NULL
;
1214 XML_Bool flag
= XML_TRUE
;
1216 if (!PyArg_UnpackTuple(args
, "UseForeignDTD", 0, 1, &flagobj
))
1218 if (flagobj
!= NULL
)
1219 flag
= PyObject_IsTrue(flagobj
) ? XML_TRUE
: XML_FALSE
;
1220 rc
= XML_UseForeignDTD(self
->itself
, flag
);
1221 if (rc
!= XML_ERROR_NONE
) {
1222 return set_error(self
, rc
);
1229 static struct PyMethodDef xmlparse_methods
[] = {
1230 {"Parse", (PyCFunction
)xmlparse_Parse
,
1231 METH_VARARGS
, xmlparse_Parse__doc__
},
1232 {"ParseFile", (PyCFunction
)xmlparse_ParseFile
,
1233 METH_O
, xmlparse_ParseFile__doc__
},
1234 {"SetBase", (PyCFunction
)xmlparse_SetBase
,
1235 METH_VARARGS
, xmlparse_SetBase__doc__
},
1236 {"GetBase", (PyCFunction
)xmlparse_GetBase
,
1237 METH_NOARGS
, xmlparse_GetBase__doc__
},
1238 {"ExternalEntityParserCreate", (PyCFunction
)xmlparse_ExternalEntityParserCreate
,
1239 METH_VARARGS
, xmlparse_ExternalEntityParserCreate__doc__
},
1240 {"SetParamEntityParsing", (PyCFunction
)xmlparse_SetParamEntityParsing
,
1241 METH_VARARGS
, xmlparse_SetParamEntityParsing__doc__
},
1242 {"GetInputContext", (PyCFunction
)xmlparse_GetInputContext
,
1243 METH_NOARGS
, xmlparse_GetInputContext__doc__
},
1244 #if XML_COMBINED_VERSION >= 19505
1245 {"UseForeignDTD", (PyCFunction
)xmlparse_UseForeignDTD
,
1246 METH_VARARGS
, xmlparse_UseForeignDTD__doc__
},
1248 {NULL
, NULL
} /* sentinel */
1254 #ifdef Py_USING_UNICODE
1256 /* pyexpat international encoding support.
1257 Make it as simple as possible.
1260 static char template_buffer
[257];
1261 PyObject
*template_string
= NULL
;
1264 init_template_buffer(void)
1267 for (i
= 0; i
< 256; i
++) {
1268 template_buffer
[i
] = i
;
1270 template_buffer
[256] = 0;
1274 PyUnknownEncodingHandler(void *encodingHandlerData
,
1275 const XML_Char
*name
,
1278 PyUnicodeObject
*_u_string
= NULL
;
1282 /* Yes, supports only 8bit encodings */
1283 _u_string
= (PyUnicodeObject
*)
1284 PyUnicode_Decode(template_buffer
, 256, name
, "replace");
1286 if (_u_string
== NULL
)
1289 for (i
= 0; i
< 256; i
++) {
1290 /* Stupid to access directly, but fast */
1291 Py_UNICODE c
= _u_string
->str
[i
];
1292 if (c
== Py_UNICODE_REPLACEMENT_CHARACTER
)
1298 info
->convert
= NULL
;
1299 info
->release
= NULL
;
1301 Py_DECREF(_u_string
);
1308 newxmlparseobject(char *encoding
, char *namespace_separator
, PyObject
*intern
)
1311 xmlparseobject
*self
;
1313 #ifdef Py_TPFLAGS_HAVE_GC
1314 /* Code for versions 2.2 and later */
1315 self
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1317 self
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1322 #ifdef Py_USING_UNICODE
1323 self
->returns_unicode
= 1;
1325 self
->returns_unicode
= 0;
1328 self
->buffer
= NULL
;
1329 self
->buffer_size
= CHARACTER_DATA_BUFFER_SIZE
;
1330 self
->buffer_used
= 0;
1331 self
->ordered_attributes
= 0;
1332 self
->specified_attributes
= 0;
1333 self
->in_callback
= 0;
1334 self
->ns_prefixes
= 0;
1335 self
->handlers
= NULL
;
1336 if (namespace_separator
!= NULL
) {
1337 self
->itself
= XML_ParserCreateNS(encoding
, *namespace_separator
);
1340 self
->itself
= XML_ParserCreate(encoding
);
1342 self
->intern
= intern
;
1343 Py_XINCREF(self
->intern
);
1344 #ifdef Py_TPFLAGS_HAVE_GC
1345 PyObject_GC_Track(self
);
1347 PyObject_GC_Init(self
);
1349 if (self
->itself
== NULL
) {
1350 PyErr_SetString(PyExc_RuntimeError
,
1351 "XML_ParserCreate failed");
1355 XML_SetUserData(self
->itself
, (void *)self
);
1356 #ifdef Py_USING_UNICODE
1357 XML_SetUnknownEncodingHandler(self
->itself
,
1358 (XML_UnknownEncodingHandler
) PyUnknownEncodingHandler
, NULL
);
1361 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1364 self
->handlers
= malloc(sizeof(PyObject
*) * i
);
1365 if (!self
->handlers
) {
1367 return PyErr_NoMemory();
1369 clear_handlers(self
, 1);
1371 return (PyObject
*)self
;
1376 xmlparse_dealloc(xmlparseobject
*self
)
1379 #ifdef Py_TPFLAGS_HAVE_GC
1380 PyObject_GC_UnTrack(self
);
1382 PyObject_GC_Fini(self
);
1384 if (self
->itself
!= NULL
)
1385 XML_ParserFree(self
->itself
);
1386 self
->itself
= NULL
;
1388 if (self
->handlers
!= NULL
) {
1390 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1391 temp
= self
->handlers
[i
];
1392 self
->handlers
[i
] = NULL
;
1395 free(self
->handlers
);
1396 self
->handlers
= NULL
;
1398 if (self
->buffer
!= NULL
) {
1400 self
->buffer
= NULL
;
1402 Py_XDECREF(self
->intern
);
1403 #ifndef Py_TPFLAGS_HAVE_GC
1404 /* Code for versions 2.0 and 2.1 */
1407 /* Code for versions 2.2 and later. */
1408 PyObject_GC_Del(self
);
1413 handlername2int(const char *name
)
1416 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1417 if (strcmp(name
, handler_info
[i
].name
) == 0) {
1425 get_pybool(int istrue
)
1427 PyObject
*result
= istrue
? Py_True
: Py_False
;
1433 xmlparse_getattr(xmlparseobject
*self
, char *name
)
1435 int handlernum
= handlername2int(name
);
1437 if (handlernum
!= -1) {
1438 PyObject
*result
= self
->handlers
[handlernum
];
1444 if (name
[0] == 'E') {
1445 if (strcmp(name
, "ErrorCode") == 0)
1446 return PyInt_FromLong((long)
1447 XML_GetErrorCode(self
->itself
));
1448 if (strcmp(name
, "ErrorLineNumber") == 0)
1449 return PyInt_FromLong((long)
1450 XML_GetErrorLineNumber(self
->itself
));
1451 if (strcmp(name
, "ErrorColumnNumber") == 0)
1452 return PyInt_FromLong((long)
1453 XML_GetErrorColumnNumber(self
->itself
));
1454 if (strcmp(name
, "ErrorByteIndex") == 0)
1455 return PyInt_FromLong((long)
1456 XML_GetErrorByteIndex(self
->itself
));
1458 if (name
[0] == 'C') {
1459 if (strcmp(name
, "CurrentLineNumber") == 0)
1460 return PyInt_FromLong((long)
1461 XML_GetCurrentLineNumber(self
->itself
));
1462 if (strcmp(name
, "CurrentColumnNumber") == 0)
1463 return PyInt_FromLong((long)
1464 XML_GetCurrentColumnNumber(self
->itself
));
1465 if (strcmp(name
, "CurrentByteIndex") == 0)
1466 return PyInt_FromLong((long)
1467 XML_GetCurrentByteIndex(self
->itself
));
1469 if (name
[0] == 'b') {
1470 if (strcmp(name
, "buffer_size") == 0)
1471 return PyInt_FromLong((long) self
->buffer_size
);
1472 if (strcmp(name
, "buffer_text") == 0)
1473 return get_pybool(self
->buffer
!= NULL
);
1474 if (strcmp(name
, "buffer_used") == 0)
1475 return PyInt_FromLong((long) self
->buffer_used
);
1477 if (strcmp(name
, "namespace_prefixes") == 0)
1478 return get_pybool(self
->ns_prefixes
);
1479 if (strcmp(name
, "ordered_attributes") == 0)
1480 return get_pybool(self
->ordered_attributes
);
1481 if (strcmp(name
, "returns_unicode") == 0)
1482 return get_pybool((long) self
->returns_unicode
);
1483 if (strcmp(name
, "specified_attributes") == 0)
1484 return get_pybool((long) self
->specified_attributes
);
1485 if (strcmp(name
, "intern") == 0) {
1486 if (self
->intern
== NULL
) {
1491 Py_INCREF(self
->intern
);
1492 return self
->intern
;
1496 #define APPEND(list, str) \
1498 PyObject *o = PyString_FromString(str); \
1500 PyList_Append(list, o); \
1504 if (strcmp(name
, "__members__") == 0) {
1506 PyObject
*rc
= PyList_New(0);
1509 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1510 PyObject
*o
= get_handler_name(&handler_info
[i
]);
1512 PyList_Append(rc
, o
);
1515 APPEND(rc
, "ErrorCode");
1516 APPEND(rc
, "ErrorLineNumber");
1517 APPEND(rc
, "ErrorColumnNumber");
1518 APPEND(rc
, "ErrorByteIndex");
1519 APPEND(rc
, "CurrentLineNumber");
1520 APPEND(rc
, "CurrentColumnNumber");
1521 APPEND(rc
, "CurrentByteIndex");
1522 APPEND(rc
, "buffer_size");
1523 APPEND(rc
, "buffer_text");
1524 APPEND(rc
, "buffer_used");
1525 APPEND(rc
, "namespace_prefixes");
1526 APPEND(rc
, "ordered_attributes");
1527 APPEND(rc
, "returns_unicode");
1528 APPEND(rc
, "specified_attributes");
1529 APPEND(rc
, "intern");
1534 return Py_FindMethod(xmlparse_methods
, (PyObject
*)self
, name
);
1538 sethandler(xmlparseobject
*self
, const char *name
, PyObject
* v
)
1540 int handlernum
= handlername2int(name
);
1541 if (handlernum
>= 0) {
1542 xmlhandler c_handler
= NULL
;
1543 PyObject
*temp
= self
->handlers
[handlernum
];
1547 else if (v
!= NULL
) {
1549 c_handler
= handler_info
[handlernum
].handler
;
1551 self
->handlers
[handlernum
] = v
;
1553 handler_info
[handlernum
].setter(self
->itself
, c_handler
);
1560 xmlparse_setattr(xmlparseobject
*self
, char *name
, PyObject
*v
)
1562 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1564 PyErr_SetString(PyExc_RuntimeError
, "Cannot delete attribute");
1567 if (strcmp(name
, "buffer_text") == 0) {
1568 if (PyObject_IsTrue(v
)) {
1569 if (self
->buffer
== NULL
) {
1570 self
->buffer
= malloc(self
->buffer_size
);
1571 if (self
->buffer
== NULL
) {
1575 self
->buffer_used
= 0;
1578 else if (self
->buffer
!= NULL
) {
1579 if (flush_character_buffer(self
) < 0)
1582 self
->buffer
= NULL
;
1586 if (strcmp(name
, "namespace_prefixes") == 0) {
1587 if (PyObject_IsTrue(v
))
1588 self
->ns_prefixes
= 1;
1590 self
->ns_prefixes
= 0;
1591 XML_SetReturnNSTriplet(self
->itself
, self
->ns_prefixes
);
1594 if (strcmp(name
, "ordered_attributes") == 0) {
1595 if (PyObject_IsTrue(v
))
1596 self
->ordered_attributes
= 1;
1598 self
->ordered_attributes
= 0;
1601 if (strcmp(name
, "returns_unicode") == 0) {
1602 if (PyObject_IsTrue(v
)) {
1603 #ifndef Py_USING_UNICODE
1604 PyErr_SetString(PyExc_ValueError
,
1605 "Unicode support not available");
1608 self
->returns_unicode
= 1;
1612 self
->returns_unicode
= 0;
1615 if (strcmp(name
, "specified_attributes") == 0) {
1616 if (PyObject_IsTrue(v
))
1617 self
->specified_attributes
= 1;
1619 self
->specified_attributes
= 0;
1622 if (strcmp(name
, "CharacterDataHandler") == 0) {
1623 /* If we're changing the character data handler, flush all
1624 * cached data with the old handler. Not sure there's a
1625 * "right" thing to do, though, but this probably won't
1628 if (flush_character_buffer(self
) < 0)
1631 if (sethandler(self
, name
, v
)) {
1634 PyErr_SetString(PyExc_AttributeError
, name
);
1638 #ifdef WITH_CYCLE_GC
1640 xmlparse_traverse(xmlparseobject
*op
, visitproc visit
, void *arg
)
1643 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1644 Py_VISIT(op
->handlers
[i
]);
1649 xmlparse_clear(xmlparseobject
*op
)
1651 clear_handlers(op
, 0);
1652 Py_CLEAR(op
->intern
);
/* Docstring used for the tp_doc slot of the type object below. */
1657 PyDoc_STRVAR(Xmlparsetype__doc__
, "XML parser");
/* Type object for xmlparser instances.
 *
 * Attribute access uses the classic tp_getattr/tp_setattr slots
 * (xmlparse_getattr / xmlparse_setattr); tp_getattro/tp_setattro are 0.
 * The basic size adds PyGC_HEAD_SIZE to sizeof(xmlparseobject).
 */
1659 static PyTypeObject Xmlparsetype
= {
1660 PyObject_HEAD_INIT(NULL
)
1662 "pyexpat.xmlparser", /*tp_name*/
1663 sizeof(xmlparseobject
) + PyGC_HEAD_SIZE
,/*tp_basicsize*/
1666 (destructor
)xmlparse_dealloc
, /*tp_dealloc*/
1667 (printfunc
)0, /*tp_print*/
1668 (getattrfunc
)xmlparse_getattr
, /*tp_getattr*/
1669 (setattrfunc
)xmlparse_setattr
, /*tp_setattr*/
1670 (cmpfunc
)0, /*tp_compare*/
1671 (reprfunc
)0, /*tp_repr*/
1673 0, /*tp_as_sequence*/
1674 0, /*tp_as_mapping*/
1675 (hashfunc
)0, /*tp_hash*/
1676 (ternaryfunc
)0, /*tp_call*/
1677 (reprfunc
)0, /*tp_str*/
1678 0, /* tp_getattro */
1679 0, /* tp_setattro */
1680 0, /* tp_as_buffer */
/* Two alternative tp_flags values follow; which one is compiled depends on
 * whether Py_TPFLAGS_HAVE_GC is defined (presumably separated by an #else
 * that is not visible here -- confirm against the full file).
 */
1681 #ifdef Py_TPFLAGS_HAVE_GC
1682 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1684 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_GC
, /*tp_flags*/
1686 Xmlparsetype__doc__
, /* tp_doc - Documentation string */
/* The GC traversal/clear functions are only referenced when WITH_CYCLE_GC
 * is defined, matching the guard around their definitions.
 */
1687 #ifdef WITH_CYCLE_GC
1688 (traverseproc
)xmlparse_traverse
, /* tp_traverse */
1689 (inquiry
)xmlparse_clear
/* tp_clear */
1695 /* End of code for xmlparser objects */
1696 /* -------------------------------------------------------- */
1698 PyDoc_STRVAR(pyexpat_ParserCreate__doc__
,
1699 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1700 Return a new XML parser object.");
1703 pyexpat_ParserCreate(PyObject
*notused
, PyObject
*args
, PyObject
*kw
)
1705 char *encoding
= NULL
;
1706 char *namespace_separator
= NULL
;
1707 PyObject
*intern
= NULL
;
1709 int intern_decref
= 0;
1710 static char *kwlist
[] = {"encoding", "namespace_separator",
1713 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|zzO:ParserCreate", kwlist
,
1714 &encoding
, &namespace_separator
, &intern
))
1716 if (namespace_separator
!= NULL
1717 && strlen(namespace_separator
) > 1) {
1718 PyErr_SetString(PyExc_ValueError
,
1719 "namespace_separator must be at most one"
1720 " character, omitted, or None");
1723 /* Explicitly passing None means no interning is desired.
1724 Not passing anything means that a new dictionary is used. */
1725 if (intern
== Py_None
)
1727 else if (intern
== NULL
) {
1728 intern
= PyDict_New();
1733 else if (!PyDict_Check(intern
)) {
1734 PyErr_SetString(PyExc_TypeError
, "intern must be a dictionary");
1738 result
= newxmlparseobject(encoding
, namespace_separator
, intern
);
1739 if (intern_decref
) {
1745 PyDoc_STRVAR(pyexpat_ErrorString__doc__
,
1746 "ErrorString(errno) -> string\n\
1747 Returns string error for given number.");
1750 pyexpat_ErrorString(PyObject
*self
, PyObject
*args
)
1754 if (!PyArg_ParseTuple(args
, "l:ErrorString", &code
))
1756 return Py_BuildValue("z", XML_ErrorString((int)code
));
1759 /* List of methods defined in the module */
/* Method table handed to Py_InitModule3() by the module init function.
 * ParserCreate takes positional and keyword arguments
 * (METH_VARARGS|METH_KEYWORDS); ErrorString takes positional arguments
 * only (METH_VARARGS).  The all-NULL entry terminates the table.
 */
1761 static struct PyMethodDef pyexpat_methods
[] = {
1762 {"ParserCreate", (PyCFunction
)pyexpat_ParserCreate
,
1763 METH_VARARGS
|METH_KEYWORDS
, pyexpat_ParserCreate__doc__
},
1764 {"ErrorString", (PyCFunction
)pyexpat_ErrorString
,
1765 METH_VARARGS
, pyexpat_ErrorString__doc__
},
1767 {NULL
, (PyCFunction
)NULL
, 0, NULL
} /* sentinel */
1770 /* Module docstring */
1772 PyDoc_STRVAR(pyexpat_module_documentation
,
1773 "Python wrapper for Expat parser.");
1775 /* Return a Python string that represents the version number without the
1776 * extra cruft added by revision control, even if the right options were
1777 * given to the "cvs export" command to make it not include the extra
1781 get_version_string(void)
1783 static char *rcsid
= "$Revision$";
1787 while (!isdigit(Py_CHARMASK(*rev
)))
1789 while (rev
[i
] != ' ' && rev
[i
] != '\0')
1792 return PyString_FromStringAndSize(rev
, i
);
1795 /* Initialization function for the module */
1798 #define MODULE_NAME "pyexpat"
1801 #ifndef MODULE_INITFUNC
1802 #define MODULE_INITFUNC initpyexpat
1805 #ifndef PyMODINIT_FUNC
1807 # define PyMODINIT_FUNC __declspec(dllexport) void
1809 # define PyMODINIT_FUNC void
1813 PyMODINIT_FUNC
MODULE_INITFUNC(void); /* avoid compiler warnings */
/* Module initialization function.
 *
 * Creates the module with Py_InitModule3(), then publishes: the exception
 * object (as both "error" and "ExpatError"), the parser type, version
 * information, the "errors" and "model" modules with their constants, the
 * optional "features" list, the XML_PARAM_ENTITY_PARSING_* constants and
 * the "expat_CAPI" dispatch table.
 */
1816 MODULE_INITFUNC(void)
1819 PyObject
*errmod_name
= PyString_FromString(MODULE_NAME
".errors");
1820 PyObject
*errors_module
;
1821 PyObject
*modelmod_name
;
1822 PyObject
*model_module
;
1823 PyObject
*sys_modules
;
1824 static struct PyExpat_CAPI capi
;
1825 PyObject
* capi_object
;
/* The two name strings are used further down as keys for the module
 * dictionary and for sys.modules.
 */
1827 if (errmod_name
== NULL
)
1829 modelmod_name
= PyString_FromString(MODULE_NAME
".model");
1830 if (modelmod_name
== NULL
)
1833 Xmlparsetype
.ob_type
= &PyType_Type
;
1835 /* Create the module and add the functions */
1836 m
= Py_InitModule3(MODULE_NAME
, pyexpat_methods
,
1837 pyexpat_module_documentation
);
1841 /* Add some symbolic constants to the module */
1842 if (ErrorObject
== NULL
) {
1843 ErrorObject
= PyErr_NewException("xml.parsers.expat.ExpatError",
1845 if (ErrorObject
== NULL
)
/* The same exception object is published under two names; each
 * PyModule_AddObject() call is preceded by its own Py_INCREF().
 */
1848 Py_INCREF(ErrorObject
);
1849 PyModule_AddObject(m
, "error", ErrorObject
);
1850 Py_INCREF(ErrorObject
);
1851 PyModule_AddObject(m
, "ExpatError", ErrorObject
);
1852 Py_INCREF(&Xmlparsetype
);
1853 PyModule_AddObject(m
, "XMLParserType", (PyObject
*) &Xmlparsetype
);
/* __version__ comes from get_version_string(); EXPAT_VERSION and
 * version_info are obtained from the Expat library.
 */
1855 PyModule_AddObject(m
, "__version__", get_version_string());
1856 PyModule_AddStringConstant(m
, "EXPAT_VERSION",
1857 (char *) XML_ExpatVersion());
1859 XML_Expat_Version info
= XML_ExpatVersionInfo();
1860 PyModule_AddObject(m
, "version_info",
1861 Py_BuildValue("(iii)", info
.major
,
1862 info
.minor
, info
.micro
));
1864 #ifdef Py_USING_UNICODE
1865 init_template_buffer();
1867 /* XXX When Expat supports some way of figuring out how it was
1868 compiled, this should check and set native_encoding
1871 PyModule_AddStringConstant(m
, "native_encoding", "UTF-8");
1873 sys_modules
= PySys_GetObject("modules");
1874 d
= PyModule_GetDict(m
);
1875 errors_module
= PyDict_GetItem(d
, errmod_name
);
1876 if (errors_module
== NULL
) {
1877 errors_module
= PyModule_New(MODULE_NAME
".errors");
1878 if (errors_module
!= NULL
) {
1879 PyDict_SetItem(sys_modules
, errmod_name
, errors_module
);
1880 /* gives away the reference to errors_module */
1881 PyModule_AddObject(m
, "errors", errors_module
);
1884 Py_DECREF(errmod_name
);
/* Same lookup-or-create sequence as above, now for the "model" module. */
1885 model_module
= PyDict_GetItem(d
, modelmod_name
);
1886 if (model_module
== NULL
) {
1887 model_module
= PyModule_New(MODULE_NAME
".model");
1888 if (model_module
!= NULL
) {
1889 PyDict_SetItem(sys_modules
, modelmod_name
, model_module
);
1890 /* gives away the reference to model_module */
1891 PyModule_AddObject(m
, "model", model_module
);
1894 Py_DECREF(modelmod_name
);
1895 if (errors_module
== NULL
|| model_module
== NULL
)
1896 /* Don't core dump later! */
1899 #if XML_COMBINED_VERSION > 19505
/* Build the "features" list from XML_GetFeatureList(): one tuple per
 * entry, up to the XML_FEATURE_END marker.
 */
1901 const XML_Feature
*features
= XML_GetFeatureList();
1902 PyObject
*list
= PyList_New(0);
1904 /* just ignore it */
1908 for (; features
[i
].feature
!= XML_FEATURE_END
; ++i
) {
1910 PyObject
*item
= Py_BuildValue("si", features
[i
].name
,
1917 ok
= PyList_Append(list
, item
);
1925 PyModule_AddObject(m
, "features", list
);
/* Each error code below is exported in the errors module as a string
 * attribute: the attribute name is the constant's name and the value is
 * XML_ErrorString() of that code.
 */
1930 #define MYCONST(name) \
1931 PyModule_AddStringConstant(errors_module, #name, \
1932 (char*)XML_ErrorString(name))
1934 MYCONST(XML_ERROR_NO_MEMORY
);
1935 MYCONST(XML_ERROR_SYNTAX
);
1936 MYCONST(XML_ERROR_NO_ELEMENTS
);
1937 MYCONST(XML_ERROR_INVALID_TOKEN
);
1938 MYCONST(XML_ERROR_UNCLOSED_TOKEN
);
1939 MYCONST(XML_ERROR_PARTIAL_CHAR
);
1940 MYCONST(XML_ERROR_TAG_MISMATCH
);
1941 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE
);
1942 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
1943 MYCONST(XML_ERROR_PARAM_ENTITY_REF
);
1944 MYCONST(XML_ERROR_UNDEFINED_ENTITY
);
1945 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF
);
1946 MYCONST(XML_ERROR_ASYNC_ENTITY
);
1947 MYCONST(XML_ERROR_BAD_CHAR_REF
);
1948 MYCONST(XML_ERROR_BINARY_ENTITY_REF
);
1949 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
1950 MYCONST(XML_ERROR_MISPLACED_XML_PI
);
1951 MYCONST(XML_ERROR_UNKNOWN_ENCODING
);
1952 MYCONST(XML_ERROR_INCORRECT_ENCODING
);
1953 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION
);
1954 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
1955 MYCONST(XML_ERROR_NOT_STANDALONE
);
1956 MYCONST(XML_ERROR_UNEXPECTED_STATE
);
1957 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE
);
1958 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD
);
1959 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
);
1960 /* Added in Expat 1.95.7. */
1961 MYCONST(XML_ERROR_UNBOUND_PREFIX
);
1962 /* Added in Expat 1.95.8. */
1963 MYCONST(XML_ERROR_UNDECLARING_PREFIX
);
1964 MYCONST(XML_ERROR_INCOMPLETE_PE
);
1965 MYCONST(XML_ERROR_XML_DECL
);
1966 MYCONST(XML_ERROR_TEXT_DECL
);
1967 MYCONST(XML_ERROR_PUBLICID
);
1968 MYCONST(XML_ERROR_SUSPENDED
);
1969 MYCONST(XML_ERROR_NOT_SUSPENDED
);
1970 MYCONST(XML_ERROR_ABORTED
);
1971 MYCONST(XML_ERROR_FINISHED
);
1972 MYCONST(XML_ERROR_SUSPEND_PE
);
1974 PyModule_AddStringConstant(errors_module
, "__doc__",
1975 "Constants used to describe error conditions.");
/* From here on MYCONST adds integer constants to the main module m. */
1979 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1980 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER
);
1981 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
);
1982 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS
);
/* From here on MYCONST adds integer constants to the model module. */
1985 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1986 PyModule_AddStringConstant(model_module
, "__doc__",
1987 "Constants used to interpret content model information.");
1989 MYCONST(XML_CTYPE_EMPTY
);
1990 MYCONST(XML_CTYPE_ANY
);
1991 MYCONST(XML_CTYPE_MIXED
);
1992 MYCONST(XML_CTYPE_NAME
);
1993 MYCONST(XML_CTYPE_CHOICE
);
1994 MYCONST(XML_CTYPE_SEQ
);
1996 MYCONST(XML_CQUANT_NONE
);
1997 MYCONST(XML_CQUANT_OPT
);
1998 MYCONST(XML_CQUANT_REP
);
1999 MYCONST(XML_CQUANT_PLUS
);
2002 /* initialize pyexpat dispatch table */
2003 capi
.size
= sizeof(capi
);
2004 capi
.magic
= PyExpat_CAPI_MAGIC
;
2005 capi
.MAJOR_VERSION
= XML_MAJOR_VERSION
;
2006 capi
.MINOR_VERSION
= XML_MINOR_VERSION
;
2007 capi
.MICRO_VERSION
= XML_MICRO_VERSION
;
2008 capi
.ErrorString
= XML_ErrorString
;
2009 capi
.GetErrorCode
= XML_GetErrorCode
;
2010 capi
.GetErrorColumnNumber
= XML_GetErrorColumnNumber
;
2011 capi
.GetErrorLineNumber
= XML_GetErrorLineNumber
;
2012 capi
.Parse
= XML_Parse
;
2013 capi
.ParserCreate_MM
= XML_ParserCreate_MM
;
2014 capi
.ParserFree
= XML_ParserFree
;
2015 capi
.SetCharacterDataHandler
= XML_SetCharacterDataHandler
;
2016 capi
.SetCommentHandler
= XML_SetCommentHandler
;
2017 capi
.SetDefaultHandlerExpand
= XML_SetDefaultHandlerExpand
;
2018 capi
.SetElementHandler
= XML_SetElementHandler
;
2019 capi
.SetNamespaceDeclHandler
= XML_SetNamespaceDeclHandler
;
2020 capi
.SetProcessingInstructionHandler
= XML_SetProcessingInstructionHandler
;
2021 capi
.SetUnknownEncodingHandler
= XML_SetUnknownEncodingHandler
;
2022 capi
.SetUserData
= XML_SetUserData
;
/* capi is declared static above, so the address stored in the CObject
 * remains valid after this function returns.
 */
2024 /* export as cobject */
2025 capi_object
= PyCObject_FromVoidPtr(&capi
, NULL
);
2027 PyModule_AddObject(m
, "expat_CAPI", capi_object
);
2031 clear_handlers(xmlparseobject
*self
, int initial
)
2036 for (; handler_info
[i
].name
!= NULL
; i
++) {
2038 self
->handlers
[i
] = NULL
;
2040 temp
= self
->handlers
[i
];
2041 self
->handlers
[i
] = NULL
;
2043 handler_info
[i
].setter(self
->itself
, NULL
);
2048 static struct HandlerInfo handler_info
[] = {
2049 {"StartElementHandler",
2050 (xmlhandlersetter
)XML_SetStartElementHandler
,
2051 (xmlhandler
)my_StartElementHandler
},
2052 {"EndElementHandler",
2053 (xmlhandlersetter
)XML_SetEndElementHandler
,
2054 (xmlhandler
)my_EndElementHandler
},
2055 {"ProcessingInstructionHandler",
2056 (xmlhandlersetter
)XML_SetProcessingInstructionHandler
,
2057 (xmlhandler
)my_ProcessingInstructionHandler
},
2058 {"CharacterDataHandler",
2059 (xmlhandlersetter
)XML_SetCharacterDataHandler
,
2060 (xmlhandler
)my_CharacterDataHandler
},
2061 {"UnparsedEntityDeclHandler",
2062 (xmlhandlersetter
)XML_SetUnparsedEntityDeclHandler
,
2063 (xmlhandler
)my_UnparsedEntityDeclHandler
},
2064 {"NotationDeclHandler",
2065 (xmlhandlersetter
)XML_SetNotationDeclHandler
,
2066 (xmlhandler
)my_NotationDeclHandler
},
2067 {"StartNamespaceDeclHandler",
2068 (xmlhandlersetter
)XML_SetStartNamespaceDeclHandler
,
2069 (xmlhandler
)my_StartNamespaceDeclHandler
},
2070 {"EndNamespaceDeclHandler",
2071 (xmlhandlersetter
)XML_SetEndNamespaceDeclHandler
,
2072 (xmlhandler
)my_EndNamespaceDeclHandler
},
2074 (xmlhandlersetter
)XML_SetCommentHandler
,
2075 (xmlhandler
)my_CommentHandler
},
2076 {"StartCdataSectionHandler",
2077 (xmlhandlersetter
)XML_SetStartCdataSectionHandler
,
2078 (xmlhandler
)my_StartCdataSectionHandler
},
2079 {"EndCdataSectionHandler",
2080 (xmlhandlersetter
)XML_SetEndCdataSectionHandler
,
2081 (xmlhandler
)my_EndCdataSectionHandler
},
2083 (xmlhandlersetter
)XML_SetDefaultHandler
,
2084 (xmlhandler
)my_DefaultHandler
},
2085 {"DefaultHandlerExpand",
2086 (xmlhandlersetter
)XML_SetDefaultHandlerExpand
,
2087 (xmlhandler
)my_DefaultHandlerExpandHandler
},
2088 {"NotStandaloneHandler",
2089 (xmlhandlersetter
)XML_SetNotStandaloneHandler
,
2090 (xmlhandler
)my_NotStandaloneHandler
},
2091 {"ExternalEntityRefHandler",
2092 (xmlhandlersetter
)XML_SetExternalEntityRefHandler
,
2093 (xmlhandler
)my_ExternalEntityRefHandler
},
2094 {"StartDoctypeDeclHandler",
2095 (xmlhandlersetter
)XML_SetStartDoctypeDeclHandler
,
2096 (xmlhandler
)my_StartDoctypeDeclHandler
},
2097 {"EndDoctypeDeclHandler",
2098 (xmlhandlersetter
)XML_SetEndDoctypeDeclHandler
,
2099 (xmlhandler
)my_EndDoctypeDeclHandler
},
2100 {"EntityDeclHandler",
2101 (xmlhandlersetter
)XML_SetEntityDeclHandler
,
2102 (xmlhandler
)my_EntityDeclHandler
},
2104 (xmlhandlersetter
)XML_SetXmlDeclHandler
,
2105 (xmlhandler
)my_XmlDeclHandler
},
2106 {"ElementDeclHandler",
2107 (xmlhandlersetter
)XML_SetElementDeclHandler
,
2108 (xmlhandler
)my_ElementDeclHandler
},
2109 {"AttlistDeclHandler",
2110 (xmlhandlersetter
)XML_SetAttlistDeclHandler
,
2111 (xmlhandler
)my_AttlistDeclHandler
},
2112 #if XML_COMBINED_VERSION >= 19504
2113 {"SkippedEntityHandler",
2114 (xmlhandlersetter
)XML_SetSkippedEntityHandler
,
2115 (xmlhandler
)my_SkippedEntityHandler
},
2118 {NULL
, NULL
, NULL
} /* sentinel */