4 #include "frameobject.h"
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
35 ProcessingInstruction
,
54 #if XML_COMBINED_VERSION >= 19504
60 static PyObject
*ErrorObject
;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
70 int returns_unicode
; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes
; /* Return attributes as a list. */
73 int specified_attributes
; /* Report only specified attributes. */
74 int in_callback
; /* Is a callback active? */
75 int ns_prefixes
; /* Namespace-triplets mode? */
76 XML_Char
*buffer
; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size
; /* Size of buffer, in XML_Char units */
79 int buffer_used
; /* Buffer units in use */
80 PyObject
*intern
; /* Dictionary to intern strings */
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype
;
88 typedef void (*xmlhandlersetter
)(XML_Parser self
, void *meth
);
89 typedef void* xmlhandler
;
93 xmlhandlersetter setter
;
95 PyCodeObject
*tb_code
;
99 static struct HandlerInfo handler_info
[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
105 set_error_attr(PyObject
*err
, char *name
, int value
)
107 PyObject
*v
= PyInt_FromLong(value
);
109 if (v
== NULL
|| PyObject_SetAttrString(err
, name
, v
) == -1) {
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
121 set_error(xmlparseobject
*self
, enum XML_Error code
)
125 XML_Parser parser
= self
->itself
;
126 int lineno
= XML_GetErrorLineNumber(parser
);
127 int column
= XML_GetErrorColumnNumber(parser
);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer
, "%.200s: line %i, column %i",
132 XML_ErrorString(code
), lineno
, column
);
133 err
= PyObject_CallFunction(ErrorObject
, "s", buffer
);
135 && set_error_attr(err
, "code", code
)
136 && set_error_attr(err
, "offset", column
)
137 && set_error_attr(err
, "lineno", lineno
)) {
138 PyErr_SetObject(ErrorObject
, err
);
145 have_handler(xmlparseobject
*self
, int type
)
147 PyObject
*handler
= self
->handlers
[type
];
148 return handler
!= NULL
;
152 get_handler_name(struct HandlerInfo
*hinfo
)
154 PyObject
*name
= hinfo
->nameobj
;
156 name
= PyString_FromString(hinfo
->name
);
157 hinfo
->nameobj
= name
;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
169 conv_string_to_unicode(const XML_Char
*str
)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
178 return PyUnicode_DecodeUTF8(str
, strlen(str
), "strict");
182 conv_string_len_to_unicode(const XML_Char
*str
, int len
)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
191 return PyUnicode_DecodeUTF8((const char *)str
, len
, "strict");
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
199 conv_string_to_utf8(const XML_Char
*str
)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
208 return PyString_FromString(str
);
212 conv_string_len_to_utf8(const XML_Char
*str
, int len
)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
221 return PyString_FromStringAndSize((const char *)str
, len
);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject
*self
, int initial
);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
232 error_external_entity_ref_handler(XML_Parser parser
,
233 const XML_Char
*context
,
234 const XML_Char
*base
,
235 const XML_Char
*systemId
,
236 const XML_Char
*publicId
)
241 /* Dummy character data handler used when an error (exception) has
242 been detected, and the actual parsing can be terminated early.
243 This is needed since character data handler can't be safely removed
244 from within the character data handler, but can be replaced. It is
245 used only from the character data handler trampoline, and must be
246 used right after `flag_error()` is called. */
248 noop_character_data_handler(void *userData
, const XML_Char
*data
, int len
)
254 flag_error(xmlparseobject
*self
)
256 clear_handlers(self
, 0);
257 XML_SetExternalEntityRefHandler(self
->itself
,
258 error_external_entity_ref_handler
);
262 getcode(enum HandlerTypes slot
, char* func_name
, int lineno
)
264 if (handler_info
[slot
].tb_code
== NULL
) {
265 handler_info
[slot
].tb_code
=
266 PyCode_NewEmpty(__FILE__
, func_name
, lineno
);
268 return handler_info
[slot
].tb_code
;
273 trace_frame(PyThreadState
*tstate
, PyFrameObject
*f
, int code
, PyObject
*val
)
276 if (!tstate
->use_tracing
|| tstate
->tracing
)
278 if (tstate
->c_profilefunc
!= NULL
) {
280 result
= tstate
->c_profilefunc(tstate
->c_profileobj
,
282 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
283 || (tstate
->c_profilefunc
!= NULL
));
288 if (tstate
->c_tracefunc
!= NULL
) {
290 result
= tstate
->c_tracefunc(tstate
->c_traceobj
,
292 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
293 || (tstate
->c_profilefunc
!= NULL
));
300 trace_frame_exc(PyThreadState
*tstate
, PyFrameObject
*f
)
302 PyObject
*type
, *value
, *traceback
, *arg
;
305 if (tstate
->c_tracefunc
== NULL
)
308 PyErr_Fetch(&type
, &value
, &traceback
);
313 #if PY_VERSION_HEX < 0x02040000
314 arg
= Py_BuildValue("(OOO)", type
, value
, traceback
);
316 arg
= PyTuple_Pack(3, type
, value
, traceback
);
319 PyErr_Restore(type
, value
, traceback
);
322 err
= trace_frame(tstate
, f
, PyTrace_EXCEPTION
, arg
);
325 PyErr_Restore(type
, value
, traceback
);
329 Py_XDECREF(traceback
);
336 call_with_frame(PyCodeObject
*c
, PyObject
* func
, PyObject
* args
,
337 xmlparseobject
*self
)
339 PyThreadState
*tstate
= PyThreadState_GET();
346 f
= PyFrame_New(tstate
, c
, PyEval_GetGlobals(), NULL
);
351 if (trace_frame(tstate
, f
, PyTrace_CALL
, Py_None
) < 0) {
355 res
= PyEval_CallObject(func
, args
);
357 if (tstate
->curexc_traceback
== NULL
)
359 XML_StopParser(self
->itself
, XML_FALSE
);
361 if (trace_frame_exc(tstate
, f
) < 0) {
366 if (trace_frame(tstate
, f
, PyTrace_RETURN
, res
) < 0) {
374 tstate
->frame
= f
->f_back
;
379 #ifndef Py_USING_UNICODE
380 #define STRING_CONV_FUNC conv_string_to_utf8
382 /* Python 2.0 and later versions, when built with Unicode support */
383 #define STRING_CONV_FUNC (self->returns_unicode \
384 ? conv_string_to_unicode : conv_string_to_utf8)
388 string_intern(xmlparseobject
*self
, const char* str
)
390 PyObject
*result
= STRING_CONV_FUNC(str
);
392 /* result can be NULL if the unicode conversion failed. */
397 value
= PyDict_GetItem(self
->intern
, result
);
399 if (PyDict_SetItem(self
->intern
, result
, result
) == 0)
409 /* Return 0 on success, -1 on exception.
410 * flag_error() will be called before return if needed.
413 call_character_handler(xmlparseobject
*self
, const XML_Char
*buffer
, int len
)
418 args
= PyTuple_New(1);
421 #ifdef Py_USING_UNICODE
422 temp
= (self
->returns_unicode
423 ? conv_string_len_to_unicode(buffer
, len
)
424 : conv_string_len_to_utf8(buffer
, len
));
426 temp
= conv_string_len_to_utf8(buffer
, len
);
431 XML_SetCharacterDataHandler(self
->itself
,
432 noop_character_data_handler
);
435 PyTuple_SET_ITEM(args
, 0, temp
);
436 /* temp is now a borrowed reference; consider it unused. */
437 self
->in_callback
= 1;
438 temp
= call_with_frame(getcode(CharacterData
, "CharacterData", __LINE__
),
439 self
->handlers
[CharacterData
], args
, self
);
440 /* temp is an owned reference again, or NULL */
441 self
->in_callback
= 0;
445 XML_SetCharacterDataHandler(self
->itself
,
446 noop_character_data_handler
);
454 flush_character_buffer(xmlparseobject
*self
)
457 if (self
->buffer
== NULL
|| self
->buffer_used
== 0)
459 rc
= call_character_handler(self
, self
->buffer
, self
->buffer_used
);
460 self
->buffer_used
= 0;
465 my_CharacterDataHandler(void *userData
, const XML_Char
*data
, int len
)
467 xmlparseobject
*self
= (xmlparseobject
*) userData
;
468 if (self
->buffer
== NULL
)
469 call_character_handler(self
, data
, len
);
471 if ((self
->buffer_used
+ len
) > self
->buffer_size
) {
472 if (flush_character_buffer(self
) < 0)
474 /* handler might have changed; drop the rest on the floor
475 * if there isn't a handler anymore
477 if (!have_handler(self
, CharacterData
))
480 if (len
> self
->buffer_size
) {
481 call_character_handler(self
, data
, len
);
482 self
->buffer_used
= 0;
485 memcpy(self
->buffer
+ self
->buffer_used
,
486 data
, len
* sizeof(XML_Char
));
487 self
->buffer_used
+= len
;
493 my_StartElementHandler(void *userData
,
494 const XML_Char
*name
, const XML_Char
*atts
[])
496 xmlparseobject
*self
= (xmlparseobject
*)userData
;
498 if (have_handler(self
, StartElement
)) {
499 PyObject
*container
, *rv
, *args
;
502 if (flush_character_buffer(self
) < 0)
504 /* Set max to the number of slots filled in atts[]; max/2 is
505 * the number of attributes we need to process.
507 if (self
->specified_attributes
) {
508 max
= XML_GetSpecifiedAttributeCount(self
->itself
);
512 while (atts
[max
] != NULL
)
515 /* Build the container. */
516 if (self
->ordered_attributes
)
517 container
= PyList_New(max
);
519 container
= PyDict_New();
520 if (container
== NULL
) {
524 for (i
= 0; i
< max
; i
+= 2) {
525 PyObject
*n
= string_intern(self
, (XML_Char
*) atts
[i
]);
529 Py_DECREF(container
);
532 v
= STRING_CONV_FUNC((XML_Char
*) atts
[i
+1]);
535 Py_DECREF(container
);
539 if (self
->ordered_attributes
) {
540 PyList_SET_ITEM(container
, i
, n
);
541 PyList_SET_ITEM(container
, i
+1, v
);
543 else if (PyDict_SetItem(container
, n
, v
)) {
554 args
= string_intern(self
, name
);
556 args
= Py_BuildValue("(NN)", args
, container
);
558 Py_DECREF(container
);
561 /* Container is now a borrowed reference; ignore it. */
562 self
->in_callback
= 1;
563 rv
= call_with_frame(getcode(StartElement
, "StartElement", __LINE__
),
564 self
->handlers
[StartElement
], args
, self
);
565 self
->in_callback
= 0;
575 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
576 RETURN, GETUSERDATA) \
578 my_##NAME##Handler PARAMS {\
579 xmlparseobject *self = GETUSERDATA ; \
580 PyObject *args = NULL; \
581 PyObject *rv = NULL; \
584 if (have_handler(self, NAME)) { \
585 if (flush_character_buffer(self) < 0) \
587 args = Py_BuildValue PARAM_FORMAT ;\
588 if (!args) { flag_error(self); return RETURN;} \
589 self->in_callback = 1; \
590 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
591 self->handlers[NAME], args, self); \
592 self->in_callback = 0; \
604 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
605 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
606 (xmlparseobject *)userData)
608 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
609 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
610 rc = PyInt_AsLong(rv);, rc, \
611 (xmlparseobject *)userData)
613 VOID_HANDLER(EndElement
,
614 (void *userData
, const XML_Char
*name
),
615 ("(N)", string_intern(self
, name
)))
617 VOID_HANDLER(ProcessingInstruction
,
619 const XML_Char
*target
,
620 const XML_Char
*data
),
621 ("(NO&)", string_intern(self
, target
), STRING_CONV_FUNC
,data
))
623 VOID_HANDLER(UnparsedEntityDecl
,
625 const XML_Char
*entityName
,
626 const XML_Char
*base
,
627 const XML_Char
*systemId
,
628 const XML_Char
*publicId
,
629 const XML_Char
*notationName
),
631 string_intern(self
, entityName
), string_intern(self
, base
),
632 string_intern(self
, systemId
), string_intern(self
, publicId
),
633 string_intern(self
, notationName
)))
635 #ifndef Py_USING_UNICODE
636 VOID_HANDLER(EntityDecl
,
638 const XML_Char
*entityName
,
639 int is_parameter_entity
,
640 const XML_Char
*value
,
642 const XML_Char
*base
,
643 const XML_Char
*systemId
,
644 const XML_Char
*publicId
,
645 const XML_Char
*notationName
),
647 string_intern(self
, entityName
), is_parameter_entity
,
648 conv_string_len_to_utf8(value
, value_length
),
649 string_intern(self
, base
), string_intern(self
, systemId
),
650 string_intern(self
, publicId
),
651 string_intern(self
, notationName
)))
653 VOID_HANDLER(EntityDecl
,
655 const XML_Char
*entityName
,
656 int is_parameter_entity
,
657 const XML_Char
*value
,
659 const XML_Char
*base
,
660 const XML_Char
*systemId
,
661 const XML_Char
*publicId
,
662 const XML_Char
*notationName
),
664 string_intern(self
, entityName
), is_parameter_entity
,
665 (self
->returns_unicode
666 ? conv_string_len_to_unicode(value
, value_length
)
667 : conv_string_len_to_utf8(value
, value_length
)),
668 string_intern(self
, base
), string_intern(self
, systemId
),
669 string_intern(self
, publicId
),
670 string_intern(self
, notationName
)))
673 VOID_HANDLER(XmlDecl
,
675 const XML_Char
*version
,
676 const XML_Char
*encoding
,
679 STRING_CONV_FUNC
,version
, STRING_CONV_FUNC
,encoding
,
683 conv_content_model(XML_Content
* const model
,
684 PyObject
*(*conv_string
)(const XML_Char
*))
686 PyObject
*result
= NULL
;
687 PyObject
*children
= PyTuple_New(model
->numchildren
);
690 if (children
!= NULL
) {
691 assert(model
->numchildren
< INT_MAX
);
692 for (i
= 0; i
< (int)model
->numchildren
; ++i
) {
693 PyObject
*child
= conv_content_model(&model
->children
[i
],
696 Py_XDECREF(children
);
699 PyTuple_SET_ITEM(children
, i
, child
);
701 result
= Py_BuildValue("(iiO&N)",
702 model
->type
, model
->quant
,
703 conv_string
,model
->name
, children
);
709 my_ElementDeclHandler(void *userData
,
710 const XML_Char
*name
,
713 xmlparseobject
*self
= (xmlparseobject
*)userData
;
714 PyObject
*args
= NULL
;
716 if (have_handler(self
, ElementDecl
)) {
718 PyObject
*modelobj
, *nameobj
;
720 if (flush_character_buffer(self
) < 0)
722 #ifdef Py_USING_UNICODE
723 modelobj
= conv_content_model(model
,
724 (self
->returns_unicode
725 ? conv_string_to_unicode
726 : conv_string_to_utf8
));
728 modelobj
= conv_content_model(model
, conv_string_to_utf8
);
730 if (modelobj
== NULL
) {
734 nameobj
= string_intern(self
, name
);
735 if (nameobj
== NULL
) {
740 args
= Py_BuildValue("NN", nameobj
, modelobj
);
746 self
->in_callback
= 1;
747 rv
= call_with_frame(getcode(ElementDecl
, "ElementDecl", __LINE__
),
748 self
->handlers
[ElementDecl
], args
, self
);
749 self
->in_callback
= 0;
758 XML_FreeContentModel(self
->itself
, model
);
762 VOID_HANDLER(AttlistDecl
,
764 const XML_Char
*elname
,
765 const XML_Char
*attname
,
766 const XML_Char
*att_type
,
767 const XML_Char
*dflt
,
770 string_intern(self
, elname
), string_intern(self
, attname
),
771 STRING_CONV_FUNC
,att_type
, STRING_CONV_FUNC
,dflt
,
774 #if XML_COMBINED_VERSION >= 19504
775 VOID_HANDLER(SkippedEntity
,
777 const XML_Char
*entityName
,
778 int is_parameter_entity
),
780 string_intern(self
, entityName
), is_parameter_entity
))
783 VOID_HANDLER(NotationDecl
,
785 const XML_Char
*notationName
,
786 const XML_Char
*base
,
787 const XML_Char
*systemId
,
788 const XML_Char
*publicId
),
790 string_intern(self
, notationName
), string_intern(self
, base
),
791 string_intern(self
, systemId
), string_intern(self
, publicId
)))
793 VOID_HANDLER(StartNamespaceDecl
,
795 const XML_Char
*prefix
,
796 const XML_Char
*uri
),
798 string_intern(self
, prefix
), string_intern(self
, uri
)))
800 VOID_HANDLER(EndNamespaceDecl
,
802 const XML_Char
*prefix
),
803 ("(N)", string_intern(self
, prefix
)))
805 VOID_HANDLER(Comment
,
806 (void *userData
, const XML_Char
*data
),
807 ("(O&)", STRING_CONV_FUNC
,data
))
809 VOID_HANDLER(StartCdataSection
,
813 VOID_HANDLER(EndCdataSection
,
817 #ifndef Py_USING_UNICODE
818 VOID_HANDLER(Default
,
819 (void *userData
, const XML_Char
*s
, int len
),
820 ("(N)", conv_string_len_to_utf8(s
,len
)))
822 VOID_HANDLER(DefaultHandlerExpand
,
823 (void *userData
, const XML_Char
*s
, int len
),
824 ("(N)", conv_string_len_to_utf8(s
,len
)))
826 VOID_HANDLER(Default
,
827 (void *userData
, const XML_Char
*s
, int len
),
828 ("(N)", (self
->returns_unicode
829 ? conv_string_len_to_unicode(s
,len
)
830 : conv_string_len_to_utf8(s
,len
))))
832 VOID_HANDLER(DefaultHandlerExpand
,
833 (void *userData
, const XML_Char
*s
, int len
),
834 ("(N)", (self
->returns_unicode
835 ? conv_string_len_to_unicode(s
,len
)
836 : conv_string_len_to_utf8(s
,len
))))
839 INT_HANDLER(NotStandalone
,
843 RC_HANDLER(int, ExternalEntityRef
,
845 const XML_Char
*context
,
846 const XML_Char
*base
,
847 const XML_Char
*systemId
,
848 const XML_Char
*publicId
),
851 STRING_CONV_FUNC
,context
, string_intern(self
, base
),
852 string_intern(self
, systemId
), string_intern(self
, publicId
)),
853 rc
= PyInt_AsLong(rv
);, rc
,
854 XML_GetUserData(parser
))
856 /* XXX UnknownEncodingHandler */
858 VOID_HANDLER(StartDoctypeDecl
,
859 (void *userData
, const XML_Char
*doctypeName
,
860 const XML_Char
*sysid
, const XML_Char
*pubid
,
861 int has_internal_subset
),
862 ("(NNNi)", string_intern(self
, doctypeName
),
863 string_intern(self
, sysid
), string_intern(self
, pubid
),
864 has_internal_subset
))
866 VOID_HANDLER(EndDoctypeDecl
, (void *userData
), ("()"))
868 /* ---------------------------------------------------------------- */
871 get_parse_result(xmlparseobject
*self
, int rv
)
873 if (PyErr_Occurred()) {
877 return set_error(self
, XML_GetErrorCode(self
->itself
));
879 if (flush_character_buffer(self
) < 0) {
882 return PyInt_FromLong(rv
);
885 PyDoc_STRVAR(xmlparse_Parse__doc__
,
886 "Parse(data[, isfinal])\n\
887 Parse XML data. `isfinal' should be true at end of input.");
890 xmlparse_Parse(xmlparseobject
*self
, PyObject
*args
)
896 if (!PyArg_ParseTuple(args
, "s#|i:Parse", &s
, &slen
, &isFinal
))
899 return get_parse_result(self
, XML_Parse(self
->itself
, s
, slen
, isFinal
));
902 /* File reading copied from cPickle */
904 #define BUF_SIZE 2048
907 readinst(char *buf
, int buf_size
, PyObject
*meth
)
909 PyObject
*arg
= NULL
;
910 PyObject
*bytes
= NULL
;
911 PyObject
*str
= NULL
;
914 if ((bytes
= PyInt_FromLong(buf_size
)) == NULL
)
917 if ((arg
= PyTuple_New(1)) == NULL
) {
922 PyTuple_SET_ITEM(arg
, 0, bytes
);
924 #if PY_VERSION_HEX < 0x02020000
925 str
= PyObject_CallObject(meth
, arg
);
927 str
= PyObject_Call(meth
, arg
, NULL
);
932 /* XXX what to do if it returns a Unicode string? */
933 if (!PyString_Check(str
)) {
934 PyErr_Format(PyExc_TypeError
,
935 "read() did not return a string object (type=%.400s)",
936 Py_TYPE(str
)->tp_name
);
939 len
= PyString_GET_SIZE(str
);
940 if (len
> buf_size
) {
941 PyErr_Format(PyExc_ValueError
,
942 "read() returned too much data: "
943 "%i bytes requested, %i returned",
947 memcpy(buf
, PyString_AsString(str
), len
);
954 PyDoc_STRVAR(xmlparse_ParseFile__doc__
,
956 Parse XML data from file-like object.");
959 xmlparse_ParseFile(xmlparseobject
*self
, PyObject
*f
)
963 PyObject
*readmethod
= NULL
;
965 if (PyFile_Check(f
)) {
966 fp
= PyFile_AsFile(f
);
970 readmethod
= PyObject_GetAttrString(f
, "read");
971 if (readmethod
== NULL
) {
973 PyErr_SetString(PyExc_TypeError
,
974 "argument must have 'read' attribute");
980 void *buf
= XML_GetBuffer(self
->itself
, BUF_SIZE
);
982 Py_XDECREF(readmethod
);
983 return PyErr_NoMemory();
987 bytes_read
= fread(buf
, sizeof(char), BUF_SIZE
, fp
);
988 if (bytes_read
< 0) {
989 PyErr_SetFromErrno(PyExc_IOError
);
994 bytes_read
= readinst(buf
, BUF_SIZE
, readmethod
);
995 if (bytes_read
< 0) {
996 Py_DECREF(readmethod
);
1000 rv
= XML_ParseBuffer(self
->itself
, bytes_read
, bytes_read
== 0);
1001 if (PyErr_Occurred()) {
1002 Py_XDECREF(readmethod
);
1006 if (!rv
|| bytes_read
== 0)
1009 Py_XDECREF(readmethod
);
1010 return get_parse_result(self
, rv
);
1013 PyDoc_STRVAR(xmlparse_SetBase__doc__
,
1014 "SetBase(base_url)\n\
1015 Set the base URL for the parser.");
1018 xmlparse_SetBase(xmlparseobject
*self
, PyObject
*args
)
1022 if (!PyArg_ParseTuple(args
, "s:SetBase", &base
))
1024 if (!XML_SetBase(self
->itself
, base
)) {
1025 return PyErr_NoMemory();
1031 PyDoc_STRVAR(xmlparse_GetBase__doc__
,
1032 "GetBase() -> url\n\
1033 Return base URL string for the parser.");
1036 xmlparse_GetBase(xmlparseobject
*self
, PyObject
*unused
)
1038 return Py_BuildValue("z", XML_GetBase(self
->itself
));
1041 PyDoc_STRVAR(xmlparse_GetInputContext__doc__
,
1042 "GetInputContext() -> string\n\
1043 Return the untranslated text of the input that caused the current event.\n\
1044 If the event was generated by a large amount of text (such as a start tag\n\
1045 for an element with many attributes), not all of the text may be available.");
1048 xmlparse_GetInputContext(xmlparseobject
*self
, PyObject
*unused
)
1050 if (self
->in_callback
) {
1053 = XML_GetInputContext(self
->itself
, &offset
, &size
);
1056 return PyString_FromStringAndSize(buffer
+ offset
,
1065 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__
,
1066 "ExternalEntityParserCreate(context[, encoding])\n\
1067 Create a parser for parsing an external entity based on the\n\
1068 information passed to the ExternalEntityRefHandler.");
1071 xmlparse_ExternalEntityParserCreate(xmlparseobject
*self
, PyObject
*args
)
1074 char *encoding
= NULL
;
1075 xmlparseobject
*new_parser
;
1078 if (!PyArg_ParseTuple(args
, "z|s:ExternalEntityParserCreate",
1079 &context
, &encoding
)) {
1083 #ifndef Py_TPFLAGS_HAVE_GC
1084 /* Python versions 2.0 and 2.1 */
1085 new_parser
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1087 /* Python versions 2.2 and later */
1088 new_parser
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1091 if (new_parser
== NULL
)
1093 new_parser
->buffer_size
= self
->buffer_size
;
1094 new_parser
->buffer_used
= 0;
1095 if (self
->buffer
!= NULL
) {
1096 new_parser
->buffer
= malloc(new_parser
->buffer_size
);
1097 if (new_parser
->buffer
== NULL
) {
1098 #ifndef Py_TPFLAGS_HAVE_GC
1099 /* Code for versions 2.0 and 2.1 */
1100 PyObject_Del(new_parser
);
1102 /* Code for versions 2.2 and later. */
1103 PyObject_GC_Del(new_parser
);
1105 return PyErr_NoMemory();
1109 new_parser
->buffer
= NULL
;
1110 new_parser
->returns_unicode
= self
->returns_unicode
;
1111 new_parser
->ordered_attributes
= self
->ordered_attributes
;
1112 new_parser
->specified_attributes
= self
->specified_attributes
;
1113 new_parser
->in_callback
= 0;
1114 new_parser
->ns_prefixes
= self
->ns_prefixes
;
1115 new_parser
->itself
= XML_ExternalEntityParserCreate(self
->itself
, context
,
1117 new_parser
->handlers
= 0;
1118 new_parser
->intern
= self
->intern
;
1119 Py_XINCREF(new_parser
->intern
);
1120 #ifdef Py_TPFLAGS_HAVE_GC
1121 PyObject_GC_Track(new_parser
);
1123 PyObject_GC_Init(new_parser
);
1126 if (!new_parser
->itself
) {
1127 Py_DECREF(new_parser
);
1128 return PyErr_NoMemory();
1131 XML_SetUserData(new_parser
->itself
, (void *)new_parser
);
1133 /* allocate and clear handlers first */
1134 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1137 new_parser
->handlers
= malloc(sizeof(PyObject
*) * i
);
1138 if (!new_parser
->handlers
) {
1139 Py_DECREF(new_parser
);
1140 return PyErr_NoMemory();
1142 clear_handlers(new_parser
, 1);
1144 /* then copy handlers from self */
1145 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1146 PyObject
*handler
= self
->handlers
[i
];
1147 if (handler
!= NULL
) {
1149 new_parser
->handlers
[i
] = handler
;
1150 handler_info
[i
].setter(new_parser
->itself
,
1151 handler_info
[i
].handler
);
1154 return (PyObject
*)new_parser
;
1157 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__
,
1158 "SetParamEntityParsing(flag) -> success\n\
1159 Controls parsing of parameter entities (including the external DTD\n\
1160 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1161 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1162 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1166 xmlparse_SetParamEntityParsing(xmlparseobject
*p
, PyObject
* args
)
1169 if (!PyArg_ParseTuple(args
, "i", &flag
))
1171 flag
= XML_SetParamEntityParsing(p
->itself
, flag
);
1172 return PyInt_FromLong(flag
);
1176 #if XML_COMBINED_VERSION >= 19505
1177 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__
,
1178 "UseForeignDTD([flag])\n\
1179 Allows the application to provide an artificial external subset if one is\n\
1180 not specified as part of the document instance. This readily allows the\n\
1181 use of a 'default' document type controlled by the application, while still\n\
1182 getting the advantage of providing document type information to the parser.\n\
1183 'flag' defaults to True if not provided.");
1186 xmlparse_UseForeignDTD(xmlparseobject
*self
, PyObject
*args
)
1188 PyObject
*flagobj
= NULL
;
1189 XML_Bool flag
= XML_TRUE
;
1191 if (!PyArg_UnpackTuple(args
, "UseForeignDTD", 0, 1, &flagobj
))
1193 if (flagobj
!= NULL
)
1194 flag
= PyObject_IsTrue(flagobj
) ? XML_TRUE
: XML_FALSE
;
1195 rc
= XML_UseForeignDTD(self
->itself
, flag
);
1196 if (rc
!= XML_ERROR_NONE
) {
1197 return set_error(self
, rc
);
1204 static struct PyMethodDef xmlparse_methods
[] = {
1205 {"Parse", (PyCFunction
)xmlparse_Parse
,
1206 METH_VARARGS
, xmlparse_Parse__doc__
},
1207 {"ParseFile", (PyCFunction
)xmlparse_ParseFile
,
1208 METH_O
, xmlparse_ParseFile__doc__
},
1209 {"SetBase", (PyCFunction
)xmlparse_SetBase
,
1210 METH_VARARGS
, xmlparse_SetBase__doc__
},
1211 {"GetBase", (PyCFunction
)xmlparse_GetBase
,
1212 METH_NOARGS
, xmlparse_GetBase__doc__
},
1213 {"ExternalEntityParserCreate", (PyCFunction
)xmlparse_ExternalEntityParserCreate
,
1214 METH_VARARGS
, xmlparse_ExternalEntityParserCreate__doc__
},
1215 {"SetParamEntityParsing", (PyCFunction
)xmlparse_SetParamEntityParsing
,
1216 METH_VARARGS
, xmlparse_SetParamEntityParsing__doc__
},
1217 {"GetInputContext", (PyCFunction
)xmlparse_GetInputContext
,
1218 METH_NOARGS
, xmlparse_GetInputContext__doc__
},
1219 #if XML_COMBINED_VERSION >= 19505
1220 {"UseForeignDTD", (PyCFunction
)xmlparse_UseForeignDTD
,
1221 METH_VARARGS
, xmlparse_UseForeignDTD__doc__
},
1223 {NULL
, NULL
} /* sentinel */
1229 #ifdef Py_USING_UNICODE
1231 /* pyexpat international encoding support.
1232 Make it as simple as possible.
1235 static char template_buffer
[257];
1236 PyObject
*template_string
= NULL
;
1239 init_template_buffer(void)
1242 for (i
= 0; i
< 256; i
++) {
1243 template_buffer
[i
] = i
;
1245 template_buffer
[256] = 0;
1249 PyUnknownEncodingHandler(void *encodingHandlerData
,
1250 const XML_Char
*name
,
1253 PyUnicodeObject
*_u_string
= NULL
;
1257 /* Yes, supports only 8bit encodings */
1258 _u_string
= (PyUnicodeObject
*)
1259 PyUnicode_Decode(template_buffer
, 256, name
, "replace");
1261 if (_u_string
== NULL
)
1264 for (i
= 0; i
< 256; i
++) {
1265 /* Stupid to access directly, but fast */
1266 Py_UNICODE c
= _u_string
->str
[i
];
1267 if (c
== Py_UNICODE_REPLACEMENT_CHARACTER
)
1273 info
->convert
= NULL
;
1274 info
->release
= NULL
;
1276 Py_DECREF(_u_string
);
1283 newxmlparseobject(char *encoding
, char *namespace_separator
, PyObject
*intern
)
1286 xmlparseobject
*self
;
1288 #ifdef Py_TPFLAGS_HAVE_GC
1289 /* Code for versions 2.2 and later */
1290 self
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1292 self
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1297 #ifdef Py_USING_UNICODE
1298 self
->returns_unicode
= 1;
1300 self
->returns_unicode
= 0;
1303 self
->buffer
= NULL
;
1304 self
->buffer_size
= CHARACTER_DATA_BUFFER_SIZE
;
1305 self
->buffer_used
= 0;
1306 self
->ordered_attributes
= 0;
1307 self
->specified_attributes
= 0;
1308 self
->in_callback
= 0;
1309 self
->ns_prefixes
= 0;
1310 self
->handlers
= NULL
;
1311 if (namespace_separator
!= NULL
) {
1312 self
->itself
= XML_ParserCreateNS(encoding
, *namespace_separator
);
1315 self
->itself
= XML_ParserCreate(encoding
);
1317 self
->intern
= intern
;
1318 Py_XINCREF(self
->intern
);
1319 #ifdef Py_TPFLAGS_HAVE_GC
1320 PyObject_GC_Track(self
);
1322 PyObject_GC_Init(self
);
1324 if (self
->itself
== NULL
) {
1325 PyErr_SetString(PyExc_RuntimeError
,
1326 "XML_ParserCreate failed");
1330 XML_SetUserData(self
->itself
, (void *)self
);
1331 #ifdef Py_USING_UNICODE
1332 XML_SetUnknownEncodingHandler(self
->itself
,
1333 (XML_UnknownEncodingHandler
) PyUnknownEncodingHandler
, NULL
);
1336 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1339 self
->handlers
= malloc(sizeof(PyObject
*) * i
);
1340 if (!self
->handlers
) {
1342 return PyErr_NoMemory();
1344 clear_handlers(self
, 1);
1346 return (PyObject
*)self
;
1351 xmlparse_dealloc(xmlparseobject
*self
)
1354 #ifdef Py_TPFLAGS_HAVE_GC
1355 PyObject_GC_UnTrack(self
);
1357 PyObject_GC_Fini(self
);
1359 if (self
->itself
!= NULL
)
1360 XML_ParserFree(self
->itself
);
1361 self
->itself
= NULL
;
1363 if (self
->handlers
!= NULL
) {
1365 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1366 temp
= self
->handlers
[i
];
1367 self
->handlers
[i
] = NULL
;
1370 free(self
->handlers
);
1371 self
->handlers
= NULL
;
1373 if (self
->buffer
!= NULL
) {
1375 self
->buffer
= NULL
;
1377 Py_XDECREF(self
->intern
);
1378 #ifndef Py_TPFLAGS_HAVE_GC
1379 /* Code for versions 2.0 and 2.1 */
1382 /* Code for versions 2.2 and later. */
1383 PyObject_GC_Del(self
);
1388 handlername2int(const char *name
)
1391 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1392 if (strcmp(name
, handler_info
[i
].name
) == 0) {
1400 get_pybool(int istrue
)
1402 PyObject
*result
= istrue
? Py_True
: Py_False
;
1408 xmlparse_getattr(xmlparseobject
*self
, char *name
)
1410 int handlernum
= handlername2int(name
);
1412 if (handlernum
!= -1) {
1413 PyObject
*result
= self
->handlers
[handlernum
];
1419 if (name
[0] == 'E') {
1420 if (strcmp(name
, "ErrorCode") == 0)
1421 return PyInt_FromLong((long)
1422 XML_GetErrorCode(self
->itself
));
1423 if (strcmp(name
, "ErrorLineNumber") == 0)
1424 return PyInt_FromLong((long)
1425 XML_GetErrorLineNumber(self
->itself
));
1426 if (strcmp(name
, "ErrorColumnNumber") == 0)
1427 return PyInt_FromLong((long)
1428 XML_GetErrorColumnNumber(self
->itself
));
1429 if (strcmp(name
, "ErrorByteIndex") == 0)
1430 return PyInt_FromLong((long)
1431 XML_GetErrorByteIndex(self
->itself
));
1433 if (name
[0] == 'C') {
1434 if (strcmp(name
, "CurrentLineNumber") == 0)
1435 return PyInt_FromLong((long)
1436 XML_GetCurrentLineNumber(self
->itself
));
1437 if (strcmp(name
, "CurrentColumnNumber") == 0)
1438 return PyInt_FromLong((long)
1439 XML_GetCurrentColumnNumber(self
->itself
));
1440 if (strcmp(name
, "CurrentByteIndex") == 0)
1441 return PyInt_FromLong((long)
1442 XML_GetCurrentByteIndex(self
->itself
));
1444 if (name
[0] == 'b') {
1445 if (strcmp(name
, "buffer_size") == 0)
1446 return PyInt_FromLong((long) self
->buffer_size
);
1447 if (strcmp(name
, "buffer_text") == 0)
1448 return get_pybool(self
->buffer
!= NULL
);
1449 if (strcmp(name
, "buffer_used") == 0)
1450 return PyInt_FromLong((long) self
->buffer_used
);
1452 if (strcmp(name
, "namespace_prefixes") == 0)
1453 return get_pybool(self
->ns_prefixes
);
1454 if (strcmp(name
, "ordered_attributes") == 0)
1455 return get_pybool(self
->ordered_attributes
);
1456 if (strcmp(name
, "returns_unicode") == 0)
1457 return get_pybool((long) self
->returns_unicode
);
1458 if (strcmp(name
, "specified_attributes") == 0)
1459 return get_pybool((long) self
->specified_attributes
);
1460 if (strcmp(name
, "intern") == 0) {
1461 if (self
->intern
== NULL
) {
1466 Py_INCREF(self
->intern
);
1467 return self
->intern
;
1471 #define APPEND(list, str) \
1473 PyObject *o = PyString_FromString(str); \
1475 PyList_Append(list, o); \
1479 if (strcmp(name
, "__members__") == 0) {
1481 PyObject
*rc
= PyList_New(0);
1484 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1485 PyObject
*o
= get_handler_name(&handler_info
[i
]);
1487 PyList_Append(rc
, o
);
1490 APPEND(rc
, "ErrorCode");
1491 APPEND(rc
, "ErrorLineNumber");
1492 APPEND(rc
, "ErrorColumnNumber");
1493 APPEND(rc
, "ErrorByteIndex");
1494 APPEND(rc
, "CurrentLineNumber");
1495 APPEND(rc
, "CurrentColumnNumber");
1496 APPEND(rc
, "CurrentByteIndex");
1497 APPEND(rc
, "buffer_size");
1498 APPEND(rc
, "buffer_text");
1499 APPEND(rc
, "buffer_used");
1500 APPEND(rc
, "namespace_prefixes");
1501 APPEND(rc
, "ordered_attributes");
1502 APPEND(rc
, "returns_unicode");
1503 APPEND(rc
, "specified_attributes");
1504 APPEND(rc
, "intern");
1509 return Py_FindMethod(xmlparse_methods
, (PyObject
*)self
, name
);
1513 sethandler(xmlparseobject
*self
, const char *name
, PyObject
* v
)
1515 int handlernum
= handlername2int(name
);
1516 if (handlernum
>= 0) {
1517 xmlhandler c_handler
= NULL
;
1518 PyObject
*temp
= self
->handlers
[handlernum
];
1521 /* If this is the character data handler, and a character
1522 data handler is already active, we need to be more
1523 careful. What we can safely do is replace the existing
1524 character data handler callback function with a no-op
1525 function that will refuse to call Python. The downside
1526 is that this doesn't completely remove the character
1527 data handler from the C layer if there's any callback
1528 active, so Expat does a little more work than it
1529 otherwise would, but that's really an odd case. A more
1530 elaborate system of handlers and state could remove the
1531 C handler more effectively. */
1532 if (handlernum
== CharacterData
&& self
->in_callback
)
1533 c_handler
= noop_character_data_handler
;
1536 else if (v
!= NULL
) {
1538 c_handler
= handler_info
[handlernum
].handler
;
1540 self
->handlers
[handlernum
] = v
;
1542 handler_info
[handlernum
].setter(self
->itself
, c_handler
);
1549 xmlparse_setattr(xmlparseobject
*self
, char *name
, PyObject
*v
)
1551 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1553 PyErr_SetString(PyExc_RuntimeError
, "Cannot delete attribute");
1556 if (strcmp(name
, "buffer_text") == 0) {
1557 if (PyObject_IsTrue(v
)) {
1558 if (self
->buffer
== NULL
) {
1559 self
->buffer
= malloc(self
->buffer_size
);
1560 if (self
->buffer
== NULL
) {
1564 self
->buffer_used
= 0;
1567 else if (self
->buffer
!= NULL
) {
1568 if (flush_character_buffer(self
) < 0)
1571 self
->buffer
= NULL
;
1575 if (strcmp(name
, "namespace_prefixes") == 0) {
1576 if (PyObject_IsTrue(v
))
1577 self
->ns_prefixes
= 1;
1579 self
->ns_prefixes
= 0;
1580 XML_SetReturnNSTriplet(self
->itself
, self
->ns_prefixes
);
1583 if (strcmp(name
, "ordered_attributes") == 0) {
1584 if (PyObject_IsTrue(v
))
1585 self
->ordered_attributes
= 1;
1587 self
->ordered_attributes
= 0;
1590 if (strcmp(name
, "returns_unicode") == 0) {
1591 if (PyObject_IsTrue(v
)) {
1592 #ifndef Py_USING_UNICODE
1593 PyErr_SetString(PyExc_ValueError
,
1594 "Unicode support not available");
1597 self
->returns_unicode
= 1;
1601 self
->returns_unicode
= 0;
1604 if (strcmp(name
, "specified_attributes") == 0) {
1605 if (PyObject_IsTrue(v
))
1606 self
->specified_attributes
= 1;
1608 self
->specified_attributes
= 0;
1612 if (strcmp(name
, "buffer_size") == 0) {
1613 long new_buffer_size
;
1614 if (!PyInt_Check(v
)) {
1615 PyErr_SetString(PyExc_TypeError
, "buffer_size must be an integer");
1619 new_buffer_size
=PyInt_AS_LONG(v
);
1620 /* trivial case -- no change */
1621 if (new_buffer_size
== self
->buffer_size
) {
1625 if (new_buffer_size
<= 0) {
1626 PyErr_SetString(PyExc_ValueError
, "buffer_size must be greater than zero");
1631 if (new_buffer_size
> INT_MAX
) {
1633 sprintf(errmsg
, "buffer_size must not be greater than %i", INT_MAX
);
1634 PyErr_SetString(PyExc_ValueError
, errmsg
);
1638 if (self
->buffer
!= NULL
) {
1639 /* there is already a buffer */
1640 if (self
->buffer_used
!= 0) {
1641 flush_character_buffer(self
);
1643 /* free existing buffer */
1646 self
->buffer
= malloc(new_buffer_size
);
1647 if (self
->buffer
== NULL
) {
1651 self
->buffer_size
= new_buffer_size
;
1655 if (strcmp(name
, "CharacterDataHandler") == 0) {
1656 /* If we're changing the character data handler, flush all
1657 * cached data with the old handler. Not sure there's a
1658 * "right" thing to do, though, but this probably won't
1661 if (flush_character_buffer(self
) < 0)
1664 if (sethandler(self
, name
, v
)) {
1667 PyErr_SetString(PyExc_AttributeError
, name
);
1671 #ifdef WITH_CYCLE_GC
1673 xmlparse_traverse(xmlparseobject
*op
, visitproc visit
, void *arg
)
1676 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1677 Py_VISIT(op
->handlers
[i
]);
1682 xmlparse_clear(xmlparseobject
*op
)
1684 clear_handlers(op
, 0);
1685 Py_CLEAR(op
->intern
);
1690 PyDoc_STRVAR(Xmlparsetype__doc__
, "XML parser");
1692 static PyTypeObject Xmlparsetype
= {
1693 PyVarObject_HEAD_INIT(NULL
, 0)
1694 "pyexpat.xmlparser", /*tp_name*/
1695 sizeof(xmlparseobject
) + PyGC_HEAD_SIZE
,/*tp_basicsize*/
1698 (destructor
)xmlparse_dealloc
, /*tp_dealloc*/
1699 (printfunc
)0, /*tp_print*/
1700 (getattrfunc
)xmlparse_getattr
, /*tp_getattr*/
1701 (setattrfunc
)xmlparse_setattr
, /*tp_setattr*/
1702 (cmpfunc
)0, /*tp_compare*/
1703 (reprfunc
)0, /*tp_repr*/
1705 0, /*tp_as_sequence*/
1706 0, /*tp_as_mapping*/
1707 (hashfunc
)0, /*tp_hash*/
1708 (ternaryfunc
)0, /*tp_call*/
1709 (reprfunc
)0, /*tp_str*/
1710 0, /* tp_getattro */
1711 0, /* tp_setattro */
1712 0, /* tp_as_buffer */
1713 #ifdef Py_TPFLAGS_HAVE_GC
1714 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1716 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_GC
, /*tp_flags*/
1718 Xmlparsetype__doc__
, /* tp_doc - Documentation string */
1719 #ifdef WITH_CYCLE_GC
1720 (traverseproc
)xmlparse_traverse
, /* tp_traverse */
1721 (inquiry
)xmlparse_clear
/* tp_clear */
1727 /* End of code for xmlparser objects */
1728 /* -------------------------------------------------------- */
1730 PyDoc_STRVAR(pyexpat_ParserCreate__doc__
,
1731 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1732 Return a new XML parser object.");
1735 pyexpat_ParserCreate(PyObject
*notused
, PyObject
*args
, PyObject
*kw
)
1737 char *encoding
= NULL
;
1738 char *namespace_separator
= NULL
;
1739 PyObject
*intern
= NULL
;
1741 int intern_decref
= 0;
1742 static char *kwlist
[] = {"encoding", "namespace_separator",
1745 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|zzO:ParserCreate", kwlist
,
1746 &encoding
, &namespace_separator
, &intern
))
1748 if (namespace_separator
!= NULL
1749 && strlen(namespace_separator
) > 1) {
1750 PyErr_SetString(PyExc_ValueError
,
1751 "namespace_separator must be at most one"
1752 " character, omitted, or None");
1755 /* Explicitly passing None means no interning is desired.
1756 Not passing anything means that a new dictionary is used. */
1757 if (intern
== Py_None
)
1759 else if (intern
== NULL
) {
1760 intern
= PyDict_New();
1765 else if (!PyDict_Check(intern
)) {
1766 PyErr_SetString(PyExc_TypeError
, "intern must be a dictionary");
1770 result
= newxmlparseobject(encoding
, namespace_separator
, intern
);
1771 if (intern_decref
) {
1777 PyDoc_STRVAR(pyexpat_ErrorString__doc__
,
1778 "ErrorString(errno) -> string\n\
1779 Returns string error for given number.");
1782 pyexpat_ErrorString(PyObject
*self
, PyObject
*args
)
1786 if (!PyArg_ParseTuple(args
, "l:ErrorString", &code
))
1788 return Py_BuildValue("z", XML_ErrorString((int)code
));
1791 /* List of methods defined in the module */
1793 static struct PyMethodDef pyexpat_methods
[] = {
1794 {"ParserCreate", (PyCFunction
)pyexpat_ParserCreate
,
1795 METH_VARARGS
|METH_KEYWORDS
, pyexpat_ParserCreate__doc__
},
1796 {"ErrorString", (PyCFunction
)pyexpat_ErrorString
,
1797 METH_VARARGS
, pyexpat_ErrorString__doc__
},
1799 {NULL
, (PyCFunction
)NULL
, 0, NULL
} /* sentinel */
1802 /* Module docstring */
1804 PyDoc_STRVAR(pyexpat_module_documentation
,
1805 "Python wrapper for Expat parser.");
1807 /* Return a Python string that represents the version number without the
1808 * extra cruft added by revision control, even if the right options were
1809 * given to the "cvs export" command to make it not include the extra
1813 get_version_string(void)
1815 static char *rcsid
= "$Revision$";
1819 while (!isdigit(Py_CHARMASK(*rev
)))
1821 while (rev
[i
] != ' ' && rev
[i
] != '\0')
1824 return PyString_FromStringAndSize(rev
, i
);
1827 /* Initialization function for the module */
1830 #define MODULE_NAME "pyexpat"
1833 #ifndef MODULE_INITFUNC
1834 #define MODULE_INITFUNC initpyexpat
1837 #ifndef PyMODINIT_FUNC
1839 # define PyMODINIT_FUNC __declspec(dllexport) void
1841 # define PyMODINIT_FUNC void
1845 PyMODINIT_FUNC
MODULE_INITFUNC(void); /* avoid compiler warnings */
1848 MODULE_INITFUNC(void)
1851 PyObject
*errmod_name
= PyString_FromString(MODULE_NAME
".errors");
1852 PyObject
*errors_module
;
1853 PyObject
*modelmod_name
;
1854 PyObject
*model_module
;
1855 PyObject
*sys_modules
;
1856 static struct PyExpat_CAPI capi
;
1857 PyObject
* capi_object
;
1859 if (errmod_name
== NULL
)
1861 modelmod_name
= PyString_FromString(MODULE_NAME
".model");
1862 if (modelmod_name
== NULL
)
1865 Py_TYPE(&Xmlparsetype
) = &PyType_Type
;
1867 /* Create the module and add the functions */
1868 m
= Py_InitModule3(MODULE_NAME
, pyexpat_methods
,
1869 pyexpat_module_documentation
);
1873 /* Add some symbolic constants to the module */
1874 if (ErrorObject
== NULL
) {
1875 ErrorObject
= PyErr_NewException("xml.parsers.expat.ExpatError",
1877 if (ErrorObject
== NULL
)
1880 Py_INCREF(ErrorObject
);
1881 PyModule_AddObject(m
, "error", ErrorObject
);
1882 Py_INCREF(ErrorObject
);
1883 PyModule_AddObject(m
, "ExpatError", ErrorObject
);
1884 Py_INCREF(&Xmlparsetype
);
1885 PyModule_AddObject(m
, "XMLParserType", (PyObject
*) &Xmlparsetype
);
1887 PyModule_AddObject(m
, "__version__", get_version_string());
1888 PyModule_AddStringConstant(m
, "EXPAT_VERSION",
1889 (char *) XML_ExpatVersion());
1891 XML_Expat_Version info
= XML_ExpatVersionInfo();
1892 PyModule_AddObject(m
, "version_info",
1893 Py_BuildValue("(iii)", info
.major
,
1894 info
.minor
, info
.micro
));
1896 #ifdef Py_USING_UNICODE
1897 init_template_buffer();
1899 /* XXX When Expat supports some way of figuring out how it was
1900 compiled, this should check and set native_encoding
1903 PyModule_AddStringConstant(m
, "native_encoding", "UTF-8");
1905 sys_modules
= PySys_GetObject("modules");
1906 d
= PyModule_GetDict(m
);
1907 errors_module
= PyDict_GetItem(d
, errmod_name
);
1908 if (errors_module
== NULL
) {
1909 errors_module
= PyModule_New(MODULE_NAME
".errors");
1910 if (errors_module
!= NULL
) {
1911 PyDict_SetItem(sys_modules
, errmod_name
, errors_module
);
1912 /* gives away the reference to errors_module */
1913 PyModule_AddObject(m
, "errors", errors_module
);
1916 Py_DECREF(errmod_name
);
1917 model_module
= PyDict_GetItem(d
, modelmod_name
);
1918 if (model_module
== NULL
) {
1919 model_module
= PyModule_New(MODULE_NAME
".model");
1920 if (model_module
!= NULL
) {
1921 PyDict_SetItem(sys_modules
, modelmod_name
, model_module
);
1922 /* gives away the reference to model_module */
1923 PyModule_AddObject(m
, "model", model_module
);
1926 Py_DECREF(modelmod_name
);
1927 if (errors_module
== NULL
|| model_module
== NULL
)
1928 /* Don't core dump later! */
1931 #if XML_COMBINED_VERSION > 19505
1933 const XML_Feature
*features
= XML_GetFeatureList();
1934 PyObject
*list
= PyList_New(0);
1936 /* just ignore it */
1940 for (; features
[i
].feature
!= XML_FEATURE_END
; ++i
) {
1942 PyObject
*item
= Py_BuildValue("si", features
[i
].name
,
1949 ok
= PyList_Append(list
, item
);
1957 PyModule_AddObject(m
, "features", list
);
1962 #define MYCONST(name) \
1963 PyModule_AddStringConstant(errors_module, #name, \
1964 (char*)XML_ErrorString(name))
1966 MYCONST(XML_ERROR_NO_MEMORY
);
1967 MYCONST(XML_ERROR_SYNTAX
);
1968 MYCONST(XML_ERROR_NO_ELEMENTS
);
1969 MYCONST(XML_ERROR_INVALID_TOKEN
);
1970 MYCONST(XML_ERROR_UNCLOSED_TOKEN
);
1971 MYCONST(XML_ERROR_PARTIAL_CHAR
);
1972 MYCONST(XML_ERROR_TAG_MISMATCH
);
1973 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE
);
1974 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
1975 MYCONST(XML_ERROR_PARAM_ENTITY_REF
);
1976 MYCONST(XML_ERROR_UNDEFINED_ENTITY
);
1977 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF
);
1978 MYCONST(XML_ERROR_ASYNC_ENTITY
);
1979 MYCONST(XML_ERROR_BAD_CHAR_REF
);
1980 MYCONST(XML_ERROR_BINARY_ENTITY_REF
);
1981 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
1982 MYCONST(XML_ERROR_MISPLACED_XML_PI
);
1983 MYCONST(XML_ERROR_UNKNOWN_ENCODING
);
1984 MYCONST(XML_ERROR_INCORRECT_ENCODING
);
1985 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION
);
1986 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
1987 MYCONST(XML_ERROR_NOT_STANDALONE
);
1988 MYCONST(XML_ERROR_UNEXPECTED_STATE
);
1989 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE
);
1990 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD
);
1991 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
);
1992 /* Added in Expat 1.95.7. */
1993 MYCONST(XML_ERROR_UNBOUND_PREFIX
);
1994 /* Added in Expat 1.95.8. */
1995 MYCONST(XML_ERROR_UNDECLARING_PREFIX
);
1996 MYCONST(XML_ERROR_INCOMPLETE_PE
);
1997 MYCONST(XML_ERROR_XML_DECL
);
1998 MYCONST(XML_ERROR_TEXT_DECL
);
1999 MYCONST(XML_ERROR_PUBLICID
);
2000 MYCONST(XML_ERROR_SUSPENDED
);
2001 MYCONST(XML_ERROR_NOT_SUSPENDED
);
2002 MYCONST(XML_ERROR_ABORTED
);
2003 MYCONST(XML_ERROR_FINISHED
);
2004 MYCONST(XML_ERROR_SUSPEND_PE
);
2006 PyModule_AddStringConstant(errors_module
, "__doc__",
2007 "Constants used to describe error conditions.");
2011 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
2012 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER
);
2013 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
);
2014 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS
);
2017 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2018 PyModule_AddStringConstant(model_module
, "__doc__",
2019 "Constants used to interpret content model information.");
2021 MYCONST(XML_CTYPE_EMPTY
);
2022 MYCONST(XML_CTYPE_ANY
);
2023 MYCONST(XML_CTYPE_MIXED
);
2024 MYCONST(XML_CTYPE_NAME
);
2025 MYCONST(XML_CTYPE_CHOICE
);
2026 MYCONST(XML_CTYPE_SEQ
);
2028 MYCONST(XML_CQUANT_NONE
);
2029 MYCONST(XML_CQUANT_OPT
);
2030 MYCONST(XML_CQUANT_REP
);
2031 MYCONST(XML_CQUANT_PLUS
);
2034 /* initialize pyexpat dispatch table */
2035 capi
.size
= sizeof(capi
);
2036 capi
.magic
= PyExpat_CAPI_MAGIC
;
2037 capi
.MAJOR_VERSION
= XML_MAJOR_VERSION
;
2038 capi
.MINOR_VERSION
= XML_MINOR_VERSION
;
2039 capi
.MICRO_VERSION
= XML_MICRO_VERSION
;
2040 capi
.ErrorString
= XML_ErrorString
;
2041 capi
.GetErrorCode
= XML_GetErrorCode
;
2042 capi
.GetErrorColumnNumber
= XML_GetErrorColumnNumber
;
2043 capi
.GetErrorLineNumber
= XML_GetErrorLineNumber
;
2044 capi
.Parse
= XML_Parse
;
2045 capi
.ParserCreate_MM
= XML_ParserCreate_MM
;
2046 capi
.ParserFree
= XML_ParserFree
;
2047 capi
.SetCharacterDataHandler
= XML_SetCharacterDataHandler
;
2048 capi
.SetCommentHandler
= XML_SetCommentHandler
;
2049 capi
.SetDefaultHandlerExpand
= XML_SetDefaultHandlerExpand
;
2050 capi
.SetElementHandler
= XML_SetElementHandler
;
2051 capi
.SetNamespaceDeclHandler
= XML_SetNamespaceDeclHandler
;
2052 capi
.SetProcessingInstructionHandler
= XML_SetProcessingInstructionHandler
;
2053 capi
.SetUnknownEncodingHandler
= XML_SetUnknownEncodingHandler
;
2054 capi
.SetUserData
= XML_SetUserData
;
2056 /* export as cobject */
2057 capi_object
= PyCObject_FromVoidPtr(&capi
, NULL
);
2059 PyModule_AddObject(m
, "expat_CAPI", capi_object
);
2063 clear_handlers(xmlparseobject
*self
, int initial
)
2068 for (; handler_info
[i
].name
!= NULL
; i
++) {
2070 self
->handlers
[i
] = NULL
;
2072 temp
= self
->handlers
[i
];
2073 self
->handlers
[i
] = NULL
;
2075 handler_info
[i
].setter(self
->itself
, NULL
);
2080 static struct HandlerInfo handler_info
[] = {
2081 {"StartElementHandler",
2082 (xmlhandlersetter
)XML_SetStartElementHandler
,
2083 (xmlhandler
)my_StartElementHandler
},
2084 {"EndElementHandler",
2085 (xmlhandlersetter
)XML_SetEndElementHandler
,
2086 (xmlhandler
)my_EndElementHandler
},
2087 {"ProcessingInstructionHandler",
2088 (xmlhandlersetter
)XML_SetProcessingInstructionHandler
,
2089 (xmlhandler
)my_ProcessingInstructionHandler
},
2090 {"CharacterDataHandler",
2091 (xmlhandlersetter
)XML_SetCharacterDataHandler
,
2092 (xmlhandler
)my_CharacterDataHandler
},
2093 {"UnparsedEntityDeclHandler",
2094 (xmlhandlersetter
)XML_SetUnparsedEntityDeclHandler
,
2095 (xmlhandler
)my_UnparsedEntityDeclHandler
},
2096 {"NotationDeclHandler",
2097 (xmlhandlersetter
)XML_SetNotationDeclHandler
,
2098 (xmlhandler
)my_NotationDeclHandler
},
2099 {"StartNamespaceDeclHandler",
2100 (xmlhandlersetter
)XML_SetStartNamespaceDeclHandler
,
2101 (xmlhandler
)my_StartNamespaceDeclHandler
},
2102 {"EndNamespaceDeclHandler",
2103 (xmlhandlersetter
)XML_SetEndNamespaceDeclHandler
,
2104 (xmlhandler
)my_EndNamespaceDeclHandler
},
2106 (xmlhandlersetter
)XML_SetCommentHandler
,
2107 (xmlhandler
)my_CommentHandler
},
2108 {"StartCdataSectionHandler",
2109 (xmlhandlersetter
)XML_SetStartCdataSectionHandler
,
2110 (xmlhandler
)my_StartCdataSectionHandler
},
2111 {"EndCdataSectionHandler",
2112 (xmlhandlersetter
)XML_SetEndCdataSectionHandler
,
2113 (xmlhandler
)my_EndCdataSectionHandler
},
2115 (xmlhandlersetter
)XML_SetDefaultHandler
,
2116 (xmlhandler
)my_DefaultHandler
},
2117 {"DefaultHandlerExpand",
2118 (xmlhandlersetter
)XML_SetDefaultHandlerExpand
,
2119 (xmlhandler
)my_DefaultHandlerExpandHandler
},
2120 {"NotStandaloneHandler",
2121 (xmlhandlersetter
)XML_SetNotStandaloneHandler
,
2122 (xmlhandler
)my_NotStandaloneHandler
},
2123 {"ExternalEntityRefHandler",
2124 (xmlhandlersetter
)XML_SetExternalEntityRefHandler
,
2125 (xmlhandler
)my_ExternalEntityRefHandler
},
2126 {"StartDoctypeDeclHandler",
2127 (xmlhandlersetter
)XML_SetStartDoctypeDeclHandler
,
2128 (xmlhandler
)my_StartDoctypeDeclHandler
},
2129 {"EndDoctypeDeclHandler",
2130 (xmlhandlersetter
)XML_SetEndDoctypeDeclHandler
,
2131 (xmlhandler
)my_EndDoctypeDeclHandler
},
2132 {"EntityDeclHandler",
2133 (xmlhandlersetter
)XML_SetEntityDeclHandler
,
2134 (xmlhandler
)my_EntityDeclHandler
},
2136 (xmlhandlersetter
)XML_SetXmlDeclHandler
,
2137 (xmlhandler
)my_XmlDeclHandler
},
2138 {"ElementDeclHandler",
2139 (xmlhandlersetter
)XML_SetElementDeclHandler
,
2140 (xmlhandler
)my_ElementDeclHandler
},
2141 {"AttlistDeclHandler",
2142 (xmlhandlersetter
)XML_SetAttlistDeclHandler
,
2143 (xmlhandler
)my_AttlistDeclHandler
},
2144 #if XML_COMBINED_VERSION >= 19504
2145 {"SkippedEntityHandler",
2146 (xmlhandlersetter
)XML_SetSkippedEntityHandler
,
2147 (xmlhandler
)my_SkippedEntityHandler
},
2150 {NULL
, NULL
, NULL
} /* sentinel */