4 #include "frameobject.h"
9 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
20 #define PyDoc_STR(str) str
21 #define PyDoc_VAR(name) static char name[]
22 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
25 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26 /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
27 #define Py_USING_UNICODE
35 ProcessingInstruction
,
54 #if XML_COMBINED_VERSION >= 19504
60 static PyObject
*ErrorObject
;
62 /* ----------------------------------------------------- */
64 /* Declarations for objects of type xmlparser */
70 int returns_unicode
; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes
; /* Return attributes as a list. */
73 int specified_attributes
; /* Report only specified attributes. */
74 int in_callback
; /* Is a callback active? */
75 int ns_prefixes
; /* Namespace-triplets mode? */
76 XML_Char
*buffer
; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size
; /* Size of buffer, in XML_Char units */
79 int buffer_used
; /* Buffer units in use */
80 PyObject
*intern
; /* Dictionary to intern strings */
84 #define CHARACTER_DATA_BUFFER_SIZE 8192
86 static PyTypeObject Xmlparsetype
;
88 typedef void (*xmlhandlersetter
)(XML_Parser self
, void *meth
);
89 typedef void* xmlhandler
;
93 xmlhandlersetter setter
;
95 PyCodeObject
*tb_code
;
99 static struct HandlerInfo handler_info
[64];
101 /* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
105 set_error_attr(PyObject
*err
, char *name
, int value
)
107 PyObject
*v
= PyInt_FromLong(value
);
109 if (v
== NULL
|| PyObject_SetAttrString(err
, name
, v
) == -1) {
117 /* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
121 set_error(xmlparseobject
*self
, enum XML_Error code
)
125 XML_Parser parser
= self
->itself
;
126 int lineno
= XML_GetErrorLineNumber(parser
);
127 int column
= XML_GetErrorColumnNumber(parser
);
129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer
, "%.200s: line %i, column %i",
132 XML_ErrorString(code
), lineno
, column
);
133 err
= PyObject_CallFunction(ErrorObject
, "s", buffer
);
135 && set_error_attr(err
, "code", code
)
136 && set_error_attr(err
, "offset", column
)
137 && set_error_attr(err
, "lineno", lineno
)) {
138 PyErr_SetObject(ErrorObject
, err
);
145 have_handler(xmlparseobject
*self
, int type
)
147 PyObject
*handler
= self
->handlers
[type
];
148 return handler
!= NULL
;
152 get_handler_name(struct HandlerInfo
*hinfo
)
154 PyObject
*name
= hinfo
->nameobj
;
156 name
= PyString_FromString(hinfo
->name
);
157 hinfo
->nameobj
= name
;
164 #ifdef Py_USING_UNICODE
165 /* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
169 conv_string_to_unicode(const XML_Char
*str
)
171 /* XXX currently this code assumes that XML_Char is 8-bit,
172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
178 return PyUnicode_DecodeUTF8(str
, strlen(str
), "strict");
182 conv_string_len_to_unicode(const XML_Char
*str
, int len
)
184 /* XXX currently this code assumes that XML_Char is 8-bit,
185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
191 return PyUnicode_DecodeUTF8((const char *)str
, len
, "strict");
195 /* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
199 conv_string_to_utf8(const XML_Char
*str
)
201 /* XXX currently this code assumes that XML_Char is 8-bit,
202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
208 return PyString_FromString(str
);
212 conv_string_len_to_utf8(const XML_Char
*str
, int len
)
214 /* XXX currently this code assumes that XML_Char is 8-bit,
215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
221 return PyString_FromStringAndSize((const char *)str
, len
);
224 /* Callback routines */
226 static void clear_handlers(xmlparseobject
*self
, int initial
);
228 /* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
232 error_external_entity_ref_handler(XML_Parser parser
,
233 const XML_Char
*context
,
234 const XML_Char
*base
,
235 const XML_Char
*systemId
,
236 const XML_Char
*publicId
)
241 /* Dummy character data handler used when an error (exception) has
242 been detected, and the actual parsing can be terminated early.
243 This is needed since character data handler can't be safely removed
244 from within the character data handler, but can be replaced. It is
245 used only from the character data handler trampoline, and must be
246 used right after `flag_error()` is called. */
248 noop_character_data_handler(void *userData
, const XML_Char
*data
, int len
)
254 flag_error(xmlparseobject
*self
)
256 clear_handlers(self
, 0);
257 XML_SetExternalEntityRefHandler(self
->itself
,
258 error_external_entity_ref_handler
);
262 getcode(enum HandlerTypes slot
, char* func_name
, int lineno
)
264 PyObject
*code
= NULL
;
265 PyObject
*name
= NULL
;
266 PyObject
*nulltuple
= NULL
;
267 PyObject
*filename
= NULL
;
269 if (handler_info
[slot
].tb_code
== NULL
) {
270 code
= PyString_FromString("");
273 name
= PyString_FromString(func_name
);
276 nulltuple
= PyTuple_New(0);
277 if (nulltuple
== NULL
)
279 filename
= PyString_FromString(__FILE__
);
280 handler_info
[slot
].tb_code
=
281 PyCode_New(0, /* argcount */
286 nulltuple
, /* consts */
287 nulltuple
, /* names */
288 nulltuple
, /* varnames */
289 #if PYTHON_API_VERSION >= 1010
290 nulltuple
, /* freevars */
291 nulltuple
, /* cellvars */
293 filename
, /* filename */
295 lineno
, /* firstlineno */
298 if (handler_info
[slot
].tb_code
== NULL
)
301 Py_DECREF(nulltuple
);
305 return handler_info
[slot
].tb_code
;
314 trace_frame(PyThreadState
*tstate
, PyFrameObject
*f
, int code
, PyObject
*val
)
317 if (!tstate
->use_tracing
|| tstate
->tracing
)
319 if (tstate
->c_profilefunc
!= NULL
) {
321 result
= tstate
->c_profilefunc(tstate
->c_profileobj
,
323 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
324 || (tstate
->c_profilefunc
!= NULL
));
329 if (tstate
->c_tracefunc
!= NULL
) {
331 result
= tstate
->c_tracefunc(tstate
->c_traceobj
,
333 tstate
->use_tracing
= ((tstate
->c_tracefunc
!= NULL
)
334 || (tstate
->c_profilefunc
!= NULL
));
341 trace_frame_exc(PyThreadState
*tstate
, PyFrameObject
*f
)
343 PyObject
*type
, *value
, *traceback
, *arg
;
346 if (tstate
->c_tracefunc
== NULL
)
349 PyErr_Fetch(&type
, &value
, &traceback
);
354 #if PY_VERSION_HEX < 0x02040000
355 arg
= Py_BuildValue("(OOO)", type
, value
, traceback
);
357 arg
= PyTuple_Pack(3, type
, value
, traceback
);
360 PyErr_Restore(type
, value
, traceback
);
363 err
= trace_frame(tstate
, f
, PyTrace_EXCEPTION
, arg
);
366 PyErr_Restore(type
, value
, traceback
);
370 Py_XDECREF(traceback
);
377 call_with_frame(PyCodeObject
*c
, PyObject
* func
, PyObject
* args
,
378 xmlparseobject
*self
)
380 PyThreadState
*tstate
= PyThreadState_GET();
387 f
= PyFrame_New(tstate
, c
, PyEval_GetGlobals(), NULL
);
392 if (trace_frame(tstate
, f
, PyTrace_CALL
, Py_None
) < 0) {
396 res
= PyEval_CallObject(func
, args
);
398 if (tstate
->curexc_traceback
== NULL
)
400 XML_StopParser(self
->itself
, XML_FALSE
);
402 if (trace_frame_exc(tstate
, f
) < 0) {
407 if (trace_frame(tstate
, f
, PyTrace_RETURN
, res
) < 0) {
415 tstate
->frame
= f
->f_back
;
420 #ifndef Py_USING_UNICODE
421 #define STRING_CONV_FUNC conv_string_to_utf8
423 /* Python 2.0 and later versions, when built with Unicode support */
424 #define STRING_CONV_FUNC (self->returns_unicode \
425 ? conv_string_to_unicode : conv_string_to_utf8)
429 string_intern(xmlparseobject
*self
, const char* str
)
431 PyObject
*result
= STRING_CONV_FUNC(str
);
433 /* result can be NULL if the unicode conversion failed. */
438 value
= PyDict_GetItem(self
->intern
, result
);
440 if (PyDict_SetItem(self
->intern
, result
, result
) == 0)
450 /* Return 0 on success, -1 on exception.
451 * flag_error() will be called before return if needed.
454 call_character_handler(xmlparseobject
*self
, const XML_Char
*buffer
, int len
)
459 args
= PyTuple_New(1);
462 #ifdef Py_USING_UNICODE
463 temp
= (self
->returns_unicode
464 ? conv_string_len_to_unicode(buffer
, len
)
465 : conv_string_len_to_utf8(buffer
, len
));
467 temp
= conv_string_len_to_utf8(buffer
, len
);
472 XML_SetCharacterDataHandler(self
->itself
,
473 noop_character_data_handler
);
476 PyTuple_SET_ITEM(args
, 0, temp
);
477 /* temp is now a borrowed reference; consider it unused. */
478 self
->in_callback
= 1;
479 temp
= call_with_frame(getcode(CharacterData
, "CharacterData", __LINE__
),
480 self
->handlers
[CharacterData
], args
, self
);
481 /* temp is an owned reference again, or NULL */
482 self
->in_callback
= 0;
486 XML_SetCharacterDataHandler(self
->itself
,
487 noop_character_data_handler
);
495 flush_character_buffer(xmlparseobject
*self
)
498 if (self
->buffer
== NULL
|| self
->buffer_used
== 0)
500 rc
= call_character_handler(self
, self
->buffer
, self
->buffer_used
);
501 self
->buffer_used
= 0;
506 my_CharacterDataHandler(void *userData
, const XML_Char
*data
, int len
)
508 xmlparseobject
*self
= (xmlparseobject
*) userData
;
509 if (self
->buffer
== NULL
)
510 call_character_handler(self
, data
, len
);
512 if ((self
->buffer_used
+ len
) > self
->buffer_size
) {
513 if (flush_character_buffer(self
) < 0)
515 /* handler might have changed; drop the rest on the floor
516 * if there isn't a handler anymore
518 if (!have_handler(self
, CharacterData
))
521 if (len
> self
->buffer_size
) {
522 call_character_handler(self
, data
, len
);
523 self
->buffer_used
= 0;
526 memcpy(self
->buffer
+ self
->buffer_used
,
527 data
, len
* sizeof(XML_Char
));
528 self
->buffer_used
+= len
;
534 my_StartElementHandler(void *userData
,
535 const XML_Char
*name
, const XML_Char
*atts
[])
537 xmlparseobject
*self
= (xmlparseobject
*)userData
;
539 if (have_handler(self
, StartElement
)) {
540 PyObject
*container
, *rv
, *args
;
543 if (flush_character_buffer(self
) < 0)
545 /* Set max to the number of slots filled in atts[]; max/2 is
546 * the number of attributes we need to process.
548 if (self
->specified_attributes
) {
549 max
= XML_GetSpecifiedAttributeCount(self
->itself
);
553 while (atts
[max
] != NULL
)
556 /* Build the container. */
557 if (self
->ordered_attributes
)
558 container
= PyList_New(max
);
560 container
= PyDict_New();
561 if (container
== NULL
) {
565 for (i
= 0; i
< max
; i
+= 2) {
566 PyObject
*n
= string_intern(self
, (XML_Char
*) atts
[i
]);
570 Py_DECREF(container
);
573 v
= STRING_CONV_FUNC((XML_Char
*) atts
[i
+1]);
576 Py_DECREF(container
);
580 if (self
->ordered_attributes
) {
581 PyList_SET_ITEM(container
, i
, n
);
582 PyList_SET_ITEM(container
, i
+1, v
);
584 else if (PyDict_SetItem(container
, n
, v
)) {
595 args
= string_intern(self
, name
);
597 args
= Py_BuildValue("(NN)", args
, container
);
599 Py_DECREF(container
);
602 /* Container is now a borrowed reference; ignore it. */
603 self
->in_callback
= 1;
604 rv
= call_with_frame(getcode(StartElement
, "StartElement", __LINE__
),
605 self
->handlers
[StartElement
], args
, self
);
606 self
->in_callback
= 0;
616 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
617 RETURN, GETUSERDATA) \
619 my_##NAME##Handler PARAMS {\
620 xmlparseobject *self = GETUSERDATA ; \
621 PyObject *args = NULL; \
622 PyObject *rv = NULL; \
625 if (have_handler(self, NAME)) { \
626 if (flush_character_buffer(self) < 0) \
628 args = Py_BuildValue PARAM_FORMAT ;\
629 if (!args) { flag_error(self); return RETURN;} \
630 self->in_callback = 1; \
631 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
632 self->handlers[NAME], args, self); \
633 self->in_callback = 0; \
645 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
646 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
647 (xmlparseobject *)userData)
649 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
650 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
651 rc = PyInt_AsLong(rv);, rc, \
652 (xmlparseobject *)userData)
654 VOID_HANDLER(EndElement
,
655 (void *userData
, const XML_Char
*name
),
656 ("(N)", string_intern(self
, name
)))
658 VOID_HANDLER(ProcessingInstruction
,
660 const XML_Char
*target
,
661 const XML_Char
*data
),
662 ("(NO&)", string_intern(self
, target
), STRING_CONV_FUNC
,data
))
664 VOID_HANDLER(UnparsedEntityDecl
,
666 const XML_Char
*entityName
,
667 const XML_Char
*base
,
668 const XML_Char
*systemId
,
669 const XML_Char
*publicId
,
670 const XML_Char
*notationName
),
672 string_intern(self
, entityName
), string_intern(self
, base
),
673 string_intern(self
, systemId
), string_intern(self
, publicId
),
674 string_intern(self
, notationName
)))
676 #ifndef Py_USING_UNICODE
677 VOID_HANDLER(EntityDecl
,
679 const XML_Char
*entityName
,
680 int is_parameter_entity
,
681 const XML_Char
*value
,
683 const XML_Char
*base
,
684 const XML_Char
*systemId
,
685 const XML_Char
*publicId
,
686 const XML_Char
*notationName
),
688 string_intern(self
, entityName
), is_parameter_entity
,
689 conv_string_len_to_utf8(value
, value_length
),
690 string_intern(self
, base
), string_intern(self
, systemId
),
691 string_intern(self
, publicId
),
692 string_intern(self
, notationName
)))
694 VOID_HANDLER(EntityDecl
,
696 const XML_Char
*entityName
,
697 int is_parameter_entity
,
698 const XML_Char
*value
,
700 const XML_Char
*base
,
701 const XML_Char
*systemId
,
702 const XML_Char
*publicId
,
703 const XML_Char
*notationName
),
705 string_intern(self
, entityName
), is_parameter_entity
,
706 (self
->returns_unicode
707 ? conv_string_len_to_unicode(value
, value_length
)
708 : conv_string_len_to_utf8(value
, value_length
)),
709 string_intern(self
, base
), string_intern(self
, systemId
),
710 string_intern(self
, publicId
),
711 string_intern(self
, notationName
)))
714 VOID_HANDLER(XmlDecl
,
716 const XML_Char
*version
,
717 const XML_Char
*encoding
,
720 STRING_CONV_FUNC
,version
, STRING_CONV_FUNC
,encoding
,
724 conv_content_model(XML_Content
* const model
,
725 PyObject
*(*conv_string
)(const XML_Char
*))
727 PyObject
*result
= NULL
;
728 PyObject
*children
= PyTuple_New(model
->numchildren
);
731 if (children
!= NULL
) {
732 assert(model
->numchildren
< INT_MAX
);
733 for (i
= 0; i
< (int)model
->numchildren
; ++i
) {
734 PyObject
*child
= conv_content_model(&model
->children
[i
],
737 Py_XDECREF(children
);
740 PyTuple_SET_ITEM(children
, i
, child
);
742 result
= Py_BuildValue("(iiO&N)",
743 model
->type
, model
->quant
,
744 conv_string
,model
->name
, children
);
750 my_ElementDeclHandler(void *userData
,
751 const XML_Char
*name
,
754 xmlparseobject
*self
= (xmlparseobject
*)userData
;
755 PyObject
*args
= NULL
;
757 if (have_handler(self
, ElementDecl
)) {
759 PyObject
*modelobj
, *nameobj
;
761 if (flush_character_buffer(self
) < 0)
763 #ifdef Py_USING_UNICODE
764 modelobj
= conv_content_model(model
,
765 (self
->returns_unicode
766 ? conv_string_to_unicode
767 : conv_string_to_utf8
));
769 modelobj
= conv_content_model(model
, conv_string_to_utf8
);
771 if (modelobj
== NULL
) {
775 nameobj
= string_intern(self
, name
);
776 if (nameobj
== NULL
) {
781 args
= Py_BuildValue("NN", nameobj
, modelobj
);
787 self
->in_callback
= 1;
788 rv
= call_with_frame(getcode(ElementDecl
, "ElementDecl", __LINE__
),
789 self
->handlers
[ElementDecl
], args
, self
);
790 self
->in_callback
= 0;
799 XML_FreeContentModel(self
->itself
, model
);
803 VOID_HANDLER(AttlistDecl
,
805 const XML_Char
*elname
,
806 const XML_Char
*attname
,
807 const XML_Char
*att_type
,
808 const XML_Char
*dflt
,
811 string_intern(self
, elname
), string_intern(self
, attname
),
812 STRING_CONV_FUNC
,att_type
, STRING_CONV_FUNC
,dflt
,
815 #if XML_COMBINED_VERSION >= 19504
816 VOID_HANDLER(SkippedEntity
,
818 const XML_Char
*entityName
,
819 int is_parameter_entity
),
821 string_intern(self
, entityName
), is_parameter_entity
))
824 VOID_HANDLER(NotationDecl
,
826 const XML_Char
*notationName
,
827 const XML_Char
*base
,
828 const XML_Char
*systemId
,
829 const XML_Char
*publicId
),
831 string_intern(self
, notationName
), string_intern(self
, base
),
832 string_intern(self
, systemId
), string_intern(self
, publicId
)))
834 VOID_HANDLER(StartNamespaceDecl
,
836 const XML_Char
*prefix
,
837 const XML_Char
*uri
),
839 string_intern(self
, prefix
), string_intern(self
, uri
)))
841 VOID_HANDLER(EndNamespaceDecl
,
843 const XML_Char
*prefix
),
844 ("(N)", string_intern(self
, prefix
)))
846 VOID_HANDLER(Comment
,
847 (void *userData
, const XML_Char
*data
),
848 ("(O&)", STRING_CONV_FUNC
,data
))
850 VOID_HANDLER(StartCdataSection
,
854 VOID_HANDLER(EndCdataSection
,
858 #ifndef Py_USING_UNICODE
859 VOID_HANDLER(Default
,
860 (void *userData
, const XML_Char
*s
, int len
),
861 ("(N)", conv_string_len_to_utf8(s
,len
)))
863 VOID_HANDLER(DefaultHandlerExpand
,
864 (void *userData
, const XML_Char
*s
, int len
),
865 ("(N)", conv_string_len_to_utf8(s
,len
)))
867 VOID_HANDLER(Default
,
868 (void *userData
, const XML_Char
*s
, int len
),
869 ("(N)", (self
->returns_unicode
870 ? conv_string_len_to_unicode(s
,len
)
871 : conv_string_len_to_utf8(s
,len
))))
873 VOID_HANDLER(DefaultHandlerExpand
,
874 (void *userData
, const XML_Char
*s
, int len
),
875 ("(N)", (self
->returns_unicode
876 ? conv_string_len_to_unicode(s
,len
)
877 : conv_string_len_to_utf8(s
,len
))))
880 INT_HANDLER(NotStandalone
,
884 RC_HANDLER(int, ExternalEntityRef
,
886 const XML_Char
*context
,
887 const XML_Char
*base
,
888 const XML_Char
*systemId
,
889 const XML_Char
*publicId
),
892 STRING_CONV_FUNC
,context
, string_intern(self
, base
),
893 string_intern(self
, systemId
), string_intern(self
, publicId
)),
894 rc
= PyInt_AsLong(rv
);, rc
,
895 XML_GetUserData(parser
))
897 /* XXX UnknownEncodingHandler */
899 VOID_HANDLER(StartDoctypeDecl
,
900 (void *userData
, const XML_Char
*doctypeName
,
901 const XML_Char
*sysid
, const XML_Char
*pubid
,
902 int has_internal_subset
),
903 ("(NNNi)", string_intern(self
, doctypeName
),
904 string_intern(self
, sysid
), string_intern(self
, pubid
),
905 has_internal_subset
))
907 VOID_HANDLER(EndDoctypeDecl
, (void *userData
), ("()"))
909 /* ---------------------------------------------------------------- */
912 get_parse_result(xmlparseobject
*self
, int rv
)
914 if (PyErr_Occurred()) {
918 return set_error(self
, XML_GetErrorCode(self
->itself
));
920 if (flush_character_buffer(self
) < 0) {
923 return PyInt_FromLong(rv
);
926 PyDoc_STRVAR(xmlparse_Parse__doc__
,
927 "Parse(data[, isfinal])\n\
928 Parse XML data. `isfinal' should be true at end of input.");
931 xmlparse_Parse(xmlparseobject
*self
, PyObject
*args
)
937 if (!PyArg_ParseTuple(args
, "s#|i:Parse", &s
, &slen
, &isFinal
))
940 return get_parse_result(self
, XML_Parse(self
->itself
, s
, slen
, isFinal
));
943 /* File reading copied from cPickle */
945 #define BUF_SIZE 2048
948 readinst(char *buf
, int buf_size
, PyObject
*meth
)
950 PyObject
*arg
= NULL
;
951 PyObject
*bytes
= NULL
;
952 PyObject
*str
= NULL
;
955 if ((bytes
= PyInt_FromLong(buf_size
)) == NULL
)
958 if ((arg
= PyTuple_New(1)) == NULL
) {
963 PyTuple_SET_ITEM(arg
, 0, bytes
);
965 #if PY_VERSION_HEX < 0x02020000
966 str
= PyObject_CallObject(meth
, arg
);
968 str
= PyObject_Call(meth
, arg
, NULL
);
973 /* XXX what to do if it returns a Unicode string? */
974 if (!PyString_Check(str
)) {
975 PyErr_Format(PyExc_TypeError
,
976 "read() did not return a string object (type=%.400s)",
977 str
->ob_type
->tp_name
);
980 len
= PyString_GET_SIZE(str
);
981 if (len
> buf_size
) {
982 PyErr_Format(PyExc_ValueError
,
983 "read() returned too much data: "
984 "%i bytes requested, %i returned",
988 memcpy(buf
, PyString_AsString(str
), len
);
995 PyDoc_STRVAR(xmlparse_ParseFile__doc__
,
997 Parse XML data from file-like object.");
1000 xmlparse_ParseFile(xmlparseobject
*self
, PyObject
*f
)
1004 PyObject
*readmethod
= NULL
;
1006 if (PyFile_Check(f
)) {
1007 fp
= PyFile_AsFile(f
);
1011 readmethod
= PyObject_GetAttrString(f
, "read");
1012 if (readmethod
== NULL
) {
1014 PyErr_SetString(PyExc_TypeError
,
1015 "argument must have 'read' attribute");
1021 void *buf
= XML_GetBuffer(self
->itself
, BUF_SIZE
);
1023 Py_XDECREF(readmethod
);
1024 return PyErr_NoMemory();
1028 bytes_read
= fread(buf
, sizeof(char), BUF_SIZE
, fp
);
1029 if (bytes_read
< 0) {
1030 PyErr_SetFromErrno(PyExc_IOError
);
1035 bytes_read
= readinst(buf
, BUF_SIZE
, readmethod
);
1036 if (bytes_read
< 0) {
1037 Py_DECREF(readmethod
);
1041 rv
= XML_ParseBuffer(self
->itself
, bytes_read
, bytes_read
== 0);
1042 if (PyErr_Occurred()) {
1043 Py_XDECREF(readmethod
);
1047 if (!rv
|| bytes_read
== 0)
1050 Py_XDECREF(readmethod
);
1051 return get_parse_result(self
, rv
);
1054 PyDoc_STRVAR(xmlparse_SetBase__doc__
,
1055 "SetBase(base_url)\n\
1056 Set the base URL for the parser.");
1059 xmlparse_SetBase(xmlparseobject
*self
, PyObject
*args
)
1063 if (!PyArg_ParseTuple(args
, "s:SetBase", &base
))
1065 if (!XML_SetBase(self
->itself
, base
)) {
1066 return PyErr_NoMemory();
1072 PyDoc_STRVAR(xmlparse_GetBase__doc__
,
1073 "GetBase() -> url\n\
1074 Return base URL string for the parser.");
1077 xmlparse_GetBase(xmlparseobject
*self
, PyObject
*unused
)
1079 return Py_BuildValue("z", XML_GetBase(self
->itself
));
1082 PyDoc_STRVAR(xmlparse_GetInputContext__doc__
,
1083 "GetInputContext() -> string\n\
1084 Return the untranslated text of the input that caused the current event.\n\
1085 If the event was generated by a large amount of text (such as a start tag\n\
1086 for an element with many attributes), not all of the text may be available.");
1089 xmlparse_GetInputContext(xmlparseobject
*self
, PyObject
*unused
)
1091 if (self
->in_callback
) {
1094 = XML_GetInputContext(self
->itself
, &offset
, &size
);
1097 return PyString_FromStringAndSize(buffer
+ offset
,
1106 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__
,
1107 "ExternalEntityParserCreate(context[, encoding])\n\
1108 Create a parser for parsing an external entity based on the\n\
1109 information passed to the ExternalEntityRefHandler.");
1112 xmlparse_ExternalEntityParserCreate(xmlparseobject
*self
, PyObject
*args
)
1115 char *encoding
= NULL
;
1116 xmlparseobject
*new_parser
;
1119 if (!PyArg_ParseTuple(args
, "z|s:ExternalEntityParserCreate",
1120 &context
, &encoding
)) {
1124 #ifndef Py_TPFLAGS_HAVE_GC
1125 /* Python versions 2.0 and 2.1 */
1126 new_parser
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1128 /* Python versions 2.2 and later */
1129 new_parser
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1132 if (new_parser
== NULL
)
1134 new_parser
->buffer_size
= self
->buffer_size
;
1135 new_parser
->buffer_used
= 0;
1136 if (self
->buffer
!= NULL
) {
1137 new_parser
->buffer
= malloc(new_parser
->buffer_size
);
1138 if (new_parser
->buffer
== NULL
) {
1139 #ifndef Py_TPFLAGS_HAVE_GC
1140 /* Code for versions 2.0 and 2.1 */
1141 PyObject_Del(new_parser
);
1143 /* Code for versions 2.2 and later. */
1144 PyObject_GC_Del(new_parser
);
1146 return PyErr_NoMemory();
1150 new_parser
->buffer
= NULL
;
1151 new_parser
->returns_unicode
= self
->returns_unicode
;
1152 new_parser
->ordered_attributes
= self
->ordered_attributes
;
1153 new_parser
->specified_attributes
= self
->specified_attributes
;
1154 new_parser
->in_callback
= 0;
1155 new_parser
->ns_prefixes
= self
->ns_prefixes
;
1156 new_parser
->itself
= XML_ExternalEntityParserCreate(self
->itself
, context
,
1158 new_parser
->handlers
= 0;
1159 new_parser
->intern
= self
->intern
;
1160 Py_XINCREF(new_parser
->intern
);
1161 #ifdef Py_TPFLAGS_HAVE_GC
1162 PyObject_GC_Track(new_parser
);
1164 PyObject_GC_Init(new_parser
);
1167 if (!new_parser
->itself
) {
1168 Py_DECREF(new_parser
);
1169 return PyErr_NoMemory();
1172 XML_SetUserData(new_parser
->itself
, (void *)new_parser
);
1174 /* allocate and clear handlers first */
1175 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1178 new_parser
->handlers
= malloc(sizeof(PyObject
*) * i
);
1179 if (!new_parser
->handlers
) {
1180 Py_DECREF(new_parser
);
1181 return PyErr_NoMemory();
1183 clear_handlers(new_parser
, 1);
1185 /* then copy handlers from self */
1186 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1187 PyObject
*handler
= self
->handlers
[i
];
1188 if (handler
!= NULL
) {
1190 new_parser
->handlers
[i
] = handler
;
1191 handler_info
[i
].setter(new_parser
->itself
,
1192 handler_info
[i
].handler
);
1195 return (PyObject
*)new_parser
;
1198 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__
,
1199 "SetParamEntityParsing(flag) -> success\n\
1200 Controls parsing of parameter entities (including the external DTD\n\
1201 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1202 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1203 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
1207 xmlparse_SetParamEntityParsing(xmlparseobject
*p
, PyObject
* args
)
1210 if (!PyArg_ParseTuple(args
, "i", &flag
))
1212 flag
= XML_SetParamEntityParsing(p
->itself
, flag
);
1213 return PyInt_FromLong(flag
);
1217 #if XML_COMBINED_VERSION >= 19505
1218 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__
,
1219 "UseForeignDTD([flag])\n\
1220 Allows the application to provide an artificial external subset if one is\n\
1221 not specified as part of the document instance. This readily allows the\n\
1222 use of a 'default' document type controlled by the application, while still\n\
1223 getting the advantage of providing document type information to the parser.\n\
1224 'flag' defaults to True if not provided.");
1227 xmlparse_UseForeignDTD(xmlparseobject
*self
, PyObject
*args
)
1229 PyObject
*flagobj
= NULL
;
1230 XML_Bool flag
= XML_TRUE
;
1232 if (!PyArg_UnpackTuple(args
, "UseForeignDTD", 0, 1, &flagobj
))
1234 if (flagobj
!= NULL
)
1235 flag
= PyObject_IsTrue(flagobj
) ? XML_TRUE
: XML_FALSE
;
1236 rc
= XML_UseForeignDTD(self
->itself
, flag
);
1237 if (rc
!= XML_ERROR_NONE
) {
1238 return set_error(self
, rc
);
1245 static struct PyMethodDef xmlparse_methods
[] = {
1246 {"Parse", (PyCFunction
)xmlparse_Parse
,
1247 METH_VARARGS
, xmlparse_Parse__doc__
},
1248 {"ParseFile", (PyCFunction
)xmlparse_ParseFile
,
1249 METH_O
, xmlparse_ParseFile__doc__
},
1250 {"SetBase", (PyCFunction
)xmlparse_SetBase
,
1251 METH_VARARGS
, xmlparse_SetBase__doc__
},
1252 {"GetBase", (PyCFunction
)xmlparse_GetBase
,
1253 METH_NOARGS
, xmlparse_GetBase__doc__
},
1254 {"ExternalEntityParserCreate", (PyCFunction
)xmlparse_ExternalEntityParserCreate
,
1255 METH_VARARGS
, xmlparse_ExternalEntityParserCreate__doc__
},
1256 {"SetParamEntityParsing", (PyCFunction
)xmlparse_SetParamEntityParsing
,
1257 METH_VARARGS
, xmlparse_SetParamEntityParsing__doc__
},
1258 {"GetInputContext", (PyCFunction
)xmlparse_GetInputContext
,
1259 METH_NOARGS
, xmlparse_GetInputContext__doc__
},
1260 #if XML_COMBINED_VERSION >= 19505
1261 {"UseForeignDTD", (PyCFunction
)xmlparse_UseForeignDTD
,
1262 METH_VARARGS
, xmlparse_UseForeignDTD__doc__
},
1264 {NULL
, NULL
} /* sentinel */
1270 #ifdef Py_USING_UNICODE
1272 /* pyexpat international encoding support.
1273 Make it as simple as possible.
1276 static char template_buffer
[257];
1277 PyObject
*template_string
= NULL
;
1280 init_template_buffer(void)
1283 for (i
= 0; i
< 256; i
++) {
1284 template_buffer
[i
] = i
;
1286 template_buffer
[256] = 0;
1290 PyUnknownEncodingHandler(void *encodingHandlerData
,
1291 const XML_Char
*name
,
1294 PyUnicodeObject
*_u_string
= NULL
;
1298 /* Yes, supports only 8bit encodings */
1299 _u_string
= (PyUnicodeObject
*)
1300 PyUnicode_Decode(template_buffer
, 256, name
, "replace");
1302 if (_u_string
== NULL
)
1305 for (i
= 0; i
< 256; i
++) {
1306 /* Stupid to access directly, but fast */
1307 Py_UNICODE c
= _u_string
->str
[i
];
1308 if (c
== Py_UNICODE_REPLACEMENT_CHARACTER
)
1314 info
->convert
= NULL
;
1315 info
->release
= NULL
;
1317 Py_DECREF(_u_string
);
1324 newxmlparseobject(char *encoding
, char *namespace_separator
, PyObject
*intern
)
1327 xmlparseobject
*self
;
1329 #ifdef Py_TPFLAGS_HAVE_GC
1330 /* Code for versions 2.2 and later */
1331 self
= PyObject_GC_New(xmlparseobject
, &Xmlparsetype
);
1333 self
= PyObject_New(xmlparseobject
, &Xmlparsetype
);
1338 #ifdef Py_USING_UNICODE
1339 self
->returns_unicode
= 1;
1341 self
->returns_unicode
= 0;
1344 self
->buffer
= NULL
;
1345 self
->buffer_size
= CHARACTER_DATA_BUFFER_SIZE
;
1346 self
->buffer_used
= 0;
1347 self
->ordered_attributes
= 0;
1348 self
->specified_attributes
= 0;
1349 self
->in_callback
= 0;
1350 self
->ns_prefixes
= 0;
1351 self
->handlers
= NULL
;
1352 if (namespace_separator
!= NULL
) {
1353 self
->itself
= XML_ParserCreateNS(encoding
, *namespace_separator
);
1356 self
->itself
= XML_ParserCreate(encoding
);
1358 self
->intern
= intern
;
1359 Py_XINCREF(self
->intern
);
1360 #ifdef Py_TPFLAGS_HAVE_GC
1361 PyObject_GC_Track(self
);
1363 PyObject_GC_Init(self
);
1365 if (self
->itself
== NULL
) {
1366 PyErr_SetString(PyExc_RuntimeError
,
1367 "XML_ParserCreate failed");
1371 XML_SetUserData(self
->itself
, (void *)self
);
1372 #ifdef Py_USING_UNICODE
1373 XML_SetUnknownEncodingHandler(self
->itself
,
1374 (XML_UnknownEncodingHandler
) PyUnknownEncodingHandler
, NULL
);
1377 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++)
1380 self
->handlers
= malloc(sizeof(PyObject
*) * i
);
1381 if (!self
->handlers
) {
1383 return PyErr_NoMemory();
1385 clear_handlers(self
, 1);
1387 return (PyObject
*)self
;
1392 xmlparse_dealloc(xmlparseobject
*self
)
1395 #ifdef Py_TPFLAGS_HAVE_GC
1396 PyObject_GC_UnTrack(self
);
1398 PyObject_GC_Fini(self
);
1400 if (self
->itself
!= NULL
)
1401 XML_ParserFree(self
->itself
);
1402 self
->itself
= NULL
;
1404 if (self
->handlers
!= NULL
) {
1406 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1407 temp
= self
->handlers
[i
];
1408 self
->handlers
[i
] = NULL
;
1411 free(self
->handlers
);
1412 self
->handlers
= NULL
;
1414 if (self
->buffer
!= NULL
) {
1416 self
->buffer
= NULL
;
1418 Py_XDECREF(self
->intern
);
1419 #ifndef Py_TPFLAGS_HAVE_GC
1420 /* Code for versions 2.0 and 2.1 */
1423 /* Code for versions 2.2 and later. */
1424 PyObject_GC_Del(self
);
1429 handlername2int(const char *name
)
1432 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1433 if (strcmp(name
, handler_info
[i
].name
) == 0) {
1441 get_pybool(int istrue
)
1443 PyObject
*result
= istrue
? Py_True
: Py_False
;
1449 xmlparse_getattr(xmlparseobject
*self
, char *name
)
1451 int handlernum
= handlername2int(name
);
1453 if (handlernum
!= -1) {
1454 PyObject
*result
= self
->handlers
[handlernum
];
1460 if (name
[0] == 'E') {
1461 if (strcmp(name
, "ErrorCode") == 0)
1462 return PyInt_FromLong((long)
1463 XML_GetErrorCode(self
->itself
));
1464 if (strcmp(name
, "ErrorLineNumber") == 0)
1465 return PyInt_FromLong((long)
1466 XML_GetErrorLineNumber(self
->itself
));
1467 if (strcmp(name
, "ErrorColumnNumber") == 0)
1468 return PyInt_FromLong((long)
1469 XML_GetErrorColumnNumber(self
->itself
));
1470 if (strcmp(name
, "ErrorByteIndex") == 0)
1471 return PyInt_FromLong((long)
1472 XML_GetErrorByteIndex(self
->itself
));
1474 if (name
[0] == 'C') {
1475 if (strcmp(name
, "CurrentLineNumber") == 0)
1476 return PyInt_FromLong((long)
1477 XML_GetCurrentLineNumber(self
->itself
));
1478 if (strcmp(name
, "CurrentColumnNumber") == 0)
1479 return PyInt_FromLong((long)
1480 XML_GetCurrentColumnNumber(self
->itself
));
1481 if (strcmp(name
, "CurrentByteIndex") == 0)
1482 return PyInt_FromLong((long)
1483 XML_GetCurrentByteIndex(self
->itself
));
1485 if (name
[0] == 'b') {
1486 if (strcmp(name
, "buffer_size") == 0)
1487 return PyInt_FromLong((long) self
->buffer_size
);
1488 if (strcmp(name
, "buffer_text") == 0)
1489 return get_pybool(self
->buffer
!= NULL
);
1490 if (strcmp(name
, "buffer_used") == 0)
1491 return PyInt_FromLong((long) self
->buffer_used
);
1493 if (strcmp(name
, "namespace_prefixes") == 0)
1494 return get_pybool(self
->ns_prefixes
);
1495 if (strcmp(name
, "ordered_attributes") == 0)
1496 return get_pybool(self
->ordered_attributes
);
1497 if (strcmp(name
, "returns_unicode") == 0)
1498 return get_pybool((long) self
->returns_unicode
);
1499 if (strcmp(name
, "specified_attributes") == 0)
1500 return get_pybool((long) self
->specified_attributes
);
1501 if (strcmp(name
, "intern") == 0) {
1502 if (self
->intern
== NULL
) {
1507 Py_INCREF(self
->intern
);
1508 return self
->intern
;
1512 #define APPEND(list, str) \
1514 PyObject *o = PyString_FromString(str); \
1516 PyList_Append(list, o); \
1520 if (strcmp(name
, "__members__") == 0) {
1522 PyObject
*rc
= PyList_New(0);
1525 for (i
= 0; handler_info
[i
].name
!= NULL
; i
++) {
1526 PyObject
*o
= get_handler_name(&handler_info
[i
]);
1528 PyList_Append(rc
, o
);
1531 APPEND(rc
, "ErrorCode");
1532 APPEND(rc
, "ErrorLineNumber");
1533 APPEND(rc
, "ErrorColumnNumber");
1534 APPEND(rc
, "ErrorByteIndex");
1535 APPEND(rc
, "CurrentLineNumber");
1536 APPEND(rc
, "CurrentColumnNumber");
1537 APPEND(rc
, "CurrentByteIndex");
1538 APPEND(rc
, "buffer_size");
1539 APPEND(rc
, "buffer_text");
1540 APPEND(rc
, "buffer_used");
1541 APPEND(rc
, "namespace_prefixes");
1542 APPEND(rc
, "ordered_attributes");
1543 APPEND(rc
, "returns_unicode");
1544 APPEND(rc
, "specified_attributes");
1545 APPEND(rc
, "intern");
1550 return Py_FindMethod(xmlparse_methods
, (PyObject
*)self
, name
);
1554 sethandler(xmlparseobject
*self
, const char *name
, PyObject
* v
)
1556 int handlernum
= handlername2int(name
);
1557 if (handlernum
>= 0) {
1558 xmlhandler c_handler
= NULL
;
1559 PyObject
*temp
= self
->handlers
[handlernum
];
1562 /* If this is the character data handler, and a character
1563 data handler is already active, we need to be more
1564 careful. What we can safely do is replace the existing
1565 character data handler callback function with a no-op
1566 function that will refuse to call Python. The downside
1567 is that this doesn't completely remove the character
1568 data handler from the C layer if there's any callback
1569 active, so Expat does a little more work than it
1570 otherwise would, but that's really an odd case. A more
1571 elaborate system of handlers and state could remove the
1572 C handler more effectively. */
1573 if (handlernum
== CharacterData
&& self
->in_callback
)
1574 c_handler
= noop_character_data_handler
;
1577 else if (v
!= NULL
) {
1579 c_handler
= handler_info
[handlernum
].handler
;
1581 self
->handlers
[handlernum
] = v
;
1583 handler_info
[handlernum
].setter(self
->itself
, c_handler
);
1590 xmlparse_setattr(xmlparseobject
*self
, char *name
, PyObject
*v
)
1592 /* Set attribute 'name' to value 'v'. v==NULL means delete */
1594 PyErr_SetString(PyExc_RuntimeError
, "Cannot delete attribute");
1597 if (strcmp(name
, "buffer_text") == 0) {
1598 if (PyObject_IsTrue(v
)) {
1599 if (self
->buffer
== NULL
) {
1600 self
->buffer
= malloc(self
->buffer_size
);
1601 if (self
->buffer
== NULL
) {
1605 self
->buffer_used
= 0;
1608 else if (self
->buffer
!= NULL
) {
1609 if (flush_character_buffer(self
) < 0)
1612 self
->buffer
= NULL
;
1616 if (strcmp(name
, "namespace_prefixes") == 0) {
1617 if (PyObject_IsTrue(v
))
1618 self
->ns_prefixes
= 1;
1620 self
->ns_prefixes
= 0;
1621 XML_SetReturnNSTriplet(self
->itself
, self
->ns_prefixes
);
1624 if (strcmp(name
, "ordered_attributes") == 0) {
1625 if (PyObject_IsTrue(v
))
1626 self
->ordered_attributes
= 1;
1628 self
->ordered_attributes
= 0;
1631 if (strcmp(name
, "returns_unicode") == 0) {
1632 if (PyObject_IsTrue(v
)) {
1633 #ifndef Py_USING_UNICODE
1634 PyErr_SetString(PyExc_ValueError
,
1635 "Unicode support not available");
1638 self
->returns_unicode
= 1;
1642 self
->returns_unicode
= 0;
1645 if (strcmp(name
, "specified_attributes") == 0) {
1646 if (PyObject_IsTrue(v
))
1647 self
->specified_attributes
= 1;
1649 self
->specified_attributes
= 0;
1652 if (strcmp(name
, "CharacterDataHandler") == 0) {
1653 /* If we're changing the character data handler, flush all
1654 * cached data with the old handler. Not sure there's a
1655 * "right" thing to do, though, but this probably won't
1658 if (flush_character_buffer(self
) < 0)
1661 if (sethandler(self
, name
, v
)) {
1664 PyErr_SetString(PyExc_AttributeError
, name
);
#ifdef WITH_CYCLE_GC
/* tp_traverse: report every object reference held by the parser to the
 * cycle collector.  That covers the installed handlers and the interning
 * dictionary, which xmlparse_clear() releases as well.
 */
static int
xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
{
    int i;
    for (i = 0; handler_info[i].name != NULL; i++)
        Py_VISIT(op->handlers[i]);
    /* tp_clear drops this reference, so it must be visited here too. */
    Py_VISIT(op->intern);
    return 0;
}

/* tp_clear: drop the references that can take part in a reference cycle:
 * the handlers (also unregistered from Expat via clear_handlers) and the
 * interning dictionary.
 */
static int
xmlparse_clear(xmlparseobject *op)
{
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}
#endif
1687 PyDoc_STRVAR(Xmlparsetype__doc__
, "XML parser");
1689 static PyTypeObject Xmlparsetype
= {
1690 PyObject_HEAD_INIT(NULL
)
1692 "pyexpat.xmlparser", /*tp_name*/
1693 sizeof(xmlparseobject
) + PyGC_HEAD_SIZE
,/*tp_basicsize*/
1696 (destructor
)xmlparse_dealloc
, /*tp_dealloc*/
1697 (printfunc
)0, /*tp_print*/
1698 (getattrfunc
)xmlparse_getattr
, /*tp_getattr*/
1699 (setattrfunc
)xmlparse_setattr
, /*tp_setattr*/
1700 (cmpfunc
)0, /*tp_compare*/
1701 (reprfunc
)0, /*tp_repr*/
1703 0, /*tp_as_sequence*/
1704 0, /*tp_as_mapping*/
1705 (hashfunc
)0, /*tp_hash*/
1706 (ternaryfunc
)0, /*tp_call*/
1707 (reprfunc
)0, /*tp_str*/
1708 0, /* tp_getattro */
1709 0, /* tp_setattro */
1710 0, /* tp_as_buffer */
1711 #ifdef Py_TPFLAGS_HAVE_GC
1712 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /*tp_flags*/
1714 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_GC
, /*tp_flags*/
1716 Xmlparsetype__doc__
, /* tp_doc - Documentation string */
1717 #ifdef WITH_CYCLE_GC
1718 (traverseproc
)xmlparse_traverse
, /* tp_traverse */
1719 (inquiry
)xmlparse_clear
/* tp_clear */
1725 /* End of code for xmlparser objects */
1726 /* -------------------------------------------------------- */
1728 PyDoc_STRVAR(pyexpat_ParserCreate__doc__
,
1729 "ParserCreate([encoding[, namespace_separator]]) -> parser\n\
1730 Return a new XML parser object.");
1733 pyexpat_ParserCreate(PyObject
*notused
, PyObject
*args
, PyObject
*kw
)
1735 char *encoding
= NULL
;
1736 char *namespace_separator
= NULL
;
1737 PyObject
*intern
= NULL
;
1739 int intern_decref
= 0;
1740 static char *kwlist
[] = {"encoding", "namespace_separator",
1743 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|zzO:ParserCreate", kwlist
,
1744 &encoding
, &namespace_separator
, &intern
))
1746 if (namespace_separator
!= NULL
1747 && strlen(namespace_separator
) > 1) {
1748 PyErr_SetString(PyExc_ValueError
,
1749 "namespace_separator must be at most one"
1750 " character, omitted, or None");
1753 /* Explicitly passing None means no interning is desired.
1754 Not passing anything means that a new dictionary is used. */
1755 if (intern
== Py_None
)
1757 else if (intern
== NULL
) {
1758 intern
= PyDict_New();
1763 else if (!PyDict_Check(intern
)) {
1764 PyErr_SetString(PyExc_TypeError
, "intern must be a dictionary");
1768 result
= newxmlparseobject(encoding
, namespace_separator
, intern
);
1769 if (intern_decref
) {
1775 PyDoc_STRVAR(pyexpat_ErrorString__doc__
,
1776 "ErrorString(errno) -> string\n\
1777 Returns string error for given number.");
1780 pyexpat_ErrorString(PyObject
*self
, PyObject
*args
)
1784 if (!PyArg_ParseTuple(args
, "l:ErrorString", &code
))
1786 return Py_BuildValue("z", XML_ErrorString((int)code
));
1789 /* List of methods defined in the module */
1791 static struct PyMethodDef pyexpat_methods
[] = {
1792 {"ParserCreate", (PyCFunction
)pyexpat_ParserCreate
,
1793 METH_VARARGS
|METH_KEYWORDS
, pyexpat_ParserCreate__doc__
},
1794 {"ErrorString", (PyCFunction
)pyexpat_ErrorString
,
1795 METH_VARARGS
, pyexpat_ErrorString__doc__
},
1797 {NULL
, (PyCFunction
)NULL
, 0, NULL
} /* sentinel */
1800 /* Module docstring */
1802 PyDoc_STRVAR(pyexpat_module_documentation
,
1803 "Python wrapper for Expat parser.");
1805 /* Return a Python string that represents the version number without the
1806 * extra cruft added by revision control, even if the right options were
1807 * given to the "cvs export" command to make it not include the extra
1811 get_version_string(void)
1813 static char *rcsid
= "$Revision$";
1817 while (!isdigit(Py_CHARMASK(*rev
)))
1819 while (rev
[i
] != ' ' && rev
[i
] != '\0')
1822 return PyString_FromStringAndSize(rev
, i
);
1825 /* Initialization function for the module */
1828 #define MODULE_NAME "pyexpat"
1831 #ifndef MODULE_INITFUNC
1832 #define MODULE_INITFUNC initpyexpat
1835 #ifndef PyMODINIT_FUNC
1837 # define PyMODINIT_FUNC __declspec(dllexport) void
1839 # define PyMODINIT_FUNC void
1843 PyMODINIT_FUNC
MODULE_INITFUNC(void); /* avoid compiler warnings */
1846 MODULE_INITFUNC(void)
1849 PyObject
*errmod_name
= PyString_FromString(MODULE_NAME
".errors");
1850 PyObject
*errors_module
;
1851 PyObject
*modelmod_name
;
1852 PyObject
*model_module
;
1853 PyObject
*sys_modules
;
1854 static struct PyExpat_CAPI capi
;
1855 PyObject
* capi_object
;
1857 if (errmod_name
== NULL
)
1859 modelmod_name
= PyString_FromString(MODULE_NAME
".model");
1860 if (modelmod_name
== NULL
)
1863 Xmlparsetype
.ob_type
= &PyType_Type
;
1865 /* Create the module and add the functions */
1866 m
= Py_InitModule3(MODULE_NAME
, pyexpat_methods
,
1867 pyexpat_module_documentation
);
1871 /* Add some symbolic constants to the module */
1872 if (ErrorObject
== NULL
) {
1873 ErrorObject
= PyErr_NewException("xml.parsers.expat.ExpatError",
1875 if (ErrorObject
== NULL
)
1878 Py_INCREF(ErrorObject
);
1879 PyModule_AddObject(m
, "error", ErrorObject
);
1880 Py_INCREF(ErrorObject
);
1881 PyModule_AddObject(m
, "ExpatError", ErrorObject
);
1882 Py_INCREF(&Xmlparsetype
);
1883 PyModule_AddObject(m
, "XMLParserType", (PyObject
*) &Xmlparsetype
);
1885 PyModule_AddObject(m
, "__version__", get_version_string());
1886 PyModule_AddStringConstant(m
, "EXPAT_VERSION",
1887 (char *) XML_ExpatVersion());
1889 XML_Expat_Version info
= XML_ExpatVersionInfo();
1890 PyModule_AddObject(m
, "version_info",
1891 Py_BuildValue("(iii)", info
.major
,
1892 info
.minor
, info
.micro
));
1894 #ifdef Py_USING_UNICODE
1895 init_template_buffer();
1897 /* XXX When Expat supports some way of figuring out how it was
1898 compiled, this should check and set native_encoding
1901 PyModule_AddStringConstant(m
, "native_encoding", "UTF-8");
1903 sys_modules
= PySys_GetObject("modules");
1904 d
= PyModule_GetDict(m
);
1905 errors_module
= PyDict_GetItem(d
, errmod_name
);
1906 if (errors_module
== NULL
) {
1907 errors_module
= PyModule_New(MODULE_NAME
".errors");
1908 if (errors_module
!= NULL
) {
1909 PyDict_SetItem(sys_modules
, errmod_name
, errors_module
);
1910 /* gives away the reference to errors_module */
1911 PyModule_AddObject(m
, "errors", errors_module
);
1914 Py_DECREF(errmod_name
);
1915 model_module
= PyDict_GetItem(d
, modelmod_name
);
1916 if (model_module
== NULL
) {
1917 model_module
= PyModule_New(MODULE_NAME
".model");
1918 if (model_module
!= NULL
) {
1919 PyDict_SetItem(sys_modules
, modelmod_name
, model_module
);
1920 /* gives away the reference to model_module */
1921 PyModule_AddObject(m
, "model", model_module
);
1924 Py_DECREF(modelmod_name
);
1925 if (errors_module
== NULL
|| model_module
== NULL
)
1926 /* Don't core dump later! */
1929 #if XML_COMBINED_VERSION > 19505
1931 const XML_Feature
*features
= XML_GetFeatureList();
1932 PyObject
*list
= PyList_New(0);
1934 /* just ignore it */
1938 for (; features
[i
].feature
!= XML_FEATURE_END
; ++i
) {
1940 PyObject
*item
= Py_BuildValue("si", features
[i
].name
,
1947 ok
= PyList_Append(list
, item
);
1955 PyModule_AddObject(m
, "features", list
);
1960 #define MYCONST(name) \
1961 PyModule_AddStringConstant(errors_module, #name, \
1962 (char*)XML_ErrorString(name))
1964 MYCONST(XML_ERROR_NO_MEMORY
);
1965 MYCONST(XML_ERROR_SYNTAX
);
1966 MYCONST(XML_ERROR_NO_ELEMENTS
);
1967 MYCONST(XML_ERROR_INVALID_TOKEN
);
1968 MYCONST(XML_ERROR_UNCLOSED_TOKEN
);
1969 MYCONST(XML_ERROR_PARTIAL_CHAR
);
1970 MYCONST(XML_ERROR_TAG_MISMATCH
);
1971 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE
);
1972 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
1973 MYCONST(XML_ERROR_PARAM_ENTITY_REF
);
1974 MYCONST(XML_ERROR_UNDEFINED_ENTITY
);
1975 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF
);
1976 MYCONST(XML_ERROR_ASYNC_ENTITY
);
1977 MYCONST(XML_ERROR_BAD_CHAR_REF
);
1978 MYCONST(XML_ERROR_BINARY_ENTITY_REF
);
1979 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
1980 MYCONST(XML_ERROR_MISPLACED_XML_PI
);
1981 MYCONST(XML_ERROR_UNKNOWN_ENCODING
);
1982 MYCONST(XML_ERROR_INCORRECT_ENCODING
);
1983 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION
);
1984 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
1985 MYCONST(XML_ERROR_NOT_STANDALONE
);
1986 MYCONST(XML_ERROR_UNEXPECTED_STATE
);
1987 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE
);
1988 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD
);
1989 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
);
1990 /* Added in Expat 1.95.7. */
1991 MYCONST(XML_ERROR_UNBOUND_PREFIX
);
1992 /* Added in Expat 1.95.8. */
1993 MYCONST(XML_ERROR_UNDECLARING_PREFIX
);
1994 MYCONST(XML_ERROR_INCOMPLETE_PE
);
1995 MYCONST(XML_ERROR_XML_DECL
);
1996 MYCONST(XML_ERROR_TEXT_DECL
);
1997 MYCONST(XML_ERROR_PUBLICID
);
1998 MYCONST(XML_ERROR_SUSPENDED
);
1999 MYCONST(XML_ERROR_NOT_SUSPENDED
);
2000 MYCONST(XML_ERROR_ABORTED
);
2001 MYCONST(XML_ERROR_FINISHED
);
2002 MYCONST(XML_ERROR_SUSPEND_PE
);
2004 PyModule_AddStringConstant(errors_module
, "__doc__",
2005 "Constants used to describe error conditions.");
2009 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
2010 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER
);
2011 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
);
2012 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS
);
2015 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2016 PyModule_AddStringConstant(model_module
, "__doc__",
2017 "Constants used to interpret content model information.");
2019 MYCONST(XML_CTYPE_EMPTY
);
2020 MYCONST(XML_CTYPE_ANY
);
2021 MYCONST(XML_CTYPE_MIXED
);
2022 MYCONST(XML_CTYPE_NAME
);
2023 MYCONST(XML_CTYPE_CHOICE
);
2024 MYCONST(XML_CTYPE_SEQ
);
2026 MYCONST(XML_CQUANT_NONE
);
2027 MYCONST(XML_CQUANT_OPT
);
2028 MYCONST(XML_CQUANT_REP
);
2029 MYCONST(XML_CQUANT_PLUS
);
2032 /* initialize pyexpat dispatch table */
2033 capi
.size
= sizeof(capi
);
2034 capi
.magic
= PyExpat_CAPI_MAGIC
;
2035 capi
.MAJOR_VERSION
= XML_MAJOR_VERSION
;
2036 capi
.MINOR_VERSION
= XML_MINOR_VERSION
;
2037 capi
.MICRO_VERSION
= XML_MICRO_VERSION
;
2038 capi
.ErrorString
= XML_ErrorString
;
2039 capi
.GetErrorCode
= XML_GetErrorCode
;
2040 capi
.GetErrorColumnNumber
= XML_GetErrorColumnNumber
;
2041 capi
.GetErrorLineNumber
= XML_GetErrorLineNumber
;
2042 capi
.Parse
= XML_Parse
;
2043 capi
.ParserCreate_MM
= XML_ParserCreate_MM
;
2044 capi
.ParserFree
= XML_ParserFree
;
2045 capi
.SetCharacterDataHandler
= XML_SetCharacterDataHandler
;
2046 capi
.SetCommentHandler
= XML_SetCommentHandler
;
2047 capi
.SetDefaultHandlerExpand
= XML_SetDefaultHandlerExpand
;
2048 capi
.SetElementHandler
= XML_SetElementHandler
;
2049 capi
.SetNamespaceDeclHandler
= XML_SetNamespaceDeclHandler
;
2050 capi
.SetProcessingInstructionHandler
= XML_SetProcessingInstructionHandler
;
2051 capi
.SetUnknownEncodingHandler
= XML_SetUnknownEncodingHandler
;
2052 capi
.SetUserData
= XML_SetUserData
;
2054 /* export as cobject */
2055 capi_object
= PyCObject_FromVoidPtr(&capi
, NULL
);
2057 PyModule_AddObject(m
, "expat_CAPI", capi_object
);
2061 clear_handlers(xmlparseobject
*self
, int initial
)
2066 for (; handler_info
[i
].name
!= NULL
; i
++) {
2068 self
->handlers
[i
] = NULL
;
2070 temp
= self
->handlers
[i
];
2071 self
->handlers
[i
] = NULL
;
2073 handler_info
[i
].setter(self
->itself
, NULL
);
2078 static struct HandlerInfo handler_info
[] = {
2079 {"StartElementHandler",
2080 (xmlhandlersetter
)XML_SetStartElementHandler
,
2081 (xmlhandler
)my_StartElementHandler
},
2082 {"EndElementHandler",
2083 (xmlhandlersetter
)XML_SetEndElementHandler
,
2084 (xmlhandler
)my_EndElementHandler
},
2085 {"ProcessingInstructionHandler",
2086 (xmlhandlersetter
)XML_SetProcessingInstructionHandler
,
2087 (xmlhandler
)my_ProcessingInstructionHandler
},
2088 {"CharacterDataHandler",
2089 (xmlhandlersetter
)XML_SetCharacterDataHandler
,
2090 (xmlhandler
)my_CharacterDataHandler
},
2091 {"UnparsedEntityDeclHandler",
2092 (xmlhandlersetter
)XML_SetUnparsedEntityDeclHandler
,
2093 (xmlhandler
)my_UnparsedEntityDeclHandler
},
2094 {"NotationDeclHandler",
2095 (xmlhandlersetter
)XML_SetNotationDeclHandler
,
2096 (xmlhandler
)my_NotationDeclHandler
},
2097 {"StartNamespaceDeclHandler",
2098 (xmlhandlersetter
)XML_SetStartNamespaceDeclHandler
,
2099 (xmlhandler
)my_StartNamespaceDeclHandler
},
2100 {"EndNamespaceDeclHandler",
2101 (xmlhandlersetter
)XML_SetEndNamespaceDeclHandler
,
2102 (xmlhandler
)my_EndNamespaceDeclHandler
},
2104 (xmlhandlersetter
)XML_SetCommentHandler
,
2105 (xmlhandler
)my_CommentHandler
},
2106 {"StartCdataSectionHandler",
2107 (xmlhandlersetter
)XML_SetStartCdataSectionHandler
,
2108 (xmlhandler
)my_StartCdataSectionHandler
},
2109 {"EndCdataSectionHandler",
2110 (xmlhandlersetter
)XML_SetEndCdataSectionHandler
,
2111 (xmlhandler
)my_EndCdataSectionHandler
},
2113 (xmlhandlersetter
)XML_SetDefaultHandler
,
2114 (xmlhandler
)my_DefaultHandler
},
2115 {"DefaultHandlerExpand",
2116 (xmlhandlersetter
)XML_SetDefaultHandlerExpand
,
2117 (xmlhandler
)my_DefaultHandlerExpandHandler
},
2118 {"NotStandaloneHandler",
2119 (xmlhandlersetter
)XML_SetNotStandaloneHandler
,
2120 (xmlhandler
)my_NotStandaloneHandler
},
2121 {"ExternalEntityRefHandler",
2122 (xmlhandlersetter
)XML_SetExternalEntityRefHandler
,
2123 (xmlhandler
)my_ExternalEntityRefHandler
},
2124 {"StartDoctypeDeclHandler",
2125 (xmlhandlersetter
)XML_SetStartDoctypeDeclHandler
,
2126 (xmlhandler
)my_StartDoctypeDeclHandler
},
2127 {"EndDoctypeDeclHandler",
2128 (xmlhandlersetter
)XML_SetEndDoctypeDeclHandler
,
2129 (xmlhandler
)my_EndDoctypeDeclHandler
},
2130 {"EntityDeclHandler",
2131 (xmlhandlersetter
)XML_SetEntityDeclHandler
,
2132 (xmlhandler
)my_EntityDeclHandler
},
2134 (xmlhandlersetter
)XML_SetXmlDeclHandler
,
2135 (xmlhandler
)my_XmlDeclHandler
},
2136 {"ElementDeclHandler",
2137 (xmlhandlersetter
)XML_SetElementDeclHandler
,
2138 (xmlhandler
)my_ElementDeclHandler
},
2139 {"AttlistDeclHandler",
2140 (xmlhandlersetter
)XML_SetAttlistDeclHandler
,
2141 (xmlhandler
)my_AttlistDeclHandler
},
2142 #if XML_COMBINED_VERSION >= 19504
2143 {"SkippedEntityHandler",
2144 (xmlhandlersetter
)XML_SetSkippedEntityHandler
,
2145 (xmlhandler
)my_SkippedEntityHandler
},
2148 {NULL
, NULL
, NULL
} /* sentinel */