1 # XXX TypeErrors on calling handlers, or on bad return values from a
2 # handler, are obscure and unhelpful.
7 from xml
.parsers
import expat
9 from test
.test_support
import sortdict
, run_unittest
12 class SetAttributeTest(unittest
.TestCase
):
14 self
.parser
= expat
.ParserCreate(namespace_separator
='!')
15 self
.set_get_pairs
= [
22 def test_returns_unicode(self
):
23 for x
, y
in self
.set_get_pairs
:
24 self
.parser
.returns_unicode
= x
25 self
.assertEquals(self
.parser
.returns_unicode
, y
)
27 def test_ordered_attributes(self
):
28 for x
, y
in self
.set_get_pairs
:
29 self
.parser
.ordered_attributes
= x
30 self
.assertEquals(self
.parser
.ordered_attributes
, y
)
32 def test_specified_attributes(self
):
33 for x
, y
in self
.set_get_pairs
:
34 self
.parser
.specified_attributes
= x
35 self
.assertEquals(self
.parser
.specified_attributes
, y
)
39 <?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
40 <?xml-stylesheet href="stylesheet.css"?>
42 <!DOCTYPE quotations SYSTEM "quotations.dtd" [
44 <!NOTATION notation SYSTEM "notation.jpeg">
45 <!ENTITY acirc "â">
46 <!ENTITY external_entity SYSTEM "entity.file">
47 <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
51 <root attr1="value1" attr2="value2ὀ">
52 <myns:subelement xmlns:myns="http://www.python.org/namespace">
53 Contents of subelements
55 <sub2><![CDATA[contents of CDATA section]]></sub2>
61 # Produce UTF-8 output
62 class ParseTest(unittest
.TestCase
):
67 def StartElementHandler(self
, name
, attrs
):
68 self
.out
.append('Start element: ' + repr(name
) + ' ' +
def EndElementHandler(self, name):
    """Record an ``'End element: '`` entry followed by ``repr(name)``.

    The entry is appended to ``self.out``.
    """
    entry = 'End element: %r' % (name,)
    self.out.append(entry)
74 def CharacterDataHandler(self
, data
):
77 self
.out
.append('Character data: ' + repr(data
))
def ProcessingInstructionHandler(self, target, data):
    """Record a ``'PI: '`` entry with the reprs of *target* and *data*.

    The two reprs are separated by a single space and the entry is appended
    to ``self.out``.
    """
    entry = 'PI: %r %r' % (target, data)
    self.out.append(entry)
def StartNamespaceDeclHandler(self, prefix, uri):
    """Record an ``'NS decl: '`` entry with the reprs of *prefix* and *uri*.

    The two reprs are separated by a single space and the entry is appended
    to ``self.out``.
    """
    entry = 'NS decl: %r %r' % (prefix, uri)
    self.out.append(entry)
def EndNamespaceDeclHandler(self, prefix):
    """Record an ``'End of NS decl: '`` entry followed by ``repr(prefix)``.

    The entry is appended to ``self.out``.
    """
    entry = 'End of NS decl: %r' % (prefix,)
    self.out.append(entry)
def StartCdataSectionHandler(self):
    """Append the fixed marker for the start of a CDATA section to ``self.out``."""
    marker = 'Start of CDATA section'
    self.out.append(marker)
def EndCdataSectionHandler(self):
    """Append the fixed marker for the end of a CDATA section to ``self.out``."""
    marker = 'End of CDATA section'
    self.out.append(marker)
def CommentHandler(self, text):
    """Record a ``'Comment: '`` entry followed by ``repr(text)``.

    The entry is appended to ``self.out``.
    """
    entry = 'Comment: %r' % (text,)
    self.out.append(entry)
def NotationDeclHandler(self, *args):
    """Record a ``'Notation declared: '`` entry showing the argument tuple.

    Exactly four positional arguments are required; any other count raises
    ``ValueError`` from the unpacking below before anything is recorded.
    """
    # Unpack purely to enforce the four-argument shape.
    _name, _base, _system_id, _public_id = args
    entry = 'Notation declared: ' + str(args)
    self.out.append(entry)
def UnparsedEntityDeclHandler(self, *args):
    """Record an ``'Unparsed entity decl: '`` entry showing the argument tuple.

    Exactly five positional arguments are required; any other count raises
    ``ValueError`` from the unpacking below before anything is recorded.
    """
    # Unpack purely to enforce the five-argument shape.
    _entity, _base, _system_id, _public_id, _notation = args
    entry = 'Unparsed entity decl: ' + str(args)
    self.out.append(entry)
105 def NotStandaloneHandler(self
, userData
):
106 self
.out
.append('Not standalone')
109 def ExternalEntityRefHandler(self
, *args
):
110 context
, base
, sysId
, pubId
= args
111 self
.out
.append('External entity ref: %s' %(args
[1:],))
114 def DefaultHandler(self
, userData
):
117 def DefaultHandlerExpand(self
, userData
):
121 'StartElementHandler', 'EndElementHandler',
122 'CharacterDataHandler', 'ProcessingInstructionHandler',
123 'UnparsedEntityDeclHandler', 'NotationDeclHandler',
124 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
125 'CommentHandler', 'StartCdataSectionHandler',
126 'EndCdataSectionHandler',
127 'DefaultHandler', 'DefaultHandlerExpand',
128 #'NotStandaloneHandler',
129 'ExternalEntityRefHandler'
134 out
= self
.Outputter()
135 parser
= expat
.ParserCreate(namespace_separator
='!')
136 for name
in self
.handler_names
:
137 setattr(parser
, name
, getattr(out
, name
))
138 parser
.returns_unicode
= 0
139 parser
.Parse(data
, 1)
143 self
.assertEquals(op
[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'')
144 self
.assertEquals(op
[1], "Comment: ' comment data '")
145 self
.assertEquals(op
[2], "Notation declared: ('notation', None, 'notation.jpeg', None)")
146 self
.assertEquals(op
[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')")
147 self
.assertEquals(op
[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}")
148 self
.assertEquals(op
[5], "NS decl: 'myns' 'http://www.python.org/namespace'")
149 self
.assertEquals(op
[6], "Start element: 'http://www.python.org/namespace!subelement' {}")
150 self
.assertEquals(op
[7], "Character data: 'Contents of subelements'")
151 self
.assertEquals(op
[8], "End element: 'http://www.python.org/namespace!subelement'")
152 self
.assertEquals(op
[9], "End of NS decl: 'myns'")
153 self
.assertEquals(op
[10], "Start element: 'sub2' {}")
154 self
.assertEquals(op
[11], 'Start of CDATA section')
155 self
.assertEquals(op
[12], "Character data: 'contents of CDATA section'")
156 self
.assertEquals(op
[13], 'End of CDATA section')
157 self
.assertEquals(op
[14], "End element: 'sub2'")
158 self
.assertEquals(op
[15], "External entity ref: (None, 'entity.file', None)")
159 self
.assertEquals(op
[16], "End element: 'root'")
161 def test_unicode(self
):
162 # Try the parse again, this time producing Unicode output
163 out
= self
.Outputter()
164 parser
= expat
.ParserCreate(namespace_separator
='!')
165 parser
.returns_unicode
= 1
166 for name
in self
.handler_names
:
167 setattr(parser
, name
, getattr(out
, name
))
169 parser
.Parse(data
, 1)
172 self
.assertEquals(op
[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
173 self
.assertEquals(op
[1], "Comment: u' comment data '")
174 self
.assertEquals(op
[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
175 self
.assertEquals(op
[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
176 self
.assertEquals(op
[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
177 self
.assertEquals(op
[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
178 self
.assertEquals(op
[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
179 self
.assertEquals(op
[7], "Character data: u'Contents of subelements'")
180 self
.assertEquals(op
[8], "End element: u'http://www.python.org/namespace!subelement'")
181 self
.assertEquals(op
[9], "End of NS decl: u'myns'")
182 self
.assertEquals(op
[10], "Start element: u'sub2' {}")
183 self
.assertEquals(op
[11], 'Start of CDATA section')
184 self
.assertEquals(op
[12], "Character data: u'contents of CDATA section'")
185 self
.assertEquals(op
[13], 'End of CDATA section')
186 self
.assertEquals(op
[14], "End element: u'sub2'")
187 self
.assertEquals(op
[15], "External entity ref: (None, u'entity.file', None)")
188 self
.assertEquals(op
[16], "End element: u'root'")
190 def test_parse_file(self
):
192 out
= self
.Outputter()
193 parser
= expat
.ParserCreate(namespace_separator
='!')
194 parser
.returns_unicode
= 1
195 for name
in self
.handler_names
:
196 setattr(parser
, name
, getattr(out
, name
))
197 file = StringIO
.StringIO(data
)
199 parser
.ParseFile(file)
202 self
.assertEquals(op
[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
203 self
.assertEquals(op
[1], "Comment: u' comment data '")
204 self
.assertEquals(op
[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
205 self
.assertEquals(op
[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
206 self
.assertEquals(op
[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
207 self
.assertEquals(op
[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
208 self
.assertEquals(op
[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
209 self
.assertEquals(op
[7], "Character data: u'Contents of subelements'")
210 self
.assertEquals(op
[8], "End element: u'http://www.python.org/namespace!subelement'")
211 self
.assertEquals(op
[9], "End of NS decl: u'myns'")
212 self
.assertEquals(op
[10], "Start element: u'sub2' {}")
213 self
.assertEquals(op
[11], 'Start of CDATA section')
214 self
.assertEquals(op
[12], "Character data: u'contents of CDATA section'")
215 self
.assertEquals(op
[13], 'End of CDATA section')
216 self
.assertEquals(op
[14], "End element: u'sub2'")
217 self
.assertEquals(op
[15], "External entity ref: (None, u'entity.file', None)")
218 self
.assertEquals(op
[16], "End element: u'root'")
221 class NamespaceSeparatorTest(unittest
.TestCase
):
222 def test_legal(self
):
223 # Tests that make sure we get errors when the namespace_separator value
224 # is illegal, and that we don't for good values:
226 expat
.ParserCreate(namespace_separator
=None)
227 expat
.ParserCreate(namespace_separator
=' ')
229 def test_illegal(self
):
231 expat
.ParserCreate(namespace_separator
=42)
234 self
.assertEquals(str(e
),
235 'ParserCreate() argument 2 must be string or None, not int')
238 expat
.ParserCreate(namespace_separator
='too long')
240 except ValueError, e
:
241 self
.assertEquals(str(e
),
242 'namespace_separator must be at most one character, omitted, or None')
244 def test_zero_length(self
):
245 # ParserCreate() needs to accept a namespace_separator of zero length
246 # to satisfy the requirements of RDF applications that are required
247 # to simply glue together the namespace URI and the localname. Though
248 # considered a wart of the RDF specifications, it needs to be supported.
250 # See XML-SIG mailing list thread starting with
251 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
253 expat
.ParserCreate(namespace_separator
='') # too short
256 class InterningTest(unittest
.TestCase
):
258 # Test the interning machinery.
259 p
= expat
.ParserCreate()
261 def collector(name
, *args
):
263 p
.StartElementHandler
= collector
264 p
.EndElementHandler
= collector
265 p
.Parse("<e> <e/> <e></e> </e>", 1)
267 self
.assertEquals(len(L
), 6)
269 # L should have the same string repeated over and over.
270 self
.assertTrue(tag
is entry
)
273 class BufferTextTest(unittest
.TestCase
):
276 self
.parser
= expat
.ParserCreate()
277 self
.parser
.buffer_text
= 1
278 self
.parser
.CharacterDataHandler
= self
.CharacterDataHandler
def check(self, expected, label):
    """Assert that ``self.stuff`` equals *expected*.

    The failure message is built from *label*, the repr of ``self.stuff``
    and the repr of *expected* after mapping ``unicode`` over its items.
    """
    expected_as_unicode = map(unicode, expected)
    failure_message = ("%s\nstuff = %r\nexpected = %r"
                       % (label, self.stuff, expected_as_unicode))
    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias.
    self.assertEqual(self.stuff, expected, failure_message)
def CharacterDataHandler(self, text):
    """Append *text* unchanged to ``self.stuff``."""
    collected = self.stuff
    collected.append(text)
288 def StartElementHandler(self
, name
, attrs
):
289 self
.stuff
.append("<%s>" % name
)
290 bt
= attrs
.get("buffer-text")
292 self
.parser
.buffer_text
= 1
294 self
.parser
.buffer_text
= 0
def EndElementHandler(self, name):
    """Append *name* formatted as a closing tag (``</name>``) to ``self.stuff``."""
    closing_tag = "</%s>" % name
    self.stuff.append(closing_tag)
def CommentHandler(self, data):
    """Append *data* wrapped in ``<!--`` and ``-->`` to ``self.stuff``."""
    comment_markup = "<!--%s-->" % data
    self.stuff.append(comment_markup)
def setHandlers(self, handlers=()):
    """Install this object's handler methods on ``self.parser``.

    *handlers* is an iterable of attribute names. For each name, the
    attribute of that name on ``self`` is looked up and assigned to the
    same-named attribute of ``self.parser``. With no argument nothing is
    installed.
    """
    # The default is an immutable empty tuple rather than a shared mutable
    # list; the argument is only iterated, so callers see no difference.
    for handler_name in handlers:
        setattr(self.parser, handler_name, getattr(self, handler_name))
306 def test_default_to_disabled(self
):
307 parser
= expat
.ParserCreate()
308 self
.assertFalse(parser
.buffer_text
)
310 def test_buffering_enabled(self
):
311 # Make sure buffering is turned on
312 self
.assertTrue(self
.parser
.buffer_text
)
313 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
314 self
.assertEquals(self
.stuff
, ['123'],
315 "buffered text not properly collapsed")
318 # XXX This test exposes more detail of Expat's text chunking than we
319 # XXX like, but it tests what we need to concisely.
320 self
.setHandlers(["StartElementHandler"])
321 self
.parser
.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
322 self
.assertEquals(self
.stuff
,
323 ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
324 "buffering control not reacting as expected")
327 self
.parser
.Parse("<a>1<b/><2><c/> \n 3</a>", 1)
328 self
.assertEquals(self
.stuff
, ["1<2> \n 3"],
329 "buffered text not properly collapsed")
332 self
.setHandlers(["StartElementHandler"])
333 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
334 self
.assertEquals(self
.stuff
, ["<a>", "1", "<b>", "2", "<c>", "3"],
335 "buffered text not properly split")
338 self
.setHandlers(["StartElementHandler", "EndElementHandler"])
339 self
.parser
.CharacterDataHandler
= None
340 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
341 self
.assertEquals(self
.stuff
,
342 ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
345 self
.setHandlers(["StartElementHandler", "EndElementHandler"])
346 self
.parser
.Parse("<a>1<b></b>2<c/>3</a>", 1)
347 self
.assertEquals(self
.stuff
,
348 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
351 self
.setHandlers(["CommentHandler", "EndElementHandler",
352 "StartElementHandler"])
353 self
.parser
.Parse("<a>1<b/>2<c></c>345</a> ", 1)
354 self
.assertEquals(self
.stuff
,
355 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
356 "buffered text not properly split")
359 self
.setHandlers(["CommentHandler", "EndElementHandler",
360 "StartElementHandler"])
361 self
.parser
.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
362 self
.assertEquals(self
.stuff
,
363 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
364 "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
365 "buffered text not properly split")
368 # Test handling of exception from callback:
369 class HandlerExceptionTest(unittest
.TestCase
):
def StartElementHandler(self, name, attrs):
    """Always raise ``RuntimeError`` carrying *name* as its only argument.

    *attrs* is accepted but not used.
    """
    failure = RuntimeError(name)
    raise failure
374 parser
= expat
.ParserCreate()
375 parser
.StartElementHandler
= self
.StartElementHandler
377 parser
.Parse("<a><b><c/></b></a>", 1)
379 except RuntimeError, e
:
380 self
.assertEquals(e
.args
[0], 'a',
381 "Expected RuntimeError for element 'a', but" + \
382 " found %r" % e
.args
[0])
385 # Test Current* members:
386 class PositionTest(unittest
.TestCase
):
387 def StartElementHandler(self
, name
, attrs
):
390 def EndElementHandler(self
, name
):
393 def check_pos(self
, event
):
395 self
.parser
.CurrentByteIndex
,
396 self
.parser
.CurrentLineNumber
,
397 self
.parser
.CurrentColumnNumber
)
398 self
.assertTrue(self
.upto
< len(self
.expected_list
),
399 'too many parser events')
400 expected
= self
.expected_list
[self
.upto
]
401 self
.assertEquals(pos
, expected
,
402 'Expected position %s, got position %s' %(pos
, expected
))
406 self
.parser
= expat
.ParserCreate()
407 self
.parser
.StartElementHandler
= self
.StartElementHandler
408 self
.parser
.EndElementHandler
= self
.EndElementHandler
410 self
.expected_list
= [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
411 ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
413 xml
= '<a>\n <b>\n <c/>\n </b>\n</a>'
414 self
.parser
.Parse(xml
, 1)
417 class sf1296433Test(unittest
.TestCase
):
418 def test_parse_only_xml_data(self
):
419 # http://python.org/sf/1296433
421 xml
= "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
422 # this one doesn't crash
423 #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
425 class SpecificException(Exception):
429 raise SpecificException
431 parser
= expat
.ParserCreate()
432 parser
.CharacterDataHandler
= handler
434 self
.assertRaises(Exception, parser
.Parse
, xml
)
436 class ChardataBufferTest(unittest
.TestCase
):
438 test setting of chardata buffer size
441 def test_1025_bytes(self
):
442 self
.assertEquals(self
.small_buffer_test(1025), 2)
444 def test_1000_bytes(self
):
445 self
.assertEquals(self
.small_buffer_test(1000), 1)
447 def test_wrong_size(self
):
448 parser
= expat
.ParserCreate()
449 parser
.buffer_text
= 1
451 parser
.buffer_size
= size
453 self
.assertRaises(TypeError, f
, sys
.maxint
+1)
454 self
.assertRaises(ValueError, f
, -1)
455 self
.assertRaises(ValueError, f
, 0)
457 def test_unchanged_size(self
):
458 xml1
= ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
459 xml2
= 'a'*512 + '</s>'
460 parser
= expat
.ParserCreate()
461 parser
.CharacterDataHandler
= self
.counting_handler
462 parser
.buffer_size
= 512
463 parser
.buffer_text
= 1
465 # Feed 512 bytes of character data: the handler should be called
469 self
.assertEquals(self
.n
, 1)
471 # Reassign to buffer_size, but assign the same size.
472 parser
.buffer_size
= parser
.buffer_size
473 self
.assertEquals(self
.n
, 1)
475 # Try parsing rest of the document
477 self
.assertEquals(self
.n
, 2)
480 def test_disabling_buffer(self
):
481 xml1
= "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
483 xml3
= "%s</a>" % ('c' * 1024)
484 parser
= expat
.ParserCreate()
485 parser
.CharacterDataHandler
= self
.counting_handler
486 parser
.buffer_text
= 1
487 parser
.buffer_size
= 1024
488 self
.assertEquals(parser
.buffer_size
, 1024)
490 # Parse one chunk of XML
492 parser
.Parse(xml1
, 0)
493 self
.assertEquals(parser
.buffer_size
, 1024)
494 self
.assertEquals(self
.n
, 1)
496 # Turn off buffering and parse the next chunk.
497 parser
.buffer_text
= 0
498 self
.assertFalse(parser
.buffer_text
)
499 self
.assertEquals(parser
.buffer_size
, 1024)
501 parser
.Parse(xml2
, 0)
502 self
.assertEquals(self
.n
, 11)
504 parser
.buffer_text
= 1
505 self
.assertTrue(parser
.buffer_text
)
506 self
.assertEquals(parser
.buffer_size
, 1024)
507 parser
.Parse(xml3
, 1)
508 self
.assertEquals(self
.n
, 12)
def make_document(self, bytes):
    """Return an XML document whose ``<tag>`` element holds ``'a'`` repeated.

    *bytes* is the repeat count for the letter ``'a'``; the result is the
    XML 1.0 declaration followed by ``<tag>``, the repeated text and
    ``</tag>``.
    """
    prolog_and_open = "<?xml version='1.0'?><tag>"
    payload = bytes * 'a'
    return prolog_and_open + payload + '</tag>'
515 def counting_handler(self
, text
):
518 def small_buffer_test(self
, buffer_len
):
519 xml
= "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len
)
520 parser
= expat
.ParserCreate()
521 parser
.CharacterDataHandler
= self
.counting_handler
522 parser
.buffer_size
= 1024
523 parser
.buffer_text
= 1
529 def test_change_size_1(self
):
530 xml1
= "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
531 xml2
= "aaa</s><s>%s</s></a>" % ('a' * 1025)
532 parser
= expat
.ParserCreate()
533 parser
.CharacterDataHandler
= self
.counting_handler
534 parser
.buffer_text
= 1
535 parser
.buffer_size
= 1024
536 self
.assertEquals(parser
.buffer_size
, 1024)
539 parser
.Parse(xml1
, 0)
540 parser
.buffer_size
*= 2
541 self
.assertEquals(parser
.buffer_size
, 2048)
542 parser
.Parse(xml2
, 1)
543 self
.assertEquals(self
.n
, 2)
545 def test_change_size_2(self
):
546 xml1
= "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
547 xml2
= "aaa</s><s>%s</s></a>" % ('a' * 1025)
548 parser
= expat
.ParserCreate()
549 parser
.CharacterDataHandler
= self
.counting_handler
550 parser
.buffer_text
= 1
551 parser
.buffer_size
= 2048
552 self
.assertEquals(parser
.buffer_size
, 2048)
555 parser
.Parse(xml1
, 0)
556 parser
.buffer_size
//= 2
557 self
.assertEquals(parser
.buffer_size
, 1024)
558 parser
.Parse(xml2
, 1)
559 self
.assertEquals(self
.n
, 4)
561 class MalformedInputText(unittest
.TestCase
):
564 parser
= expat
.ParserCreate()
566 parser
.Parse(xml
, True)
568 except expat
.ExpatError
as e
:
569 self
.assertEquals(str(e
), 'unclosed token: line 2, column 0')
572 xml
= "<?xml version\xc2\x85='1.0'?>\r\n"
573 parser
= expat
.ParserCreate()
575 parser
.Parse(xml
, True)
577 except expat
.ExpatError
as e
:
578 self
.assertEquals(str(e
), 'XML declaration not well-formed: line 1, column 14')
581 run_unittest(SetAttributeTest
,
583 NamespaceSeparatorTest
,
586 HandlerExceptionTest
,
592 if __name__
== "__main__":