1 # XXX TypeErrors on calling handlers, or on bad return values from a
2 # handler, are obscure and unhelpful.
8 from xml
.parsers
import expat
10 from test
.test_support
import sortdict
, run_unittest
13 class SetAttributeTest(unittest
.TestCase
):
15 self
.parser
= expat
.ParserCreate(namespace_separator
='!')
16 self
.set_get_pairs
= [
23 def test_returns_unicode(self
):
24 for x
, y
in self
.set_get_pairs
:
25 self
.parser
.returns_unicode
= x
26 self
.assertEquals(self
.parser
.returns_unicode
, y
)
28 def test_ordered_attributes(self
):
29 for x
, y
in self
.set_get_pairs
:
30 self
.parser
.ordered_attributes
= x
31 self
.assertEquals(self
.parser
.ordered_attributes
, y
)
33 def test_specified_attributes(self
):
34 for x
, y
in self
.set_get_pairs
:
35 self
.parser
.specified_attributes
= x
36 self
.assertEquals(self
.parser
.specified_attributes
, y
)
40 <?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
41 <?xml-stylesheet href="stylesheet.css"?>
43 <!DOCTYPE quotations SYSTEM "quotations.dtd" [
45 <!NOTATION notation SYSTEM "notation.jpeg">
46 <!ENTITY acirc "â">
47 <!ENTITY external_entity SYSTEM "entity.file">
48 <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
52 <root attr1="value1" attr2="value2ὀ">
53 <myns:subelement xmlns:myns="http://www.python.org/namespace">
54 Contents of subelements
56 <sub2><![CDATA[contents of CDATA section]]></sub2>
62 # Produce UTF-8 output
63 class ParseTest(unittest
.TestCase
):
68 def StartElementHandler(self
, name
, attrs
):
69 self
.out
.append('Start element: ' + repr(name
) + ' ' +
def EndElementHandler(self, name):
    """Append an 'End element' record holding repr(name) to self.out."""
    record = 'End element: ' + repr(name)
    self.out.append(record)
75 def CharacterDataHandler(self
, data
):
78 self
.out
.append('Character data: ' + repr(data
))
def ProcessingInstructionHandler(self, target, data):
    """Append a 'PI' record with the reprs of target and data to self.out."""
    pieces = ('PI: ', repr(target), ' ', repr(data))
    self.out.append(''.join(pieces))
def StartNamespaceDeclHandler(self, prefix, uri):
    """Append an 'NS decl' record with the reprs of prefix and uri to self.out."""
    pieces = ('NS decl: ', repr(prefix), ' ', repr(uri))
    self.out.append(''.join(pieces))
def EndNamespaceDeclHandler(self, prefix):
    """Append an 'End of NS decl' record holding repr(prefix) to self.out."""
    record = 'End of NS decl: ' + repr(prefix)
    self.out.append(record)
def StartCdataSectionHandler(self):
    """Append the fixed 'Start of CDATA section' marker to self.out."""
    marker = 'Start of CDATA section'
    self.out.append(marker)
def EndCdataSectionHandler(self):
    """Append the fixed 'End of CDATA section' marker to self.out."""
    marker = 'End of CDATA section'
    self.out.append(marker)
def CommentHandler(self, text):
    """Append a 'Comment' record holding repr(text) to self.out."""
    record = 'Comment: ' + repr(text)
    self.out.append(record)
def NotationDeclHandler(self, *args):
    """Append a 'Notation declared' record showing the args tuple to self.out.

    Exactly four positional arguments are required; any other count makes
    the unpacking below raise ValueError.
    """
    # The unpacked names are not used afterwards; the assignment only
    # checks that four values were passed.
    (name, base, sysid, pubid) = args
    record = 'Notation declared: %s' % (args,)
    self.out.append(record)
def UnparsedEntityDeclHandler(self, *args):
    """Append an 'Unparsed entity decl' record showing the args tuple to self.out.

    Exactly five positional arguments are required; any other count makes
    the unpacking below raise ValueError.
    """
    # The unpacked names are not used afterwards; the assignment only
    # checks that five values were passed.
    (entityName, base, systemId, publicId, notationName) = args
    record = 'Unparsed entity decl: %s' % (args,)
    self.out.append(record)
106 def NotStandaloneHandler(self
, userData
):
107 self
.out
.append('Not standalone')
110 def ExternalEntityRefHandler(self
, *args
):
111 context
, base
, sysId
, pubId
= args
112 self
.out
.append('External entity ref: %s' %(args
[1:],))
115 def DefaultHandler(self
, userData
):
118 def DefaultHandlerExpand(self
, userData
):
122 'StartElementHandler', 'EndElementHandler',
123 'CharacterDataHandler', 'ProcessingInstructionHandler',
124 'UnparsedEntityDeclHandler', 'NotationDeclHandler',
125 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
126 'CommentHandler', 'StartCdataSectionHandler',
127 'EndCdataSectionHandler',
128 'DefaultHandler', 'DefaultHandlerExpand',
129 #'NotStandaloneHandler',
130 'ExternalEntityRefHandler'
135 out
= self
.Outputter()
136 parser
= expat
.ParserCreate(namespace_separator
='!')
137 for name
in self
.handler_names
:
138 setattr(parser
, name
, getattr(out
, name
))
139 parser
.returns_unicode
= 0
140 parser
.Parse(data
, 1)
144 self
.assertEquals(op
[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'')
145 self
.assertEquals(op
[1], "Comment: ' comment data '")
146 self
.assertEquals(op
[2], "Notation declared: ('notation', None, 'notation.jpeg', None)")
147 self
.assertEquals(op
[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')")
148 self
.assertEquals(op
[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}")
149 self
.assertEquals(op
[5], "NS decl: 'myns' 'http://www.python.org/namespace'")
150 self
.assertEquals(op
[6], "Start element: 'http://www.python.org/namespace!subelement' {}")
151 self
.assertEquals(op
[7], "Character data: 'Contents of subelements'")
152 self
.assertEquals(op
[8], "End element: 'http://www.python.org/namespace!subelement'")
153 self
.assertEquals(op
[9], "End of NS decl: 'myns'")
154 self
.assertEquals(op
[10], "Start element: 'sub2' {}")
155 self
.assertEquals(op
[11], 'Start of CDATA section')
156 self
.assertEquals(op
[12], "Character data: 'contents of CDATA section'")
157 self
.assertEquals(op
[13], 'End of CDATA section')
158 self
.assertEquals(op
[14], "End element: 'sub2'")
159 self
.assertEquals(op
[15], "External entity ref: (None, 'entity.file', None)")
160 self
.assertEquals(op
[16], "End element: 'root'")
162 def test_unicode(self
):
163 # Try the parse again, this time producing Unicode output
164 out
= self
.Outputter()
165 parser
= expat
.ParserCreate(namespace_separator
='!')
166 parser
.returns_unicode
= 1
167 for name
in self
.handler_names
:
168 setattr(parser
, name
, getattr(out
, name
))
170 parser
.Parse(data
, 1)
173 self
.assertEquals(op
[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
174 self
.assertEquals(op
[1], "Comment: u' comment data '")
175 self
.assertEquals(op
[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
176 self
.assertEquals(op
[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
177 self
.assertEquals(op
[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
178 self
.assertEquals(op
[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
179 self
.assertEquals(op
[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
180 self
.assertEquals(op
[7], "Character data: u'Contents of subelements'")
181 self
.assertEquals(op
[8], "End element: u'http://www.python.org/namespace!subelement'")
182 self
.assertEquals(op
[9], "End of NS decl: u'myns'")
183 self
.assertEquals(op
[10], "Start element: u'sub2' {}")
184 self
.assertEquals(op
[11], 'Start of CDATA section')
185 self
.assertEquals(op
[12], "Character data: u'contents of CDATA section'")
186 self
.assertEquals(op
[13], 'End of CDATA section')
187 self
.assertEquals(op
[14], "End element: u'sub2'")
188 self
.assertEquals(op
[15], "External entity ref: (None, u'entity.file', None)")
189 self
.assertEquals(op
[16], "End element: u'root'")
191 def test_parse_file(self
):
193 out
= self
.Outputter()
194 parser
= expat
.ParserCreate(namespace_separator
='!')
195 parser
.returns_unicode
= 1
196 for name
in self
.handler_names
:
197 setattr(parser
, name
, getattr(out
, name
))
198 file = StringIO
.StringIO(data
)
200 parser
.ParseFile(file)
203 self
.assertEquals(op
[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
204 self
.assertEquals(op
[1], "Comment: u' comment data '")
205 self
.assertEquals(op
[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
206 self
.assertEquals(op
[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
207 self
.assertEquals(op
[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
208 self
.assertEquals(op
[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
209 self
.assertEquals(op
[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
210 self
.assertEquals(op
[7], "Character data: u'Contents of subelements'")
211 self
.assertEquals(op
[8], "End element: u'http://www.python.org/namespace!subelement'")
212 self
.assertEquals(op
[9], "End of NS decl: u'myns'")
213 self
.assertEquals(op
[10], "Start element: u'sub2' {}")
214 self
.assertEquals(op
[11], 'Start of CDATA section')
215 self
.assertEquals(op
[12], "Character data: u'contents of CDATA section'")
216 self
.assertEquals(op
[13], 'End of CDATA section')
217 self
.assertEquals(op
[14], "End element: u'sub2'")
218 self
.assertEquals(op
[15], "External entity ref: (None, u'entity.file', None)")
219 self
.assertEquals(op
[16], "End element: u'root'")
222 class NamespaceSeparatorTest(unittest
.TestCase
):
223 def test_legal(self
):
224 # Tests that make sure we get errors when the namespace_separator value
225 # is illegal, and that we don't for good values:
227 expat
.ParserCreate(namespace_separator
=None)
228 expat
.ParserCreate(namespace_separator
=' ')
230 def test_illegal(self
):
232 expat
.ParserCreate(namespace_separator
=42)
235 self
.assertEquals(str(e
),
236 'ParserCreate() argument 2 must be string or None, not int')
239 expat
.ParserCreate(namespace_separator
='too long')
241 except ValueError, e
:
242 self
.assertEquals(str(e
),
243 'namespace_separator must be at most one character, omitted, or None')
245 def test_zero_length(self
):
246 # ParserCreate() needs to accept a namespace_separator of zero length
247 # to satisfy the requirements of RDF applications that are required
248 # to simply glue together the namespace URI and the localname. Though
249 # considered a wart of the RDF specifications, it needs to be supported.
251 # See XML-SIG mailing list thread starting with
252 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
254 expat
.ParserCreate(namespace_separator
='') # too short
257 class InterningTest(unittest
.TestCase
):
259 # Test the interning machinery.
260 p
= expat
.ParserCreate()
262 def collector(name
, *args
):
264 p
.StartElementHandler
= collector
265 p
.EndElementHandler
= collector
266 p
.Parse("<e> <e/> <e></e> </e>", 1)
268 self
.assertEquals(len(L
), 6)
270 # L should have the same string repeated over and over.
271 self
.assertTrue(tag
is entry
)
274 class BufferTextTest(unittest
.TestCase
):
277 self
.parser
= expat
.ParserCreate()
278 self
.parser
.buffer_text
= 1
279 self
.parser
.CharacterDataHandler
= self
.CharacterDataHandler
def check(self, expected, label):
    """Assert that self.stuff equals ``expected``.

    The failure message consists of ``label``, the repr of self.stuff and
    the repr of ``expected`` with every item passed through ``unicode``.
    """
    message = "%s\nstuff = %r\nexpected = %r" % (
        label, self.stuff, map(unicode, expected))
    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias.
    self.assertEqual(self.stuff, expected, message)
def CharacterDataHandler(self, text):
    """Append the received text, unchanged, to self.stuff."""
    collected = self.stuff
    collected.append(text)
289 def StartElementHandler(self
, name
, attrs
):
290 self
.stuff
.append("<%s>" % name
)
291 bt
= attrs
.get("buffer-text")
293 self
.parser
.buffer_text
= 1
295 self
.parser
.buffer_text
= 0
def EndElementHandler(self, name):
    """Append the closing-tag text for ``name`` to self.stuff."""
    closing_tag = "</%s>" % name
    self.stuff.append(closing_tag)
def CommentHandler(self, data):
    """Append ``data`` wrapped in XML comment delimiters to self.stuff."""
    comment_markup = "<!--%s-->" % data
    self.stuff.append(comment_markup)
def setHandlers(self, handlers=()):
    """Install attributes of this object as handlers on self.parser.

    ``handlers`` is an iterable of attribute names.  Each name is looked up
    on ``self`` and the result is assigned to the attribute of the same
    name on ``self.parser``.  Calling with no argument installs nothing.
    """
    # The default is an empty tuple rather than a list so that no mutable
    # object is shared between calls; the argument is only iterated.
    for handler_name in handlers:
        setattr(self.parser, handler_name, getattr(self, handler_name))
307 def test_default_to_disabled(self
):
308 parser
= expat
.ParserCreate()
309 self
.assertFalse(parser
.buffer_text
)
311 def test_buffering_enabled(self
):
312 # Make sure buffering is turned on
313 self
.assertTrue(self
.parser
.buffer_text
)
314 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
315 self
.assertEquals(self
.stuff
, ['123'],
316 "buffered text not properly collapsed")
319 # XXX This test exposes more detail of Expat's text chunking than we
320 # XXX like, but it tests what we need to concisely.
321 self
.setHandlers(["StartElementHandler"])
322 self
.parser
.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
323 self
.assertEquals(self
.stuff
,
324 ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
325 "buffering control not reacting as expected")
328 self
.parser
.Parse("<a>1<b/><2><c/> \n 3</a>", 1)
329 self
.assertEquals(self
.stuff
, ["1<2> \n 3"],
330 "buffered text not properly collapsed")
333 self
.setHandlers(["StartElementHandler"])
334 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
335 self
.assertEquals(self
.stuff
, ["<a>", "1", "<b>", "2", "<c>", "3"],
336 "buffered text not properly split")
339 self
.setHandlers(["StartElementHandler", "EndElementHandler"])
340 self
.parser
.CharacterDataHandler
= None
341 self
.parser
.Parse("<a>1<b/>2<c/>3</a>", 1)
342 self
.assertEquals(self
.stuff
,
343 ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
346 self
.setHandlers(["StartElementHandler", "EndElementHandler"])
347 self
.parser
.Parse("<a>1<b></b>2<c/>3</a>", 1)
348 self
.assertEquals(self
.stuff
,
349 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
352 self
.setHandlers(["CommentHandler", "EndElementHandler",
353 "StartElementHandler"])
354 self
.parser
.Parse("<a>1<b/>2<c></c>345</a> ", 1)
355 self
.assertEquals(self
.stuff
,
356 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
357 "buffered text not properly split")
360 self
.setHandlers(["CommentHandler", "EndElementHandler",
361 "StartElementHandler"])
362 self
.parser
.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
363 self
.assertEquals(self
.stuff
,
364 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
365 "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
366 "buffered text not properly split")
369 # Test handling of exception from callback:
370 class HandlerExceptionTest(unittest
.TestCase
):
def StartElementHandler(self, name, attrs):
    """Always raise RuntimeError, passing the element name as its argument."""
    failure = RuntimeError(name)
    raise failure
375 parser
= expat
.ParserCreate()
376 parser
.StartElementHandler
= self
.StartElementHandler
378 parser
.Parse("<a><b><c/></b></a>", 1)
380 except RuntimeError, e
:
381 self
.assertEquals(e
.args
[0], 'a',
382 "Expected RuntimeError for element 'a', but" + \
383 " found %r" % e
.args
[0])
386 # Test Current* members:
387 class PositionTest(unittest
.TestCase
):
388 def StartElementHandler(self
, name
, attrs
):
391 def EndElementHandler(self
, name
):
394 def check_pos(self
, event
):
396 self
.parser
.CurrentByteIndex
,
397 self
.parser
.CurrentLineNumber
,
398 self
.parser
.CurrentColumnNumber
)
399 self
.assertTrue(self
.upto
< len(self
.expected_list
),
400 'too many parser events')
401 expected
= self
.expected_list
[self
.upto
]
402 self
.assertEquals(pos
, expected
,
403 'Expected position %s, got position %s' %(pos
, expected
))
407 self
.parser
= expat
.ParserCreate()
408 self
.parser
.StartElementHandler
= self
.StartElementHandler
409 self
.parser
.EndElementHandler
= self
.EndElementHandler
411 self
.expected_list
= [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
412 ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
414 xml
= '<a>\n <b>\n <c/>\n </b>\n</a>'
415 self
.parser
.Parse(xml
, 1)
418 class sf1296433Test(unittest
.TestCase
):
419 def test_parse_only_xml_data(self
):
420 # http://python.org/sf/1296433
422 xml
= "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
423 # this one doesn't crash
424 #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
426 class SpecificException(Exception):
430 raise SpecificException
432 parser
= expat
.ParserCreate()
433 parser
.CharacterDataHandler
= handler
435 self
.assertRaises(Exception, parser
.Parse
, xml
)
437 class ChardataBufferTest(unittest
.TestCase
):
439 test setting of chardata buffer size
442 def test_1025_bytes(self
):
443 self
.assertEquals(self
.small_buffer_test(1025), 2)
445 def test_1000_bytes(self
):
446 self
.assertEquals(self
.small_buffer_test(1000), 1)
448 def test_wrong_size(self
):
449 parser
= expat
.ParserCreate()
450 parser
.buffer_text
= 1
452 parser
.buffer_size
= size
454 self
.assertRaises(TypeError, f
, sys
.maxint
+1)
455 self
.assertRaises(ValueError, f
, -1)
456 self
.assertRaises(ValueError, f
, 0)
458 def test_unchanged_size(self
):
459 xml1
= ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
460 xml2
= 'a'*512 + '</s>'
461 parser
= expat
.ParserCreate()
462 parser
.CharacterDataHandler
= self
.counting_handler
463 parser
.buffer_size
= 512
464 parser
.buffer_text
= 1
466 # Feed 512 bytes of character data: the handler should be called
470 self
.assertEquals(self
.n
, 1)
472 # Reassign to buffer_size, but assign the same size.
473 parser
.buffer_size
= parser
.buffer_size
474 self
.assertEquals(self
.n
, 1)
476 # Try parsing rest of the document
478 self
.assertEquals(self
.n
, 2)
481 def test_disabling_buffer(self
):
482 xml1
= "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
484 xml3
= "%s</a>" % ('c' * 1024)
485 parser
= expat
.ParserCreate()
486 parser
.CharacterDataHandler
= self
.counting_handler
487 parser
.buffer_text
= 1
488 parser
.buffer_size
= 1024
489 self
.assertEquals(parser
.buffer_size
, 1024)
491 # Parse one chunk of XML
493 parser
.Parse(xml1
, 0)
494 self
.assertEquals(parser
.buffer_size
, 1024)
495 self
.assertEquals(self
.n
, 1)
497 # Turn off buffering and parse the next chunk.
498 parser
.buffer_text
= 0
499 self
.assertFalse(parser
.buffer_text
)
500 self
.assertEquals(parser
.buffer_size
, 1024)
502 parser
.Parse(xml2
, 0)
503 self
.assertEquals(self
.n
, 11)
505 parser
.buffer_text
= 1
506 self
.assertTrue(parser
.buffer_text
)
507 self
.assertEquals(parser
.buffer_size
, 1024)
508 parser
.Parse(xml3
, 1)
509 self
.assertEquals(self
.n
, 12)
def make_document(self, bytes):
    """Return an XML document whose <tag> element holds ``bytes`` 'a' characters."""
    filler = bytes * 'a'
    pieces = ["<?xml version='1.0'?><tag>", filler, '</tag>']
    return "".join(pieces)
516 def counting_handler(self
, text
):
519 def small_buffer_test(self
, buffer_len
):
520 xml
= "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len
)
521 parser
= expat
.ParserCreate()
522 parser
.CharacterDataHandler
= self
.counting_handler
523 parser
.buffer_size
= 1024
524 parser
.buffer_text
= 1
530 def test_change_size_1(self
):
531 xml1
= "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
532 xml2
= "aaa</s><s>%s</s></a>" % ('a' * 1025)
533 parser
= expat
.ParserCreate()
534 parser
.CharacterDataHandler
= self
.counting_handler
535 parser
.buffer_text
= 1
536 parser
.buffer_size
= 1024
537 self
.assertEquals(parser
.buffer_size
, 1024)
540 parser
.Parse(xml1
, 0)
541 parser
.buffer_size
*= 2
542 self
.assertEquals(parser
.buffer_size
, 2048)
543 parser
.Parse(xml2
, 1)
544 self
.assertEquals(self
.n
, 2)
546 def test_change_size_2(self
):
547 xml1
= "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
548 xml2
= "aaa</s><s>%s</s></a>" % ('a' * 1025)
549 parser
= expat
.ParserCreate()
550 parser
.CharacterDataHandler
= self
.counting_handler
551 parser
.buffer_text
= 1
552 parser
.buffer_size
= 2048
553 self
.assertEquals(parser
.buffer_size
, 2048)
556 parser
.Parse(xml1
, 0)
557 parser
.buffer_size
/= 2
558 self
.assertEquals(parser
.buffer_size
, 1024)
559 parser
.Parse(xml2
, 1)
560 self
.assertEquals(self
.n
, 4)
562 class MalformedInputText(unittest
.TestCase
):
565 parser
= expat
.ParserCreate()
567 parser
.Parse(xml
, True)
569 except expat
.ExpatError
as e
:
570 self
.assertEquals(str(e
), 'no element found: line 2, column 1')
573 xml
= "<?xml version\xc2\x85='1.0'?>\r\n"
574 parser
= expat
.ParserCreate()
576 parser
.Parse(xml
, True)
578 except expat
.ExpatError
as e
:
579 self
.assertEquals(str(e
), 'XML declaration not well-formed: line 1, column 14')
582 run_unittest(SetAttributeTest
,
584 NamespaceSeparatorTest
,
587 HandlerExceptionTest
,
593 if __name__
== "__main__":