#1153769: document PEP 237 changes to string formatting.
[python.git] / Lib / test / test_pyexpat.py
blobde5cded6c5aa0f9225d994d2cf944618153ee8e6
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
4 import StringIO, sys
5 import unittest
7 import pyexpat
8 from xml.parsers import expat
10 from test.test_support import sortdict, run_unittest
13 class SetAttributeTest(unittest.TestCase):
14 def setUp(self):
15 self.parser = expat.ParserCreate(namespace_separator='!')
16 self.set_get_pairs = [
17 [0, 0],
18 [1, 1],
19 [2, 1],
20 [0, 0],
23 def test_returns_unicode(self):
24 for x, y in self.set_get_pairs:
25 self.parser.returns_unicode = x
26 self.assertEquals(self.parser.returns_unicode, y)
28 def test_ordered_attributes(self):
29 for x, y in self.set_get_pairs:
30 self.parser.ordered_attributes = x
31 self.assertEquals(self.parser.ordered_attributes, y)
33 def test_specified_attributes(self):
34 for x, y in self.set_get_pairs:
35 self.parser.specified_attributes = x
36 self.assertEquals(self.parser.specified_attributes, y)
# Sample document shared by the ParseTest cases.  It contains a processing
# instruction, a comment, a DOCTYPE with an internal subset (notation,
# entities), namespaced elements, a CDATA section and an external entity
# reference.  The "]>" line closes the internal subset; without it the
# document is not well formed.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
class ParseTest(unittest.TestCase):
    """Parse the module-level sample document and check the reported events."""

    class Outputter:
        """Collect a textual description of every parser callback in ``out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails unless exactly four arguments are passed.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking fails unless exactly five arguments are passed.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self, userData):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking fails unless exactly four arguments are passed;
            # the context (first argument) is left out of the record.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the Outputter methods installed on the parser by each test.
    handler_names = [
        'StartElementHandler', 'EndElementHandler',
        'CharacterDataHandler', 'ProcessingInstructionHandler',
        'UnparsedEntityDeclHandler', 'NotationDeclHandler',
        'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
        'CommentHandler', 'StartCdataSectionHandler',
        'EndCdataSectionHandler',
        'DefaultHandler', 'DefaultHandlerExpand',
        #'NotStandaloneHandler',
        'ExternalEntityRefHandler'
        ]

    def _install_handlers(self, parser, out):
        """Attach every method listed in handler_names from *out* to *parser*."""
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))

    def _assert_events(self, events, expected):
        """Check that events[i] == expected[i] for every index of *expected*."""
        for index, wanted in enumerate(expected):
            self.assertEqual(events[index], wanted)

    def _assert_unicode_events(self, events):
        """Check the event list produced when returns_unicode is enabled."""
        self._assert_events(events, [
            'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
            "Comment: u' comment data '",
            "Notation declared: (u'notation', None, u'notation.jpeg', None)",
            "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
            "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
            "NS decl: u'myns' u'http://www.python.org/namespace'",
            "Start element: u'http://www.python.org/namespace!subelement' {}",
            "Character data: u'Contents of subelements'",
            "End element: u'http://www.python.org/namespace!subelement'",
            "End of NS decl: u'myns'",
            "Start element: u'sub2' {}",
            'Start of CDATA section',
            "Character data: u'contents of CDATA section'",
            'End of CDATA section',
            "End element: u'sub2'",
            "External entity ref: (None, u'entity.file', None)",
            "End element: u'root'",
            ])

    def test_utf8(self):
        # Produce UTF-8 output
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._install_handlers(parser, out)
        parser.returns_unicode = 0
        parser.Parse(data, 1)

        # Verify output
        self._assert_events(out.out, [
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            "End element: 'root'",
            ])

    def test_unicode(self):
        # Try the parse again, this time producing Unicode output
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        self._install_handlers(parser, out)

        parser.Parse(data, 1)

        self._assert_unicode_events(out.out)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        self._install_handlers(parser, out)
        stream = StringIO.StringIO(data)

        parser.ParseFile(stream)

        self._assert_unicode_events(out.out)
222 class NamespaceSeparatorTest(unittest.TestCase):
223 def test_legal(self):
224 # Tests that make sure we get errors when the namespace_separator value
225 # is illegal, and that we don't for good values:
226 expat.ParserCreate()
227 expat.ParserCreate(namespace_separator=None)
228 expat.ParserCreate(namespace_separator=' ')
230 def test_illegal(self):
231 try:
232 expat.ParserCreate(namespace_separator=42)
233 self.fail()
234 except TypeError, e:
235 self.assertEquals(str(e),
236 'ParserCreate() argument 2 must be string or None, not int')
238 try:
239 expat.ParserCreate(namespace_separator='too long')
240 self.fail()
241 except ValueError, e:
242 self.assertEquals(str(e),
243 'namespace_separator must be at most one character, omitted, or None')
245 def test_zero_length(self):
246 # ParserCreate() needs to accept a namespace_separator of zero length
247 # to satisfy the requirements of RDF applications that are required
248 # to simply glue together the namespace URI and the localname. Though
249 # considered a wart of the RDF specifications, it needs to be supported.
251 # See XML-SIG mailing list thread starting with
252 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
254 expat.ParserCreate(namespace_separator='') # too short
class InterningTest(unittest.TestCase):
    """Check that repeated element names are delivered as one string object."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []

        def collector(name, *args):
            # Used for both start and end events; only the name is recorded.
            L.append(name)

        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse("<e> <e/> <e></e> </e>", 1)
        tag = L[0]
        # Three <e> elements: three start events and three end events.
        self.assertEqual(len(L), 6)
        for entry in L:
            # L should have the same string repeated over and over.
            self.assertTrue(tag is entry)
class BufferTextTest(unittest.TestCase):
    """Check how buffer_text groups the chunks passed to CharacterDataHandler."""

    def setUp(self):
        # Every handler of this class appends to self.stuff, so it holds the
        # sequence of events seen by the parser.
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, map(unicode, expected)))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on the element switches buffering on/off.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the methods of this test case named in *handlers* on the parser.

        The default is an immutable empty tuple rather than a shared list.
        """
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        # With no character data handler only element events are recorded.
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                          "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised inside a handler escapes from Parse()."""

    def StartElementHandler(self, name, attrs):
        # Always fails, carrying the element name as the exception argument.
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        try:
            parser.Parse("<a><b><c/></b></a>", 1)
        except RuntimeError as e:
            # The error must come from the first start tag, <a>.
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but" + \
                             " found %r" % e.args[0])
        else:
            self.fail()
# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside element handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected entry.

        *event* is 's' for a start tag or 'e' for an end tag.  On success the
        cursor ``self.upto`` advances to the next entry of ``expected_list``.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # Report the expected value first and the observed one second, in the
        # order the message text announces them.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # Entries are (event, byte index, line number, column number).
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Require the handler's own exception: a bare Exception would also
        # accept any unrelated failure raised by Parse().
        self.assertRaises(SpecificException, parser.Parse, xml)
437 class ChardataBufferTest(unittest.TestCase):
439 test setting of chardata buffer size
442 def test_1025_bytes(self):
443 self.assertEquals(self.small_buffer_test(1025), 2)
445 def test_1000_bytes(self):
446 self.assertEquals(self.small_buffer_test(1000), 1)
448 def test_wrong_size(self):
449 parser = expat.ParserCreate()
450 parser.buffer_text = 1
451 def f(size):
452 parser.buffer_size = size
454 self.assertRaises(TypeError, f, sys.maxint+1)
455 self.assertRaises(ValueError, f, -1)
456 self.assertRaises(ValueError, f, 0)
458 def test_unchanged_size(self):
459 xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
460 xml2 = 'a'*512 + '</s>'
461 parser = expat.ParserCreate()
462 parser.CharacterDataHandler = self.counting_handler
463 parser.buffer_size = 512
464 parser.buffer_text = 1
466 # Feed 512 bytes of character data: the handler should be called
467 # once.
468 self.n = 0
469 parser.Parse(xml1)
470 self.assertEquals(self.n, 1)
472 # Reassign to buffer_size, but assign the same size.
473 parser.buffer_size = parser.buffer_size
474 self.assertEquals(self.n, 1)
476 # Try parsing rest of the document
477 parser.Parse(xml2)
478 self.assertEquals(self.n, 2)
481 def test_disabling_buffer(self):
482 xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
483 xml2 = ('b' * 1024)
484 xml3 = "%s</a>" % ('c' * 1024)
485 parser = expat.ParserCreate()
486 parser.CharacterDataHandler = self.counting_handler
487 parser.buffer_text = 1
488 parser.buffer_size = 1024
489 self.assertEquals(parser.buffer_size, 1024)
491 # Parse one chunk of XML
492 self.n = 0
493 parser.Parse(xml1, 0)
494 self.assertEquals(parser.buffer_size, 1024)
495 self.assertEquals(self.n, 1)
497 # Turn off buffering and parse the next chunk.
498 parser.buffer_text = 0
499 self.assertFalse(parser.buffer_text)
500 self.assertEquals(parser.buffer_size, 1024)
501 for i in range(10):
502 parser.Parse(xml2, 0)
503 self.assertEquals(self.n, 11)
505 parser.buffer_text = 1
506 self.assertTrue(parser.buffer_text)
507 self.assertEquals(parser.buffer_size, 1024)
508 parser.Parse(xml3, 1)
509 self.assertEquals(self.n, 12)
513 def make_document(self, bytes):
514 return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
516 def counting_handler(self, text):
517 self.n += 1
519 def small_buffer_test(self, buffer_len):
520 xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
521 parser = expat.ParserCreate()
522 parser.CharacterDataHandler = self.counting_handler
523 parser.buffer_size = 1024
524 parser.buffer_text = 1
526 self.n = 0
527 parser.Parse(xml)
528 return self.n
530 def test_change_size_1(self):
531 xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
532 xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
533 parser = expat.ParserCreate()
534 parser.CharacterDataHandler = self.counting_handler
535 parser.buffer_text = 1
536 parser.buffer_size = 1024
537 self.assertEquals(parser.buffer_size, 1024)
539 self.n = 0
540 parser.Parse(xml1, 0)
541 parser.buffer_size *= 2
542 self.assertEquals(parser.buffer_size, 2048)
543 parser.Parse(xml2, 1)
544 self.assertEquals(self.n, 2)
546 def test_change_size_2(self):
547 xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
548 xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
549 parser = expat.ParserCreate()
550 parser.CharacterDataHandler = self.counting_handler
551 parser.buffer_text = 1
552 parser.buffer_size = 2048
553 self.assertEquals(parser.buffer_size, 2048)
555 self.n=0
556 parser.Parse(xml1, 0)
557 parser.buffer_size /= 2
558 self.assertEquals(parser.buffer_size, 1024)
559 parser.Parse(xml2, 1)
560 self.assertEquals(self.n, 4)
def test_main():
    """Run every test case class of this module through run_unittest()."""
    test_classes = (
        SetAttributeTest,
        ParseTest,
        NamespaceSeparatorTest,
        InterningTest,
        BufferTextTest,
        HandlerExceptionTest,
        PositionTest,
        sf1296433Test,
        ChardataBufferTest,
    )
    run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()