# Source: [python/dscho.git] / Lib / test / test_textwrap.py
# blob c7da26b2660ca0e4b6967a508c1732deedfc48e3
#
# Test suite for the textwrap module.
#
# Original tests written by Greg Ward <gward@python.net>.
# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
# Currently maintained by Greg Ward.
#
# $Id$
import unittest
from test import test_support

from textwrap import TextWrapper, wrap, fill, dedent
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.'''

    def show(self, textin):
        '''Format 'textin' for inclusion in an assertion failure message.

        A list is rendered one item per line as "index: repr(item)";
        any other value is rendered as its repr() followed by a newline.
        '''
        if isinstance(textin, list):
            return '\n'.join(" %d: %r" % (i, item)
                             for i, item in enumerate(textin))
        # Everything that is not a list (in practice: a string) goes
        # through repr(), so the result is never left unbound.
        return " %s\n" % repr(textin)

    def check(self, result, expect):
        '''Assert that 'result' equals 'expect', showing both on failure.'''
        self.assertEqual(result, expect,
                         'expected:\n%s\nbut got:\n%s' % (
                             self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Assert that wrap(text, width, **kwargs) returns 'expect'.'''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Assert that self.wrapper._split(text) returns 'expect'.

        This class does not create self.wrapper itself; the attribute
        must already be set when this helper is called (the subclasses
        in this file set it in setUp()).
        '''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got %r" % (expect, result))
class WrapTestCase(BaseTestCase):
    '''Tests for wrap(), TextWrapper.wrap()/fill() and TextWrapper._split().'''

    def setUp(self):
        # Wrapper used by check_split() and by the tests that call
        # self.wrapper directly.
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Simple case: just words, spaces, and a bit of punctuation

        text = "Hello there, how are you this fine day? I'm glad to hear it!"

        self.check_wrap(text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"])
        self.check_wrap(text, 42,
                        ["Hello there, how are you this fine day?",
                         "I'm glad to hear it!"])
        self.check_wrap(text, 80, [text])

    def test_whitespace(self):
        # Whitespace munging and end-of-sentence detection

        text = """\
This is a paragraph that already has
line breaks. But some of its lines are much longer than the others,
so it needs to be wrapped.
Some lines are \ttabbed too.
What a mess!
"""

        # fix_sentence_endings=True leaves exactly two spaces after each
        # sentence end; the expanded tab (one space at that column) plus
        # the space before it leaves two spaces in front of "tabbed".
        expect = ["This is a paragraph that already has line",
                  "breaks.  But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped.  Some lines are  tabbed too.  What a",
                  "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        result = wrapper.wrap(text)
        self.check(result, expect)

        result = wrapper.fill(text)
        self.check(result, '\n'.join(expect))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # SF #847346: ensure that fix_sentence_endings=True does the
        # right thing even on input short enough that it doesn't need to
        # be wrapped.
        text = "A short line. Note the single space."
        expect = ["A short line.  Note the single space."]
        self.check(wrapper.wrap(text), expect)

        # Test some of the hairy end cases that _fix_sentence_endings()
        # is supposed to handle (the easy stuff is tested in
        # test_whitespace() above).
        text = "Well, Doctor? What do you think?"
        expect = ["Well, Doctor?  What do you think?"]
        self.check(wrapper.wrap(text), expect)

        text = "Well, Doctor?\nWhat do you think?"
        self.check(wrapper.wrap(text), expect)

        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 20
        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
        self.check(wrapper.wrap(text), expect)

        text = 'And she said, "Go to hell!"\nCan you believe that?'
        expect = ['And she said, "Go to',
                  'hell!"  Can you',
                  'believe that?']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 60
        expect = ['And she said, "Go to hell!"  Can you believe that?']
        self.check(wrapper.wrap(text), expect)

        # A period inside a word ("stdio.h") is not a sentence ending.
        text = 'File stdio.h is nice.'
        expect = ['File stdio.h is nice.']
        self.check(wrapper.wrap(text), expect)

    def test_wrap_short(self):
        # Wrapping to make short lines longer

        text = "This is a\nshort paragraph."

        self.check_wrap(text, 20, ["This is a short",
                                   "paragraph."])
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # Test endcases

        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        self.check_wrap(text, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Test breaking hyphenated words

        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        self.check_wrap(text, 40,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 41,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_hyphenated_numbers(self):
        # Test that hyphenated numbers (eg. dates) are not broken like words.
        text = ("Python 1.0.0 was released on 1994-01-26. Python 1.0.1 was\n"
                "released on 1994-02-15.")

        self.check_wrap(text, 35, ['Python 1.0.0 was released on',
                                   '1994-01-26. Python 1.0.1 was',
                                   'released on 1994-02-15.'])
        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
                                   'Python 1.0.1 was released on 1994-02-15.'])

        text = "I do all my shopping at 7-11."
        self.check_wrap(text, 25, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 27, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Test text with em-dashes
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        expect = ["Em-dashes should be written --",
                  "thus."]
        self.check_wrap(text, 30, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.
        text = "You can also do--this or even---this."
        expect = ["You can also do",
                  "--this or even",
                  "---this."]
        self.check_wrap(text, 15, expect)
        self.check_wrap(text, 16, expect)
        expect = ["You can also do--",
                  "this or even---",
                  "this."]
        self.check_wrap(text, 17, expect)
        self.check_wrap(text, 19, expect)
        expect = ["You can also do--this or even",
                  "---this."]
        self.check_wrap(text, 29, expect)
        self.check_wrap(text, 31, expect)
        expect = ["You can also do--this or even---",
                  "this."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 35, expect)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, expect)

        text = "and then--bam!--he was gone"
        expect = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, expect)

    def test_unix_options(self):
        # Test that Unix-style command-line options are wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])
        expect = ["You should use the -n option, or",
                  "--dry-run in its long form."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 34, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 38, expect)
        expect = ["You should use the -n option, or --dry-",
                  "run in its long form."]
        self.check_wrap(text, 39, expect)
        self.check_wrap(text, 41, expect)
        expect = ["You should use the -n option, or --dry-run",
                  "in its long form."]
        self.check_wrap(text, 42, expect)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, expect)

    def test_funky_hyphens(self):
        # Screwy edge cases cooked up by David Goodger.  All reported
        # in SF bug #596434.
        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
        self.check_split("what the--", ["what", " ", "the--"])
        self.check_split("what the--.", ["what", " ", "the--."])
        self.check_split("--text--.", ["--text--."])

        # When I first read bug #596434, this is what I thought David
        # was talking about.  I was wrong; these have always worked
        # fine.  The real problem is tested in test_funky_parens()
        # below...
        self.check_split("--option", ["--option"])
        self.check_split("--option-opt", ["--option-", "opt"])
        self.check_split("foo --option-opt bar",
                         ["foo", " ", "--option-", "opt", " ", "bar"])

    def test_punct_hyphens(self):
        # Oh bother, SF #965425 found another problem with hyphens --
        # hyphenated words in single quotes weren't handled correctly.
        # In fact, the bug is that *any* punctuation around a hyphenated
        # word was handled incorrectly, except for a leading "--", which
        # was special-cased for Optik and Docutils.  So test a variety
        # of styles of punctuation around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        self.check_split("the 'wibble-wobble' widget",
                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
        self.check_split('the "wibble-wobble" widget',
                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
        self.check_split("the (wibble-wobble) widget",
                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
        self.check_split("the ['wibble-wobble'] widget",
                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])

    def test_funky_parens(self):
        # Second part of SF bug #596434: long option strings inside
        # parentheses.
        self.check_split("foo (--option) bar",
                         ["foo", " ", "(--option)", " ", "bar"])

        # Related stuff -- make sure parens work in simpler contexts.
        self.check_split("foo (bar) baz",
                         ["foo", " ", "(bar)", " ", "baz"])
        self.check_split("blah (ding dong), wubba",
                         ["blah", " ", "(ding", " ", "dong),",
                          " ", "wubba"])

    def test_initial_whitespace(self):
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_no_drop_whitespace(self):
        # SF patch #1581073
        # The runs of several spaces are the point of this test: with
        # drop_whitespace=False they must survive at line boundaries.
        text = " This is a    sentence with     much whitespace."
        self.check_wrap(text, 10,
                        [" This is a", "    ", "sentence ",
                         "with     ", "much white", "space."],
                        drop_whitespace=False)

    if test_support.have_unicode:
        def test_unicode(self):
            # *Very* simple test of wrapping Unicode strings.  I'm sure
            # there's more to it than this, but let's at least make
            # sure textwrap doesn't crash on Unicode input!
            text = u"Hello there, how are you today?"
            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
            olines = self.wrapper.wrap(text)
            self.assertIsInstance(olines, list)
            self.assertIsInstance(olines[0], unicode)
            otext = self.wrapper.fill(text)
            self.assertIsInstance(otext, unicode)

        def test_no_split_at_umlaut(self):
            # The word containing the umlaut is kept in one piece; the
            # only break inside the compound is after the hyphen.
            text = u"Die Empf\xe4nger-Auswahl"
            self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])

        def test_umlaut_followed_by_dash(self):
            # A hyphen directly after an umlaut is still a break point.
            text = u"aa \xe4\xe4-\xe4\xe4"
            self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])

    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments

        text = "Hello there -- you goof-ball, use the -b option!"

        result = self.wrapper._split(text)
        self.check(result,
                   ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
                    "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])

    def test_break_on_hyphens(self):
        # Ensure that the break_on_hyphens attributes work
        text = "yaba daba-doo"
        self.check_wrap(text, 10, ["yaba daba-", "doo"],
                        break_on_hyphens=True)
        self.check_wrap(text, 10, ["yaba", "daba-doo"],
                        break_on_hyphens=False)

    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
        self.assertRaises(ValueError, wrap, text, 0)
        self.assertRaises(ValueError, wrap, text, -1)
class LongWordTestCase(BaseTestCase):
    '''Tests for wrapping text that contains words longer than the width.'''

    def setUp(self):
        self.wrapper = TextWrapper()
        self.text = '''\
Did you say "supercalifragilisticexpialidocious?"
How *do* you spell that odd word, anyways?
'''

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        self.check_wrap(self.text, 30,
                        ['Did you say "supercalifragilis',
                         'ticexpialidocious?" How *do*',
                         'you spell that odd word,',
                         'anyways?'])
        self.check_wrap(self.text, 50,
                        ['Did you say "supercalifragilisticexpialidocious?"',
                         'How *do* you spell that odd word, anyways?'])

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.
        # The indent (15) is wider than the width (10), so every
        # continuation line is the full indent plus a single character.
        indent = ' ' * 15
        self.check_wrap('-' * 10 + 'hello', 10,
                        ['----------',
                         indent + 'h',
                         indent + 'e',
                         indent + 'l',
                         indent + 'l',
                         indent + 'o'],
                        subsequent_indent=indent)

        # bug 1146.  Prevent a long word to be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width
        self.check_wrap(self.text, 12,
                        ['Did you say ',
                         '"supercalifr',
                         'agilisticexp',
                         'ialidocious?',
                         '" How *do*',
                         'you spell',
                         'that odd',
                         'word,',
                         'anyways?'])

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        expect = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How *do* you spell that odd',
                  'word, anyways?'
                  ]
        result = self.wrapper.wrap(self.text)
        self.check(result, expect)

        # Same thing with kwargs passed to standalone wrap() function.
        result = wrap(self.text, width=30, break_long_words=0)
        self.check(result, expect)
class IndentTestCases(BaseTestCase):
    '''Tests for the initial_indent and subsequent_indent options.'''

    # called before each test method
    def setUp(self):
        self.text = '''\
This paragraph will be filled, first without any indentation,
and then with some (including a hanging indent).'''

    def test_fill(self):
        # Test the fill() method

        expect = '''\
This paragraph will be filled, first
without any indentation, and then with
some (including a hanging indent).'''

        result = fill(self.text, 40)
        self.check(result, expect)

    def test_initial_indent(self):
        # Test initial_indent parameter

        # The five-space indent is what pushes "first" onto the second
        # line at width 40.
        expect = ["     This paragraph will be filled,",
                  "first without any indentation, and then",
                  "with some (including a hanging indent)."]
        result = wrap(self.text, 40, initial_indent="     ")
        self.check(result, expect)

        expect = "\n".join(expect)
        result = fill(self.text, 40, initial_indent="     ")
        self.check(result, expect)

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter

        # Bullet on the first line, continuation lines aligned with the
        # text after the bullet (both indents are four characters wide).
        expect = '''\
  * This paragraph will be filled, first
    without any indentation, and then
    with some (including a hanging
    indent).'''

        result = fill(self.text, 40,
                      initial_indent="  * ", subsequent_indent="    ")
        self.check(result, expect)
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    '''Tests for textwrap.dedent().'''

    def assertUnchanged(self, text):
        """assert that dedent() has no effect on 'text'"""
        self.assertEqual(text, dedent(text))

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertUnchanged(text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertUnchanged(text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n  This is indented."
        self.assertUnchanged(text)

        # Again, add a blank line.
        text = "Hello there.\n\n  Boo!\n"
        self.assertUnchanged(text)

    def test_dedent_even(self):
        # All lines indented by two spaces.
        text = "  Hello there.\n  How are ya?\n  Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(expect, dedent(text))

        # Same, with blank lines.
        text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

        # Now indent one of the blank lines.
        text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

    def test_dedent_uneven(self):
        # Lines indented unevenly.
        text = '''\
        def foo():
            while 1:
                return foo
        '''
        expect = '''\
def foo():
    while 1:
        return foo
'''
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

    # dedent() should not mangle internal tabs
    def test_dedent_preserve_internal_tabs(self):
        text = "  hello\tthere\n  how are\tyou?"
        expect = "hello\tthere\nhow are\tyou?"
        self.assertEqual(expect, dedent(text))

        # make sure that it preserves tabs when it's not making any
        # changes at all
        self.assertEqual(expect, dedent(expect))

    # dedent() should not mangle tabs in the margin (i.e.
    # tabs and spaces both count as margin, but are *not*
    # considered equivalent)
    def test_dedent_preserve_margin_tabs(self):
        text = "  hello there\n\thow are you?"
        self.assertUnchanged(text)

        # same effect even if we have 8 spaces
        text = "        hello there\n\thow are you?"
        self.assertUnchanged(text)

        # dedent() only removes whitespace that can be uniformly removed!
        text = "\thello there\n\thow are you?"
        expect = "hello there\nhow are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \thello there\n  \thow are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \t  hello there\n  \t  how are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \thello there\n  \t  how are you?"
        expect = "hello there\n  how are you?"
        self.assertEqual(expect, dedent(text))
def test_main():
    """Run every TestCase class of this module via test_support.run_unittest()."""
    test_support.run_unittest(WrapTestCase,
                              LongWordTestCase,
                              IndentTestCases,
                              DedentTestCase)
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()