Issue #7051: Clarify behaviour of 'g' and 'G'-style formatting.
[python.git] / Lib / test / test_textwrap.py
blobc91e242dfdb8655e26386e406e7b706efb46d52f
2 # Test suite for the textwrap module.
4 # Original tests written by Greg Ward <gward@python.net>.
5 # Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
6 # Currently maintained by Greg Ward.
8 # $Id$
11 import unittest
12 from test import test_support
14 from textwrap import TextWrapper, wrap, fill, dedent
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.'''

    def show(self, textin):
        '''Format 'textin' for inclusion in a failure message.

        A list is rendered one element per line as " <index>: <repr>";
        a string is rendered as its repr followed by a newline.  Any
        other object falls back to its repr so that building the
        failure message can never itself raise.
        '''
        if isinstance(textin, list):
            result = '\n'.join([" %d: %r" % (i, item)
                                for i, item in enumerate(textin)])
        elif isinstance(textin, basestring):
            result = " %s\n" % repr(textin)
        else:
            # Previously 'result' was left unbound on this path.
            result = " %r\n" % (textin,)
        return result

    def check(self, result, expect):
        '''Assert that 'result' equals 'expect', showing both on failure.'''
        self.assertEqual(result, expect,
                         'expected:\n%s\nbut got:\n%s' % (
                             self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Wrap 'text' to 'width' (extra kwargs go to wrap()) and compare
        the resulting list of lines with 'expect'.'''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Compare self.wrapper._split(text) with 'expect'.

        Subclasses must set self.wrapper (normally in setUp()).
        '''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got %r" % (expect, result))
class WrapTestCase(BaseTestCase):
    '''Tests for wrap() and TextWrapper: plain prose, sentence endings,
    hyphens and dashes, option strings, whitespace and Unicode input.'''

    def setUp(self):
        # Shared wrapper used by check_split() and the Unicode tests.
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Simple case: just words, spaces, and a bit of punctuation

        text = "Hello there, how are you this fine day? I'm glad to hear it!"

        self.check_wrap(text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"])
        self.check_wrap(text, 42,
                        ["Hello there, how are you this fine day?",
                         "I'm glad to hear it!"])
        self.check_wrap(text, 80, [text])

    def test_whitespace(self):
        # Whitespace munging and end-of-sentence detection

        # Written as explicit literals so that the line structure and
        # the embedded tab cannot be altered by re-indentation.
        text = ("This is a paragraph that already has\n"
                "line breaks. But some of its lines are much longer"
                " than the others,\n"
                "so it needs to be wrapped.\n"
                "Some lines are \ttabbed too.\n"
                "What a mess!\n")

        # fix_sentence_endings=True puts exactly two spaces after each
        # sentence end; the tab expands to one extra space after "are".
        expect = ["This is a paragraph that already has line",
                  "breaks.  But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped.  Some lines are  tabbed too.  What a",
                  "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        result = wrapper.wrap(text)
        self.check(result, expect)

        result = wrapper.fill(text)
        self.check(result, '\n'.join(expect))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # SF #847346: ensure that fix_sentence_endings=True does the
        # right thing even on input short enough that it doesn't need to
        # be wrapped.
        text = "A short line. Note the single space."
        expect = ["A short line.  Note the single space."]
        self.check(wrapper.wrap(text), expect)

        # Test some of the hairy end cases that _fix_sentence_endings()
        # is supposed to handle (the easy stuff is tested in
        # test_whitespace() above).
        text = "Well, Doctor? What do you think?"
        expect = ["Well, Doctor?  What do you think?"]
        self.check(wrapper.wrap(text), expect)

        text = "Well, Doctor?\nWhat do you think?"
        self.check(wrapper.wrap(text), expect)

        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 20
        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
        self.check(wrapper.wrap(text), expect)

        text = 'And she said, "Go to hell!"\nCan you believe that?'
        expect = ['And she said, "Go to',
                  'hell!"  Can you',
                  'believe that?']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 60
        expect = ['And she said, "Go to hell!"  Can you believe that?']
        self.check(wrapper.wrap(text), expect)

        # A period inside a word ("stdio.h") is not a sentence ending.
        text = 'File stdio.h is nice.'
        expect = ['File stdio.h is nice.']
        self.check(wrapper.wrap(text), expect)

    def test_wrap_short(self):
        # Wrapping to make short lines longer

        text = "This is a\nshort paragraph."

        self.check_wrap(text, 20, ["This is a short",
                                   "paragraph."])
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # Test endcases

        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        self.check_wrap(text, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Test breaking hyphenated words

        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        self.check_wrap(text, 40,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 41,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_hyphenated_numbers(self):
        # Test that hyphenated numbers (eg. dates) are not broken like words.
        text = ("Python 1.0.0 was released on 1994-01-26. Python 1.0.1 was\n"
                "released on 1994-02-15.")

        self.check_wrap(text, 35, ['Python 1.0.0 was released on',
                                   '1994-01-26. Python 1.0.1 was',
                                   'released on 1994-02-15.'])
        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
                                   'Python 1.0.1 was released on 1994-02-15.'])

        text = "I do all my shopping at 7-11."
        self.check_wrap(text, 25, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 27, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Test text with em-dashes
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        expect = ["Em-dashes should be written --",
                  "thus."]
        self.check_wrap(text, 30, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.
        text = "You can also do--this or even---this."
        expect = ["You can also do",
                  "--this or even",
                  "---this."]
        self.check_wrap(text, 15, expect)
        self.check_wrap(text, 16, expect)
        expect = ["You can also do--",
                  "this or even---",
                  "this."]
        self.check_wrap(text, 17, expect)
        self.check_wrap(text, 19, expect)
        expect = ["You can also do--this or even",
                  "---this."]
        self.check_wrap(text, 29, expect)
        self.check_wrap(text, 31, expect)
        expect = ["You can also do--this or even---",
                  "this."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 35, expect)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, expect)

        text = "and then--bam!--he was gone"
        expect = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, expect)

    def test_unix_options(self):
        # Test that Unix-style command-line options are wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])
        expect = ["You should use the -n option, or",
                  "--dry-run in its long form."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 34, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 38, expect)
        expect = ["You should use the -n option, or --dry-",
                  "run in its long form."]
        self.check_wrap(text, 39, expect)
        self.check_wrap(text, 41, expect)
        expect = ["You should use the -n option, or --dry-run",
                  "in its long form."]
        self.check_wrap(text, 42, expect)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, expect)

    def test_funky_hyphens(self):
        # Screwy edge cases cooked up by David Goodger.  All reported
        # in SF bug #596434.
        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
        self.check_split("what the--", ["what", " ", "the--"])
        self.check_split("what the--.", ["what", " ", "the--."])
        self.check_split("--text--.", ["--text--."])

        # When I first read bug #596434, this is what I thought David
        # was talking about.  I was wrong; these have always worked
        # fine.  The real problem is tested in test_funky_parens()
        # below...
        self.check_split("--option", ["--option"])
        self.check_split("--option-opt", ["--option-", "opt"])
        self.check_split("foo --option-opt bar",
                         ["foo", " ", "--option-", "opt", " ", "bar"])

    def test_punct_hyphens(self):
        # Oh bother, SF #965425 found another problem with hyphens --
        # hyphenated words in single quotes weren't handled correctly.
        # In fact, the bug is that *any* punctuation around a hyphenated
        # word was handled incorrectly, except for a leading "--", which
        # was special-cased for Optik and Docutils.  So test a variety
        # of styles of punctuation around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        self.check_split("the 'wibble-wobble' widget",
                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
        self.check_split('the "wibble-wobble" widget',
                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
        self.check_split("the (wibble-wobble) widget",
                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
        self.check_split("the ['wibble-wobble'] widget",
                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])

    def test_funky_parens(self):
        # Second part of SF bug #596434: long option strings inside
        # parentheses.
        self.check_split("foo (--option) bar",
                         ["foo", " ", "(--option)", " ", "bar"])

        # Related stuff -- make sure parens work in simpler contexts.
        self.check_split("foo (bar) baz",
                         ["foo", " ", "(bar)", " ", "baz"])
        self.check_split("blah (ding dong), wubba",
                         ["blah", " ", "(ding", " ", "dong),",
                          " ", "wubba"])

    def test_initial_whitespace(self):
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_no_drop_whitespace(self):
        # SF patch #1581073
        # The runs of spaces are spelled out with explicit counts: with
        # drop_whitespace=False they must survive verbatim, and the
        # first run has to be long enough to occupy a line on its own.
        wide_gap = " " * 7
        narrow_gap = " " * 5
        text = (" This is a" + wide_gap + "sentence with" + narrow_gap +
                "much whitespace.")
        self.check_wrap(text, 10,
                        [" This is a", wide_gap, "sentence ",
                         "with" + narrow_gap, "much white", "space."],
                        drop_whitespace=False)

    if test_support.have_unicode:
        def test_unicode(self):
            # *Very* simple test of wrapping Unicode strings.  I'm sure
            # there's more to it than this, but let's at least make
            # sure textwrap doesn't crash on Unicode input!
            text = u"Hello there, how are you today?"
            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
            # Use assertTrue rather than a bare assert so the checks
            # still run when Python is started with -O.
            olines = self.wrapper.wrap(text)
            self.assertTrue(isinstance(olines, list) and
                            isinstance(olines[0], unicode))
            otext = self.wrapper.fill(text)
            self.assertTrue(isinstance(otext, unicode))

        def test_no_split_at_umlaut(self):
            text = u"Die Empf\xe4nger-Auswahl"
            self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])

        def test_umlaut_followed_by_dash(self):
            text = u"aa \xe4\xe4-\xe4\xe4"
            self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])

    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments

        text = "Hello there -- you goof-ball, use the -b option!"

        result = self.wrapper._split(text)
        self.check(result,
                   ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
                    "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])

    def test_break_on_hyphens(self):
        # Ensure that the break_on_hyphens attributes work
        text = "yaba daba-doo"
        self.check_wrap(text, 10, ["yaba daba-", "doo"],
                        break_on_hyphens=True)
        self.check_wrap(text, 10, ["yaba", "daba-doo"],
                        break_on_hyphens=False)

    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
        self.assertRaises(ValueError, wrap, text, 0)
        self.assertRaises(ValueError, wrap, text, -1)
class LongWordTestCase(BaseTestCase):
    '''Tests for words longer than the wrap width, with
    break_long_words both enabled and disabled.'''

    def setUp(self):
        self.wrapper = TextWrapper()
        # Explicit literals keep both lines flush-left regardless of
        # how this method is indented.
        self.text = ('Did you say "supercalifragilisticexpialidocious?"\n'
                     'How *do* you spell that odd word, anyways?\n')

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        self.check_wrap(self.text, 30,
                        ['Did you say "supercalifragilis',
                         'ticexpialidocious?" How *do*',
                         'you spell that odd word,',
                         'anyways?'])
        self.check_wrap(self.text, 50,
                        ['Did you say "supercalifragilisticexpialidocious?"',
                         'How *do* you spell that odd word, anyways?'])

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.  The indent
        # is wider than the width, so every continuation line carries
        # the full indent plus exactly one character.
        indent = ' ' * 15
        self.check_wrap('-' * 10 + 'hello', 10,
                        ['----------',
                         indent + 'h',
                         indent + 'e',
                         indent + 'l',
                         indent + 'l',
                         indent + 'o'],
                        subsequent_indent=indent)

        # bug 1146.  Prevent a long word to be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width
        self.check_wrap(self.text, 12,
                        ['Did you say ',
                         '"supercalifr',
                         'agilisticexp',
                         'ialidocious?',
                         '" How *do*',
                         'you spell',
                         'that odd',
                         'word,',
                         'anyways?'])

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        expect = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How *do* you spell that odd',
                  'word, anyways?'
                  ]
        result = self.wrapper.wrap(self.text)
        self.check(result, expect)

        # Same thing with kwargs passed to standalone wrap() function.
        result = wrap(self.text, width=30, break_long_words=0)
        self.check(result, expect)
class IndentTestCases(BaseTestCase):
    '''Tests for fill() and the initial_indent / subsequent_indent
    options of wrap() and fill().'''

    # called before each test method
    def setUp(self):
        self.text = ('This paragraph will be filled, first without any'
                     ' indentation,\n'
                     'and then with some (including a hanging indent).')

    def test_fill(self):
        # Test the fill() method

        expect = ('This paragraph will be filled, first\n'
                  'without any indentation, and then with\n'
                  'some (including a hanging indent).')

        result = fill(self.text, 40)
        self.check(result, expect)

    def test_initial_indent(self):
        # Test initial_indent parameter

        # Five spaces is the narrowest indent that pushes "first" onto
        # the second line at width 40, which the expected lines require.
        indent = " " * 5
        expect = [indent + "This paragraph will be filled,",
                  "first without any indentation, and then",
                  "with some (including a hanging indent)."]
        result = wrap(self.text, 40, initial_indent=indent)
        self.check(result, expect)

        expect = "\n".join(expect)
        result = fill(self.text, 40, initial_indent=indent)
        self.check(result, expect)

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter

        # Bullet on the first line, continuation lines aligned with the
        # text that follows the bullet (a hanging indent).
        bullet = "  * "
        hanging = " " * 4
        expect = (bullet + 'This paragraph will be filled, first\n' +
                  hanging + 'without any indentation, and then\n' +
                  hanging + 'with some (including a hanging\n' +
                  hanging + 'indent).')

        result = fill(self.text, 40,
                      initial_indent=bullet, subsequent_indent=hanging)
        self.check(result, expect)
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    '''Tests for textwrap.dedent().'''

    def assertUnchanged(self, text):
        """assert that dedent() has no effect on 'text'"""
        self.assertEqual(text, dedent(text))

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertUnchanged(text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertUnchanged(text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n This is indented."
        self.assertUnchanged(text)

        # Again, add a blank line.
        text = "Hello there.\n\n Boo!\n"
        self.assertUnchanged(text)

    def test_dedent_even(self):
        # All lines indented by two spaces.
        text = "  Hello there.\n  How are ya?\n  Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(expect, dedent(text))

        # Same, with blank lines.
        text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

        # Now indent one of the blank lines.
        text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

    def test_dedent_uneven(self):
        # Lines indented unevenly.  Only the margin common to every
        # line is removed; the relative indentation must survive.
        text = ("    def foo():\n"
                "        while 1:\n"
                "            return foo\n")
        expect = ("def foo():\n"
                  "    while 1:\n"
                  "        return foo\n")
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

    # dedent() should not mangle internal tabs
    def test_dedent_preserve_internal_tabs(self):
        text = " hello\tthere\n how are\tyou?"
        expect = "hello\tthere\nhow are\tyou?"
        self.assertEqual(expect, dedent(text))

        # make sure that it preserves tabs when it's not making any
        # changes at all
        self.assertEqual(expect, dedent(expect))

    # dedent() should not mangle tabs in the margin (i.e.
    # tabs and spaces both count as margin, but are *not*
    # considered equivalent)
    def test_dedent_preserve_margin_tabs(self):
        text = " hello there\n\thow are you?"
        self.assertUnchanged(text)

        # same effect even if we have 8 spaces
        text = " " * 8 + "hello there\n\thow are you?"
        self.assertUnchanged(text)

        # dedent() only removes whitespace that can be uniformly removed!
        text = "\thello there\n\thow are you?"
        expect = "hello there\nhow are you?"
        self.assertEqual(expect, dedent(text))

        text = " \thello there\n \thow are you?"
        self.assertEqual(expect, dedent(text))

        text = " \t hello there\n \t how are you?"
        self.assertEqual(expect, dedent(text))

        text = " \thello there\n \t how are you?"
        expect = "hello there\n how are you?"
        self.assertEqual(expect, dedent(text))
def test_main():
    """Hand all four textwrap test case classes to test_support.run_unittest()."""
    case_classes = (
        WrapTestCase,
        LongWordTestCase,
        IndentTestCases,
        DedentTestCase,
    )
    test_support.run_unittest(*case_classes)
# Run the whole suite when this file is executed as a script.
if "__main__" == __name__:
    test_main()