Lib/test/test_textwrap.py

   1 #
   2 # Test suite for the textwrap module.
   3 #
   4 # Original tests written by Greg Ward <gward@python.net>.
   5 # Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
   6 # Currently maintained by Greg Ward.
   7 #
   8 # $Id$
   9 #
  10
  11 import unittest
  12 from test import test_support
  13
  14 from textwrap import TextWrapper, wrap, fill, dedent
  15
  16
  17 class BaseTestCase(unittest.TestCase):
  18     '''Parent class with utility methods for textwrap tests.'''
  19
  20     def show(self, textin):
  21         if isinstance(textin, list):
  22             result = []
  23             for i in range(len(textin)):
  24                 result.append("  %d: %r" % (i, textin[i]))
  25             result = '\n'.join(result)
  26         elif isinstance(textin, basestring):
  27             result = "  %s\n" % repr(textin)
  28         return result
  29
  30
  31     def check(self, result, expect):
  32         self.assertEquals(result, expect,
  33             'expected:\n%s\nbut got:\n%s' % (
  34                 self.show(expect), self.show(result)))
  35
  36     def check_wrap(self, text, width, expect, **kwargs):
  37         result = wrap(text, width, **kwargs)
  38         self.check(result, expect)
  39
  40     def check_split(self, text, expect):
  41         result = self.wrapper._split(text)
  42         self.assertEquals(result, expect,
  43                           "\nexpected %r\n"
  44                           "but got  %r" % (expect, result))
  45
  46
  47 class WrapTestCase(BaseTestCase):
  48
  49     def setUp(self):
  50         self.wrapper = TextWrapper(width=45)
  51
  52     def test_simple(self):
  53         # Simple case: just words, spaces, and a bit of punctuation
  54
  55         text = "Hello there, how are you this fine day?  I'm glad to hear it!"
  56
  57         self.check_wrap(text, 12,
  58                         ["Hello there,",
  59                          "how are you",
  60                          "this fine",
  61                          "day?  I'm",
  62                          "glad to hear",
  63                          "it!"])
  64         self.check_wrap(text, 42,
  65                         ["Hello there, how are you this fine day?",
  66                          "I'm glad to hear it!"])
  67         self.check_wrap(text, 80, [text])
  68
  69
  70     def test_whitespace(self):
  71         # Whitespace munging and end-of-sentence detection
  72
  73         text = """\
  74 This is a paragraph that already has
  75 line breaks.  But some of its lines are much longer than the others,
  76 so it needs to be wrapped.
  77 Some lines are \ttabbed too.
  78 What a mess!
  79 """
  80
  81         expect = ["This is a paragraph that already has line",
  82                   "breaks.  But some of its lines are much",
  83                   "longer than the others, so it needs to be",
  84                   "wrapped.  Some lines are  tabbed too.  What a",
  85                   "mess!"]
  86
  87         wrapper = TextWrapper(45, fix_sentence_endings=True)
  88         result = wrapper.wrap(text)
  89         self.check(result, expect)
  90
  91         result = wrapper.fill(text)
  92         self.check(result, '\n'.join(expect))
  93
  94     def test_fix_sentence_endings(self):
  95         wrapper = TextWrapper(60, fix_sentence_endings=True)
  96
  97         # SF #847346: ensure that fix_sentence_endings=True does the
  98         # right thing even on input short enough that it doesn't need to
  99         # be wrapped.
 100         text = "A short line. Note the single space."
 101         expect = ["A short line.  Note the single space."]
 102         self.check(wrapper.wrap(text), expect)
 103
 104         # Test some of the hairy end cases that _fix_sentence_endings()
 105         # is supposed to handle (the easy stuff is tested in
 106         # test_whitespace() above).
 107         text = "Well, Doctor? What do you think?"
 108         expect = ["Well, Doctor?  What do you think?"]
 109         self.check(wrapper.wrap(text), expect)
 110
 111         text = "Well, Doctor?\nWhat do you think?"
 112         self.check(wrapper.wrap(text), expect)
 113
 114         text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
 115         expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
 116         self.check(wrapper.wrap(text), expect)
 117
 118         wrapper.width = 20
 119         expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
 120         self.check(wrapper.wrap(text), expect)
 121
 122         text = 'And she said, "Go to hell!"\nCan you believe that?'
 123         expect = ['And she said, "Go to',
 124                   'hell!"  Can you',
 125                   'believe that?']
 126         self.check(wrapper.wrap(text), expect)
 127
 128         wrapper.width = 60
 129         expect = ['And she said, "Go to hell!"  Can you believe that?']
 130         self.check(wrapper.wrap(text), expect)
 131
 132         text = 'File stdio.h is nice.'
 133         expect = ['File stdio.h is nice.']
 134         self.check(wrapper.wrap(text), expect)
 135
 136     def test_wrap_short(self):
 137         # Wrapping to make short lines longer
 138
 139         text = "This is a\nshort paragraph."
 140
 141         self.check_wrap(text, 20, ["This is a short",
 142                                    "paragraph."])
 143         self.check_wrap(text, 40, ["This is a short paragraph."])
 144
 145
 146     def test_wrap_short_1line(self):
 147         # Test endcases
 148
 149         text = "This is a short line."
 150
 151         self.check_wrap(text, 30, ["This is a short line."])
 152         self.check_wrap(text, 30, ["(1) This is a short line."],
 153                         initial_indent="(1) ")
 154
 155
 156     def test_hyphenated(self):
 157         # Test breaking hyphenated words
 158
 159         text = ("this-is-a-useful-feature-for-"
 160                 "reformatting-posts-from-tim-peters'ly")
 161
 162         self.check_wrap(text, 40,
 163                         ["this-is-a-useful-feature-for-",
 164                          "reformatting-posts-from-tim-peters'ly"])
 165         self.check_wrap(text, 41,
 166                         ["this-is-a-useful-feature-for-",
 167                          "reformatting-posts-from-tim-peters'ly"])
 168         self.check_wrap(text, 42,
 169                         ["this-is-a-useful-feature-for-reformatting-",
 170                          "posts-from-tim-peters'ly"])
 171
 172     def test_hyphenated_numbers(self):
 173         # Test that hyphenated numbers (eg. dates) are not broken like words.
 174         text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
 175                 "released on 1994-02-15.")
 176
 177         self.check_wrap(text, 35, ['Python 1.0.0 was released on',
 178                                    '1994-01-26.  Python 1.0.1 was',
 179                                    'released on 1994-02-15.'])
 180         self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
 181                                    'Python 1.0.1 was released on 1994-02-15.'])
 182
 183         text = "I do all my shopping at 7-11."
 184         self.check_wrap(text, 25, ["I do all my shopping at",
 185                                    "7-11."])
 186         self.check_wrap(text, 27, ["I do all my shopping at",
 187                                    "7-11."])
 188         self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
 189
 190     def test_em_dash(self):
 191         # Test text with em-dashes
 192         text = "Em-dashes should be written -- thus."
 193         self.check_wrap(text, 25,
 194                         ["Em-dashes should be",
 195                          "written -- thus."])
 196
 197         # Probe the boundaries of the properly written em-dash,
 198         # ie. " -- ".
 199         self.check_wrap(text, 29,
 200                         ["Em-dashes should be written",
 201                          "-- thus."])
 202         expect = ["Em-dashes should be written --",
 203                   "thus."]
 204         self.check_wrap(text, 30, expect)
 205         self.check_wrap(text, 35, expect)
 206         self.check_wrap(text, 36,
 207                         ["Em-dashes should be written -- thus."])
 208
 209         # The improperly written em-dash is handled too, because
 210         # it's adjacent to non-whitespace on both sides.
 211         text = "You can also do--this or even---this."
 212         expect = ["You can also do",
 213                   "--this or even",
 214                   "---this."]
 215         self.check_wrap(text, 15, expect)
 216         self.check_wrap(text, 16, expect)
 217         expect = ["You can also do--",
 218                   "this or even---",
 219                   "this."]
 220         self.check_wrap(text, 17, expect)
 221         self.check_wrap(text, 19, expect)
 222         expect = ["You can also do--this or even",
 223                   "---this."]
 224         self.check_wrap(text, 29, expect)
 225         self.check_wrap(text, 31, expect)
 226         expect = ["You can also do--this or even---",
 227                   "this."]
 228         self.check_wrap(text, 32, expect)
 229         self.check_wrap(text, 35, expect)
 230
 231         # All of the above behaviour could be deduced by probing the
 232         # _split() method.
 233         text = "Here's an -- em-dash and--here's another---and another!"
 234         expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
 235                   "and", "--", "here's", " ", "another", "---",
 236                   "and", " ", "another!"]
 237         self.check_split(text, expect)
 238
 239         text = "and then--bam!--he was gone"
 240         expect = ["and", " ", "then", "--", "bam!", "--",
 241                   "he", " ", "was", " ", "gone"]
 242         self.check_split(text, expect)
 243
 244
 245     def test_unix_options (self):
 246         # Test that Unix-style command-line options are wrapped correctly.
 247         # Both Optik (OptionParser) and Docutils rely on this behaviour!
 248
 249         text = "You should use the -n option, or --dry-run in its long form."
 250         self.check_wrap(text, 20,
 251                         ["You should use the",
 252                          "-n option, or --dry-",
 253                          "run in its long",
 254                          "form."])
 255         self.check_wrap(text, 21,
 256                         ["You should use the -n",
 257                          "option, or --dry-run",
 258                          "in its long form."])
 259         expect = ["You should use the -n option, or",
 260                   "--dry-run in its long form."]
 261         self.check_wrap(text, 32, expect)
 262         self.check_wrap(text, 34, expect)
 263         self.check_wrap(text, 35, expect)
 264         self.check_wrap(text, 38, expect)
 265         expect = ["You should use the -n option, or --dry-",
 266                   "run in its long form."]
 267         self.check_wrap(text, 39, expect)
 268         self.check_wrap(text, 41, expect)
 269         expect = ["You should use the -n option, or --dry-run",
 270                   "in its long form."]
 271         self.check_wrap(text, 42, expect)
 272
 273         # Again, all of the above can be deduced from _split().
 274         text = "the -n option, or --dry-run or --dryrun"
 275         expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
 276                   "--dry-", "run", " ", "or", " ", "--dryrun"]
 277         self.check_split(text, expect)
 278
 279     def test_funky_hyphens (self):
 280         # Screwy edge cases cooked up by David Goodger.  All reported
 281         # in SF bug #596434.
 282         self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
 283         self.check_split("what the--", ["what", " ", "the--"])
 284         self.check_split("what the--.", ["what", " ", "the--."])
 285         self.check_split("--text--.", ["--text--."])
 286
 287         # When I first read bug #596434, this is what I thought David
 288         # was talking about.  I was wrong; these have always worked
 289         # fine.  The real problem is tested in test_funky_parens()
 290         # below...
 291         self.check_split("--option", ["--option"])
 292         self.check_split("--option-opt", ["--option-", "opt"])
 293         self.check_split("foo --option-opt bar",
 294                          ["foo", " ", "--option-", "opt", " ", "bar"])
 295
 296     def test_punct_hyphens(self):
 297         # Oh bother, SF #965425 found another problem with hyphens --
 298         # hyphenated words in single quotes weren't handled correctly.
 299         # In fact, the bug is that *any* punctuation around a hyphenated
 300         # word was handled incorrectly, except for a leading "--", which
 301         # was special-cased for Optik and Docutils.  So test a variety
 302         # of styles of punctuation around a hyphenated word.
 303         # (Actually this is based on an Optik bug report, #813077).
 304         self.check_split("the 'wibble-wobble' widget",
 305                          ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
 306         self.check_split('the "wibble-wobble" widget',
 307                          ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
 308         self.check_split("the (wibble-wobble) widget",
 309                          ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
 310         self.check_split("the ['wibble-wobble'] widget",
 311                          ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
 312
 313     def test_funky_parens (self):
 314         # Second part of SF bug #596434: long option strings inside
 315         # parentheses.
 316         self.check_split("foo (--option) bar",
 317                          ["foo", " ", "(--option)", " ", "bar"])
 318
 319         # Related stuff -- make sure parens work in simpler contexts.
 320         self.check_split("foo (bar) baz",
 321                          ["foo", " ", "(bar)", " ", "baz"])
 322         self.check_split("blah (ding dong), wubba",
 323                          ["blah", " ", "(ding", " ", "dong),",
 324                           " ", "wubba"])
 325
 326     def test_initial_whitespace(self):
 327         # SF bug #622849 reported inconsistent handling of leading
 328         # whitespace; let's test that a bit, shall we?
 329         text = " This is a sentence with leading whitespace."
 330         self.check_wrap(text, 50,
 331                         [" This is a sentence with leading whitespace."])
 332         self.check_wrap(text, 30,
 333                         [" This is a sentence with", "leading whitespace."])
 334
 335     def test_no_drop_whitespace(self):
 336         # SF patch #1581073
 337         text = " This is a    sentence with     much whitespace."
 338         self.check_wrap(text, 10,
 339                         [" This is a", "    ", "sentence ",
 340                          "with     ", "much white", "space."],
 341                         drop_whitespace=False)
 342
 343     if test_support.have_unicode:
 344         def test_unicode(self):
 345             # *Very* simple test of wrapping Unicode strings.  I'm sure
 346             # there's more to it than this, but let's at least make
 347             # sure textwrap doesn't crash on Unicode input!
 348             text = u"Hello there, how are you today?"
 349             self.check_wrap(text, 50, [u"Hello there, how are you today?"])
 350             self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
 351             olines = self.wrapper.wrap(text)
 352             assert isinstance(olines, list) and isinstance(olines[0], unicode)
 353             otext = self.wrapper.fill(text)
 354             assert isinstance(otext, unicode)
 355
 356         def test_no_split_at_umlaut(self):
 357             text = u"Die Empf\xe4nger-Auswahl"
 358             self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])
 359
 360         def test_umlaut_followed_by_dash(self):
 361             text = u"aa \xe4\xe4-\xe4\xe4"
 362             self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])
 363
 364     def test_split(self):
 365         # Ensure that the standard _split() method works as advertised
 366         # in the comments
 367
 368         text = "Hello there -- you goof-ball, use the -b option!"
 369
 370         result = self.wrapper._split(text)
 371         self.check(result,
 372              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
 373               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
 374
 375     def test_break_on_hyphens(self):
 376         # Ensure that the break_on_hyphens attributes work
 377         text = "yaba daba-doo"
 378         self.check_wrap(text, 10, ["yaba daba-", "doo"],
 379                         break_on_hyphens=True)
 380         self.check_wrap(text, 10, ["yaba", "daba-doo"],
 381                         break_on_hyphens=False)
 382
 383     def test_bad_width(self):
 384         # Ensure that width <= 0 is caught.
 385         text = "Whatever, it doesn't matter."
 386         self.assertRaises(ValueError, wrap, text, 0)
 387         self.assertRaises(ValueError, wrap, text, -1)
 388
 389
 390 class LongWordTestCase (BaseTestCase):
 391     def setUp(self):
 392         self.wrapper = TextWrapper()
 393         self.text = '''\
 394 Did you say "supercalifragilisticexpialidocious?"
 395 How *do* you spell that odd word, anyways?
 396 '''
 397
 398     def test_break_long(self):
 399         # Wrap text with long words and lots of punctuation
 400
 401         self.check_wrap(self.text, 30,
 402                         ['Did you say "supercalifragilis',
 403                          'ticexpialidocious?" How *do*',
 404                          'you spell that odd word,',
 405                          'anyways?'])
 406         self.check_wrap(self.text, 50,
 407                         ['Did you say "supercalifragilisticexpialidocious?"',
 408                          'How *do* you spell that odd word, anyways?'])
 409
 410         # SF bug 797650.  Prevent an infinite loop by making sure that at
 411         # least one character gets split off on every pass.
 412         self.check_wrap('-'*10+'hello', 10,
 413                         ['----------',
 414                          '               h',
 415                          '               e',
 416                          '               l',
 417                          '               l',
 418                          '               o'],
 419                         subsequent_indent = ' '*15)
 420
 421         # bug 1146.  Prevent a long word to be wrongly wrapped when the
 422         # preceding word is exactly one character shorter than the width
 423         self.check_wrap(self.text, 12,
 424                         ['Did you say ',
 425                          '"supercalifr',
 426                          'agilisticexp',
 427                          'ialidocious?',
 428                          '" How *do*',
 429                          'you spell',
 430                          'that odd',
 431                          'word,',
 432                          'anyways?'])
 433
 434     def test_nobreak_long(self):
 435         # Test with break_long_words disabled
 436         self.wrapper.break_long_words = 0
 437         self.wrapper.width = 30
 438         expect = ['Did you say',
 439                   '"supercalifragilisticexpialidocious?"',
 440                   'How *do* you spell that odd',
 441                   'word, anyways?'
 442                   ]
 443         result = self.wrapper.wrap(self.text)
 444         self.check(result, expect)
 445
 446         # Same thing with kwargs passed to standalone wrap() function.
 447         result = wrap(self.text, width=30, break_long_words=0)
 448         self.check(result, expect)
 449
 450
 451 class IndentTestCases(BaseTestCase):
 452
 453     # called before each test method
 454     def setUp(self):
 455         self.text = '''\
 456 This paragraph will be filled, first without any indentation,
 457 and then with some (including a hanging indent).'''
 458
 459
 460     def test_fill(self):
 461         # Test the fill() method
 462
 463         expect = '''\
 464 This paragraph will be filled, first
 465 without any indentation, and then with
 466 some (including a hanging indent).'''
 467
 468         result = fill(self.text, 40)
 469         self.check(result, expect)
 470
 471
 472     def test_initial_indent(self):
 473         # Test initial_indent parameter
 474
 475         expect = ["     This paragraph will be filled,",
 476                   "first without any indentation, and then",
 477                   "with some (including a hanging indent)."]
 478         result = wrap(self.text, 40, initial_indent="     ")
 479         self.check(result, expect)
 480
 481         expect = "\n".join(expect)
 482         result = fill(self.text, 40, initial_indent="     ")
 483         self.check(result, expect)
 484
 485
 486     def test_subsequent_indent(self):
 487         # Test subsequent_indent parameter
 488
 489         expect = '''\
 490   * This paragraph will be filled, first
 491     without any indentation, and then
 492     with some (including a hanging
 493     indent).'''
 494
 495         result = fill(self.text, 40,
 496                       initial_indent="  * ", subsequent_indent="    ")
 497         self.check(result, expect)
 498
 499
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
 502 class DedentTestCase(unittest.TestCase):
 503
 504     def assertUnchanged(self, text):
 505         """assert that dedent() has no effect on 'text'"""
 506         self.assertEquals(text, dedent(text))
 507
 508     def test_dedent_nomargin(self):
 509         # No lines indented.
 510         text = "Hello there.\nHow are you?\nOh good, I'm glad."
 511         self.assertUnchanged(text)
 512
 513         # Similar, with a blank line.
 514         text = "Hello there.\n\nBoo!"
 515         self.assertUnchanged(text)
 516
 517         # Some lines indented, but overall margin is still zero.
 518         text = "Hello there.\n  This is indented."
 519         self.assertUnchanged(text)
 520
 521         # Again, add a blank line.
 522         text = "Hello there.\n\n  Boo!\n"
 523         self.assertUnchanged(text)
 524
 525     def test_dedent_even(self):
 526         # All lines indented by two spaces.
 527         text = "  Hello there.\n  How are ya?\n  Oh good."
 528         expect = "Hello there.\nHow are ya?\nOh good."
 529         self.assertEquals(expect, dedent(text))
 530
 531         # Same, with blank lines.
 532         text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
 533         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 534         self.assertEquals(expect, dedent(text))
 535
 536         # Now indent one of the blank lines.
 537         text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
 538         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 539         self.assertEquals(expect, dedent(text))
 540
 541     def test_dedent_uneven(self):
 542         # Lines indented unevenly.
 543         text = '''\
 544         def foo():
 545             while 1:
 546                 return foo
 547         '''
 548         expect = '''\
 549 def foo():
 550     while 1:
 551         return foo
 552 '''
 553         self.assertEquals(expect, dedent(text))
 554
 555         # Uneven indentation with a blank line.
 556         text = "  Foo\n    Bar\n\n   Baz\n"
 557         expect = "Foo\n  Bar\n\n Baz\n"
 558         self.assertEquals(expect, dedent(text))
 559
 560         # Uneven indentation with a whitespace-only line.
 561         text = "  Foo\n    Bar\n \n   Baz\n"
 562         expect = "Foo\n  Bar\n\n Baz\n"
 563         self.assertEquals(expect, dedent(text))
 564
 565     # dedent() should not mangle internal tabs
 566     def test_dedent_preserve_internal_tabs(self):
 567         text = "  hello\tthere\n  how are\tyou?"
 568         expect = "hello\tthere\nhow are\tyou?"
 569         self.assertEquals(expect, dedent(text))
 570
 571         # make sure that it preserves tabs when it's not making any
 572         # changes at all
 573         self.assertEquals(expect, dedent(expect))
 574
 575     # dedent() should not mangle tabs in the margin (i.e.
 576     # tabs and spaces both count as margin, but are *not*
 577     # considered equivalent)
 578     def test_dedent_preserve_margin_tabs(self):
 579         text = "  hello there\n\thow are you?"
 580         self.assertUnchanged(text)
 581
 582         # same effect even if we have 8 spaces
 583         text = "        hello there\n\thow are you?"
 584         self.assertUnchanged(text)
 585
 586         # dedent() only removes whitespace that can be uniformly removed!
 587         text = "\thello there\n\thow are you?"
 588         expect = "hello there\nhow are you?"
 589         self.assertEquals(expect, dedent(text))
 590
 591         text = "  \thello there\n  \thow are you?"
 592         self.assertEquals(expect, dedent(text))
 593
 594         text = "  \t  hello there\n  \t  how are you?"
 595         self.assertEquals(expect, dedent(text))
 596
 597         text = "  \thello there\n  \t  how are you?"
 598         expect = "hello there\n  how are you?"
 599         self.assertEquals(expect, dedent(text))
 600
 601
 602 def test_main():
 603     test_support.run_unittest(WrapTestCase,
 604                               LongWordTestCase,
 605                               IndentTestCases,
 606                               DedentTestCase)
 607
 608 if __name__ == '__main__':
 609     test_main()