Lib/test/test_textwrap.py

   1 #
   2 # Test suite for the textwrap module.
   3 #
   4 # Original tests written by Greg Ward <gward@python.net>.
   5 # Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
   6 # Currently maintained by Greg Ward.
   7 #
   8 # $Id$
   9 #
  10
  11 import unittest
  12 from test import test_support
  13
  14 from textwrap import TextWrapper, wrap, fill, dedent
  15
  16
  17 class BaseTestCase(unittest.TestCase):
  18     '''Parent class with utility methods for textwrap tests.'''
  19
  20     def show(self, textin):
  21         if isinstance(textin, list):
  22             result = []
  23             for i in range(len(textin)):
  24                 result.append("  %d: %r" % (i, textin[i]))
  25             result = '\n'.join(result)
  26         elif isinstance(textin, basestring):
  27             result = "  %s\n" % repr(textin)
  28         return result
  29
  30
  31     def check(self, result, expect):
  32         self.assertEquals(result, expect,
  33             'expected:\n%s\nbut got:\n%s' % (
  34                 self.show(expect), self.show(result)))
  35
  36     def check_wrap(self, text, width, expect, **kwargs):
  37         result = wrap(text, width, **kwargs)
  38         self.check(result, expect)
  39
  40     def check_split(self, text, expect):
  41         result = self.wrapper._split(text)
  42         self.assertEquals(result, expect,
  43                           "\nexpected %r\n"
  44                           "but got  %r" % (expect, result))
  45
  46
  47 class WrapTestCase(BaseTestCase):
  48
  49     def setUp(self):
  50         self.wrapper = TextWrapper(width=45)
  51
  52     def test_simple(self):
  53         # Simple case: just words, spaces, and a bit of punctuation
  54
  55         text = "Hello there, how are you this fine day?  I'm glad to hear it!"
  56
  57         self.check_wrap(text, 12,
  58                         ["Hello there,",
  59                          "how are you",
  60                          "this fine",
  61                          "day?  I'm",
  62                          "glad to hear",
  63                          "it!"])
  64         self.check_wrap(text, 42,
  65                         ["Hello there, how are you this fine day?",
  66                          "I'm glad to hear it!"])
  67         self.check_wrap(text, 80, [text])
  68
  69
  70     def test_whitespace(self):
  71         # Whitespace munging and end-of-sentence detection
  72
  73         text = """\
  74 This is a paragraph that already has
  75 line breaks.  But some of its lines are much longer than the others,
  76 so it needs to be wrapped.
  77 Some lines are \ttabbed too.
  78 What a mess!
  79 """
  80
  81         expect = ["This is a paragraph that already has line",
  82                   "breaks.  But some of its lines are much",
  83                   "longer than the others, so it needs to be",
  84                   "wrapped.  Some lines are  tabbed too.  What a",
  85                   "mess!"]
  86
  87         wrapper = TextWrapper(45, fix_sentence_endings=True)
  88         result = wrapper.wrap(text)
  89         self.check(result, expect)
  90
  91         result = wrapper.fill(text)
  92         self.check(result, '\n'.join(expect))
  93
  94     def test_fix_sentence_endings(self):
  95         wrapper = TextWrapper(60, fix_sentence_endings=True)
  96
  97         # SF #847346: ensure that fix_sentence_endings=True does the
  98         # right thing even on input short enough that it doesn't need to
  99         # be wrapped.
 100         text = "A short line. Note the single space."
 101         expect = ["A short line.  Note the single space."]
 102         self.check(wrapper.wrap(text), expect)
 103
 104         # Test some of the hairy end cases that _fix_sentence_endings()
 105         # is supposed to handle (the easy stuff is tested in
 106         # test_whitespace() above).
 107         text = "Well, Doctor? What do you think?"
 108         expect = ["Well, Doctor?  What do you think?"]
 109         self.check(wrapper.wrap(text), expect)
 110
 111         text = "Well, Doctor?\nWhat do you think?"
 112         self.check(wrapper.wrap(text), expect)
 113
 114         text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
 115         expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
 116         self.check(wrapper.wrap(text), expect)
 117
 118         wrapper.width = 20
 119         expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
 120         self.check(wrapper.wrap(text), expect)
 121
 122         text = 'And she said, "Go to hell!"\nCan you believe that?'
 123         expect = ['And she said, "Go to',
 124                   'hell!"  Can you',
 125                   'believe that?']
 126         self.check(wrapper.wrap(text), expect)
 127
 128         wrapper.width = 60
 129         expect = ['And she said, "Go to hell!"  Can you believe that?']
 130         self.check(wrapper.wrap(text), expect)
 131
 132         text = 'File stdio.h is nice.'
 133         expect = ['File stdio.h is nice.']
 134         self.check(wrapper.wrap(text), expect)
 135
 136     def test_wrap_short(self):
 137         # Wrapping to make short lines longer
 138
 139         text = "This is a\nshort paragraph."
 140
 141         self.check_wrap(text, 20, ["This is a short",
 142                                    "paragraph."])
 143         self.check_wrap(text, 40, ["This is a short paragraph."])
 144
 145
 146     def test_wrap_short_1line(self):
 147         # Test endcases
 148
 149         text = "This is a short line."
 150
 151         self.check_wrap(text, 30, ["This is a short line."])
 152         self.check_wrap(text, 30, ["(1) This is a short line."],
 153                         initial_indent="(1) ")
 154
 155
 156     def test_hyphenated(self):
 157         # Test breaking hyphenated words
 158
 159         text = ("this-is-a-useful-feature-for-"
 160                 "reformatting-posts-from-tim-peters'ly")
 161
 162         self.check_wrap(text, 40,
 163                         ["this-is-a-useful-feature-for-",
 164                          "reformatting-posts-from-tim-peters'ly"])
 165         self.check_wrap(text, 41,
 166                         ["this-is-a-useful-feature-for-",
 167                          "reformatting-posts-from-tim-peters'ly"])
 168         self.check_wrap(text, 42,
 169                         ["this-is-a-useful-feature-for-reformatting-",
 170                          "posts-from-tim-peters'ly"])
 171
 172     def test_hyphenated_numbers(self):
 173         # Test that hyphenated numbers (eg. dates) are not broken like words.
 174         text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
 175                 "released on 1994-02-15.")
 176
 177         self.check_wrap(text, 30, ['Python 1.0.0 was released on',
 178                                    '1994-01-26.  Python 1.0.1 was',
 179                                    'released on 1994-02-15.'])
 180         self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
 181                                    'Python 1.0.1 was released on 1994-02-15.'])
 182
 183         text = "I do all my shopping at 7-11."
 184         self.check_wrap(text, 25, ["I do all my shopping at",
 185                                    "7-11."])
 186         self.check_wrap(text, 27, ["I do all my shopping at",
 187                                    "7-11."])
 188         self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
 189
 190     def test_em_dash(self):
 191         # Test text with em-dashes
 192         text = "Em-dashes should be written -- thus."
 193         self.check_wrap(text, 25,
 194                         ["Em-dashes should be",
 195                          "written -- thus."])
 196
 197         # Probe the boundaries of the properly written em-dash,
 198         # ie. " -- ".
 199         self.check_wrap(text, 29,
 200                         ["Em-dashes should be written",
 201                          "-- thus."])
 202         expect = ["Em-dashes should be written --",
 203                   "thus."]
 204         self.check_wrap(text, 30, expect)
 205         self.check_wrap(text, 35, expect)
 206         self.check_wrap(text, 36,
 207                         ["Em-dashes should be written -- thus."])
 208
 209         # The improperly written em-dash is handled too, because
 210         # it's adjacent to non-whitespace on both sides.
 211         text = "You can also do--this or even---this."
 212         expect = ["You can also do",
 213                   "--this or even",
 214                   "---this."]
 215         self.check_wrap(text, 15, expect)
 216         self.check_wrap(text, 16, expect)
 217         expect = ["You can also do--",
 218                   "this or even---",
 219                   "this."]
 220         self.check_wrap(text, 17, expect)
 221         self.check_wrap(text, 19, expect)
 222         expect = ["You can also do--this or even",
 223                   "---this."]
 224         self.check_wrap(text, 29, expect)
 225         self.check_wrap(text, 31, expect)
 226         expect = ["You can also do--this or even---",
 227                   "this."]
 228         self.check_wrap(text, 32, expect)
 229         self.check_wrap(text, 35, expect)
 230
 231         # All of the above behaviour could be deduced by probing the
 232         # _split() method.
 233         text = "Here's an -- em-dash and--here's another---and another!"
 234         expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
 235                   "and", "--", "here's", " ", "another", "---",
 236                   "and", " ", "another!"]
 237         self.check_split(text, expect)
 238
 239         text = "and then--bam!--he was gone"
 240         expect = ["and", " ", "then", "--", "bam!", "--",
 241                   "he", " ", "was", " ", "gone"]
 242         self.check_split(text, expect)
 243
 244
 245     def test_unix_options (self):
 246         # Test that Unix-style command-line options are wrapped correctly.
 247         # Both Optik (OptionParser) and Docutils rely on this behaviour!
 248
 249         text = "You should use the -n option, or --dry-run in its long form."
 250         self.check_wrap(text, 20,
 251                         ["You should use the",
 252                          "-n option, or --dry-",
 253                          "run in its long",
 254                          "form."])
 255         self.check_wrap(text, 21,
 256                         ["You should use the -n",
 257                          "option, or --dry-run",
 258                          "in its long form."])
 259         expect = ["You should use the -n option, or",
 260                   "--dry-run in its long form."]
 261         self.check_wrap(text, 32, expect)
 262         self.check_wrap(text, 34, expect)
 263         self.check_wrap(text, 35, expect)
 264         self.check_wrap(text, 38, expect)
 265         expect = ["You should use the -n option, or --dry-",
 266                   "run in its long form."]
 267         self.check_wrap(text, 39, expect)
 268         self.check_wrap(text, 41, expect)
 269         expect = ["You should use the -n option, or --dry-run",
 270                   "in its long form."]
 271         self.check_wrap(text, 42, expect)
 272
 273         # Again, all of the above can be deduced from _split().
 274         text = "the -n option, or --dry-run or --dryrun"
 275         expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
 276                   "--dry-", "run", " ", "or", " ", "--dryrun"]
 277         self.check_split(text, expect)
 278
 279     def test_funky_hyphens (self):
 280         # Screwy edge cases cooked up by David Goodger.  All reported
 281         # in SF bug #596434.
 282         self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
 283         self.check_split("what the--", ["what", " ", "the--"])
 284         self.check_split("what the--.", ["what", " ", "the--."])
 285         self.check_split("--text--.", ["--text--."])
 286
 287         # When I first read bug #596434, this is what I thought David
 288         # was talking about.  I was wrong; these have always worked
 289         # fine.  The real problem is tested in test_funky_parens()
 290         # below...
 291         self.check_split("--option", ["--option"])
 292         self.check_split("--option-opt", ["--option-", "opt"])
 293         self.check_split("foo --option-opt bar",
 294                          ["foo", " ", "--option-", "opt", " ", "bar"])
 295
 296     def test_punct_hyphens(self):
 297         # Oh bother, SF #965425 found another problem with hyphens --
 298         # hyphenated words in single quotes weren't handled correctly.
 299         # In fact, the bug is that *any* punctuation around a hyphenated
 300         # word was handled incorrectly, except for a leading "--", which
 301         # was special-cased for Optik and Docutils.  So test a variety
 302         # of styles of punctuation around a hyphenated word.
 303         # (Actually this is based on an Optik bug report, #813077).
 304         self.check_split("the 'wibble-wobble' widget",
 305                          ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
 306         self.check_split('the "wibble-wobble" widget',
 307                          ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
 308         self.check_split("the (wibble-wobble) widget",
 309                          ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
 310         self.check_split("the ['wibble-wobble'] widget",
 311                          ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
 312
 313     def test_funky_parens (self):
 314         # Second part of SF bug #596434: long option strings inside
 315         # parentheses.
 316         self.check_split("foo (--option) bar",
 317                          ["foo", " ", "(--option)", " ", "bar"])
 318
 319         # Related stuff -- make sure parens work in simpler contexts.
 320         self.check_split("foo (bar) baz",
 321                          ["foo", " ", "(bar)", " ", "baz"])
 322         self.check_split("blah (ding dong), wubba",
 323                          ["blah", " ", "(ding", " ", "dong),",
 324                           " ", "wubba"])
 325
 326     def test_initial_whitespace(self):
 327         # SF bug #622849 reported inconsistent handling of leading
 328         # whitespace; let's test that a bit, shall we?
 329         text = " This is a sentence with leading whitespace."
 330         self.check_wrap(text, 50,
 331                         [" This is a sentence with leading whitespace."])
 332         self.check_wrap(text, 30,
 333                         [" This is a sentence with", "leading whitespace."])
 334
 335     def test_no_drop_whitespace(self):
 336         # SF patch #1581073
 337         text = " This is a    sentence with     much whitespace."
 338         self.check_wrap(text, 10,
 339                         [" This is a", "    ", "sentence ",
 340                          "with     ", "much white", "space."],
 341                         drop_whitespace=False)
 342
 343     if test_support.have_unicode:
 344         def test_unicode(self):
 345             # *Very* simple test of wrapping Unicode strings.  I'm sure
 346             # there's more to it than this, but let's at least make
 347             # sure textwrap doesn't crash on Unicode input!
 348             text = u"Hello there, how are you today?"
 349             self.check_wrap(text, 50, [u"Hello there, how are you today?"])
 350             self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
 351             olines = self.wrapper.wrap(text)
 352             assert isinstance(olines, list) and isinstance(olines[0], unicode)
 353             otext = self.wrapper.fill(text)
 354             assert isinstance(otext, unicode)
 355
 356     def test_split(self):
 357         # Ensure that the standard _split() method works as advertised
 358         # in the comments
 359
 360         text = "Hello there -- you goof-ball, use the -b option!"
 361
 362         result = self.wrapper._split(text)
 363         self.check(result,
 364              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
 365               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
 366
 367     def test_break_on_hyphens(self):
 368         # Ensure that the break_on_hyphens attributes work
 369         text = "yaba daba-doo"
 370         self.check_wrap(text, 10, ["yaba daba-", "doo"],
 371                         break_on_hyphens=True)
 372         self.check_wrap(text, 10, ["yaba", "daba-doo"],
 373                         break_on_hyphens=False)
 374
 375     def test_bad_width(self):
 376         # Ensure that width <= 0 is caught.
 377         text = "Whatever, it doesn't matter."
 378         self.assertRaises(ValueError, wrap, text, 0)
 379         self.assertRaises(ValueError, wrap, text, -1)
 380
 381
 382 class LongWordTestCase (BaseTestCase):
 383     def setUp(self):
 384         self.wrapper = TextWrapper()
 385         self.text = '''\
 386 Did you say "supercalifragilisticexpialidocious?"
 387 How *do* you spell that odd word, anyways?
 388 '''
 389
 390     def test_break_long(self):
 391         # Wrap text with long words and lots of punctuation
 392
 393         self.check_wrap(self.text, 30,
 394                         ['Did you say "supercalifragilis',
 395                          'ticexpialidocious?" How *do*',
 396                          'you spell that odd word,',
 397                          'anyways?'])
 398         self.check_wrap(self.text, 50,
 399                         ['Did you say "supercalifragilisticexpialidocious?"',
 400                          'How *do* you spell that odd word, anyways?'])
 401
 402         # SF bug 797650.  Prevent an infinite loop by making sure that at
 403         # least one character gets split off on every pass.
 404         self.check_wrap('-'*10+'hello', 10,
 405                         ['----------',
 406                          '               h',
 407                          '               e',
 408                          '               l',
 409                          '               l',
 410                          '               o'],
 411                         subsequent_indent = ' '*15)
 412
 413         # bug 1146.  Prevent a long word to be wrongly wrapped when the
 414         # preceding word is exactly one character shorter than the width
 415         self.check_wrap(self.text, 12,
 416                         ['Did you say ',
 417                          '"supercalifr',
 418                          'agilisticexp',
 419                          'ialidocious?',
 420                          '" How *do*',
 421                          'you spell',
 422                          'that odd',
 423                          'word,',
 424                          'anyways?'])
 425
 426     def test_nobreak_long(self):
 427         # Test with break_long_words disabled
 428         self.wrapper.break_long_words = 0
 429         self.wrapper.width = 30
 430         expect = ['Did you say',
 431                   '"supercalifragilisticexpialidocious?"',
 432                   'How *do* you spell that odd',
 433                   'word, anyways?'
 434                   ]
 435         result = self.wrapper.wrap(self.text)
 436         self.check(result, expect)
 437
 438         # Same thing with kwargs passed to standalone wrap() function.
 439         result = wrap(self.text, width=30, break_long_words=0)
 440         self.check(result, expect)
 441
 442
 443 class IndentTestCases(BaseTestCase):
 444
 445     # called before each test method
 446     def setUp(self):
 447         self.text = '''\
 448 This paragraph will be filled, first without any indentation,
 449 and then with some (including a hanging indent).'''
 450
 451
 452     def test_fill(self):
 453         # Test the fill() method
 454
 455         expect = '''\
 456 This paragraph will be filled, first
 457 without any indentation, and then with
 458 some (including a hanging indent).'''
 459
 460         result = fill(self.text, 40)
 461         self.check(result, expect)
 462
 463
 464     def test_initial_indent(self):
 465         # Test initial_indent parameter
 466
 467         expect = ["     This paragraph will be filled,",
 468                   "first without any indentation, and then",
 469                   "with some (including a hanging indent)."]
 470         result = wrap(self.text, 40, initial_indent="     ")
 471         self.check(result, expect)
 472
 473         expect = "\n".join(expect)
 474         result = fill(self.text, 40, initial_indent="     ")
 475         self.check(result, expect)
 476
 477
 478     def test_subsequent_indent(self):
 479         # Test subsequent_indent parameter
 480
 481         expect = '''\
 482   * This paragraph will be filled, first
 483     without any indentation, and then
 484     with some (including a hanging
 485     indent).'''
 486
 487         result = fill(self.text, 40,
 488                       initial_indent="  * ", subsequent_indent="    ")
 489         self.check(result, expect)
 490
 491
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
 494 class DedentTestCase(unittest.TestCase):
 495
 496     def assertUnchanged(self, text):
 497         """assert that dedent() has no effect on 'text'"""
 498         self.assertEquals(text, dedent(text))
 499
 500     def test_dedent_nomargin(self):
 501         # No lines indented.
 502         text = "Hello there.\nHow are you?\nOh good, I'm glad."
 503         self.assertUnchanged(text)
 504
 505         # Similar, with a blank line.
 506         text = "Hello there.\n\nBoo!"
 507         self.assertUnchanged(text)
 508
 509         # Some lines indented, but overall margin is still zero.
 510         text = "Hello there.\n  This is indented."
 511         self.assertUnchanged(text)
 512
 513         # Again, add a blank line.
 514         text = "Hello there.\n\n  Boo!\n"
 515         self.assertUnchanged(text)
 516
 517     def test_dedent_even(self):
 518         # All lines indented by two spaces.
 519         text = "  Hello there.\n  How are ya?\n  Oh good."
 520         expect = "Hello there.\nHow are ya?\nOh good."
 521         self.assertEquals(expect, dedent(text))
 522
 523         # Same, with blank lines.
 524         text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
 525         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 526         self.assertEquals(expect, dedent(text))
 527
 528         # Now indent one of the blank lines.
 529         text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
 530         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 531         self.assertEquals(expect, dedent(text))
 532
 533     def test_dedent_uneven(self):
 534         # Lines indented unevenly.
 535         text = '''\
 536         def foo():
 537             while 1:
 538                 return foo
 539         '''
 540         expect = '''\
 541 def foo():
 542     while 1:
 543         return foo
 544 '''
 545         self.assertEquals(expect, dedent(text))
 546
 547         # Uneven indentation with a blank line.
 548         text = "  Foo\n    Bar\n\n   Baz\n"
 549         expect = "Foo\n  Bar\n\n Baz\n"
 550         self.assertEquals(expect, dedent(text))
 551
 552         # Uneven indentation with a whitespace-only line.
 553         text = "  Foo\n    Bar\n \n   Baz\n"
 554         expect = "Foo\n  Bar\n\n Baz\n"
 555         self.assertEquals(expect, dedent(text))
 556
 557     # dedent() should not mangle internal tabs
 558     def test_dedent_preserve_internal_tabs(self):
 559         text = "  hello\tthere\n  how are\tyou?"
 560         expect = "hello\tthere\nhow are\tyou?"
 561         self.assertEquals(expect, dedent(text))
 562
 563         # make sure that it preserves tabs when it's not making any
 564         # changes at all
 565         self.assertEquals(expect, dedent(expect))
 566
 567     # dedent() should not mangle tabs in the margin (i.e.
 568     # tabs and spaces both count as margin, but are *not*
 569     # considered equivalent)
 570     def test_dedent_preserve_margin_tabs(self):
 571         text = "  hello there\n\thow are you?"
 572         self.assertUnchanged(text)
 573
 574         # same effect even if we have 8 spaces
 575         text = "        hello there\n\thow are you?"
 576         self.assertUnchanged(text)
 577
 578         # dedent() only removes whitespace that can be uniformly removed!
 579         text = "\thello there\n\thow are you?"
 580         expect = "hello there\nhow are you?"
 581         self.assertEquals(expect, dedent(text))
 582
 583         text = "  \thello there\n  \thow are you?"
 584         self.assertEquals(expect, dedent(text))
 585
 586         text = "  \t  hello there\n  \t  how are you?"
 587         self.assertEquals(expect, dedent(text))
 588
 589         text = "  \thello there\n  \t  how are you?"
 590         expect = "hello there\n  how are you?"
 591         self.assertEquals(expect, dedent(text))
 592
 593
 594 def test_main():
 595     test_support.run_unittest(WrapTestCase,
 596                               LongWordTestCase,
 597                               IndentTestCases,
 598                               DedentTestCase)
 599
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()