Lib/test/test_textwrap.py

   1 #
   2 # Test suite for the textwrap module.
   3 #
   4 # Original tests written by Greg Ward <gward@python.net>.
   5 # Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
   6 # Currently maintained by Greg Ward.
   7 #
   8 # $Id$
   9 #
  10
  11 import unittest
  12 from test import test_support
  13
  14 from textwrap import TextWrapper, wrap, fill, dedent
  15
  16
  17 class BaseTestCase(unittest.TestCase):
  18     '''Parent class with utility methods for textwrap tests.'''
  19
  20     def show(self, textin):
  21         if isinstance(textin, list):
  22             result = []
  23             for i in range(len(textin)):
  24                 result.append("  %d: %r" % (i, textin[i]))
  25             result = '\n'.join(result)
  26         elif isinstance(textin, basestring):
  27             result = "  %s\n" % repr(textin)
  28         return result
  29
  30
  31     def check(self, result, expect):
  32         self.assertEquals(result, expect,
  33             'expected:\n%s\nbut got:\n%s' % (
  34                 self.show(expect), self.show(result)))
  35
  36     def check_wrap(self, text, width, expect, **kwargs):
  37         result = wrap(text, width, **kwargs)
  38         self.check(result, expect)
  39
  40     def check_split(self, text, expect):
  41         result = self.wrapper._split(text)
  42         self.assertEquals(result, expect,
  43                           "\nexpected %r\n"
  44                           "but got  %r" % (expect, result))
  45
  46
  47 class WrapTestCase(BaseTestCase):
  48
  49     def setUp(self):
  50         self.wrapper = TextWrapper(width=45)
  51
  52     def test_simple(self):
  53         # Simple case: just words, spaces, and a bit of punctuation
  54
  55         text = "Hello there, how are you this fine day?  I'm glad to hear it!"
  56
  57         self.check_wrap(text, 12,
  58                         ["Hello there,",
  59                          "how are you",
  60                          "this fine",
  61                          "day?  I'm",
  62                          "glad to hear",
  63                          "it!"])
  64         self.check_wrap(text, 42,
  65                         ["Hello there, how are you this fine day?",
  66                          "I'm glad to hear it!"])
  67         self.check_wrap(text, 80, [text])
  68
  69
  70     def test_whitespace(self):
  71         # Whitespace munging and end-of-sentence detection
  72
  73         text = """\
  74 This is a paragraph that already has
  75 line breaks.  But some of its lines are much longer than the others,
  76 so it needs to be wrapped.
  77 Some lines are \ttabbed too.
  78 What a mess!
  79 """
  80
  81         expect = ["This is a paragraph that already has line",
  82                   "breaks.  But some of its lines are much",
  83                   "longer than the others, so it needs to be",
  84                   "wrapped.  Some lines are  tabbed too.  What a",
  85                   "mess!"]
  86
  87         wrapper = TextWrapper(45, fix_sentence_endings=True)
  88         result = wrapper.wrap(text)
  89         self.check(result, expect)
  90
  91         result = wrapper.fill(text)
  92         self.check(result, '\n'.join(expect))
  93
  94     def test_fix_sentence_endings(self):
  95         wrapper = TextWrapper(60, fix_sentence_endings=True)
  96
  97         # SF #847346: ensure that fix_sentence_endings=True does the
  98         # right thing even on input short enough that it doesn't need to
  99         # be wrapped.
 100         text = "A short line. Note the single space."
 101         expect = ["A short line.  Note the single space."]
 102         self.check(wrapper.wrap(text), expect)
 103
 104         # Test some of the hairy end cases that _fix_sentence_endings()
 105         # is supposed to handle (the easy stuff is tested in
 106         # test_whitespace() above).
 107         text = "Well, Doctor? What do you think?"
 108         expect = ["Well, Doctor?  What do you think?"]
 109         self.check(wrapper.wrap(text), expect)
 110
 111         text = "Well, Doctor?\nWhat do you think?"
 112         self.check(wrapper.wrap(text), expect)
 113
 114         text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
 115         expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
 116         self.check(wrapper.wrap(text), expect)
 117
 118         wrapper.width = 20
 119         expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
 120         self.check(wrapper.wrap(text), expect)
 121
 122         text = 'And she said, "Go to hell!"\nCan you believe that?'
 123         expect = ['And she said, "Go to',
 124                   'hell!"  Can you',
 125                   'believe that?']
 126         self.check(wrapper.wrap(text), expect)
 127
 128         wrapper.width = 60
 129         expect = ['And she said, "Go to hell!"  Can you believe that?']
 130         self.check(wrapper.wrap(text), expect)
 131
 132         text = 'File stdio.h is nice.'
 133         expect = ['File stdio.h is nice.']
 134         self.check(wrapper.wrap(text), expect)
 135
 136     def test_wrap_short(self):
 137         # Wrapping to make short lines longer
 138
 139         text = "This is a\nshort paragraph."
 140
 141         self.check_wrap(text, 20, ["This is a short",
 142                                    "paragraph."])
 143         self.check_wrap(text, 40, ["This is a short paragraph."])
 144
 145
 146     def test_wrap_short_1line(self):
 147         # Test endcases
 148
 149         text = "This is a short line."
 150
 151         self.check_wrap(text, 30, ["This is a short line."])
 152         self.check_wrap(text, 30, ["(1) This is a short line."],
 153                         initial_indent="(1) ")
 154
 155
 156     def test_hyphenated(self):
 157         # Test breaking hyphenated words
 158
 159         text = ("this-is-a-useful-feature-for-"
 160                 "reformatting-posts-from-tim-peters'ly")
 161
 162         self.check_wrap(text, 40,
 163                         ["this-is-a-useful-feature-for-",
 164                          "reformatting-posts-from-tim-peters'ly"])
 165         self.check_wrap(text, 41,
 166                         ["this-is-a-useful-feature-for-",
 167                          "reformatting-posts-from-tim-peters'ly"])
 168         self.check_wrap(text, 42,
 169                         ["this-is-a-useful-feature-for-reformatting-",
 170                          "posts-from-tim-peters'ly"])
 171
 172     def test_hyphenated_numbers(self):
 173         # Test that hyphenated numbers (eg. dates) are not broken like words.
 174         text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
 175                 "released on 1994-02-15.")
 176
 177         self.check_wrap(text, 35, ['Python 1.0.0 was released on',
 178                                    '1994-01-26.  Python 1.0.1 was',
 179                                    'released on 1994-02-15.'])
 180         self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
 181                                    'Python 1.0.1 was released on 1994-02-15.'])
 182
 183         text = "I do all my shopping at 7-11."
 184         self.check_wrap(text, 25, ["I do all my shopping at",
 185                                    "7-11."])
 186         self.check_wrap(text, 27, ["I do all my shopping at",
 187                                    "7-11."])
 188         self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
 189
 190     def test_em_dash(self):
 191         # Test text with em-dashes
 192         text = "Em-dashes should be written -- thus."
 193         self.check_wrap(text, 25,
 194                         ["Em-dashes should be",
 195                          "written -- thus."])
 196
 197         # Probe the boundaries of the properly written em-dash,
 198         # ie. " -- ".
 199         self.check_wrap(text, 29,
 200                         ["Em-dashes should be written",
 201                          "-- thus."])
 202         expect = ["Em-dashes should be written --",
 203                   "thus."]
 204         self.check_wrap(text, 30, expect)
 205         self.check_wrap(text, 35, expect)
 206         self.check_wrap(text, 36,
 207                         ["Em-dashes should be written -- thus."])
 208
 209         # The improperly written em-dash is handled too, because
 210         # it's adjacent to non-whitespace on both sides.
 211         text = "You can also do--this or even---this."
 212         expect = ["You can also do",
 213                   "--this or even",
 214                   "---this."]
 215         self.check_wrap(text, 15, expect)
 216         self.check_wrap(text, 16, expect)
 217         expect = ["You can also do--",
 218                   "this or even---",
 219                   "this."]
 220         self.check_wrap(text, 17, expect)
 221         self.check_wrap(text, 19, expect)
 222         expect = ["You can also do--this or even",
 223                   "---this."]
 224         self.check_wrap(text, 29, expect)
 225         self.check_wrap(text, 31, expect)
 226         expect = ["You can also do--this or even---",
 227                   "this."]
 228         self.check_wrap(text, 32, expect)
 229         self.check_wrap(text, 35, expect)
 230
 231         # All of the above behaviour could be deduced by probing the
 232         # _split() method.
 233         text = "Here's an -- em-dash and--here's another---and another!"
 234         expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
 235                   "and", "--", "here's", " ", "another", "---",
 236                   "and", " ", "another!"]
 237         self.check_split(text, expect)
 238
 239         text = "and then--bam!--he was gone"
 240         expect = ["and", " ", "then", "--", "bam!", "--",
 241                   "he", " ", "was", " ", "gone"]
 242         self.check_split(text, expect)
 243
 244
 245     def test_unix_options (self):
 246         # Test that Unix-style command-line options are wrapped correctly.
 247         # Both Optik (OptionParser) and Docutils rely on this behaviour!
 248
 249         text = "You should use the -n option, or --dry-run in its long form."
 250         self.check_wrap(text, 20,
 251                         ["You should use the",
 252                          "-n option, or --dry-",
 253                          "run in its long",
 254                          "form."])
 255         self.check_wrap(text, 21,
 256                         ["You should use the -n",
 257                          "option, or --dry-run",
 258                          "in its long form."])
 259         expect = ["You should use the -n option, or",
 260                   "--dry-run in its long form."]
 261         self.check_wrap(text, 32, expect)
 262         self.check_wrap(text, 34, expect)
 263         self.check_wrap(text, 35, expect)
 264         self.check_wrap(text, 38, expect)
 265         expect = ["You should use the -n option, or --dry-",
 266                   "run in its long form."]
 267         self.check_wrap(text, 39, expect)
 268         self.check_wrap(text, 41, expect)
 269         expect = ["You should use the -n option, or --dry-run",
 270                   "in its long form."]
 271         self.check_wrap(text, 42, expect)
 272
 273         # Again, all of the above can be deduced from _split().
 274         text = "the -n option, or --dry-run or --dryrun"
 275         expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
 276                   "--dry-", "run", " ", "or", " ", "--dryrun"]
 277         self.check_split(text, expect)
 278
 279     def test_funky_hyphens (self):
 280         # Screwy edge cases cooked up by David Goodger.  All reported
 281         # in SF bug #596434.
 282         self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
 283         self.check_split("what the--", ["what", " ", "the--"])
 284         self.check_split("what the--.", ["what", " ", "the--."])
 285         self.check_split("--text--.", ["--text--."])
 286
 287         # When I first read bug #596434, this is what I thought David
 288         # was talking about.  I was wrong; these have always worked
 289         # fine.  The real problem is tested in test_funky_parens()
 290         # below...
 291         self.check_split("--option", ["--option"])
 292         self.check_split("--option-opt", ["--option-", "opt"])
 293         self.check_split("foo --option-opt bar",
 294                          ["foo", " ", "--option-", "opt", " ", "bar"])
 295
 296     def test_punct_hyphens(self):
 297         # Oh bother, SF #965425 found another problem with hyphens --
 298         # hyphenated words in single quotes weren't handled correctly.
 299         # In fact, the bug is that *any* punctuation around a hyphenated
 300         # word was handled incorrectly, except for a leading "--", which
 301         # was special-cased for Optik and Docutils.  So test a variety
 302         # of styles of punctuation around a hyphenated word.
 303         # (Actually this is based on an Optik bug report, #813077).
 304         self.check_split("the 'wibble-wobble' widget",
 305                          ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
 306         self.check_split('the "wibble-wobble" widget',
 307                          ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
 308         self.check_split("the (wibble-wobble) widget",
 309                          ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
 310         self.check_split("the ['wibble-wobble'] widget",
 311                          ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
 312
 313     def test_funky_parens (self):
 314         # Second part of SF bug #596434: long option strings inside
 315         # parentheses.
 316         self.check_split("foo (--option) bar",
 317                          ["foo", " ", "(--option)", " ", "bar"])
 318
 319         # Related stuff -- make sure parens work in simpler contexts.
 320         self.check_split("foo (bar) baz",
 321                          ["foo", " ", "(bar)", " ", "baz"])
 322         self.check_split("blah (ding dong), wubba",
 323                          ["blah", " ", "(ding", " ", "dong),",
 324                           " ", "wubba"])
 325
 326     def test_initial_whitespace(self):
 327         # SF bug #622849 reported inconsistent handling of leading
 328         # whitespace; let's test that a bit, shall we?
 329         text = " This is a sentence with leading whitespace."
 330         self.check_wrap(text, 50,
 331                         [" This is a sentence with leading whitespace."])
 332         self.check_wrap(text, 30,
 333                         [" This is a sentence with", "leading whitespace."])
 334
 335     def test_no_drop_whitespace(self):
 336         # SF patch #1581073
 337         text = " This is a    sentence with     much whitespace."
 338         self.check_wrap(text, 10,
 339                         [" This is a", "    ", "sentence ",
 340                          "with     ", "much white", "space."],
 341                         drop_whitespace=False)
 342
 343     if test_support.have_unicode:
 344         def test_unicode(self):
 345             # *Very* simple test of wrapping Unicode strings.  I'm sure
 346             # there's more to it than this, but let's at least make
 347             # sure textwrap doesn't crash on Unicode input!
 348             text = u"Hello there, how are you today?"
 349             self.check_wrap(text, 50, [u"Hello there, how are you today?"])
 350             self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
 351             olines = self.wrapper.wrap(text)
 352             self.assertIsInstance(olines, list)
 353             self.assertIsInstance(olines[0], unicode)
 354             otext = self.wrapper.fill(text)
 355             self.assertIsInstance(otext, unicode)
 356
 357         def test_no_split_at_umlaut(self):
 358             text = u"Die Empf\xe4nger-Auswahl"
 359             self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])
 360
 361         def test_umlaut_followed_by_dash(self):
 362             text = u"aa \xe4\xe4-\xe4\xe4"
 363             self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])
 364
 365     def test_split(self):
 366         # Ensure that the standard _split() method works as advertised
 367         # in the comments
 368
 369         text = "Hello there -- you goof-ball, use the -b option!"
 370
 371         result = self.wrapper._split(text)
 372         self.check(result,
 373              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
 374               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
 375
 376     def test_break_on_hyphens(self):
 377         # Ensure that the break_on_hyphens attributes work
 378         text = "yaba daba-doo"
 379         self.check_wrap(text, 10, ["yaba daba-", "doo"],
 380                         break_on_hyphens=True)
 381         self.check_wrap(text, 10, ["yaba", "daba-doo"],
 382                         break_on_hyphens=False)
 383
 384     def test_bad_width(self):
 385         # Ensure that width <= 0 is caught.
 386         text = "Whatever, it doesn't matter."
 387         self.assertRaises(ValueError, wrap, text, 0)
 388         self.assertRaises(ValueError, wrap, text, -1)
 389
 390
 391 class LongWordTestCase (BaseTestCase):
 392     def setUp(self):
 393         self.wrapper = TextWrapper()
 394         self.text = '''\
 395 Did you say "supercalifragilisticexpialidocious?"
 396 How *do* you spell that odd word, anyways?
 397 '''
 398
 399     def test_break_long(self):
 400         # Wrap text with long words and lots of punctuation
 401
 402         self.check_wrap(self.text, 30,
 403                         ['Did you say "supercalifragilis',
 404                          'ticexpialidocious?" How *do*',
 405                          'you spell that odd word,',
 406                          'anyways?'])
 407         self.check_wrap(self.text, 50,
 408                         ['Did you say "supercalifragilisticexpialidocious?"',
 409                          'How *do* you spell that odd word, anyways?'])
 410
 411         # SF bug 797650.  Prevent an infinite loop by making sure that at
 412         # least one character gets split off on every pass.
 413         self.check_wrap('-'*10+'hello', 10,
 414                         ['----------',
 415                          '               h',
 416                          '               e',
 417                          '               l',
 418                          '               l',
 419                          '               o'],
 420                         subsequent_indent = ' '*15)
 421
 422         # bug 1146.  Prevent a long word to be wrongly wrapped when the
 423         # preceding word is exactly one character shorter than the width
 424         self.check_wrap(self.text, 12,
 425                         ['Did you say ',
 426                          '"supercalifr',
 427                          'agilisticexp',
 428                          'ialidocious?',
 429                          '" How *do*',
 430                          'you spell',
 431                          'that odd',
 432                          'word,',
 433                          'anyways?'])
 434
 435     def test_nobreak_long(self):
 436         # Test with break_long_words disabled
 437         self.wrapper.break_long_words = 0
 438         self.wrapper.width = 30
 439         expect = ['Did you say',
 440                   '"supercalifragilisticexpialidocious?"',
 441                   'How *do* you spell that odd',
 442                   'word, anyways?'
 443                   ]
 444         result = self.wrapper.wrap(self.text)
 445         self.check(result, expect)
 446
 447         # Same thing with kwargs passed to standalone wrap() function.
 448         result = wrap(self.text, width=30, break_long_words=0)
 449         self.check(result, expect)
 450
 451
 452 class IndentTestCases(BaseTestCase):
 453
 454     # called before each test method
 455     def setUp(self):
 456         self.text = '''\
 457 This paragraph will be filled, first without any indentation,
 458 and then with some (including a hanging indent).'''
 459
 460
 461     def test_fill(self):
 462         # Test the fill() method
 463
 464         expect = '''\
 465 This paragraph will be filled, first
 466 without any indentation, and then with
 467 some (including a hanging indent).'''
 468
 469         result = fill(self.text, 40)
 470         self.check(result, expect)
 471
 472
 473     def test_initial_indent(self):
 474         # Test initial_indent parameter
 475
 476         expect = ["     This paragraph will be filled,",
 477                   "first without any indentation, and then",
 478                   "with some (including a hanging indent)."]
 479         result = wrap(self.text, 40, initial_indent="     ")
 480         self.check(result, expect)
 481
 482         expect = "\n".join(expect)
 483         result = fill(self.text, 40, initial_indent="     ")
 484         self.check(result, expect)
 485
 486
 487     def test_subsequent_indent(self):
 488         # Test subsequent_indent parameter
 489
 490         expect = '''\
 491   * This paragraph will be filled, first
 492     without any indentation, and then
 493     with some (including a hanging
 494     indent).'''
 495
 496         result = fill(self.text, 40,
 497                       initial_indent="  * ", subsequent_indent="    ")
 498         self.check(result, expect)
 499
 500
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
 503 class DedentTestCase(unittest.TestCase):
 504
 505     def assertUnchanged(self, text):
 506         """assert that dedent() has no effect on 'text'"""
 507         self.assertEquals(text, dedent(text))
 508
 509     def test_dedent_nomargin(self):
 510         # No lines indented.
 511         text = "Hello there.\nHow are you?\nOh good, I'm glad."
 512         self.assertUnchanged(text)
 513
 514         # Similar, with a blank line.
 515         text = "Hello there.\n\nBoo!"
 516         self.assertUnchanged(text)
 517
 518         # Some lines indented, but overall margin is still zero.
 519         text = "Hello there.\n  This is indented."
 520         self.assertUnchanged(text)
 521
 522         # Again, add a blank line.
 523         text = "Hello there.\n\n  Boo!\n"
 524         self.assertUnchanged(text)
 525
 526     def test_dedent_even(self):
 527         # All lines indented by two spaces.
 528         text = "  Hello there.\n  How are ya?\n  Oh good."
 529         expect = "Hello there.\nHow are ya?\nOh good."
 530         self.assertEquals(expect, dedent(text))
 531
 532         # Same, with blank lines.
 533         text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
 534         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 535         self.assertEquals(expect, dedent(text))
 536
 537         # Now indent one of the blank lines.
 538         text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
 539         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 540         self.assertEquals(expect, dedent(text))
 541
 542     def test_dedent_uneven(self):
 543         # Lines indented unevenly.
 544         text = '''\
 545         def foo():
 546             while 1:
 547                 return foo
 548         '''
 549         expect = '''\
 550 def foo():
 551     while 1:
 552         return foo
 553 '''
 554         self.assertEquals(expect, dedent(text))
 555
 556         # Uneven indentation with a blank line.
 557         text = "  Foo\n    Bar\n\n   Baz\n"
 558         expect = "Foo\n  Bar\n\n Baz\n"
 559         self.assertEquals(expect, dedent(text))
 560
 561         # Uneven indentation with a whitespace-only line.
 562         text = "  Foo\n    Bar\n \n   Baz\n"
 563         expect = "Foo\n  Bar\n\n Baz\n"
 564         self.assertEquals(expect, dedent(text))
 565
 566     # dedent() should not mangle internal tabs
 567     def test_dedent_preserve_internal_tabs(self):
 568         text = "  hello\tthere\n  how are\tyou?"
 569         expect = "hello\tthere\nhow are\tyou?"
 570         self.assertEquals(expect, dedent(text))
 571
 572         # make sure that it preserves tabs when it's not making any
 573         # changes at all
 574         self.assertEquals(expect, dedent(expect))
 575
 576     # dedent() should not mangle tabs in the margin (i.e.
 577     # tabs and spaces both count as margin, but are *not*
 578     # considered equivalent)
 579     def test_dedent_preserve_margin_tabs(self):
 580         text = "  hello there\n\thow are you?"
 581         self.assertUnchanged(text)
 582
 583         # same effect even if we have 8 spaces
 584         text = "        hello there\n\thow are you?"
 585         self.assertUnchanged(text)
 586
 587         # dedent() only removes whitespace that can be uniformly removed!
 588         text = "\thello there\n\thow are you?"
 589         expect = "hello there\nhow are you?"
 590         self.assertEquals(expect, dedent(text))
 591
 592         text = "  \thello there\n  \thow are you?"
 593         self.assertEquals(expect, dedent(text))
 594
 595         text = "  \t  hello there\n  \t  how are you?"
 596         self.assertEquals(expect, dedent(text))
 597
 598         text = "  \thello there\n  \t  how are you?"
 599         expect = "hello there\n  how are you?"
 600         self.assertEquals(expect, dedent(text))
 601
 602
 603 def test_main():
 604     test_support.run_unittest(WrapTestCase,
 605                               LongWordTestCase,
 606                               IndentTestCases,
 607                               DedentTestCase)
 608
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()