cola/textwrap.py

   1 """Text wrapping and filling.
   2 """
   3 from __future__ import division, absolute_import, unicode_literals
   4 import re
   5
   6 from .compat import ustr
   7
   8 # Copyright (C) 1999-2001 Gregory P. Ward.
   9 # Copyright (C) 2002, 2003 Python Software Foundation.
  10 # Copyright (C) 2013, David Aguilar
  11 # Written by Greg Ward <gward@python.net>
  12 # Simplified for git-cola by David Aguilar <davvid@gmail.com>
  13
  14
  15 class TextWrapper(object):
  16     """
  17     Object for wrapping/filling text.  The public interface consists of
  18     the wrap() and fill() methods; the other methods are just there for
  19     subclasses to override in order to tweak the default behaviour.
  20     If you want to completely replace the main wrapping algorithm,
  21     you'll probably have to override _wrap_chunks().
  22
  23     Several instance attributes control various aspects of wrapping:
  24       width (default: 70)
  25         The preferred width of wrapped lines.
  26       tabwidth (default: 8)
  27         The width of a tab used when calculating line length.
  28       break_on_hyphens (default: false)
  29         Allow breaking hyphenated words. If true, wrapping will occur
  30         preferably on whitespaces and right after hyphens part of
  31         compound words.
  32       drop_whitespace (default: true)
  33         Drop leading and trailing whitespace from lines.
  34     """
  35
  36     # This funky little regex is just the trick for splitting
  37     # text up into word-wrappable chunks.  E.g.
  38     #   "Hello there -- you goof-ball, use the -b option!"
  39     # splits into
  40     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
  41     # (after stripping out empty strings).
  42     wordsep_re = re.compile(
  43         r'(\s+|'                                  # any whitespace
  44         r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
  45         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
  46
  47     # This less funky little regex just split on recognized spaces. E.g.
  48     #   "Hello there -- you goof-ball, use the -b option!"
  49     # splits into
  50     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
  51     wordsep_simple_re = re.compile(r'(\s+)')
  52
  53     def __init__(self,
  54                  width=70,
  55                  tabwidth=8,
  56                  break_on_hyphens=False,
  57                  drop_whitespace=True):
  58         self.width = width
  59         self.tabwidth = tabwidth
  60         self.break_on_hyphens = break_on_hyphens
  61         self.drop_whitespace = drop_whitespace
  62
  63         # recompile the regexes for Unicode mode -- done in this clumsy way for
  64         # backwards compatibility because it's rather common to monkey-patch
  65         # the TextWrapper class' wordsep_re attribute.
  66         self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
  67         self.wordsep_simple_re_uni = re.compile(
  68             self.wordsep_simple_re.pattern, re.U)
  69
  70     def _split(self, text):
  71         """_split(text : string) -> [string]
  72
  73         Split the text to wrap into indivisible chunks.  Chunks are
  74         not quite the same as words; see _wrap_chunks() for full
  75         details.  As an example, the text
  76           Look, goof-ball -- use the -b option!
  77         breaks into the following chunks:
  78           'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
  79           'use', ' ', 'the', ' ', '-b', ' ', 'option!'
  80         if break_on_hyphens is True, or in:
  81           'Look,', ' ', 'goof-ball', ' ', '--', ' ',
  82           'use', ' ', 'the', ' ', '-b', ' ', option!'
  83         otherwise.
  84         """
  85         if isinstance(text, ustr):
  86             if self.break_on_hyphens:
  87                 pat = self.wordsep_re_uni
  88             else:
  89                 pat = self.wordsep_simple_re_uni
  90         else:
  91             if self.break_on_hyphens:
  92                 pat = self.wordsep_re
  93             else:
  94                 pat = self.wordsep_simple_re
  95         chunks = pat.split(text)
  96         chunks = list(filter(None, chunks))  # remove empty chunks
  97         return chunks
  98
  99     def _wrap_chunks(self, chunks):
 100         """_wrap_chunks(chunks : [string]) -> [string]
 101
 102         Wrap a sequence of text chunks and return a list of lines of length
 103         'self.width' or less.  Some lines may be longer than this.  Chunks
 104         correspond roughly to words and the whitespace between them: each
 105         chunk is indivisible, but a line break can come between any two
 106         chunks.  Chunks should not have internal whitespace; ie. a chunk is
 107         either all whitespace or a "word".  Whitespace chunks will be removed
 108         from the beginning and end of lines, but apart from that whitespace is
 109         preserved.
 110         """
 111         lines = []
 112
 113         # Arrange in reverse order so items can be efficiently popped
 114         # from a stack of chucks.
 115         chunks = list(reversed(chunks))
 116
 117         while chunks:
 118
 119             # Start the list of chunks that will make up the current line.
 120             # cur_len is just the length of all the chunks in cur_line.
 121             cur_line = []
 122             cur_len = 0
 123
 124             # Maximum width for this line.
 125             width = self.width
 126
 127             # First chunk on line is a space -- drop it, unless this
 128             # is the very beginning of the text (ie. no lines started yet).
 129             if self.drop_whitespace and chunks[-1] == ' ' and lines:
 130                 chunks.pop()
 131
 132             while chunks:
 133                 l = self.chunklen(chunks[-1])
 134
 135                 # Can at least squeeze this chunk onto the current line.
 136                 if cur_len + l <= width:
 137                     cur_line.append(chunks.pop())
 138                     cur_len += l
 139                 # Nope, this line is full.
 140                 else:
 141                     break
 142
 143             # The current line is full, and the next chunk is too big to
 144             # fit on *any* line (not just this one).
 145             if chunks and self.chunklen(chunks[-1]) > width:
 146                 if not cur_line:
 147                     cur_line.append(chunks.pop())
 148
 149             # If the last chunk on this line is all a space, drop it.
 150             if self.drop_whitespace and cur_line and cur_line[-1] == ' ':
 151                 cur_line.pop()
 152
 153             # Avoid whitespace at the beginining of the line.
 154             if (self.drop_whitespace and cur_line and
 155                     cur_line[0] in (' ',)):
 156                 cur_line.pop(0)
 157
 158             # Convert current line back to a string and store it in list
 159             # of all lines (return value).
 160             if cur_line:
 161                 lines.append(''.join(cur_line))
 162
 163         return lines
 164
 165     def chunklen(self, word):
 166         """Return length of a word taking tabs into account
 167
 168         >>> w = TextWrapper(tabwidth=8)
 169         >>> w.chunklen("\\t\\t\\t\\tX")
 170         33
 171
 172         """
 173         return len(word.replace('\t', '')) + word.count('\t') * self.tabwidth
 174
 175     # -- Public interface ----------------------------------------------
 176
 177     def wrap(self, text):
 178         """wrap(text : string) -> [string]
 179
 180         Reformat the single paragraph in 'text' so it fits in lines of
 181         no more than 'self.width' columns, and return a list of wrapped
 182         lines.  Tabs in 'text' are expanded with string.expandtabs(),
 183         and all other whitespace characters (including newline) are
 184         converted to space.
 185         """
 186         chunks = self._split(text)
 187         return self._wrap_chunks(chunks)
 188
 189     def fill(self, text):
 190         """fill(text : string) -> string
 191
 192         Reformat the single paragraph in 'text' to fit in lines of no
 193         more than 'self.width' columns, and return a new string
 194         containing the entire wrapped paragraph.
 195         """
 196         return "\n".join(self.wrap(text))
 197
 198
 199 def word_wrap(text, tabwidth, limit, break_on_hyphens=False):
 200     """Wrap long lines to the specified limit"""
 201
 202     lines = []
 203
 204     # Acked-by:, Signed-off-by:, Helped-by:, etc.
 205     special_tag_rgx = re.compile(
 206             r'^('
 207             r'(('
 208             r'Acked-by|'
 209             r"Ack'd-by|"
 210             r'Based-on-patch-by|'
 211             r'Cheered-on-by|'
 212             r'Co-authored-by|'
 213             r'Comments-by|'
 214             r'Confirmed-by|'
 215             r'Contributions-by|'
 216             r'Debugged-by|'
 217             r'Discovered-by|'
 218             r'Explained-by|'
 219             r'Backtraced-by|'
 220             r'Helped-by|'
 221             r'Liked-by|'
 222             r'Link|'
 223             r'Improved-by|'
 224             r'Inspired-by|'
 225             r'Initial-patch-by|'
 226             r'Noticed-by|'
 227             r'Original-patch-by|'
 228             r'Originally-by|'
 229             r'Mentored-by|'
 230             r'Patch-by|'
 231             r'Proposed-by|'
 232             r'References|'
 233             r'Related-to|'
 234             r'Reported-by|'
 235             r'Requested-by|'
 236             r'Reviewed-by|'
 237             r'See-also|'
 238             r'Signed-off-by|'
 239             r'Signed-Off-by|'
 240             r'Spotted-by|'
 241             r'Suggested-by|'
 242             r'Tested-by|'
 243             r'Tested-on-([a-zA-Z-_]+)-by|'
 244             r'With-suggestions-by'
 245             r'):)'
 246             r'|([Cc]\.\s*[Ff]\.\s+)'
 247             r')')
 248
 249     w = TextWrapper(width=limit,
 250                     tabwidth=tabwidth,
 251                     break_on_hyphens=break_on_hyphens,
 252                     drop_whitespace=True)
 253
 254     for line in text.split('\n'):
 255         if special_tag_rgx.match(line):
 256             lines.append(line)
 257         else:
 258             lines.append(w.fill(line))
 259
 260     return '\n'.join(lines)