cola/diffparse.py

   1 from __future__ import division, absolute_import, unicode_literals
   2 import math
   3 import re
   4 from collections import defaultdict
   5
   6 from . import compat
   7
   8
# Matches a unified-diff hunk header such as "@@ -12,4 +12,6 @@ heading".
# Group 1 is the old-file range, group 2 is the new-file range, and
# group 3 is whatever text follows the closing "@@" (possibly empty).
_HUNK_HEADER_RE = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)')
  10
  11
  12 class _DiffHunk(object):
  13     def __init__(self, old_start, old_count, new_start, new_count, heading,
  14                  first_line_idx, lines):
  15         self.old_start = old_start
  16         self.old_count = old_count
  17         self.new_start = new_start
  18         self.new_count = new_count
  19         self.heading = heading
  20         self.first_line_idx = first_line_idx
  21         self.lines = lines
  22
  23     @property
  24     def last_line_idx(self):
  25         return self.first_line_idx + len(self.lines) - 1
  26
  27
  28 def _parse_range_str(range_str):
  29     if ',' in range_str:
  30         begin, end = range_str.split(',', 1)
  31         return int(begin), int(end)
  32     else:
  33         return int(range_str), 1
  34
  35
  36 def _format_range(start, count):
  37     if count == 1:
  38         return str(start)
  39     else:
  40         return '%d,%d' % (start, count)
  41
  42
  43 def _format_hunk_header(old_start, old_count, new_start, new_count,
  44                         heading=''):
  45     return '@@ -%s +%s @@%s' % (_format_range(old_start, old_count),
  46                                 _format_range(new_start, new_count),
  47                                 heading)
  48
  49
  50 def _parse_diff(diff_text):
  51     hunks = []
  52     for line_idx, line in enumerate(diff_text.split('\n')):
  53         match = _HUNK_HEADER_RE.match(line)
  54         if match:
  55             old_start, old_count = _parse_range_str(match.group(1))
  56             new_start, new_count = _parse_range_str(match.group(2))
  57             heading = match.group(3)
  58             hunks.append(_DiffHunk(old_start, old_count,
  59                                    new_start, new_count,
  60                                    heading, line_idx, lines=[line]))
  61         elif not hunks:
  62             # first line of the diff is not a header line
  63             errmsg = 'Malformed diff?: %s' % diff_text
  64             raise AssertionError(errmsg)
  65         elif line:
  66             hunks[-1].lines.append(line)
  67     return hunks
  68
  69
  70 def digits(number):
  71     """Return the number of digits needed to display a number"""
  72     if number >= 0:
  73         result = int(math.log10(number)) + 1
  74     else:
  75         result = 1
  76     return result
  77
  78
  79 class DiffLines(object):
  80     """Parse diffs and gather line numbers"""
  81
  82     EMPTY = -1
  83     DASH = -2
  84
  85     def __init__(self):
  86         self.max_old = -1
  87         self.max_new = -1
  88         self.valid = True
  89
  90     def digits(self):
  91         return digits(max(self.max_old, self.max_new))
  92
  93     def parse(self, diff_text):
  94         self.max_old = -1
  95         self.max_new = -1
  96
  97         lines = []
  98         old_start = 0
  99         old_count = 0
 100         new_start = 0
 101         new_count = 0
 102         old_cur = 0
 103         new_cur = 0
 104
 105         INITIAL_STATE = 0
 106         DIFF_STATE = 1
 107         state = INITIAL_STATE
 108
 109         for text in diff_text.splitlines():
 110             if text.startswith('@@ -'):
 111                 parts = text.split(' ', 4)
 112                 if parts[0] == '@@' and parts[3] == '@@':
 113                     state = DIFF_STATE
 114                     old_start, old_count = _parse_range_str(parts[1][1:])
 115                     new_start, new_count = _parse_range_str(parts[2][1:])
 116                     old_cur = old_start
 117                     new_cur = new_start
 118                     self.max_old = max(old_start + old_count, self.max_old)
 119                     self.max_new = max(new_start + new_count, self.max_new)
 120                     lines.append((self.DASH, self.DASH))
 121                     continue
 122             if state == INITIAL_STATE:
 123                 lines.append((self.EMPTY, self.EMPTY))
 124             elif text.startswith('-'):
 125                 lines.append((old_cur, self.EMPTY))
 126                 old_cur += 1
 127             elif text.startswith('+'):
 128                 lines.append((self.EMPTY, new_cur))
 129                 new_cur += 1
 130             elif text.startswith(' '):
 131                 lines.append((old_cur, new_cur))
 132                 old_cur += 1
 133                 new_cur += 1
 134             elif not text:
 135                 old_cur += 1
 136                 new_cur += 1
 137             else:
 138                 self.valid = False
 139                 continue
 140
 141         return lines
 142
 143
 144 class FormatDigits(object):
 145     """Format numbers for use in diff line numbers"""
 146
 147     DASH = DiffLines.DASH
 148     EMPTY = DiffLines.EMPTY
 149
 150     def __init__(self, dash='', empty=''):
 151         self.fmt = ''
 152         self.empty = ''
 153         self.dash = ''
 154         self._dash = dash or compat.unichr(0xb7)
 155         self._empty = empty or ' '
 156
 157     def set_digits(self, digits):
 158         self.fmt = ('%%0%dd' % digits)
 159         self.empty = (self._empty * digits)
 160         self.dash = (self._dash * digits)
 161
 162     def value(self, old, new):
 163         old_str = self._format(old)
 164         new_str = self._format(new)
 165         return ('%s %s' % (old_str, new_str))
 166
 167     def number(self, value):
 168         return (self.fmt % value)
 169
 170     def _format(self, value):
 171         if value == self.DASH:
 172             result = self.dash
 173         elif value == self.EMPTY:
 174             result = self.empty
 175         else:
 176             result = self.number(value)
 177         return result
 178
 179
 180 class DiffParser(object):
 181     """Parse and rewrite diffs to produce edited patches
 182
 183     This parser is used for modifying the worktree and index by constructing
 184     temporary patches that are applied using "git apply".
 185
 186     """
 187
 188     def __init__(self, filename, diff_text):
 189         self.filename = filename
 190         self.hunks = _parse_diff(diff_text)
 191
 192     def generate_patch(self, first_line_idx, last_line_idx,
 193                        reverse=False):
 194         """Return a patch containing a subset of the diff"""
 195
 196         ADDITION = '+'
 197         DELETION = '-'
 198         CONTEXT = ' '
 199         NO_NEWLINE = '\\'
 200
 201         lines = ['--- a/%s' % self.filename, '+++ b/%s' % self.filename]
 202
 203         start_offset = 0
 204
 205         for hunk in self.hunks:
 206             # skip hunks until we get to the one that contains the first
 207             # selected line
 208             if hunk.last_line_idx < first_line_idx:
 209                 continue
 210             # once we have processed the hunk that contains the last selected
 211             # line, we can stop
 212             if hunk.first_line_idx > last_line_idx:
 213                 break
 214
 215             prev_skipped = False
 216             counts = defaultdict(int)
 217             filtered_lines = []
 218
 219             for line_idx, line in enumerate(hunk.lines[1:],
 220                                             start=hunk.first_line_idx + 1):
 221                 line_type, line_content = line[:1], line[1:]
 222
 223                 if reverse:
 224                     if line_type == ADDITION:
 225                         line_type = DELETION
 226                     elif line_type == DELETION:
 227                         line_type = ADDITION
 228
 229                 if not (first_line_idx <= line_idx <= last_line_idx):
 230                     if line_type == ADDITION:
 231                         # Skip additions that are not selected.
 232                         prev_skipped = True
 233                         continue
 234                     elif line_type == DELETION:
 235                         # Change deletions that are not selected to context.
 236                         line_type = CONTEXT
 237                 if line_type == NO_NEWLINE and prev_skipped:
 238                     # If the line immediately before a "No newline" line was
 239                     # skipped (because it was an unselected addition) skip
 240                     # the "No newline" line as well.
 241                     continue
 242                 filtered_lines.append(line_type + line_content)
 243                 counts[line_type] += 1
 244                 prev_skipped = False
 245
 246             # Do not include hunks that, after filtering, have only context
 247             # lines (no additions or deletions).
 248             if not counts[ADDITION] and not counts[DELETION]:
 249                 continue
 250
 251             old_count = counts[CONTEXT] + counts[DELETION]
 252             new_count = counts[CONTEXT] + counts[ADDITION]
 253
 254             if reverse:
 255                 old_start = hunk.new_start
 256             else:
 257                 old_start = hunk.old_start
 258             new_start = old_start + start_offset
 259             if old_count == 0:
 260                 new_start += 1
 261             if new_count == 0:
 262                 new_start -= 1
 263
 264             start_offset += counts[ADDITION] - counts[DELETION]
 265
 266             lines.append(_format_hunk_header(old_start, old_count,
 267                                              new_start, new_count,
 268                                              hunk.heading))
 269             lines.extend(filtered_lines)
 270
 271         # If there are only two lines, that means we did not include any hunks,
 272         # so return None.
 273         if len(lines) == 2:
 274             return None
 275         else:
 276             lines.append('')
 277             return '\n'.join(lines)
 278
 279     def generate_hunk_patch(self, line_idx, reverse=False):
 280         """Return a patch containing the hunk for the specified line only"""
 281         if not self.hunks:
 282             return None
 283         for hunk in self.hunks:
 284             if line_idx <= hunk.last_line_idx:
 285                 break
 286         return self.generate_patch(hunk.first_line_idx, hunk.last_line_idx,
 287                                    reverse=reverse)