cola/diffparse.py

   1 from __future__ import division, absolute_import, unicode_literals
   2 import math
   3 import re
   4 from collections import defaultdict
   5
   6 from . import compat
   7
   8
# Matches a two-way unified diff hunk header such as "@@ -1,4 +1,5 @@ heading".
# Group 1 is the old range, group 2 is the new range (each "start" or
# "start,count") and group 3 is whatever text follows the closing "@@".
_HUNK_HEADER_RE = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)')
  10
  11
  12 class _DiffHunk(object):
  13
  14     def __init__(self, old_start, old_count, new_start, new_count, heading,
  15                  first_line_idx, lines):
  16         self.old_start = old_start
  17         self.old_count = old_count
  18         self.new_start = new_start
  19         self.new_count = new_count
  20         self.heading = heading
  21         self.first_line_idx = first_line_idx
  22         self.lines = lines
  23
  24     @property
  25     def last_line_idx(self):
  26         return self.first_line_idx + len(self.lines) - 1
  27
  28
  29 def _parse_range_str(range_str):
  30     if ',' in range_str:
  31         begin, end = range_str.split(',', 1)
  32         return int(begin), int(end)
  33     else:
  34         return int(range_str), 1
  35
  36
  37 def _format_range(start, count):
  38     if count == 1:
  39         return str(start)
  40     else:
  41         return '%d,%d' % (start, count)
  42
  43
  44 def _format_hunk_header(old_start, old_count, new_start, new_count,
  45                         heading=''):
  46     return '@@ -%s +%s @@%s' % (_format_range(old_start, old_count),
  47                                 _format_range(new_start, new_count),
  48                                 heading)
  49
  50
  51 def _parse_diff(diff_text):
  52     hunks = []
  53     for line_idx, line in enumerate(diff_text.split('\n')):
  54         match = _HUNK_HEADER_RE.match(line)
  55         if match:
  56             old_start, old_count = _parse_range_str(match.group(1))
  57             new_start, new_count = _parse_range_str(match.group(2))
  58             heading = match.group(3)
  59             hunks.append(_DiffHunk(old_start, old_count,
  60                                    new_start, new_count,
  61                                    heading, line_idx, lines=[line]))
  62         elif not hunks:
  63             # first line of the diff is not a header line
  64             errmsg = 'Malformed diff?: %s' % diff_text
  65             raise AssertionError(errmsg)
  66         elif line:
  67             hunks[-1].lines.append(line)
  68     return hunks
  69
  70
  71 def digits(number):
  72     """Return the number of digits needed to display a number"""
  73     if number >= 0:
  74         result = int(math.log10(number)) + 1
  75     else:
  76         result = 1
  77     return result
  78
  79
  80 class Counter(object):
  81     """Keep track of a diff range's values"""
  82
  83     def __init__(self, value=0, max_value=-1):
  84         self.value = value
  85         self.max_value = max_value
  86         self._initial_max_value = max_value
  87
  88     def reset(self):
  89         """Reset the max counter and return self for convenience"""
  90         self.max_value = self._initial_max_value
  91         return self
  92
  93     def parse(self, range_str):
  94         """Parse a diff range and setup internal state"""
  95         start, count = _parse_range_str(range_str)
  96         self.value = start
  97         self.max_value = max(start + count, self.max_value)
  98
  99     def tick(self, amount=1):
 100         """Return the current value and increment to the next"""
 101         value = self.value
 102         self.value += amount
 103         return value
 104
 105
 106 class DiffLines(object):
 107     """Parse diffs and gather line numbers"""
 108
 109     EMPTY = -1
 110     DASH = -2
 111
 112     def __init__(self):
 113         self.valid = True
 114         self.merge = False
 115
 116         # diff <old> <new>
 117         # merge <ours> <theirs> <new>
 118         self.old = Counter()
 119         self.new = Counter()
 120         self.ours = Counter()
 121         self.theirs = Counter()
 122
 123     def digits(self):
 124         return digits(max(self.old.max_value, self.new.max_value,
 125                           self.ours.max_value, self.theirs.max_value))
 126
 127     def parse(self, diff_text):
 128         lines = []
 129         INITIAL_STATE = 0
 130         DIFF_STATE = 1
 131         state = INITIAL_STATE
 132         merge = self.merge = False
 133         NO_NEWLINE = '\\ No newline at end of file'
 134
 135         old = self.old.reset()
 136         new = self.new.reset()
 137         ours = self.ours.reset()
 138         theirs = self.theirs.reset()
 139
 140         for text in diff_text.splitlines():
 141             if text.startswith('@@ -'):
 142                 parts = text.split(' ', 4)
 143                 if parts[0] == '@@' and parts[3] == '@@':
 144                     state = DIFF_STATE
 145                     old.parse(parts[1][1:])
 146                     new.parse(parts[2][1:])
 147                     lines.append((self.DASH, self.DASH))
 148                     continue
 149             if text.startswith('@@@ -'):
 150                 self.merge = merge = True
 151                 parts = text.split(' ', 5)
 152                 if parts[0] == '@@@' and parts[4] == '@@@':
 153                     state = DIFF_STATE
 154                     ours.parse(parts[1][1:])
 155                     theirs.parse(parts[2][1:])
 156                     new.parse(parts[3][1:])
 157                     lines.append((self.DASH, self.DASH, self.DASH))
 158                     continue
 159             if state == INITIAL_STATE or text == NO_NEWLINE:
 160                 if merge:
 161                     lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
 162                 else:
 163                     lines.append((self.EMPTY, self.EMPTY))
 164             elif not merge and text.startswith('-'):
 165                 lines.append((old.tick(), self.EMPTY))
 166             elif merge and text.startswith('- '):
 167                 lines.append((self.EMPTY, theirs.tick(), self.EMPTY))
 168             elif merge and text.startswith(' -'):
 169                 lines.append((self.EMPTY, theirs.tick(), self.EMPTY))
 170             elif merge and text.startswith('--'):
 171                 lines.append((ours.tick(), theirs.tick(), self.EMPTY))
 172             elif not merge and text.startswith('+'):
 173                 lines.append((self.EMPTY, new.tick()))
 174             elif merge and text.startswith('++'):
 175                 lines.append((self.EMPTY, self.EMPTY, new.tick()))
 176             elif merge and text.startswith('+ '):
 177                 lines.append((self.EMPTY, theirs.tick(), new.tick()))
 178             elif merge and text.startswith(' +'):
 179                 lines.append((ours.tick(), self.EMPTY, new.tick()))
 180             elif not merge and text.startswith(' '):
 181                 lines.append((old.tick(), new.tick()))
 182             elif merge and text.startswith('  '):
 183                 lines.append((ours.tick(), theirs.tick(), new.tick()))
 184             elif not text:
 185                 new.tick()
 186                 old.tick()
 187                 ours.tick()
 188                 theirs.tick()
 189             else:
 190                 state = INITIAL_STATE
 191                 if merge:
 192                     lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
 193                 else:
 194                     lines.append((self.EMPTY, self.EMPTY))
 195
 196         return lines
 197
 198
 199 class FormatDigits(object):
 200     """Format numbers for use in diff line numbers"""
 201
 202     DASH = DiffLines.DASH
 203     EMPTY = DiffLines.EMPTY
 204
 205     def __init__(self, dash='', empty=''):
 206         self.fmt = ''
 207         self.empty = ''
 208         self.dash = ''
 209         self._dash = dash or compat.unichr(0xb7)
 210         self._empty = empty or ' '
 211
 212     def set_digits(self, digits):
 213         self.fmt = ('%%0%dd' % digits)
 214         self.empty = (self._empty * digits)
 215         self.dash = (self._dash * digits)
 216
 217     def value(self, old, new):
 218         old_str = self._format(old)
 219         new_str = self._format(new)
 220         return ('%s %s' % (old_str, new_str))
 221
 222     def merge_value(self, old, base, new):
 223         old_str = self._format(old)
 224         base_str = self._format(base)
 225         new_str = self._format(new)
 226         return ('%s %s %s' % (old_str, base_str, new_str))
 227
 228     def number(self, value):
 229         return (self.fmt % value)
 230
 231     def _format(self, value):
 232         if value == self.DASH:
 233             result = self.dash
 234         elif value == self.EMPTY:
 235             result = self.empty
 236         else:
 237             result = self.number(value)
 238         return result
 239
 240
 241 class DiffParser(object):
 242     """Parse and rewrite diffs to produce edited patches
 243
 244     This parser is used for modifying the worktree and index by constructing
 245     temporary patches that are applied using "git apply".
 246
 247     """
 248
 249     def __init__(self, filename, diff_text):
 250         self.filename = filename
 251         self.hunks = _parse_diff(diff_text)
 252
 253     def generate_patch(self, first_line_idx, last_line_idx,
 254                        reverse=False):
 255         """Return a patch containing a subset of the diff"""
 256
 257         ADDITION = '+'
 258         DELETION = '-'
 259         CONTEXT = ' '
 260         NO_NEWLINE = '\\'
 261
 262         lines = ['--- a/%s' % self.filename, '+++ b/%s' % self.filename]
 263
 264         start_offset = 0
 265
 266         for hunk in self.hunks:
 267             # skip hunks until we get to the one that contains the first
 268             # selected line
 269             if hunk.last_line_idx < first_line_idx:
 270                 continue
 271             # once we have processed the hunk that contains the last selected
 272             # line, we can stop
 273             if hunk.first_line_idx > last_line_idx:
 274                 break
 275
 276             prev_skipped = False
 277             counts = defaultdict(int)
 278             filtered_lines = []
 279
 280             for line_idx, line in enumerate(hunk.lines[1:],
 281                                             start=hunk.first_line_idx + 1):
 282                 line_type, line_content = line[:1], line[1:]
 283
 284                 if reverse:
 285                     if line_type == ADDITION:
 286                         line_type = DELETION
 287                     elif line_type == DELETION:
 288                         line_type = ADDITION
 289
 290                 if not (first_line_idx <= line_idx <= last_line_idx):
 291                     if line_type == ADDITION:
 292                         # Skip additions that are not selected.
 293                         prev_skipped = True
 294                         continue
 295                     elif line_type == DELETION:
 296                         # Change deletions that are not selected to context.
 297                         line_type = CONTEXT
 298                 if line_type == NO_NEWLINE and prev_skipped:
 299                     # If the line immediately before a "No newline" line was
 300                     # skipped (because it was an unselected addition) skip
 301                     # the "No newline" line as well.
 302                     continue
 303                 filtered_lines.append(line_type + line_content)
 304                 counts[line_type] += 1
 305                 prev_skipped = False
 306
 307             # Do not include hunks that, after filtering, have only context
 308             # lines (no additions or deletions).
 309             if not counts[ADDITION] and not counts[DELETION]:
 310                 continue
 311
 312             old_count = counts[CONTEXT] + counts[DELETION]
 313             new_count = counts[CONTEXT] + counts[ADDITION]
 314
 315             if reverse:
 316                 old_start = hunk.new_start
 317             else:
 318                 old_start = hunk.old_start
 319             new_start = old_start + start_offset
 320             if old_count == 0:
 321                 new_start += 1
 322             if new_count == 0:
 323                 new_start -= 1
 324
 325             start_offset += counts[ADDITION] - counts[DELETION]
 326
 327             lines.append(_format_hunk_header(old_start, old_count,
 328                                              new_start, new_count,
 329                                              hunk.heading))
 330             lines.extend(filtered_lines)
 331
 332         # If there are only two lines, that means we did not include any hunks,
 333         # so return None.
 334         if len(lines) == 2:
 335             return None
 336         else:
 337             lines.append('')
 338             return '\n'.join(lines)
 339
 340     def generate_hunk_patch(self, line_idx, reverse=False):
 341         """Return a patch containing the hunk for the specified line only"""
 342         if not self.hunks:
 343             return None
 344         for hunk in self.hunks:
 345             if line_idx <= hunk.last_line_idx:
 346                 break
 347         return self.generate_patch(hunk.first_line_idx, hunk.last_line_idx,
 348                                    reverse=reverse)