cola/diffparse.py

   1 from __future__ import absolute_import, division, print_function, unicode_literals
   2 import math
   3 import re
   4 from collections import defaultdict
   5
   6 from . import compat
   7
   8
# Matches a two-way hunk header line such as "@@ -1,4 +1,6 @@ heading".
# Group 1 is the old range, group 2 is the new range (digits and commas
# only) and group 3 is whatever text follows the closing "@@".
_HUNK_HEADER_RE = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)')
  10
  11
  12 class _DiffHunk(object):
  13     def __init__(
  14         self, old_start, old_count, new_start, new_count, heading, first_line_idx, lines
  15     ):
  16         self.old_start = old_start
  17         self.old_count = old_count
  18         self.new_start = new_start
  19         self.new_count = new_count
  20         self.heading = heading
  21         self.first_line_idx = first_line_idx
  22         self.lines = lines
  23
  24     @property
  25     def last_line_idx(self):
  26         return self.first_line_idx + len(self.lines) - 1
  27
  28
  29 def parse_range_str(range_str):
  30     if ',' in range_str:
  31         begin, end = range_str.split(',', 1)
  32         return int(begin), int(end)
  33     return int(range_str), 1
  34
  35
  36 def _format_range(start, count):
  37     if count == 1:
  38         return str(start)
  39     return '%d,%d' % (start, count)
  40
  41
  42 def _format_hunk_header(old_start, old_count, new_start, new_count, heading=''):
  43     return '@@ -%s +%s @@%s\n' % (
  44         _format_range(old_start, old_count),
  45         _format_range(new_start, new_count),
  46         heading,
  47     )
  48
  49
  50 def _parse_diff(diff_text):
  51     hunks = []
  52     for line_idx, line in enumerate(diff_text.split('\n')):
  53         match = _HUNK_HEADER_RE.match(line)
  54         if match:
  55             old_start, old_count = parse_range_str(match.group(1))
  56             new_start, new_count = parse_range_str(match.group(2))
  57             heading = match.group(3)
  58             hunks.append(
  59                 _DiffHunk(
  60                     old_start,
  61                     old_count,
  62                     new_start,
  63                     new_count,
  64                     heading,
  65                     line_idx,
  66                     lines=[line + '\n'],
  67                 )
  68             )
  69         elif line and hunks:
  70             hunks[-1].lines.append(line + '\n')
  71     return hunks
  72
  73
  74 def digits(number):
  75     """Return the number of digits needed to display a number"""
  76     if number >= 0:
  77         result = int(math.log10(number)) + 1
  78     else:
  79         result = 1
  80     return result
  81
  82
  83 class Counter(object):
  84     """Keep track of a diff range's values"""
  85
  86     def __init__(self, value=0, max_value=-1):
  87         self.value = value
  88         self.max_value = max_value
  89         self._initial_max_value = max_value
  90
  91     def reset(self):
  92         """Reset the max counter and return self for convenience"""
  93         self.max_value = self._initial_max_value
  94         return self
  95
  96     def parse(self, range_str):
  97         """Parse a diff range and setup internal state"""
  98         start, count = parse_range_str(range_str)
  99         self.value = start
 100         self.max_value = max(start + count - 1, self.max_value)
 101
 102     def tick(self, amount=1):
 103         """Return the current value and increment to the next"""
 104         value = self.value
 105         self.value += amount
 106         return value
 107
 108
 109 class DiffLines(object):
 110     """Parse diffs and gather line numbers"""
 111
 112     EMPTY = -1
 113     DASH = -2
 114
 115     def __init__(self):
 116         self.merge = False
 117
 118         # diff <old> <new>
 119         # merge <ours> <theirs> <new>
 120         self.old = Counter()
 121         self.new = Counter()
 122         self.ours = Counter()
 123         self.theirs = Counter()
 124
 125     def digits(self):
 126         return digits(
 127             max(
 128                 self.old.max_value,
 129                 self.new.max_value,
 130                 self.ours.max_value,
 131                 self.theirs.max_value,
 132             )
 133         )
 134
 135     def parse(self, diff_text):
 136         lines = []
 137         DIFF_STATE = 1
 138         state = INITIAL_STATE = 0
 139         merge = self.merge = False
 140         NO_NEWLINE = r'\ No newline at end of file'
 141
 142         old = self.old.reset()
 143         new = self.new.reset()
 144         ours = self.ours.reset()
 145         theirs = self.theirs.reset()
 146
 147         for text in diff_text.split('\n'):
 148             if text.startswith('@@ -'):
 149                 parts = text.split(' ', 4)
 150                 if parts[0] == '@@' and parts[3] == '@@':
 151                     state = DIFF_STATE
 152                     old.parse(parts[1][1:])
 153                     new.parse(parts[2][1:])
 154                     lines.append((self.DASH, self.DASH))
 155                     continue
 156             if text.startswith('@@@ -'):
 157                 self.merge = merge = True
 158                 parts = text.split(' ', 5)
 159                 if parts[0] == '@@@' and parts[4] == '@@@':
 160                     state = DIFF_STATE
 161                     ours.parse(parts[1][1:])
 162                     theirs.parse(parts[2][1:])
 163                     new.parse(parts[3][1:])
 164                     lines.append((self.DASH, self.DASH, self.DASH))
 165                     continue
 166             if state == INITIAL_STATE or text.rstrip() == NO_NEWLINE:
 167                 if merge:
 168                     lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
 169                 else:
 170                     lines.append((self.EMPTY, self.EMPTY))
 171             elif not merge and text.startswith('-'):
 172                 lines.append((old.tick(), self.EMPTY))
 173             elif merge and text.startswith('- '):
 174                 lines.append((ours.tick(), self.EMPTY, self.EMPTY))
 175             elif merge and text.startswith(' -'):
 176                 lines.append((self.EMPTY, theirs.tick(), self.EMPTY))
 177             elif merge and text.startswith('--'):
 178                 lines.append((ours.tick(), theirs.tick(), self.EMPTY))
 179             elif not merge and text.startswith('+'):
 180                 lines.append((self.EMPTY, new.tick()))
 181             elif merge and text.startswith('++'):
 182                 lines.append((self.EMPTY, self.EMPTY, new.tick()))
 183             elif merge and text.startswith('+ '):
 184                 lines.append((self.EMPTY, theirs.tick(), new.tick()))
 185             elif merge and text.startswith(' +'):
 186                 lines.append((ours.tick(), self.EMPTY, new.tick()))
 187             elif not merge and text.startswith(' '):
 188                 lines.append((old.tick(), new.tick()))
 189             elif merge and text.startswith('  '):
 190                 lines.append((ours.tick(), theirs.tick(), new.tick()))
 191             elif not text:
 192                 new.tick()
 193                 old.tick()
 194                 ours.tick()
 195                 theirs.tick()
 196             else:
 197                 state = INITIAL_STATE
 198                 if merge:
 199                     lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
 200                 else:
 201                     lines.append((self.EMPTY, self.EMPTY))
 202
 203         return lines
 204
 205
 206 class FormatDigits(object):
 207     """Format numbers for use in diff line numbers"""
 208
 209     DASH = DiffLines.DASH
 210     EMPTY = DiffLines.EMPTY
 211
 212     def __init__(self, dash='', empty=''):
 213         self.fmt = ''
 214         self.empty = ''
 215         self.dash = ''
 216         self._dash = dash or compat.uchr(0xB7)
 217         self._empty = empty or ' '
 218
 219     def set_digits(self, value):
 220         self.fmt = '%%0%dd' % value
 221         self.empty = self._empty * value
 222         self.dash = self._dash * value
 223
 224     def value(self, old, new):
 225         old_str = self._format(old)
 226         new_str = self._format(new)
 227         return '%s %s' % (old_str, new_str)
 228
 229     def merge_value(self, old, base, new):
 230         old_str = self._format(old)
 231         base_str = self._format(base)
 232         new_str = self._format(new)
 233         return '%s %s %s' % (old_str, base_str, new_str)
 234
 235     def number(self, value):
 236         return self.fmt % value
 237
 238     def _format(self, value):
 239         if value == self.DASH:
 240             result = self.dash
 241         elif value == self.EMPTY:
 242             result = self.empty
 243         else:
 244             result = self.number(value)
 245         return result
 246
 247
 248 class DiffParser(object):
 249     """Parse and rewrite diffs to produce edited patches
 250
 251     This parser is used for modifying the worktree and index by constructing
 252     temporary patches that are applied using "git apply".
 253
 254     """
 255
 256     def __init__(self, filename, diff_text):
 257         self.filename = filename
 258         self.hunks = _parse_diff(diff_text)
 259
 260     def generate_patch(self, first_line_idx, last_line_idx, reverse=False):
 261         """Return a patch containing a subset of the diff"""
 262
 263         ADDITION = '+'
 264         DELETION = '-'
 265         CONTEXT = ' '
 266         NO_NEWLINE = '\\'
 267
 268         lines = ['--- a/%s\n' % self.filename, '+++ b/%s\n' % self.filename]
 269
 270         start_offset = 0
 271
 272         for hunk in self.hunks:
 273             # skip hunks until we get to the one that contains the first
 274             # selected line
 275             if hunk.last_line_idx < first_line_idx:
 276                 continue
 277             # once we have processed the hunk that contains the last selected
 278             # line, we can stop
 279             if hunk.first_line_idx > last_line_idx:
 280                 break
 281
 282             prev_skipped = False
 283             counts = defaultdict(int)
 284             filtered_lines = []
 285
 286             for line_idx, line in enumerate(
 287                 hunk.lines[1:], start=hunk.first_line_idx + 1
 288             ):
 289                 line_type, line_content = line[:1], line[1:]
 290
 291                 if reverse:
 292                     if line_type == ADDITION:
 293                         line_type = DELETION
 294                     elif line_type == DELETION:
 295                         line_type = ADDITION
 296
 297                 if not first_line_idx <= line_idx <= last_line_idx:
 298                     if line_type == ADDITION:
 299                         # Skip additions that are not selected.
 300                         prev_skipped = True
 301                         continue
 302                     if line_type == DELETION:
 303                         # Change deletions that are not selected to context.
 304                         line_type = CONTEXT
 305                 if line_type == NO_NEWLINE and prev_skipped:
 306                     # If the line immediately before a "No newline" line was
 307                     # skipped (because it was an unselected addition) skip
 308                     # the "No newline" line as well.
 309                     continue
 310                 filtered_lines.append(line_type + line_content)
 311                 counts[line_type] += 1
 312                 prev_skipped = False
 313
 314             # Do not include hunks that, after filtering, have only context
 315             # lines (no additions or deletions).
 316             if not counts[ADDITION] and not counts[DELETION]:
 317                 continue
 318
 319             old_count = counts[CONTEXT] + counts[DELETION]
 320             new_count = counts[CONTEXT] + counts[ADDITION]
 321
 322             if reverse:
 323                 old_start = hunk.new_start
 324             else:
 325                 old_start = hunk.old_start
 326             new_start = old_start + start_offset
 327             if old_count == 0:
 328                 new_start += 1
 329             if new_count == 0:
 330                 new_start -= 1
 331
 332             start_offset += counts[ADDITION] - counts[DELETION]
 333
 334             lines.append(
 335                 _format_hunk_header(
 336                     old_start, old_count, new_start, new_count, hunk.heading
 337                 )
 338             )
 339             lines.extend(filtered_lines)
 340
 341         # If there are only two lines, that means we did not include any hunks,
 342         # so return None.
 343         if len(lines) == 2:
 344             return None
 345         return ''.join(lines)
 346
 347     def generate_hunk_patch(self, line_idx, reverse=False):
 348         """Return a patch containing the hunk for the specified line only"""
 349         hunk = None
 350         for hunk in self.hunks:
 351             if line_idx <= hunk.last_line_idx:
 352                 break
 353         if hunk is None:
 354             return None
 355         return self.generate_patch(
 356             hunk.first_line_idx, hunk.last_line_idx, reverse=reverse
 357         )