pylint: rename variables for readability
[git-cola.git] / cola / diffparse.py
blobc0df07278265de534d8e61dd66cb746ea651c93b
1 from __future__ import absolute_import, division, print_function, unicode_literals
2 import math
3 import re
4 from collections import Counter
5 from itertools import groupby
7 from . import compat
# Line-type markers: the first character of each content line in a diff hunk.
DIFF_CONTEXT = ' '
DIFF_ADDITION = '+'
DIFF_DELETION = '-'
# First character of the "\ No newline at end of file" marker line.
DIFF_NO_NEWLINE = '\\'
def parse_range_str(range_str):
    """Split a hunk range string into a pair of integers

    "12,5" yields (12, 5).  A range without a comma, such as "12", yields
    (12, 1): the second value defaults to one.
    """
    first, comma, second = range_str.partition(',')
    if not comma:
        return int(range_str), 1
    return int(first), int(second)
23 def _format_range(start, count):
24 if count == 1:
25 return str(start)
26 return '%d,%d' % (start, count)
def _format_hunk_header(old_start, old_count, new_start, new_count, heading=''):
    """Build a "@@ -old +new @@heading" hunk header line ending in a newline"""
    old_range = _format_range(old_start, old_count)
    new_range = _format_range(new_start, new_count)
    return '@@ -%s +%s @@%s\n' % (old_range, new_range, heading)
def digits(number):
    """Return the number of digits needed to display a number

    Zero and negative values report a width of one.  The digits are counted
    from the decimal string rather than via math.log10(): log10(0) raises
    ValueError, and floating point rounding can over-count values that sit
    just below a large power of ten.
    """
    if number > 0:
        return len(str(int(number)))
    return 1
class LineCounter(object):
    """Track the current line number and the highest line number of a range"""

    def __init__(self, value=0, max_value=-1):
        # Remember the starting maximum so that reset() can restore it.
        self._initial_max_value = max_value
        self.max_value = max_value
        self.value = value

    def reset(self):
        """Restore the maximum to its constructor value and return self"""
        self.max_value = self._initial_max_value
        return self

    def parse(self, range_str):
        """Load a diff range string: set the current value, raise the maximum"""
        first, length = parse_range_str(range_str)
        last = first + length - 1
        self.value = first
        # The maximum only ever grows until reset() is called.
        if last > self.max_value:
            self.max_value = last

    def tick(self, amount=1):
        """Hand back the current value, then advance it by amount"""
        current = self.value
        self.value = current + amount
        return current
class DiffLines(object):
    """Parse diffs and gather line numbers"""

    # Sentinel line numbers: EMPTY marks "no number on this side" and DASH
    # marks a hunk header row.
    EMPTY = -1
    DASH = -2

    def __init__(self):
        self.merge = False

        # diff <old> <new>
        # merge <ours> <theirs> <new>
        self.old = LineCounter()
        self.new = LineCounter()
        self.ours = LineCounter()
        self.theirs = LineCounter()

    def digits(self):
        """Return the display width of the largest line number seen so far"""
        return digits(
            max(
                self.old.max_value,
                self.new.max_value,
                self.ours.max_value,
                self.theirs.max_value,
            )
        )

    def parse(self, diff_text):
        """Return a list of line-number tuples for the lines of diff_text

        Regular diffs produce (old, new) tuples; once a "@@@ -" merge header
        has been seen the tuples are (ours, theirs, new).  Hunk headers yield
        DASH entries and lines without a number on a side use EMPTY.  Empty
        text lines inside a hunk advance the counters without adding a tuple.

        A line that starts like a hunk header but is too short to be one
        (for example a truncated "@@ -1,2 +1,2") is treated like any other
        non-header line instead of raising IndexError.
        """
        lines = []
        diff_state = 1
        state = initial_state = 0
        merge = self.merge = False
        no_newline = r'\ No newline at end of file'

        old = self.old.reset()
        new = self.new.reset()
        ours = self.ours.reset()
        theirs = self.theirs.reset()

        for text in diff_text.split('\n'):
            if text.startswith('@@ -'):
                parts = text.split(' ', 4)
                # Guard the length so a truncated header cannot raise.
                if len(parts) > 3 and parts[0] == '@@' and parts[3] == '@@':
                    state = diff_state
                    old.parse(parts[1][1:])
                    new.parse(parts[2][1:])
                    lines.append((self.DASH, self.DASH))
                    continue
            if text.startswith('@@@ -'):
                self.merge = merge = True
                parts = text.split(' ', 5)
                # Guard the length so a truncated header cannot raise.
                if len(parts) > 4 and parts[0] == '@@@' and parts[4] == '@@@':
                    state = diff_state
                    ours.parse(parts[1][1:])
                    theirs.parse(parts[2][1:])
                    new.parse(parts[3][1:])
                    lines.append((self.DASH, self.DASH, self.DASH))
                    continue
            if state == initial_state or text.rstrip() == no_newline:
                # Header text before the first hunk, or the "No newline"
                # marker: no line numbers on any side.
                if merge:
                    lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
                else:
                    lines.append((self.EMPTY, self.EMPTY))
            elif not merge and text.startswith('-'):
                lines.append((old.tick(), self.EMPTY))
            elif merge and text.startswith('- '):
                lines.append((ours.tick(), self.EMPTY, self.EMPTY))
            elif merge and text.startswith(' -'):
                lines.append((self.EMPTY, theirs.tick(), self.EMPTY))
            elif merge and text.startswith('--'):
                lines.append((ours.tick(), theirs.tick(), self.EMPTY))
            elif not merge and text.startswith('+'):
                lines.append((self.EMPTY, new.tick()))
            elif merge and text.startswith('++'):
                lines.append((self.EMPTY, self.EMPTY, new.tick()))
            elif merge and text.startswith('+ '):
                lines.append((self.EMPTY, theirs.tick(), new.tick()))
            elif merge and text.startswith(' +'):
                lines.append((ours.tick(), self.EMPTY, new.tick()))
            elif not merge and text.startswith(' '):
                lines.append((old.tick(), new.tick()))
            elif merge and text.startswith(' '):
                lines.append((ours.tick(), theirs.tick(), new.tick()))
            elif not text:
                new.tick()
                old.tick()
                ours.tick()
                theirs.tick()
            else:
                # Anything else ends the current hunk.
                state = initial_state
                if merge:
                    lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
                else:
                    lines.append((self.EMPTY, self.EMPTY))

        return lines
class FormatDigits(object):
    """Render diff line numbers as fixed-width text columns"""

    DASH = DiffLines.DASH
    EMPTY = DiffLines.EMPTY

    def __init__(self, dash='', empty=''):
        # The single-character fillers; a falsy argument selects the default.
        self._dash = dash if dash else compat.uchr(0xB7)
        self._empty = empty if empty else ' '
        # Width-dependent values stay blank until set_digits() is called.
        self.fmt = ''
        self.empty = ''
        self.dash = ''

    def set_digits(self, value):
        """Configure the column width used by all later formatting calls"""
        self.dash = self._dash * value
        self.empty = self._empty * value
        self.fmt = '%%0%dd' % value

    def value(self, old, new):
        """Format an (old, new) pair as two space-separated columns"""
        return '%s %s' % (self._format(old), self._format(new))

    def merge_value(self, old, base, new):
        """Format an (old, base, new) triple as three space-separated columns"""
        return '%s %s %s' % (
            self._format(old),
            self._format(base),
            self._format(new),
        )

    def number(self, value):
        """Format a plain number using the configured width"""
        return self.fmt % value

    def _format(self, value):
        """Map the DASH and EMPTY sentinels to fillers, else format the number"""
        if value == self.DASH:
            return self.dash
        if value == self.EMPTY:
            return self.empty
        return self.number(value)
211 class _HunkGrouper:
212 _HUNK_HEADER_RE = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)')
214 def __init__(self):
215 self.match = None
217 def __call__(self, line):
218 match = self._HUNK_HEADER_RE.match(line)
219 if match is not None:
220 self.match = match
221 return self.match
class _DiffHunk:
    """A single hunk whose range header is recomputed from its content lines"""

    def __init__(self, old_start, start_offset, heading, content_lines):
        # Tally the lines by their leading type character.
        tally = Counter(line[:1] for line in content_lines)
        context = tally[DIFF_CONTEXT]
        removed = tally[DIFF_DELETION]
        added = tally[DIFF_ADDITION]

        self.old_count = context + removed
        self.new_count = context + added
        self.changes = removed + added

        # A side with no lines starts at zero.
        self.old_start = old_start if self.old_count else 0

        if not self.new_count:
            self.new_start = 0
        elif self.old_start == 0:
            self.new_start = 1
        else:
            self.new_start = self.old_start + start_offset

        self.heading = heading
        self.content_lines = content_lines

        # The regenerated header comes first, followed by the content lines.
        header = _format_hunk_header(
            self.old_start,
            self.old_count,
            self.new_start,
            self.new_count,
            heading,
        )
        self.lines = [header] + list(content_lines)

    def has_changes(self):
        """Report whether the hunk contains any addition or deletion"""
        return self.changes != 0

    def line_delta(self):
        """Return the difference between the new and old line counts"""
        return self.new_count - self.old_count
class Patch:
    """Parse and rewrite diffs to produce edited patches

    This parser is used for modifying the worktree and index by constructing
    temporary patches that are applied using "git apply".
    """

    def __init__(self, filename, hunks, header_line_count=0):
        self.filename = filename
        self.hunks = hunks
        # Number of lines that precede the first hunk in the parsed diff text.
        self.header_line_count = header_line_count

    @classmethod
    def parse(cls, filename, diff_text):
        """Build a Patch from diff text, keeping only hunks that have changes"""
        header_line_count = 0
        hunks = []
        start_offset = 0
        for match, hunk_lines in groupby(diff_text.split('\n'), _HunkGrouper()):
            if match is not None:
                # Skip the hunk range header line as it will be regenerated by the
                # _DiffHunk.
                next(hunk_lines)
                hunk = _DiffHunk(
                    old_start=parse_range_str(match.group(1))[0],
                    start_offset=start_offset,
                    heading=match.group(3),
                    content_lines=[line + '\n' for line in hunk_lines if line],
                )
                if hunk.has_changes():
                    hunks.append(hunk)
                    start_offset += hunk.line_delta()
            else:
                # Lines before the first hunk header form the file header.
                header_line_count = len(list(hunk_lines))
        return cls(filename, hunks, header_line_count)

    def has_changes(self):
        """Report whether the patch contains at least one hunk"""
        return bool(self.hunks)

    def as_text(self, *, file_headers=True):
        """Return the patch as text, or an empty string when there are no hunks

        The "--- a/" and "+++ b/" lines are emitted only when file_headers is
        true.
        """
        lines = []
        if self.hunks:
            if file_headers:
                lines.append('--- a/%s\n' % self.filename)
                lines.append('+++ b/%s\n' % self.filename)
            for hunk in self.hunks:
                lines.extend(hunk.lines)
        return ''.join(lines)

    def _hunk_iter(self):
        """Yield (first_line_idx, last_line_idx, hunk) for every hunk

        The indexes count lines from the top of the diff text, so the first
        hunk starts right after the header lines.
        """
        hunk_last_line_idx = self.header_line_count - 1
        for hunk in self.hunks:
            hunk_first_line_idx = hunk_last_line_idx + 1
            hunk_last_line_idx += len(hunk.lines)
            yield hunk_first_line_idx, hunk_last_line_idx, hunk

    @staticmethod
    def _reverse_content_lines(content_lines):
        """Return content_lines with additions and deletions swapped"""
        # Normally in a diff, deletions come before additions.  In order to preserve
        # this property in reverse patches, when this function encounters a deletion
        # line and switches it to addition, it appends the line to the pending_additions
        # list, while additions that get switched to deletions are appended directly to
        # the content_lines list.  Each time a context line is encountered, any pending
        # additions are then appended to the content_lines list immediately before the
        # context line and the pending_additions list is cleared.
        new_content_lines = []
        pending_additions = []
        line_type = None
        for line in content_lines:
            prev_line_type = line_type
            line_type = line[:1]
            if line_type == DIFF_ADDITION:
                new_content_lines.append(DIFF_DELETION + line[1:])
            elif line_type == DIFF_DELETION:
                pending_additions.append(DIFF_ADDITION + line[1:])
            elif line_type == DIFF_NO_NEWLINE:
                if prev_line_type == DIFF_DELETION:
                    # Previous line was a deletion that was switched to an
                    # addition, so the "No newline" line goes with it.
                    pending_additions.append(line)
                else:
                    new_content_lines.append(line)
            else:
                new_content_lines.extend(pending_additions)
                new_content_lines.append(line)
                pending_additions = []
        new_content_lines.extend(pending_additions)
        return new_content_lines

    def extract_subset(self, first_line_idx, last_line_idx, *, reverse=False):
        """Return a new patch limited to the selected range of diff lines

        first_line_idx and last_line_idx are inclusive indexes into the lines
        of the diff text.  Unselected changes are either turned into context
        or dropped, depending on reverse, so that the result still applies.
        """
        new_hunks = []
        start_offset = 0
        for hunk_first_line_idx, hunk_last_line_idx, hunk in self._hunk_iter():
            # Skip hunks until reaching the one that contains the first selected line.
            if hunk_last_line_idx < first_line_idx:
                continue

            # Stop once the hunk that contains the last selected line has been
            # processed.
            if hunk_first_line_idx > last_line_idx:
                break

            content_lines = []

            prev_skipped = False
            # Content lines start one past the hunk's own header line.
            for hunk_line_idx, line in enumerate(
                hunk.content_lines, start=hunk_first_line_idx + 1
            ):
                line_type = line[:1]
                if not first_line_idx <= hunk_line_idx <= last_line_idx:
                    if line_type == DIFF_ADDITION:
                        if reverse:
                            # Change unselected additions to context for reverse diffs.
                            line = DIFF_CONTEXT + line[1:]
                        else:
                            # Skip unselected additions for normal diffs.
                            prev_skipped = True
                            continue
                    elif line_type == DIFF_DELETION:
                        if not reverse:
                            # Change unselected deletions to context for normal diffs.
                            line = DIFF_CONTEXT + line[1:]
                        else:
                            # Skip unselected deletions for reverse diffs.
                            prev_skipped = True
                            continue

                if line_type == DIFF_NO_NEWLINE and prev_skipped:
                    # If the line immediately before a "No newline" line was skipped
                    # (e.g. because it was an unselected addition) skip the "No
                    # newline" line as well
                    continue

                content_lines.append(line)
                # The flag describes only the immediately preceding line, so
                # clear it once a line is kept.  Otherwise a skip early in the
                # hunk would drop a later "No newline" marker that belongs to
                # a kept line.
                prev_skipped = False

            if reverse:
                old_start = hunk.new_start
                content_lines = self._reverse_content_lines(content_lines)
            else:
                old_start = hunk.old_start
            new_hunk = _DiffHunk(
                old_start=old_start,
                start_offset=start_offset,
                heading=hunk.heading,
                content_lines=content_lines,
            )
            if new_hunk.has_changes():
                new_hunks.append(new_hunk)
                start_offset += new_hunk.line_delta()

        return Patch(self.filename, new_hunks)

    def extract_hunk(self, line_idx, *, reverse=False):
        """Return a new patch containing only the hunk containing the specified line"""
        new_hunks = []
        for _, hunk_last_line_idx, hunk in self._hunk_iter():
            # The first hunk that ends at or after line_idx is selected.
            if line_idx <= hunk_last_line_idx:
                if reverse:
                    old_start = hunk.new_start
                    content_lines = self._reverse_content_lines(hunk.content_lines)
                else:
                    old_start = hunk.old_start
                    content_lines = hunk.content_lines
                new_hunks = [
                    _DiffHunk(
                        old_start=old_start,
                        start_offset=0,
                        heading=hunk.heading,
                        content_lines=content_lines,
                    )
                ]
                break
        return Patch(self.filename, new_hunks)