widgets: move PlainTextLabel and RichTextLabel to the text module
[git-cola.git] / cola / diffparse.py
blob79674ecbe386addb1e425d51bee52c9afd952d56
1 import math
2 import re
3 from collections import Counter
4 from itertools import groupby
6 from . import compat
# One-character prefixes found at the start of the content lines of a diff
# hunk.  Hunk lines are classified by comparing ``line[:1]`` to these values.
DIFF_CONTEXT = ' '
DIFF_ADDITION = '+'
DIFF_DELETION = '-'
# First character of the "\ No newline at end of file" marker line.
DIFF_NO_NEWLINE = '\\'
def parse_range_str(range_str):
    """Split a hunk range such as ``"12,5"`` or ``"12"`` into two integers

    The text before the first comma is the first number and the text after
    it is the second.  When there is no comma the whole string is the first
    number and the second defaults to 1.
    """
    first, comma, second = range_str.partition(',')
    if not comma:
        return int(range_str), 1
    return int(first), int(second)
22 def _format_range(start, count):
23 if count == 1:
24 return str(start)
25 return '%d,%d' % (start, count)
def _format_hunk_header(old_start, old_count, new_start, new_count, heading=''):
    """Build a newline-terminated ``@@ -old +new @@heading`` hunk header line

    ``heading`` is appended verbatim right after the closing ``@@``.
    """
    old_range = _format_range(old_start, old_count)
    new_range = _format_range(new_start, new_count)
    return '@@ -{} +{} @@{}\n'.format(old_range, new_range, heading)
def digits(number):
    """Return the number of digits needed to display a number

    Zero and negative numbers report a width of 1.
    """
    # log10 is only defined for strictly positive values; math.log10(0)
    # raises ValueError, so zero must take the fallback branch as well.
    if number > 0:
        result = int(math.log10(number)) + 1
    else:
        result = 1
    return result
class LineCounter:
    """Running line number for one diff range, plus the largest line seen"""

    def __init__(self, value=0, max_value=-1):
        # Remember the starting maximum so that reset() can restore it.
        self._initial_max_value = max_value
        self.max_value = max_value
        self.value = value

    def reset(self):
        """Restore the maximum to its construction-time value and return self"""
        self.max_value = self._initial_max_value
        return self

    def parse(self, range_str):
        """Load a diff range: move to its start and widen the maximum"""
        first, length = parse_range_str(range_str)
        last = first + length - 1
        if last > self.max_value:
            self.max_value = last
        self.value = first

    def tick(self, amount=1):
        """Advance by ``amount`` and return the value held before advancing"""
        current = self.value
        self.value = current + amount
        return current
class DiffLines:
    """Parse diffs and gather line numbers"""

    # Placeholder values stored in the tuples returned by parse().
    # EMPTY fills a column that has no line number for that row;
    # DASH fills every column of a hunk header row.
    EMPTY = -1
    DASH = -2

    def __init__(self):
        self.merge = False

        # diff <old> <new>
        # merge <ours> <theirs> <new>
        self.old = LineCounter()
        self.new = LineCounter()
        self.ours = LineCounter()
        self.theirs = LineCounter()

    def digits(self):
        """Return the digit count of the largest line number seen so far"""
        return digits(
            max(
                self.old.max_value,
                self.new.max_value,
                self.ours.max_value,
                self.theirs.max_value,
            )
        )

    def parse(self, diff_text):
        """Return a list of line-number tuples for the lines of ``diff_text``

        Regular diffs produce ``(old, new)`` tuples.  Once a line starting
        with ``@@@ -`` is seen, ``self.merge`` is set and the tuples become
        ``(ours, theirs, new)``.  Hunk header rows hold ``DASH`` in every
        column, and columns without a line number hold ``EMPTY``.

        An empty line inside a hunk advances every counter without adding a
        tuple to the result.
        """
        lines = []
        diff_state = 1
        state = initial_state = 0
        merge = self.merge = False
        no_newline = r'\ No newline at end of file'

        old = self.old.reset()
        new = self.new.reset()
        ours = self.ours.reset()
        theirs = self.theirs.reset()

        for text in diff_text.split('\n'):
            if text.startswith('@@ -'):
                parts = text.split(' ', 4)
                # A truncated header has fewer than four fields; the length
                # check keeps such a line from raising IndexError and lets
                # it fall through to the ordinary classification below.
                if len(parts) >= 4 and parts[0] == '@@' and parts[3] == '@@':
                    state = diff_state
                    old.parse(parts[1][1:])
                    new.parse(parts[2][1:])
                    lines.append((self.DASH, self.DASH))
                    continue
            if text.startswith('@@@ -'):
                self.merge = merge = True
                parts = text.split(' ', 5)
                # Same guard as above for the three-range merge header.
                if len(parts) >= 5 and parts[0] == '@@@' and parts[4] == '@@@':
                    state = diff_state
                    ours.parse(parts[1][1:])
                    theirs.parse(parts[2][1:])
                    new.parse(parts[3][1:])
                    lines.append((self.DASH, self.DASH, self.DASH))
                    continue
            if state == initial_state or text.rstrip() == no_newline:
                # Lines before the first hunk and "No newline" markers get
                # no line numbers.
                if merge:
                    lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
                else:
                    lines.append((self.EMPTY, self.EMPTY))
            elif not merge and text.startswith('-'):
                lines.append((old.tick(), self.EMPTY))
            elif merge and text.startswith('- '):
                lines.append((ours.tick(), self.EMPTY, self.EMPTY))
            elif merge and text.startswith(' -'):
                lines.append((self.EMPTY, theirs.tick(), self.EMPTY))
            elif merge and text.startswith('--'):
                lines.append((ours.tick(), theirs.tick(), self.EMPTY))
            elif not merge and text.startswith('+'):
                lines.append((self.EMPTY, new.tick()))
            elif merge and text.startswith('++'):
                lines.append((self.EMPTY, self.EMPTY, new.tick()))
            elif merge and text.startswith('+ '):
                lines.append((self.EMPTY, theirs.tick(), new.tick()))
            elif merge and text.startswith(' +'):
                lines.append((ours.tick(), self.EMPTY, new.tick()))
            elif not merge and text.startswith(' '):
                lines.append((old.tick(), new.tick()))
            elif merge and text.startswith(' '):
                lines.append((ours.tick(), theirs.tick(), new.tick()))
            elif not text:
                # Empty line inside a hunk: advance every counter but do not
                # record a row.
                new.tick()
                old.tick()
                ours.tick()
                theirs.tick()
            else:
                # Anything unrecognized ends the current hunk.
                state = initial_state
                if merge:
                    lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
                else:
                    lines.append((self.EMPTY, self.EMPTY))

        return lines
class FormatDigits:
    """Turn line-number values into fixed-width column text"""

    DASH = DiffLines.DASH
    EMPTY = DiffLines.EMPTY

    def __init__(self, dash='', empty=''):
        # Filler units; set_digits() repeats them to the column width.
        self._dash = dash if dash else compat.uchr(0xB7)
        self._empty = empty if empty else ' '
        # Width-dependent values stay blank until set_digits() is called.
        self.fmt = ''
        self.empty = ''
        self.dash = ''

    def set_digits(self, value):
        """Rebuild the number format and the fillers for ``value`` digits"""
        self.dash = self._dash * value
        self.empty = self._empty * value
        self.fmt = '%%0%dd' % value

    def value(self, old, new):
        """Return the two columns of a regular diff row joined by a space"""
        return '{} {}'.format(self._format(old), self._format(new))

    def merge_value(self, old, base, new):
        """Return the three columns of a merge diff row joined by spaces"""
        return '{} {} {}'.format(
            self._format(old),
            self._format(base),
            self._format(new),
        )

    def number(self, value):
        """Apply the current number format to ``value``"""
        return self.fmt % value

    def _format(self, value):
        """Map DASH and EMPTY to their fillers and format anything else"""
        if value == self.DASH:
            return self.dash
        if value == self.EMPTY:
            return self.empty
        return self.number(value)
210 class _HunkGrouper:
211 _HUNK_HEADER_RE = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)')
213 def __init__(self):
214 self.match = None
216 def __call__(self, line):
217 match = self._HUNK_HEADER_RE.match(line)
218 if match is not None:
219 self.match = match
220 return self.match
class _DiffHunk:
    """A single hunk whose header is regenerated from its content lines"""

    def __init__(self, old_start, start_offset, heading, content_lines):
        # Tally the content lines by their one-character prefix.
        tally = Counter(line[:1] for line in content_lines)
        context = tally[DIFF_CONTEXT]
        removed = tally[DIFF_DELETION]
        added = tally[DIFF_ADDITION]

        self.old_count = context + removed
        self.new_count = context + added

        # A side with no lines gets a start of 0.
        self.old_start = 0 if self.old_count == 0 else old_start

        if self.new_count == 0:
            new_start = 0
        elif self.old_start == 0:
            new_start = 1
        else:
            new_start = self.old_start + start_offset
        self.new_start = new_start

        self.heading = heading
        self.content_lines = content_lines
        self.changes = removed + added

        header = _format_hunk_header(
            self.old_start,
            self.old_count,
            self.new_start,
            self.new_count,
            heading,
        )
        # The regenerated header followed by the content lines.
        self.lines = [header, *content_lines]

    def has_changes(self):
        """Return True when the hunk has at least one addition or deletion"""
        return self.changes != 0

    def line_delta(self):
        """Return the new-side line count minus the old-side line count"""
        return self.new_count - self.old_count
class Patch:
    """Parse and rewrite diffs to produce edited patches

    This parser is used for modifying the worktree and index by constructing
    temporary patches that are applied using "git apply".
    """

    def __init__(self, filename, hunks, header_line_count=0):
        self.filename = filename
        self.hunks = hunks
        # Number of diff lines that precede the first hunk header.
        self.header_line_count = header_line_count

    @classmethod
    def parse(cls, filename, diff_text):
        """Build a Patch from diff text

        Lines before the first hunk header are only counted.  Each hunk's
        header is regenerated from its content, empty lines are dropped and
        hunks that contain no additions or deletions are discarded.
        """
        header_line_count = 0
        hunks = []
        start_offset = 0
        for match, hunk_lines in groupby(diff_text.split('\n'), _HunkGrouper()):
            if match is not None:
                # Skip the hunk range header line as it will be regenerated by the
                # _DiffHunk.
                next(hunk_lines)
                hunk = _DiffHunk(
                    old_start=parse_range_str(match.group(1))[0],
                    start_offset=start_offset,
                    heading=match.group(3),
                    content_lines=[line + '\n' for line in hunk_lines if line],
                )
                if hunk.has_changes():
                    hunks.append(hunk)
                    start_offset += hunk.line_delta()
            else:
                header_line_count = len(list(hunk_lines))
        return cls(filename, hunks, header_line_count)

    def has_changes(self):
        """Return True when the patch contains at least one hunk"""
        return bool(self.hunks)

    def as_text(self, *, file_headers=True):
        """Return the patch as text, or an empty string when there are no hunks

        The ``--- a/`` and ``+++ b/`` lines are emitted only when
        ``file_headers`` is true.
        """
        lines = []
        if self.hunks:
            if file_headers:
                lines.append('--- a/%s\n' % self.filename)
                lines.append('+++ b/%s\n' % self.filename)
            for hunk in self.hunks:
                lines.extend(hunk.lines)
        return ''.join(lines)

    def _hunk_iter(self):
        """Yield ``(first_line_idx, last_line_idx, hunk)`` for every hunk

        Indexes count from the start of the diff: the first hunk's header
        sits at ``header_line_count`` and each hunk spans ``len(hunk.lines)``
        lines (its header plus its content lines).
        """
        hunk_last_line_idx = self.header_line_count - 1
        for hunk in self.hunks:
            hunk_first_line_idx = hunk_last_line_idx + 1
            hunk_last_line_idx += len(hunk.lines)
            yield hunk_first_line_idx, hunk_last_line_idx, hunk

    @staticmethod
    def _reverse_content_lines(content_lines):
        """Return the content lines with additions and deletions swapped"""
        # Normally in a diff, deletions come before additions. In order to preserve
        # this property in reverse patches, when this function encounters a deletion
        # line and switches it to addition, it appends the line to the pending_additions
        # list, while additions that get switched to deletions are appended directly to
        # the content_lines list. Each time a context line is encountered, any pending
        # additions are then appended to the content_lines list immediately before the
        # context line and the pending_additions list is cleared.
        new_content_lines = []
        pending_additions = []
        line_type = None
        for line in content_lines:
            prev_line_type = line_type
            line_type = line[:1]
            if line_type == DIFF_ADDITION:
                new_content_lines.append(DIFF_DELETION + line[1:])
            elif line_type == DIFF_DELETION:
                pending_additions.append(DIFF_ADDITION + line[1:])
            elif line_type == DIFF_NO_NEWLINE:
                if prev_line_type == DIFF_DELETION:
                    # Previous line was a deletion that was switched to an
                    # addition, so the "No newline" line goes with it.
                    pending_additions.append(line)
                else:
                    new_content_lines.append(line)
            else:
                new_content_lines.extend(pending_additions)
                new_content_lines.append(line)
                pending_additions = []
        new_content_lines.extend(pending_additions)
        return new_content_lines

    def extract_subset(self, first_line_idx, last_line_idx, *, reverse=False):
        """Return a new patch limited to the selected range of diff lines

        ``first_line_idx`` and ``last_line_idx`` are inclusive and use the
        same indexing as ``_hunk_iter``.  Changes outside the range are
        either turned into context or dropped, depending on ``reverse``.
        When ``reverse`` is true the resulting hunks are also reversed.
        """
        new_hunks = []
        start_offset = 0
        for hunk_first_line_idx, hunk_last_line_idx, hunk in self._hunk_iter():
            # Skip hunks until reaching the one that contains the first selected line.
            if hunk_last_line_idx < first_line_idx:
                continue

            # Stop once the hunk that contains the last selected line has been
            # processed.
            if hunk_first_line_idx > last_line_idx:
                break

            content_lines = []

            prev_skipped = False
            for hunk_line_idx, line in enumerate(
                hunk.content_lines, start=hunk_first_line_idx + 1
            ):
                line_type = line[:1]
                if not first_line_idx <= hunk_line_idx <= last_line_idx:
                    if line_type == DIFF_ADDITION:
                        if reverse:
                            # Change unselected additions to context for reverse diffs.
                            line = DIFF_CONTEXT + line[1:]
                        else:
                            # Skip unselected additions for normal diffs.
                            prev_skipped = True
                            continue
                    elif line_type == DIFF_DELETION:
                        if not reverse:
                            # Change unselected deletions to context for normal diffs.
                            line = DIFF_CONTEXT + line[1:]
                        else:
                            # Skip unselected deletions for reverse diffs.
                            prev_skipped = True
                            continue

                if line_type == DIFF_NO_NEWLINE and prev_skipped:
                    # If the line immediately before a "No newline" line was skipped
                    # (e.g. because it was an unselected addition) skip the "No
                    # newline" line as well
                    continue

                content_lines.append(line)
                # This line was kept, so a "No newline" marker that follows
                # it belongs to a kept line and must not be dropped because
                # of a skip that happened earlier in the hunk.
                prev_skipped = False

            if reverse:
                old_start = hunk.new_start
                content_lines = self._reverse_content_lines(content_lines)
            else:
                old_start = hunk.old_start
            new_hunk = _DiffHunk(
                old_start=old_start,
                start_offset=start_offset,
                heading=hunk.heading,
                content_lines=content_lines,
            )
            if new_hunk.has_changes():
                new_hunks.append(new_hunk)
                start_offset += new_hunk.line_delta()

        return Patch(self.filename, new_hunks)

    def extract_hunk(self, line_idx, *, reverse=False):
        """Return a new patch containing only the hunk containing the specified line"""
        new_hunks = []
        for _, hunk_last_line_idx, hunk in self._hunk_iter():
            # The first hunk that ends at or after the line is the one used.
            if line_idx <= hunk_last_line_idx:
                if reverse:
                    old_start = hunk.new_start
                    content_lines = self._reverse_content_lines(hunk.content_lines)
                else:
                    old_start = hunk.old_start
                    content_lines = hunk.content_lines
                new_hunks = [
                    _DiffHunk(
                        old_start=old_start,
                        start_offset=0,
                        heading=hunk.heading,
                        content_lines=content_lines,
                    )
                ]
                break
        return Patch(self.filename, new_hunks)