git-cola v2.11
[git-cola.git] / cola / diffparse.py
blobd990015e49eab32b3f9d125335aedb284a6e7f1c
1 from __future__ import division, absolute_import, unicode_literals
2 import math
3 import re
4 from collections import defaultdict
6 from . import compat
# Matches a unified-diff hunk header such as "@@ -12,4 +12,6 @@ def foo():".
# Groups: (1) old range, (2) new range, (3) trailing heading text.
_HUNK_HEADER_RE = re.compile(
    r'^@@ '
    r'-([0-9,]+) '   # old "start[,count]" range
    r'\+([0-9,]+) '  # new "start[,count]" range
    r'@@(.*)'        # everything after the closing marker
)
class _DiffHunk(object):
    """One hunk of a unified diff

    Stores the old/new start and count values from the hunk header, the
    header's trailing heading text, the index of the header line within the
    whole diff, and the hunk's lines (header line first).
    """

    def __init__(self, old_start, old_count, new_start, new_count, heading,
                 first_line_idx, lines):
        # Values taken from the "@@ -old +new @@heading" header
        self.old_start, self.old_count = old_start, old_count
        self.new_start, self.new_count = new_start, new_count
        self.heading = heading
        # Position of the header line in the diff, and the hunk's own lines
        self.first_line_idx = first_line_idx
        self.lines = lines

    @property
    def last_line_idx(self):
        """Index within the diff of the last line stored in this hunk"""
        line_count = len(self.lines)
        return self.first_line_idx + line_count - 1
def _parse_range_str(range_str):
    """Parse a hunk range string into a (start, count) tuple of ints

    The range is either "start,count" or a bare "start", in which case the
    count is 1.
    """
    if ',' not in range_str:
        return int(range_str), 1
    start, count = range_str.split(',', 1)
    return int(start), int(count)
def _format_range(start, count):
    """Format a hunk range, omitting the count when it is exactly 1"""
    return str(start) if count == 1 else '%d,%d' % (start, count)
def _format_hunk_header(old_start, old_count, new_start, new_count,
                        heading=''):
    """Build a "@@ -old +new @@heading" hunk header line"""
    old_range = _format_range(old_start, old_count)
    new_range = _format_range(new_start, new_count)
    return '@@ -%s +%s @@%s' % (old_range, new_range, heading)
def _parse_diff(diff_text):
    """Split diff text into a list of _DiffHunk objects

    Every hunk records the index of its header line within the diff and
    collects the non-empty lines that follow it.  An AssertionError is
    raised when a non-header line is seen before any hunk header.
    """
    parsed = []
    for idx, text in enumerate(diff_text.split('\n')):
        header = _HUNK_HEADER_RE.match(text)
        if header:
            old_range, new_range, heading = header.groups()
            old_start, old_count = _parse_range_str(old_range)
            new_start, new_count = _parse_range_str(new_range)
            hunk = _DiffHunk(old_start, old_count, new_start, new_count,
                             heading, idx, lines=[text])
            parsed.append(hunk)
            continue
        if not parsed:
            # first line of the diff is not a header line
            raise AssertionError('Malformed diff?: %s' % diff_text)
        if text:
            # empty lines are not recorded in the hunk
            parsed[-1].lines.append(text)
    return parsed
def digits(number):
    """Return the number of digits needed to display a number

    Values below 1 (including zero and negative numbers) need a single
    digit.  The count is taken from the integer rendering of the number
    rather than from a logarithm: log10(0) is undefined, and floating point
    log10 can round up for large integers and report one digit too many.
    """
    if number >= 1:
        return len('%d' % number)
    return 1
class Counter(object):
    """Track the current and the largest line number for one diff range"""

    def __init__(self, value=0, max_value=-1):
        # Remember the starting maximum so that reset() can restore it
        self._initial_max_value = max_value
        self.max_value = max_value
        self.value = value

    def reset(self):
        """Restore the initial max value; returns self to allow chaining"""
        self.max_value = self._initial_max_value
        return self

    def parse(self, range_str):
        """Load a "start[,count]" range: set the value and grow the max"""
        first, length = _parse_range_str(range_str)
        self.value = first
        self.max_value = max(first + length, self.max_value)

    def tick(self, amount=1):
        """Return the current value, then advance it by amount"""
        current = self.value
        self.value = current + amount
        return current
class DiffLines(object):
    """Parse diffs and gather line numbers

    parse() returns one tuple per recognised diff line: (old, new) for
    regular diffs and (ours, theirs, new) for merge ("@@@") diffs.  Entries
    are line numbers, or the EMPTY / DASH markers defined below.
    """

    EMPTY = -1  # no line number applies to this column
    DASH = -2   # marker emitted for hunk header lines

    def __init__(self):
        self.valid = True
        self.merge = False

        # diff <old> <new>
        # merge <ours> <theirs> <new>
        self.old = Counter()
        self.new = Counter()
        self.ours = Counter()
        self.theirs = Counter()

    def digits(self):
        """Return the digit count of the largest line number seen so far"""
        return digits(max(self.old.max_value, self.new.max_value,
                          self.ours.max_value, self.theirs.max_value))

    def parse(self, diff_text):
        """Return the list of line-number tuples for diff_text

        Counters are reset on entry.  Lines before the first hunk header,
        "No newline" markers and unrecognised lines yield EMPTY tuples;
        hunk headers yield DASH tuples.
        """
        lines = []
        INITIAL_STATE = 0
        DIFF_STATE = 1
        state = INITIAL_STATE
        merge = self.merge = False
        NO_NEWLINE = '\\ No newline at end of file'

        old = self.old.reset()
        new = self.new.reset()
        ours = self.ours.reset()
        theirs = self.theirs.reset()

        for text in diff_text.splitlines():
            if text.startswith('@@ -'):
                parts = text.split(' ', 4)
                # The length check keeps a truncated header from raising
                # IndexError; such a line is then handled like any other.
                if len(parts) > 3 and parts[0] == '@@' and parts[3] == '@@':
                    state = DIFF_STATE
                    old.parse(parts[1][1:])
                    new.parse(parts[2][1:])
                    lines.append((self.DASH, self.DASH))
                    continue
            if text.startswith('@@@ -'):
                self.merge = merge = True
                parts = text.split(' ', 5)
                if (len(parts) > 4
                        and parts[0] == '@@@' and parts[4] == '@@@'):
                    state = DIFF_STATE
                    ours.parse(parts[1][1:])
                    theirs.parse(parts[2][1:])
                    new.parse(parts[3][1:])
                    lines.append((self.DASH, self.DASH, self.DASH))
                    continue
            if state == INITIAL_STATE or text == NO_NEWLINE:
                if merge:
                    lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
                else:
                    lines.append((self.EMPTY, self.EMPTY))
            elif not merge and text.startswith('-'):
                lines.append((old.tick(), self.EMPTY))
            elif merge and text.startswith('- '):
                # Present in "ours" only: removed from the result, and
                # "theirs" never had it, so only the ours counter advances.
                lines.append((ours.tick(), self.EMPTY, self.EMPTY))
            elif merge and text.startswith(' -'):
                # Present in "theirs" only
                lines.append((self.EMPTY, theirs.tick(), self.EMPTY))
            elif merge and text.startswith('--'):
                lines.append((ours.tick(), theirs.tick(), self.EMPTY))
            elif not merge and text.startswith('+'):
                lines.append((self.EMPTY, new.tick()))
            elif merge and text.startswith('++'):
                lines.append((self.EMPTY, self.EMPTY, new.tick()))
            elif merge and text.startswith('+ '):
                # Missing from "ours"; present in "theirs" and the result
                lines.append((self.EMPTY, theirs.tick(), new.tick()))
            elif merge and text.startswith(' +'):
                # Missing from "theirs"; present in "ours" and the result
                lines.append((ours.tick(), self.EMPTY, new.tick()))
            elif not merge and text.startswith(' '):
                lines.append((old.tick(), new.tick()))
            elif merge and text.startswith(' '):
                lines.append((ours.tick(), theirs.tick(), new.tick()))
            elif not text:
                # An empty line advances every counter but adds no entry
                new.tick()
                old.tick()
                ours.tick()
                theirs.tick()
            else:
                # Unrecognised text ends the hunk
                state = INITIAL_STATE
                if merge:
                    lines.append((self.EMPTY, self.EMPTY, self.EMPTY))
                else:
                    lines.append((self.EMPTY, self.EMPTY))

        return lines
class FormatDigits(object):
    """Format numbers for use in diff line numbers"""

    DASH = DiffLines.DASH
    EMPTY = DiffLines.EMPTY

    def __init__(self, dash='', empty=''):
        # fmt/empty/dash stay blank until set_digits() is called
        self.fmt = ''
        self.empty = ''
        self.dash = ''
        # Single-character fillers that set_digits() repeats to full width
        self._dash = dash if dash else compat.unichr(0xb7)
        self._empty = empty if empty else ' '

    def set_digits(self, digits):
        """Configure the column width: zero-padded numbers and fillers"""
        self.fmt = '%%0%dd' % digits
        self.empty = self._empty * digits
        self.dash = self._dash * digits

    def value(self, old, new):
        """Return the "old new" column text for a regular diff line"""
        old_text = self._format(old)
        new_text = self._format(new)
        return ' '.join((old_text, new_text))

    def merge_value(self, old, base, new):
        """Return the three-column text for a merge diff line"""
        old_text = self._format(old)
        base_text = self._format(base)
        new_text = self._format(new)
        return ' '.join((old_text, base_text, new_text))

    def number(self, value):
        """Return value rendered with the configured number format"""
        return self.fmt % value

    def _format(self, value):
        """Map DASH and EMPTY to their fillers; format anything else"""
        if value == self.DASH:
            return self.dash
        if value == self.EMPTY:
            return self.empty
        return self.number(value)
class DiffParser(object):
    """Parse and rewrite diffs to produce edited patches

    This parser is used for modifying the worktree and index by constructing
    temporary patches that are applied using "git apply".
    """

    def __init__(self, filename, diff_text):
        self.filename = filename
        self.hunks = _parse_diff(diff_text)

    def generate_patch(self, first_line_idx, last_line_idx,
                       reverse=False):
        """Return a patch containing a subset of the diff

        Only lines whose diff index lies within [first_line_idx,
        last_line_idx] keep their addition/deletion role.  With reverse set,
        additions and deletions are swapped first.  Returns None when no
        hunk retains a change.
        """
        add, remove, keep, no_eol = '+', '-', ' ', '\\'
        flipped = {add: remove, remove: add}

        patch = ['--- a/%s' % self.filename, '+++ b/%s' % self.filename]
        header_size = len(patch)

        # Net line delta introduced by the hunks emitted so far
        drift = 0

        for hunk in self.hunks:
            if hunk.last_line_idx < first_line_idx:
                # Hunk ends before the selection starts
                continue
            if hunk.first_line_idx > last_line_idx:
                # Hunk starts after the selection ends: nothing more to do
                break

            tally = defaultdict(int)
            body = []
            dropped_previous = False

            idx = hunk.first_line_idx
            for raw in hunk.lines[1:]:
                idx += 1
                kind, rest = raw[:1], raw[1:]

                if reverse:
                    kind = flipped.get(kind, kind)

                selected = first_line_idx <= idx <= last_line_idx
                if not selected and kind == add:
                    # Unselected additions are left out of the patch
                    dropped_previous = True
                    continue
                if not selected and kind == remove:
                    # Unselected deletions are kept as context
                    kind = keep
                if kind == no_eol and dropped_previous:
                    # A "No newline" marker belongs to the line before it;
                    # drop it when that line was dropped.
                    continue

                body.append(kind + rest)
                tally[kind] += 1
                dropped_previous = False

            added = tally[add]
            removed = tally[remove]
            if not (added or removed):
                # Only context remains after filtering; omit the hunk
                continue

            unchanged = tally[keep]
            old_count = unchanged + removed
            new_count = unchanged + added

            old_start = hunk.new_start if reverse else hunk.old_start
            new_start = old_start + drift
            if old_count == 0:
                new_start += 1
            if new_count == 0:
                new_start -= 1

            drift += added - removed

            patch.append(_format_hunk_header(old_start, old_count,
                                             new_start, new_count,
                                             hunk.heading))
            patch.extend(body)

        # Nothing beyond the two file header lines means no hunk was included
        if len(patch) == header_size:
            return None
        patch.append('')
        return '\n'.join(patch)

    def generate_hunk_patch(self, line_idx, reverse=False):
        """Return a patch containing the hunk for the specified line only"""
        if not self.hunks:
            return None
        # Use the first hunk ending at or after line_idx; when there is no
        # such hunk the last one is used.
        target = self.hunks[-1]
        for candidate in self.hunks:
            if line_idx <= candidate.last_line_idx:
                target = candidate
                break
        return self.generate_patch(target.first_line_idx,
                                   target.last_line_idx,
                                   reverse=reverse)