dag: update column allocation algorithm description
[git-cola.git] / cola / diffparse.py
blob98282fbd916bfcdd116b191fc49fe20fb19f347d
1 from __future__ import division, absolute_import, unicode_literals
2 import math
3 import re
4 from collections import defaultdict
6 from . import compat
# Matches a unified-diff hunk header line such as "@@ -1,2 +3,4 @@ heading".
# Group 1 is the old range, group 2 the new range, and group 3 whatever text
# follows the closing "@@" (possibly empty).
_HUNK_HEADER_RE = re.compile(r'^@@ -([0-9,]+) \+([0-9,]+) @@(.*)')
class _DiffHunk(object):
    """Plain record describing one hunk of a diff

    Stores the old/new start and count values, the heading text, the index
    of the hunk's first line, and the list of lines belonging to the hunk.
    """

    def __init__(self, old_start, old_count, new_start, new_count, heading,
                 first_line_idx, lines):
        self.old_start, self.old_count = old_start, old_count
        self.new_start, self.new_count = new_start, new_count
        self.heading = heading
        self.first_line_idx = first_line_idx
        self.lines = lines

    @property
    def last_line_idx(self):
        """Index of the final stored line, counted from first_line_idx"""
        stored = len(self.lines)
        return self.first_line_idx + stored - 1
def _parse_range_str(range_str):
    """Convert a hunk range string into a pair of integers

    "A,B" becomes (A, B); a string with no comma becomes (A, 1).
    """
    first, comma, second = range_str.partition(',')
    if not comma:
        return int(range_str), 1
    return int(first), int(second)
def _format_range(start, count):
    """Render a start/count pair as hunk-header text

    A count of exactly 1 is left implicit, so only the start is emitted.
    """
    return str(start) if count == 1 else '%d,%d' % (start, count)
def _format_hunk_header(old_start, old_count, new_start, new_count,
                        heading=''):
    """Build an "@@ -<old> +<new> @@<heading>" hunk header line"""
    old_range = _format_range(old_start, old_count)
    new_range = _format_range(new_start, new_count)
    return '@@ -%s +%s @@%s' % (old_range, new_range, heading)
def _parse_diff(diff_text):
    """Split diff text into a list of _DiffHunk objects

    Each line matching the hunk header pattern starts a new hunk whose
    first stored line is the header itself.  Later non-empty lines are
    appended to the most recent hunk; empty lines are dropped.

    Raises AssertionError if a non-header line is seen before any hunk
    header.
    """
    hunks = []
    for line_idx, line in enumerate(diff_text.split('\n')):
        header = _HUNK_HEADER_RE.match(line)
        if header is None:
            if not hunks:
                # first line of the diff is not a header line
                raise AssertionError('Malformed diff?: %s' % diff_text)
            if line:
                hunks[-1].lines.append(line)
            continue

        old_range, new_range, heading = header.groups()
        old_start, old_count = _parse_range_str(old_range)
        new_start, new_count = _parse_range_str(new_range)
        hunks.append(_DiffHunk(old_start, old_count,
                               new_start, new_count,
                               heading, line_idx, lines=[line]))
    return hunks
def digits(number):
    """Return the number of digits needed to display a number

    Negative values are reported as needing a single digit.  Zero also
    needs one digit; counting the characters of the integer form avoids
    both the log10(0) domain error and floating-point rounding on very
    large values.
    """
    if number < 0:
        return 1
    return len(str(int(number)))
class DiffLines(object):
    """Parse diffs and gather line numbers"""

    # Sentinel values stored in place of a real line number.
    EMPTY = -1  # no line number on this side of the diff
    DASH = -2   # the row is a hunk header

    def __init__(self):
        self.max_old = -1
        self.max_new = -1
        self.valid = True

    def digits(self):
        """Return the digit count needed for the largest value seen"""
        return digits(max(self.max_old, self.max_new))

    def parse(self, diff_text):
        """Return a list of (old, new) line-number pairs for diff_text

        Hunk headers produce (DASH, DASH); lines before the first hunk
        header produce (EMPTY, EMPTY); removed lines produce
        (old, EMPTY); added lines produce (EMPTY, new); context lines
        produce (old, new).  Empty lines inside a hunk advance both
        counters without adding an entry.  Any other line inside a hunk
        adds no entry and sets self.valid to False.

        max_old, max_new and valid are all reset at the start of every
        call so that the result of an earlier parse cannot leak into
        this one.
        """
        self.max_old = -1
        self.max_new = -1
        # Reset alongside the maxima; otherwise a single unrecognized line
        # would leave this instance flagged invalid for every later diff.
        self.valid = True

        lines = []
        old_cur = 0
        new_cur = 0

        INITIAL_STATE = 0
        DIFF_STATE = 1
        state = INITIAL_STATE

        for text in diff_text.splitlines():
            if text.startswith('@@ -'):
                parts = text.split(' ', 4)
                # A truncated header (e.g. "@@ -1") has fewer than four
                # fields; check the length before indexing so that it falls
                # through to the regular line handling below.
                if (len(parts) >= 4
                        and parts[0] == '@@' and parts[3] == '@@'):
                    state = DIFF_STATE
                    # Strip the leading "-" / "+" from each range field.
                    old_start, old_count = _parse_range_str(parts[1][1:])
                    new_start, new_count = _parse_range_str(parts[2][1:])
                    old_cur = old_start
                    new_cur = new_start
                    self.max_old = max(old_start + old_count, self.max_old)
                    self.max_new = max(new_start + new_count, self.max_new)
                    lines.append((self.DASH, self.DASH))
                    continue
            if state == INITIAL_STATE:
                lines.append((self.EMPTY, self.EMPTY))
            elif text.startswith('-'):
                lines.append((old_cur, self.EMPTY))
                old_cur += 1
            elif text.startswith('+'):
                lines.append((self.EMPTY, new_cur))
                new_cur += 1
            elif text.startswith(' '):
                lines.append((old_cur, new_cur))
                old_cur += 1
                new_cur += 1
            elif not text:
                old_cur += 1
                new_cur += 1
            else:
                # NOTE(review): "\ No newline at end of file" marker lines
                # also land here and flag the parse as invalid -- confirm
                # whether that is intended.
                self.valid = False
                continue

        return lines
class FormatDigits(object):
    """Format numbers for use in diff line numbers"""

    DASH = DiffLines.DASH
    EMPTY = DiffLines.EMPTY

    def __init__(self, dash='', empty=''):
        # Fill characters; a falsy argument selects the built-in default.
        self._dash = dash if dash else compat.unichr(0xb7)
        self._empty = empty if empty else ' '
        # Width-dependent strings stay blank until set_digits() is called.
        self.fmt = ''
        self.empty = ''
        self.dash = ''

    def set_digits(self, digits):
        """Rebuild the format and filler strings for the given width"""
        self.fmt = '%%0%dd' % digits
        self.empty = self._empty * digits
        self.dash = self._dash * digits

    def value(self, old, new):
        """Return the formatted old and new values joined by a space"""
        return '%s %s' % (self._format(old), self._format(new))

    def number(self, value):
        """Return value rendered with the current zero-padded format"""
        return self.fmt % value

    def _format(self, value):
        """Map DASH/EMPTY sentinels to fillers; format anything else"""
        if value == self.DASH:
            return self.dash
        if value == self.EMPTY:
            return self.empty
        return self.number(value)
class DiffParser(object):
    """Parse and rewrite diffs to produce edited patches

    This parser is used for modifying the worktree and index by constructing
    temporary patches that are applied using "git apply".

    """

    def __init__(self, filename, diff_text):
        self.filename = filename
        self.hunks = _parse_diff(diff_text)

    def generate_patch(self, first_line_idx, last_line_idx,
                       reverse=False):
        """Return a patch containing a subset of the diff

        first_line_idx and last_line_idx bound (inclusively) the selected
        lines, on the same index scale as each hunk's first_line_idx.
        With reverse=True additions and deletions trade places.  Returns
        None when no hunk keeps an addition or deletion after filtering.
        """
        ADDITION = '+'
        DELETION = '-'
        CONTEXT = ' '
        NO_NEWLINE = '\\'
        flipped = {ADDITION: DELETION, DELETION: ADDITION}

        lines = ['--- a/%s' % self.filename, '+++ b/%s' % self.filename]
        header_len = len(lines)

        # Net lines added by hunks already emitted; shifts later new_start.
        start_offset = 0

        for hunk in self.hunks:
            # Hunks that end before the selection begins are not relevant.
            if hunk.last_line_idx < first_line_idx:
                continue
            # Nothing at or after a hunk that starts beyond the selection
            # can contribute, so stop here.
            if hunk.first_line_idx > last_line_idx:
                break

            prev_skipped = False
            counts = {}
            filtered_lines = []

            # hunk.lines[0] is skipped; numbering of the remaining lines
            # starts right after the hunk's first line index.
            body = enumerate(hunk.lines[1:], start=hunk.first_line_idx + 1)
            for line_idx, line in body:
                line_type = line[:1]
                line_content = line[1:]

                if reverse:
                    line_type = flipped.get(line_type, line_type)

                selected = first_line_idx <= line_idx <= last_line_idx
                if not selected:
                    if line_type == ADDITION:
                        # Unselected additions are dropped from the patch.
                        prev_skipped = True
                        continue
                    if line_type == DELETION:
                        # Unselected deletions are kept as context lines.
                        line_type = CONTEXT

                if prev_skipped and line_type == NO_NEWLINE:
                    # A "No newline" marker belongs to the line right
                    # before it; that line was a skipped addition, so the
                    # marker is skipped too.
                    continue

                filtered_lines.append(line_type + line_content)
                counts[line_type] = counts.get(line_type, 0) + 1
                prev_skipped = False

            added = counts.get(ADDITION, 0)
            removed = counts.get(DELETION, 0)
            kept = counts.get(CONTEXT, 0)

            # A hunk reduced to context only changes nothing; leave it out.
            if not added and not removed:
                continue

            old_count = kept + removed
            new_count = kept + added

            old_start = hunk.new_start if reverse else hunk.old_start
            new_start = old_start + start_offset
            if old_count == 0:
                new_start += 1
            if new_count == 0:
                new_start -= 1

            start_offset += added - removed

            lines.append(_format_hunk_header(old_start, old_count,
                                             new_start, new_count,
                                             hunk.heading))
            lines.extend(filtered_lines)

        # Still only the two file header lines: no hunk was included.
        if len(lines) == header_len:
            return None

        lines.append('')
        return '\n'.join(lines)

    def generate_hunk_patch(self, line_idx, reverse=False):
        """Return a patch containing the hunk for the specified line only"""
        if not self.hunks:
            return None
        # First hunk ending at or after line_idx; the final hunk is used
        # when line_idx lies beyond every hunk.
        target = next(
            (hunk for hunk in self.hunks if line_idx <= hunk.last_line_idx),
            self.hunks[-1])
        return self.generate_patch(target.first_line_idx,
                                   target.last_line_idx,
                                   reverse=reverse)