Extract functions generate_edits_from_blocks() and write_edits().
[cvs2svn.git] / cvs2svn_lib / rcs_stream.py
blob7ea1bd02a82e9926828f10f5e3ba4ec8e77b8db9
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module processes RCS diffs (deltas)."""
20 from cStringIO import StringIO
21 import re
24 def msplit(s):
25 """Split S into an array of lines.
27 Only \n is a line separator. The line endings are part of the lines."""
29 # return s.splitlines(True) clobbers \r
30 re = [ i + "\n" for i in s.split("\n") ]
31 re[-1] = re[-1][:-1]
32 if not re[-1]:
33 del re[-1]
34 return re
37 class MalformedDeltaException(Exception):
38 """A malformed RCS delta was encountered."""
40 pass
43 ed_command_re = re.compile(r'^([ad])(\d+)\s(\d+)\n$')
46 def generate_edits(diff):
47 """Generate edit commands from an RCS diff block.
49 DIFF is a string holding an entire RCS file delta. Generate a tuple
50 (COMMAND, START, ARG) for each block implied by DIFF. Tuples
51 describe the ed commands:
53 ('a', INPUT_POS, LINES) : add LINES at INPUT_POS. LINES is a
54 list of strings.
56 ('d', INPUT_POS, COUNT) : delete COUNT input lines starting at line
57 START.
59 In all cases, START is expressed as a zero-offset line number within
60 the input revision."""
62 diff = msplit(diff)
63 i = 0
65 while i < len(diff):
66 m = ed_command_re.match(diff[i])
67 if not m:
68 raise MalformedDeltaException('Bad ed command')
69 i += 1
70 command = m.group(1)
71 start = int(m.group(2))
72 count = int(m.group(3))
73 if command == 'd':
74 # "d" - Delete command
75 yield ('d', start - 1, count)
76 else:
77 # "a" - Add command
78 if i + count > len(diff):
79 raise MalformedDeltaException('Add block truncated')
80 yield ('a', start, diff[i:i + count])
81 i += count
84 def merge_blocks(blocks):
85 """Merge adjacent 'r'eplace or 'c'opy blocks."""
87 i = iter(blocks)
89 try:
90 (command1, old_lines1, new_lines1) = i.next()
91 except StopIteration:
92 return
94 for (command2, old_lines2, new_lines2) in i:
95 if command1 == 'r' and command2 == 'r':
96 old_lines1 += old_lines2
97 new_lines1 += new_lines2
98 elif command1 == 'c' and command2 == 'c':
99 old_lines1 += old_lines2
100 new_lines1 = old_lines1
101 else:
102 yield (command1, old_lines1, new_lines1)
103 (command1, old_lines1, new_lines1) = (command2, old_lines2, new_lines2)
105 yield (command1, old_lines1, new_lines1)
108 def invert_blocks(blocks):
109 """Invert the blocks in BLOCKS.
111 BLOCKS is an iterable over blocks. Invert them, in the sense that
112 the input becomes the output and the output the input."""
114 for (command, old_lines, new_lines) in blocks:
115 yield (command, new_lines, old_lines)
118 def generate_edits_from_blocks(blocks):
119 """Convert BLOCKS into an equivalent series of RCS edits.
121 The edits are generated as tuples in the format described in the
122 docstring for generate_edits().
124 It is important that deletes are emitted before adds in the output
125 for two reasons:
127 1. The last line in the last 'add' block might end in a line that is
128 not terminated with a newline, in which case no other command is
129 allowed to follow it.
131 2. This is the canonical order used by RCS; this ensures that
132 inverting twice gives back the original delta."""
134 # Merge adjacent 'r'eplace blocks to ensure that we emit adds and
135 # deletes in the right order:
136 blocks = merge_blocks(blocks)
138 input_position = 0
139 for (command, old_lines, new_lines) in blocks:
140 if command == 'c':
141 input_position += len(old_lines)
142 elif command == 'r':
143 if old_lines:
144 yield ('d', input_position, len(old_lines))
145 input_position += len(old_lines)
146 if new_lines:
147 yield ('a', input_position, new_lines)
150 def write_edits(f, edits):
151 """Write EDITS to file-like object f as an RCS diff."""
153 for (command, input_position, arg) in edits:
154 if command == 'd':
155 f.write('d%d %d\n' % (input_position + 1, arg,))
156 elif command == 'a':
157 lines = arg
158 f.write('a%d %d\n' % (input_position, len(lines),))
159 f.writelines(lines)
160 del lines
161 else:
162 raise MalformedDeltaException('Unknown command %r' % (command,))
165 def write_blocks(f, blocks):
166 """Write blocks to file-like object f as a valid RCS diff."""
168 edits = generate_edits_from_blocks(blocks)
169 write_edits(f, edits)
172 class RCSStream:
173 """This class allows RCS deltas to be accumulated.
175 This file holds the contents of a single RCS version in memory as an
176 array of lines. It is able to apply an RCS delta to the version,
177 thereby transforming the stored text into the following RCS version.
178 While doing so, it can optionally also return the inverted delta.
180 This class holds revisions in memory. It uses temporary memory
181 space of a few times the size of a single revision plus a few times
182 the size of a single delta."""
184 def __init__(self, text):
185 """Instantiate and initialize the file content with TEXT."""
187 self._lines = msplit(text)
189 def get_text(self):
190 """Return the current file content."""
192 return "".join(self._lines)
194 def generate_blocks(self, edits):
195 """Generate edit blocks from an iterable of RCS edits.
197 EDITS is an iterable over RCS edits, as generated by
198 generate_edits(). Generate a tuple (COMMAND, OLD_LINES,
199 NEW_LINES) for each block implied by EDITS when applied to the
200 current contents of SELF. OLD_LINES and NEW_LINES are lists of
201 strings, where each string is one line. OLD_LINES and NEW_LINES
202 are newly-allocated lists, though they might both point at the
203 same list. Blocks consist of copy and replace commands:
205 ('c', OLD_LINES, NEW_LINES) : copy the lines from one version
206 to the other, unaltered. In this case
207 OLD_LINES==NEW_LINES.
209 ('r', OLD_LINES, NEW_LINES) : replace OLD_LINES with
210 NEW_LINES. Either OLD_LINES or NEW_LINES (or both) might
211 be empty."""
213 # The number of lines from the old version that have been processed
214 # so far:
215 input_pos = 0
217 for (command, start, arg) in edits:
218 if command == 'd':
219 # "d" - Delete command
220 count = arg
221 if start < input_pos:
222 raise MalformedDeltaException('Deletion before last edit')
223 if start > len(self._lines):
224 raise MalformedDeltaException('Deletion past file end')
225 if start + count > len(self._lines):
226 raise MalformedDeltaException('Deletion beyond file end')
228 if input_pos < start:
229 copied_lines = self._lines[input_pos:start]
230 yield ('c', copied_lines, copied_lines)
231 del copied_lines
232 yield ('r', self._lines[start:start + count], [])
233 input_pos = start + count
234 else:
235 # "a" - Add command
236 lines = arg
237 if start < input_pos:
238 raise MalformedDeltaException('Insertion before last edit')
239 if start > len(self._lines):
240 raise MalformedDeltaException('Insertion past file end')
242 if input_pos < start:
243 copied_lines = self._lines[input_pos:start]
244 yield ('c', copied_lines, copied_lines)
245 del copied_lines
246 input_pos = start
247 yield ('r', [], lines)
249 # Pass along the part of the input that follows all of the delta
250 # blocks:
251 copied_lines = self._lines[input_pos:]
252 if copied_lines:
253 yield ('c', copied_lines, copied_lines)
255 def apply_diff(self, diff):
256 """Apply the RCS diff DIFF to the current file content."""
258 lines = []
260 blocks = self.generate_blocks(generate_edits(diff))
261 for (command, old_lines, new_lines) in blocks:
262 lines += new_lines
264 self._lines = lines
266 def apply_and_invert_diff(self, diff, inverse_diff):
267 """Apply DIFF and generate its inverse.
269 Apply the RCS diff DIFF to the current file content.
270 Simultaneously generate an RCS diff suitable for reverting the
271 change, and write it to the file-like object INVERSE_DIFF. Return
272 INVERSE_DIFF."""
274 blocks = self.generate_blocks(generate_edits(diff))
276 # Blocks have to be merged so that adjacent delete,add edits are
277 # generated in that order:
278 blocks = merge_blocks(blocks)
280 blocks = invert_blocks(blocks)
282 # Convert the iterable into a list (1) so that we can modify
283 # self._lines in-place, (2) because we need it twice.
284 blocks = list(blocks)
286 self._lines = []
287 for (command, old_lines, new_lines) in blocks:
288 self._lines += old_lines
290 write_blocks(inverse_diff, blocks)
292 def invert_diff(self, diff):
293 """Apply DIFF and generate its inverse.
295 Apply the RCS diff DIFF to the current file content.
296 Simultaneously generate an RCS diff suitable for reverting the
297 change, and return it as a string."""
299 inverse_diff = StringIO()
300 self.apply_and_invert_diff(diff, inverse_diff)
301 return inverse_diff.getvalue()