Use method calls when initializing CVSTextDecoder.
[cvs2svn.git] / cvs2svn_lib / rcs_stream.py
blobc045cf2da07ba785a118525939ec84ecf0ab15b7
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module processes RCS diffs (deltas)."""
20 from cStringIO import StringIO
21 import re
def msplit(s):
    """Split S into a list of lines.

    Only LF is treated as a line separator, and each line keeps its
    trailing LF.  The final line is included without an LF if S does
    not end with one.  An empty S yields an empty list."""

    # str.splitlines(True) is not usable here: it also splits on a bare
    # CR, which would clobber \r characters that belong to the content.
    pieces = s.split("\n")

    # The last piece is whatever follows the final LF (possibly nothing);
    # every other piece was terminated by an LF in S.
    tail = pieces.pop()
    lines = [piece + "\n" for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
class MalformedDeltaException(Exception):
    """Raised when an RCS delta is malformed."""
# One ed-style command line of an RCS delta: the command letter ('a' or
# 'd'), immediately followed by a decimal line number, one whitespace
# character, a decimal line count, and the terminating newline.
ed_command_re = re.compile(r'^([ad])(\d+)\s(\d+)\n$')
def generate_edits(diff):
    """Parse an RCS delta and generate the edits it describes.

    DIFF is a string containing a complete RCS file delta.  For every
    ed command found in DIFF, generate one tuple (COMMAND, INPUT_POS,
    ARG):

      ('a', INPUT_POS, LINES) : insert LINES (a list of strings) at
          INPUT_POS.

      ('d', INPUT_POS, COUNT) : remove COUNT input lines, the first of
          which is line INPUT_POS.

    INPUT_POS is always a zero-offset line number within the input
    revision.

    Raise MalformedDeltaException if a command line cannot be parsed
    or if an add command announces more lines than DIFF contains."""

    delta_lines = msplit(diff)
    line_count = len(delta_lines)
    index = 0

    while index < line_count:
        match = ed_command_re.match(delta_lines[index])
        if match is None:
            raise MalformedDeltaException('Bad ed command')
        index += 1

        command = match.group(1)
        start = int(match.group(2))
        count = int(match.group(3))

        if command == 'd':
            # Delete: the delta numbers lines from one, so shift to a
            # zero-offset position.
            yield ('d', start - 1, count)
            continue

        # Add: the COUNT lines following the command are the payload.
        end = index + count
        if end > line_count:
            raise MalformedDeltaException('Add block truncated')
        yield ('a', start, delta_lines[index:end])
        index = end
def merge_blocks(blocks):
    """Merge adjacent 'r'eplace or 'c'opy blocks.

    BLOCKS is an iterable over (COMMAND, OLD_LINES, NEW_LINES) tuples.
    Generate the same sequence, except that each run of consecutive
    'r' blocks is collapsed into a single 'r' block and each run of
    consecutive 'c' blocks into a single 'c' block.  Blocks with any
    other command are passed through unmerged.

    Note that merging uses "+=" on the line sequences of the first
    block of a run, so if those are lists they are extended in place."""

    remaining = iter(blocks)

    # Use the next() builtin rather than the iterator's .next() method,
    # which only exists in Python 2.
    try:
        (command1, old_lines1, new_lines1) = next(remaining)
    except StopIteration:
        # No blocks at all, so there is nothing to generate.
        return

    for (command2, old_lines2, new_lines2) in remaining:
        if command1 == 'r' and command2 == 'r':
            old_lines1 += old_lines2
            new_lines1 += new_lines2
        elif command1 == 'c' and command2 == 'c':
            # A copy block has identical old and new content, so let
            # both sides share the merged sequence.
            old_lines1 += old_lines2
            new_lines1 = old_lines1
        else:
            # The pending block cannot absorb this one; emit it and
            # start accumulating anew.
            yield (command1, old_lines1, new_lines1)
            (command1, old_lines1, new_lines1) = (
                command2, old_lines2, new_lines2
            )

    yield (command1, old_lines1, new_lines1)
def invert_blocks(blocks):
    """Generate the inverse of each block in BLOCKS.

    BLOCKS is an iterable over (COMMAND, OLD_LINES, NEW_LINES) tuples.
    Each block is generated with its old and new lines exchanged, so
    that what was the input becomes the output and vice versa."""

    for block in blocks:
        (command, before, after) = block
        yield (command, after, before)
def generate_edits_from_blocks(blocks):
    """Generate the RCS edits that are equivalent to BLOCKS.

    The generated tuples have the format described in the docstring
    of generate_edits().

    Within one replacement, the delete is always emitted before the
    add.  This matters for two reasons:

    1. The final line of the final 'add' block might lack a
       terminating newline, in which case no further command may
       follow it.

    2. RCS itself uses this canonical order, so keeping to it ensures
       that inverting a delta twice reproduces the original delta."""

    # Adjacent 'r'eplace blocks are merged first; otherwise an add
    # could be emitted ahead of a delete belonging to the same region.
    position = 0
    for (command, old_lines, new_lines) in merge_blocks(blocks):
        if command == 'r':
            if old_lines:
                removed = len(old_lines)
                yield ('d', position, removed)
                position += removed
            if new_lines:
                yield ('a', position, new_lines)
        elif command == 'c':
            # Copied lines produce no edit; just step over them.
            position += len(old_lines)
def write_edits(f, edits):
    """Serialize EDITS as an RCS diff onto the file-like object F.

    EDITS is an iterable over (COMMAND, INPUT_POS, ARG) tuples.  A 'd'
    edit is written as a single command line; an 'a' edit is written
    as a command line followed by the lines to be added.

    Raise MalformedDeltaException for any other command."""

    for edit in edits:
        (command, input_position, arg) = edit
        if command == 'a':
            f.write('a%d %d\n' % (input_position, len(arg),))
            f.writelines(arg)
        elif command == 'd':
            # Delete positions are written one-offset.
            f.write('d%d %d\n' % (input_position + 1, arg,))
        else:
            raise MalformedDeltaException('Unknown command %r' % (command,))
class RCSStream:
    """This class allows RCS deltas to be accumulated.

    This class holds the contents of a single RCS version in memory as
    a list of lines.  It is able to apply an RCS delta to the version,
    thereby transforming the stored text into the following RCS
    version.  While doing so, it can optionally also return the
    inverted delta.

    This class holds revisions in memory.  It uses temporary memory
    space of a few times the size of a single revision plus a few
    times the size of a single delta."""

    def __init__(self, text):
        """Instantiate and initialize the file content with TEXT."""

        self.set_text(text)

    def get_text(self):
        """Return the current file content as a single string."""

        return "".join(self._lines)

    def set_lines(self, lines):
        """Set the current contents to the specified LINES.

        LINES is an iterable over well-formed lines; i.e., each line
        contains exactly one LF as its last character, except that the
        last line can be unterminated.  LINES will be consumed
        immediately; if it is a sequence, it will be copied."""

        self._lines = list(lines)

    def set_text(self, text):
        """Set the current file content to the string TEXT."""

        self._lines = msplit(text)

    def generate_blocks(self, edits):
        """Generate edit blocks from an iterable of RCS edits.

        EDITS is an iterable over RCS edits, as generated by
        generate_edits().  Generate a tuple (COMMAND, OLD_LINES,
        NEW_LINES) for each block implied by EDITS when applied to the
        current contents of SELF.  OLD_LINES and NEW_LINES are lists
        of strings, where each string is one line.  OLD_LINES and
        NEW_LINES are newly-allocated lists, though they might both
        point at the same list.  Blocks consist of copy and replace
        commands:

          ('c', OLD_LINES, NEW_LINES) : copy the lines from one
              version to the other, unaltered.  In this case
              OLD_LINES==NEW_LINES.

          ('r', OLD_LINES, NEW_LINES) : replace OLD_LINES with
              NEW_LINES.  Either OLD_LINES or NEW_LINES (or both)
              might be empty.

        Raise MalformedDeltaException if an edit starts before the
        end of the previous edit or reaches past the end of the
        current contents."""

        # The number of lines from the old version that have been
        # processed so far:
        input_pos = 0

        for (command, start, arg) in edits:
            if command == 'd':
                # "d" - Delete command
                count = arg
                if start < input_pos:
                    raise MalformedDeltaException('Deletion before last edit')
                if start > len(self._lines):
                    raise MalformedDeltaException('Deletion past file end')
                if start + count > len(self._lines):
                    raise MalformedDeltaException('Deletion beyond file end')

                if input_pos < start:
                    # Lines between the previous edit and this one are
                    # carried over unchanged.
                    copied_lines = self._lines[input_pos:start]
                    yield ('c', copied_lines, copied_lines)
                    # Don't keep the slice alive while suspended.
                    del copied_lines
                yield ('r', self._lines[start:start + count], [])
                input_pos = start + count
            else:
                # "a" - Add command
                lines = arg
                if start < input_pos:
                    raise MalformedDeltaException('Insertion before last edit')
                if start > len(self._lines):
                    raise MalformedDeltaException('Insertion past file end')

                if input_pos < start:
                    copied_lines = self._lines[input_pos:start]
                    yield ('c', copied_lines, copied_lines)
                    del copied_lines
                    input_pos = start
                # Yield a copy rather than the caller's own list, so
                # that NEW_LINES really is newly allocated and a
                # consumer that extends it in place (as merge_blocks()
                # does) cannot alter the edit it came from.
                yield ('r', [], list(lines))

        # Pass along the part of the input that follows all of the
        # delta blocks:
        copied_lines = self._lines[input_pos:]
        if copied_lines:
            yield ('c', copied_lines, copied_lines)

    def apply_diff(self, diff):
        """Apply the RCS diff DIFF to the current file content."""

        lines = []

        blocks = self.generate_blocks(generate_edits(diff))
        for (command, old_lines, new_lines) in blocks:
            lines += new_lines

        # Only replace the stored content once the whole delta has
        # been processed.
        self._lines = lines

    def apply_and_invert_edits(self, edits):
        """Apply EDITS and generate their inverse.

        Apply EDITS to the current file content.  Simultaneously
        generate edits suitable for reverting the change."""

        blocks = self.generate_blocks(edits)

        # Blocks have to be merged so that adjacent delete,add edits
        # are generated in that order:
        blocks = merge_blocks(blocks)

        # Convert the iterable into a list (1) so that we can modify
        # self._lines in-place, (2) because we need it twice.
        blocks = list(blocks)

        self._lines = []
        for (command, old_lines, new_lines) in blocks:
            self._lines += new_lines

        return generate_edits_from_blocks(invert_blocks(blocks))

    def invert_diff(self, diff):
        """Apply DIFF and generate its inverse.

        Apply the RCS diff DIFF to the current file content.
        Simultaneously generate an RCS diff suitable for reverting the
        change, and return it as a string."""

        inverse_diff = StringIO()
        write_edits(
            inverse_diff, self.apply_and_invert_edits(generate_edits(diff))
        )
        return inverse_diff.getvalue()