Make generate_blocks() a method of RCSStream.
[cvs2svn.git] / cvs2svn_lib / rcs_stream.py
blob3f3f486ea61daeebb2da0f14618b0c36e43c6bcf
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module processes RCS diffs (deltas)."""
20 from cStringIO import StringIO
21 import re
24 def msplit(s):
25 """Split S into an array of lines.
27 Only \n is a line separator. The line endings are part of the lines."""
29 # return s.splitlines(True) clobbers \r
30 re = [ i + "\n" for i in s.split("\n") ]
31 re[-1] = re[-1][:-1]
32 if not re[-1]:
33 del re[-1]
34 return re
37 class MalformedDeltaException(Exception):
38 """A malformed RCS delta was encountered."""
40 pass
43 ed_command_re = re.compile(r'^([ad])(\d+)\s(\d+)\n$')
46 def generate_edits(diff):
47 """Generate edit commands from an RCS diff block.
49 DIFF is a string holding an entire RCS file delta. Generate a tuple
50 (COMMAND, START, ARG) for each block implied by DIFF. Tuples
51 describe the ed commands:
53 ('a', INPUT_POS, LINES) : add LINES at INPUT_POS. LINES is a
54 list of strings.
56 ('d', INPUT_POS, COUNT) : delete COUNT input lines starting at line
57 START.
59 In all cases, START is expressed as a zero-offset line number within
60 the input revision."""
62 diff = msplit(diff)
63 i = 0
65 while i < len(diff):
66 m = ed_command_re.match(diff[i])
67 if not m:
68 raise MalformedDeltaException('Bad ed command')
69 i += 1
70 command = m.group(1)
71 start = int(m.group(2))
72 count = int(m.group(3))
73 if command == 'd':
74 # "d" - Delete command
75 yield ('d', start - 1, count)
76 else:
77 # "a" - Add command
78 if i + count > len(diff):
79 raise MalformedDeltaException('Add block truncated')
80 yield ('a', start, diff[i:i + count])
81 i += count
84 def reorder_blocks(blocks):
85 """Reorder blocks to reverse delete,add pairs.
87 If a delete block is followed by an add block, emit the blocks in
88 reverse order. This is part of inverting diffs, because when the
89 blocks are inverted the blocks will be in the original delete,add
90 order.
92 1. This is required because the last line in the last 'add' block
93 might end in a line that is not terminated with a newline, in
94 which case no other command is allowed to follow it.
96 2. It is also nice to keep deltas in a canonical order; among other
97 things, this ensures that inverting twice gives back the original
98 delta."""
100 i = iter(blocks)
102 try:
103 (command1, start1, count1, lines1) = i.next()
104 except StopIteration:
105 return
107 for (command2, start2, count2, lines2) in i:
108 if command1 == 'd' and command2 == 'a':
109 yield ('a', start2 - count1, count2, lines2)
110 else:
111 yield (command1, start1, count1, lines1)
112 (command1, start1, count1, lines1) = (command2, start2, count2, lines2)
114 yield (command1, start1, count1, lines1)
117 class RCSStream:
118 """This class allows RCS deltas to be accumulated.
120 This file holds the contents of a single RCS version in memory as an
121 array of lines. It is able to apply an RCS delta to the version,
122 thereby transforming the stored text into the following RCS version.
123 While doing so, it can optionally also return the inverted delta.
125 This class holds revisions in memory. It uses temporary memory
126 space of a few times the size of a single revision plus a few times
127 the size of a single delta."""
129 def __init__(self, text):
130 """Instantiate and initialize the file content with TEXT."""
132 self._lines = msplit(text)
134 def get_text(self):
135 """Return the current file content."""
137 return "".join(self._lines)
139 def generate_blocks(self, diff):
140 """Generate edit blocks from an RCS diff block.
142 DIFF is a string holding an entire RCS file delta. Generate a
143 tuple (COMMAND, START, COUNT, [LINE,...]) for each block implied
144 by DIFF. Blocks consist of ed commands and copy blocks:
146 ('a', START, COUNT, LINES) : add LINES at the current position
147 in the output. START is the logical position in the input
148 revision at which the insertion ends up.
150 ('d', START, COUNT, []) : ignore the COUNT lines starting at
151 line START in the input.
153 ('c', START, COUNT, []) : copy COUNT lines, starting at line
154 START in the input, to the output at the current position.
156 START is expressed as a zero-offset line number within the input
157 revision."""
159 # The number of lines from the old version that have been processed
160 # so far:
161 input_pos = 0
163 for (command, start, arg) in generate_edits(diff):
164 if command == 'd':
165 # "d" - Delete command
166 count = arg
167 if start < input_pos:
168 raise MalformedDeltaException('Deletion before last edit')
169 if start > len(self._lines):
170 raise MalformedDeltaException('Deletion past file end')
171 if start + count > len(self._lines):
172 raise MalformedDeltaException('Deletion beyond file end')
174 if input_pos < start:
175 yield ('c', input_pos, start - input_pos, [])
176 yield ('d', start, count, [])
177 input_pos = start + count
178 else:
179 # "a" - Add command
180 lines = arg
181 if start < input_pos:
182 raise MalformedDeltaException('Insertion before last edit')
183 if start > len(self._lines):
184 raise MalformedDeltaException('Insertion past file end')
186 if input_pos < start:
187 yield ('c', input_pos, start - input_pos, [])
188 input_pos = start
189 yield ('a', start, len(lines), lines)
191 # Pass along the part of the input that follows all of the delta
192 # blocks:
193 if input_pos < len(self._lines):
194 yield ('c', input_pos, len(self._lines) - input_pos, [])
196 def apply_diff(self, diff):
197 """Apply the RCS diff DIFF to the current file content."""
199 new_lines = []
201 for (command, start, count, lines) in self.generate_blocks(diff):
202 if command == 'c':
203 new_lines += self._lines[start:start + count]
204 elif command == 'd':
205 pass
206 else:
207 new_lines += lines
209 self._lines = new_lines
211 def apply_and_invert_diff(self, diff, inverse_diff):
212 """Apply DIFF and generate its inverse.
214 Apply the RCS diff DIFF to the current file content.
215 Simultaneously generate an RCS diff suitable for reverting the
216 change, and write it to the file-like object INVERSE_DIFF. Return
217 INVERSE_DIFF."""
219 new_lines = []
221 adjust = 0
222 for (command, start, count, lines) \
223 in reorder_blocks(self.generate_blocks(diff)):
224 if command == 'c':
225 new_lines += self._lines[start:start + count]
226 elif command == 'd':
227 inverse_diff.write("a%d %d\n" % (start + adjust, count))
228 inverse_diff.writelines(self._lines[start:start + count])
229 adjust -= count
230 else:
231 inverse_diff.write("d%d %d\n" % (start + 1 + adjust, count))
232 # Add the lines from the diff:
233 new_lines += lines
234 adjust += count
236 self._lines = new_lines
238 def invert_diff(self, diff):
239 """Apply DIFF and generate its inverse.
241 Apply the RCS diff DIFF to the current file content.
242 Simultaneously generate an RCS diff suitable for reverting the
243 change, and return it as a string."""
245 inverse_diff = StringIO()
246 self.apply_and_invert_diff(diff, inverse_diff)
247 return inverse_diff.getvalue()