Add a way to specify the MimeMapper mappings to its constructor directly.
[cvs2svn.git] / cvs2svn_lib / rcs_stream.py
blob5d7d1698b0ac6bbb9e39b6cd16702c73bfe09373
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module processes RCS diffs (deltas)."""
20 from cStringIO import StringIO
21 import re
def msplit(s):
    r"""Split S into a list of lines, keeping the line endings.

    Only '\n' is treated as a line separator, and each separator stays
    attached to the line that it terminates.  A final line that lacks a
    trailing '\n' is returned unchanged, and an empty S gives an empty
    list.

    s.splitlines(True) is deliberately not used: it would also break
    lines at '\r', which has to survive as ordinary line content."""

    pieces = s.split("\n")
    # split() always leaves whatever follows the last '\n' (possibly the
    # empty string) as the final element; it is the only piece that was
    # not terminated by a newline in S.
    tail = pieces.pop()
    lines = [piece + "\n" for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
class MalformedDeltaException(Exception):
    """Raised when an RCS delta cannot be parsed or applied.

    The exception carries only the message passed to its constructor."""
# Matches one complete command line of an RCS delta: the command letter
# ('a' or 'd'), the decimal start line, a single whitespace character,
# the decimal line count and the terminating newline.
ed_command_re = re.compile(
    r'^([ad])(\d+)\s(\d+)\n$'
)
def generate_blocks(numlines, diff):
    """Generate edit blocks from an RCS diff block.

    NUMLINES is the number of lines in the old revision; DIFF is a
    string holding an entire RCS file delta.  Generate a tuple
    (COMMAND, START, COUNT, [LINE,...]) for each block implied by DIFF.
    Blocks consist of ed commands and copy blocks:

        ('a', START, COUNT, LINES) : add LINES at the current position
            in the output.  START is the logical position in the input
            revision at which the insertion ends up.

        ('d', START, COUNT, []) : ignore the COUNT lines starting at
            line START in the input.

        ('c', START, COUNT, []) : copy COUNT lines, starting at line
            START in the input, to the output at the current position.

    START is expressed as a zero-offset line number within the input
    revision.

    Raise MalformedDeltaException if a command line cannot be parsed,
    if a command refers to a position before the previous edit or past
    the end of the input, or if an add block announces more lines than
    DIFF contains.  Because this is a generator, the exception is
    raised while iterating, after any earlier blocks were produced."""

    delta_lines = msplit(diff)
    total = len(delta_lines)
    index = 0

    # The number of lines from the old version that have been processed
    # so far:
    input_pos = 0

    while index < total:
        match = ed_command_re.match(delta_lines[index])
        if match is None:
            raise MalformedDeltaException('Bad ed command')
        index += 1

        (command, start_text, count_text) = match.groups()
        start = int(start_text)
        count = int(count_text)

        if command == 'a':
            # "a" - Add command; START is already the number of input
            # lines that precede the insertion point.
            if start < input_pos:
                raise MalformedDeltaException('Insertion before last edit')
            if start > numlines:
                raise MalformedDeltaException('Insertion past file end')
            if index + count > total:
                raise MalformedDeltaException('Add block truncated')

            if input_pos < start:
                # Copy the untouched input lines leading up to the
                # insertion point.
                yield ('c', input_pos, start - input_pos, [])
                input_pos = start
            yield (command, start, count, delta_lines[index:index + count])
            # Skip over the payload lines of the add block.
            index += count
        else:
            # "d" - Delete command; convert the one-based line number
            # from the delta into a zero-offset position.
            start -= 1

            if start < input_pos:
                raise MalformedDeltaException('Deletion before last edit')
            if start > numlines:
                raise MalformedDeltaException('Deletion past file end')
            if start + count > numlines:
                raise MalformedDeltaException('Deletion beyond file end')

            if input_pos < start:
                # Copy the untouched input lines leading up to the
                # deleted range.
                yield ('c', input_pos, start - input_pos, [])
            yield (command, start, count, [])
            input_pos = start + count

    # Pass along the part of the input that follows all of the delta
    # blocks:
    if input_pos < numlines:
        yield ('c', input_pos, numlines - input_pos, [])
def reorder_blocks(blocks):
    """Reorder blocks so that delete,add pairs come out as add,delete.

    BLOCKS is an iterable of (COMMAND, START, COUNT, LINES) tuples.
    Whenever a 'd' block is directly followed by an 'a' block, the 'a'
    block is emitted first, with its START reduced by the COUNT of the
    delete block, and the 'd' block is emitted after it.  Every other
    block is passed through unchanged and in its original order.  An
    empty BLOCKS generates nothing.

    This is part of inverting diffs, because inversion turns each add
    into a delete and vice versa, which puts the swapped pairs back
    into delete,add order.

    1. This is required because the last line in the last 'add' block
       might end in a line that is not terminated with a newline, in
       which case no other command is allowed to follow it.

    2. It is also nice to keep deltas in a canonical order; among other
       things, this ensures that inverting twice gives back the
       original delta."""

    # The block that has been read but not yet emitted; it is held back
    # so that an 'a' block that follows a 'd' block can overtake it.
    pending = None

    for (command, start, count, lines) in blocks:
        if pending is None:
            pending = (command, start, count, lines)
        elif pending[0] == 'd' and command == 'a':
            # Emit the add ahead of the held-back delete.  The delete
            # stays pending, so further adds can overtake it as well.
            yield (command, start - pending[2], count, lines)
        else:
            yield pending
            pending = (command, start, count, lines)

    if pending is not None:
        yield pending
class RCSStream:
    """This class allows RCS deltas to be accumulated.

    An instance holds the contents of a single RCS version in memory as
    a list of lines.  It is able to apply an RCS delta to the version,
    thereby transforming the stored text into the following RCS
    version.  While doing so, it can optionally also produce the
    inverted delta.

    This class holds revisions in memory.  It uses temporary memory
    space of a few times the size of a single revision plus a few times
    the size of a single delta."""

    def __init__(self, text):
        """Instantiate and initialize the file content with TEXT."""

        # The current revision, one list element per line, with the
        # line endings still attached.
        self._lines = msplit(text)

    def get_text(self):
        """Return the current file content as a single string."""

        return "".join(self._lines)

    def apply_diff(self, diff):
        """Apply the RCS diff DIFF to the current file content.

        The stored content is replaced only after all of DIFF has been
        processed, so it is left untouched if generate_blocks() raises
        MalformedDeltaException."""

        new_lines = []

        for (command, start, count, lines) \
                in generate_blocks(len(self._lines), diff):
            if command == 'c':
                new_lines.extend(self._lines[start:start + count])
            elif command != 'd':
                # An add block contributes its own lines; a delete
                # block contributes nothing to the new revision.
                new_lines.extend(lines)

        self._lines = new_lines

    def apply_and_invert_diff(self, diff, inverse_diff):
        """Apply DIFF and generate its inverse.

        Apply the RCS diff DIFF to the current file content.
        Simultaneously generate an RCS diff suitable for reverting the
        change, and write it to the file-like object INVERSE_DIFF.
        Return INVERSE_DIFF.

        If DIFF is malformed, MalformedDeltaException propagates; the
        stored content is then unchanged, but INVERSE_DIFF may already
        have received part of the inverse delta."""

        new_lines = []

        # Number of lines by which the new revision is currently longer
        # (or, if negative, shorter) than the old one; used to translate
        # old-revision positions into new-revision line numbers.
        adjust = 0

        for (command, start, count, lines) \
                in reorder_blocks(generate_blocks(len(self._lines), diff)):
            if command == 'c':
                new_lines.extend(self._lines[start:start + count])
            elif command == 'd':
                # A deletion is reverted by adding the removed lines
                # back in.
                inverse_diff.write("a%d %d\n" % (start + adjust, count))
                inverse_diff.writelines(self._lines[start:start + count])
                adjust -= count
            else:
                # An addition is reverted by deleting the added lines;
                # delete commands use one-based line numbers.
                inverse_diff.write("d%d %d\n" % (start + 1 + adjust, count))
                # Add the lines from the diff:
                new_lines.extend(lines)
                adjust += count

        self._lines = new_lines
        return inverse_diff

    def invert_diff(self, diff):
        """Apply DIFF and generate its inverse.

        Apply the RCS diff DIFF to the current file content.
        Simultaneously generate an RCS diff suitable for reverting the
        change, and return it as a string."""

        inverse_diff = StringIO()
        self.apply_and_invert_diff(diff, inverse_diff)
        return inverse_diff.getvalue()