From 1fb35ff3800a0b342e20e034105dfccc0cffacdd Mon Sep 17 00:00:00 2001 From: mhagger Date: Mon, 31 May 2010 03:48:24 +0000 Subject: [PATCH] Change the representation of "blocks" within the rcs_stream module. Represent blocks as 'c'opies and 'r'eplaces, where the replace blocks can contain both old_lines and new_lines. Don't bother keeping track of line numbers, as (with this format) they are only needed on output. This makes it trivial to implement transformations on blocks. The cost is that sometimes "old_lines" is generated when it is not needed. But it only consists of a list of pointers to lines that are already in RAM, so it is not much extra work. git-svn-id: http://cvs2svn.tigris.org/svn/cvs2svn/trunk@5128 be7e6eca-30d4-0310-a8e5-ac0d63af7087 --- cvs2svn_lib/rcs_stream.py | 160 ++++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 69 deletions(-) diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py index f0982a6f..49142987 100644 --- a/cvs2svn_lib/rcs_stream.py +++ b/cvs2svn_lib/rcs_stream.py @@ -81,37 +81,64 @@ def generate_edits(diff): i += count -def reorder_blocks(blocks): - """Reorder blocks to reverse delete,add pairs. - - If a delete block is followed by an add block, emit the blocks in - reverse order. This is part of inverting diffs, because when the - blocks are inverted the blocks will be in the original delete,add - order. - - 1. This is required because the last line in the last 'add' block - might end in a line that is not terminated with a newline, in - which case no other command is allowed to follow it. - - 2. It is also nice to keep deltas in a canonical order; among other - things, this ensures that inverting twice gives back the original - delta.""" +def merge_blocks(blocks): + """Merge adjacent 'r'eplace or 'c'opy blocks.""" i = iter(blocks) try: - (command1, start1, count1, lines1) = i.next() + (command1, old_lines1, new_lines1) = i.next() except StopIteration: return - for (command2, start2, count2, lines2) in i: - if command1 == 'd' and command2 == 'a': - yield ('a', start2 - count1, count2, lines2) + for (command2, old_lines2, new_lines2) in i: + if command1 == 'r' and command2 == 'r': + old_lines1 += old_lines2 + new_lines1 += new_lines2 + elif command1 == 'c' and command2 == 'c': + old_lines1 += old_lines2 + new_lines1 = old_lines1 else: - yield (command1, start1, count1, lines1) - (command1, start1, count1, lines1) = (command2, start2, count2, lines2) + yield (command1, old_lines1, new_lines1) + (command1, old_lines1, new_lines1) = (command2, old_lines2, new_lines2) + + yield (command1, old_lines1, new_lines1) + + +def invert_blocks(blocks): + """Invert the blocks in BLOCKS. + + BLOCKS is an iterable over blocks. Invert them, in the sense that + the input becomes the output and the output the input.""" + + for (command, old_lines, new_lines) in blocks: + yield (command, new_lines, old_lines) + + +def write_blocks(f, blocks): + """Write blocks to file-like object f as a valid RCS diff. + + It is important that deletes are emitted before adds in the output + for two reasons: - yield (command1, start1, count1, lines1) + 1. The last line in the last 'add' block might end in a line that is + not terminated with a newline, in which case no other command is + allowed to follow it. + + 2. This is the canonical order used by RCS; this ensures that + inverting twice gives back the original delta.""" + + input_position = 0 + for (command, old_lines, new_lines) in blocks: + if command == 'c': + input_position += len(old_lines) + elif command == 'r': + if old_lines: + f.write('d%d %d\n' % (input_position + 1, len(old_lines),)) + input_position += len(old_lines) + if new_lines: + f.write('a%d %d\n' % (input_position, len(new_lines),)) + f.writelines(new_lines) class RCSStream: @@ -140,23 +167,20 @@ class RCSStream: """Generate edit blocks from an iterable of RCS edits. EDITS is an iterable over RCS edits, as generated by - generate_edits(). Generate a tuple (COMMAND, START, COUNT, - [LINE,...]) for each block implied by EDITS when applied to the - current contents of SELF. Blocks consist of ed commands and copy - blocks: - - ('a', START, COUNT, LINES) : add LINES at the current position - in the output. START is the logical position in the input - revision at which the insertion ends up. + generate_edits(). Generate a tuple (COMMAND, OLD_LINES, + NEW_LINES) for each block implied by EDITS when applied to the + current contents of SELF. OLD_LINES and NEW_LINES are lists of + strings, where each string is one line. OLD_LINES and NEW_LINES + are newly-allocated lists, though they might both point at the + same list. Blocks consist of copy and replace commands: - ('d', START, COUNT, []) : ignore the COUNT lines starting at - line START in the input. + ('c', OLD_LINES, NEW_LINES) : copy the lines from one version + to the other, unaltered. In this case + OLD_LINES==NEW_LINES. - ('c', START, COUNT, []) : copy COUNT lines, starting at line - START in the input, to the output at the current position. - - START is expressed as a zero-offset line number within the input - revision.""" + ('r', OLD_LINES, NEW_LINES) : replace OLD_LINES with + NEW_LINES. Either OLD_LINES or NEW_LINES (or both) might + be empty.""" # The number of lines from the old version that have been processed # so far: @@ -174,8 +198,10 @@ class RCSStream: raise MalformedDeltaException('Deletion beyond file end') if input_pos < start: - yield ('c', input_pos, start - input_pos, []) - yield ('d', start, count, []) + copied_lines = self._lines[input_pos:start] + yield ('c', copied_lines, copied_lines) + del copied_lines + yield ('r', self._lines[start:start + count], []) input_pos = start + count else: # "a" - Add command @@ -186,30 +212,28 @@ class RCSStream: raise MalformedDeltaException('Insertion past file end') if input_pos < start: - yield ('c', input_pos, start - input_pos, []) + copied_lines = self._lines[input_pos:start] + yield ('c', copied_lines, copied_lines) + del copied_lines input_pos = start - yield ('a', start, len(lines), lines) + yield ('r', [], lines) # Pass along the part of the input that follows all of the delta # blocks: - if input_pos < len(self._lines): - yield ('c', input_pos, len(self._lines) - input_pos, []) + copied_lines = self._lines[input_pos:] + if copied_lines: + yield ('c', copied_lines, copied_lines) def apply_diff(self, diff): """Apply the RCS diff DIFF to the current file content.""" - new_lines = [] + lines = [] blocks = self.generate_blocks(generate_edits(diff)) - for (command, start, count, lines) in blocks: - if command == 'c': - new_lines += self._lines[start:start + count] - elif command == 'd': - pass - else: - new_lines += lines + for (command, old_lines, new_lines) in blocks: + lines += new_lines - self._lines = new_lines + self._lines = lines def apply_and_invert_diff(self, diff, inverse_diff): """Apply DIFF and generate its inverse. @@ -219,25 +243,23 @@ class RCSStream: change, and write it to the file-like object INVERSE_DIFF. Return INVERSE_DIFF.""" - new_lines = [] - - adjust = 0 blocks = self.generate_blocks(generate_edits(diff)) - blocks = reorder_blocks(blocks) - for (command, start, count, lines) in blocks: - if command == 'c': - new_lines += self._lines[start:start + count] - elif command == 'd': - inverse_diff.write("a%d %d\n" % (start + adjust, count)) - inverse_diff.writelines(self._lines[start:start + count]) - adjust -= count - else: - inverse_diff.write("d%d %d\n" % (start + 1 + adjust, count)) - # Add the lines from the diff: - new_lines += lines - adjust += count - self._lines = new_lines + # Blocks have to be merged so that adjacent delete,add edits are + # generated in that order: + blocks = merge_blocks(blocks) + + blocks = invert_blocks(blocks) + + # Convert the iterable into a list (1) so that we can modify + # self._lines in-place, (2) because we need it twice. + blocks = list(blocks) + + self._lines = [] + for (command, old_lines, new_lines) in blocks: + self._lines += old_lines + + write_blocks(inverse_diff, blocks) def invert_diff(self, diff): """Apply DIFF and generate its inverse. -- 2.11.4.GIT