From 3557f0fd0f5947e0382e689d16fd71bd38bfc395 Mon Sep 17 00:00:00 2001 From: smerten Date: Sat, 3 Jul 2010 11:26:54 +0000 Subject: [PATCH] Text and blocks are handled. All unit tests but the section test are green. git-svn-id: https://docutils.svn.sourceforge.net/svnroot/docutils/trunk@6350 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- sandbox/rstdiff/global.log | 162 +++++++++++++++++++++ sandbox/rstdiff/rstdiff.py | 133 ++++++++++++++--- sandbox/rstdiff/tag.log | 2 +- sandbox/rstdiff/tests/simple/expected/section.xml | 2 +- sandbox/rstdiff/tests/simple/expected/textOnly.xml | 9 +- .../rstdiff/tests/simple/input/textOnly.new.rst | 4 + .../rstdiff/tests/simple/input/textOnly.old.rst | 4 + sandbox/rstdiff/tests/simple/source/section.rst | 2 +- sandbox/rstdiff/tests/simple/source/textOnly.rst | 8 +- sandbox/rstdiff/treediff/__init__.py | 15 +- 10 files changed, 305 insertions(+), 36 deletions(-) diff --git a/sandbox/rstdiff/global.log b/sandbox/rstdiff/global.log index 9ee40d56b..e2e476d6d 100644 --- a/sandbox/rstdiff/global.log +++ b/sandbox/rstdiff/global.log @@ -1,4 +1,166 @@ ************************************** +Date: Sat Jul 3 13:24:31 CEST 2010 +Author: stefan +Tag: rstdiff_1_48 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Added comments. +************************************** +Date: Sat Jul 3 13:16:29 CEST 2010 +Author: stefan +Tag: rstdiff_1_47 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Mergeable ranges of opcodes are merged. +************************************** +Date: Sat Jul 3 12:59:26 CEST 2010 +Author: stefan +Tag: rstdiff_1_46 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Unit tests run again. +************************************** +Date: Sat Jul 3 12:49:55 CEST 2010 +Author: stefan +Tag: rstdiff_1_45 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Opcodes are propagated to parents if possible. +************************************** +Date: Sat Jul 3 12:00:42 CEST 2010 +Author: stefan +Tag: rstdiff_1_44 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/tests/simple/source +In directory rosalu:/home/stefan/free/rstdiff/tests/simple/source + +Modified Files: + section.rst textOnly.rst + +-------------------------------------- +Log Message: +Improved class names. +************************************** +Date: Sat Jul 3 11:54:18 CEST 2010 +Author: stefan +Tag: rstdiff_1_43 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/tests/simple/source +In directory rosalu:/home/stefan/free/rstdiff/tests/simple/source + +Modified Files: + textOnly.rst + +-------------------------------------- +Log Message: +Classes are set for changed nodes. +************************************** +Date: Sat Jun 26 13:43:08 CEST 2010 +Author: stefan +Tag: rstdiff_1_42 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/tests/simple/source +In directory rosalu:/home/stefan/free/rstdiff/tests/simple/source + +Modified Files: + textOnly.rst + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/treediff +In directory rosalu:/home/stefan/free/rstdiff/treediff + +Modified Files: + __init__.py + +-------------------------------------- +Log Message: +White is treated as junk so replacement sequences are recognized. +************************************** +Date: Sat Jun 26 10:35:24 CEST 2010 +Author: stefan +Tag: rstdiff_1_41 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/tests/simple/input +In directory rosalu:/home/stefan/free/rstdiff/tests/simple/input + +Modified Files: + textOnly.new.rst textOnly.old.rst + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/tests/simple/source +In directory rosalu:/home/stefan/free/rstdiff/tests/simple/source + +Modified Files: + textOnly.rst + +-------------------------------------- +Log Message: +Debugging. +************************************** Date: Sun Jun 20 11:34:24 CEST 2010 Author: stefan Tag: rstdiff_1_40 diff --git a/sandbox/rstdiff/rstdiff.py b/sandbox/rstdiff/rstdiff.py index fe68c7dd5..1a56f1476 100755 --- a/sandbox/rstdiff/rstdiff.py +++ b/sandbox/rstdiff/rstdiff.py @@ -143,7 +143,7 @@ class Opcode(object): def __init__(self, opcodeTuple): """Initialize from a tuple returned by `TreeMatcher.get_opcodes()`""" - self._tuple = opcodeTuple + self._tuple = list(opcodeTuple) def getCommand(self): """Return the command.""" @@ -154,7 +154,7 @@ class Opcode(object): return ( self._tuple[1], self._tuple[2], ) def getNewRange(self): - """Returns the range pertaining to an new list.""" + """Returns the range pertaining to a new list.""" return ( self._tuple[3], self._tuple[4], ) def getSubOpcodes(self): @@ -185,6 +185,35 @@ class Opcode(object): return ( self.getCommand(), oldList[oldRange[0]:oldRange[1]], newList[newRange[0]:newRange[1]], self.getSubOpcodes()) + def setSubOpcodes(self, opcodes): + """Set the sub-opcodes to a new list.""" + if self._tuple[0] != self.Descend: + raise TypeError("Can not set subopcodes of a %r opcode" + % ( self._tuple[0], )) + self._tuple[5] = opcodes + + def setCommand(self, command): + """Set a new command adapting subopcodes.""" + if self._tuple[0] == command: + return + self._tuple[0] = command + if command == self.Descend: + self._tuple[5] = [ ] + else: + self._tuple = self._tuple[0:5] + + def setOldRange(self, range): + """Sets the range pertaining to an old list.""" + ( self._tuple[1], self._tuple[2], ) = range + + def setNewRange(self, range): + """Sets the range pertaining to a new list.""" + ( self._tuple[3], self._tuple[4], ) = range + + def asTuple(self): + """Return the opcode as a tuple.""" + return tuple(self._tuple) + ############################################################################### ############################################################################### # Additional docutils stuff @@ -253,18 +282,27 @@ class Words2TextVisitor(nodes.SparseNodeVisitor): def visit_Word(self, word): parent = word.parent - last = parent.index(word) - end = last + 1 + # Find this node and the first node of the sequence it belongs to + first = None + for i in range(len(parent)): + if not isinstance(parent[i], nodes.Text): + first = None + elif first is None: + first = i + # ``parent.index(word)`` uses value equality - can not be + # used here to find `word` + if id(parent[i]) == id(word): + end = i + 1 + break + else: + raise IndexError("Can not find %r in its parent" % ( word, )) + if (len(parent) > end and isinstance(parent[end], nodes.Text)): # The visitor processes following children even if they are # deleted - so work for last node of a sequence return - first = last - while first > 0 and isinstance(parent[first - 1], nodes.Text): - first -= 1 - text = nodes.Text(reduce(lambda s, node: s + node.astext(), parent[first:end], "")) parent[first:end] = ( text, ) @@ -365,6 +403,11 @@ class DocutilsDispatcher(HashableNodeImpl): ########################################################################### # Merging + NewDelete = 'removed' + NewInsert = 'added' + NewReplaced = 'replaced' + NewReplacement = 'replacement' + def copyRoot(self, node): """Copy `node` as root and return it.""" return self.dispatchClass('copyRoot', node) @@ -379,17 +422,16 @@ class DocutilsDispatcher(HashableNodeImpl): def addChild_UNKNOWN(self, root, child): root.append(child) - def copyChild(self, node): - """Copy `node` as child and return it.""" - return self.dispatchClass('copyChild', node) + def copyChild(self, node, newType): + """Copy `node` as child and return it. `newType` is ``None`` for an + unchanged child or the change type.""" + return self.dispatchClass('copyChild', node, newType) - def copyChild_UNKNOWN(self, node): - return node.deepcopy() - - NewDelete = Opcode.Delete - NewInsert = Opcode.Insert - NewReplaced = 'replaced' - NewReplacement = 'replacement' + def copyChild_UNKNOWN(self, node, newType): + copy = node.deepcopy() + if newType: + copy['classes'].append(self.newType2Class(newType)) + return copy def copyChildren(self, head, tail, root, newType): """Return a range of new nodes copied from [ `head` ] + `tail` under @@ -398,7 +440,7 @@ class DocutilsDispatcher(HashableNodeImpl): return self.dispatchClass('copyChildren', head, tail, root, newType) def copyChildren_UNKNOWN(self, head, tail, root, newType): - return [ self.copyChild(child) + return [ self.copyChild(child, newType) for child in [ head, ] + tail ] def copyRange(self, root, children, newType): @@ -427,7 +469,7 @@ class DocutilsDispatcher(HashableNodeImpl): of `oldRoot` / `newRoot` by `command`.""" if command == Opcode.Equal: for old in oldRange: - self.addChild(diffRoot, self.copyChild(old)) + self.addChild(diffRoot, self.copyChild(old, None)) elif command == Opcode.Insert or command == Opcode.Delete: if command == Opcode.Insert: srcRoot = newRoot @@ -486,6 +528,8 @@ class DocutilsDispatcher(HashableNodeImpl): rootEq_Word = rootEq_Text def rootEq_White(self, node, other): + # TODO Must behave different for places where whitespace + # differences are relevant return True # Text behaves the same as root or child @@ -547,7 +591,8 @@ def doDiff(hashableNodeImpl, oldTree, newTree): """Create a difference from `oldTree` to `newTree` using `hashableNodeImpl`. Returns the opcodes necessary to transform `oldTree` to `newTree`.""" - matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree) + matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree, + lambda node: isinstance(node, White)) return matcher.get_opcodes() def buildDocument(oldTree, newTree, opcodes, settings): @@ -577,16 +622,51 @@ def buildTree(dispatcher, diffRoot, opcodes, oldRoot, newRoot): dispatcher.mergeChildren(diffRoot, oldRoot, newRoot, command, oldRange, newRange) +def cleanOpcodes(opcodes): + """Replace some nasty results in `opcodes` by cleaner versions.""" + for i in range(len(opcodes)): + opcode = Opcode(opcodes[i]) + subOpcodes = opcode.getSubOpcodes() + if not subOpcodes: + # Nothing to clean for flat opcodes + continue + + cleanOpcodes(subOpcodes) + j = 1 + while j < len(subOpcodes): + prev = Opcode(subOpcodes[j - 1]) + this = Opcode(subOpcodes[j]) + if (this.getCommand() != Opcode.Descend + and prev.getCommand() == this.getCommand()): + # Merge adjacing opcodes of same type + prevOld = prev.getOldRange() + prevNew = prev.getNewRange() + thisOld = this.getOldRange() + thisNew = this.getNewRange() + prev.setOldRange(( prevOld[0], thisOld[1], )) + prev.setNewRange(( prevNew[0], thisNew[1], )) + subOpcodes[j - 1:j + 1] = [ prev.asTuple(), ] + else: + j += 1 + opcode.setSubOpcodes(subOpcodes) + if len(subOpcodes) == 1: + subOpcode = Opcode(subOpcodes[0]) + if subOpcode.getCommand() != Opcode.Descend: + # Propagate 1-element sequences up + opcode.setCommand(subOpcode.getCommand()) + opcodes[i] = opcode.asTuple() + def createDiff(oldTree, newTree): """Create and return a diff document from `oldTree` to `newTree`.""" dispatcher = DocutilsDispatcher() #dispatcher.debug = True opcodes = doDiff(dispatcher, oldTree, newTree) + cleanOpcodes(opcodes) if len(opcodes) != 1: raise TypeError("Don't how to merge documents which are not rootEq") opcode = Opcode(opcodes[0]) - if opcode.getCommand() != Opcode.Descend: - raise TypeError("Don't how to merge opcode of type %r" + if opcode.getCommand() not in ( Opcode.Descend, Opcode.Equal, ): + raise TypeError("Don't how to merge top level opcode of type %r" % ( opcode.getCommand(), )) if dispatcher.debug: @@ -596,7 +676,12 @@ def createDiff(oldTree, newTree): pprint(opcodes, sys.stdout, 2, 40, None) diffDoc = buildDocument(oldTree, newTree, opcodes, pub.settings) - buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree) + if opcode.getCommand() == Opcode.Equal: + # TODO Equality should be reported somehow + diffDoc.extend([ child.deepcopy() + for child in newTree.children ]) + else: + buildTree(dispatcher, diffDoc, opcode.getSubOpcodes(), oldTree, newTree) return diffDoc if __name__ == '__main__': diff --git a/sandbox/rstdiff/tag.log b/sandbox/rstdiff/tag.log index 955607574..6c3618b06 100644 --- a/sandbox/rstdiff/tag.log +++ b/sandbox/rstdiff/tag.log @@ -1 +1 @@ -rstdiff_1_40 +rstdiff_1_48 diff --git a/sandbox/rstdiff/tests/simple/expected/section.xml b/sandbox/rstdiff/tests/simple/expected/section.xml index f53cfbd59..3bff8fd2b 100644 --- a/sandbox/rstdiff/tests/simple/expected/section.xml +++ b/sandbox/rstdiff/tests/simple/expected/section.xml @@ -25,7 +25,7 @@ Text in third section. -
+
Obsolete section diff --git a/sandbox/rstdiff/tests/simple/expected/textOnly.xml b/sandbox/rstdiff/tests/simple/expected/textOnly.xml index a5d1ad612..081e88b28 100644 --- a/sandbox/rstdiff/tests/simple/expected/textOnly.xml +++ b/sandbox/rstdiff/tests/simple/expected/textOnly.xml @@ -5,19 +5,24 @@ An important addition. + An anchor between changes. + + This is some text distributed in a couple of lines. Let's count to make things longer: - fill three lines: one two three four five six seven eight nine ten eleven twelve. - + + Another anchor. + + Some obsolete add-on. diff --git a/sandbox/rstdiff/tests/simple/input/textOnly.new.rst b/sandbox/rstdiff/tests/simple/input/textOnly.new.rst index b6006630b..68e07b11b 100644 --- a/sandbox/rstdiff/tests/simple/input/textOnly.new.rst +++ b/sandbox/rstdiff/tests/simple/input/textOnly.new.rst @@ -1,5 +1,9 @@ An important addition. +An anchor between changes. + This is some text distributed in a couple of lines. Let's count to fill three lines: one two three four five six seven eight nine ten eleven twelve. + +Another anchor. diff --git a/sandbox/rstdiff/tests/simple/input/textOnly.old.rst b/sandbox/rstdiff/tests/simple/input/textOnly.old.rst index 975f0a470..fa6962495 100644 --- a/sandbox/rstdiff/tests/simple/input/textOnly.old.rst +++ b/sandbox/rstdiff/tests/simple/input/textOnly.old.rst @@ -1,5 +1,9 @@ +An anchor between changes. + This is some text distributed in a couple of lines. Let's count to make things longer: one two three four five six seven eight nine ten eleven twelve. +Another anchor. + Some obsolete add-on. diff --git a/sandbox/rstdiff/tests/simple/source/section.rst b/sandbox/rstdiff/tests/simple/source/section.rst index 6e7e5e58b..53eef089c 100644 --- a/sandbox/rstdiff/tests/simple/source/section.rst +++ b/sandbox/rstdiff/tests/simple/source/section.rst @@ -15,7 +15,7 @@ Third section Text in third section. -.. class:: change-deleted +.. class:: change-removed Obsolete section ================ diff --git a/sandbox/rstdiff/tests/simple/source/textOnly.rst b/sandbox/rstdiff/tests/simple/source/textOnly.rst index e6b78179e..b361bd2a0 100644 --- a/sandbox/rstdiff/tests/simple/source/textOnly.rst +++ b/sandbox/rstdiff/tests/simple/source/textOnly.rst @@ -5,11 +5,15 @@ An important addition. +An anchor between changes. + This is some text distributed in a couple of lines. Let's count to -:change-replaced:`make things longer:` :change-replacement:`fill three lines:` one two three four five six seven eight nine ten +:change-replaced:`make things longer:`:change-replacement:`fill three lines:` one two three four five six seven eight nine ten eleven twelve. -.. class:: change-deleted +Another anchor. + +.. class:: change-removed Some obsolete add-on. diff --git a/sandbox/rstdiff/treediff/__init__.py b/sandbox/rstdiff/treediff/__init__.py index 76efeb074..54bcb78a3 100644 --- a/sandbox/rstdiff/treediff/__init__.py +++ b/sandbox/rstdiff/treediff/__init__.py @@ -199,17 +199,22 @@ spirit to `difflib.SequenceMatcher'""" a = None b = None hashableNodeImpl = None + isJunk = None - def __init__(self, hashableNodeImpl, a, b): + def __init__(self, hashableNodeImpl, a, b, isJunk=None): """Construct a TreeMatcher for matching trees `a` and `b`. `a` and `b` must be the root nodes of two trees to be compared. `hashableNodeImpl` must be an implementation of `HashableNodeImpl` -governing the comparison of the nodes in the trees.""" +governing the comparison of the nodes in the trees. + +If `isJunk` is given it must be a one-argument function returning +`True` if the given argument should be considered as junk. """ self.a = a self.b = b self.hashableNodeImpl = hashableNodeImpl + self.isJunk = isJunk def get_opcodes(self): """Return list of 5- or 6-tuples describing how to turn `a` into `b`. @@ -237,7 +242,7 @@ is only a 'replace' of one tree by the other. self.hashableNodeImpl.pushRootOnly(True) try: - sm = SequenceMatcher(None, [ self.a, ], [ self.b, ]) + sm = SequenceMatcher(self.isJunk, [ self.a, ], [ self.b, ]) rootOpcodes = sm.get_opcodes() if rootOpcodes[0][0] == 'equal': return [ ( 'descend', 0, 1, 0, 1, @@ -254,7 +259,7 @@ is only a 'replace' of one tree by the other. b = self.hashableNodeImpl.getChildren(bElem) self.hashableNodeImpl.pushRootOnly(False) try: - sm = SequenceMatcher(None, a, b) + sm = SequenceMatcher(self.isJunk, a, b) nestedOpcodes = sm.get_opcodes() return self._resolveDeepReplace(nestedOpcodes, a, b) finally: @@ -271,7 +276,7 @@ is only a 'replace' of one tree by the other. continue self.hashableNodeImpl.pushRootOnly(True) try: - sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd]) + sm = SequenceMatcher(self.isJunk, a[aBeg:aEnd], b[bBeg:bEnd]) rootOpcodes = sm.get_opcodes() for j in xrange(len(rootOpcodes)): ( subOpcode, aSubBeg, aSubEnd, -- 2.11.4.GIT