From 561afe754980b570c5b0cae727036b710aaa5aba Mon Sep 17 00:00:00 2001 From: smerten Date: Mon, 24 May 2010 11:07:53 +0000 Subject: [PATCH] Added basic script including some infrastructure. git-svn-id: https://docutils.svn.sourceforge.net/svnroot/docutils/trunk@6329 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- sandbox/rstdiff/global.log | 161 +++++++++++++ sandbox/rstdiff/rstdiff.py | 265 +++++++++++++++++++++ sandbox/rstdiff/studies/diff.py | 84 ++----- sandbox/rstdiff/studies/hashable.py | 211 ++-------------- sandbox/rstdiff/tag.log | 2 +- .../{studies/hashable.py => treediff/__init__.py} | 137 +++++++++-- 6 files changed, 595 insertions(+), 265 deletions(-) create mode 100755 sandbox/rstdiff/rstdiff.py rewrite sandbox/rstdiff/studies/hashable.py (92%) copy sandbox/rstdiff/{studies/hashable.py => treediff/__init__.py} (55%) diff --git a/sandbox/rstdiff/global.log b/sandbox/rstdiff/global.log index aa26464d5..4766be9fe 100644 --- a/sandbox/rstdiff/global.log +++ b/sandbox/rstdiff/global.log @@ -1,4 +1,165 @@ ************************************** +Date: Mon May 24 12:52:09 CEST 2010 +Author: stefan +Tag: rstdiff_1_25 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Completed infrastructure. +************************************** +Date: Sun May 23 13:55:05 CEST 2010 +Author: stefan +Tag: rstdiff_1_24 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/treediff +In directory rosalu:/home/stefan/free/rstdiff/treediff + +Modified Files: + __init__.py + +-------------------------------------- +Log Message: +First steps towards a hashable implementation for Docutils nodes. +************************************** +Date: Sun May 16 14:45:35 CEST 2010 +Author: stefan +Tag: rstdiff_1_23 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/studies +In directory rosalu:/home/stefan/free/rstdiff/studies + +Modified Files: + diff.py hashable.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/treediff +In directory rosalu:/home/stefan/free/rstdiff/treediff + +Added Files: + __init__.py + +-------------------------------------- +Log Message: +Moved core of the studies to real package. +************************************** +Date: Sun May 16 14:44:48 CEST 2010 +Author: stefan + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/treediff +In directory rosalu:/home/stefan/free/rstdiff/treediff + + +-------------------------------------- +Log Message: +Directory /home/stefan/vault/sm/rstdiff/treediff added to the repository +************************************** +Date: Thu May 13 14:32:49 CEST 2010 +Author: stefan +Tag: rstdiff_1_22 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Writer is configured properly. +************************************** +Date: Sun May 9 14:38:14 CEST 2010 +Author: stefan +Tag: rstdiff_1_21 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Input files are parsed. +************************************** +Date: Sun May 9 14:22:11 CEST 2010 +Author: stefan +Tag: rstdiff_1_20 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Modified Files: + rstdiff.py + +-------------------------------------- +Log Message: +Command line processing ok. +************************************** +Date: Sun May 9 10:25:10 CEST 2010 +Author: stefan +Tag: rstdiff_1_19 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff +In directory rosalu:/home/stefan/free/rstdiff + +Added Files: + rstdiff.py + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/studies +In directory rosalu:/home/stefan/free/rstdiff/studies + +Modified Files: + diff.py + +-------------------------------------- +Log Message: +Added `rstdiff.py`. +************************************** +Date: Sat May 8 12:56:02 CEST 2010 +Author: stefan +Tag: rstdiff_1_18 + +-------------------------------------- +Update of /home/stefan/vault/sm/rstdiff/studies +In directory rosalu:/home/stefan/free/rstdiff/studies + +Modified Files: + diff.py + +-------------------------------------- +Log Message: +Continued studies. There is a class `TreeMatcher` similar to +`SequenceMatcher`. +************************************** Date: Sun Apr 25 12:14:20 CEST 2010 Author: stefan Tag: rstdiff_1_17 diff --git a/sandbox/rstdiff/rstdiff.py b/sandbox/rstdiff/rstdiff.py new file mode 100755 index 000000000..a2af0b064 --- /dev/null +++ b/sandbox/rstdiff/rstdiff.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Stefan Merten + +# rstdiff.py is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +""" +Generates a structural diff from two reStructuredText input documents +and produces an annotated result. +""" + +__docformat__ = 'reStructuredText' + +try: + import locale + locale.setlocale(locale.LC_ALL, '') +except: + pass + +import os, re, sys + +import docutils +from docutils import frontend, writers, nodes, SettingsSpec +from docutils.core import Publisher +from docutils.utils import SystemMessage, Reporter, new_reporter +from docutils.frontend import OptionParser, make_paths_absolute +from docutils.nodes import Node + +from treediff import TreeMatcher, HashableNodeImpl + +############################################################################### +# Command line specification + +description = ("""Generates a structural diff from two reStructuredText input +documents and produces an annotated result. """) + +writerOption = 'writer' +writerDefault = 'xml' +writerArgRE1 = '^--' + writerOption + '=' + '(.*)$' + +settings_spec = ( + 'rstdiff options', + None, + (('Select writer to write output with (default "xml").', + ['--' + writerOption], + {}), + ) + ) + +settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace', + writerOption: writerDefault} + +config_section = 'rstdiff' + +usage = '%prog [options]... [ []]' + +############################################################################### +# Classes for three argument command lines + +class Publisher3Args(Publisher): + + def setup_option_parser(self, usage=None, description=None, + settings_spec=None, config_section=None, + **defaults): + if config_section: + if not settings_spec: + settings_spec = SettingsSpec() + settings_spec.config_section = config_section + parts = config_section.split() + if len(parts) > 1 and parts[-1] == 'application': + settings_spec.config_section_dependencies = ['applications'] + #@@@ Add self.source & self.destination to components in future? + option_parser = OptionParser3Args( + components=(self.parser, self.reader, self.writer, settings_spec), + defaults=defaults, read_config_files=1, + usage=usage, description=description) + return option_parser + +class OptionParser3Args(OptionParser): + + def check_values(self, values, args): + """Store positional arguments as runtime settings.""" + values._old_source, values._new_source, values._destination = self.check_args(args) + make_paths_absolute(values.__dict__, self.relative_path_settings, + os.getcwd()) + values._config_files = self.config_files + return values + + def check_args(self, args): + old_source = new_source = destination = None + if not args: + self.error('At least 1 argument required.') + else: + old_source = args.pop(0) + if old_source == '-': # means stdin + old_source = None + if args: + new_source = args.pop(0) + if new_source == '-': # means stdin + new_source = None + if args: + destination = args.pop(0) + if destination == '-': # means stdout + destination = None + if args: + self.error('Maximum 3 arguments allowed.') + if old_source is None and new_source is None: + self.error('Old and new source may not both use stdin.') + if (old_source and old_source == destination + or new_source and new_source == destination): + self.error('Do not specify the same file for both source and ' + 'destination. It will clobber the source file.') + return old_source, new_source, destination + +############################################################################### +# Hashable + +class HashableDocutilsNodeImpl(HashableNodeImpl): + """Implements equality for a docutils `Node`.""" + + def __init__(self): + super(self.__class__, self).__init__(Node) + + def dispatchClass(self, function, node, *args): + """Dispatch a call of type `function` for the class of `node` using + arguments `node` and `args`. Default is to dispatch for imaginary class + "UNKNOWN".""" + try: + method = getattr(self, + "%s_%s" % ( function, node.__class__.__name__, )) + except AttributeError: + method = getattr(self, + "%s_%s" % ( function, "UNKNOWN", )) + return method(node, *args) + + ########################################################################### + # Implementation of abstract methods + + def rootHash(self, node): + """Return a hash for the root only. Subclasses must override + this.""" + return self.dispatchClass('rootHash', node) + + def rootHash_UNKNOWN(self, node): + return hash(node.__class__) + + def rootEq(self, node, other): + """Returns root equality of `node` and an `other` node. ``True`` if + the two nodes as roots are equal without considering their + children. This should be true if one node can be replaced by + the other and all changes can be represented without changing + the node itself. Subclasses must override this.""" + # Only nodes of the same class can be equal + if node.__class__ != other.__class__: + return False + return self.dispatchClass('rootEq', node, other) + + def rootEq_UNKNOWN(self, node, other): + # Unless we know better two roots of the same type are considered equal + return True + + def childHash(self, node): + """Return a hash for the node as a child. Subclasses must override + this.""" + return self.dispatchClass('childHash', node) + + def childHash_UNKNOWN(self, node): + # TODO Is this correct *and* good? + return hash(node.__class__) + + def childEq(self, node, other): + """Returns equality of `node` and an `other` node as children. + ``True`` if the child features of the two nodes are equal + without considering the root. Subclasses must override + this.""" + # Only nodes of the same class can be equal + # TODO Is this correct? + if node.__class__ != other.__class__: + return False + return self.dispatchClass('childEq', node, other) + + def childEq_UNKNOWN(self, node, other): + # We don't know how to compare two nodes of same type as children + return False + + def getChildren(self, node): + """Return the children of `node` as a list. Subclasses must override + this.""" + return self.dispatchClass('getChildren', node) + + def getChildren_UNKNOWN(self, node): + return node.children + + ########################################################################### + # Real comparison + +############################################################################### +# Main + +def processCommandLine(): + """Process command line and return a `Publisher`.""" + # Determine writer here so options can be given normally + preWriter = writerDefault + for arg in sys.argv: + match = re.search(writerArgRE1, arg) + if match: + preWriter = match.group(1) + + pub = Publisher3Args() + pub.set_reader('standalone', None, 'restructuredtext') + pub.set_writer(preWriter) + + settingsSpec = SettingsSpec() + settingsSpec.settings_spec = settings_spec + settingsSpec.settings_defaults = settings_defaults + pub.process_command_line(usage=usage, description=description, + settings_spec=settingsSpec, + config_section=config_section) + if pub.settings.writer != preWriter: + new_reporter('', + pub.settings).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer" + % ( preWriter, pub.settings.writer, )) + return pub + +def readTree(pub, sourceName): + """Read and return a tree from `sourceName`.""" + # Reset reader - just in case it keeps state from a previous invocation + pub.set_reader('standalone', None, 'restructuredtext') + pub.set_source(None, sourceName) + return pub.reader.read(pub.source, pub.parser, pub.settings) + +def doDiff(pub, oldTree, newTree): + """Create a difference from `oldTree` to `newTree`. Returns the opcodes + necessary to transform `oldTree` to `newTree`.""" + hashableNodeImpl = HashableDocutilsNodeImpl() + matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree) + return matcher.get_opcodes() + +if __name__ == '__main__': + pub = processCommandLine() + + pub.set_destination(None, None) + + oldTree = readTree(pub, pub.settings._old_source) + newTree = readTree(pub, pub.settings._new_source) + + opcodes = doDiff(pub, oldTree, newTree) + + from pprint import pprint + print(newTree) + print(oldTree) + pprint(opcodes, sys.stdout, 2, 40, None) diff --git a/sandbox/rstdiff/studies/diff.py b/sandbox/rstdiff/studies/diff.py index 4ff8a2f41..7f35aae65 100644 --- a/sandbox/rstdiff/studies/diff.py +++ b/sandbox/rstdiff/studies/diff.py @@ -2,12 +2,12 @@ from difflib import SequenceMatcher -from hashable import HashableNodeImpl - from pprint import pprint import sys +from treediff import TreeMatcher, HashableNodeImpl + __docformat__ = 'reStructuredText' aI = ( 1, 2, 3, 5, 6, 7, ) @@ -21,6 +21,7 @@ sm = SequenceMatcher(None, aI, bI) # print(sm.get_opcodes()) # The way to go class TreeNode(object): + """An example tree node to play with.""" tag = None children = ( ) @@ -30,6 +31,7 @@ class TreeNode(object): self.children = children class HashableTreeNodeImpl(HashableNodeImpl): + """A `HashableNodeImpl` for `TreeNode`.""" def __init__(self): super(self.__class__, self).__init__(TreeNode) @@ -38,8 +40,6 @@ class HashableTreeNodeImpl(HashableNodeImpl): return hash(node.tag) def childHash(self, node): - """Return a hash for the children only. Subclasses should override - this.""" return hash(node) def rootEq(self, node, other): @@ -51,61 +51,6 @@ class HashableTreeNodeImpl(HashableNodeImpl): def getChildren(self, node): return node.children -# To generate a diff tree: -# -# * ``equal`` opcodes need a copy -# -# * ``insert`` and ``delete`` opcodes must be processed as such -# -# * ``replace`` opcodes need to be analyzed recursively to find a -# minimal set of changes - -def resolveDeepReplace(hashableNodeImpl, opcodes, a, b): - """Resolves ``replace`` elements in `opcodes` pertaining to `a` and - `b`. Returns opcodes including nested elements for these cases. - `hashableNodeImpl` is the `HashableNodeImpl` used to control the hashable - feature.""" - result = [ ] - for i in xrange(len(opcodes)): - ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i] - if opcode != 'replace': - result.append(opcodes[i]) - continue - try: - hashableNodeImpl.pushRootOnly(True) - sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd]) - rootOpcodes = sm.get_opcodes() - for j in xrange(len(rootOpcodes)): - ( subOpcode, aSubBeg, aSubEnd, bSubBeg, bSubEnd ) = rootOpcodes[j] - if subOpcode != 'equal': - result.append(( subOpcode, aBeg + aSubBeg, aBeg + aSubEnd, - bBeg + bSubBeg, bBeg + bSubEnd, )) - else: - for k in xrange(aSubEnd - aSubBeg): - aIdx = aBeg + aSubBeg + k - bIdx = bBeg + bSubBeg + k - result.append(('descend', - aIdx, aIdx + 1, bIdx, bIdx + 1, - resolveRootEqual(hashableNodeImpl, - a[aIdx], b[bIdx]), )) - finally: - hashableNodeImpl.popRootOnly() - return result - -def resolveRootEqual(hashableNodeImpl, aElem, bElem): - """Considers children of `aElem` and `bElem` which have equal roots. - Returns opcodes for the children. `hashableNodeImpl` is the - `HashableNodeImpl` used to control the hashable feature.""" - a = hashableNodeImpl.getChildren(aElem) - b = hashableNodeImpl.getChildren(bElem) - try: - hashableNodeImpl.pushRootOnly(False) - sm = SequenceMatcher(None, a, b) - nestedOpcodes = sm.get_opcodes() - return resolveDeepReplace(hashableNodeImpl, nestedOpcodes, a, b) - finally: - hashableNodeImpl.popRootOnly() - hashableNodeImpl = HashableTreeNodeImpl() aT = ( TreeNode('first'), @@ -140,7 +85,22 @@ sm = SequenceMatcher(None, aT, bT) top = sm.get_opcodes() pprint(top) print('---') -# Use a pseudo root -pprint(resolveRootEqual(hashableNodeImpl, - TreeNode(None, aT), TreeNode(None, bT)), + +# Use a pseudo root with different root nodes. +pprint(TreeMatcher(hashableNodeImpl, + TreeNode('a', aT), TreeNode('b', bT)).get_opcodes(), + sys.stdout, 2, 40, None) + +# Use a pseudo root with equal root nodes. +pprint(TreeMatcher(hashableNodeImpl, + TreeNode(None, aT), TreeNode(None, bT)).get_opcodes(), sys.stdout, 2, 40, None) + +# To generate a diff tree: +# +# * ``equal`` opcodes need a copy +# +# * ``insert`` and ``delete`` opcodes must be processed as such +# +# * ``replace`` opcodes need to be analyzed recursively to find a +# minimal set of changes diff --git a/sandbox/rstdiff/studies/hashable.py b/sandbox/rstdiff/studies/hashable.py dissimilarity index 92% index c3e2da060..803bb867e 100644 --- a/sandbox/rstdiff/studies/hashable.py +++ b/sandbox/rstdiff/studies/hashable.py @@ -1,187 +1,24 @@ -# A study to check how a Docutils node can be made hashable - -from docutils.nodes import Node - -__docformat__ = 'reStructuredText' - -class HashableDescriptor(object): - """A descriptor to plug into a class to be made hashable.""" - - """Name of function to override.""" - override = None - """Arity of function to override.""" - arity = None - hashableImpl = None - debug = False - - def __init__(self, override, arity, hashableImpl): - """Initialize a descriptor for function `override` with `arity` using - `hashableImpl`.""" - self.override = override - self.arity = arity - self.hashableImpl = hashableImpl - - def __get__(self, instance, owner): - """Implements the descriptor protocol. Returns a function to use on - `instance`.""" - if self.debug: - print('__get__ called on ' + owner.__name__ + ' by ' - + self.override) - try: - fct = self.hashableImpl.getFunction(self.override) - except: - if self.debug: - print('***Exception***') - raise AttributeError('Can not access method %r' - % ( self.override, )) - if self.arity == 0: - return lambda: fct(instance) - elif self.arity == 1: - return lambda other: fct(instance, other) - else: - raise AttributeError('Can not create function for %r with arity %d' - % ( self.override, self.arity, )) - -class HashableImpl(object): - """Implements hashability and reflexive equality for a class.""" - - debug = False - - def __init__(self, cls): - """Set methods to implement equality in `cls`.""" - setattr(cls, '__hash__', HashableDescriptor('__hash__', 0, self)) - setattr(cls, '__eq__', HashableDescriptor('__eq__', 1, self)) - setattr(cls, '__ne__', HashableDescriptor('__ne__', 1, self)) - - def getFunction(self, name): - """Return the function named `name`.""" - # Get the function from the *real* object - return getattr(self, 'impl' + name) - - def impl__hash__(self, this): - """Return `this.__hash__`(). Derived classes must override this.""" - if self.debug: - print('impl__hash__ called') - return id(this) - - def impl__eq__(self, this, other): - """Return `this.__eq__`(other). Derived classes must override this.""" - if self.debug: - print('impl__eq__ called') - return id(this) == id(other) - - def impl__ne__(self, this, other): - """Return `this.__ne__`(other). Derived classes must override this.""" - if self.debug: - print('impl__ne__ called') - return not self.impl__eq__(this, other) - -class HashableNodeImpl(HashableImpl): - """An abstract implementation of `HashableImpl` for nodes in a tree. - Allows switching between of root only comparison and deep - comparison.""" - - """Consider only the root node (or include the children).""" - _rootOnly = False - """Stack for `_rootOnly`""" - __rootOnlies = [ ] - - def __init__(self, cls): - HashableImpl.__init__(self, cls) - - def pushRootOnly(self, newRootOnly): - """Set `newRootOnly` as new `rootOnly` value. If ``True`` then only - information in the root is considered.""" - self.__rootOnlies.append(self._rootOnly) - self._rootOnly = newRootOnly - - def popRootOnly(self): - """Pop and return last `rootOnly` value.""" - self._rootOnly = self.__rootOnlies.pop() - - def impl__hash__(self, node): - """Returns the hash for node `node`. Subclasses may override this but - overriding `rootHash` and `childrenHash` may make more sense.""" - rootHash = self.rootHash(node) - if self._rootOnly: - return rootHash - else: - return rootHash + self.childrenHash(node) - - def impl__eq__(self, node, other): - """Returns equality between `node` and an `other` node. Subclasses may - override this but overriding `rootEq` and `childrenEq` may - make more sense.""" - if not self.rootEq(node, other): - return False - if self._rootOnly: - return True - return self.childrenEq(node, other) - - def childrenHash(self, node): - """Return a hash for the children only. Subclasses may override - this but overriding `childHash` may make more sense.""" - return reduce(lambda x, y: x + y, - [ self.childHash(child) - for child in self.getChildren(node) ], 0) - - def childrenEq(self, node, other): - """Returns children equality of `node` and an `other` node. ``True`` - if the children of the two nodes are equal without considering - the root. Subclasses may override this but overriding - `childEq` may make more sense.""" - nodeChildren = self.getChildren(node) - otherChildren = self.getChildren(other) - if len(nodeChildren) != len(otherChildren): - return False - for i in xrange(len(nodeChildren)): - if not self.childEq(nodeChildren[i], otherChildren[i]): - return False - return True - - def rootHash(self, node): - """Return a hash for the root only. Subclasses must override - this.""" - raise NotImplementedError() - - def childHash(self, node): - """Return a hash for the node as a child. Subclasses must override - this.""" - raise NotImplementedError() - - def rootEq(self, node, other): - """Returns root equality of `node` and an `other` node. ``True`` if - the two nodes as roots are equal without considering their - children. This should be true if one node can be replaced by - the other and all changes can be represented without changing - the node itself. Subclasses must override this.""" - raise NotImplementedError() - - def childEq(self, node, other): - """Returns equality of `node` and an `other` node as children. - ``True`` if the child features of the two nodes are equal - without considering the root. Subclasses must override - this.""" - raise NotImplementedError() - - def getChildren(self, node): - """Return the children of `node` as a list. Subclasses must override - this.""" - raise NotImplementedError() - -class HashableDocutilsNodeImpl(HashableImpl): - """Implements equality for a docutils `Node`.""" - - def __init__(self): - super(self.__class__, self).__init__(Node) - -if __name__ == '__main__': - hashableImpl = HashableDocutilsNodeImpl() - hashableImpl.debug = True - - n1 = Node() - n2 = Node() - print(n1 == n1) - print(n1 == n2) - print(n1 != n2) - h = { n1: 'bla' } +# A study to check how a Docutils node can be made hashable + +from docutils.nodes import Node + +from treediff import HashableImpl + +__docformat__ = 'reStructuredText' + +class HashableDocutilsNodeImpl(HashableImpl): + """Implements equality for a docutils `Node`.""" + + def __init__(self): + super(self.__class__, self).__init__(Node) + +if __name__ == '__main__': + hashableImpl = HashableDocutilsNodeImpl() + hashableImpl.debug = True + + n1 = Node() + n2 = Node() + print(n1 == n1) + print(n1 == n2) + print(n1 != n2) + h = { n1: 'bla' } diff --git a/sandbox/rstdiff/tag.log b/sandbox/rstdiff/tag.log index 371e02913..155906132 100644 --- a/sandbox/rstdiff/tag.log +++ b/sandbox/rstdiff/tag.log @@ -1 +1 @@ -rstdiff_1_17 +rstdiff_1_25 diff --git a/sandbox/rstdiff/studies/hashable.py b/sandbox/rstdiff/treediff/__init__.py similarity index 55% copy from sandbox/rstdiff/studies/hashable.py copy to sandbox/rstdiff/treediff/__init__.py index c3e2da060..b58943588 100644 --- a/sandbox/rstdiff/studies/hashable.py +++ b/sandbox/rstdiff/treediff/__init__.py @@ -1,9 +1,29 @@ -# A study to check how a Docutils node can be made hashable +#!/usr/bin/env python -from docutils.nodes import Node +# Copyright (C) 2010 Stefan Merten + +# rstdiff.py is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +from difflib import SequenceMatcher __docformat__ = 'reStructuredText' +############################################################################### +# Hashable + class HashableDescriptor(object): """A descriptor to plug into a class to be made hashable.""" @@ -169,19 +189,106 @@ class HashableNodeImpl(HashableImpl): this.""" raise NotImplementedError() -class HashableDocutilsNodeImpl(HashableImpl): - """Implements equality for a docutils `Node`.""" +############################################################################### +# Tree matcher + +class TreeMatcher(object): + """Objects of this class are able to match trees. This is similar in +spirit to `difflib.SequenceMatcher'""" + + a = None + b = None + hashableNodeImpl = None + + def __init__(self, hashableNodeImpl, a, b): + """Construct a TreeMatcher for matching trees `a` and `b`. + +`a` and `b` must be the root nodes of two trees to be compared. +`hashableNodeImpl` must be an implementation of `HashableNodeImpl` +governing the comparison of the nodes in the trees.""" - def __init__(self): - super(self.__class__, self).__init__(Node) + self.a = a + self.b = b + self.hashableNodeImpl = hashableNodeImpl + + def get_opcodes(self): + """Return list of 5- or 6-tuples describing how to turn `a` into `b`. + +Each tuple is of the form (tag, i1, i2, j1, j2, [sub]). The first tuple +has i1 == j1 == 0, and remaining tuples have i1 == i2 from the +tuple preceding it, and likewise for j1 == the previous j2. + +The tags are strings, with these meanings: + +'replace': a[i1:i2] should be replaced by b[j1:j2] +'delete': a[i1:i2] should be deleted. + Note that j1==j2 in this case. +'insert': b[j1:j2] should be inserted at a[i1:i1]. + Note that i1==i2 in this case. +'equal': a[i1:i2] == b[j1:j2] +'descend': Descend on nodes a[i1] and b[i1]. In this case + sub is a list of opcodes pertaining to the list of children + of the two nodes. + Note that i2==i1+1 and j2==j1+1 in this case. + +Note that if the roots of the trees are not root-equal then the result +is only a 'replace' of one tree by the other. +""" + + self.hashableNodeImpl.pushRootOnly(True) + try: + sm = SequenceMatcher(None, [ self.a, ], [ self.b, ]) + rootOpcodes = sm.get_opcodes() + if rootOpcodes[0][0] == 'equal': + return ( 'descend', 0, 1, 0, 1, + self._resolveRootEqual(self.a, self.b), ) + else: + return rootOpcodes + finally: + self.hashableNodeImpl.popRootOnly() + + def _resolveRootEqual(self, aElem, bElem): + """Considers children of `aElem` and `bElem` which have equal roots. + Returns opcodes for the children.""" + a = self.hashableNodeImpl.getChildren(aElem) + b = self.hashableNodeImpl.getChildren(bElem) + self.hashableNodeImpl.pushRootOnly(False) + try: + sm = SequenceMatcher(None, a, b) + nestedOpcodes = sm.get_opcodes() + return self._resolveDeepReplace(nestedOpcodes, a, b) + finally: + self.hashableNodeImpl.popRootOnly() -if __name__ == '__main__': - hashableImpl = HashableDocutilsNodeImpl() - hashableImpl.debug = True + def _resolveDeepReplace(self, opcodes, a, b): + """Resolves ``replace`` elements in `opcodes` pertaining to `a` and + `b`. Returns opcodes including nested elements for these cases.""" + result = [ ] + for i in xrange(len(opcodes)): + ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i] + if opcode != 'replace': + result.append(opcodes[i]) + continue + self.hashableNodeImpl.pushRootOnly(True) + try: + sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd]) + rootOpcodes = sm.get_opcodes() + for j in xrange(len(rootOpcodes)): + ( subOpcode, aSubBeg, aSubEnd, + bSubBeg, bSubEnd ) = rootOpcodes[j] + if subOpcode != 'equal': + result.append(( subOpcode, + aBeg + aSubBeg, aBeg + aSubEnd, + bBeg + bSubBeg, bBeg + bSubEnd, )) + else: + for k in xrange(aSubEnd - aSubBeg): + aIdx = aBeg + aSubBeg + k + bIdx = bBeg + bSubBeg + k + result.append(('descend', + aIdx, aIdx + 1, bIdx, bIdx + 1, + self._resolveRootEqual(a[aIdx], + b[bIdx]), )) + finally: + self.hashableNodeImpl.popRootOnly() + return result - n1 = Node() - n2 = Node() - print(n1 == n1) - print(n1 == n2) - print(n1 != n2) - h = { n1: 'bla' } -- 2.11.4.GIT