Added basic script including some infrastructure.
[docutils/kirr.git] / sandbox / rstdiff / treediff / __init__.py
blobb5894358827aa6e3a675406bcff41cb47f2bbb49
1 #!/usr/bin/env python
3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 from difflib import SequenceMatcher
22 __docformat__ = 'reStructuredText'
24 ###############################################################################
25 # Hashable
27 class HashableDescriptor(object):
28 """A descriptor to plug into a class to be made hashable."""
30 """Name of function to override."""
31 override = None
32 """Arity of function to override."""
33 arity = None
34 hashableImpl = None
35 debug = False
37 def __init__(self, override, arity, hashableImpl):
38 """Initialize a descriptor for function `override` with `arity` using
39 `hashableImpl`."""
40 self.override = override
41 self.arity = arity
42 self.hashableImpl = hashableImpl
44 def __get__(self, instance, owner):
45 """Implements the descriptor protocol. Returns a function to use on
46 `instance`."""
47 if self.debug:
48 print('__get__ called on ' + owner.__name__ + ' by '
49 + self.override)
50 try:
51 fct = self.hashableImpl.getFunction(self.override)
52 except:
53 if self.debug:
54 print('***Exception***')
55 raise AttributeError('Can not access method %r'
56 % ( self.override, ))
57 if self.arity == 0:
58 return lambda: fct(instance)
59 elif self.arity == 1:
60 return lambda other: fct(instance, other)
61 else:
62 raise AttributeError('Can not create function for %r with arity %d'
63 % ( self.override, self.arity, ))
65 class HashableImpl(object):
66 """Implements hashability and reflexive equality for a class."""
68 debug = False
70 def __init__(self, cls):
71 """Set methods to implement equality in `cls`."""
72 setattr(cls, '__hash__', HashableDescriptor('__hash__', 0, self))
73 setattr(cls, '__eq__', HashableDescriptor('__eq__', 1, self))
74 setattr(cls, '__ne__', HashableDescriptor('__ne__', 1, self))
76 def getFunction(self, name):
77 """Return the function named `name`."""
78 # Get the function from the *real* object
79 return getattr(self, 'impl' + name)
81 def impl__hash__(self, this):
82 """Return `this.__hash__`(). Derived classes must override this."""
83 if self.debug:
84 print('impl__hash__ called')
85 return id(this)
87 def impl__eq__(self, this, other):
88 """Return `this.__eq__`(other). Derived classes must override this."""
89 if self.debug:
90 print('impl__eq__ called')
91 return id(this) == id(other)
93 def impl__ne__(self, this, other):
94 """Return `this.__ne__`(other). Derived classes must override this."""
95 if self.debug:
96 print('impl__ne__ called')
97 return not self.impl__eq__(this, other)
99 class HashableNodeImpl(HashableImpl):
100 """An abstract implementation of `HashableImpl` for nodes in a tree.
101 Allows switching between of root only comparison and deep
102 comparison."""
104 """Consider only the root node (or include the children)."""
105 _rootOnly = False
106 """Stack for `_rootOnly`"""
107 __rootOnlies = [ ]
109 def __init__(self, cls):
110 HashableImpl.__init__(self, cls)
112 def pushRootOnly(self, newRootOnly):
113 """Set `newRootOnly` as new `rootOnly` value. If ``True`` then only
114 information in the root is considered."""
115 self.__rootOnlies.append(self._rootOnly)
116 self._rootOnly = newRootOnly
118 def popRootOnly(self):
119 """Pop and return last `rootOnly` value."""
120 self._rootOnly = self.__rootOnlies.pop()
122 def impl__hash__(self, node):
123 """Returns the hash for node `node`. Subclasses may override this but
124 overriding `rootHash` and `childrenHash` may make more sense."""
125 rootHash = self.rootHash(node)
126 if self._rootOnly:
127 return rootHash
128 else:
129 return rootHash + self.childrenHash(node)
131 def impl__eq__(self, node, other):
132 """Returns equality between `node` and an `other` node. Subclasses may
133 override this but overriding `rootEq` and `childrenEq` may
134 make more sense."""
135 if not self.rootEq(node, other):
136 return False
137 if self._rootOnly:
138 return True
139 return self.childrenEq(node, other)
141 def childrenHash(self, node):
142 """Return a hash for the children only. Subclasses may override
143 this but overriding `childHash` may make more sense."""
144 return reduce(lambda x, y: x + y,
145 [ self.childHash(child)
146 for child in self.getChildren(node) ], 0)
148 def childrenEq(self, node, other):
149 """Returns children equality of `node` and an `other` node. ``True``
150 if the children of the two nodes are equal without considering
151 the root. Subclasses may override this but overriding
152 `childEq` may make more sense."""
153 nodeChildren = self.getChildren(node)
154 otherChildren = self.getChildren(other)
155 if len(nodeChildren) != len(otherChildren):
156 return False
157 for i in xrange(len(nodeChildren)):
158 if not self.childEq(nodeChildren[i], otherChildren[i]):
159 return False
160 return True
162 def rootHash(self, node):
163 """Return a hash for the root only. Subclasses must override
164 this."""
165 raise NotImplementedError()
167 def childHash(self, node):
168 """Return a hash for the node as a child. Subclasses must override
169 this."""
170 raise NotImplementedError()
172 def rootEq(self, node, other):
173 """Returns root equality of `node` and an `other` node. ``True`` if
174 the two nodes as roots are equal without considering their
175 children. This should be true if one node can be replaced by
176 the other and all changes can be represented without changing
177 the node itself. Subclasses must override this."""
178 raise NotImplementedError()
180 def childEq(self, node, other):
181 """Returns equality of `node` and an `other` node as children.
182 ``True`` if the child features of the two nodes are equal
183 without considering the root. Subclasses must override
184 this."""
185 raise NotImplementedError()
187 def getChildren(self, node):
188 """Return the children of `node` as a list. Subclasses must override
189 this."""
190 raise NotImplementedError()
192 ###############################################################################
193 # Tree matcher
195 class TreeMatcher(object):
196 """Objects of this class are able to match trees. This is similar in
197 spirit to `difflib.SequenceMatcher'"""
199 a = None
200 b = None
201 hashableNodeImpl = None
203 def __init__(self, hashableNodeImpl, a, b):
204 """Construct a TreeMatcher for matching trees `a` and `b`.
206 `a` and `b` must be the root nodes of two trees to be compared.
207 `hashableNodeImpl` must be an implementation of `HashableNodeImpl`
208 governing the comparison of the nodes in the trees."""
210 self.a = a
211 self.b = b
212 self.hashableNodeImpl = hashableNodeImpl
214 def get_opcodes(self):
215 """Return list of 5- or 6-tuples describing how to turn `a` into `b`.
217 Each tuple is of the form (tag, i1, i2, j1, j2, [sub]). The first tuple
218 has i1 == j1 == 0, and remaining tuples have i1 == i2 from the
219 tuple preceding it, and likewise for j1 == the previous j2.
221 The tags are strings, with these meanings:
223 'replace': a[i1:i2] should be replaced by b[j1:j2]
224 'delete': a[i1:i2] should be deleted.
225 Note that j1==j2 in this case.
226 'insert': b[j1:j2] should be inserted at a[i1:i1].
227 Note that i1==i2 in this case.
228 'equal': a[i1:i2] == b[j1:j2]
229 'descend': Descend on nodes a[i1] and b[i1]. In this case
230 sub is a list of opcodes pertaining to the list of children
231 of the two nodes.
232 Note that i2==i1+1 and j2==j1+1 in this case.
234 Note that if the roots of the trees are not root-equal then the result
235 is only a 'replace' of one tree by the other.
238 self.hashableNodeImpl.pushRootOnly(True)
239 try:
240 sm = SequenceMatcher(None, [ self.a, ], [ self.b, ])
241 rootOpcodes = sm.get_opcodes()
242 if rootOpcodes[0][0] == 'equal':
243 return ( 'descend', 0, 1, 0, 1,
244 self._resolveRootEqual(self.a, self.b), )
245 else:
246 return rootOpcodes
247 finally:
248 self.hashableNodeImpl.popRootOnly()
250 def _resolveRootEqual(self, aElem, bElem):
251 """Considers children of `aElem` and `bElem` which have equal roots.
252 Returns opcodes for the children."""
253 a = self.hashableNodeImpl.getChildren(aElem)
254 b = self.hashableNodeImpl.getChildren(bElem)
255 self.hashableNodeImpl.pushRootOnly(False)
256 try:
257 sm = SequenceMatcher(None, a, b)
258 nestedOpcodes = sm.get_opcodes()
259 return self._resolveDeepReplace(nestedOpcodes, a, b)
260 finally:
261 self.hashableNodeImpl.popRootOnly()
263 def _resolveDeepReplace(self, opcodes, a, b):
264 """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
265 `b`. Returns opcodes including nested elements for these cases."""
266 result = [ ]
267 for i in xrange(len(opcodes)):
268 ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i]
269 if opcode != 'replace':
270 result.append(opcodes[i])
271 continue
272 self.hashableNodeImpl.pushRootOnly(True)
273 try:
274 sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd])
275 rootOpcodes = sm.get_opcodes()
276 for j in xrange(len(rootOpcodes)):
277 ( subOpcode, aSubBeg, aSubEnd,
278 bSubBeg, bSubEnd ) = rootOpcodes[j]
279 if subOpcode != 'equal':
280 result.append(( subOpcode,
281 aBeg + aSubBeg, aBeg + aSubEnd,
282 bBeg + bSubBeg, bBeg + bSubEnd, ))
283 else:
284 for k in xrange(aSubEnd - aSubBeg):
285 aIdx = aBeg + aSubBeg + k
286 bIdx = bBeg + bSubBeg + k
287 result.append(('descend',
288 aIdx, aIdx + 1, bIdx, bIdx + 1,
289 self._resolveRootEqual(a[aIdx],
290 b[bIdx]), ))
291 finally:
292 self.hashableNodeImpl.popRootOnly()
293 return result