3 # Copyright (C) 2010 Stefan Merten
5 # rstdiff.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 from difflib
import SequenceMatcher
22 __docformat__
= 'reStructuredText'
24 ###############################################################################
27 class HashableDescriptor(object):
28 """A descriptor to plug into a class to be made hashable."""
30 """Name of function to override."""
32 """Arity of function to override."""
37 def __init__(self
, override
, arity
, hashableImpl
):
38 """Initialize a descriptor for function `override` with `arity` using
40 self
.override
= override
42 self
.hashableImpl
= hashableImpl
44 def __get__(self
, instance
, owner
):
45 """Implements the descriptor protocol. Returns a function to use on
48 print('__get__ called on ' + owner
.__name
__ + ' by '
51 fct
= self
.hashableImpl
.getFunction(self
.override
)
54 print('***Exception***')
55 raise AttributeError('Can not access method %r'
58 return lambda: fct(instance
)
60 return lambda other
: fct(instance
, other
)
62 raise AttributeError('Can not create function for %r with arity %d'
63 % ( self
.override
, self
.arity
, ))
65 class HashableImpl(object):
66 """Implements hashability and reflexive equality for a class."""
70 def __init__(self
, cls
):
71 """Set methods to implement equality in `cls`."""
72 setattr(cls
, '__hash__', HashableDescriptor('__hash__', 0, self
))
73 setattr(cls
, '__eq__', HashableDescriptor('__eq__', 1, self
))
74 setattr(cls
, '__ne__', HashableDescriptor('__ne__', 1, self
))
76 def getFunction(self
, name
):
77 """Return the function named `name`."""
78 # Get the function from the *real* object
79 return getattr(self
, 'impl' + name
)
81 def impl__hash__(self
, this
):
82 """Return `this.__hash__`(). Derived classes must override this."""
84 print('impl__hash__ called')
87 def impl__eq__(self
, this
, other
):
88 """Return `this.__eq__`(other). Derived classes must override this."""
90 print('impl__eq__ called')
91 return id(this
) == id(other
)
93 def impl__ne__(self
, this
, other
):
94 """Return `this.__ne__`(other). Derived classes must override this."""
96 print('impl__ne__ called')
97 return not self
.impl__eq__(this
, other
)
99 class HashableNodeImpl(HashableImpl
):
100 """An abstract implementation of `HashableImpl` for nodes in a tree.
101 Allows switching between of root only comparison and deep
104 """Consider only the root node (or include the children)."""
106 """Stack for `_rootOnly`"""
109 def __init__(self
, cls
):
110 HashableImpl
.__init
__(self
, cls
)
112 def pushRootOnly(self
, newRootOnly
):
113 """Set `newRootOnly` as new `rootOnly` value. If ``True`` then only
114 information in the root is considered."""
115 self
.__rootOnlies
.append(self
._rootOnly
)
116 self
._rootOnly
= newRootOnly
118 def popRootOnly(self
):
119 """Pop and return last `rootOnly` value."""
120 self
._rootOnly
= self
.__rootOnlies
.pop()
122 def impl__hash__(self
, node
):
123 """Returns the hash for node `node`. Subclasses may override this but
124 overriding `rootHash` and `childrenHash` may make more sense."""
125 rootHash
= self
.rootHash(node
)
129 return rootHash
+ self
.childrenHash(node
)
131 def impl__eq__(self
, node
, other
):
132 """Returns equality between `node` and an `other` node. Subclasses may
133 override this but overriding `rootEq` and `childrenEq` may
135 if not self
.rootEq(node
, other
):
139 return self
.childrenEq(node
, other
)
141 def childrenHash(self
, node
):
142 """Return a hash for the children only. Subclasses may override
143 this but overriding `childHash` may make more sense."""
144 return reduce(lambda x
, y
: x
+ y
,
145 [ self
.childHash(child
)
146 for child
in self
.getChildren(node
) ], 0)
148 def childrenEq(self
, node
, other
):
149 """Returns children equality of `node` and an `other` node. ``True``
150 if the children of the two nodes are equal without considering
151 the root. Subclasses may override this but overriding
152 `childEq` may make more sense."""
153 nodeChildren
= self
.getChildren(node
)
154 otherChildren
= self
.getChildren(other
)
155 if len(nodeChildren
) != len(otherChildren
):
157 for i
in xrange(len(nodeChildren
)):
158 if not self
.childEq(nodeChildren
[i
], otherChildren
[i
]):
162 def rootHash(self
, node
):
163 """Return a hash for the root only. Subclasses must override
165 raise NotImplementedError()
167 def childHash(self
, node
):
168 """Return a hash for the node as a child. Subclasses must override
170 raise NotImplementedError()
172 def rootEq(self
, node
, other
):
173 """Returns root equality of `node` and an `other` node. ``True`` if
174 the two nodes as roots are equal without considering their
175 children. This should be true if one node can be replaced by
176 the other and all changes can be represented without changing
177 the node itself. Subclasses must override this."""
178 raise NotImplementedError()
180 def childEq(self
, node
, other
):
181 """Returns equality of `node` and an `other` node as children.
182 ``True`` if the child features of the two nodes are equal
183 without considering the root. Subclasses must override
185 raise NotImplementedError()
187 def getChildren(self
, node
):
188 """Return the children of `node` as a list. Subclasses must override
190 raise NotImplementedError()
192 ###############################################################################
195 class TreeMatcher(object):
196 """Objects of this class are able to match trees. This is similar in
197 spirit to `difflib.SequenceMatcher'"""
201 hashableNodeImpl
= None
203 def __init__(self
, hashableNodeImpl
, a
, b
):
204 """Construct a TreeMatcher for matching trees `a` and `b`.
206 `a` and `b` must be the root nodes of two trees to be compared.
207 `hashableNodeImpl` must be an implementation of `HashableNodeImpl`
208 governing the comparison of the nodes in the trees."""
212 self
.hashableNodeImpl
= hashableNodeImpl
214 def get_opcodes(self
):
215 """Return list of 5- or 6-tuples describing how to turn `a` into `b`.
217 Each tuple is of the form (tag, i1, i2, j1, j2, [sub]). The first tuple
218 has i1 == j1 == 0, and remaining tuples have i1 == i2 from the
219 tuple preceding it, and likewise for j1 == the previous j2.
221 The tags are strings, with these meanings:
223 'replace': a[i1:i2] should be replaced by b[j1:j2]
224 'delete': a[i1:i2] should be deleted.
225 Note that j1==j2 in this case.
226 'insert': b[j1:j2] should be inserted at a[i1:i1].
227 Note that i1==i2 in this case.
228 'equal': a[i1:i2] == b[j1:j2]
229 'descend': Descend on nodes a[i1] and b[i1]. In this case
230 sub is a list of opcodes pertaining to the list of children
232 Note that i2==i1+1 and j2==j1+1 in this case.
234 Note that if the roots of the trees are not root-equal then the result
235 is only a 'replace' of one tree by the other.
238 self
.hashableNodeImpl
.pushRootOnly(True)
240 sm
= SequenceMatcher(None, [ self
.a
, ], [ self
.b
, ])
241 rootOpcodes
= sm
.get_opcodes()
242 if rootOpcodes
[0][0] == 'equal':
243 return ( 'descend', 0, 1, 0, 1,
244 self
._resolveRootEqual
(self
.a
, self
.b
), )
248 self
.hashableNodeImpl
.popRootOnly()
250 def _resolveRootEqual(self
, aElem
, bElem
):
251 """Considers children of `aElem` and `bElem` which have equal roots.
252 Returns opcodes for the children."""
253 a
= self
.hashableNodeImpl
.getChildren(aElem
)
254 b
= self
.hashableNodeImpl
.getChildren(bElem
)
255 self
.hashableNodeImpl
.pushRootOnly(False)
257 sm
= SequenceMatcher(None, a
, b
)
258 nestedOpcodes
= sm
.get_opcodes()
259 return self
._resolveDeepReplace
(nestedOpcodes
, a
, b
)
261 self
.hashableNodeImpl
.popRootOnly()
263 def _resolveDeepReplace(self
, opcodes
, a
, b
):
264 """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
265 `b`. Returns opcodes including nested elements for these cases."""
267 for i
in xrange(len(opcodes
)):
268 ( opcode
, aBeg
, aEnd
, bBeg
, bEnd
) = opcodes
[i
]
269 if opcode
!= 'replace':
270 result
.append(opcodes
[i
])
272 self
.hashableNodeImpl
.pushRootOnly(True)
274 sm
= SequenceMatcher(None, a
[aBeg
:aEnd
], b
[bBeg
:bEnd
])
275 rootOpcodes
= sm
.get_opcodes()
276 for j
in xrange(len(rootOpcodes
)):
277 ( subOpcode
, aSubBeg
, aSubEnd
,
278 bSubBeg
, bSubEnd
) = rootOpcodes
[j
]
279 if subOpcode
!= 'equal':
280 result
.append(( subOpcode
,
281 aBeg
+ aSubBeg
, aBeg
+ aSubEnd
,
282 bBeg
+ bSubBeg
, bBeg
+ bSubEnd
, ))
284 for k
in xrange(aSubEnd
- aSubBeg
):
285 aIdx
= aBeg
+ aSubBeg
+ k
286 bIdx
= bBeg
+ bSubBeg
+ k
287 result
.append(('descend',
288 aIdx
, aIdx
+ 1, bIdx
, bIdx
+ 1,
289 self
._resolveRootEqual
(a
[aIdx
],
292 self
.hashableNodeImpl
.popRootOnly()