Continued studies.
[docutils/kirr.git] / sandbox / rstdiff / studies / diff.py
blob4ff8a2f4151f1f6b2f9c42d9a042c666b76a014d
1 # Studies for using difflib on structures
3 from difflib import SequenceMatcher
5 from hashable import HashableNodeImpl
7 from pprint import pprint
9 import sys
11 __docformat__ = 'reStructuredText'
13 aI = ( 1, 2, 3, 5, 6, 7, )
14 bI = ( 2, 3, 4, 8, 6 )
16 sm = SequenceMatcher(None, aI, bI)
18 # print(sm.find_longest_match(0, len(aI), 0, len(bI)))
19 # print(sm.get_matching_blocks())
20 # print(list(sm.get_grouped_opcodes(0))) # Useful for context diffs
21 # print(sm.get_opcodes()) # The way to go
23 class TreeNode(object):
25 tag = None
26 children = ( )
28 def __init__(self, tag, children=( )):
29 self.tag = tag
30 self.children = children
32 class HashableTreeNodeImpl(HashableNodeImpl):
34 def __init__(self):
35 super(self.__class__, self).__init__(TreeNode)
37 def rootHash(self, node):
38 return hash(node.tag)
40 def childHash(self, node):
41 """Return a hash for the children only. Subclasses should override
42 this."""
43 return hash(node)
45 def rootEq(self, node, other):
46 return type(node) == type(other) and node.tag == other.tag
48 def childEq(self, node, other):
49 return node == other
51 def getChildren(self, node):
52 return node.children
54 # To generate a diff tree:
56 # * ``equal`` opcodes need a copy
58 # * ``insert`` and ``delete`` opcodes must be processed as such
60 # * ``replace`` opcodes need to be analyzed recursively to find a
61 # minimal set of changes
63 def resolveDeepReplace(hashableNodeImpl, opcodes, a, b):
64 """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
65 `b`. Returns opcodes including nested elements for these cases.
66 `hashableNodeImpl` is the `HashableNodeImpl` used to control the hashable
67 feature."""
68 result = [ ]
69 for i in xrange(len(opcodes)):
70 ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i]
71 if opcode != 'replace':
72 result.append(opcodes[i])
73 continue
74 try:
75 hashableNodeImpl.pushRootOnly(True)
76 sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd])
77 rootOpcodes = sm.get_opcodes()
78 for j in xrange(len(rootOpcodes)):
79 ( subOpcode, aSubBeg, aSubEnd, bSubBeg, bSubEnd ) = rootOpcodes[j]
80 if subOpcode != 'equal':
81 result.append(( subOpcode, aBeg + aSubBeg, aBeg + aSubEnd,
82 bBeg + bSubBeg, bBeg + bSubEnd, ))
83 else:
84 for k in xrange(aSubEnd - aSubBeg):
85 aIdx = aBeg + aSubBeg + k
86 bIdx = bBeg + bSubBeg + k
87 result.append(('descend',
88 aIdx, aIdx + 1, bIdx, bIdx + 1,
89 resolveRootEqual(hashableNodeImpl,
90 a[aIdx], b[bIdx]), ))
91 finally:
92 hashableNodeImpl.popRootOnly()
93 return result
95 def resolveRootEqual(hashableNodeImpl, aElem, bElem):
96 """Considers children of `aElem` and `bElem` which have equal roots.
97 Returns opcodes for the children. `hashableNodeImpl` is the
98 `HashableNodeImpl` used to control the hashable feature."""
99 a = hashableNodeImpl.getChildren(aElem)
100 b = hashableNodeImpl.getChildren(bElem)
101 try:
102 hashableNodeImpl.pushRootOnly(False)
103 sm = SequenceMatcher(None, a, b)
104 nestedOpcodes = sm.get_opcodes()
105 return resolveDeepReplace(hashableNodeImpl, nestedOpcodes, a, b)
106 finally:
107 hashableNodeImpl.popRootOnly()
109 hashableNodeImpl = HashableTreeNodeImpl()
111 aT = ( TreeNode('first'),
112 TreeNode('second', (
113 TreeNode('second.first'),
114 TreeNode('second.second'),
116 TreeNode('third', (
117 TreeNode(2),
119 TreeNode('fourth'),
122 bT = ( TreeNode('first'),
123 TreeNode('second', (
124 TreeNode('second.first', (
125 TreeNode('second.first.first'),
127 TreeNode('second.second1'),
128 TreeNode('second.second'),
130 TreeNode('second1', (
131 TreeNode(2),
133 TreeNode('third', (
134 TreeNode(2),
136 TreeNode('fourth1'),
139 sm = SequenceMatcher(None, aT, bT)
140 top = sm.get_opcodes()
141 pprint(top)
142 print('---')
143 # Use a pseudo root
144 pprint(resolveRootEqual(hashableNodeImpl,
145 TreeNode(None, aT), TreeNode(None, bT)),
146 sys.stdout, 2, 40, None)