First studies for an implmentation of `rstdiff`.
[docutils/kirr.git] / sandbox / rstdiff / studies / diff.py
blob8dea6c1dbd9adc170e56f36d397ac1647e1a6296
1 # Studies for using difflib on structures
3 from difflib import SequenceMatcher
5 from hashable import HashableImpl
7 aI = ( 1, 2, 3, 5, 6, 7, )
8 bI = ( 2, 3, 4, 8, 6 )
10 sm = SequenceMatcher(None, aI, bI)
12 # print(sm.find_longest_match(0, len(aI), 0, len(bI)))
13 # print(sm.get_matching_blocks())
14 # print(list(sm.get_grouped_opcodes(0))) # Useful for context diffs
15 # print(sm.get_opcodes()) # The way to go
17 class TreeNode(object):
19 tag = None
20 children = ( )
22 def __init__(self, tag, children=( )):
23 self.tag = tag
24 self.children = children
26 class HashableTreeNodeImpl(HashableImpl):
28 """Consider only the root node (or include the children)."""
29 # TODO Needs a push/pop methods
30 rootOnly = False
32 def __init__(self):
33 super(self.__class__, self).__init__(TreeNode)
35 def impl__hash__(self, this):
36 rootHash = self.rootHash(this)
37 if self.rootOnly:
38 return rootHash
39 else:
40 return rootHash + self.childrenHash(this)
42 def impl__eq__(self, this, other):
43 if not self.rootEq(this, other):
44 return False
45 if self.rootOnly:
46 return True
47 return self.childrenEq(this, other)
49 def rootHash(self, this):
50 """Return a hash for the root only."""
51 return hash(this.tag)
53 def childrenHash(self, this):
54 """Return a hash for the children only."""
55 return reduce(lambda x, y: x + y,
56 [ hash(child)
57 for child in this.children ], 0)
59 def rootEq(self, this, other):
60 """True if the two nodes as roots are equal without considering their
61 children. This should be true if one node can be replaced by
62 the other and all changes can be represented without changing
63 the node itself."""
64 return type(this) == type(other) and this.tag == other.tag
66 def childrenEq(self, this, other):
67 """True if the children of the two nodes are equal without considering
68 the root features."""
69 if len(this.children) != len(other.children):
70 return False
71 for i in xrange(len(this.children)):
72 if not (this.children[i] == other.children[i]):
73 return False
74 return hashableImpl
76 # To generate a diff tree:
78 # * ``equal`` opcodes need a copy
80 # * ``insert`` and ``delete`` opcodes must be processed as such
82 # * ``replace`` opcodes need to be analyzed recursively to find a
83 # minimal set of changes
85 def resolveDeepReplace(hashableImpl, opcodes, a, b):
86 """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
87 `b`. Returns opcodes including nested elements for these cases.
88 `hashableImpl` is the `HashableImpl` used to control the hashable
89 feature."""
90 result = [ ]
91 for i in xrange(len(opcodes)):
92 ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i]
93 if opcode != 'replace':
94 result.append(opcodes[i])
95 continue
96 try:
97 savedRootOnly = hashableImpl.rootOnly
98 hashableImpl.rootOnly = True
99 sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd])
100 rootOpcodes = sm.get_opcodes()
101 for j in xrange(len(rootOpcodes)):
102 ( subOpcode, aSubBeg, aSubEnd, bSubBeg, bSubEnd ) = rootOpcodes[j]
103 if subOpcode != 'equal':
104 result.append(( subOpcode, aBeg + aSubBeg, aBeg + aSubEnd,
105 bBeg + bSubBeg, bBeg + bSubEnd, ))
106 else:
107 for k in xrange(aSubEnd - aSubBeg):
108 aIdx = aBeg + aSubBeg + k
109 bIdx = bBeg + bSubBeg + k
110 result.append(('descend',
111 aIdx, aIdx + 1, bIdx, bIdx + 1,
112 resolveRootEqual(hashableImpl,
113 a[aIdx], b[bIdx]), ))
114 finally:
115 hashableImpl.rootOnly = savedRootOnly
116 return result
118 def resolveRootEqual(hashableImpl, aElem, bElem):
119 """Considers children of `aElem` and `bElem` which have equal roots.
120 Returns opcodes for the children. `hashableImpl` is the
121 `HashableImpl` used to control the hashable feature."""
122 a = aElem.children
123 b = bElem.children
124 try:
125 savedRootOnly = hashableImpl.rootOnly
126 hashableImpl.rootOnly = False
127 sm = SequenceMatcher(None, a, b)
128 nestedOpcodes = sm.get_opcodes()
129 return resolveDeepReplace(hashableImpl, nestedOpcodes, a, b)
130 finally:
131 hashableImpl.rootOnly = savedRootOnly
133 hashableImpl = HashableTreeNodeImpl()
135 aT = ( TreeNode('first'),
136 TreeNode('second', (
137 TreeNode('second.first'),
139 TreeNode('third', (
140 TreeNode(2),
144 bT = ( TreeNode('first'),
145 TreeNode('second', (
146 TreeNode('second.first', (
147 TreeNode('second.first.first'),
149 TreeNode('second.second'),
151 TreeNode('second1', (
152 TreeNode(2),
154 TreeNode('third', (
155 TreeNode(2),
157 TreeNode('fourth'),
160 sm = SequenceMatcher(None, aT, bT)
161 top = sm.get_opcodes()
162 print(top)
163 print('---')
164 # Use a pseudo root
165 print(resolveRootEqual(hashableImpl, TreeNode(None, aT), TreeNode(None, bT)))