1 # Studies for using difflib on structures
3 from difflib
import SequenceMatcher
5 from hashable
import HashableImpl
7 aI
= ( 1, 2, 3, 5, 6, 7, )
10 sm
= SequenceMatcher(None, aI
, bI
)
12 # print(sm.find_longest_match(0, len(aI), 0, len(bI)))
13 # print(sm.get_matching_blocks())
14 # print(list(sm.get_grouped_opcodes(0))) # Useful for context diffs
15 # print(sm.get_opcodes()) # The way to go
17 class TreeNode(object):
22 def __init__(self
, tag
, children
=( )):
24 self
.children
= children
26 class HashableTreeNodeImpl(HashableImpl
):
28 """Consider only the root node (or include the children)."""
29 # TODO Needs a push/pop methods
33 super(self
.__class
__, self
).__init
__(TreeNode
)
35 def impl__hash__(self
, this
):
36 rootHash
= self
.rootHash(this
)
40 return rootHash
+ self
.childrenHash(this
)
42 def impl__eq__(self
, this
, other
):
43 if not self
.rootEq(this
, other
):
47 return self
.childrenEq(this
, other
)
49 def rootHash(self
, this
):
50 """Return a hash for the root only."""
53 def childrenHash(self
, this
):
54 """Return a hash for the children only."""
55 return reduce(lambda x
, y
: x
+ y
,
57 for child
in this
.children
], 0)
59 def rootEq(self
, this
, other
):
60 """True if the two nodes as roots are equal without considering their
61 children. This should be true if one node can be replaced by
62 the other and all changes can be represented without changing
64 return type(this
) == type(other
) and this
.tag
== other
.tag
66 def childrenEq(self
, this
, other
):
67 """True if the children of the two nodes are equal without considering
69 if len(this
.children
) != len(other
.children
):
71 for i
in xrange(len(this
.children
)):
72 if not (this
.children
[i
] == other
.children
[i
]):
76 # To generate a diff tree:
78 # * ``equal`` opcodes need a copy
80 # * ``insert`` and ``delete`` opcodes must be processed as such
82 # * ``replace`` opcodes need to be analyzed recursively to find a
83 # minimal set of changes
85 def resolveDeepReplace(hashableImpl
, opcodes
, a
, b
):
86 """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
87 `b`. Returns opcodes including nested elements for these cases.
88 `hashableImpl` is the `HashableImpl` used to control the hashable
91 for i
in xrange(len(opcodes
)):
92 ( opcode
, aBeg
, aEnd
, bBeg
, bEnd
) = opcodes
[i
]
93 if opcode
!= 'replace':
94 result
.append(opcodes
[i
])
97 savedRootOnly
= hashableImpl
.rootOnly
98 hashableImpl
.rootOnly
= True
99 sm
= SequenceMatcher(None, a
[aBeg
:aEnd
], b
[bBeg
:bEnd
])
100 rootOpcodes
= sm
.get_opcodes()
101 for j
in xrange(len(rootOpcodes
)):
102 ( subOpcode
, aSubBeg
, aSubEnd
, bSubBeg
, bSubEnd
) = rootOpcodes
[j
]
103 if subOpcode
!= 'equal':
104 result
.append(( subOpcode
, aBeg
+ aSubBeg
, aBeg
+ aSubEnd
,
105 bBeg
+ bSubBeg
, bBeg
+ bSubEnd
, ))
107 for k
in xrange(aSubEnd
- aSubBeg
):
108 aIdx
= aBeg
+ aSubBeg
+ k
109 bIdx
= bBeg
+ bSubBeg
+ k
110 result
.append(('descend',
111 aIdx
, aIdx
+ 1, bIdx
, bIdx
+ 1,
112 resolveRootEqual(hashableImpl
,
113 a
[aIdx
], b
[bIdx
]), ))
115 hashableImpl
.rootOnly
= savedRootOnly
118 def resolveRootEqual(hashableImpl
, aElem
, bElem
):
119 """Considers children of `aElem` and `bElem` which have equal roots.
120 Returns opcodes for the children. `hashableImpl` is the
121 `HashableImpl` used to control the hashable feature."""
125 savedRootOnly
= hashableImpl
.rootOnly
126 hashableImpl
.rootOnly
= False
127 sm
= SequenceMatcher(None, a
, b
)
128 nestedOpcodes
= sm
.get_opcodes()
129 return resolveDeepReplace(hashableImpl
, nestedOpcodes
, a
, b
)
131 hashableImpl
.rootOnly
= savedRootOnly
133 hashableImpl
= HashableTreeNodeImpl()
135 aT
= ( TreeNode('first'),
137 TreeNode('second.first'),
144 bT
= ( TreeNode('first'),
146 TreeNode('second.first', (
147 TreeNode('second.first.first'),
149 TreeNode('second.second'),
151 TreeNode('second1', (
160 sm
= SequenceMatcher(None, aT
, bT
)
161 top
= sm
.get_opcodes()
165 print(resolveRootEqual(hashableImpl
, TreeNode(None, aT
), TreeNode(None, bT
)))