11 class DeltaAlgorithm(object):
15 def test(self
, changes
):
20 def getTestResult(self
, changes
):
21 # There is no reason to cache successful tests because we will
22 # always reduce the changeset when we see one.
24 changeset
= frozenset(changes
)
25 if changeset
in self
.cache
:
27 elif not self
.test(changes
):
28 self
.cache
.add(changeset
)
33 def run(self
, changes
, force
=False):
34 # Make sure the initial test passes, if not then (a) either
35 # the user doesn't expect monotonicity, and we may end up
36 # doing O(N^2) tests, or (b) the test is wrong. Avoid the
37 # O(N^2) case unless user requests it.
39 if not self
.getTestResult(changes
):
40 raise ValueError,'Initial test passed to delta fails.'
42 # Check empty set first to quickly find poor test functions.
43 if self
.getTestResult(set()):
46 return self
.delta(changes
, self
.split(changes
))
49 """split(set) -> [sets]
51 Partition a set into one or two pieces.
54 # There are many ways to split, we could do a better job with more
55 # context information (but then the API becomes grosser).
61 return L
[:mid
],L
[mid
:]
63 def delta(self
, c
, sets
):
64 # assert(reduce(set.union, sets, set()) == c)
66 # If there is nothing left we can remove, we are done.
70 # Look for a passing subset.
71 res
= self
.search(c
, sets
)
75 # Otherwise, partition sets if possible; if not we are done.
76 refined
= sum(map(list, map(self
.split
, sets
)), [])
77 if len(refined
) == len(sets
):
80 return self
.delta(c
, refined
)
82 def search(self
, c
, sets
):
83 for i
,S
in enumerate(sets
):
84 # If test passes on this subset alone, recurse.
85 if self
.getTestResult(S
):
86 return self
.delta(S
, self
.split(S
))
88 # Otherwise if we have more than two sets, see if test
89 # pases without this subset.
91 complement
= sum(sets
[:i
] + sets
[i
+1:],[])
92 if self
.getTestResult(complement
):
93 return self
.delta(complement
, sets
[:i
] + sets
[i
+1:])
98 def __init__(self
, type, data
, flags
, file, line
, column
):
106 kTokenRE
= re
.compile(r
"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""",
107 re
.DOTALL | re
.MULTILINE
)
110 p
= subprocess
.Popen(['clang','-dump-raw-tokens',path
],
111 stdin
=subprocess
.PIPE
,
112 stdout
=subprocess
.PIPE
,
113 stderr
=subprocess
.PIPE
)
114 out
,err
= p
.communicate()
118 for ln
in err
.split('\n'):
119 # Silly programmers refuse to print in simple machine readable
124 collect
= collect
+ '\n' + ln
125 if 'Loc=<' in ln
and ln
.endswith('>'):
126 ln
,collect
= collect
,None
127 tokens
.append(Token(*kTokenRE
.match(ln
).groups()))
133 class TMBDDelta(DeltaAlgorithm
):
134 def __init__(self
, testProgram
, tokenLists
, log
):
135 def patchName(name
, suffix
):
136 base
,ext
= os
.path
.splitext(name
)
137 return base
+ '.' + suffix
+ ext
138 super(TMBDDelta
, self
).__init
__()
139 self
.testProgram
= testProgram
140 self
.tokenLists
= tokenLists
141 self
.tempFiles
= [patchName(f
,'tmp')
142 for f
,_
in self
.tokenLists
]
143 self
.targetFiles
= [patchName(f
,'ok')
144 for f
,_
in self
.tokenLists
]
148 def writeFiles(self
, changes
, fileNames
):
149 assert len(fileNames
) == len(self
.tokenLists
)
150 byFile
= [[] for i
in self
.tokenLists
]
154 for i
,(file,tokens
) in enumerate(self
.tokenLists
):
155 f
= open(fileNames
[i
],'w')
162 def test(self
, changes
):
165 byFile
= self
.writeFiles(changes
, self
.tempFiles
)
168 print >>sys
.stderr
, 'TEST - ',
170 for i
,(file,_
) in enumerate(self
.tokenLists
):
173 sys
.stderr
.write('\n ')
174 sys
.stderr
.write('%s:%d tokens: [' % (file,len(byFile
[i
])))
177 if prev
is None or j
!= prev
+ 1:
179 sys
.stderr
.write('%d][' % prev
)
180 sys
.stderr
.write(str(j
))
181 sys
.stderr
.write(':')
184 sys
.stderr
.write(str(byFile
[i
][-1]))
185 sys
.stderr
.write('] ')
187 print >>sys
.stderr
, ', '.join(['%s:%d tokens' % (file, len(byFile
[i
]))
188 for i
,(file,_
) in enumerate(self
.tokenLists
)]),
190 p
= subprocess
.Popen([self
.testProgram
] + self
.tempFiles
)
194 self
.writeFiles(changes
, self
.targetFiles
)
197 print >>sys
.stderr
, '=> %s' % res
200 print '\nSUCCESS (%d tokens)' % len(changes
)
202 sys
.stderr
.write('.')
207 res
= super(TMBDDelta
, self
).run([(i
,j
)
208 for i
,(file,tokens
) in enumerate(self
.tokenLists
)
209 for j
in range(len(tokens
))])
210 self
.writeFiles(res
, self
.targetFiles
)
215 def tokenBasedMultiDelta(program
, files
, log
):
216 # Read in the lists of tokens.
217 tokenLists
= [(file, [t
.data
for t
in getTokens(file)])
220 numTokens
= sum([len(tokens
) for _
,tokens
in tokenLists
])
221 print "Delta on %s with %d tokens." % (', '.join(files
), numTokens
)
223 tbmd
= TMBDDelta(program
, tokenLists
, log
)
227 print "Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd
.targetFiles
),
232 from optparse
import OptionParser
, OptionGroup
233 parser
= OptionParser("%prog <test program> {files+}")
234 parser
.add_option("", "--debug", dest
="debugLevel",
235 help="set debug level [default %default]",
236 action
="store", type=int, default
=0)
237 (opts
, args
) = parser
.parse_args()
240 parser
.error('Invalid number of arguments.')
242 program
,files
= args
[0],args
[1:]
244 md
= tokenBasedMultiDelta(program
, files
, log
=opts
.debugLevel
)
246 if __name__
== '__main__':
249 except KeyboardInterrupt:
250 print >>sys
.stderr
,'Interrupted.'
251 os
._exit
(1) # Avoid freeing our giant cache.