Break out of loop correctly when the first file difference is found.
[cvs2svn.git] / contrib / verify-cvs2svn.py
blob29de2048e117f57a347be90ae7e01ba5f152aad4
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
33 import os
34 import sys
35 import optparse
36 import subprocess
37 import shutil
38 import re
# Names of the external command line clients that this script runs:
# CVS for the source repository, Subversion and Mercurial for the
# converted repository that is being verified.
CVS_CMD = 'cvs'
SVN_CMD = 'svn'
HG_CMD = 'hg'
def pipe(cmd):
    """Run CMD as a child process and capture its standard output.

    CMD is handed to subprocess.Popen unchanged.  Only stdout is
    captured; stderr is not redirected.  Return the tuple
    (output, status), where OUTPUT is everything the child wrote to
    stdout and STATUS is the value returned by wait()."""
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # Drain stdout to EOF first, then reap the child for its status.
    captured = proc.stdout.read()
    return (captured, proc.wait())
def cmd_failed(cmd, output, status):
    """Report a failed external command on stdout and raise.

    CMD is the argument list that was run and OUTPUT is the text it
    produced.  STATUS is accepted for the callers' convenience but is
    not included in the report.  Always raises RuntimeError, naming
    the program (CMD[0]) that failed."""
    command_line = ' '.join(cmd)
    sys.stdout.write('CMD FAILED: %s\n' % command_line)
    sys.stdout.write('Output:\n')
    sys.stdout.write(output)
    raise RuntimeError('%s command failed!' % cmd[0])
class CvsRepos:
    """A CVS repository, or a directory inside one, read via the cvs client.

    After construction, self.cvsroot is the absolute path of the
    directory that contains CVSROOT, and self.module is the path of
    the verified directory relative to it ("." for the root itself)."""

    def __init__(self, path):
        """Locate the CVS repository that contains the directory PATH.

        Raises RuntimeError if PATH is not a directory or if no
        directory containing CVSROOT is found at PATH or above it."""
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        if os.path.exists(os.path.join(path, 'CVSROOT')):
            # PATH is the whole repository.
            self.cvsroot = path
            self.module = "."
            return

        # PATH is below the repository root: climb one directory at a
        # time, moving each component we leave behind onto the front of
        # the module path, until a directory holding CVSROOT is found.
        (root, module) = os.path.split(path)
        while not os.path.exists(os.path.join(root, 'CVSROOT')):
            (parent, component) = os.path.split(root)
            if parent == root:
                # Reached the top of the filesystem without success.
                raise RuntimeError('Cannot find the CVSROOT')
            module = os.path.join(component, module)
            root = parent
        self.cvsroot = root
        self.module = module

    def __str__(self):
        """Return the last path component of the repository root."""
        return os.path.basename(self.cvsroot)

    def export(self, dest_path, rev=None, keyword_opt=None):
        """Export the module into the new directory DEST_PATH.

        REV is a CVS revision string; if it is false, the HEAD revision
        is exported (by asking for the date 'now').  KEYWORD_OPT, if
        true, is added to the cvs command line as is.  DEST_PATH is
        created first and must not exist yet.  Any output or a non-zero
        exit status from cvs is treated as failure (RuntimeError)."""
        os.mkdir(dest_path)
        if rev:
            selector = ['-r', rev]
        else:
            selector = ['-D', 'now']
        cmd = [CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
        cmd = cmd + selector
        if keyword_opt:
            cmd.append(keyword_opt)
        cmd = cmd + ['-d', dest_path, self.module]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)
class SvnRepos:
    """A Subversion repository accessed through the svn client."""

    name = 'svn'

    def __init__(self, url):
        """Open the Subversion repository at URL.

        URL may be a real URL (anything containing '://') or a local
        path, which is converted to a file:// URL.  The names below
        'tags' and 'branches' are listed once here and cached."""
        if '://' not in url:
            # A local path was given: build a file:// URL from it.
            abspath = os.path.abspath(url)
            if abspath[0] != '/':
                # e.g. a drive-letter path; needs the extra slash.
                url = 'file:///' + abspath
            else:
                url = 'file://' + abspath
            if os.sep != '/':
                url = url.replace(os.sep, '/')

        self.url = url

        # Cache a list of all tags and branches
        top_level = self.list('')
        if 'tags' in top_level:
            self.tag_list = self.list('tags')
        else:
            self.tag_list = []
        if 'branches' in top_level:
            self.branch_list = self.list('branches')
        else:
            self.branch_list = []

    def __str__(self):
        """Return the last component of the repository URL."""
        return self.url.split('/')[-1]

    def export(self, path, dest_path):
        """Export PATH (relative to the repository URL) to DEST_PATH.

        Any output or a non-zero exit status from svn is treated as
        failure (RuntimeError)."""
        url = '/'.join([self.url, path])
        cmd = [SVN_CMD, 'export', '-q', url, dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    def list(self, path):
        """Return a list of all files and directories in PATH.

        Raises RuntimeError if 'svn ls' exits with a non-zero status."""
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return self._parse_ls_output(output)

    @staticmethod
    def _parse_ls_output(output):
        """Turn the text printed by 'svn ls' into a list of entry names.

        'svn ls' marks directories with a trailing '/'; file names carry
        no marker.  Only that marker is removed, so file names are
        returned intact.  Blank lines are skipped, and splitlines() also
        copes with CRLF line endings."""
        entries = []
        for line in output.splitlines():
            if line.endswith('/'):
                line = line[:-1]
            if line:
                entries.append(line)
        return entries

    def tags(self):
        """Return a list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return a list of all branches in the repository."""
        return self.branch_list
class HgRepos:
    """A Mercurial repository accessed through the hg client."""

    name = 'hg'

    def __init__(self, path):
        """Remember the repository PATH; no hg command is run yet."""
        self.path = path
        self.base_cmd = [HG_CMD, '-R', self.path]

        # Filled in by the first call to branches():
        self._branches = None      # cached branch names, minus 'default'
        self._have_default = None  # whether a 'default' branch was listed

    def __str__(self):
        """Return the last component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Archive revision REV as plain files into DEST_PATH.

        Paths matching the regexp ^\\.hg are excluded.  Any output or a
        non-zero exit status from 'hg archive' is treated as failure
        (RuntimeError)."""
        archive_cmd = self.base_cmd + [
            'archive',
            '--type', 'files',
            '--rev', rev,
            '--exclude', r're:^\.hg',
            dest_path,
        ]
        (output, status) = pipe(archive_cmd)
        if status or output:
            cmd_failed(archive_cmd, output, status)

        if os.path.exists(dest_path):
            return

        # If Mercurial has nothing to export, then it doesn't create
        # dest_path.  This breaks tree_compare(), so just check that the
        # manifest for the chosen revision really is empty, and if so
        # create the empty dir.
        manifest_cmd = self.base_cmd + ['manifest', '--rev', rev]
        (output, status) = pipe(manifest_cmd)
        if status:
            cmd_failed(manifest_cmd, output, status)
        tracked = [filename
                   for filename in output.split("\n")[:-1]
                   if not filename.startswith('.hg')]
        if not tracked:
            os.mkdir(dest_path)

    def export_trunk(self, dest_path):
        """Export the 'default' branch to DEST_PATH.

        If the repository lists no 'default' branch, an empty directory
        is created instead."""
        self.branches()  # ensure _have_default is set
        if not self._have_default:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)
        else:
            self._export(dest_path, 'default')

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the names printed by 'hg tags -q', without 'tip'.

        Raises ValueError if 'tip' is not among the listed tags."""
        names = self._split_output(self.base_cmd + ['tags', '-q'])
        names.remove('tip')
        return names

    def branches(self):
        """Return the names printed by 'hg branches -q', minus 'default'.

        The result is computed once and cached; the same call records
        in self._have_default whether 'default' was listed."""
        if self._branches is None:
            names = self._split_output(self.base_cmd + ['branches', '-q'])
            self._have_default = 'default' in names
            if self._have_default:
                names.remove('default')
            self._branches = names
        return self._branches

    def _split_output(self, cmd):
        """Run CMD and return its output as a list of lines.

        The piece after the last newline is dropped.  Raises
        RuntimeError if CMD exits with a non-zero status."""
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        lines = output.split("\n")
        return lines[:-1]
class GitRepos:
    """Placeholder for git support; it cannot be instantiated yet."""

    name = 'git'

    def __init__(self, path):
        """Always raise NotImplementedError; PATH is ignored."""
        raise NotImplementedError
def transform_symbol(ctx, name):
    """Apply the symbol renaming rules to NAME and return the result.

    CTX.symbol_transforms is a sequence of (pattern, replacement)
    pairs, where PATTERN offers sub() like a compiled regexp.  The
    rules are applied in order, each one to the output of the previous
    one, and every rule that changes the name is reported on stdout."""
    for (regexp, substitution) in ctx.symbol_transforms:
        transformed = regexp.sub(substitution, name)
        if transformed == name:
            continue
        sys.stdout.write(
            " symbol '%s' transformed to '%s'\n" % (name, transformed))
        name = transformed
    return name
class Failures(object):
    """Count verification failures and print each one as it is reported.

    An instance is false while no failure has been reported and true
    afterwards."""

    def __init__(self):
        self.count = 0  # number of failures seen

    def __str__(self):
        """Return the failure count as a string."""
        return str(self.count)

    def __repr__(self):
        return "<%s at 0x%x: %s>" % (
            self.__class__.__name__, id(self), self.count)

    def report(self, summary, details=None):
        """Record one failure and print it on stdout.

        SUMMARY is a one-line description.  DETAILS, if given, is a
        sequence of additional lines that are printed after it."""
        self.count += 1
        sys.stdout.write(' FAIL: %s\n' % summary)
        if details:
            for line in details:
                sys.stdout.write(' %s\n' % line)

    def __nonzero__(self):
        """Return True iff at least one failure has been reported."""
        return self.count > 0

    # Python 3 looks up __bool__ rather than __nonzero__; without this
    # alias an empty Failures object would be considered true there.
    __bool__ = __nonzero__
def _same_contents(path1, path2, chunk_size=8192):
    """Return True iff the files PATH1 and PATH2 hold the same bytes.

    Both files are read in chunks of CHUNK_SIZE bytes; reading stops at
    the first chunk that differs.  Both files are closed before
    returning, whether normally or through an exception."""
    file1 = open(path1, 'rb')
    try:
        file2 = open(path2, 'rb')
        try:
            while True:
                data1 = file1.read(chunk_size)
                data2 = file2.read(chunk_size)
                if data1 != data2:
                    return False
                if not data1:
                    # Both files reached EOF together.
                    return True
        finally:
            file2.close()
    finally:
        file1.close()


def file_compare(failures, base1, base2, run_diff, rel_path):
    """Compare the mode and contents of two files.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Differences
    are reported through FAILURES.report(); if RUN_DIFF is true, the
    output of 'diff -u' is attached to a content difference.  Return
    True iff the file mode and contents are identical."""
    # Imported here because the module's import block does not provide it.
    import stat

    ok = True
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)

    # Only look at the owner permission bits (octal 700).
    mode1 = os.stat(path1).st_mode & stat.S_IRWXU
    mode2 = os.stat(path2).st_mode & stat.S_IRWXU
    if mode1 != mode2:
        failures.report('File modes differ for %s' % rel_path,
                        details=['%s: %o' % (path1, mode1),
                                 '%s: %o' % (path2, mode2)])
        ok = False

    if not _same_contents(path1, path2):
        if run_diff:
            cmd = ['diff', '-u', path1, path2]
            # diff's exit status is not needed; only its output is shown.
            (output, status) = pipe(cmd)
            diff = output.split('\n')
        else:
            diff = None
        failures.report('File contents differ for %s' % rel_path,
                        details=diff)
        ok = False

    return ok
def tree_compare(failures, base1, base2, run_diff, rel_path=''):
    """Compare the contents of two directory trees, including file contents.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Every
    difference found is reported through FAILURES.report(); RUN_DIFF is
    passed on to file_compare().  Return True iff the trees are
    identical."""
    if not rel_path:
        path1 = base1
        path2 = base2
    else:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)

    for path in (path1, path2):
        if not os.path.exists(path):
            failures.report('%s does not exist' % path)
            return False

    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(failures, base1, base2, run_diff, rel_path)
    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        failures.report('Path types differ for %r' % rel_path)
        return False

    entries1 = sorted(os.listdir(path1))
    entries2 = sorted(os.listdir(path2))

    ok = True

    # Set lookups keep this linear in the directory size, and the list
    # comprehensions yield real lists, so the emptiness tests below do
    # not depend on what filter() returns.
    names1 = set(entries1)
    names2 = set(entries2)
    missing = [entry for entry in entries1 if entry not in names2]
    extra = [entry for entry in entries2 if entry not in names1]
    if missing:
        failures.report('Directory /%s is missing entries: %s' %
                        (rel_path, ', '.join(missing)))
        ok = False
    if extra:
        failures.report('Directory /%s has extra entries: %s' %
                        (rel_path, ', '.join(extra)))
        ok = False

    # Recurse into everything found on the first side.  Entries that
    # are missing on the second side are reported again by the
    # recursive call as not existing.
    for entry in entries1:
        new_rel_path = os.path.join(rel_path, entry)
        if not tree_compare(failures, base1, base2, run_diff, new_rel_path):
            ok = False
    return ok
def verify_contents_single(failures, cvsrepos, verifyrepos, kind, label, ctx):
    """Verify the HEAD revision of a trunk, tag, or branch.

    Export the same line of development from the CVS repository
    CVSREPOS and from the conversion repository VERIFYREPOS into two
    temporary directories and compare the resulting trees.  KIND can
    be either 'trunk', 'tag' or 'branch'.  If KIND is either 'tag' or
    'branch', LABEL is used to specify the name of the tag or branch;
    the name used on the CVS side is LABEL passed through
    transform_symbol().  Differences are recorded in FAILURES.  CTX
    has the attributes:
      CTX.tmpdir: specifying the directory for all temporary files.
      CTX.skip_cleanup: if true, the temporary files are not deleted.
      CTX.run_diff: if true, run diff on differing files.
      CTX.keyword_opt: passed to CVSREPOS.export() as its keyword option.

    Return True iff the two exported trees are identical."""
    if kind == 'trunk':
        itemname = kind
    else:
        itemname = kind + '-' + label
    cvs_export_dir = os.path.join(ctx.tmpdir, 'cvs-export-%s' % itemname)
    vrf_export_dir = os.path.join(
        ctx.tmpdir, '%s-export-%s' % (verifyrepos.name, itemname))

    cvslabel = None
    if label:
        cvslabel = transform_symbol(ctx, label)

    try:
        cvsrepos.export(cvs_export_dir, cvslabel, ctx.keyword_opt)
        if kind == 'trunk':
            verifyrepos.export_trunk(vrf_export_dir)
        elif kind == 'tag':
            verifyrepos.export_tag(vrf_export_dir, label)
        else:
            verifyrepos.export_branch(vrf_export_dir, label)

        identical = bool(tree_compare(
            failures, cvs_export_dir, vrf_export_dir, ctx.run_diff))
    finally:
        # Runs on success, on a reported difference and on an exception
        # alike, so the exports never pile up unless asked to.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, vrf_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)
    return identical
def verify_contents(failures, cvsrepos, verifyrepos, ctx):
    """Verify trunk, every tag and every branch, then print a summary.

    The tags and branches checked are the ones reported by the
    conversion repository VERIFYREPOS; each is compared against the CVS
    repository CVSREPOS with verify_contents_single(), to which CTX is
    passed through.  Branches whose name starts with 'unlabeled-' are
    skipped.  Differences are recorded in FAILURES, and a PASS or FAIL
    summary is written to stdout at the end."""
    # Trunk/tags/branches for which the comparison failed:
    failed_locations = []

    def check(kind, label, location):
        """Verify one line of development; remember LOCATION on failure."""
        if not verify_contents_single(
                failures, cvsrepos, verifyrepos, kind, label, ctx):
            failed_locations.append(location)

    # Verify contents of trunk
    sys.stdout.write('Verifying trunk\n')
    check('trunk', None, 'trunk')

    # Verify contents of all tags
    for tag in verifyrepos.tags():
        sys.stdout.write('Verifying tag %s\n' % tag)
        check('tag', tag, 'tag:' + tag)

    # Verify contents of all branches
    for branch in verifyrepos.branches():
        if branch.startswith('unlabeled-'):
            sys.stdout.write('Skipped branch %s\n' % branch)
            continue
        sys.stdout.write('Verifying branch %s\n' % branch)
        check('branch', branch, 'branch:' + branch)

    # A failed location must have reported a failure, and vice versa.
    assert bool(failures) == bool(failed_locations), \
        "failures = %r\nlocations = %r" % (failures, failed_locations)

    # Show the results
    if not failures:
        sys.stdout.write('PASS: %s == %s\n' % (cvsrepos, verifyrepos))
    else:
        sys.stdout.write('FAIL: %s != %s: %d failure(s) in:\n'
                         % (cvsrepos, verifyrepos, failures.count))
        for location in failed_locations:
            sys.stdout.write(' %s\n' % location)
class OptionContext:
    """Empty class; instances are plain attribute containers."""
def _build_option_parser():
    """Return the optparse parser that describes the command line."""
    parser = optparse.OptionParser(
        usage='%prog [options] cvs-repos verify-repos')
    parser.add_option('--branch',
                      help='verify contents of the branch BRANCH only')
    parser.add_option('--diff', action='store_true', dest='run_diff',
                      help='run diff on differing files')
    parser.add_option('--tag',
                      help='verify contents of the tag TAG only')
    parser.add_option('--tmpdir',
                      metavar='PATH',
                      help='path to store temporary files')
    parser.add_option('--trunk', action='store_true',
                      help='verify contents of trunk only')
    # dest must be spelled out: the default derived from the option
    # name would be 'symbol_transform', which nothing else reads.
    parser.add_option('--symbol-transform', action='append',
                      dest='symbol_transforms',
                      metavar='P:S',
                      help='transform symbol names from P to S like cvs2svn, '
                           'except transforms SVN symbol to CVS symbol')
    parser.add_option('--svn',
                      action='store_const', dest='repos_type', const='svn',
                      help='assume verify-repos is svn [default]')
    parser.add_option('--hg',
                      action='store_const', dest='repos_type', const='hg',
                      help='assume verify-repos is hg')
    parser.add_option('--git',
                      action='store_const', dest='repos_type', const='git',
                      help='assume verify-repos is git (not implemented!)')
    parser.add_option('--suppress-keywords',
                      action='store_const', dest='keyword_opt', const='-kk',
                      help='suppress CVS keyword expansion '
                           '(equivalent to --keyword-opt=-kk)')
    parser.add_option('--keyword-opt',
                      metavar='OPT',
                      help='control CVS keyword expansion by adding OPT to '
                           'cvs export command line')

    parser.set_defaults(run_diff=False,
                        tmpdir='',
                        skip_cleanup=False,
                        symbol_transforms=[],
                        repos_type='svn')
    return parser


def _compile_symbol_transforms(parser, values):
    """Convert 'P:S' strings into (compiled pattern, replacement) pairs.

    This is the form that transform_symbol() iterates over.  The value
    is split at the first ':' only, so S may itself contain colons.  A
    value without ':' or with an invalid regexp is reported through
    PARSER.error(), which exits the program."""
    transforms = []
    for value in values:
        if ':' not in value:
            parser.error("'%s' is not of the form P:S." % (value,))
        (pattern, replacement) = value.split(':', 1)
        try:
            # NOTE(review): the pattern is anchored so that it has to
            # match the whole symbol name, which is believed to be how
            # cvs2svn treats its own --symbol-transform -- confirm.
            compiled = re.compile('^(?:' + pattern + ')$')
        except re.error:
            parser.error("'%s' is not a valid regexp." % (pattern,))
        transforms.append((compiled, replacement))
    return transforms


def main(argv):
    """Run the verification described by the command line ARGV.

    ARGV is the full argument vector including the program name, as in
    sys.argv.  The function does not return: it ends with sys.exit(),
    using status 0 if everything matched, 1 if differences were found
    or a command failed, and (through optparse) 2 for usage errors."""
    parser = _build_option_parser()
    (options, args) = parser.parse_args(argv[1:])

    # The options object doubles as the CTX argument of the verify
    # functions, so store the compiled transforms back onto it.
    options.symbol_transforms = _compile_symbol_transforms(
        parser, options.symbol_transforms)

    def error(msg):
        """Print an error to sys.stderr."""
        sys.stderr.write('Error: ' + str(msg) + '\n')

    verify_branch = options.branch
    verify_tag = options.tag
    verify_trunk = options.trunk

    # Consistency check for options and arguments.
    if len(args) != 2:
        parser.error("wrong number of arguments")

    cvs_path = args[0]
    verify_path = args[1]
    verify_klass = {'svn': SvnRepos,
                    'hg': HgRepos,
                    'git': GitRepos}[options.repos_type]

    failures = Failures()
    aborted = False  # set when a command failed and verification stopped
    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        verifyrepos = verify_klass(verify_path)

        # Do our thing...
        if verify_branch:
            sys.stdout.write('Verifying branch %s\n' % verify_branch)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', verify_branch,
                options)
        elif verify_tag:
            sys.stdout.write('Verifying tag %s\n' % verify_tag)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', verify_tag, options)
        elif verify_trunk:
            sys.stdout.write('Verifying trunk\n')
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'trunk', None, options)
        else:
            # Verify trunk, tags and branches
            verify_contents(failures, cvsrepos, verifyrepos, options)
    except RuntimeError:
        # sys.exc_info() is used instead of "except ... as e" so that
        # the syntax is accepted by every Python version.
        error(str(sys.exc_info()[1]))
        # The verification did not complete, so it must not look like
        # a pass to whoever checks the exit status.
        aborted = True
    except KeyboardInterrupt:
        pass

    if failures.count > 0 or aborted:
        sys.exit(1)
    sys.exit(0)


if __name__ == '__main__':
    main(sys.argv)