Use optparse module to parse command-line options.
[cvs2svn.git] / contrib / verify-cvs2svn.py
blob7d08048758aea5cd66adfc60e75dc5cd5726066e
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
33 import os
34 import sys
35 import optparse
36 import subprocess
37 import shutil
38 import re
# Executable names of the command-line clients this script runs.  They
# are passed to subprocess as the first element of each argument list.
CVS_CMD = 'cvs'  # exports the reference tree from the CVS repository
SVN_CMD = 'svn'  # lists and exports from a Subversion repository
HG_CMD = 'hg'    # queries and archives a Mercurial repository
def pipe(cmd):
    """Run CMD and capture everything it writes to standard output.

    CMD is an argument list; it is executed directly, without a shell.
    Standard error is not captured and goes wherever the parent's does.

    Return a tuple (output, status): OUTPUT is the captured text with
    line endings normalized to '\\n', STATUS is the process exit code.
    """
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)
    # communicate() drains the pipe, waits for the child to exit and
    # closes the pipe, so no file descriptor is left open.
    output = child.communicate()[0]
    return (output, child.returncode)
def cmd_failed(cmd, output, status):
    """Report a failed external command on stdout, then abort.

    CMD is the argument list that was run and OUTPUT the text it
    produced.  STATUS is accepted so that callers can pass the result of
    pipe() straight through; it is not printed.

    Always raises RuntimeError naming the program (CMD[0]).
    """
    # sys.stdout.write() instead of the print statement keeps this valid
    # on both Python 2 and Python 3; the bytes written are the same.
    sys.stdout.write('CMD FAILED: %s\n' % ' '.join(cmd))
    sys.stdout.write('Output:\n')
    sys.stdout.write(output)
    raise RuntimeError('%s command failed!' % cmd[0])
class CvsRepos:
    """A CVS repository, or one module directory inside it, on local disk.

    Attributes set by the constructor:
      cvsroot -- absolute path of the directory that contains CVSROOT
      module  -- path of the requested directory relative to cvsroot
                 ("." when the repository root itself was given)
    """

    def __init__(self, path):
        """Locate the repository root for the directory PATH.

        Raises RuntimeError if PATH is not a directory or if no ancestor
        of PATH contains a CVSROOT entry.
        """
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        if os.path.exists(os.path.join(path, 'CVSROOT')):
            # PATH is the repository root: export the whole repository.
            self.module = "."
            self.cvsroot = path
            return

        # Walk upwards until a directory holding CVSROOT is found,
        # accumulating the path components we climb over as the module.
        root, module = os.path.split(path)
        while not os.path.exists(os.path.join(root, 'CVSROOT')):
            head, tail = os.path.split(root)
            if head == root:
                # Reached the filesystem root without finding CVSROOT.
                raise RuntimeError('Cannot find the CVSROOT')
            module = os.path.join(tail, module)
            root = head
        self.cvsroot = root
        self.module = module

    def __str__(self):
        return os.path.basename(self.cvsroot)

    def export(self, dest_path, rev=None, keyword_opt=None):
        """Export the module into the new directory DEST_PATH.

        REV is a CVS revision or symbol to export; when it is None (or
        empty) the current HEAD is exported via '-D now'.  KEYWORD_OPT,
        if given, is appended verbatim to the 'cvs export' command line.

        DEST_PATH is created here and must not exist yet.  A non-zero
        exit status or any output from cvs is treated as failure.
        """
        os.mkdir(dest_path)
        if rev:
            revision_args = ['-r', rev]
        else:
            revision_args = ['-D', 'now']
        cmd = [CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
        cmd += revision_args
        if keyword_opt:
            cmd.append(keyword_opt)
        cmd += ['-d', dest_path, self.module]
        output, status = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)
class SvnRepos:
    """A Subversion repository accessed through the svn command-line client.

    The repository is expected to keep its lines of development under
    'trunk', 'tags' and 'branches' at the top level; the names found
    under 'tags' and 'branches' are listed once, at construction time.
    """

    name = 'svn'

    def __init__(self, url):
        """Open the Subversion repository at URL.

        URL may be a real URL (anything containing '://') or a local
        filesystem path, which is converted to a file:// URL.
        """
        # Check if the user supplied an URL or a path
        if '://' not in url:
            abspath = os.path.abspath(url)
            # Paths that do not start with '/' (e.g. a drive letter) need
            # an extra slash to form file:///...
            prefix = '' if abspath.startswith('/') else '/'
            url = 'file://' + prefix + abspath
            if os.sep != '/':
                url = url.replace(os.sep, '/')

        self.url = url

        # Cache a list of all tags and branches
        top_level = self.list('')
        if 'tags' in top_level:
            self.tag_list = self.list('tags')
        else:
            self.tag_list = []
        if 'branches' in top_level:
            self.branch_list = self.list('branches')
        else:
            self.branch_list = []

    def __str__(self):
        return self.url.split('/')[-1]

    def export(self, path, dest_path):
        """Export PATH (relative to the repository URL) to DEST_PATH.

        A non-zero exit status or any output from svn is treated as
        failure and raises RuntimeError via cmd_failed().
        """
        url = '/'.join([self.url, path])
        cmd = [SVN_CMD, 'export', '-q', url, dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    def list(self, path):
        """Return the names of all files and directories in PATH.

        Directory names are returned without the trailing '/' that
        'svn ls' prints.  Raises RuntimeError if svn exits non-zero.
        """
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return self._parse_listing(output)

    @staticmethod
    def _parse_listing(output):
        """Turn the text printed by 'svn ls' into a list of entry names."""
        entries = []
        for raw in output.split('\n'):
            line = raw.rstrip('\r')
            # Only directories carry a trailing slash; strip just that,
            # so that plain file names are not truncated.
            if line.endswith('/'):
                line = line[:-1]
            if line:
                entries.append(line)
        return entries

    def tags(self):
        """Return a list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return a list of all branches in the repository."""
        return self.branch_list
class HgRepos:
    """A Mercurial repository accessed through the hg command-line client.

    The 'default' branch plays the role of trunk; every other named
    branch is reported by branches().
    """

    name = 'hg'

    def __init__(self, path):
        """Remember the repository at PATH; no hg command is run yet."""
        self.path = path
        self.base_cmd = [HG_CMD, '-R', self.path]

        self._branches = None       # cache result of branches()
        self._have_default = None   # so export_trunk() doesn't blow up

    def __str__(self):
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Archive revision REV into DEST_PATH, leaving out .hg* files."""
        cmd = self.base_cmd + ['archive',
                               '--type', 'files',
                               '--rev', rev,
                               # raw string: the backslash belongs to the
                               # regular expression handed to hg
                               '--exclude', r're:^\.hg',
                               dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

        # If Mercurial has nothing to export, then it doesn't create
        # dest_path.  This breaks tree_compare(), so just check that the
        # manifest for the chosen revision really is empty, and if so
        # create the empty dir.
        if not os.path.exists(dest_path):
            cmd = self.base_cmd + ['manifest', '--rev', rev]

            (output, status) = pipe(cmd)
            if status:
                cmd_failed(cmd, output, status)
            manifest = [fn for fn in self._lines(output)
                        if not fn.startswith('.hg')]
            if not manifest:
                os.mkdir(dest_path)

    def export_trunk(self, dest_path):
        """Export the 'default' branch, or an empty directory if none."""
        self.branches()             # ensure _have_default is set
        if self._have_default:
            self._export(dest_path, 'default')
        else:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return all tag names except Mercurial's built-in 'tip'."""
        cmd = self.base_cmd + ['tags', '-q']
        return [tag for tag in self._split_output(cmd) if tag != 'tip']

    def branches(self):
        """Return all branch names except 'default' (cached after first call).

        As a side effect, records in _have_default whether a 'default'
        branch exists.
        """
        if self._branches is None:
            cmd = self.base_cmd + ['branches', '-q']
            names = self._split_output(cmd)
            self._have_default = 'default' in names
            self._branches = [name for name in names if name != 'default']

        return self._branches

    def _split_output(self, cmd):
        """Run CMD and return its output as a list of lines.

        Raises RuntimeError (via cmd_failed) if CMD exits non-zero.
        """
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return self._lines(output)

    @staticmethod
    def _lines(output):
        """Split OUTPUT into its non-empty lines.

        Works whether or not the text ends with a newline, and drops a
        trailing carriage return from each line.
        """
        stripped = (raw.rstrip('\r') for raw in output.split('\n'))
        return [line for line in stripped if line]
class GitRepos:
    """Placeholder for verifying against a Git repository.

    Not implemented: constructing an instance always raises
    NotImplementedError.
    """

    name = 'git'

    def __init__(self, path):
        # NotImplementedError is a subclass of RuntimeError, so a caller
        # that reports RuntimeErrors by printing str(exception) needs a
        # message here to show anything useful.
        raise NotImplementedError(
            'verifying against a git repository is not implemented')
def transform_symbol(ctx, name):
    """Transform the symbol NAME using the renaming rules specified
    with --symbol-transform.  Return the transformed symbol name.

    CTX.symbol_transforms is a sequence of (pattern, replacement) pairs,
    where PATTERN is a compiled regular expression.  The rules are
    applied in order, each one to the result of the previous one, and
    every rule that changes the name is logged to stdout.
    """
    for (pattern, replacement) in ctx.symbol_transforms:
        newname = pattern.sub(replacement, name)
        if newname != name:
            # sys.stdout.write() rather than the print statement, so the
            # function also parses on Python 3.
            sys.stdout.write(" symbol '%s' transformed to '%s'\n"
                             % (name, newname))
            name = newname

    return name
class Failures(object):
    """Collects verification failures and prints each one as it happens.

    An instance is true in a boolean context once at least one failure
    has been reported.
    """

    def __init__(self):
        self.count = 0              # number of failures seen

    def report(self, summary, details=None):
        """Record one failure and print it to stdout.

        SUMMARY is a one-line description.  DETAILS, if given, is an
        iterable of additional lines printed below the summary.
        """
        self.count += 1
        sys.stdout.write(' FAIL: %s\n' % summary)
        if details:
            for line in details:
                sys.stdout.write(' %s\n' % line)

    def __bool__(self):
        return self.count > 0

    # Python 2 looks up __nonzero__, Python 3 looks up __bool__.
    __nonzero__ = __bool__
def file_compare(failures, base1, base2, run_diff, rel_path):
    """Compare the mode and contents of two files.  The paths are
    specified as two base paths BASE1 and BASE2, and a path REL_PATH that
    is relative to the two base paths.

    Differences are reported through FAILURES.report(): at most one
    report for differing modes and at most one for differing contents.
    If RUN_DIFF is true, the output of 'diff -u' is attached to the
    contents report.

    Return True if the file mode and contents are identical, else False.
    """
    ok = True
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)
    mode1 = os.stat(path1).st_mode & 0o700    # only look at owner bits
    mode2 = os.stat(path2).st_mode & 0o700
    if mode1 != mode2:
        failures.report('File modes differ for %s' % rel_path,
                        details=['%s: %o' % (path1, mode1),
                                 '%s: %o' % (path2, mode2)])
        ok = False

    # Compare chunk by chunk and stop at the first difference, so that a
    # file is reported (and diffed) once no matter how much of it differs.
    same_contents = True
    with open(path1, 'rb') as file1:
        with open(path2, 'rb') as file2:
            while True:
                data1 = file1.read(8192)
                data2 = file2.read(8192)
                if data1 != data2:
                    same_contents = False
                    break
                if not data1:
                    # eof on both files (the chunks were equal)
                    break

    if not same_contents:
        if run_diff:
            cmd = ['diff', '-u', path1, path2]
            # The exit status is ignored: only diff's output is used.
            (output, status) = pipe(cmd)
            diff = output.split('\n')
        else:
            diff = None
        failures.report('File contents differ for %s' % rel_path,
                        details=diff)
        ok = False

    return ok
def tree_compare(failures, base1, base2, run_diff, rel_path=''):
    """Compare the contents of two directory trees, including the contents
    of all files.  The paths are specified as two base paths BASE1 and
    BASE2, and a path REL_PATH that is relative to the two base paths.

    Every difference is reported through FAILURES.report().  Entries
    that exist only under BASE1 are reported as missing, entries that
    exist only under BASE2 as extra; only entries present on both sides
    are compared recursively.  RUN_DIFF is passed on to file_compare().

    Return a true value if the trees are identical, else a false value.
    """
    if not rel_path:
        path1 = base1
        path2 = base2
    else:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)
    if not os.path.exists(path1):
        failures.report('%s does not exist' % path1)
        return 0
    if not os.path.exists(path2):
        failures.report('%s does not exist' % path2)
        return 0
    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(failures, base1, base2, run_diff, rel_path)
    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        failures.report('Path types differ for %r' % rel_path)
        return 0

    entries1 = sorted(os.listdir(path1))
    entries2 = sorted(os.listdir(path2))
    names1 = set(entries1)
    names2 = set(entries2)
    missing = [entry for entry in entries1 if entry not in names2]
    extra = [entry for entry in entries2 if entry not in names1]

    ok = 1
    # A directory whose listings differ is not identical, even if every
    # entry the two sides have in common matches.
    if missing:
        failures.report('Directory /%s is missing entries: %s' %
                        (rel_path, ', '.join(missing)))
        ok = 0
    if extra:
        failures.report('Directory /%s has extra entries: %s' %
                        (rel_path, ', '.join(extra)))
        ok = 0

    for entry in entries1:
        if entry not in names2:
            # Already reported as missing above; recursing would only
            # report the same problem a second time.
            continue
        new_rel_path = os.path.join(rel_path, entry)
        if not tree_compare(failures, base1, base2, run_diff, new_rel_path):
            ok = 0
    return ok
def verify_contents_single(failures, cvsrepos, verifyrepos, kind, label, ctx):
    """Check one line of development of VERIFYREPOS against CVSREPOS.

    Both repositories are exported into temporary directories and the
    two trees are compared with tree_compare(); differences are recorded
    in FAILURES.  KIND is 'trunk', 'tag' or 'branch'; for 'tag' and
    'branch', LABEL names the tag or branch in VERIFYREPOS (the CVS
    symbol is derived from it with transform_symbol()).

    CTX supplies these attributes:
      CTX.tmpdir: directory in which the exports are created.
      CTX.skip_cleanup: if true, the exports are left on disk.
      CTX.run_diff: if true, run diff on differing files.
      CTX.keyword_opt: extra option for 'cvs export', or None.
      CTX.symbol_transforms: rules used by transform_symbol().

    Return 1 if the trees are identical, else 0.
    """
    if kind == 'trunk':
        itemname = kind
    else:
        itemname = kind + '-' + label

    cvs_export_dir = os.path.join(ctx.tmpdir, 'cvs-export-%s' % itemname)
    vrf_export_dir = os.path.join(
        ctx.tmpdir, '%s-export-%s' % (verifyrepos.name, itemname))

    cvslabel = None
    if label:
        cvslabel = transform_symbol(ctx, label)

    try:
        cvsrepos.export(cvs_export_dir, cvslabel, ctx.keyword_opt)
        if kind == 'trunk':
            verifyrepos.export_trunk(vrf_export_dir)
        elif kind == 'tag':
            verifyrepos.export_tag(vrf_export_dir, label)
        else:
            verifyrepos.export_branch(vrf_export_dir, label)

        identical = tree_compare(
            failures, cvs_export_dir, vrf_export_dir, ctx.run_diff)
    finally:
        # Remove whatever was exported, even when an export failed.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, vrf_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)

    if not identical:
        return 0
    return 1
def verify_contents(failures, cvsrepos, verifyrepos, ctx):
    """Verify that the contents of the HEAD revision of all directories
    and files in the trunk, all tags and all branches in the conversion
    repository VERIFYREPOS matches the ones in the CVS repository CVSREPOS.
    CTX is passed through to verify_contents_single().

    Branches whose name starts with 'unlabeled-' are skipped.  Progress
    and a final PASS/FAIL summary are written to stdout; individual
    differences are recorded in FAILURES.
    """

    # branches/tags that failed:
    locations = []

    def check(kind, label, location):
        """Verify one item and remember LOCATION if anything went wrong."""
        seen_before = failures.count
        ok = verify_contents_single(
            failures, cvsrepos, verifyrepos, kind, label, ctx)
        # Trust the failure counter as well as the return value, so that
        # a reported failure is never left without a location.
        if not ok or failures.count != seen_before:
            locations.append(location)

    # Verify contents of trunk
    sys.stdout.write('Verifying trunk\n')
    check('trunk', None, 'trunk')

    # Verify contents of all tags
    for tag in verifyrepos.tags():
        sys.stdout.write('Verifying tag %s\n' % tag)
        check('tag', tag, 'tag:' + tag)

    # Verify contents of all branches
    for branch in verifyrepos.branches():
        if branch.startswith('unlabeled-'):
            sys.stdout.write('Skipped branch %s\n' % branch)
        else:
            sys.stdout.write('Verifying branch %s\n' % branch)
            check('branch', branch, 'branch:' + branch)

    # Show the results
    if failures.count > 0 or locations:
        sys.stdout.write('FAIL: %s != %s: %d failure(s) in:\n'
                         % (cvsrepos, verifyrepos, failures.count))
        for location in locations:
            sys.stdout.write(' %s\n' % location)
    else:
        sys.stdout.write('PASS: %s == %s\n' % (cvsrepos, verifyrepos))
class OptionContext:
    """Plain class with no attributes or methods of its own.

    NOTE(review): nothing in the visible part of this file instantiates
    it -- main() passes the object returned by optparse as the context --
    so confirm whether it is still needed.
    """
def main(argv):
    """Command-line entry point.

    ARGV is the complete argument vector, program name first (as in
    sys.argv).  The function never returns; it ends with sys.exit():

      0 -- verification ran and found no differences
      1 -- at least one difference was found, or a RuntimeError (for
           example a failed external command) stopped the run
      2 -- the command line was invalid (reported by optparse)
    """
    parser = optparse.OptionParser(
        usage='%prog [options] cvs-repos verify-repos')
    parser.add_option('--branch',
                      help='verify contents of the branch BRANCH only')
    parser.add_option('--diff', action='store_true', dest='run_diff',
                      help='run diff on differing files')
    parser.add_option('--tag',
                      help='verify contents of the tag TAG only')
    parser.add_option('--tmpdir',
                      metavar='PATH',
                      help='path to store temporary files')
    parser.add_option('--trunk', action='store_true',
                      help='verify contents of trunk only')
    # dest must be spelled out: the name derived from the option string
    # would be 'symbol_transform', which nothing reads.
    parser.add_option('--symbol-transform', action='append',
                      dest='symbol_transforms',
                      metavar='P:S',
                      help='transform symbol names from P to S like cvs2svn, '
                           'except transforms SVN symbol to CVS symbol')
    parser.add_option('--svn',
                      action='store_const', dest='repos_type', const='svn',
                      help='assume verify-repos is svn [default]')
    parser.add_option('--hg',
                      action='store_const', dest='repos_type', const='hg',
                      help='assume verify-repos is hg')
    parser.add_option('--git',
                      action='store_const', dest='repos_type', const='git',
                      help='assume verify-repos is git (not implemented!)')
    parser.add_option('--suppress-keywords',
                      action='store_const', dest='keyword_opt', const='-kk',
                      help='suppress CVS keyword expansion '
                           '(equivalent to --keyword-opt=-kk)')
    parser.add_option('--keyword-opt',
                      metavar='OPT',
                      help='control CVS keyword expansion by adding OPT to '
                           'cvs export command line')

    parser.set_defaults(run_diff=False,
                        tmpdir='',
                        skip_cleanup=False,
                        symbol_transforms=[],
                        repos_type='svn')
    # Parse the arguments we were given rather than the global sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    # Turn each 'P:S' string into the (compiled pattern, replacement)
    # pair that transform_symbol() expects to find on the context.
    symbol_transforms = []
    for value in options.symbol_transforms:
        parts = value.split(":")
        if len(parts) != 2:
            parser.error("'%s' is not of the form P:S." % (value,))
        (pattern, replacement) = parts
        try:
            # Anchored so that P has to match the whole symbol name.
            # NOTE(review): this mirrors how cvs2svn documents its own
            # --symbol-transform option -- confirm for the version in use.
            compiled = re.compile('^(?:' + pattern + ')$')
        except re.error:
            parser.error("'%s' is not a valid regexp." % (pattern,))
        symbol_transforms.append((compiled, replacement))
    options.symbol_transforms = symbol_transforms

    def error(msg):
        """Print an error to sys.stderr."""
        sys.stderr.write('Error: ' + str(msg) + '\n')

    verify_branch = options.branch
    verify_tag = options.tag
    verify_trunk = options.trunk

    # Consistency check for options and arguments.
    if len(args) != 2:
        parser.error("wrong number of arguments")

    cvs_path = args[0]
    verify_path = args[1]
    verify_klass = {'svn': SvnRepos,
                    'hg': HgRepos,
                    'git': GitRepos}[options.repos_type]

    failures = Failures()
    had_error = False
    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        verifyrepos = verify_klass(verify_path)

        # Do our thing...
        if verify_branch:
            sys.stdout.write('Verifying branch %s\n' % verify_branch)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', verify_branch,
                options)
        elif verify_tag:
            sys.stdout.write('Verifying tag %s\n' % verify_tag)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', verify_tag, options)
        elif verify_trunk:
            sys.stdout.write('Verifying trunk\n')
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'trunk', None, options)
        else:
            # Verify trunk, tags and branches
            verify_contents(failures, cvsrepos, verifyrepos, options)
    except RuntimeError as e:
        error(str(e))
        # A run that could not complete must not look like a success.
        had_error = True
    except KeyboardInterrupt:
        # Interrupted by the user: fall through and exit with the status
        # implied by the failures seen so far.
        pass

    if had_error or failures.count > 0:
        sys.exit(1)
    sys.exit(0)
# Run the verification only when this file is executed as a script;
# importing it as a module has no side effects.
if __name__ == '__main__':
    main(sys.argv)