InitializeChangesetsPass: Fully initialize breaks array before continuing.
[cvs2svn.git] / contrib / verify-cvs2svn.py
blobe24c07d3e4915571f1dd46d20422492bc6859de5
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
33 import os
34 import sys
35 import optparse
36 import subprocess
37 import shutil
38 import re
39 import tarfile
# Names of the command line clients that are run to export trees from
# CVS and from the converted Subversion, Mercurial or Git repository.
CVS_CMD = 'cvs'
SVN_CMD = 'svn'
HG_CMD = 'hg'
GIT_CMD = 'git'
def pipe(cmd):
    """Run the argument list CMD and capture what it writes to stdout.

    Return a tuple (OUTPUT, STATUS): OUTPUT is everything the child
    wrote to its standard output and STATUS is the child's exit status.
    The child's standard error is not redirected."""
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF and then waits for the child.
    captured = process.communicate()[0]
    return (captured, process.returncode)
def cmd_failed(cmd, output, status):
    """Report a failed command on stdout, then raise RuntimeError.

    CMD is the argument list that was run and OUTPUT is the text it
    produced.  STATUS is accepted but not included in the report.  The
    RuntimeError message names only the program (CMD[0])."""
    sys.stdout.write('CMD FAILED: %s\n' % ' '.join(cmd))
    sys.stdout.write('Output:\n')
    sys.stdout.write(output)
    raise RuntimeError('%s command failed!' % cmd[0])
def split_output(cmd):
    """Run CMD and return its standard output as a list of lines.

    If the command exits with a non-zero status, the failure is reported
    through cmd_failed(), which raises RuntimeError.  The output is
    split on os.linesep; the piece following the last separator is
    always discarded, and if the list then still ends with an empty
    string that entry is discarded as well."""
    (output, status) = pipe(cmd)
    if status:
        cmd_failed(cmd, output, status)

    lines = output.split(os.linesep)
    # split() always yields at least one piece, so this pop() is safe.
    lines.pop()
    if lines and not lines[-1]:
        lines.pop()
    return lines
class CvsRepos:
    """A CVS repository, or a module inside one, on the local disk.

    After construction, self.cvsroot is the directory that contains
    'CVSROOT' and self.module is the path of the module relative to it
    ('.' when the whole repository was given)."""

    def __init__(self, path):
        """Open the CVS repository at PATH.

        PATH must be an existing directory.  If PATH itself contains a
        'CVSROOT' entry it is taken to be the whole repository;
        otherwise its ancestors are searched upwards for the nearest
        directory that contains one.  Raise RuntimeError if PATH is not
        a directory or no CVSROOT can be found."""
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        if os.path.exists(os.path.join(path, 'CVSROOT')):
            # PATH is the root of the whole repository.
            self.module = "."
            self.cvsroot = path
            return

        # PATH is a module: climb towards the filesystem root, moving
        # one path component from the root candidate to the front of the
        # module path at each step.
        (root, module) = os.path.split(path)
        while not os.path.exists(os.path.join(root, 'CVSROOT')):
            (parent, component) = os.path.split(root)
            if parent == root:
                # Reached the top of the filesystem without success.
                raise RuntimeError('Cannot find the CVSROOT')
            module = os.path.join(component, module)
            root = parent
        self.cvsroot = root
        self.module = module

    def __str__(self):
        """Return the last path component of the CVSROOT directory."""
        return os.path.basename(self.cvsroot)

    def export(self, dest_path, rev=None, keyword_opt=None):
        """Export the module into the new directory DEST_PATH.

        REV may be None to export the HEAD revision (requested from CVS
        as '-D now'), or any valid CVS revision string to export that
        revision.  KEYWORD_OPT, if given, is passed to 'cvs export' as
        an extra option.  DEST_PATH is created here and must not exist
        yet.  Any output from cvs, or a non-zero exit status, is treated
        as a failure and reported via cmd_failed()."""
        os.mkdir(dest_path)

        if rev:
            revision_args = ['-r', rev]
        else:
            revision_args = ['-D', 'now']

        cmd = [CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
        cmd += revision_args
        if keyword_opt:
            cmd.append(keyword_opt)
        cmd += ['-d', dest_path, self.module]

        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)
class SvnRepos:
    """A Subversion repository accessed through the svn client."""

    name = 'svn'

    def __init__(self, url):
        """Open the Subversion repository at URL.

        URL may be a real URL or a filesystem path; anything that does
        not contain '://' is treated as a path and turned into a
        'file://' URL.  The entries of 'tags' and 'branches' are listed
        once here and cached."""
        if '://' not in url:
            # A local path was supplied: build a file:// URL from it.
            abspath = os.path.abspath(url)
            if abspath[0] != '/':
                # e.g. a drive-letter path; needs the extra slash.
                scheme = 'file:///'
            else:
                scheme = 'file://'
            url = scheme + abspath
            if os.sep != '/':
                url = url.replace(os.sep, '/')

        self.url = url

        # Cache the tag and branch names; either directory may be absent
        # from the repository root.
        self.tag_list = []
        self.branch_list = []
        top_level = self.list('')
        if 'tags' in top_level:
            self.tag_list = self.list('tags')
        if 'branches' in top_level:
            self.branch_list = self.list('branches')

    def __str__(self):
        """Return the last '/'-separated component of the URL."""
        return self.url.rsplit('/', 1)[-1]

    def export(self, path, dest_path):
        """Export the repository path PATH to DEST_PATH.

        Any output from svn, or a non-zero exit status, is treated as a
        failure and reported via cmd_failed()."""
        cmd = [SVN_CMD, 'export', '-q', self.url + '/' + path, dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    def list(self, path):
        """Return the names of all files and directories in PATH.

        Names are taken from 'svn ls'; trailing slashes are stripped
        and empty lines are skipped."""
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        return [line.rstrip('/') for line in split_output(cmd) if line]

    def tags(self):
        """Return the cached list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return the cached list of all branches in the repository."""
        return self.branch_list
class HgRepos:
    """A Mercurial repository accessed through the hg client."""

    name = 'hg'

    def __init__(self, path):
        """Remember the repository at PATH; no command is run yet."""
        self.path = path
        self.base_cmd = [HG_CMD, '-R', self.path]

        # Filled in lazily by branches():
        self._branches = None        # cached branch names
        self._have_default = None    # so export_trunk() doesn't blow up

    def __str__(self):
        """Return the last path component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Archive revision REV of the repository into DEST_PATH.

        Paths matching the '.hg' exclude pattern are left out.  Any
        output from hg, or a non-zero exit status, is treated as a
        failure and reported via cmd_failed()."""
        archive_cmd = self.base_cmd + [
            'archive',
            '--type', 'files',
            '--rev', rev,
            '--exclude', r're:^\.hg',
            dest_path,
            ]
        (output, status) = pipe(archive_cmd)
        if status or output:
            cmd_failed(archive_cmd, output, status)

        if os.path.exists(dest_path):
            return

        # If Mercurial has nothing to export, then it doesn't create
        # dest_path.  That breaks tree_compare(), so confirm that the
        # manifest of the chosen revision holds nothing outside '.hg*'
        # and, only in that case, create the empty directory ourselves.
        manifest_cmd = self.base_cmd + ['manifest', '--rev', rev]
        for filename in split_output(manifest_cmd):
            if not filename.startswith('.hg'):
                return
        os.mkdir(dest_path)

    def export_trunk(self, dest_path):
        """Export the 'default' branch to DEST_PATH.

        If the repository has no 'default' branch, just create an empty
        DEST_PATH."""
        self.branches()  # ensure _have_default is set
        if not self._have_default:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)
            return
        self._export(dest_path, 'default')

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the names printed by 'hg tags -q', without 'tip'."""
        names = split_output(self.base_cmd + ['tags', '-q'])
        names.remove('tip')
        return names

    def branches(self):
        """Return the branch names other than 'default'.

        The result of 'hg branches -q' is fetched once and cached.
        Whether 'default' was among the names is recorded in
        self._have_default."""
        if self._branches is not None:
            return self._branches

        names = split_output(self.base_cmd + ['branches', '-q'])
        self._branches = names
        self._have_default = 'default' in names
        if self._have_default:
            names.remove('default')
        return self._branches
class GitRepos:
    """A Git repository accessed through the git client."""

    name = 'git'

    def __init__(self, path):
        """Remember the repository at PATH; no command is run yet."""
        self.path = path
        self.repo_cmd = [
            GIT_CMD,
            '--git-dir=' + os.path.join(self.path, '.git'),
            '--work-tree=' + self.path,
            ]

        self._branches = None       # cache result of branches()
        self._have_master = None    # so export_trunk() doesn't blow up

    def __str__(self):
        """Return the last path component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Extract revision REV of the repository into DEST_PATH.

        The tree is produced by 'git archive' and unpacked by an
        external 'tar' process.  DEST_PATH is created here and must not
        exist yet.  Raise RuntimeError if tar prints anything or fails,
        or if git archive exits with a non-zero status."""
        cmd = [GIT_CMD, 'archive', '--remote=' + self.path, '--format=tar', rev]
        git_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

        # The archive is piped to an external tar rather than read with
        # the tarfile module: for some git tags, streaming
        # git_proc.stdout through tarfile left the git archive process
        # <defunct> and git_proc.wait() hung for good.
        os.mkdir(dest_path)
        tar_proc = subprocess.Popen(
            ['tar', '-C', dest_path, '-x'],
            stdin=git_proc.stdout, stdout=subprocess.PIPE,
            )
        # Drop our copy of the pipe's read end so that tar is its only
        # reader; git archive then gets SIGPIPE instead of blocking
        # forever if tar exits early.
        git_proc.stdout.close()

        output = tar_proc.communicate()[0]
        status = tar_proc.returncode
        if output or status:
            # Reap git archive so it is not left behind as a zombie.
            git_proc.wait()
            raise RuntimeError(
                'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
                % (rev, self.path, dest_path, output)
                )

        status = git_proc.wait()
        if status:
            raise RuntimeError(
                'Git extract of rev %s from repo %s to %s failed!'
                % (rev, self.path, dest_path)
                )

        if not os.path.exists(dest_path):
            raise RuntimeError(
                'Git clone of %s to %s failed!' % (self.path, dest_path)
                )

    def export_trunk(self, dest_path):
        """Export the 'master' branch to DEST_PATH.

        If the repository has no 'master' branch, just create an empty
        DEST_PATH."""
        self.branches()  # ensure _have_master is set
        if self._have_master:
            self._export(dest_path, 'master')
        else:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the names printed by 'git tag'."""
        cmd = self.repo_cmd + ['tag']
        return split_output(cmd)

    def branches(self):
        """Return the branch names other than 'master'.

        The result of 'git branch' is fetched once and cached.  Whether
        'master' was among the names is recorded in self._have_master."""
        if self._branches is None:
            cmd = self.repo_cmd + ['branch']
            # Remove the two characters at the start of each line (the
            # marker column that 'git branch' prints before the name).
            branches = [line[2:] for line in split_output(cmd)]
            self._branches = branches
            try:
                branches.remove('master')
                self._have_master = True
            except ValueError:
                self._have_master = False

        return self._branches
def transform_symbol(ctx, name):
    """Apply the --symbol-transform renaming rules to the symbol NAME.

    CTX.symbol_transforms is a sequence of (PATTERN, REPLACEMENT) pairs,
    where PATTERN provides a sub() method.  The rules are applied in
    order, each to the result of the previous one, and every rule that
    changes the name is logged to stdout.  Return the final name."""
    current = name
    for (pattern, replacement) in ctx.symbol_transforms:
        renamed = pattern.sub(replacement, current)
        if renamed == current:
            # This rule did not apply.
            continue
        sys.stdout.write(
            " symbol '%s' transformed to '%s'\n" % (current, renamed)
            )
        current = renamed

    return current
class Failures(object):
    """Counter for verification failures that also prints each one.

    An instance is true in boolean context iff at least one failure has
    been reported."""

    def __init__(self):
        self.count = 0   # number of failures seen

    def __str__(self):
        """Return the failure count as a string."""
        return str(self.count)

    def __repr__(self):
        return "<%s at 0x%x: %s>" % (self.__class__.__name__, id(self), self.count)

    def report(self, summary, details=None):
        """Record one failure and print it to stdout.

        SUMMARY is printed on a 'FAIL:' line.  DETAILS, if given, is an
        iterable of lines that are printed below the summary."""
        self.count += 1
        sys.stdout.write(' FAIL: %s\n' % summary)
        if details:
            for line in details:
                sys.stdout.write(' %s\n' % line)

    def __nonzero__(self):
        """Return True iff any failure has been reported."""
        return self.count > 0

    # Python 3 consults __bool__ rather than __nonzero__; without this
    # alias every instance would count as true there, even with no
    # failures recorded.
    __bool__ = __nonzero__
def file_compare(failures, base1, base2, run_diff, rel_path):
    """Compare the mode and contents of two files.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Differences
    are reported through FAILURES.report().  If RUN_DIFF is true, the
    output of 'diff -u' is attached to a content mismatch report.
    Return True iff the file mode and contents are identical."""

    ok = True
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)

    owner_bits = 0o700    # only look at owner bits
    mode1 = os.stat(path1).st_mode & owner_bits
    mode2 = os.stat(path2).st_mode & owner_bits
    if mode1 != mode2:
        failures.report('File modes differ for %s' % rel_path,
                        details=['%s: %o' % (path1, mode1),
                                 '%s: %o' % (path2, mode2)])
        ok = False

    # The try blocks are nested so that the first file is closed even
    # when opening the second one raises.
    file1 = open(path1, 'rb')
    try:
        file2 = open(path2, 'rb')
        try:
            while True:
                data1 = file1.read(8192)
                data2 = file2.read(8192)
                if data1 != data2:
                    if run_diff:
                        cmd = ['diff', '-u', path1, path2]
                        # diff's exit status is deliberately ignored.
                        output = pipe(cmd)[0]
                        diff = output.split(os.linesep)
                    else:
                        diff = None
                    failures.report('File contents differ for %s' % rel_path,
                                    details=diff)
                    ok = False
                    break
                if len(data1) == 0:
                    # eof on both files (the chunks compared equal)
                    break
        finally:
            file2.close()
    finally:
        file1.close()

    return ok
def tree_compare(failures, base1, base2, run_diff, rel_path=''):
    """Compare the contents of two directory trees, including file contents.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Every
    difference found is reported through FAILURES.report(); RUN_DIFF is
    passed on to file_compare().  Return True iff the trees are
    identical."""

    if not rel_path:
        path1 = base1
        path2 = base2
    else:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)

    if not os.path.exists(path1):
        failures.report('%s does not exist' % path1)
        return False
    if not os.path.exists(path2):
        failures.report('%s does not exist' % path2)
        return False
    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(failures, base1, base2, run_diff, rel_path)
    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        failures.report('Path types differ for %r' % rel_path)
        return False

    entries1 = sorted(os.listdir(path1))
    entries2 = sorted(os.listdir(path2))

    ok = True

    # Real lists are needed here, not filter() results: they are tested
    # for emptiness and joined below.  Sets make each membership test
    # constant-time while the sorted order of the reports is kept.
    names1 = set(entries1)
    names2 = set(entries2)
    missing = [entry for entry in entries1 if entry not in names2]
    extra = [entry for entry in entries2 if entry not in names1]
    if missing:
        failures.report('Directory /%s is missing entries: %s' %
                        (rel_path, ', '.join(missing)))
        ok = False
    if extra:
        failures.report('Directory /%s has extra entries: %s' %
                        (rel_path, ', '.join(extra)))
        ok = False

    # Recurse into everything found under the first tree.  Entries that
    # are absent from the second tree are reported again by the
    # recursive call as non-existent paths.
    for entry in entries1:
        new_rel_path = os.path.join(rel_path, entry)
        if not tree_compare(failures, base1, base2, run_diff, new_rel_path):
            ok = False
    return ok
def verify_contents_single(failures, cvsrepos, verifyrepos, kind, label, ctx):
    """Verify the HEAD revision of a trunk, tag, or branch.

    Export the same line of development from the CVS repository
    CVSREPOS and from the conversion repository VERIFYREPOS into
    temporary directories and compare the two trees, reporting every
    difference through FAILURES.  KIND can be either 'trunk', 'tag' or
    'branch'.  If KIND is either 'tag' or 'branch', LABEL is used to
    specify the name of the tag or branch; the CVS side is exported
    using LABEL after it has been passed through transform_symbol().
    Return True iff the trees are identical.  CTX has the attributes:
    CTX.tmpdir: specifying the directory for all temporary files.
    CTX.skip_cleanup: if true, the temporary files are not deleted.
    CTX.run_diff: if true, run diff on differing files.
    CTX.keyword_opt: extra option handed to the CVS export, or None.
    CTX.symbol_transforms: the rules used by transform_symbol()."""

    if kind == 'trunk':
        itemname = kind
    else:
        itemname = kind + '-' + label

    cvs_export_dir = os.path.join(
        ctx.tmpdir, 'cvs-export-%s' % itemname)
    vrf_export_dir = os.path.join(
        ctx.tmpdir, '%s-export-%s' % (verifyrepos.name, itemname))

    cvslabel = None
    if label:
        cvslabel = transform_symbol(ctx, label)

    try:
        cvsrepos.export(cvs_export_dir, cvslabel, ctx.keyword_opt)

        if kind == 'trunk':
            verifyrepos.export_trunk(vrf_export_dir)
        elif kind == 'tag':
            verifyrepos.export_tag(vrf_export_dir, label)
        else:
            verifyrepos.export_branch(vrf_export_dir, label)

        identical = tree_compare(
            failures, cvs_export_dir, vrf_export_dir, ctx.run_diff
            )
    finally:
        # Clean up whatever was exported, even if an export or the
        # comparison raised.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, vrf_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)

    if identical:
        return True
    return False
def verify_contents(failures, cvsrepos, verifyrepos, ctx):
    """Verify trunk, every tag and every branch of a conversion.

    Check that the contents of the HEAD revision of all directories and
    files in the trunk, all tags and all branches in the conversion
    repository VERIFYREPOS match the ones in the CVS repository
    CVSREPOS.  Branches whose name starts with 'unlabeled-' are skipped.
    Progress and a final PASS/FAIL summary are written to stdout, and
    differences are counted in FAILURES.  CTX is passed through to
    verify_contents_single()."""

    out = sys.stdout

    # Trunk/tags/branches for which verification failed:
    locations = []

    # Verify contents of trunk
    out.write('Verifying trunk\n')
    out.flush()
    trunk_ok = verify_contents_single(
        failures, cvsrepos, verifyrepos, 'trunk', None, ctx
        )
    if not trunk_ok:
        locations.append('trunk')

    # Verify contents of all tags
    for tag in verifyrepos.tags():
        out.write('Verifying tag %s\n' % tag)
        out.flush()
        tag_ok = verify_contents_single(
            failures, cvsrepos, verifyrepos, 'tag', tag, ctx
            )
        if not tag_ok:
            locations.append('tag:' + tag)

    # Verify contents of all branches
    for branch in verifyrepos.branches():
        if branch.startswith('unlabeled-'):
            out.write('Skipped branch %s\n' % branch)
        else:
            out.write('Verifying branch %s\n' % branch)
            branch_ok = verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', branch, ctx
                )
            if not branch_ok:
                locations.append('branch:' + branch)
        out.flush()

    # A failure must have been counted iff some location failed.
    assert bool(failures) == bool(locations), \
        "failures = %r\nlocations = %r" % (failures, locations)

    # Show the results
    if failures:
        out.write('FAIL: %s != %s: %d failure(s) in:\n'
                  % (cvsrepos, verifyrepos, failures.count))
        for location in locations:
            out.write(' %s\n' % location)
    else:
        out.write('PASS: %s == %s\n' % (cvsrepos, verifyrepos))
    out.flush()
class OptionContext:
    """Empty class whose instances can carry arbitrary attributes."""
def main(argv):
    """Parse the command line ARGV and run the requested verification.

    ARGV is the complete argument vector, including the program name in
    ARGV[0]; if it is None, sys.argv is used.  Exactly two positional
    arguments are required: the CVS repository path and the path or URL
    of the converted repository.  With --branch, --tag or --trunk only
    that single item is verified; otherwise trunk, all tags and all
    branches are.

    This function always ends in sys.exit(): status 1 if any
    verification failure was reported or a RuntimeError stopped the
    run, 0 otherwise.  Bad usage exits through parser.error()."""
    if argv is None:
        argv = sys.argv

    parser = optparse.OptionParser(
        usage='%prog [options] cvs-repos verify-repos')
    parser.add_option('--branch',
                      help='verify contents of the branch BRANCH only')
    parser.add_option('--diff', action='store_true', dest='run_diff',
                      help='run diff on differing files')
    parser.add_option('--tag',
                      help='verify contents of the tag TAG only')
    parser.add_option('--tmpdir',
                      metavar='PATH',
                      help='path to store temporary files')
    parser.add_option('--trunk', action='store_true',
                      help='verify contents of trunk only')
    # dest is given explicitly: the default derived from the option name
    # would be 'symbol_transform', which is not the attribute that
    # transform_symbol() reads from the context.
    parser.add_option('--symbol-transform', action='append',
                      dest='symbol_transforms',
                      metavar='P:S',
                      help='transform symbol names from P to S like cvs2svn, '
                           'except transforms SVN symbol to CVS symbol')
    parser.add_option('--svn',
                      action='store_const', dest='repos_type', const='svn',
                      help='assume verify-repos is svn [default]')
    parser.add_option('--hg',
                      action='store_const', dest='repos_type', const='hg',
                      help='assume verify-repos is hg')
    parser.add_option('--git',
                      action='store_const', dest='repos_type', const='git',
                      help='assume verify-repos is git')
    parser.add_option('--suppress-keywords',
                      action='store_const', dest='keyword_opt', const='-kk',
                      help='suppress CVS keyword expansion '
                           '(equivalent to --keyword-opt=-kk)')
    parser.add_option('--keyword-opt',
                      metavar='OPT',
                      help='control CVS keyword expansion by adding OPT to '
                           'cvs export command line')

    parser.set_defaults(run_diff=False,
                        tmpdir='',
                        skip_cleanup=False,
                        repos_type='svn')
    (options, args) = parser.parse_args(argv[1:])

    # Turn each P:S value into the (compiled pattern, replacement) pair
    # that transform_symbol() expects to find in ctx.symbol_transforms.
    symbol_transforms = []
    for value in options.symbol_transforms or []:
        try:
            [pattern, replacement] = value.split(":")
        except ValueError:
            parser.error("'%s' is not of the form P:S." % (value,))
        try:
            # NOTE(review): the pattern is anchored so that it has to
            # match the whole symbol name, on the assumption that this
            # is how cvs2svn itself treats --symbol-transform; confirm.
            symbol_transforms.append(
                (re.compile('^' + pattern + '$'), replacement))
        except re.error:
            parser.error("'%s' is not a valid regexp." % (pattern,))
    options.symbol_transforms = symbol_transforms

    def error(msg):
        """Print an error to sys.stderr."""
        sys.stderr.write('Error: ' + str(msg) + '\n')

    verify_branch = options.branch
    verify_tag = options.tag
    verify_trunk = options.trunk

    # Consistency check for options and arguments.
    if len(args) != 2:
        parser.error("wrong number of arguments")

    cvs_path = args[0]
    verify_path = args[1]
    verify_klass = {'svn': SvnRepos,
                    'hg': HgRepos,
                    'git': GitRepos}[options.repos_type]

    failures = Failures()
    had_error = False
    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        verifyrepos = verify_klass(verify_path)

        # Do our thing...
        if verify_branch:
            sys.stdout.write('Verifying branch %s\n' % verify_branch)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', verify_branch,
                options
                )
        elif verify_tag:
            sys.stdout.write('Verifying tag %s\n' % verify_tag)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', verify_tag, options
                )
        elif verify_trunk:
            sys.stdout.write('Verifying trunk\n')
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'trunk', None, options
                )
        else:
            # Verify trunk, tags and branches
            verify_contents(failures, cvsrepos, verifyrepos, options)
    except RuntimeError as e:
        # The run could not be completed; this must not look like a
        # successful verification to the caller.
        error(str(e))
        had_error = True
    except KeyboardInterrupt:
        pass

    if failures.count > 0 or had_error:
        sys.exit(1)
    sys.exit(0)
# Run the verification only when executed as a script, not on import.
if __name__ == '__main__':
    main(sys.argv)