verify-cvs2svn.py: Make split_output() a file-level function.
[cvs2svn.git] / contrib / verify-cvs2svn.py
blobdd9d5c6a67a5362a73e455e7308585c22eece16d
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
import optparse
import os
import re
import shutil
import stat
import subprocess
import sys
import tarfile
# Names of the command-line clients that are spawned for each
# version-control system.
CVS_CMD = "cvs"
SVN_CMD = "svn"
HG_CMD = "hg"
GIT_CMD = "git"
def pipe(cmd):
    """Run the argument list CMD with its standard output captured.

    Return the tuple (output, status): OUTPUT is everything the command
    wrote to stdout and STATUS is the command's exit status."""
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() drains stdout to EOF and then waits for the child.
    captured = process.communicate()[0]
    return (captured, process.returncode)
def cmd_failed(cmd, output, status):
    """Report a failed command on stdout, then raise RuntimeError.

    CMD is the argument list that was run and OUTPUT is what it printed;
    both are echoed before the exception is raised.  STATUS is currently
    unused."""
    sys.stdout.write('CMD FAILED: %s\n' % ' '.join(cmd))
    sys.stdout.write('Output:\n')
    sys.stdout.write(output)
    program = cmd[0]
    raise RuntimeError('%s command failed!' % program)
def split_output(cmd):
    """Run the argument list CMD and return its stdout as a list of lines.

    The output is split on os.linesep and the final element (the text
    after the last line terminator) is dropped.  If the command exits
    with a non-zero status, cmd_failed() is called, which raises
    RuntimeError.

    This is a file-level function, so it takes no SELF argument; every
    caller in this file invokes it as split_output(cmd)."""
    (output, status) = pipe(cmd)
    if status:
        cmd_failed(cmd, output, status)
    return output.split(os.linesep)[:-1]
class CvsRepos:
    """A CVS repository, or a module inside one, on the local filesystem."""

    def __init__(self, path):
        """Open the CVS repository at PATH.

        PATH must be an existing directory.  If it contains a CVSROOT
        directory it is taken to be the repository root and the module
        is "."; otherwise the nearest ancestor containing CVSROOT becomes
        the root and the remaining path components become the module.
        Raise RuntimeError if PATH is not a directory or if no ancestor
        contains CVSROOT."""
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        if os.path.exists(os.path.join(path, 'CVSROOT')):
            # The whole repository
            cvsroot, module = path, "."
        else:
            # Climb towards the filesystem root, moving one path component
            # at a time from the candidate root onto the module path.
            (cvsroot, module) = os.path.split(path)
            while not os.path.exists(os.path.join(cvsroot, 'CVSROOT')):
                (parent, component) = os.path.split(cvsroot)
                if parent == cvsroot:
                    # Reached the top without ever seeing a CVSROOT.
                    raise RuntimeError('Cannot find the CVSROOT')
                module = os.path.join(component, module)
                cvsroot = parent

        self.cvsroot = cvsroot
        self.module = module

    def __str__(self):
        """Return the last path component of the repository root."""
        return os.path.basename(self.cvsroot)

    def export(self, dest_path, rev=None, keyword_opt=None):
        """Export revision REV of the module into the new directory DEST_PATH.

        REV can be None to export the HEAD revision (as of "now"), or any
        valid CVS revision string to export that revision.  KEYWORD_OPT,
        if given, is added verbatim to the cvs export command line.  Any
        output from cvs, or a non-zero exit status, counts as failure."""
        os.mkdir(dest_path)
        if rev:
            revision_args = ['-r', rev]
        else:
            revision_args = ['-D', 'now']
        cmd = [CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
        cmd += revision_args
        if keyword_opt:
            cmd.append(keyword_opt)
        cmd += ['-d', dest_path, self.module]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)
class SvnRepos:
    """A Subversion repository accessed through the svn command-line client."""

    name = 'svn'

    def __init__(self, url):
        """Open the Subversion repository at URL.

        URL may be a real URL (anything containing '://') or a local
        path, which is converted to a file:// URL.  The names under
        'tags' and 'branches' are listed once here and cached."""
        # Check whether the user supplied a URL or a path
        if '://' not in url:
            abspath = os.path.abspath(url)
            if abspath[0] != '/':
                # e.g. a path starting with a drive letter
                abspath = '/' + abspath
            url = 'file://' + abspath
            if os.sep != '/':
                url = url.replace(os.sep, '/')

        self.url = url

        # Cache a list of all tags and branches
        top_level = self.list('')
        if 'tags' in top_level:
            self.tag_list = self.list('tags')
        else:
            self.tag_list = []
        if 'branches' in top_level:
            self.branch_list = self.list('branches')
        else:
            self.branch_list = []

    def __str__(self):
        """Return the last component of the repository URL."""
        return self.url.split('/')[-1]

    def export(self, path, dest_path):
        """Export PATH to DEST_PATH.

        Any output from svn, or a non-zero exit status, counts as failure."""
        url = '/'.join([self.url, path])
        cmd = [SVN_CMD, 'export', '-q', url, dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    @staticmethod
    def _parse_ls_output(output):
        """Return the entry names contained in OUTPUT, the stdout of 'svn ls'.

        Empty lines are skipped.  A single trailing '/' is removed from
        an entry; names without one are returned unchanged rather than
        losing their last character."""
        entries = []
        for line in output.split(os.linesep):
            if not line:
                continue
            if line.endswith('/'):
                line = line[:-1]
            entries.append(line)
        return entries

    def list(self, path):
        """Return a list of all files and directories in PATH."""
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return self._parse_ls_output(output)

    def tags(self):
        """Return a list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return a list of all branches in the repository."""
        return self.branch_list
class HgRepos:
    """A Mercurial repository accessed through the hg command-line client."""

    name = 'hg'

    def __init__(self, path):
        """Remember the repository at PATH; no hg command is run here."""
        self.path = path
        self.base_cmd = [HG_CMD, '-R', self.path]

        self._branches = None      # cache result of branches()
        self._have_default = None  # so export_trunk() doesn't blow up

    def __str__(self):
        """Return the last component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Archive revision REV into DEST_PATH, excluding '.hg*' files.

        Any output from hg archive, or a non-zero exit status, counts as
        failure."""
        archive_cmd = self.base_cmd + [
            'archive',
            '--type', 'files',
            '--rev', rev,
            '--exclude', r're:^\.hg',
            dest_path,
            ]
        (output, status) = pipe(archive_cmd)
        if status or output:
            cmd_failed(archive_cmd, output, status)

        if os.path.exists(dest_path):
            return

        # If Mercurial has nothing to export, then it doesn't create
        # dest_path.  This breaks tree_compare(), so just check that the
        # manifest for the chosen revision really is empty, and if so
        # create the empty dir.
        manifest_cmd = self.base_cmd + ['manifest', '--rev', rev]
        (output, status) = pipe(manifest_cmd)
        if status:
            cmd_failed(manifest_cmd, output, status)
        for filename in output.split(os.linesep)[:-1]:
            if not filename.startswith('.hg'):
                # The manifest is not empty after all: leave dest_path
                # missing.
                return
        os.mkdir(dest_path)

    def export_trunk(self, dest_path):
        """Export the 'default' branch, or create an empty DEST_PATH if the
        repository has no such branch."""
        self.branches()  # ensure _have_default is set
        if not self._have_default:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)
        else:
            self._export(dest_path, 'default')

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the names printed by 'hg tags -q', without 'tip'."""
        names = split_output(self.base_cmd + ['tags', '-q'])
        names.remove('tip')
        return names

    def branches(self):
        """Return the names printed by 'hg branches -q', without 'default'.

        The result is computed once and cached; whether 'default' was
        present is recorded in self._have_default."""
        if self._branches is not None:
            return self._branches

        names = split_output(self.base_cmd + ['branches', '-q'])
        self._branches = names
        try:
            names.remove('default')
        except ValueError:
            self._have_default = False
        else:
            self._have_default = True
        return self._branches
class GitRepos:
    """A git repository accessed through the git command-line client."""

    name = 'git'

    def __init__(self, path):
        """Remember the repository at PATH; no git command is run here."""
        self.path = path
        self.repo_cmd = [
            GIT_CMD,
            '--git-dir=' + os.path.join(self.path, '.git'),
            '--work-tree=' + self.path,
            ]

        self._branches = None     # cache result of branches()
        self._have_master = None  # so export_trunk() doesn't blow up

    def __str__(self):
        """Return the last component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Unpack the tree of revision REV into the new directory DEST_PATH.

        'git archive' is piped into an external 'tar -x'.  Raise
        RuntimeError if tar prints anything or fails, or if git fails."""
        cmd = [GIT_CMD, 'archive', '--remote=' + self.path, '--format=tar', rev]
        git_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

        # The archive is deliberately unpacked by an external tar instead
        # of the tarfile module: streaming it through
        # tarfile.open(mode="r|") made git_proc.wait() hang for some git
        # tags, with the git archive process left in a <defunct> state.
        os.mkdir(dest_path)
        tar_proc = subprocess.Popen(
            ['tar', '-C', dest_path, '-x'],
            stdin=git_proc.stdout, stdout=subprocess.PIPE,
            )
        # tar now holds its own copy of the pipe.  Drop ours so that git
        # receives SIGPIPE, rather than blocking, if tar exits early.
        git_proc.stdout.close()

        output = tar_proc.stdout.read()
        tar_status = tar_proc.wait()
        # Always reap git, even when tar failed, so that no child process
        # is left behind when an exception is raised below.
        git_status = git_proc.wait()

        if output or tar_status:
            raise RuntimeError(
                'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
                % (rev, self.path, dest_path, output)
                )

        if git_status:
            raise RuntimeError(
                'Git extract of rev %s from repo %s to %s failed!'
                % (rev, self.path, dest_path)
                )

        if not os.path.exists(dest_path):
            raise RuntimeError(
                'Git clone of %s to %s failed!' % (self.path, dest_path)
                )

    def export_trunk(self, dest_path):
        """Export the 'master' branch, or create an empty DEST_PATH if the
        repository has no such branch."""
        self.branches()  # ensure _have_master is set
        if self._have_master:
            self._export(dest_path, 'master')
        else:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the names printed by 'git tag'."""
        cmd = self.repo_cmd + ['tag']
        return split_output(cmd)

    def branches(self):
        """Return the names printed by 'git branch', without 'master'.

        The result is computed once and cached; whether 'master' was
        present is recorded in self._have_master."""
        if self._branches is None:
            cmd = self.repo_cmd + ['branch']
            # Remove the two characters at the start of each branch name
            branches = [line[2:] for line in split_output(cmd)]
            self._branches = branches
            try:
                branches.remove('master')
                self._have_master = True
            except ValueError:
                self._have_master = False

        return self._branches
def transform_symbol(ctx, name):
    """Apply the renaming rules in CTX.symbol_transforms to the symbol NAME.

    Each rule is a (pattern, replacement) pair, where PATTERN offers a
    sub() method.  The rules are applied in order, each to the result of
    the previous one, and every rule that changes the name is logged to
    stdout.  Return the transformed symbol name."""
    for (regex, substitution) in ctx.symbol_transforms:
        renamed = regex.sub(substitution, name)
        if renamed == name:
            continue
        sys.stdout.write(" symbol '%s' transformed to '%s'\n" % (name, renamed))
        name = renamed

    return name
class Failures(object):
    """Counts verification failures and prints each one as it is reported."""

    def __init__(self):
        # number of failures seen
        self.count = 0

    def __str__(self):
        """Return the failure count as a string."""
        return str(self.count)

    def __repr__(self):
        return "<%s at 0x%x: %s>" % (self.__class__.__name__, id(self), self.count)

    def report(self, summary, details=None):
        """Record one failure and print SUMMARY to stdout.

        DETAILS, if given and non-empty, is an iterable of extra lines
        that are printed after the summary."""
        self.count += 1
        sys.stdout.write(' FAIL: %s\n' % summary)
        for line in (details or ()):
            sys.stdout.write(' %s\n' % line)

    def __nonzero__(self):
        """An instance is true once at least one failure has been reported."""
        return self.count > 0
def file_compare(failures, base1, base2, run_diff, rel_path):
    """Compare the mode and contents of two files.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Every
    difference found is reported through FAILURES.report().  If RUN_DIFF
    is true, the output of 'diff -u' is attached to a content-difference
    report.  Return True iff the file mode and contents are identical.

    Both files are closed before returning, including when an exception
    is raised while reading."""
    ok = True
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)
    # only look at owner bits
    mode1 = os.stat(path1).st_mode & stat.S_IRWXU
    mode2 = os.stat(path2).st_mode & stat.S_IRWXU
    if mode1 != mode2:
        failures.report('File modes differ for %s' % rel_path,
                        details=['%s: %o' % (path1, mode1),
                                 '%s: %o' % (path2, mode2)])
        ok = False

    file1 = open(path1, 'rb')
    try:
        file2 = open(path2, 'rb')
        try:
            while True:
                data1 = file1.read(8192)
                data2 = file2.read(8192)
                if data1 != data2:
                    if run_diff:
                        cmd = ['diff', '-u', path1, path2]
                        # diff's exit status is not inspected; only its
                        # output is attached to the report.
                        (output, status) = pipe(cmd)
                        diff = output.split(os.linesep)
                    else:
                        diff = None
                    failures.report('File contents differ for %s' % rel_path,
                                    details=diff)
                    ok = False
                    break
                if len(data1) == 0:
                    # eof
                    break
        finally:
            file2.close()
    finally:
        file1.close()

    return ok
def tree_compare(failures, base1, base2, run_diff, rel_path=''):
    """Recursively compare two directory trees, including file contents.

    The trees are rooted at the base paths BASE1 and BASE2; REL_PATH
    names the entry, relative to both bases, that is being compared.
    Every difference is reported through FAILURES.report(), and RUN_DIFF
    is passed on to file_compare().  Return True iff the trees are
    identical."""
    if rel_path:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)
    else:
        path1, path2 = base1, base2

    for path in (path1, path2):
        if not os.path.exists(path):
            failures.report('%s does not exist' % path)
            return False

    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(failures, base1, base2, run_diff, rel_path)
    if not os.path.isdir(path1) or not os.path.isdir(path2):
        failures.report('Path types differ for %r' % rel_path)
        return False

    entries1 = sorted(os.listdir(path1))
    entries2 = sorted(os.listdir(path2))

    ok = True

    missing = [entry for entry in entries1 if entry not in entries2]
    extra = [entry for entry in entries2 if entry not in entries1]
    if missing:
        failures.report('Directory /%s is missing entries: %s' %
                        (rel_path, ', '.join(missing)))
        ok = False
    if extra:
        failures.report('Directory /%s has extra entries: %s' %
                        (rel_path, ', '.join(extra)))
        ok = False

    # Descend into everything found on the first side, including entries
    # that are absent from the second side.
    for entry in entries1:
        child_rel_path = os.path.join(rel_path, entry)
        if not tree_compare(failures, base1, base2, run_diff, child_rel_path):
            ok = False
    return ok
def verify_contents_single(failures, cvsrepos, verifyrepos, kind, label, ctx):
    """Verify the HEAD revision of a trunk, tag, or branch.

    Export the item from both the CVS repository CVSREPOS and the
    conversion repository VERIFYREPOS into temporary directories and
    compare the two trees, reporting differences through FAILURES.
    KIND can be either 'trunk', 'tag' or 'branch'.  If KIND is either
    'tag' or 'branch', LABEL is used to specify the name of the tag or
    branch.  Return True iff the two exports are identical.

    CTX has the attributes:
      CTX.tmpdir: specifying the directory for all temporary files.
      CTX.skip_cleanup: if true, the temporary files are not deleted.
      CTX.run_diff: if true, run diff on differing files.
      CTX.keyword_opt: passed to the CVS export.
      CTX.symbol_transforms: used to map LABEL to its CVS name."""
    if kind == 'trunk':
        itemname = kind
    else:
        itemname = kind + '-' + label
    cvs_export_dir = os.path.join(ctx.tmpdir, 'cvs-export-%s' % itemname)
    vrf_export_dir = os.path.join(
        ctx.tmpdir, '%s-export-%s' % (verifyrepos.name, itemname))

    # The CVS side is exported under the transformed symbol name.
    cvslabel = None
    if label:
        cvslabel = transform_symbol(ctx, label)

    try:
        cvsrepos.export(cvs_export_dir, cvslabel, ctx.keyword_opt)
        if kind == 'trunk':
            verifyrepos.export_trunk(vrf_export_dir)
        elif kind == 'tag':
            verifyrepos.export_tag(vrf_export_dir, label)
        else:
            verifyrepos.export_branch(vrf_export_dir, label)

        identical = tree_compare(
            failures, cvs_export_dir, vrf_export_dir, ctx.run_diff)
    finally:
        # Runs on success, on mismatch and when an export raised.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, vrf_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)

    if not identical:
        return False
    return True
def verify_contents(failures, cvsrepos, verifyrepos, ctx):
    """Verify trunk, every tag and every branch, then print a summary.

    The HEAD revision of trunk, of all tags and of all branches in the
    conversion repository VERIFYREPOS is compared with the CVS repository
    CVSREPOS.  Branches whose name starts with 'unlabeled-' are skipped.
    Differences are counted in FAILURES, and a final PASS/FAIL summary is
    written to stdout.  CTX is passed through to
    verify_contents_single()."""
    # branches/tags that failed:
    locations = []

    # Verify contents of trunk
    sys.stdout.write('Verifying trunk\n')
    sys.stdout.flush()
    trunk_ok = verify_contents_single(
        failures, cvsrepos, verifyrepos, 'trunk', None, ctx)
    if not trunk_ok:
        locations.append('trunk')

    # Verify contents of all tags
    for tag in verifyrepos.tags():
        sys.stdout.write('Verifying tag %s\n' % (tag,))
        sys.stdout.flush()
        tag_ok = verify_contents_single(
            failures, cvsrepos, verifyrepos, 'tag', tag, ctx)
        if not tag_ok:
            locations.append('tag:' + tag)

    # Verify contents of all branches
    for branch in verifyrepos.branches():
        if branch.startswith('unlabeled-'):
            sys.stdout.write('Skipped branch %s\n' % (branch,))
        else:
            sys.stdout.write('Verifying branch %s\n' % (branch,))
            branch_ok = verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', branch, ctx)
            if not branch_ok:
                locations.append('branch:' + branch)
        sys.stdout.flush()

    # A failure must have been counted exactly when some location failed.
    assert bool(failures) == bool(locations), \
        "failures = %r\nlocations = %r" % (failures, locations)

    # Show the results
    if failures:
        sys.stdout.write('FAIL: %s != %s: %d failure(s) in:\n'
                         % (cvsrepos, verifyrepos, failures.count))
        for location in locations:
            sys.stdout.write(' %s\n' % location)
    else:
        sys.stdout.write('PASS: %s == %s\n' % (cvsrepos, verifyrepos))
    sys.stdout.flush()
class OptionContext:
    """An empty class that defines no attributes or methods of its own.

    NOTE(review): presumably intended as a plain holder for option
    attributes; nothing in the visible code instantiates it -- confirm."""
def main(argv):
    """Command-line entry point: verify a converted repository against CVS.

    ARGV is the complete argument vector, program name first; everything
    after the program name is parsed as options and the two positional
    arguments 'cvs-repos' and 'verify-repos'.

    This function does not return; it always ends in sys.exit():
      0 -- verification ran and found no differences
      1 -- differences were found, or a RuntimeError aborted the run
      2 -- usage error (reported by optparse)"""
    parser = optparse.OptionParser(
        usage='%prog [options] cvs-repos verify-repos')
    parser.add_option('--branch',
                      help='verify contents of the branch BRANCH only')
    parser.add_option('--diff', action='store_true', dest='run_diff',
                      help='run diff on differing files')
    parser.add_option('--tag',
                      help='verify contents of the tag TAG only')
    parser.add_option('--tmpdir',
                      metavar='PATH',
                      help='path to store temporary files')
    parser.add_option('--trunk', action='store_true',
                      help='verify contents of trunk only')
    # dest is spelled out: the default derived from the option name would
    # be 'symbol_transform', which nothing else reads.
    parser.add_option('--symbol-transform', action='append',
                      dest='symbol_transforms',
                      metavar='P:S',
                      help='transform symbol names from P to S like cvs2svn, '
                           'except transforms SVN symbol to CVS symbol')
    parser.add_option('--svn',
                      action='store_const', dest='repos_type', const='svn',
                      help='assume verify-repos is svn [default]')
    parser.add_option('--hg',
                      action='store_const', dest='repos_type', const='hg',
                      help='assume verify-repos is hg')
    parser.add_option('--git',
                      action='store_const', dest='repos_type', const='git',
                      help='assume verify-repos is git')
    parser.add_option('--suppress-keywords',
                      action='store_const', dest='keyword_opt', const='-kk',
                      help='suppress CVS keyword expansion '
                           '(equivalent to --keyword-opt=-kk)')
    parser.add_option('--keyword-opt',
                      metavar='OPT',
                      help='control CVS keyword expansion by adding OPT to '
                           'cvs export command line')

    parser.set_defaults(run_diff=False,
                        tmpdir='',
                        skip_cleanup=False,
                        symbol_transforms=[],
                        repos_type='svn')
    (options, args) = parser.parse_args(argv[1:])

    # Turn each 'P:S' string into the (compiled pattern, replacement)
    # pair that transform_symbol() iterates over.
    symbol_transforms = []
    for value in options.symbol_transforms:
        if ':' not in value:
            parser.error("'%s' is not of the form P:S." % (value,))
        # Split on the first colon only, so S may itself contain colons.
        (pattern, replacement) = value.split(':', 1)
        try:
            # Anchored so that P has to match the entire symbol name.
            # NOTE(review): believed to mirror cvs2svn's own
            # --symbol-transform handling -- confirm against cvs2svn_lib.
            symbol_transforms.append(
                (re.compile('^' + pattern + '$'), replacement))
        except re.error:
            parser.error("'%s' is not a valid regexp." % (pattern,))
    # OPTIONS doubles as the CTX object handed to the verify functions.
    options.symbol_transforms = symbol_transforms

    verify_branch = options.branch
    verify_tag = options.tag
    verify_trunk = options.trunk

    # Consistency check for options and arguments.
    if len(args) != 2:
        parser.error("wrong number of arguments")

    cvs_path = args[0]
    verify_path = args[1]
    verify_klass = {'svn': SvnRepos,
                    'hg': HgRepos,
                    'git': GitRepos}[options.repos_type]

    failures = Failures()
    aborted = False
    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        verifyrepos = verify_klass(verify_path)

        # Do our thing...
        if verify_branch:
            sys.stdout.write('Verifying branch %s\n' % (verify_branch,))
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', verify_branch,
                options)
        elif verify_tag:
            sys.stdout.write('Verifying tag %s\n' % (verify_tag,))
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', verify_tag, options)
        elif verify_trunk:
            sys.stdout.write('Verifying trunk\n')
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'trunk', None, options)
        else:
            # Verify trunk, tags and branches
            verify_contents(failures, cvsrepos, verifyrepos, options)
    except RuntimeError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause, which keeps this line free of version-specific
        # syntax.
        sys.stderr.write('Error: ' + str(sys.exc_info()[1]) + '\n')
        # An aborted verification must not be mistaken for a pass.
        aborted = True
    except KeyboardInterrupt:
        pass

    if aborted or failures.count > 0:
        sys.exit(1)
    sys.exit(0)
# Run the verification only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv)