verify-cvs2svn.py: Fix \n vs \r\n issue under Windows.
[cvs2svn.git] / contrib / verify-cvs2svn.py
bloba3d4d820d4ed8436f668fb77ca1f151d472e014e
#!/usr/bin/env python
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================
#
# The purpose of verify-cvs2svn is to verify the result of a cvs2svn
# repository conversion.  The following tests are performed:
#
# 1. Content checking of the HEAD revision of trunk, all tags and all
#    branches.  Only the tags and branches in the Subversion
#    repository are checked, i.e. there are no checks to verify that
#    all tags and branches in the CVS repository are present.
#
# This program only works if you converted a subdirectory of a CVS
# repository, and not the whole repository.  If you really did convert
# a whole repository and need to check it, you must create a CVSROOT
# directory above the current root using cvs init.
#
# ====================================================================
33 import os
34 import sys
35 import optparse
36 import subprocess
37 import shutil
38 import re
39 import tarfile
# Names of the CVS, Subversion, Mercurial and Git command line clients.
# Each one is used as the first element of the argument lists that the
# repository classes below hand to subprocess.
CVS_CMD = 'cvs'
SVN_CMD = 'svn'
HG_CMD = 'hg'
GIT_CMD = 'git'
def pipe(cmd):
    """Run the argument list CMD as a child process and collect its stdout.

    Return the tuple (OUTPUT, STATUS): OUTPUT is everything the child
    wrote to its standard output, as a native string, and STATUS is the
    child's exit status.  The child's standard error is not captured."""

    child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() reads until EOF, closes the pipe and reaps the child.
    output = child.communicate()[0]
    status = child.returncode
    if not isinstance(output, str):
        # Python 3 hands back bytes, but the callers split the output on
        # os.linesep and write it to sys.stdout, so give them text.
        encoding = sys.getfilesystemencoding() or 'utf-8'
        output = output.decode(encoding, 'replace')
    return (output, status)
def cmd_failed(cmd, output, status):
    """Report a failed external command on stdout, then raise RuntimeError.

    CMD is the argument list that was run and OUTPUT is the text that it
    produced.  STATUS is accepted so that callers can pass on everything
    they know about the failure, but it is not included in the report.

    Always raises RuntimeError naming the program CMD[0]."""

    # sys.stdout.write() rather than the print statement, so that the
    # function parses under both Python 2 and Python 3.
    sys.stdout.write('CMD FAILED: %s\n' % ' '.join(cmd))
    sys.stdout.write('Output:\n')
    sys.stdout.write(output)
    raise RuntimeError('%s command failed!' % cmd[0])
class CvsRepos(object):
    """A CVS repository, or a module inside one, on the local disk.

    Attributes:
      cvsroot: absolute path of the directory that contains CVSROOT.
      module: path of the selected module relative to cvsroot, or "."
        when the whole repository was selected."""

    def __init__(self, path):
        """Open the CVS repository at PATH.

        PATH may be the repository root itself (the directory that holds
        CVSROOT) or any directory below it.  Raise RuntimeError if PATH is
        not a directory or if no ancestor of PATH contains CVSROOT."""
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        if os.path.exists(os.path.join(path, 'CVSROOT')):
            # The whole repository
            self.module = "."
            self.cvsroot = path
        else:
            # Walk towards the filesystem root until a directory holding
            # CVSROOT is found, accumulating the module path on the way.
            self.cvsroot = os.path.dirname(path)
            self.module = os.path.basename(path)
            while not os.path.exists(os.path.join(self.cvsroot, 'CVSROOT')):
                parent = os.path.dirname(self.cvsroot)
                if parent == self.cvsroot:
                    # dirname() no longer changes anything: we hit the top.
                    raise RuntimeError('Cannot find the CVSROOT')
                self.module = os.path.join(
                    os.path.basename(self.cvsroot), self.module)
                self.cvsroot = parent

    def __str__(self):
        """Return the last path component of the repository root."""
        return os.path.basename(self.cvsroot)

    def export(self, dest_path, rev=None, keyword_opt=None):
        """Export revision REV to DEST_PATH where REV can be None to export
        the HEAD revision, or any valid CVS revision string to export that
        revision.

        DEST_PATH is created first and must not exist yet.  KEYWORD_OPT,
        if given, is appended to the cvs export command line as is.  The
        command is run with -Q, so a nonzero exit status or any output
        at all is treated as a failure and reported through cmd_failed()."""
        os.mkdir(dest_path)
        cmd = [CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
        if rev:
            cmd.extend(['-r', rev])
        else:
            cmd.extend(['-D', 'now'])
        if keyword_opt:
            cmd.append(keyword_opt)
        cmd.extend(['-d', dest_path, self.module])
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)
class SvnRepos(object):
    """A Subversion repository, accessed through the svn command line client.

    Attributes:
      url: URL of the repository root.
      tag_list: names of the entries in /tags, cached by __init__.
      branch_list: names of the entries in /branches, cached by __init__."""

    name = 'svn'

    def __init__(self, url):
        """Open the Subversion repository at URL.

        URL may also be a filesystem path, which is converted to a file://
        URL.  The top-level directory and, when present, tags and branches
        are listed immediately, so this runs svn."""
        # Check if the user supplied an URL or a path
        if '://' not in url:
            abspath = os.path.abspath(url)
            if os.sep != '/':
                abspath = abspath.replace(os.sep, '/')
            # A path that starts with a drive letter needs a third slash.
            if not abspath.startswith('/'):
                abspath = '/' + abspath
            url = 'file://' + abspath

        self.url = url

        # Cache a list of all tags and branches
        top_entries = self.list('')
        if 'tags' in top_entries:
            self.tag_list = self.list('tags')
        else:
            self.tag_list = []
        if 'branches' in top_entries:
            self.branch_list = self.list('branches')
        else:
            self.branch_list = []

    def __str__(self):
        """Return the last component of the repository URL."""
        return self.url.split('/')[-1]

    def export(self, path, dest_path):
        """Export PATH to DEST_PATH.

        The export runs with -q, so a nonzero exit status or any output
        at all is reported through cmd_failed()."""
        url = '/'.join([self.url, path])
        cmd = [SVN_CMD, 'export', '-q', url, dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    def list(self, path):
        """Return a list of all files and directories in PATH.

        Directory names are returned without their trailing slash."""
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return self._parse_listing(output)

    def _parse_listing(self, output):
        """Split the text OUTPUT of 'svn ls' into a list of entry names."""
        entries = []
        for line in output.split(os.linesep):
            # Only directories carry a trailing slash; unconditionally
            # dropping the last character would truncate file names.
            if line.endswith('/'):
                line = line[:-1]
            if line:
                entries.append(line)
        return entries

    def tags(self):
        """Return a list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return a list of all branches in the repository."""
        return self.branch_list
class HgRepos(object):
    """A Mercurial repository, accessed through the hg command line client.

    Attributes:
      path: path of the repository, as given to the constructor.
      base_cmd: argument list prefix that selects this repository."""

    name = 'hg'

    def __init__(self, path):
        """Remember the repository at PATH; no command is run yet."""
        self.path = path
        self.base_cmd = [HG_CMD, '-R', self.path]

        self._branches = None  # cache result of branches()
        self._have_default = None  # so export_trunk() doesn't blow up

    def __str__(self):
        """Return the last path component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Archive revision REV into the directory DEST_PATH.

        Entries whose path starts with .hg are excluded.  A nonzero exit
        status or any output from hg is reported through cmd_failed()."""
        cmd = self.base_cmd + ['archive',
                               '--type', 'files',
                               '--rev', rev,
                               # Raw string: '\.' is not a valid escape.
                               '--exclude', r're:^\.hg',
                               dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

        # If Mercurial has nothing to export, then it doesn't create
        # dest_path.  This breaks tree_compare(), so just check that the
        # manifest for the chosen revision really is empty, and if so create
        # the empty dir.
        if not os.path.exists(dest_path):
            cmd = self.base_cmd + ['manifest', '--rev', rev]

            (output, status) = pipe(cmd)
            if status:
                cmd_failed(cmd, output, status)
            manifest = [fn for fn in output.split(os.linesep)[:-1]
                        if not fn.startswith('.hg')]
            if not manifest:
                os.mkdir(dest_path)

    def export_trunk(self, dest_path):
        """Export the 'default' branch to DEST_PATH.

        If the repository has no 'default' branch, create DEST_PATH as an
        empty directory instead."""
        self.branches()  # ensure _have_default is set
        if self._have_default:
            self._export(dest_path, 'default')
        else:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the tags reported by 'hg tags -q', without 'tip'."""
        cmd = self.base_cmd + ['tags', '-q']
        tags = self._split_output(cmd)
        tags.remove('tip')
        return tags

    def branches(self):
        """Return the branches reported by 'hg branches -q', minus 'default'.

        The result is cached.  As a side effect, record in _have_default
        whether 'default' was among the branches."""
        if self._branches is None:
            cmd = self.base_cmd + ['branches', '-q']
            self._branches = branches = self._split_output(cmd)
            try:
                branches.remove('default')
                self._have_default = True
            except ValueError:
                self._have_default = False

        return self._branches

    def _split_output(self, cmd):
        """Run CMD and return its output as a list of lines.

        The piece after the final line separator is dropped."""
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return output.split(os.linesep)[:-1]
class GitRepos(object):
    """A git repository, accessed through the git command line client.

    Attributes:
      path: path of the working tree, as given to the constructor.
      repo_cmd: argument list prefix that selects this repository."""

    name = 'git'

    def __init__(self, path):
        """Remember the repository at PATH; no command is run yet."""
        self.path = path
        self.repo_cmd = [
            GIT_CMD,
            '--git-dir=' + os.path.join(self.path, '.git'),
            '--work-tree=' + self.path,
            ]

        self._branches = None  # cache result of branches()
        self._have_master = None  # so export_trunk() doesn't blow up

    def __str__(self):
        """Return the last path component of the repository path."""
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Extract revision REV into the new directory DEST_PATH.

        The tree is produced by 'git archive' and unpacked by an external
        tar process.  Raise RuntimeError if either process fails or if tar
        writes anything to its standard output."""
        cmd = [GIT_CMD, 'archive', '--remote=' + self.path, '--format=tar', rev]
        git_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)

        # The archive is unpacked by an external tar rather than by the
        # tarfile module: reading git's output with tarfile.open(mode="r|")
        # made git_proc.wait() hang for some tags, with the git archive
        # process left in a <defunct> state.
        os.mkdir(dest_path)
        tar_proc = subprocess.Popen(
            ['tar', '-C', dest_path, '-x'],
            stdin=git_proc.stdout, stdout=subprocess.PIPE,
            )
        # Drop our copy of the pipe's read end.  tar is then its only
        # reader, so git gets SIGPIPE instead of blocking if tar exits early.
        git_proc.stdout.close()

        output = tar_proc.communicate()[0]
        tar_status = tar_proc.returncode
        # Reap git before raising so that no zombie is left behind.
        git_status = git_proc.wait()

        if output or tar_status:
            raise RuntimeError(
                'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
                % (rev, self.path, dest_path, output)
                )

        if git_status:
            raise RuntimeError(
                'Git extract of rev %s from repo %s to %s failed!'
                % (rev, self.path, dest_path)
                )

        if not os.path.exists(dest_path):
            raise RuntimeError(
                'Git clone of %s to %s failed!' % (self.path, dest_path)
                )

    def export_trunk(self, dest_path):
        """Export the 'master' branch to DEST_PATH.

        If the repository has no 'master' branch, create DEST_PATH as an
        empty directory instead."""
        self.branches()  # ensure _have_master is set
        if self._have_master:
            self._export(dest_path, 'master')
        else:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the tags reported by 'git tag'."""
        cmd = self.repo_cmd + ['tag']
        return self._split_output(cmd)

    def branches(self):
        """Return the branches reported by 'git branch', minus 'master'.

        The result is cached.  As a side effect, record in _have_master
        whether 'master' was among the branches."""
        if self._branches is None:
            cmd = self.repo_cmd + ['branch']
            # Remove the two characters at the start of each branch name
            branches = [line[2:] for line in self._split_output(cmd)]
            self._branches = branches
            try:
                branches.remove('master')
                self._have_master = True
            except ValueError:
                self._have_master = False

        return self._branches

    def _split_output(self, cmd):
        """Run CMD and return its output as a list of lines.

        The piece after the final line separator is dropped."""
        (output, status) = pipe(cmd)
        if status:
            cmd_failed(cmd, output, status)
        return output.split(os.linesep)[:-1]
def transform_symbol(ctx, name):
    """Transform the symbol NAME using the renaming rules specified
    with --symbol-transform.  Return the transformed symbol name.

    CTX.symbol_transforms is a sequence of (PATTERN, REPLACEMENT) pairs,
    where PATTERN offers a sub() method like a compiled regular
    expression.  The rules are applied in order, each one to the result
    of the previous one, and every rule that changes the name is
    reported on stdout."""

    for (pattern, replacement) in ctx.symbol_transforms:
        newname = pattern.sub(replacement, name)
        if newname != name:
            # sys.stdout.write() rather than the print statement, so that
            # the function parses under both Python 2 and Python 3.
            sys.stdout.write(
                " symbol '%s' transformed to '%s'\n" % (name, newname))
            name = newname

    return name
class Failures(object):
    """Counter for the verification failures seen so far.

    An instance is true if at least one failure has been reported."""

    def __init__(self):
        self.count = 0  # number of failures seen

    def __str__(self):
        return str(self.count)

    def __repr__(self):
        return "<%s at 0x%x: %s>" % (
            self.__class__.__name__, id(self), self.count)

    def report(self, summary, details=None):
        """Count one failure and print it on stdout.

        SUMMARY is a one-line description.  DETAILS, if given, is an
        iterable of further lines that are printed below the summary."""
        self.count += 1
        sys.stdout.write(' FAIL: %s\n' % summary)
        if details:
            for line in details:
                sys.stdout.write(' %s\n' % line)

    def __nonzero__(self):
        return self.count > 0

    # Python 3 consults __bool__ instead of __nonzero__; without this alias
    # an instance would be true even before any failure was reported.
    __bool__ = __nonzero__
def _contents_differ(path1, path2):
    """Return True iff the files PATH1 and PATH2 have different contents."""
    file1 = open(path1, 'rb')
    try:
        file2 = open(path2, 'rb')
        try:
            while True:
                data1 = file1.read(8192)
                data2 = file2.read(8192)
                if data1 != data2:
                    return True
                if not data1:
                    # eof on both files
                    return False
        finally:
            file2.close()
    finally:
        file1.close()


def file_compare(failures, base1, base2, run_diff, rel_path):
    """Compare the mode and contents of two files.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Return True
    iff the file mode and contents are identical.

    Every difference is reported through FAILURES.report().  If RUN_DIFF
    is true, the output of 'diff -u' is attached to the report about
    differing contents."""

    # Imported here because the top of the file does not import stat.
    import stat

    ok = True
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)
    # only look at owner bits (S_IRWXU is octal 700)
    mode1 = os.stat(path1).st_mode & stat.S_IRWXU
    mode2 = os.stat(path2).st_mode & stat.S_IRWXU
    if mode1 != mode2:
        failures.report('File modes differ for %s' % rel_path,
                        details=['%s: %o' % (path1, mode1),
                                 '%s: %o' % (path2, mode2)])
        ok = False

    if _contents_differ(path1, path2):
        if run_diff:
            cmd = ['diff', '-u', path1, path2]
            # The exit status of diff is not examined; only its output
            # is used.
            diff = pipe(cmd)[0].split(os.linesep)
        else:
            diff = None
        failures.report('File contents differ for %s' % rel_path,
                        details=diff)
        ok = False

    return ok
def tree_compare(failures, base1, base2, run_diff, rel_path=''):
    """Compare the contents of two directory trees, including file contents.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Return True
    iff the trees are identical.

    Every difference is reported through FAILURES.report().  RUN_DIFF is
    passed on to file_compare()."""

    if not rel_path:
        path1 = base1
        path2 = base2
    else:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)
    if not os.path.exists(path1):
        failures.report('%s does not exist' % path1)
        return False
    if not os.path.exists(path2):
        failures.report('%s does not exist' % path2)
        return False
    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(failures, base1, base2, run_diff, rel_path)
    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        failures.report('Path types differ for %r' % rel_path)
        return False
    entries1 = sorted(os.listdir(path1))
    entries2 = sorted(os.listdir(path2))

    ok = True

    # Real lists rather than filter(): on Python 3 filter() returns an
    # iterator, which is true even when it would yield nothing.
    names1 = set(entries1)
    names2 = set(entries2)
    missing = [entry for entry in entries1 if entry not in names2]
    extra = [entry for entry in entries2 if entry not in names1]
    if missing:
        failures.report('Directory /%s is missing entries: %s' %
                        (rel_path, ', '.join(missing)))
        ok = False
    if extra:
        failures.report('Directory /%s has extra entries: %s' %
                        (rel_path, ', '.join(extra)))
        ok = False

    # Only the entries of the first tree are descended into.  An entry
    # that is missing from the second tree is therefore reported once
    # more by the recursive call, as a path that does not exist.
    for entry in entries1:
        new_rel_path = os.path.join(rel_path, entry)
        if not tree_compare(failures, base1, base2, run_diff, new_rel_path):
            ok = False
    return ok
def verify_contents_single(failures, cvsrepos, verifyrepos, kind, label, ctx):
    """Verify the HEAD revision of a trunk, tag, or branch.

    Export the selected line of development from the CVS repository
    CVSREPOS and from the conversion repository VERIFYREPOS into two
    temporary directories, and compare the two trees.  KIND is 'trunk',
    'tag' or 'branch'; for a tag or a branch, LABEL is its name in
    VERIFYREPOS.  Differences are recorded in FAILURES.  Return True iff
    the two trees are identical.

    CTX has the attributes:
      CTX.tmpdir: specifying the directory for all temporary files.
      CTX.skip_cleanup: if true, the temporary files are not deleted.
      CTX.run_diff: if true, run diff on differing files.
      CTX.keyword_opt: extra option for the cvs export command, or None.
      CTX.symbol_transforms: renaming rules, see transform_symbol()."""

    if kind == 'trunk':
        itemname = kind
    else:
        itemname = kind + '-' + label
    cvs_export_dir = os.path.join(ctx.tmpdir, 'cvs-export-%s' % itemname)
    vrf_export_dir = os.path.join(
        ctx.tmpdir, '%s-export-%s' % (verifyrepos.name, itemname))

    # The symbol may be named differently on the CVS side.
    cvslabel = None
    if label:
        cvslabel = transform_symbol(ctx, label)

    try:
        cvsrepos.export(cvs_export_dir, cvslabel, ctx.keyword_opt)
        if kind == 'trunk':
            verifyrepos.export_trunk(vrf_export_dir)
        elif kind == 'tag':
            verifyrepos.export_tag(vrf_export_dir, label)
        else:
            verifyrepos.export_branch(vrf_export_dir, label)

        identical = tree_compare(
            failures, cvs_export_dir, vrf_export_dir, ctx.run_diff)
    finally:
        # Runs on success, on a mismatch and when an export failed.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, vrf_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)

    if not identical:
        return False
    return True
def verify_contents(failures, cvsrepos, verifyrepos, ctx):
    """Verify that the contents of the HEAD revision of all directories
    and files in the trunk, all tags and all branches in the conversion
    repository VERIFYREPOS matches the ones in the CVS repository CVSREPOS.
    CTX is passed through to verify_contents_single().

    Branches whose name starts with 'unlabeled-' are skipped.  Progress
    and a final PASS/FAIL summary are written to stdout, and every
    difference found is recorded in FAILURES."""

    # branches/tags that failed:
    locations = []

    # Verify contents of trunk
    sys.stdout.write('Verifying trunk\n')
    sys.stdout.flush()
    if not verify_contents_single(
            failures, cvsrepos, verifyrepos, 'trunk', None, ctx):
        locations.append('trunk')

    # Verify contents of all tags
    for tag in verifyrepos.tags():
        sys.stdout.write('Verifying tag %s\n' % tag)
        sys.stdout.flush()
        if not verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', tag, ctx):
            locations.append('tag:' + tag)

    # Verify contents of all branches
    for branch in verifyrepos.branches():
        if branch.startswith('unlabeled-'):
            sys.stdout.write('Skipped branch %s\n' % branch)
            sys.stdout.flush()
            continue
        sys.stdout.write('Verifying branch %s\n' % branch)
        # Flush before the slow part, as is done for trunk and tags.
        sys.stdout.flush()
        if not verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', branch, ctx):
            locations.append('branch:' + branch)

    # Compare the counter itself rather than the truth value of FAILURES,
    # so that the check does not depend on how that object converts to bool.
    assert (failures.count > 0) == bool(locations), \
        "failures = %r\nlocations = %r" % (failures, locations)

    # Show the results
    if failures.count > 0:
        sys.stdout.write('FAIL: %s != %s: %d failure(s) in:\n'
                         % (cvsrepos, verifyrepos, failures.count))
        for location in locations:
            sys.stdout.write(' %s\n' % location)
    else:
        sys.stdout.write('PASS: %s == %s\n' % (cvsrepos, verifyrepos))
    sys.stdout.flush()
class OptionContext(object):
    """Empty class whose instances serve as a bag of attributes."""
    pass
def main(argv):
    """Run the verification described by the command line ARGV.

    ARGV is the complete argument vector, program name included.  The
    function never returns: it leaves through sys.exit(), with status 1
    if a difference was found or a command failed, and with status 0
    otherwise.  Command line errors exit through the option parser."""

    parser = optparse.OptionParser(
        usage='%prog [options] cvs-repos verify-repos')
    parser.add_option('--branch',
                      help='verify contents of the branch BRANCH only')
    parser.add_option('--diff', action='store_true', dest='run_diff',
                      help='run diff on differing files')
    parser.add_option('--tag',
                      help='verify contents of the tag TAG only')
    parser.add_option('--tmpdir',
                      metavar='PATH',
                      help='path to store temporary files')
    parser.add_option('--trunk', action='store_true',
                      help='verify contents of trunk only')
    # dest is given explicitly: the name that optparse derives from the
    # option string is 'symbol_transform', which nothing below reads.
    parser.add_option('--symbol-transform', action='append',
                      dest='symbol_transforms',
                      metavar='P:S',
                      help='transform symbol names from P to S like cvs2svn, '
                           'except transforms SVN symbol to CVS symbol')
    parser.add_option('--svn',
                      action='store_const', dest='repos_type', const='svn',
                      help='assume verify-repos is svn [default]')
    parser.add_option('--hg',
                      action='store_const', dest='repos_type', const='hg',
                      help='assume verify-repos is hg')
    parser.add_option('--git',
                      action='store_const', dest='repos_type', const='git',
                      help='assume verify-repos is git')
    parser.add_option('--suppress-keywords',
                      action='store_const', dest='keyword_opt', const='-kk',
                      help='suppress CVS keyword expansion '
                           '(equivalent to --keyword-opt=-kk)')
    parser.add_option('--keyword-opt',
                      metavar='OPT',
                      help='control CVS keyword expansion by adding OPT to '
                           'cvs export command line')

    parser.set_defaults(run_diff=False,
                        tmpdir='',
                        skip_cleanup=False,
                        symbol_transforms=[],
                        repos_type='svn')
    # Parse the vector that was passed in, not sys.argv behind its back.
    (options, args) = parser.parse_args(argv[1:])

    # Turn each P:S value into the (compiled pattern, replacement) pair
    # that transform_symbol() expects.
    symbol_transforms = []
    for value in options.symbol_transforms:
        fields = value.split(":")
        if len(fields) != 2:
            parser.error("'%s' is not of the form P:S." % (value,))
        (pattern, replacement) = fields
        try:
            # NOTE(review): the pattern is anchored so that it has to match
            # the whole symbol name, which is presumably what cvs2svn's own
            # --symbol-transform does -- confirm against cvs2svn.
            symbol_transforms.append(
                (re.compile('^' + pattern + '$'), replacement))
        except re.error:
            parser.error("'%s' is not a valid regexp." % (pattern,))
    # The options object doubles as the CTX of the verify functions.
    options.symbol_transforms = symbol_transforms

    def error(msg):
        """Print an error to sys.stderr."""
        sys.stderr.write('Error: ' + str(msg) + '\n')

    verify_branch = options.branch
    verify_tag = options.tag
    verify_trunk = options.trunk

    # Consistency check for options and arguments.
    if len(args) != 2:
        parser.error("wrong number of arguments")

    cvs_path = args[0]
    verify_path = args[1]
    verify_klass = {'svn': SvnRepos,
                    'hg': HgRepos,
                    'git': GitRepos}[options.repos_type]

    failures = Failures()
    errored = False
    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        verifyrepos = verify_klass(verify_path)

        # Do our thing...
        if verify_branch:
            sys.stdout.write('Verifying branch %s\n' % verify_branch)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', verify_branch,
                options)
        elif verify_tag:
            sys.stdout.write('Verifying tag %s\n' % verify_tag)
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', verify_tag, options)
        elif verify_trunk:
            sys.stdout.write('Verifying trunk\n')
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'trunk', None, options)
        else:
            # Verify trunk, tags and branches
            verify_contents(failures, cvsrepos, verifyrepos, options)
    except RuntimeError:
        # sys.exc_info() instead of "except RuntimeError, e", which does
        # not parse under Python 3.
        error(sys.exc_info()[1])
        # A verification that could not be carried out is not a success.
        errored = True
    except KeyboardInterrupt:
        pass

    if errored or failures.count > 0:
        sys.exit(1)
    sys.exit(0)
# Run the verification only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)