Add some filenames with double quotes to main test repo.
[cvs2svn.git] / contrib / verify-cvs2svn.py
blob767c9e88b5be7881ba8ce53936e1d8bf550bb22d
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
33 import os
34 import sys
35 import optparse
36 import subprocess
37 import shutil
38 import re
39 import tarfile
# Executable names of the command line clients for CVS, Subversion,
# Mercurial and Git.  Each one is used as the first element of the
# argument lists that this script runs.
CVS_CMD = 'cvs'
SVN_CMD = 'svn'
HG_CMD = 'hg'
GIT_CMD = 'git'
def pipe(cmd):
    """Run CMD and capture what it writes to standard output.

    CMD is an argument list (program name followed by its arguments); it
    is passed to subprocess.Popen as is, without a shell.  Only standard
    output is captured; standard error is not redirected.

    Return a tuple (OUTPUT, STATUS), where OUTPUT is everything the child
    wrote to standard output and STATUS is its exit status."""
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # communicate() reads the pipe to EOF, closes our end of it and waits
    # for the child, so no file descriptor is left open afterwards.
    output = child.communicate()[0]
    return (output, child.returncode)
def cmd_failed(cmd, output, status):
    """Report that the command CMD failed, then raise RuntimeError.

    CMD is the argument list that was run and OUTPUT is the text it
    produced; both are echoed to standard output before the exception is
    raised.  STATUS is accepted but does not appear in the report."""
    sys.stdout.write('CMD FAILED: ' + ' '.join(cmd) + '\n')
    sys.stdout.write('Output:\n')
    sys.stdout.write(output)
    # The exception names only the program, not its arguments.
    raise RuntimeError('%s command failed!' % cmd[0])
def split_output(cmd):
    """Run CMD and return its standard output as a list of lines.

    CMD is an argument list as accepted by pipe().  The output is split
    on os.linesep; the piece after the final line separator is discarded,
    and so is one trailing empty line if there is one.

    Raise RuntimeError (through cmd_failed()) if the command exits with a
    non-zero status.

    This is a plain module-level function: it takes CMD only.  It used to
    declare a leading 'self' parameter even though every caller passes
    just the command, which made each call fail with TypeError."""
    (output, status) = pipe(cmd)
    if status:
        cmd_failed(cmd, output, status)
    lines = output.split(os.linesep)[:-1]
    if lines and not lines[-1]:
        del lines[-1]
    return lines
class CvsRepos:
    """A CVS repository, or a directory inside one, given by a local path.

    Attributes set by the constructor:
      cvsroot: the directory that contains the 'CVSROOT' directory.
      module:  the path of the requested directory relative to cvsroot,
               or "." when the requested directory is cvsroot itself."""

    def __init__(self, path):
        """Open the CVS repository at PATH.

        PATH may be the repository root or any directory below it; in
        the latter case the parent directories are searched upwards for
        the one holding 'CVSROOT'.  Raise RuntimeError if PATH is not a
        directory or if no 'CVSROOT' is found."""
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        def has_cvsroot(directory):
            return os.path.exists(os.path.join(directory, 'CVSROOT'))

        if has_cvsroot(path):
            # The whole repository
            cvsroot, module = path, "."
        else:
            (cvsroot, module) = os.path.split(path)
            while not has_cvsroot(cvsroot):
                (parent, tail) = os.path.split(cvsroot)
                if parent == cvsroot:
                    # Reached the top of the filesystem.
                    raise RuntimeError('Cannot find the CVSROOT')
                module = os.path.join(tail, module)
                cvsroot = parent

        self.module = module
        self.cvsroot = cvsroot

    def __str__(self):
        return os.path.split(self.cvsroot)[1]

    def export(self, dest_path, rev=None, keyword_opt=None):
        """Export the module to the new directory DEST_PATH.

        REV is any valid CVS revision string, or None to export the HEAD
        revision (requested from cvs as '-D now').  KEYWORD_OPT, if
        given, is added to the cvs command line as is.  Any output from
        cvs, or a non-zero exit status, is treated as a failure and
        raises RuntimeError."""
        os.mkdir(dest_path)
        if rev:
            revision_args = ['-r', rev]
        else:
            revision_args = ['-D', 'now']
        cmd = [CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
        cmd += revision_args
        if keyword_opt:
            cmd.append(keyword_opt)
        cmd += ['-d', dest_path, self.module]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)
class SvnRepos:
    """A Subversion repository accessed through the svn command line client."""

    name = 'svn'

    def __init__(self, url):
        """Open the Subversion repository at URL.

        URL may also be a local path, in which case it is converted to a
        file:// URL.  The names found under 'tags' and 'branches' are
        listed once here and cached."""
        # Anything without a scheme separator is taken to be a path.
        if '://' not in url:
            abspath = os.path.abspath(url)
            if abspath[0] != '/':
                scheme = 'file:///'
            else:
                scheme = 'file://'
            url = scheme + abspath
            if os.sep != '/':
                url = url.replace(os.sep, '/')

        self.url = url

        # Cache a list of all tags and branches
        top_level = self.list('')
        if 'tags' in top_level:
            self.tag_list = self.list('tags')
        else:
            self.tag_list = []
        if 'branches' in top_level:
            self.branch_list = self.list('branches')
        else:
            self.branch_list = []

    def __str__(self):
        return self.url.rsplit('/', 1)[-1]

    def export(self, path, dest_path):
        """Export PATH (relative to the repository URL) to DEST_PATH.

        Any output from svn, or a non-zero exit status, is treated as a
        failure and raises RuntimeError."""
        cmd = [SVN_CMD, 'export', '-q', self.url + '/' + path, dest_path]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    def list(self, path):
        """Return a list of all files and directories in PATH.

        Trailing slashes are stripped from the names and empty lines are
        skipped."""
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        return [line.rstrip('/') for line in split_output(cmd) if line]

    def tags(self):
        """Return a list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return a list of all branches in the repository."""
        return self.branch_list
class HgRepos:
    """A Mercurial repository accessed through the hg command line client."""

    name = 'hg'

    def __init__(self, path):
        """Remember the repository at PATH; no command is run here."""
        self.path = path
        self.base_cmd = [HG_CMD, '-R', self.path]

        # Filled in by the first call to branches():
        self._branches = None      # cached branch names, minus 'default'
        self._have_default = None  # whether a 'default' branch was listed

    def __str__(self):
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Archive revision REV as plain files into DEST_PATH.

        Paths starting with '.hg' are excluded.  Any output from hg, or
        a non-zero exit status, raises RuntimeError."""
        cmd = self.base_cmd + [
            'archive',
            '--type', 'files',
            '--rev', rev,
            '--exclude', r're:^\.hg',
            dest_path,
            ]
        (output, status) = pipe(cmd)
        if status or output:
            cmd_failed(cmd, output, status)

        if os.path.exists(dest_path):
            return

        # Mercurial creates nothing when there is nothing to export, which
        # breaks tree_compare().  Create the empty directory ourselves,
        # but only after checking that the manifest of the revision
        # really has no entries other than '.hg*' ones.
        cmd = self.base_cmd + ['manifest', '--rev', rev]
        tracked = [filename
                   for filename in split_output(cmd)
                   if not filename.startswith('.hg')]
        if not tracked:
            os.mkdir(dest_path)

    def export_trunk(self, dest_path):
        """Export the 'default' branch, or create an empty DEST_PATH if
        the repository has no such branch."""
        self.branches()  # ensure _have_default is set
        if not self._have_default:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)
        else:
            self._export(dest_path, 'default')

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the tag names reported by 'hg tags -q', without 'tip'."""
        names = split_output(self.base_cmd + ['tags', '-q'])
        names.remove('tip')
        return names

    def branches(self):
        """Return the branch names reported by 'hg branches -q', without
        'default'.  The result is computed once and then cached."""
        if self._branches is None:
            names = split_output(self.base_cmd + ['branches', '-q'])
            self._have_default = 'default' in names
            if self._have_default:
                names.remove('default')
            self._branches = names

        return self._branches
class GitRepos:
    """A git repository accessed through the git command line client."""

    name = 'git'

    def __init__(self, path):
        """Remember the repository at PATH; no command is run here."""
        self.path = path
        self.repo_cmd = [
            GIT_CMD,
            '--git-dir=' + os.path.join(self.path, '.git'),
            '--work-tree=' + self.path,
            ]

        # Filled in by the first call to branches():
        self._branches = None     # cached branch names, minus 'master'
        self._have_master = None  # whether a 'master' branch was listed

    def __str__(self):
        return os.path.basename(self.path)

    def _export(self, dest_path, rev):
        """Unpack revision REV into the new directory DEST_PATH.

        The output of 'git archive' is piped into an external 'tar -x'.
        Raise RuntimeError if tar prints anything or exits with a
        non-zero status, or if git exits with a non-zero status."""
        os.mkdir(dest_path)

        # The archive is deliberately not read with the tarfile module:
        # doing so made git_proc.wait() hang for some tags, with the git
        # archive process left in a <defunct> state.
        cmd = [GIT_CMD, 'archive', '--remote=' + self.path, '--format=tar', rev]
        git_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        tar_proc = subprocess.Popen(
            ['tar', '-C', dest_path, '-x'],
            stdin=git_proc.stdout, stdout=subprocess.PIPE,
            )
        # tar has inherited the read end of git's pipe.  Close our copy so
        # that git receives SIGPIPE, instead of blocking on a pipe that
        # nobody reads, should tar exit early.
        git_proc.stdout.close()

        output = tar_proc.communicate()[0]
        tar_status = tar_proc.returncode
        # Always reap git, even when tar has failed.
        git_status = git_proc.wait()

        if output or tar_status:
            raise RuntimeError(
                'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
                % (rev, self.path, dest_path, output)
                )
        if git_status:
            raise RuntimeError(
                'Git extract of rev %s from repo %s to %s failed!'
                % (rev, self.path, dest_path)
                )

    def export_trunk(self, dest_path):
        """Export the 'master' branch, or create an empty DEST_PATH if
        the repository has no such branch."""
        self.branches()  # ensure _have_master is set
        if self._have_master:
            self._export(dest_path, 'master')
        else:
            # same as CVS does when exporting empty trunk
            os.mkdir(dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self._export(dest_path, tag)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self._export(dest_path, branch)

    def tags(self):
        """Return the tag names reported by 'git tag'."""
        return split_output(self.repo_cmd + ['tag'])

    def branches(self):
        """Return the branch names reported by 'git branch', without
        'master'.  The result is computed once and then cached."""
        if self._branches is None:
            cmd = self.repo_cmd + ['branch']
            # Remove the two characters at the start of each branch name.
            names = [line[2:] for line in split_output(cmd)]
            self._have_master = 'master' in names
            if self._have_master:
                names.remove('master')
            self._branches = names

        return self._branches
def transform_symbol(ctx, name):
    """Apply the --symbol-transform renaming rules to the symbol NAME.

    CTX.symbol_transforms is a sequence of (PATTERN, REPLACEMENT) pairs,
    where PATTERN has a sub() method like a compiled regular expression.
    The rules are applied in order, each one to the result of the
    previous one, and every rule that changes the name is reported on
    standard output.  Return the transformed symbol name."""
    current = name
    for (regexp, substitute) in ctx.symbol_transforms:
        candidate = regexp.sub(substitute, current)
        if candidate == current:
            continue
        sys.stdout.write(
            " symbol '%s' transformed to '%s'\n" % (current, candidate))
        current = candidate

    return current
class Failures(object):
    """Counter of verification failures that also prints each of them.

    An instance is true iff at least one failure has been reported."""

    def __init__(self):
        self.count = 0  # number of failures seen

    def __str__(self):
        return str(self.count)

    def __repr__(self):
        return "<%s at 0x%x: %s>" % (self.__class__.__name__, id(self), self.count)

    def report(self, summary, details=None):
        """Count one failure and print it to standard output.

        SUMMARY is a one-line description.  DETAILS, if given, is an
        iterable of further lines that are printed below the summary."""
        self.count += 1
        sys.stdout.write(' FAIL: %s\n' % summary)
        if details:
            for line in details:
                sys.stdout.write(' %s\n' % line)

    def __nonzero__(self):
        return self.count > 0

    # Python 3 consults __bool__ instead of __nonzero__; without this alias
    # an instance with no failures would still be considered true there.
    __bool__ = __nonzero__
def _same_contents(path1, path2, chunk_size=8192):
    """Return True iff the files PATH1 and PATH2 hold the same bytes.

    Both files are read in chunks of CHUNK_SIZE bytes and are closed
    again before returning, whatever the outcome."""
    file1 = open(path1, 'rb')
    try:
        file2 = open(path2, 'rb')
        try:
            while True:
                data1 = file1.read(chunk_size)
                data2 = file2.read(chunk_size)
                if data1 != data2:
                    return False
                if not data1:
                    # Both files hit EOF at the same time.
                    return True
        finally:
            file2.close()
    finally:
        file1.close()


def file_compare(failures, base1, base2, run_diff, rel_path):
    """Compare the mode and contents of two files.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Every
    difference found is recorded with FAILURES.report().  If RUN_DIFF is
    true and the contents differ, the output of 'diff -u' is attached to
    the report.  Return True iff the file mode and contents are
    identical."""
    # Imported here so that this function does not depend on the
    # module's import list.
    import stat

    ok = True
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)

    # only look at owner bits
    mode1 = os.stat(path1).st_mode & stat.S_IRWXU
    mode2 = os.stat(path2).st_mode & stat.S_IRWXU
    if mode1 != mode2:
        failures.report('File modes differ for %s' % rel_path,
                        details=['%s: %o' % (path1, mode1),
                                 '%s: %o' % (path2, mode2)])
        ok = False

    if not _same_contents(path1, path2):
        if run_diff:
            # diff's exit status is not interesting: we already know that
            # the files differ.
            output = pipe(['diff', '-u', path1, path2])[0]
            diff = output.split(os.linesep)
        else:
            diff = None
        failures.report('File contents differ for %s' % rel_path,
                        details=diff)
        ok = False

    return ok
def tree_compare(failures, base1, base2, run_diff, rel_path=''):
    """Compare the contents of two directory trees, including file contents.

    The paths are specified as two base paths BASE1 and BASE2, and a
    path REL_PATH that is relative to the two base paths.  Every
    difference found is recorded with FAILURES.report(); RUN_DIFF is
    passed on to file_compare().  Return True iff the trees are
    identical."""
    if not rel_path:
        path1 = base1
        path2 = base2
    else:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)

    for path in (path1, path2):
        if not os.path.exists(path):
            failures.report('%s does not exist' % path)
            return False

    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(failures, base1, base2, run_diff, rel_path)
    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        failures.report('Path types differ for %r' % rel_path)
        return False

    entries1 = sorted(os.listdir(path1))
    entries2 = sorted(os.listdir(path2))

    ok = True

    # Real lists (not lazy filter objects) are needed here, because they
    # are tested for emptiness; the sets keep the membership tests cheap.
    names1 = set(entries1)
    names2 = set(entries2)
    missing = [entry for entry in entries1 if entry not in names2]
    extra = [entry for entry in entries2 if entry not in names1]
    if missing:
        failures.report('Directory /%s is missing entries: %s' %
                        (rel_path, ', '.join(missing)))
        ok = False
    if extra:
        failures.report('Directory /%s has extra entries: %s' %
                        (rel_path, ', '.join(extra)))
        ok = False

    # Entries that are missing on the second side are visited as well;
    # the recursive call reports them once more as non-existent paths.
    for entry in entries1:
        new_rel_path = os.path.join(rel_path, entry)
        if not tree_compare(failures, base1, base2, run_diff, new_rel_path):
            ok = False
    return ok
def verify_contents_single(failures, cvsrepos, verifyrepos, kind, label, ctx):
    """Verify the HEAD revision of a trunk, tag, or branch.

    Export the same line of development from the CVS repository
    CVSREPOS and from the conversion repository VERIFYREPOS, then check
    that all directories and files match.  KIND can be either 'trunk',
    'tag' or 'branch'.  If KIND is either 'tag' or 'branch', LABEL is
    the name of the tag or branch in VERIFYREPOS; the CVS side is
    exported under the name produced by transform_symbol().
    Differences are recorded in FAILURES.  CTX has the attributes:
    CTX.tmpdir: specifying the directory for all temporary files.
    CTX.skip_cleanup: if true, the temporary files are not deleted.
    CTX.run_diff: if true, run diff on differing files.
    CTX.keyword_opt: passed on to the CVS export.
    CTX.symbol_transforms: read by transform_symbol().

    Return True iff the two exports are identical."""
    if kind != 'trunk':
        itemname = kind + '-' + label
    else:
        itemname = kind
    cvs_export_dir = os.path.join(ctx.tmpdir, 'cvs-export-%s' % itemname)
    vrf_export_dir = os.path.join(
        ctx.tmpdir, '%s-export-%s' % (verifyrepos.name, itemname))

    cvslabel = None
    if label:
        cvslabel = transform_symbol(ctx, label)

    try:
        cvsrepos.export(cvs_export_dir, cvslabel, ctx.keyword_opt)
        if kind == 'trunk':
            verifyrepos.export_trunk(vrf_export_dir)
        elif kind == 'tag':
            verifyrepos.export_tag(vrf_export_dir, label)
        else:
            verifyrepos.export_branch(vrf_export_dir, label)

        identical = tree_compare(
            failures, cvs_export_dir, vrf_export_dir, ctx.run_diff)
    finally:
        # Remove whatever was exported, even if an export or the
        # comparison raised an exception.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, vrf_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)

    if not identical:
        return False
    return True
def verify_contents(failures, cvsrepos, verifyrepos, ctx):
    """Verify trunk, all tags and all branches, then print a summary.

    Check that the contents of the HEAD revision of all directories and
    files in the trunk, all tags and all branches in the conversion
    repository VERIFYREPOS match the ones in the CVS repository
    CVSREPOS.  Branches whose names start with 'unlabeled-' are skipped.
    Differences are recorded in FAILURES.  CTX is passed through to
    verify_contents_single()."""

    def matches(kind, label):
        return verify_contents_single(
            failures, cvsrepos, verifyrepos, kind, label, ctx)

    # branches/tags that failed:
    locations = []

    # Verify contents of trunk
    sys.stdout.write('Verifying trunk\n')
    sys.stdout.flush()
    if not matches('trunk', None):
        locations.append('trunk')

    # Verify contents of all tags
    for tag in verifyrepos.tags():
        sys.stdout.write('Verifying tag %s\n' % (tag,))
        sys.stdout.flush()
        if not matches('tag', tag):
            locations.append('tag:' + tag)

    # Verify contents of all branches
    for branch in verifyrepos.branches():
        if branch[:10] == 'unlabeled-':
            sys.stdout.write('Skipped branch %s\n' % (branch,))
        else:
            sys.stdout.write('Verifying branch %s\n' % (branch,))
            if not matches('branch', branch):
                locations.append('branch:' + branch)
        sys.stdout.flush()

    # A location is recorded exactly when something was reported.
    assert bool(failures) == bool(locations), \
        "failures = %r\nlocations = %r" % (failures, locations)

    # Show the results
    if failures:
        sys.stdout.write('FAIL: %s != %s: %d failure(s) in:\n'
                         % (cvsrepos, verifyrepos, failures.count))
        for location in locations:
            sys.stdout.write(' %s\n' % location)
    else:
        sys.stdout.write('PASS: %s == %s\n' % (cvsrepos, verifyrepos))
    sys.stdout.flush()
class OptionContext:
    """Plain class with no attributes or behavior of its own."""
def main(argv):
    """Command line entry point: parse ARGV, run the verification, exit.

    ARGV is the full argument vector, with the program name in ARGV[0]
    (as in sys.argv).  Two positional arguments are required: the path
    of the CVS repository and the path or URL of the converted
    repository.

    This function does not return; it ends in sys.exit().  The exit
    status is 1 if any difference was reported or if a RuntimeError
    stopped the verification, and 0 otherwise.  Invalid command lines
    are rejected through parser.error()."""
    parser = optparse.OptionParser(
        usage='%prog [options] cvs-repos verify-repos')
    parser.add_option('--branch',
                      help='verify contents of the branch BRANCH only')
    parser.add_option('--diff', action='store_true', dest='run_diff',
                      help='run diff on differing files')
    parser.add_option('--tag',
                      help='verify contents of the tag TAG only')
    parser.add_option('--tmpdir',
                      metavar='PATH',
                      help='path to store temporary files')
    parser.add_option('--trunk', action='store_true',
                      help='verify contents of trunk only')
    # dest is spelled out: the name optparse derives from the option
    # would be 'symbol_transform', which is not the attribute that the
    # rest of the script reads.
    parser.add_option('--symbol-transform', action='append',
                      dest='symbol_transforms',
                      metavar='P:S',
                      help='transform symbol names from P to S like cvs2svn, '
                           'except transforms SVN symbol to CVS symbol')
    parser.add_option('--svn',
                      action='store_const', dest='repos_type', const='svn',
                      help='assume verify-repos is svn [default]')
    parser.add_option('--hg',
                      action='store_const', dest='repos_type', const='hg',
                      help='assume verify-repos is hg')
    parser.add_option('--git',
                      action='store_const', dest='repos_type', const='git',
                      help='assume verify-repos is git')
    parser.add_option('--suppress-keywords',
                      action='store_const', dest='keyword_opt', const='-kk',
                      help='suppress CVS keyword expansion '
                           '(equivalent to --keyword-opt=-kk)')
    parser.add_option('--keyword-opt',
                      metavar='OPT',
                      help='control CVS keyword expansion by adding OPT to '
                           'cvs export command line')

    parser.set_defaults(run_diff=False,
                        tmpdir='',
                        skip_cleanup=False,
                        repos_type='svn')
    (options, args) = parser.parse_args(argv[1:])

    # Turn each 'P:S' string into the (compiled pattern, replacement)
    # pair that transform_symbol() expects to find in
    # options.symbol_transforms.
    symbol_transforms = []
    for value in options.symbol_transforms or []:
        parts = value.split(':')
        if len(parts) != 2:
            parser.error("'%s' is not of the form P:S." % (value,))
        (pattern, replacement) = parts
        try:
            # The pattern has to match the entire symbol name, as it
            # does for the cvs2svn option of the same name.
            regexp = re.compile('^(?:' + pattern + ')$')
        except re.error:
            parser.error("'%s' is not a valid regexp." % (pattern,))
        symbol_transforms.append((regexp, replacement))
    options.symbol_transforms = symbol_transforms

    def error(msg):
        """Print an error to sys.stderr."""
        sys.stderr.write('Error: ' + str(msg) + '\n')

    verify_branch = options.branch
    verify_tag = options.tag
    verify_trunk = options.trunk

    # Consistency check for options and arguments.
    if len(args) != 2:
        parser.error("wrong number of arguments")

    cvs_path = args[0]
    verify_path = args[1]
    verify_klass = {'svn': SvnRepos,
                    'hg': HgRepos,
                    'git': GitRepos}[options.repos_type]

    failures = Failures()
    had_error = False
    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        verifyrepos = verify_klass(verify_path)

        # Do our thing...
        if verify_branch:
            sys.stdout.write('Verifying branch %s\n' % (verify_branch,))
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'branch', verify_branch,
                options)
        elif verify_tag:
            sys.stdout.write('Verifying tag %s\n' % (verify_tag,))
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'tag', verify_tag, options)
        elif verify_trunk:
            sys.stdout.write('Verifying trunk\n')
            verify_contents_single(
                failures, cvsrepos, verifyrepos, 'trunk', None, options)
        else:
            # Verify trunk, tags and branches
            verify_contents(failures, cvsrepos, verifyrepos, options)
    except RuntimeError:
        # sys.exc_info() is used in place of 'except ... as e' so that
        # this parses on old and new Python versions alike.
        error(sys.exc_info()[1])
        # A verification that could not be carried out must not be
        # reported as a success.
        had_error = True
    except KeyboardInterrupt:
        pass

    if failures.count > 0 or had_error:
        sys.exit(1)
    sys.exit(0)
# Run the verification only when executed as a script, not on import.
if __name__ == '__main__':
    main(sys.argv)