2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
42 # CVS and Subversion command line client commands
50 """Run cmd as a pipe. Return (output, status)."""
51 child
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
52 output
= child
.stdout
.read()
54 return (output
, status
)
57 def cmd_failed(cmd
, output
, status
):
58 print 'CMD FAILED:', ' '.join(cmd
)
60 sys
.stdout
.write(output
)
61 raise RuntimeError('%s command failed!' % cmd
[0])
def split_output(self, cmd=None):
    """Run a command and return its standard output as a list of lines.

    Every caller in this file invokes split_output(cmd) with a single
    argument, which the former two-parameter signature rejected with a
    TypeError.  The leading parameter is kept for compatibility: when CMD
    is omitted, the first argument is taken to be the command.

    The command (a list of program arguments) is run through pipe().  If
    the returned status is true, the failure is handed to cmd_failed(),
    which raises RuntimeError.  Otherwise the output is split on
    os.linesep, the piece following the final separator is discarded, and
    one further trailing empty line is dropped if present.
    """
    if cmd is None:
        # Called as split_output(cmd): the command arrived in the first slot.
        cmd = self
    (output, status) = pipe(cmd)
    if status:
        cmd_failed(cmd, output, status)
    lines = output.split(os.linesep)[:-1]
    if lines and not lines[-1]:
        del lines[-1]
    return lines
75 def __init__(self
, path
):
76 """Open the CVS repository at PATH."""
77 path
= os
.path
.abspath(path
)
78 if not os
.path
.isdir(path
):
79 raise RuntimeError('CVS path is not a directory')
81 if os
.path
.exists(os
.path
.join(path
, 'CVSROOT')):
82 # The whole repository
86 self
.cvsroot
= os
.path
.dirname(path
)
87 self
.module
= os
.path
.basename(path
)
88 while not os
.path
.exists(os
.path
.join(self
.cvsroot
, 'CVSROOT')):
89 parent
= os
.path
.dirname(self
.cvsroot
)
90 if parent
== self
.cvsroot
:
91 raise RuntimeError('Cannot find the CVSROOT')
92 self
.module
= os
.path
.join(os
.path
.basename(self
.cvsroot
), self
.module
)
96 return os
.path
.basename(self
.cvsroot
)
98 def export(self
, dest_path
, rev
=None, keyword_opt
=None):
99 """Export revision REV to DEST_PATH where REV can be None to export
100 the HEAD revision, or any valid CVS revision string to export that
103 cmd
= [CVS_CMD
, '-Q', '-d', ':local:' + self
.cvsroot
, 'export']
105 cmd
.extend(['-r', rev
])
107 cmd
.extend(['-D', 'now'])
109 cmd
.append(keyword_opt
)
110 cmd
.extend(['-d', dest_path
, self
.module
])
111 (output
, status
) = pipe(cmd
)
113 cmd_failed(cmd
, output
, status
)
119 def __init__(self
, url
):
120 """Open the Subversion repository at URL."""
121 # Check if the user supplied an URL or a path
122 if url
.find('://') == -1:
123 abspath
= os
.path
.abspath(url
)
124 url
= 'file://' + (abspath
[0] != '/' and '/' or '') + abspath
126 url
= url
.replace(os
.sep
, '/')
130 # Cache a list of all tags and branches
133 self
.tag_list
= self
.list('tags')
136 if 'branches' in list:
137 self
.branch_list
= self
.list('branches')
139 self
.branch_list
= []
142 return self
.url
.split('/')[-1]
144 def export(self
, path
, dest_path
):
145 """Export PATH to DEST_PATH."""
146 url
= '/'.join([self
.url
, path
])
147 cmd
= [SVN_CMD
, 'export', '-q', url
, dest_path
]
148 (output
, status
) = pipe(cmd
)
150 cmd_failed(cmd
, output
, status
)
def export_trunk(self, dest_path):
    """Write a copy of trunk into DEST_PATH.

    Delegates to self.export() using the fixed repository path 'trunk'.
    """
    trunk_path = 'trunk'
    self.export(trunk_path, dest_path)
def export_tag(self, dest_path, tag):
    """Write a copy of the tag named TAG into DEST_PATH.

    The tag is addressed as 'tags/' followed by TAG and handed to
    self.export().
    """
    tag_path = 'tags/' + tag
    self.export(tag_path, dest_path)
def export_branch(self, dest_path, branch):
    """Write a copy of the branch named BRANCH into DEST_PATH.

    The branch is addressed as 'branches/' followed by BRANCH and handed
    to self.export().
    """
    branch_path = 'branches/' + branch
    self.export(branch_path, dest_path)
164 def list(self
, path
):
165 """Return a list of all files and directories in PATH."""
166 cmd
= [SVN_CMD
, 'ls', self
.url
+ '/' + path
]
168 for line
in split_output(cmd
):
170 entries
.append(line
.rstrip('/'))
174 """Return a list of all tags in the repository."""
178 """Return a list of all branches in the repository."""
179 return self
.branch_list
185 def __init__(self
, path
):
187 self
.base_cmd
= [HG_CMD
, '-R', self
.path
]
189 self
._branches
= None # cache result of branches()
190 self
._have
_default
= None # so export_trunk() doesn't blow up
193 return os
.path
.basename(self
.path
)
195 def _export(self
, dest_path
, rev
):
196 cmd
= self
.base_cmd
+ ['archive',
199 '--exclude', 're:^\.hg',
201 (output
, status
) = pipe(cmd
)
203 cmd_failed(cmd
, output
, status
)
205 # If Mercurial has nothing to export, then it doesn't create
206 # dest_path. This breaks tree_compare(), so just check that the
207 # manifest for the chosen revision really is empty, and if so create
209 if not os
.path
.exists(dest_path
):
210 cmd
= self
.base_cmd
+ ['manifest', '--rev', rev
]
212 manifest
= [fn
for fn
in split_output(cmd
)
213 if not fn
.startswith('.hg')]
217 def export_trunk(self
, dest_path
):
218 self
.branches() # ensure _have_default is set
219 if self
._have
_default
:
220 self
._export
(dest_path
, 'default')
222 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the tag TAG to DEST_PATH.

    TAG is passed unchanged as the revision argument of self._export().
    """
    revision = tag
    self._export(dest_path, revision)
def export_branch(self, dest_path, branch):
    """Export the branch BRANCH to DEST_PATH.

    BRANCH is passed unchanged as the revision argument of self._export().
    """
    revision = branch
    self._export(dest_path, revision)
232 cmd
= self
.base_cmd
+ ['tags', '-q']
233 tags
= split_output(cmd
)
238 if self
._branches
is None:
239 cmd
= self
.base_cmd
+ ['branches', '-q']
240 self
._branches
= branches
= split_output(cmd
)
242 branches
.remove('default')
243 self
._have
_default
= True
245 self
._have
_default
= False
247 return self
._branches
253 def __init__(self
, path
):
257 '--git-dir=' + os
.path
.join(self
.path
, '.git'),
258 '--work-tree=' + self
.path
,
261 self
._branches
= None # cache result of branches()
262 self
._have
_master
= None # so export_trunk() doesn't blow up
265 return os
.path
.basename(self
.path
)
267 def _export(self
, dest_path
, rev
):
268 # clone the repository
269 cmd
= [GIT_CMD
, 'archive', '--remote=' + self
.path
, '--format=tar', rev
]
270 git_proc
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
273 # Unfortunately for some git tags the below causes
274 # git_proc.wait() to hang. The git archive process is in a
275 # <defunct> state and the verify-cvs2svn hangs for good.
276 tar
= tarfile
.open(mode
="r|", fileobj
=git_proc
.stdout
)
278 tar
.extract(tarinfo
, dest_path
)
282 tar_proc
= subprocess
.Popen(
283 ['tar', '-C', dest_path
, '-x'],
284 stdin
=git_proc
.stdout
, stdout
=subprocess
.PIPE
,
286 output
= tar_proc
.stdout
.read()
287 status
= tar_proc
.wait()
290 'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
291 % (rev
, self
.path
, dest_path
, output
)
294 status
= git_proc
.wait()
297 'Git extract of rev %s from repo %s to %s failed!'
298 % (rev
, self
.path
, dest_path
)
301 if not os
.path
.exists(dest_path
):
303 'Git clone of %s to %s failed!' % (self
.path
, dest_path
)
306 def export_trunk(self
, dest_path
):
307 self
.branches() # ensure _have_master is set
308 if self
._have
_master
:
309 self
._export
(dest_path
, 'master')
311 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the tag TAG to DEST_PATH.

    TAG is passed unchanged as the revision argument of self._export().
    """
    revision = tag
    self._export(dest_path, revision)
def export_branch(self, dest_path, branch):
    """Export the branch BRANCH to DEST_PATH.

    BRANCH is passed unchanged as the revision argument of self._export().
    """
    revision = branch
    self._export(dest_path, revision)
321 cmd
= self
.repo_cmd
+ ['tag']
322 tags
= split_output(cmd
)
326 if self
._branches
is None:
327 cmd
= self
.repo_cmd
+ ['branch']
328 branches
= split_output(cmd
)
329 # Remove the two characters at the start of the branch name
330 for i
in range(len(branches
)):
331 branches
[i
] = branches
[i
][2:]
332 self
._branches
= branches
334 branches
.remove('master')
335 self
._have
_master
= True
337 self
._have
_master
= False
339 return self
._branches
342 def transform_symbol(ctx
, name
):
343 """Transform the symbol NAME using the renaming rules specified
344 with --symbol-transform. Return the transformed symbol name."""
346 for (pattern
, replacement
) in ctx
.symbol_transforms
:
347 newname
= pattern
.sub(replacement
, name
)
349 print " symbol '%s' transformed to '%s'" % (name
, newname
)
355 class Failures(object):
357 self
.count
= 0 # number of failures seen
360 return str(self
.count
)
363 return "<%s at 0x%x: %s>" % (self
.__class
__.__name
__, id(self
), self
.count
)
365 def report(self
, summary
, details
=None):
367 sys
.stdout
.write(' FAIL: %s\n' % summary
)
370 sys
.stdout
.write(' %s\n' % line
)
def __nonzero__(self):
    """Truth-value hook: true when self.count is greater than zero."""
    return 0 < self.count
376 def file_compare(failures
, base1
, base2
, run_diff
, rel_path
):
377 """Compare the mode and contents of two files.
379 The paths are specified as two base paths BASE1 and BASE2, and a
380 path REL_PATH that is relative to the two base paths. Return True
381 iff the file mode and contents are identical."""
384 path1
= os
.path
.join(base1
, rel_path
)
385 path2
= os
.path
.join(base2
, rel_path
)
386 mode1
= os
.stat(path1
).st_mode
& 0700 # only look at owner bits
387 mode2
= os
.stat(path2
).st_mode
& 0700
389 failures
.report('File modes differ for %s' % rel_path
,
390 details
=['%s: %o' % (path1
, mode1
),
391 '%s: %o' % (path2
, mode2
)])
394 file1
= open(path1
, 'rb')
395 file2
= open(path2
, 'rb')
397 data1
= file1
.read(8192)
398 data2
= file2
.read(8192)
401 cmd
= ['diff', '-u', path1
, path2
]
402 (output
, status
) = pipe(cmd
)
403 diff
= output
.split(os
.linesep
)
406 failures
.report('File contents differ for %s' % rel_path
,
417 def tree_compare(failures
, base1
, base2
, run_diff
, rel_path
=''):
418 """Compare the contents of two directory trees, including file contents.
420 The paths are specified as two base paths BASE1 and BASE2, and a
421 path REL_PATH that is relative to the two base paths. Return True
422 iff the trees are identical."""
428 path1
= os
.path
.join(base1
, rel_path
)
429 path2
= os
.path
.join(base2
, rel_path
)
430 if not os
.path
.exists(path1
):
431 failures
.report('%s does not exist' % path1
)
433 if not os
.path
.exists(path2
):
434 failures
.report('%s does not exist' % path2
)
436 if os
.path
.isfile(path1
) and os
.path
.isfile(path2
):
437 return file_compare(failures
, base1
, base2
, run_diff
, rel_path
)
438 if not (os
.path
.isdir(path1
) and os
.path
.isdir(path2
)):
439 failures
.report('Path types differ for %r' % rel_path
)
441 entries1
= os
.listdir(path1
)
443 entries2
= os
.listdir(path2
)
448 missing
= filter(lambda x
: x
not in entries2
, entries1
)
449 extra
= filter(lambda x
: x
not in entries1
, entries2
)
451 failures
.report('Directory /%s is missing entries: %s' %
452 (rel_path
, ', '.join(missing
)))
455 failures
.report('Directory /%s has extra entries: %s' %
456 (rel_path
, ', '.join(extra
)))
459 for entry
in entries1
:
460 new_rel_path
= os
.path
.join(rel_path
, entry
)
461 if not tree_compare(failures
, base1
, base2
, run_diff
, new_rel_path
):
466 def verify_contents_single(failures
, cvsrepos
, verifyrepos
, kind
, label
, ctx
):
467 """Verify the HEAD revision of a trunk, tag, or branch.
469 Verify that the contents of the HEAD revision of all directories and
470 files in the conversion repository VERIFYREPOS match the ones in the
471 CVS repository CVSREPOS. KIND can be either 'trunk', 'tag' or
472 'branch'. If KIND is either 'tag' or 'branch', LABEL is used to
473 specify the name of the tag or branch. CTX has the attributes:
474 CTX.tmpdir: specifying the directory for all temporary files.
475 CTX.skip_cleanup: if true, the temporary files are not deleted.
476 CTX.run_diff: if true, run diff on differing files."""
478 itemname
= kind
+ (kind
!= 'trunk' and '-' + label
or '')
479 cvs_export_dir
= os
.path
.join(
480 ctx
.tmpdir
, 'cvs-export-%s' % itemname
)
481 vrf_export_dir
= os
.path
.join(
482 ctx
.tmpdir
, '%s-export-%s' % (verifyrepos
.name
, itemname
))
485 cvslabel
= transform_symbol(ctx
, label
)
490 cvsrepos
.export(cvs_export_dir
, cvslabel
, ctx
.keyword_opt
)
492 verifyrepos
.export_trunk(vrf_export_dir
)
494 verifyrepos
.export_tag(vrf_export_dir
, label
)
496 verifyrepos
.export_branch(vrf_export_dir
, label
)
499 failures
, cvs_export_dir
, vrf_export_dir
, ctx
.run_diff
503 if not ctx
.skip_cleanup
:
504 if os
.path
.exists(cvs_export_dir
):
505 shutil
.rmtree(cvs_export_dir
)
506 if os
.path
.exists(vrf_export_dir
):
507 shutil
.rmtree(vrf_export_dir
)
511 def verify_contents(failures
, cvsrepos
, verifyrepos
, ctx
):
512 """Verify that the contents of the HEAD revision of all directories
513 and files in the trunk, all tags and all branches in the conversion
514 repository VERIFYREPOS matches the ones in the CVS repository CVSREPOS.
515 CTX is passed through to verify_contents_single()."""
517 # branches/tags that failed:
520 # Verify contents of trunk
521 print 'Verifying trunk'
523 if not verify_contents_single(
524 failures
, cvsrepos
, verifyrepos
, 'trunk', None, ctx
526 locations
.append('trunk')
528 # Verify contents of all tags
529 for tag
in verifyrepos
.tags():
530 print 'Verifying tag', tag
532 if not verify_contents_single(
533 failures
, cvsrepos
, verifyrepos
, 'tag', tag
, ctx
535 locations
.append('tag:' + tag
)
537 # Verify contents of all branches
538 for branch
in verifyrepos
.branches():
539 if branch
[:10] == 'unlabeled-':
540 print 'Skipped branch', branch
542 print 'Verifying branch', branch
543 if not verify_contents_single(
544 failures
, cvsrepos
, verifyrepos
, 'branch', branch
, ctx
546 locations
.append('branch:' + branch
)
549 assert bool(failures
) == bool(locations
), \
550 "failures = %r\nlocations = %r" % (failures
, locations
)
554 sys
.stdout
.write('FAIL: %s != %s: %d failure(s) in:\n'
555 % (cvsrepos
, verifyrepos
, failures
.count
))
556 for location
in locations
:
557 sys
.stdout
.write(' %s\n' % location
)
559 sys
.stdout
.write('PASS: %s == %s\n' % (cvsrepos
, verifyrepos
))
568 parser
= optparse
.OptionParser(
569 usage
='%prog [options] cvs-repos verify-repos')
570 parser
.add_option('--branch',
571 help='verify contents of the branch BRANCH only')
572 parser
.add_option('--diff', action
='store_true', dest
='run_diff',
573 help='run diff on differing files')
574 parser
.add_option('--tag',
575 help='verify contents of the tag TAG only')
576 parser
.add_option('--tmpdir',
578 help='path to store temporary files')
579 parser
.add_option('--trunk', action
='store_true',
580 help='verify contents of trunk only')
581 parser
.add_option('--symbol-transform', action
='append',
583 help='transform symbol names from P to S like cvs2svn, '
584 'except transforms SVN symbol to CVS symbol')
585 parser
.add_option('--svn',
586 action
='store_const', dest
='repos_type', const
='svn',
587 help='assume verify-repos is svn [default]')
588 parser
.add_option('--hg',
589 action
='store_const', dest
='repos_type', const
='hg',
590 help='assume verify-repos is hg')
591 parser
.add_option('--git',
592 action
='store_const', dest
='repos_type', const
='git',
593 help='assume verify-repos is git')
594 parser
.add_option('--suppress-keywords',
595 action
='store_const', dest
='keyword_opt', const
='-kk',
596 help='suppress CVS keyword expansion '
597 '(equivalent to --keyword-opt=-kk)')
598 parser
.add_option('--keyword-opt',
600 help='control CVS keyword expansion by adding OPT to '
601 'cvs export command line')
603 parser
.set_defaults(run_diff
=False,
606 symbol_transforms
=[],
608 (options
, args
) = parser
.parse_args()
610 symbol_transforms
= []
611 for value
in options
.symbol_transforms
:
613 [pattern
, replacement
] = value
.split(":")
615 symbol_transforms
.append(
616 RegexpSymbolTransform(pattern
, replacement
))
618 parser
.error("'%s' is not a valid regexp." % (pattern
,))
621 """Print an error to sys.stderr."""
622 sys
.stderr
.write('Error: ' + str(msg
) + '\n')
624 verify_branch
= options
.branch
625 verify_tag
= options
.tag
626 verify_trunk
= options
.trunk
628 # Consistency check for options and arguments.
630 parser
.error("wrong number of arguments")
633 verify_path
= args
[1]
634 verify_klass
= {'svn': SvnRepos
,
636 'git': GitRepos
}[options
.repos_type
]
638 failures
= Failures()
640 # Open the repositories
641 cvsrepos
= CvsRepos(cvs_path
)
642 verifyrepos
= verify_klass(verify_path
)
646 print 'Verifying branch', verify_branch
647 verify_contents_single(
648 failures
, cvsrepos
, verifyrepos
, 'branch', verify_branch
, options
651 print 'Verifying tag', verify_tag
652 verify_contents_single(
653 failures
, cvsrepos
, verifyrepos
, 'tag', verify_tag
, options
656 print 'Verifying trunk'
657 verify_contents_single(
658 failures
, cvsrepos
, verifyrepos
, 'trunk', None, options
661 # Verify trunk, tags and branches
662 verify_contents(failures
, cvsrepos
, verifyrepos
, options
)
663 except RuntimeError, e
:
665 except KeyboardInterrupt:
668 sys
.exit(failures
and 1 or 0)
671 if __name__
== '__main__':