2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
42 # CVS and Subversion command line client commands
50 """Run cmd as a pipe. Return (output, status)."""
51 child
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
52 output
= child
.stdout
.read()
54 return (output
, status
)
57 def cmd_failed(cmd
, output
, status
):
58 print 'CMD FAILED:', ' '.join(cmd
)
60 sys
.stdout
.write(output
)
61 raise RuntimeError('%s command failed!' % cmd
[0])
64 def split_output(cmd
):
65 (output
, status
) = pipe(cmd
)
67 cmd_failed(cmd
, output
, status
)
68 retval
= output
.split(os
.linesep
)[:-1]
69 if retval
and not retval
[-1]:
75 def __init__(self
, path
):
76 """Open the CVS repository at PATH."""
77 path
= os
.path
.abspath(path
)
78 if not os
.path
.isdir(path
):
79 raise RuntimeError('CVS path is not a directory')
81 if os
.path
.exists(os
.path
.join(path
, 'CVSROOT')):
82 # The whole repository
86 self
.cvsroot
= os
.path
.dirname(path
)
87 self
.module
= os
.path
.basename(path
)
88 while not os
.path
.exists(os
.path
.join(self
.cvsroot
, 'CVSROOT')):
89 parent
= os
.path
.dirname(self
.cvsroot
)
90 if parent
== self
.cvsroot
:
91 raise RuntimeError('Cannot find the CVSROOT')
92 self
.module
= os
.path
.join(os
.path
.basename(self
.cvsroot
), self
.module
)
96 return os
.path
.basename(self
.cvsroot
)
98 def export(self
, dest_path
, rev
=None, keyword_opt
=None):
99 """Export revision REV to DEST_PATH where REV can be None to export
100 the HEAD revision, or any valid CVS revision string to export that
103 cmd
= [CVS_CMD
, '-Q', '-d', ':local:' + self
.cvsroot
, 'export']
105 cmd
.extend(['-r', rev
])
107 cmd
.extend(['-D', 'now'])
109 cmd
.append(keyword_opt
)
110 cmd
.extend(['-d', dest_path
, self
.module
])
111 (output
, status
) = pipe(cmd
)
113 cmd_failed(cmd
, output
, status
)
119 def __init__(self
, url
):
120 """Open the Subversion repository at URL."""
121 # Check whether the user supplied a URL or a path
122 if url
.find('://') == -1:
123 abspath
= os
.path
.abspath(url
)
124 url
= 'file://' + (abspath
[0] != '/' and '/' or '') + abspath
126 url
= url
.replace(os
.sep
, '/')
130 # Cache a list of all tags and branches
133 self
.tag_list
= self
.list('tags')
136 if 'branches' in list:
137 self
.branch_list
= self
.list('branches')
139 self
.branch_list
= []
142 return self
.url
.split('/')[-1]
144 def export(self
, path
, dest_path
):
145 """Export PATH to DEST_PATH."""
146 url
= '/'.join([self
.url
, path
])
147 cmd
= [SVN_CMD
, 'export', '-q', url
, dest_path
]
148 (output
, status
) = pipe(cmd
)
150 cmd_failed(cmd
, output
, status
)
def export_trunk(self, dest_path):
    """Export the 'trunk' directory of the repository into DEST_PATH.

    The work is delegated to self.export(); nothing is returned."""
    trunk_path = 'trunk'
    self.export(trunk_path, dest_path)
def export_tag(self, dest_path, tag):
    """Export the tag named TAG into DEST_PATH.

    The tag is looked up below the 'tags/' directory and the actual
    export is delegated to self.export()."""
    tag_path = '/'.join(('tags', tag))
    self.export(tag_path, dest_path)
def export_branch(self, dest_path, branch):
    """Export the branch named BRANCH into DEST_PATH.

    The branch is looked up below the 'branches/' directory and the
    actual export is delegated to self.export()."""
    branch_path = '/'.join(('branches', branch))
    self.export(branch_path, dest_path)
164 def list(self
, path
):
165 """Return a list of all files and directories in PATH."""
166 cmd
= [SVN_CMD
, 'ls', self
.url
+ '/' + path
]
168 for line
in split_output(cmd
):
170 entries
.append(line
.rstrip('/'))
174 """Return a list of all tags in the repository."""
178 """Return a list of all branches in the repository."""
179 return self
.branch_list
185 def __init__(self
, path
):
187 self
.base_cmd
= [HG_CMD
, '-R', self
.path
]
189 self
._branches
= None # cache result of branches()
190 self
._have
_default
= None # so export_trunk() doesn't blow up
193 return os
.path
.basename(self
.path
)
195 def _export(self
, dest_path
, rev
):
196 cmd
= self
.base_cmd
+ ['archive',
199 '--exclude', 're:^\.hg',
201 (output
, status
) = pipe(cmd
)
203 cmd_failed(cmd
, output
, status
)
205 # If Mercurial has nothing to export, then it doesn't create
206 # dest_path. This breaks tree_compare(), so just check that the
207 # manifest for the chosen revision really is empty, and if so create
209 if not os
.path
.exists(dest_path
):
210 cmd
= self
.base_cmd
+ ['manifest', '--rev', rev
]
212 manifest
= [fn
for fn
in split_output(cmd
)
213 if not fn
.startswith('.hg')]
217 def export_trunk(self
, dest_path
):
218 self
.branches() # ensure _have_default is set
219 if self
._have
_default
:
220 self
._export
(dest_path
, 'default')
222 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the tag TAG to DEST_PATH.

    TAG is handed unchanged to self._export() as the revision to
    export."""
    self._export(dest_path, rev=tag)
def export_branch(self, dest_path, branch):
    """Export the branch BRANCH to DEST_PATH.

    BRANCH is handed unchanged to self._export() as the revision to
    export."""
    self._export(dest_path, rev=branch)
232 cmd
= self
.base_cmd
+ ['tags', '-q']
233 tags
= split_output(cmd
)
238 if self
._branches
is None:
239 cmd
= self
.base_cmd
+ ['branches', '-q']
240 self
._branches
= branches
= split_output(cmd
)
242 branches
.remove('default')
243 self
._have
_default
= True
245 self
._have
_default
= False
247 return self
._branches
253 def __init__(self
, path
):
257 '--git-dir=' + os
.path
.join(self
.path
, '.git'),
258 '--work-tree=' + self
.path
,
261 self
._branches
= None # cache result of branches()
262 self
._have
_master
= None # so export_trunk() doesn't blow up
265 return os
.path
.basename(self
.path
)
267 def _export(self
, dest_path
, rev
):
268 # clone the repository
269 cmd
= [GIT_CMD
, 'archive', '--remote=' + self
.path
, '--format=tar', rev
]
270 git_proc
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
273 # Unfortunately for some git tags the below causes
274 # git_proc.wait() to hang. The git archive process is in a
275 # <defunct> state and the verify-cvs2svn hangs for good.
276 tar
= tarfile
.open(mode
="r|", fileobj
=git_proc
.stdout
)
278 tar
.extract(tarinfo
, dest_path
)
282 tar_proc
= subprocess
.Popen(
283 ['tar', '-C', dest_path
, '-x'],
284 stdin
=git_proc
.stdout
, stdout
=subprocess
.PIPE
,
286 output
= tar_proc
.stdout
.read()
287 status
= tar_proc
.wait()
290 'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
291 % (rev
, self
.path
, dest_path
, output
)
294 status
= git_proc
.wait()
297 'Git extract of rev %s from repo %s to %s failed!'
298 % (rev
, self
.path
, dest_path
)
301 if not os
.path
.exists(dest_path
):
303 'Git clone of %s to %s failed!' % (self
.path
, dest_path
)
306 def export_trunk(self
, dest_path
):
307 self
.branches() # ensure _have_default is set
308 if self
._have
_master
:
309 self
._export
(dest_path
, 'master')
311 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the tag TAG to DEST_PATH.

    The tag name itself is used as the revision passed to
    self._export()."""
    revision = tag
    self._export(dest_path, revision)
def export_branch(self, dest_path, branch):
    """Export the branch BRANCH to DEST_PATH.

    The branch name itself is used as the revision passed to
    self._export()."""
    revision = branch
    self._export(dest_path, revision)
321 cmd
= self
.repo_cmd
+ ['tag']
322 tags
= split_output(cmd
)
326 if self
._branches
is None:
327 cmd
= self
.repo_cmd
+ ['branch']
328 branches
= split_output(cmd
)
329 # Remove the two characters at the start of the branch name
330 for i
in range(len(branches
)):
331 branches
[i
] = branches
[i
][2:]
332 self
._branches
= branches
334 branches
.remove('master')
335 self
._have
_master
= True
337 self
._have
_master
= False
339 return self
._branches
342 def transform_symbol(ctx
, name
):
343 """Transform the symbol NAME using the renaming rules specified
344 with --symbol-transform. Return the transformed symbol name."""
346 for (pattern
, replacement
) in ctx
.symbol_transforms
:
347 newname
= pattern
.sub(replacement
, name
)
349 print " symbol '%s' transformed to '%s'" % (name
, newname
)
355 class Failures(object):
357 self
.count
= 0 # number of failures seen
360 return str(self
.count
)
363 return "<%s at 0x%x: %s>" % (self
.__class
__.__name
__, id(self
), self
.count
)
365 def report(self
, summary
, details
=None):
367 sys
.stdout
.write(' FAIL: %s\n' % summary
)
370 sys
.stdout
.write(' %s\n' % line
)
372 def __nonzero__(self
):
373 return self
.count
> 0
376 def file_compare(failures
, base1
, base2
, run_diff
, rel_path
):
377 """Compare the mode and contents of two files.
379 The paths are specified as two base paths BASE1 and BASE2, and a
380 path REL_PATH that is relative to the two base paths. Return True
381 iff the file mode and contents are identical."""
384 path1
= os
.path
.join(base1
, rel_path
)
385 path2
= os
.path
.join(base2
, rel_path
)
386 mode1
= os
.stat(path1
).st_mode
& 0700 # only look at owner bits
387 mode2
= os
.stat(path2
).st_mode
& 0700
389 failures
.report('File modes differ for %s' % rel_path
,
390 details
=['%s: %o' % (path1
, mode1
),
391 '%s: %o' % (path2
, mode2
)])
394 file1
= open(path1
, 'rb')
395 file2
= open(path2
, 'rb')
398 data1
= file1
.read(8192)
399 data2
= file2
.read(8192)
402 cmd
= ['diff', '-u', path1
, path2
]
403 (output
, status
) = pipe(cmd
)
404 diff
= output
.split(os
.linesep
)
407 failures
.report('File contents differ for %s' % rel_path
,
421 def tree_compare(failures
, base1
, base2
, run_diff
, rel_path
=''):
422 """Compare the contents of two directory trees, including file contents.
424 The paths are specified as two base paths BASE1 and BASE2, and a
425 path REL_PATH that is relative to the two base paths. Return True
426 iff the trees are identical."""
432 path1
= os
.path
.join(base1
, rel_path
)
433 path2
= os
.path
.join(base2
, rel_path
)
434 if not os
.path
.exists(path1
):
435 failures
.report('%s does not exist' % path1
)
437 if not os
.path
.exists(path2
):
438 failures
.report('%s does not exist' % path2
)
440 if os
.path
.isfile(path1
) and os
.path
.isfile(path2
):
441 return file_compare(failures
, base1
, base2
, run_diff
, rel_path
)
442 if not (os
.path
.isdir(path1
) and os
.path
.isdir(path2
)):
443 failures
.report('Path types differ for %r' % rel_path
)
445 entries1
= os
.listdir(path1
)
447 entries2
= os
.listdir(path2
)
452 missing
= filter(lambda x
: x
not in entries2
, entries1
)
453 extra
= filter(lambda x
: x
not in entries1
, entries2
)
455 failures
.report('Directory /%s is missing entries: %s' %
456 (rel_path
, ', '.join(missing
)))
459 failures
.report('Directory /%s has extra entries: %s' %
460 (rel_path
, ', '.join(extra
)))
463 for entry
in entries1
:
464 new_rel_path
= os
.path
.join(rel_path
, entry
)
465 if not tree_compare(failures
, base1
, base2
, run_diff
, new_rel_path
):
470 def verify_contents_single(failures
, cvsrepos
, verifyrepos
, kind
, label
, ctx
):
471 """Verify the HEAD revision of a trunk, tag, or branch.
473 Verify that the contents of the HEAD revision of all directories and
474 files in the conversion repository VERIFYREPOS match the ones in the
475 CVS repository CVSREPOS. KIND can be either 'trunk', 'tag' or
476 'branch'. If KIND is either 'tag' or 'branch', LABEL is used to
477 specify the name of the tag or branch. CTX has the attributes:
478 CTX.tmpdir: specifying the directory for all temporary files.
479 CTX.skip_cleanup: if true, the temporary files are not deleted.
480 CTX.run_diff: if true, run diff on differing files."""
482 itemname
= kind
+ (kind
!= 'trunk' and '-' + label
or '')
483 cvs_export_dir
= os
.path
.join(
484 ctx
.tmpdir
, 'cvs-export-%s' % itemname
)
485 vrf_export_dir
= os
.path
.join(
486 ctx
.tmpdir
, '%s-export-%s' % (verifyrepos
.name
, itemname
))
489 cvslabel
= transform_symbol(ctx
, label
)
494 cvsrepos
.export(cvs_export_dir
, cvslabel
, ctx
.keyword_opt
)
496 verifyrepos
.export_trunk(vrf_export_dir
)
498 verifyrepos
.export_tag(vrf_export_dir
, label
)
500 verifyrepos
.export_branch(vrf_export_dir
, label
)
503 failures
, cvs_export_dir
, vrf_export_dir
, ctx
.run_diff
507 if not ctx
.skip_cleanup
:
508 if os
.path
.exists(cvs_export_dir
):
509 shutil
.rmtree(cvs_export_dir
)
510 if os
.path
.exists(vrf_export_dir
):
511 shutil
.rmtree(vrf_export_dir
)
515 def verify_contents(failures
, cvsrepos
, verifyrepos
, ctx
):
516 """Verify that the contents of the HEAD revision of all directories
517 and files in the trunk, all tags and all branches in the conversion
518 repository VERIFYREPOS matches the ones in the CVS repository CVSREPOS.
519 CTX is passed through to verify_contents_single()."""
521 # branches/tags that failed:
524 # Verify contents of trunk
525 print 'Verifying trunk'
527 if not verify_contents_single(
528 failures
, cvsrepos
, verifyrepos
, 'trunk', None, ctx
530 locations
.append('trunk')
532 # Verify contents of all tags
533 for tag
in verifyrepos
.tags():
534 print 'Verifying tag', tag
536 if not verify_contents_single(
537 failures
, cvsrepos
, verifyrepos
, 'tag', tag
, ctx
539 locations
.append('tag:' + tag
)
541 # Verify contents of all branches
542 for branch
in verifyrepos
.branches():
543 if branch
[:10] == 'unlabeled-':
544 print 'Skipped branch', branch
546 print 'Verifying branch', branch
547 if not verify_contents_single(
548 failures
, cvsrepos
, verifyrepos
, 'branch', branch
, ctx
550 locations
.append('branch:' + branch
)
553 assert bool(failures
) == bool(locations
), \
554 "failures = %r\nlocations = %r" % (failures
, locations
)
558 sys
.stdout
.write('FAIL: %s != %s: %d failure(s) in:\n'
559 % (cvsrepos
, verifyrepos
, failures
.count
))
560 for location
in locations
:
561 sys
.stdout
.write(' %s\n' % location
)
563 sys
.stdout
.write('PASS: %s == %s\n' % (cvsrepos
, verifyrepos
))
572 parser
= optparse
.OptionParser(
573 usage
='%prog [options] cvs-repos verify-repos')
574 parser
.add_option('--branch',
575 help='verify contents of the branch BRANCH only')
576 parser
.add_option('--diff', action
='store_true', dest
='run_diff',
577 help='run diff on differing files')
578 parser
.add_option('--tag',
579 help='verify contents of the tag TAG only')
580 parser
.add_option('--tmpdir',
582 help='path to store temporary files')
583 parser
.add_option('--trunk', action
='store_true',
584 help='verify contents of trunk only')
585 parser
.add_option('--symbol-transform', action
='append',
587 help='transform symbol names from P to S like cvs2svn, '
588 'except transforms SVN symbol to CVS symbol')
589 parser
.add_option('--svn',
590 action
='store_const', dest
='repos_type', const
='svn',
591 help='assume verify-repos is svn [default]')
592 parser
.add_option('--hg',
593 action
='store_const', dest
='repos_type', const
='hg',
594 help='assume verify-repos is hg')
595 parser
.add_option('--git',
596 action
='store_const', dest
='repos_type', const
='git',
597 help='assume verify-repos is git')
598 parser
.add_option('--suppress-keywords',
599 action
='store_const', dest
='keyword_opt', const
='-kk',
600 help='suppress CVS keyword expansion '
601 '(equivalent to --keyword-opt=-kk)')
602 parser
.add_option('--keyword-opt',
604 help='control CVS keyword expansion by adding OPT to '
605 'cvs export command line')
607 parser
.set_defaults(run_diff
=False,
610 symbol_transforms
=[],
612 (options
, args
) = parser
.parse_args()
614 symbol_transforms
= []
615 for value
in options
.symbol_transforms
:
617 [pattern
, replacement
] = value
.split(":")
619 symbol_transforms
.append(
620 RegexpSymbolTransform(pattern
, replacement
))
622 parser
.error("'%s' is not a valid regexp." % (pattern
,))
625 """Print an error to sys.stderr."""
626 sys
.stderr
.write('Error: ' + str(msg
) + '\n')
628 verify_branch
= options
.branch
629 verify_tag
= options
.tag
630 verify_trunk
= options
.trunk
632 # Consistency check for options and arguments.
634 parser
.error("wrong number of arguments")
637 verify_path
= args
[1]
638 verify_klass
= {'svn': SvnRepos
,
640 'git': GitRepos
}[options
.repos_type
]
642 failures
= Failures()
644 # Open the repositories
645 cvsrepos
= CvsRepos(cvs_path
)
646 verifyrepos
= verify_klass(verify_path
)
650 print 'Verifying branch', verify_branch
651 verify_contents_single(
652 failures
, cvsrepos
, verifyrepos
, 'branch', verify_branch
, options
655 print 'Verifying tag', verify_tag
656 verify_contents_single(
657 failures
, cvsrepos
, verifyrepos
, 'tag', verify_tag
, options
660 print 'Verifying trunk'
661 verify_contents_single(
662 failures
, cvsrepos
, verifyrepos
, 'trunk', None, options
665 # Verify trunk, tags and branches
666 verify_contents(failures
, cvsrepos
, verifyrepos
, options
)
667 except RuntimeError, e
:
669 except KeyboardInterrupt:
672 sys
.exit(failures
and 1 or 0)
675 if __name__
== '__main__':