2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
42 # CVS and Subversion command line client commands
50 """Run cmd as a pipe. Return (output, status)."""
51 child
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
52 output
= child
.stdout
.read()
54 return (output
, status
)
57 def cmd_failed(cmd
, output
, status
):
58 print 'CMD FAILED:', ' '.join(cmd
)
60 sys
.stdout
.write(output
)
61 raise RuntimeError('%s command failed!' % cmd
[0])
65 def __init__(self
, path
):
66 """Open the CVS repository at PATH."""
67 path
= os
.path
.abspath(path
)
68 if not os
.path
.isdir(path
):
69 raise RuntimeError('CVS path is not a directory')
71 if os
.path
.exists(os
.path
.join(path
, 'CVSROOT')):
72 # The whole repository
76 self
.cvsroot
= os
.path
.dirname(path
)
77 self
.module
= os
.path
.basename(path
)
78 while not os
.path
.exists(os
.path
.join(self
.cvsroot
, 'CVSROOT')):
79 parent
= os
.path
.dirname(self
.cvsroot
)
80 if parent
== self
.cvsroot
:
81 raise RuntimeError('Cannot find the CVSROOT')
82 self
.module
= os
.path
.join(os
.path
.basename(self
.cvsroot
), self
.module
)
86 return os
.path
.basename(self
.cvsroot
)
88 def export(self
, dest_path
, rev
=None, keyword_opt
=None):
89 """Export revision REV to DEST_PATH where REV can be None to export
90 the HEAD revision, or any valid CVS revision string to export that
93 cmd
= [CVS_CMD
, '-Q', '-d', ':local:' + self
.cvsroot
, 'export']
95 cmd
.extend(['-r', rev
])
97 cmd
.extend(['-D', 'now'])
99 cmd
.append(keyword_opt
)
100 cmd
.extend(['-d', dest_path
, self
.module
])
101 (output
, status
) = pipe(cmd
)
103 cmd_failed(cmd
, output
, status
)
109 def __init__(self
, url
):
110 """Open the Subversion repository at URL."""
111 # Check if the user supplied a URL or a path
112 if url
.find('://') == -1:
113 abspath
= os
.path
.abspath(url
)
114 url
= 'file://' + (abspath
[0] != '/' and '/' or '') + abspath
116 url
= url
.replace(os
.sep
, '/')
120 # Cache a list of all tags and branches
123 self
.tag_list
= self
.list('tags')
126 if 'branches' in list:
127 self
.branch_list
= self
.list('branches')
129 self
.branch_list
= []
132 return self
.url
.split('/')[-1]
134 def export(self
, path
, dest_path
):
135 """Export PATH to DEST_PATH."""
136 url
= '/'.join([self
.url
, path
])
137 cmd
= [SVN_CMD
, 'export', '-q', url
, dest_path
]
138 (output
, status
) = pipe(cmd
)
140 cmd_failed(cmd
, output
, status
)
def export_trunk(self, dest_path):
    """Write an export of the repository's 'trunk' directory to DEST_PATH.

    The work is delegated to self.export(); nothing is returned."""
    trunk_path = 'trunk'
    self.export(trunk_path, dest_path)
def export_tag(self, dest_path, tag):
    """Write an export of the tag named TAG to DEST_PATH.

    The tag is looked up under the repository's 'tags/' directory and
    the work is delegated to self.export(); nothing is returned."""
    tag_path = 'tags/' + tag
    self.export(tag_path, dest_path)
def export_branch(self, dest_path, branch):
    """Write an export of the branch named BRANCH to DEST_PATH.

    The branch is looked up under the repository's 'branches/'
    directory and the work is delegated to self.export(); nothing is
    returned."""
    branch_path = 'branches/' + branch
    self.export(branch_path, dest_path)
154 def list(self
, path
):
155 """Return a list of all files and directories in PATH."""
156 cmd
= [SVN_CMD
, 'ls', self
.url
+ '/' + path
]
157 (output
, status
) = pipe(cmd
)
159 cmd_failed(cmd
, output
, status
)
161 for line
in output
.split("\n"):
163 entries
.append(line
[:-1])
167 """Return a list of all tags in the repository."""
171 """Return a list of all branches in the repository."""
172 return self
.branch_list
178 def __init__(self
, path
):
180 self
.base_cmd
= [HG_CMD
, '-R', self
.path
]
182 self
._branches
= None # cache result of branches()
183 self
._have
_default
= None # so export_trunk() doesn't blow up
186 return os
.path
.basename(self
.path
)
188 def _export(self
, dest_path
, rev
):
189 cmd
= self
.base_cmd
+ ['archive',
192 '--exclude', 're:^\.hg',
194 (output
, status
) = pipe(cmd
)
196 cmd_failed(cmd
, output
, status
)
198 # If Mercurial has nothing to export, then it doesn't create
199 # dest_path. This breaks tree_compare(), so just check that the
200 # manifest for the chosen revision really is empty, and if so create
202 if not os
.path
.exists(dest_path
):
203 cmd
= self
.base_cmd
+ ['manifest', '--rev', rev
]
205 (output
, status
) = pipe(cmd
)
207 cmd_failed(cmd
, output
, status
)
208 manifest
= [fn
for fn
in output
.split("\n")[:-1]
209 if not fn
.startswith('.hg')]
213 def export_trunk(self
, dest_path
):
214 self
.branches() # ensure _have_default is set
215 if self
._have
_default
:
216 self
._export
(dest_path
, 'default')
218 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the revision named by TAG to DEST_PATH.

    TAG is handed unchanged to self._export() as the revision to
    export; nothing is returned."""
    revision = tag
    self._export(dest_path, revision)
def export_branch(self, dest_path, branch):
    """Export the revision named by BRANCH to DEST_PATH.

    BRANCH is handed unchanged to self._export() as the revision to
    export; nothing is returned."""
    revision = branch
    self._export(dest_path, revision)
228 cmd
= self
.base_cmd
+ ['tags', '-q']
229 tags
= self
._split
_output
(cmd
)
234 if self
._branches
is None:
235 cmd
= self
.base_cmd
+ ['branches', '-q']
236 self
._branches
= branches
= self
._split
_output
(cmd
)
238 branches
.remove('default')
239 self
._have
_default
= True
241 self
._have
_default
= False
243 return self
._branches
245 def _split_output(self
, cmd
):
246 (output
, status
) = pipe(cmd
)
248 cmd_failed(cmd
, output
, status
)
249 return output
.split("\n")[:-1]
255 def __init__(self
, path
):
259 '--git-dir=' + os
.path
.join(self
.path
, '.git'),
260 '--work-tree=' + self
.path
,
263 self
._branches
= None # cache result of branches()
264 self
._have
_master
= None # so export_trunk() doesn't blow up
267 return os
.path
.basename(self
.path
)
269 def _export(self
, dest_path
, rev
):
270 # clone the repository
271 cmd
= [GIT_CMD
, 'archive', '--remote=' + self
.path
, '--format=tar', rev
]
272 git_proc
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
275 # Unfortunately for some git tags the below causes
276 # git_proc.wait() to hang. The git archive process is in a
277 # <defunct> state and the verify-cvs2svn hangs for good.
278 tar
= tarfile
.open(mode
="r|", fileobj
=git_proc
.stdout
)
280 tar
.extract(tarinfo
, dest_path
)
284 tar_proc
= subprocess
.Popen(
285 ['tar', '-C', dest_path
, '-x'],
286 stdin
=git_proc
.stdout
, stdout
=subprocess
.PIPE
,
288 output
= tar_proc
.stdout
.read()
289 status
= tar_proc
.wait()
292 'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
293 % (rev
, self
.path
, dest_path
, output
)
296 status
= git_proc
.wait()
299 'Git extract of rev %s from repo %s to %s failed!'
300 % (rev
, self
.path
, dest_path
)
303 if not os
.path
.exists(dest_path
):
305 'Git clone of %s to %s failed!' % (self
.path
, dest_path
)
308 def export_trunk(self
, dest_path
):
309 self
.branches() # ensure _have_master is set
310 if self
._have
_master
:
311 self
._export
(dest_path
, 'master')
313 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the revision named by TAG to DEST_PATH.

    TAG is handed unchanged to self._export() as the revision to
    export; nothing is returned."""
    revision = tag
    self._export(dest_path, revision)
def export_branch(self, dest_path, branch):
    """Export the revision named by BRANCH to DEST_PATH.

    BRANCH is handed unchanged to self._export() as the revision to
    export; nothing is returned."""
    revision = branch
    self._export(dest_path, revision)
323 cmd
= self
.repo_cmd
+ ['tag']
324 tags
= self
._split
_output
(cmd
)
328 if self
._branches
is None:
329 cmd
= self
.repo_cmd
+ ['branch']
330 branches
= self
._split
_output
(cmd
)
331 # Remove the two characters at the start of the branch name
332 for i
in range(len(branches
)):
333 branches
[i
] = branches
[i
][2:]
334 self
._branches
= branches
336 branches
.remove('master')
337 self
._have
_master
= True
339 self
._have
_master
= False
341 return self
._branches
343 def _split_output(self
, cmd
):
344 (output
, status
) = pipe(cmd
)
346 cmd_failed(cmd
, output
, status
)
347 return output
.split("\n")[:-1]
350 def transform_symbol(ctx
, name
):
351 """Transform the symbol NAME using the renaming rules specified
352 with --symbol-transform. Return the transformed symbol name."""
354 for (pattern
, replacement
) in ctx
.symbol_transforms
:
355 newname
= pattern
.sub(replacement
, name
)
357 print " symbol '%s' transformed to '%s'" % (name
, newname
)
363 class Failures(object):
365 self
.count
= 0 # number of failures seen
368 return str(self
.count
)
371 return "<%s at 0x%x: %s>" % (self
.__class
__.__name
__, id(self
), self
.count
)
373 def report(self
, summary
, details
=None):
375 sys
.stdout
.write(' FAIL: %s\n' % summary
)
378 sys
.stdout
.write(' %s\n' % line
)
380 def __nonzero__(self
):
381 return self
.count
> 0
384 def file_compare(failures
, base1
, base2
, run_diff
, rel_path
):
385 """Compare the mode and contents of two files.
387 The paths are specified as two base paths BASE1 and BASE2, and a
388 path REL_PATH that is relative to the two base paths. Return True
389 iff the file mode and contents are identical."""
392 path1
= os
.path
.join(base1
, rel_path
)
393 path2
= os
.path
.join(base2
, rel_path
)
394 mode1
= os
.stat(path1
).st_mode
& 0700 # only look at owner bits
395 mode2
= os
.stat(path2
).st_mode
& 0700
397 failures
.report('File modes differ for %s' % rel_path
,
398 details
=['%s: %o' % (path1
, mode1
),
399 '%s: %o' % (path2
, mode2
)])
402 file1
= open(path1
, 'rb')
403 file2
= open(path2
, 'rb')
405 data1
= file1
.read(8192)
406 data2
= file2
.read(8192)
409 cmd
= ['diff', '-u', path1
, path2
]
410 (output
, status
) = pipe(cmd
)
411 diff
= output
.split('\n')
414 failures
.report('File contents differ for %s' % rel_path
,
425 def tree_compare(failures
, base1
, base2
, run_diff
, rel_path
=''):
426 """Compare the contents of two directory trees, including file contents.
428 The paths are specified as two base paths BASE1 and BASE2, and a
429 path REL_PATH that is relative to the two base paths. Return True
430 iff the trees are identical."""
436 path1
= os
.path
.join(base1
, rel_path
)
437 path2
= os
.path
.join(base2
, rel_path
)
438 if not os
.path
.exists(path1
):
439 failures
.report('%s does not exist' % path1
)
441 if not os
.path
.exists(path2
):
442 failures
.report('%s does not exist' % path2
)
444 if os
.path
.isfile(path1
) and os
.path
.isfile(path2
):
445 return file_compare(failures
, base1
, base2
, run_diff
, rel_path
)
446 if not (os
.path
.isdir(path1
) and os
.path
.isdir(path2
)):
447 failures
.report('Path types differ for %r' % rel_path
)
449 entries1
= os
.listdir(path1
)
451 entries2
= os
.listdir(path2
)
456 missing
= filter(lambda x
: x
not in entries2
, entries1
)
457 extra
= filter(lambda x
: x
not in entries1
, entries2
)
459 failures
.report('Directory /%s is missing entries: %s' %
460 (rel_path
, ', '.join(missing
)))
463 failures
.report('Directory /%s has extra entries: %s' %
464 (rel_path
, ', '.join(extra
)))
467 for entry
in entries1
:
468 new_rel_path
= os
.path
.join(rel_path
, entry
)
469 if not tree_compare(failures
, base1
, base2
, run_diff
, new_rel_path
):
474 def verify_contents_single(failures
, cvsrepos
, verifyrepos
, kind
, label
, ctx
):
475 """Verify the HEAD revision of a trunk, tag, or branch.
477 Verify that the contents of the HEAD revision of all directories and
478 files in the conversion repository VERIFYREPOS match the ones in the
479 CVS repository CVSREPOS. KIND can be either 'trunk', 'tag' or
480 'branch'. If KIND is either 'tag' or 'branch', LABEL is used to
481 specify the name of the tag or branch. CTX has the attributes:
482 CTX.tmpdir: specifying the directory for all temporary files.
483 CTX.skip_cleanup: if true, the temporary files are not deleted.
484 CTX.run_diff: if true, run diff on differing files."""
486 itemname
= kind
+ (kind
!= 'trunk' and '-' + label
or '')
487 cvs_export_dir
= os
.path
.join(
488 ctx
.tmpdir
, 'cvs-export-%s' % itemname
)
489 vrf_export_dir
= os
.path
.join(
490 ctx
.tmpdir
, '%s-export-%s' % (verifyrepos
.name
, itemname
))
493 cvslabel
= transform_symbol(ctx
, label
)
498 cvsrepos
.export(cvs_export_dir
, cvslabel
, ctx
.keyword_opt
)
500 verifyrepos
.export_trunk(vrf_export_dir
)
502 verifyrepos
.export_tag(vrf_export_dir
, label
)
504 verifyrepos
.export_branch(vrf_export_dir
, label
)
507 failures
, cvs_export_dir
, vrf_export_dir
, ctx
.run_diff
511 if not ctx
.skip_cleanup
:
512 if os
.path
.exists(cvs_export_dir
):
513 shutil
.rmtree(cvs_export_dir
)
514 if os
.path
.exists(vrf_export_dir
):
515 shutil
.rmtree(vrf_export_dir
)
519 def verify_contents(failures
, cvsrepos
, verifyrepos
, ctx
):
520 """Verify that the contents of the HEAD revision of all directories
521 and files in the trunk, all tags and all branches in the conversion
522 repository VERIFYREPOS matches the ones in the CVS repository CVSREPOS.
523 CTX is passed through to verify_contents_single()."""
525 # branches/tags that failed:
528 # Verify contents of trunk
529 print 'Verifying trunk'
531 if not verify_contents_single(
532 failures
, cvsrepos
, verifyrepos
, 'trunk', None, ctx
534 locations
.append('trunk')
536 # Verify contents of all tags
537 for tag
in verifyrepos
.tags():
538 print 'Verifying tag', tag
540 if not verify_contents_single(
541 failures
, cvsrepos
, verifyrepos
, 'tag', tag
, ctx
543 locations
.append('tag:' + tag
)
545 # Verify contents of all branches
546 for branch
in verifyrepos
.branches():
547 if branch
[:10] == 'unlabeled-':
548 print 'Skipped branch', branch
550 print 'Verifying branch', branch
551 if not verify_contents_single(
552 failures
, cvsrepos
, verifyrepos
, 'branch', branch
, ctx
554 locations
.append('branch:' + branch
)
557 assert bool(failures
) == bool(locations
), \
558 "failures = %r\nlocations = %r" % (failures
, locations
)
562 sys
.stdout
.write('FAIL: %s != %s: %d failure(s) in:\n'
563 % (cvsrepos
, verifyrepos
, failures
.count
))
564 for location
in locations
:
565 sys
.stdout
.write(' %s\n' % location
)
567 sys
.stdout
.write('PASS: %s == %s\n' % (cvsrepos
, verifyrepos
))
576 parser
= optparse
.OptionParser(
577 usage
='%prog [options] cvs-repos verify-repos')
578 parser
.add_option('--branch',
579 help='verify contents of the branch BRANCH only')
580 parser
.add_option('--diff', action
='store_true', dest
='run_diff',
581 help='run diff on differing files')
582 parser
.add_option('--tag',
583 help='verify contents of the tag TAG only')
584 parser
.add_option('--tmpdir',
586 help='path to store temporary files')
587 parser
.add_option('--trunk', action
='store_true',
588 help='verify contents of trunk only')
589 parser
.add_option('--symbol-transform', action
='append',
591 help='transform symbol names from P to S like cvs2svn, '
592 'except transforms SVN symbol to CVS symbol')
593 parser
.add_option('--svn',
594 action
='store_const', dest
='repos_type', const
='svn',
595 help='assume verify-repos is svn [default]')
596 parser
.add_option('--hg',
597 action
='store_const', dest
='repos_type', const
='hg',
598 help='assume verify-repos is hg')
599 parser
.add_option('--git',
600 action
='store_const', dest
='repos_type', const
='git',
601 help='assume verify-repos is git')
602 parser
.add_option('--suppress-keywords',
603 action
='store_const', dest
='keyword_opt', const
='-kk',
604 help='suppress CVS keyword expansion '
605 '(equivalent to --keyword-opt=-kk)')
606 parser
.add_option('--keyword-opt',
608 help='control CVS keyword expansion by adding OPT to '
609 'cvs export command line')
611 parser
.set_defaults(run_diff
=False,
614 symbol_transforms
=[],
616 (options
, args
) = parser
.parse_args()
618 symbol_transforms
= []
619 for value
in options
.symbol_transforms
:
621 [pattern
, replacement
] = value
.split(":")
623 symbol_transforms
.append(
624 RegexpSymbolTransform(pattern
, replacement
))
626 parser
.error("'%s' is not a valid regexp." % (pattern
,))
629 """Print an error to sys.stderr."""
630 sys
.stderr
.write('Error: ' + str(msg
) + '\n')
632 verify_branch
= options
.branch
633 verify_tag
= options
.tag
634 verify_trunk
= options
.trunk
636 # Consistency check for options and arguments.
638 parser
.error("wrong number of arguments")
641 verify_path
= args
[1]
642 verify_klass
= {'svn': SvnRepos
,
644 'git': GitRepos
}[options
.repos_type
]
646 failures
= Failures()
648 # Open the repositories
649 cvsrepos
= CvsRepos(cvs_path
)
650 verifyrepos
= verify_klass(verify_path
)
654 print 'Verifying branch', verify_branch
655 verify_contents_single(
656 failures
, cvsrepos
, verifyrepos
, 'branch', verify_branch
, options
659 print 'Verifying tag', verify_tag
660 verify_contents_single(
661 failures
, cvsrepos
, verifyrepos
, 'tag', verify_tag
, options
664 print 'Verifying trunk'
665 verify_contents_single(
666 failures
, cvsrepos
, verifyrepos
, 'trunk', None, options
669 # Verify trunk, tags and branches
670 verify_contents(failures
, cvsrepos
, verifyrepos
, options
)
671 except RuntimeError, e
:
673 except KeyboardInterrupt:
676 sys
.exit(failures
and 1 or 0)
679 if __name__
== '__main__':