2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
42 # CVS and Subversion command line client commands
50 """Run cmd as a pipe. Return (output, status)."""
51 child
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
52 output
= child
.stdout
.read()
54 return (output
, status
)
57 def cmd_failed(cmd
, output
, status
):
58 print 'CMD FAILED:', ' '.join(cmd
)
60 sys
.stdout
.write(output
)
61 raise RuntimeError('%s command failed!' % cmd
[0])
64 def split_output(self
, cmd
):
65 (output
, status
) = pipe(cmd
)
67 cmd_failed(cmd
, output
, status
)
68 return output
.split(os
.linesep
)[:-1]
72 def __init__(self
, path
):
73 """Open the CVS repository at PATH."""
74 path
= os
.path
.abspath(path
)
75 if not os
.path
.isdir(path
):
76 raise RuntimeError('CVS path is not a directory')
78 if os
.path
.exists(os
.path
.join(path
, 'CVSROOT')):
79 # The whole repository
83 self
.cvsroot
= os
.path
.dirname(path
)
84 self
.module
= os
.path
.basename(path
)
85 while not os
.path
.exists(os
.path
.join(self
.cvsroot
, 'CVSROOT')):
86 parent
= os
.path
.dirname(self
.cvsroot
)
87 if parent
== self
.cvsroot
:
88 raise RuntimeError('Cannot find the CVSROOT')
89 self
.module
= os
.path
.join(os
.path
.basename(self
.cvsroot
), self
.module
)
93 return os
.path
.basename(self
.cvsroot
)
95 def export(self
, dest_path
, rev
=None, keyword_opt
=None):
96 """Export revision REV to DEST_PATH where REV can be None to export
97 the HEAD revision, or any valid CVS revision string to export that
100 cmd
= [CVS_CMD
, '-Q', '-d', ':local:' + self
.cvsroot
, 'export']
102 cmd
.extend(['-r', rev
])
104 cmd
.extend(['-D', 'now'])
106 cmd
.append(keyword_opt
)
107 cmd
.extend(['-d', dest_path
, self
.module
])
108 (output
, status
) = pipe(cmd
)
110 cmd_failed(cmd
, output
, status
)
116 def __init__(self
, url
):
117 """Open the Subversion repository at URL."""
118 # Check if the user supplied an URL or a path
119 if url
.find('://') == -1:
120 abspath
= os
.path
.abspath(url
)
121 url
= 'file://' + (abspath
[0] != '/' and '/' or '') + abspath
123 url
= url
.replace(os
.sep
, '/')
127 # Cache a list of all tags and branches
130 self
.tag_list
= self
.list('tags')
133 if 'branches' in list:
134 self
.branch_list
= self
.list('branches')
136 self
.branch_list
= []
139 return self
.url
.split('/')[-1]
141 def export(self
, path
, dest_path
):
142 """Export PATH to DEST_PATH."""
143 url
= '/'.join([self
.url
, path
])
144 cmd
= [SVN_CMD
, 'export', '-q', url
, dest_path
]
145 (output
, status
) = pipe(cmd
)
147 cmd_failed(cmd
, output
, status
)
def export_trunk(self, dest_path):
    """Export the repository's trunk into DEST_PATH.

    Thin convenience wrapper: forwards to self.export() with the
    repository-relative path 'trunk'.  Nothing is returned.
    """
    trunk_path = 'trunk'
    self.export(trunk_path, dest_path)
def export_tag(self, dest_path, tag):
    """Export the tag named TAG into DEST_PATH.

    The tag is addressed as 'tags/<TAG>' relative to the repository;
    the actual export is carried out by self.export().  Nothing is
    returned.
    """
    tag_path = '/'.join(['tags', tag])
    self.export(tag_path, dest_path)
def export_branch(self, dest_path, branch):
    """Export the branch named BRANCH into DEST_PATH.

    The branch is addressed as 'branches/<BRANCH>' relative to the
    repository; the actual export is carried out by self.export().
    Nothing is returned.
    """
    branch_path = '/'.join(['branches', branch])
    self.export(branch_path, dest_path)
161 def list(self
, path
):
162 """Return a list of all files and directories in PATH."""
163 cmd
= [SVN_CMD
, 'ls', self
.url
+ '/' + path
]
164 (output
, status
) = pipe(cmd
)
166 cmd_failed(cmd
, output
, status
)
168 for line
in output
.split(os
.linesep
):
170 entries
.append(line
[:-1])
174 """Return a list of all tags in the repository."""
178 """Return a list of all branches in the repository."""
179 return self
.branch_list
185 def __init__(self
, path
):
187 self
.base_cmd
= [HG_CMD
, '-R', self
.path
]
189 self
._branches
= None # cache result of branches()
190 self
._have
_default
= None # so export_trunk() doesn't blow up
193 return os
.path
.basename(self
.path
)
195 def _export(self
, dest_path
, rev
):
196 cmd
= self
.base_cmd
+ ['archive',
199 '--exclude', 're:^\.hg',
201 (output
, status
) = pipe(cmd
)
203 cmd_failed(cmd
, output
, status
)
205 # If Mercurial has nothing to export, then it doesn't create
206 # dest_path. This breaks tree_compare(), so just check that the
207 # manifest for the chosen revision really is empty, and if so create
209 if not os
.path
.exists(dest_path
):
210 cmd
= self
.base_cmd
+ ['manifest', '--rev', rev
]
212 (output
, status
) = pipe(cmd
)
214 cmd_failed(cmd
, output
, status
)
215 manifest
= [fn
for fn
in output
.split(os
.linesep
)[:-1]
216 if not fn
.startswith('.hg')]
220 def export_trunk(self
, dest_path
):
221 self
.branches() # ensure _have_default is set
222 if self
._have
_default
:
223 self
._export
(dest_path
, 'default')
225 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the tag TAG into DEST_PATH.

    The tag name is handed to self._export() unchanged as the
    revision to export.  Nothing is returned.
    """
    revision = tag
    self._export(dest_path, revision)
def export_branch(self, dest_path, branch):
    """Export the branch BRANCH into DEST_PATH.

    The branch name is handed to self._export() unchanged as the
    revision to export.  Nothing is returned.
    """
    revision = branch
    self._export(dest_path, revision)
235 cmd
= self
.base_cmd
+ ['tags', '-q']
236 tags
= split_output(cmd
)
241 if self
._branches
is None:
242 cmd
= self
.base_cmd
+ ['branches', '-q']
243 self
._branches
= branches
= split_output(cmd
)
245 branches
.remove('default')
246 self
._have
_default
= True
248 self
._have
_default
= False
250 return self
._branches
256 def __init__(self
, path
):
260 '--git-dir=' + os
.path
.join(self
.path
, '.git'),
261 '--work-tree=' + self
.path
,
264 self
._branches
= None # cache result of branches()
265 self
._have
_master
= None # so export_trunk() doesn't blow up
268 return os
.path
.basename(self
.path
)
270 def _export(self
, dest_path
, rev
):
271 # export REV from the repository as a tar stream via 'git archive'
272 cmd
= [GIT_CMD
, 'archive', '--remote=' + self
.path
, '--format=tar', rev
]
273 git_proc
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
276 # Unfortunately for some git tags the below causes
277 # git_proc.wait() to hang. The git archive process is in a
278 # <defunct> state and the verify-cvs2svn hangs for good.
279 tar
= tarfile
.open(mode
="r|", fileobj
=git_proc
.stdout
)
281 tar
.extract(tarinfo
, dest_path
)
285 tar_proc
= subprocess
.Popen(
286 ['tar', '-C', dest_path
, '-x'],
287 stdin
=git_proc
.stdout
, stdout
=subprocess
.PIPE
,
289 output
= tar_proc
.stdout
.read()
290 status
= tar_proc
.wait()
293 'Git tar extraction of rev %s from repo %s to %s failed (%s)!'
294 % (rev
, self
.path
, dest_path
, output
)
297 status
= git_proc
.wait()
300 'Git extract of rev %s from repo %s to %s failed!'
301 % (rev
, self
.path
, dest_path
)
304 if not os
.path
.exists(dest_path
):
306 'Git clone of %s to %s failed!' % (self
.path
, dest_path
)
309 def export_trunk(self
, dest_path
):
310 self
.branches() # ensure _have_master is set
311 if self
._have
_master
:
312 self
._export
(dest_path
, 'master')
314 # same as CVS does when exporting empty trunk
def export_tag(self, dest_path, tag):
    """Export the tag TAG into DEST_PATH.

    Delegates to self._export(), passing the tag name through
    unchanged as the revision to extract.  Nothing is returned.
    """
    rev = tag
    self._export(dest_path, rev)
def export_branch(self, dest_path, branch):
    """Export the branch BRANCH into DEST_PATH.

    Delegates to self._export(), passing the branch name through
    unchanged as the revision to extract.  Nothing is returned.
    """
    rev = branch
    self._export(dest_path, rev)
324 cmd
= self
.repo_cmd
+ ['tag']
325 tags
= split_output(cmd
)
329 if self
._branches
is None:
330 cmd
= self
.repo_cmd
+ ['branch']
331 branches
= split_output(cmd
)
332 # Remove the two characters at the start of the branch name
333 for i
in range(len(branches
)):
334 branches
[i
] = branches
[i
][2:]
335 self
._branches
= branches
337 branches
.remove('master')
338 self
._have
_master
= True
340 self
._have
_master
= False
342 return self
._branches
345 def transform_symbol(ctx
, name
):
346 """Transform the symbol NAME using the renaming rules specified
347 with --symbol-transform. Return the transformed symbol name."""
349 for (pattern
, replacement
) in ctx
.symbol_transforms
:
350 newname
= pattern
.sub(replacement
, name
)
352 print " symbol '%s' transformed to '%s'" % (name
, newname
)
358 class Failures(object):
360 self
.count
= 0 # number of failures seen
363 return str(self
.count
)
366 return "<%s at 0x%x: %s>" % (self
.__class
__.__name
__, id(self
), self
.count
)
368 def report(self
, summary
, details
=None):
370 sys
.stdout
.write(' FAIL: %s\n' % summary
)
373 sys
.stdout
.write(' %s\n' % line
)
375 def __nonzero__(self
):
376 return self
.count
> 0
379 def file_compare(failures
, base1
, base2
, run_diff
, rel_path
):
380 """Compare the mode and contents of two files.
382 The paths are specified as two base paths BASE1 and BASE2, and a
383 path REL_PATH that is relative to the two base paths. Return True
384 iff the file mode and contents are identical."""
387 path1
= os
.path
.join(base1
, rel_path
)
388 path2
= os
.path
.join(base2
, rel_path
)
389 mode1
= os
.stat(path1
).st_mode
& 0700 # only look at owner bits
390 mode2
= os
.stat(path2
).st_mode
& 0700
392 failures
.report('File modes differ for %s' % rel_path
,
393 details
=['%s: %o' % (path1
, mode1
),
394 '%s: %o' % (path2
, mode2
)])
397 file1
= open(path1
, 'rb')
398 file2
= open(path2
, 'rb')
400 data1
= file1
.read(8192)
401 data2
= file2
.read(8192)
404 cmd
= ['diff', '-u', path1
, path2
]
405 (output
, status
) = pipe(cmd
)
406 diff
= output
.split(os
.linesep
)
409 failures
.report('File contents differ for %s' % rel_path
,
420 def tree_compare(failures
, base1
, base2
, run_diff
, rel_path
=''):
421 """Compare the contents of two directory trees, including file contents.
423 The paths are specified as two base paths BASE1 and BASE2, and a
424 path REL_PATH that is relative to the two base paths. Return True
425 iff the trees are identical."""
431 path1
= os
.path
.join(base1
, rel_path
)
432 path2
= os
.path
.join(base2
, rel_path
)
433 if not os
.path
.exists(path1
):
434 failures
.report('%s does not exist' % path1
)
436 if not os
.path
.exists(path2
):
437 failures
.report('%s does not exist' % path2
)
439 if os
.path
.isfile(path1
) and os
.path
.isfile(path2
):
440 return file_compare(failures
, base1
, base2
, run_diff
, rel_path
)
441 if not (os
.path
.isdir(path1
) and os
.path
.isdir(path2
)):
442 failures
.report('Path types differ for %r' % rel_path
)
444 entries1
= os
.listdir(path1
)
446 entries2
= os
.listdir(path2
)
451 missing
= filter(lambda x
: x
not in entries2
, entries1
)
452 extra
= filter(lambda x
: x
not in entries1
, entries2
)
454 failures
.report('Directory /%s is missing entries: %s' %
455 (rel_path
, ', '.join(missing
)))
458 failures
.report('Directory /%s has extra entries: %s' %
459 (rel_path
, ', '.join(extra
)))
462 for entry
in entries1
:
463 new_rel_path
= os
.path
.join(rel_path
, entry
)
464 if not tree_compare(failures
, base1
, base2
, run_diff
, new_rel_path
):
469 def verify_contents_single(failures
, cvsrepos
, verifyrepos
, kind
, label
, ctx
):
470 """Verify the HEAD revision of a trunk, tag, or branch.
472 Verify that the contents of the HEAD revision of all directories and
473 files in the conversion repository VERIFYREPOS match the ones in the
474 CVS repository CVSREPOS. KIND can be either 'trunk', 'tag' or
475 'branch'. If KIND is either 'tag' or 'branch', LABEL is used to
476 specify the name of the tag or branch. CTX has the attributes:
477 CTX.tmpdir: specifying the directory for all temporary files.
478 CTX.skip_cleanup: if true, the temporary files are not deleted.
479 CTX.run_diff: if true, run diff on differing files."""
481 itemname
= kind
+ (kind
!= 'trunk' and '-' + label
or '')
482 cvs_export_dir
= os
.path
.join(
483 ctx
.tmpdir
, 'cvs-export-%s' % itemname
)
484 vrf_export_dir
= os
.path
.join(
485 ctx
.tmpdir
, '%s-export-%s' % (verifyrepos
.name
, itemname
))
488 cvslabel
= transform_symbol(ctx
, label
)
493 cvsrepos
.export(cvs_export_dir
, cvslabel
, ctx
.keyword_opt
)
495 verifyrepos
.export_trunk(vrf_export_dir
)
497 verifyrepos
.export_tag(vrf_export_dir
, label
)
499 verifyrepos
.export_branch(vrf_export_dir
, label
)
502 failures
, cvs_export_dir
, vrf_export_dir
, ctx
.run_diff
506 if not ctx
.skip_cleanup
:
507 if os
.path
.exists(cvs_export_dir
):
508 shutil
.rmtree(cvs_export_dir
)
509 if os
.path
.exists(vrf_export_dir
):
510 shutil
.rmtree(vrf_export_dir
)
514 def verify_contents(failures
, cvsrepos
, verifyrepos
, ctx
):
515 """Verify that the contents of the HEAD revision of all directories
516 and files in the trunk, all tags and all branches in the conversion
517 repository VERIFYREPOS matches the ones in the CVS repository CVSREPOS.
518 CTX is passed through to verify_contents_single()."""
520 # branches/tags that failed:
523 # Verify contents of trunk
524 print 'Verifying trunk'
526 if not verify_contents_single(
527 failures
, cvsrepos
, verifyrepos
, 'trunk', None, ctx
529 locations
.append('trunk')
531 # Verify contents of all tags
532 for tag
in verifyrepos
.tags():
533 print 'Verifying tag', tag
535 if not verify_contents_single(
536 failures
, cvsrepos
, verifyrepos
, 'tag', tag
, ctx
538 locations
.append('tag:' + tag
)
540 # Verify contents of all branches
541 for branch
in verifyrepos
.branches():
542 if branch
[:10] == 'unlabeled-':
543 print 'Skipped branch', branch
545 print 'Verifying branch', branch
546 if not verify_contents_single(
547 failures
, cvsrepos
, verifyrepos
, 'branch', branch
, ctx
549 locations
.append('branch:' + branch
)
552 assert bool(failures
) == bool(locations
), \
553 "failures = %r\nlocations = %r" % (failures
, locations
)
557 sys
.stdout
.write('FAIL: %s != %s: %d failure(s) in:\n'
558 % (cvsrepos
, verifyrepos
, failures
.count
))
559 for location
in locations
:
560 sys
.stdout
.write(' %s\n' % location
)
562 sys
.stdout
.write('PASS: %s == %s\n' % (cvsrepos
, verifyrepos
))
571 parser
= optparse
.OptionParser(
572 usage
='%prog [options] cvs-repos verify-repos')
573 parser
.add_option('--branch',
574 help='verify contents of the branch BRANCH only')
575 parser
.add_option('--diff', action
='store_true', dest
='run_diff',
576 help='run diff on differing files')
577 parser
.add_option('--tag',
578 help='verify contents of the tag TAG only')
579 parser
.add_option('--tmpdir',
581 help='path to store temporary files')
582 parser
.add_option('--trunk', action
='store_true',
583 help='verify contents of trunk only')
584 parser
.add_option('--symbol-transform', action
='append',
586 help='transform symbol names from P to S like cvs2svn, '
587 'except transforms SVN symbol to CVS symbol')
588 parser
.add_option('--svn',
589 action
='store_const', dest
='repos_type', const
='svn',
590 help='assume verify-repos is svn [default]')
591 parser
.add_option('--hg',
592 action
='store_const', dest
='repos_type', const
='hg',
593 help='assume verify-repos is hg')
594 parser
.add_option('--git',
595 action
='store_const', dest
='repos_type', const
='git',
596 help='assume verify-repos is git')
597 parser
.add_option('--suppress-keywords',
598 action
='store_const', dest
='keyword_opt', const
='-kk',
599 help='suppress CVS keyword expansion '
600 '(equivalent to --keyword-opt=-kk)')
601 parser
.add_option('--keyword-opt',
603 help='control CVS keyword expansion by adding OPT to '
604 'cvs export command line')
606 parser
.set_defaults(run_diff
=False,
609 symbol_transforms
=[],
611 (options
, args
) = parser
.parse_args()
613 symbol_transforms
= []
614 for value
in options
.symbol_transforms
:
616 [pattern
, replacement
] = value
.split(":")
618 symbol_transforms
.append(
619 RegexpSymbolTransform(pattern
, replacement
))
621 parser
.error("'%s' is not a valid regexp." % (pattern
,))
624 """Print an error to sys.stderr."""
625 sys
.stderr
.write('Error: ' + str(msg
) + '\n')
627 verify_branch
= options
.branch
628 verify_tag
= options
.tag
629 verify_trunk
= options
.trunk
631 # Consistency check for options and arguments.
633 parser
.error("wrong number of arguments")
636 verify_path
= args
[1]
637 verify_klass
= {'svn': SvnRepos
,
639 'git': GitRepos
}[options
.repos_type
]
641 failures
= Failures()
643 # Open the repositories
644 cvsrepos
= CvsRepos(cvs_path
)
645 verifyrepos
= verify_klass(verify_path
)
649 print 'Verifying branch', verify_branch
650 verify_contents_single(
651 failures
, cvsrepos
, verifyrepos
, 'branch', verify_branch
, options
654 print 'Verifying tag', verify_tag
655 verify_contents_single(
656 failures
, cvsrepos
, verifyrepos
, 'tag', verify_tag
, options
659 print 'Verifying trunk'
660 verify_contents_single(
661 failures
, cvsrepos
, verifyrepos
, 'trunk', None, options
664 # Verify trunk, tags and branches
665 verify_contents(failures
, cvsrepos
, verifyrepos
, options
)
666 except RuntimeError, e
:
668 except KeyboardInterrupt:
671 sys
.exit(failures
and 1 or 0)
674 if __name__
== '__main__':