Make minimal changes to get HTML files to be valid XHTML, dropping from Strict
[cvs2svn.git] / verify-cvs2svn
blob b2de2105db1f543d3b85c52f349daaaba3699b1e
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 # The purpose of verify-cvs2svn is to verify the result of a cvs2svn
19 # repository conversion. The following tests are performed:
21 # 1. Content checking of the HEAD revision of trunk, all tags and all
22 # branches. Only the tags and branches in the Subversion
23 # repository are checked, i.e. there are no checks to verify that
24 # all tags and branches in the CVS repository are present.
26 # This program only works if you converted a subdirectory of a CVS
27 # repository, and not the whole repository. If you really did convert
28 # a whole repository and need to check it, you must create a CVSROOT
29 # directory above the current root using cvs init.
31 # ====================================================================
33 import os
34 import sys
35 import getopt
36 import popen2
37 import string
38 import shutil
# Executable names of the CVS and Subversion command line clients.
CVS_CMD = 'cvs'
SVN_CMD = 'svn'
# popen2.Popen4 is not provided on every platform.  Where it is missing,
# fall back to a small stand-in built on popen2.popen4() that offers
# only what this script uses: the 'fromchild' and 'tochild' streams and
# a wait() method.
try:
    Popen4 = popen2.Popen4
except AttributeError:
    class Popen4:
        """Minimal, incomplete substitute for popen2.Popen4."""

        def __init__(self, cmd):
            """Start CMD, given either as a command string or as a
            sequence of words, which is joined with single spaces."""
            if type(cmd) is not str:
                cmd = " ".join(cmd)
            streams = popen2.popen4(cmd)
            self.fromchild = streams[0]
            self.tochild = streams[1]

        def wait(self):
            """Close both streams.  Return the result of closing
            'fromchild' if it is true, else the result of closing
            'tochild'."""
            result = self.fromchild.close()
            if not result:
                result = self.tochild.close()
            return result
class CvsRepos:
    """A CVS repository, or a directory inside one, on the local disk."""

    def __init__(self, path):
        """Open the CVS repository at PATH.

        PATH is either the repository root (the directory containing
        CVSROOT) or a directory below it.  Sets self.cvsroot to the
        repository root and self.module to the location of PATH
        relative to that root ("." when PATH is the root itself).
        Raises RuntimeError if PATH is not a directory or if no
        enclosing directory contains CVSROOT."""
        path = os.path.abspath(path)
        if not os.path.isdir(path):
            raise RuntimeError('CVS path is not a directory')

        if os.path.exists(os.path.join(path, 'CVSROOT')):
            # PATH is the whole repository.
            self.cvsroot = path
            self.module = "."
            return

        # PATH lies inside a repository: climb towards the filesystem
        # root until a directory holding CVSROOT is found, collecting
        # the directory names passed on the way as the module path.
        root, module = os.path.split(path)
        while not os.path.exists(os.path.join(root, 'CVSROOT')):
            parent, name = os.path.split(root)
            if parent == root:
                # Reached the top without finding a repository.
                raise RuntimeError('Cannot find the CVSROOT')
            module = os.path.join(name, module)
            root = parent
        self.cvsroot = root
        self.module = module

    def export(self, dest_path, rev=None):
        """Export revision REV of the module into the new directory
        DEST_PATH.

        REV may be None to export the HEAD revision, or any valid CVS
        revision string to export that revision.  Raises RuntimeError
        if the cvs command returns a true status or produces any
        output."""
        os.mkdir(dest_path)
        if rev:
            selector = ['-r', rev]
        else:
            selector = ['-D', 'now']
        cmd = ([CVS_CMD, '-Q', '-d', ':local:' + self.cvsroot, 'export']
               + selector
               + ['-d', dest_path, self.module])
        pipe = Popen4(cmd)
        output = pipe.fromchild.read()
        status = pipe.wait()
        if not (status or output):
            return
        print('CMD FAILED: ' + ' '.join(cmd))
        print('Output:')
        sys.stdout.write(output)
        raise RuntimeError('CVS command failed!')
class SvnRepos:
    """A Subversion repository accessed through the svn command line
    client."""

    def __init__(self, url):
        """Open the Subversion repository at URL.

        Lists the repository root and, where they exist, the 'tags'
        and 'branches' directories, and caches the names found there.
        Raises RuntimeError if an svn command fails."""
        self.url = url

        # Cache a list of all tags and branches
        top_level = self.list('')
        if 'tags' in top_level:
            self.tag_list = self.list('tags')
        else:
            self.tag_list = []
        if 'branches' in top_level:
            self.branch_list = self.list('branches')
        else:
            self.branch_list = []

    def export(self, path, dest_path):
        """Export PATH (relative to the repository URL) to DEST_PATH.

        Raises RuntimeError if the svn command returns a true status
        or produces any output."""
        url = '/'.join([self.url, path])
        cmd = [SVN_CMD, 'export', '-q', url, dest_path]
        pipe = Popen4(cmd)
        output = pipe.fromchild.read()
        status = pipe.wait()
        if status or output:
            print('CMD FAILED: ' + ' '.join(cmd))
            print('Output:')
            sys.stdout.write(output)
            raise RuntimeError('SVN command failed!')

    def export_trunk(self, dest_path):
        """Export trunk to DEST_PATH."""
        self.export('trunk', dest_path)

    def export_tag(self, dest_path, tag):
        """Export the tag TAG to DEST_PATH."""
        self.export('tags/' + tag, dest_path)

    def export_branch(self, dest_path, branch):
        """Export the branch BRANCH to DEST_PATH."""
        self.export('branches/' + branch, dest_path)

    def list(self, path):
        """Return a list of all files and directories in PATH.

        The names are returned without the trailing '/' that marks a
        directory in the 'svn ls' output.  Raises RuntimeError if the
        svn command returns a true status."""
        cmd = [SVN_CMD, 'ls', self.url + '/' + path]
        pipe = Popen4(cmd)
        lines = pipe.fromchild.readlines()
        status = pipe.wait()
        if status:
            print('CMD FAILED: ' + ' '.join(cmd))
            print('Output:')
            sys.stdout.writelines(lines)
            raise RuntimeError('SVN command failed!')
        return self._entry_names(lines)

    def _entry_names(self, lines):
        """Return the entry names found in LINES, the raw output lines
        of 'svn ls'.

        The line terminator and the directory marker are removed
        separately.  Chopping a fixed two characters off every line
        would truncate file names (which have no marker), mangle a
        final line that lacks a newline, and leave the marker in place
        on lines ending in CR LF.  Blank lines are ignored."""
        entries = []
        for line in lines:
            name = line.rstrip('\r\n')
            if name[-1:] == '/':
                name = name[:-1]
            if name:
                entries.append(name)
        return entries

    def tags(self):
        """Return a list of all tags in the repository."""
        return self.tag_list

    def branches(self):
        """Return a list of all branches in the repository."""
        return self.branch_list
def file_compare(base1, base2, run_diff, rel_path):
    """Compare the contents of two files.  The paths are specified as two
    base paths BASE1 and BASE2, and a path REL_PATH that is relative to the
    two base paths.  If the contents differ, an anomaly message is
    printed and, when RUN_DIFF is true, 'diff -u' is run on the two
    files.  Return 1 if the file contents are identical, else 0."""
    path1 = os.path.join(base1, rel_path)
    path2 = os.path.join(base2, rel_path)

    # Compare in binary mode, one chunk at a time, so that large files
    # are never held in memory as a whole.  The nested try/finally
    # blocks make sure both files are closed on every exit path.
    identical = 1
    file1 = open(path1, 'rb')
    try:
        file2 = open(path2, 'rb')
        try:
            while 1:
                data1 = file1.read(8192)
                data2 = file2.read(8192)
                if data1 != data2:
                    identical = 0
                    break
                if not data1:
                    # Both files ended at the same point.
                    break
        finally:
            file2.close()
    finally:
        file1.close()

    if not identical:
        print('*** ANOMALY: File contents differ for %s' % rel_path)
        if run_diff:
            # NOTE(review): the paths are placed in a shell command line
            # with plain double quotes only; file names containing
            # quotes or other shell metacharacters are not escaped.
            os.system('diff -u "' + path1 + '" "' + path2 + '"')
    return identical
def tree_compare(base1, base2, run_diff, rel_path=''):
    """Recursively compare two directory trees, file contents included.

    BASE1 and BASE2 are the roots of the two trees and REL_PATH is the
    path, relative to both roots, of the item to compare.  RUN_DIFF is
    handed on to file_compare().  An anomaly message is printed for
    every difference found.  Return 1 if the trees are identical,
    else 0."""
    if rel_path:
        path1 = os.path.join(base1, rel_path)
        path2 = os.path.join(base2, rel_path)
    else:
        path1, path2 = base1, base2

    if os.path.isfile(path1) and os.path.isfile(path2):
        return file_compare(base1, base2, run_diff, rel_path)
    if not (os.path.isdir(path1) and os.path.isdir(path2)):
        print('*** ANOMALY: Path type differ for %s' % rel_path)
        return 0

    names1 = os.listdir(path1)
    names2 = os.listdir(path2)
    names1.sort()
    names2.sort()
    missing = [name for name in names1 if name not in names2]
    extra = [name for name in names2 if name not in names1]
    if missing:
        print('*** ANOMALY: Directory /%s is missing entries: %s'
              % (rel_path, ', '.join(missing)))
    if extra:
        print('*** ANOMALY: Directory /%s has extra entries: %s'
              % (rel_path, ', '.join(extra)))
    if missing or extra:
        return 0

    # Visit every entry even after a mismatch so that all anomalies
    # get reported, not just the first one.
    identical = 1
    for name in names1:
        child = os.path.join(rel_path, name)
        if not tree_compare(base1, base2, run_diff, child):
            identical = 0
    return identical
def verify_contents_single(cvsrepos, svnrepos, kind, label, ctx):
    """Check one line of development of SVNREPOS against CVSREPOS.

    Both repositories are exported into directories below CTX.tempdir
    and the exported trees are compared.  KIND is 'trunk', 'tag' or
    'branch'; for 'tag' and 'branch', LABEL names the tag or branch.
    CTX has the attributes:
      CTX.tempdir: specifying the directory for all temporary files.
      CTX.skip_cleanup: if true, the temporary files are not deleted.
      CTX.run_diff: if true, run diff on differing files.
    Return 1 if the two exports are identical, else 0.
    """
    if kind == 'trunk':
        itemname = kind
    else:
        itemname = kind + '-' + label
    cvs_export_dir = os.path.join(ctx.tempdir, 'cvs-export-' + itemname)
    svn_export_dir = os.path.join(ctx.tempdir, 'svn-export-' + itemname)

    try:
        cvsrepos.export(cvs_export_dir, label)
        if kind == 'trunk':
            svnrepos.export_trunk(svn_export_dir)
        elif kind == 'tag':
            svnrepos.export_tag(svn_export_dir, label)
        else:
            svnrepos.export_branch(svn_export_dir, label)

        identical = tree_compare(cvs_export_dir, svn_export_dir,
                                 ctx.run_diff)
    finally:
        # Remove the exports even when an export or the comparison
        # raised, unless the user asked to keep them.
        if not ctx.skip_cleanup:
            for export_dir in (cvs_export_dir, svn_export_dir):
                if os.path.exists(export_dir):
                    shutil.rmtree(export_dir)

    if identical:
        return 1
    return 0
def verify_contents(cvsrepos, svnrepos, ctx):
    """Check trunk, every tag and every branch of SVNREPOS against the
    CVS repository CVSREPOS and print a summary of the result.

    Branches whose name starts with 'unlabeled-' are skipped.  CTX is
    passed through to verify_contents_single()."""
    anomalies = []

    # Trunk
    print('Verifying trunk')
    if not verify_contents_single(cvsrepos, svnrepos, 'trunk', None, ctx):
        anomalies.append('trunk')

    # Tags
    for tag in svnrepos.tags():
        print('Verifying tag %s' % tag)
        if not verify_contents_single(cvsrepos, svnrepos, 'tag', tag, ctx):
            anomalies.append('tag:' + tag)

    # Branches
    for branch in svnrepos.branches():
        if branch[:10] == 'unlabeled-':
            print('Skipped branch %s' % branch)
            continue
        print('Verifying branch %s' % branch)
        if not verify_contents_single(cvsrepos, svnrepos, 'branch',
                                      branch, ctx):
            anomalies.append('branch:' + branch)

    # Summary, separated from the progress messages by an empty line
    print('')
    if not anomalies:
        print('No content anomalies detected')
        return
    count = len(anomalies)
    if count == 1:
        ending = "y"
    else:
        ending = "ies"
    print('%d content anomal%s detected:' % (count, ending))
    for anomaly in anomalies:
        print('  %s' % anomaly)
class OptionContext:
    """Plain attribute container for the run-time options; main() stores
    run_diff, tempdir and skip_cleanup on an instance of it."""
def main(argv):
    """Command line entry point.

    ARGV is the complete argument vector, program name included.
    Parses the options, opens both repositories and verifies either
    the single trunk, tag or branch that was requested or, by default,
    trunk together with all tags and branches.

    Exits with status 1 on a usage error or when a RuntimeError is
    raised while verifying, and with status 0 after --help.  A
    keyboard interrupt ends the run quietly."""
    def usage():
        """Print usage."""
        print('USAGE: %s cvs-repos-path svn-repos-path'
              % os.path.basename(argv[0]))
        print(' --branch=BRANCH verify contents of the branch BRANCH only')
        print(' --diff run diff on differing files')
        print(' --help, -h print this usage message and exit')
        print(' --skip-cleanup do not delete the temporary files')
        print(' --tag=TAG verify contents of the tag TAG only')
        print(' --tempdir=PATH path to store temporary files')
        print(' --trunk verify contents of trunk only')

    def error(msg):
        """Print an error to sys.stderr."""
        sys.stderr.write('Error: ' + str(msg) + '\n')

    try:
        opts, args = getopt.getopt(argv[1:], 'h',
                                   ['branch=', 'diff', 'help', 'tag=',
                                    'tempdir=', 'trunk', 'skip-cleanup'])
    except getopt.GetoptError:
        # sys.exc_info() avoids the interpreter-version-specific syntax
        # for binding the exception in the except clause.
        error(sys.exc_info()[1])
        usage()
        sys.exit(1)

    # Default values
    ctx = OptionContext()
    ctx.run_diff = 0
    ctx.tempdir = ''
    ctx.skip_cleanup = 0

    verify_branch = None
    verify_tag = None
    verify_trunk = None

    for opt, value in opts:
        if opt == '--branch':
            verify_branch = value
        elif opt == '--diff':
            ctx.run_diff = 1
        elif opt == '--help' or opt == '-h':
            usage()
            sys.exit(0)
        elif opt == '--tag':
            verify_tag = value
        elif opt == '--tempdir':
            ctx.tempdir = value
        elif opt == '--trunk':
            verify_trunk = 1
        elif opt == '--skip-cleanup':
            ctx.skip_cleanup = 1

    # Consistency check for options and arguments.
    if len(args) != 2:
        usage()
        sys.exit(1)

    cvs_path = args[0]
    # Check if the user supplied a URL or a path
    if args[1].find('://') != -1:
        svn_url = args[1]
    else:
        # Turn a local path into a file:// URL.  An absolute path that
        # does not start with '/' gets one prepended, and any
        # platform-specific separators are converted to '/'.
        abspath = os.path.abspath(args[1])
        svn_url = 'file://' + (abspath[0] != '/' and '/' or '') + abspath
        if os.sep != '/':
            svn_url = svn_url.replace(os.sep, '/')

    try:
        # Open the repositories
        cvsrepos = CvsRepos(cvs_path)
        svnrepos = SvnRepos(svn_url)

        # Do our thing...
        if verify_branch:
            print('Verifying branch %s' % verify_branch)
            verify_contents_single(cvsrepos, svnrepos, 'branch',
                                   verify_branch, ctx)
        elif verify_tag:
            print('Verifying tag %s' % verify_tag)
            verify_contents_single(cvsrepos, svnrepos, 'tag',
                                   verify_tag, ctx)
        elif verify_trunk:
            print('Verifying trunk')
            verify_contents_single(cvsrepos, svnrepos, 'trunk', None, ctx)
        else:
            # Verify trunk, tags and branches
            verify_contents(cvsrepos, svnrepos, ctx)
    except RuntimeError:
        # Report the failure and signal it through the exit status,
        # like the other error paths of this function do.
        error(sys.exc_info()[1])
        sys.exit(1)
    except KeyboardInterrupt:
        pass
# Run the verification only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)