* cvs2svn: Use gnu_getopt when available (Python >= 2.3) for more flexible
[cvs2svn.git] / cvs2svn_lib / svn_repository_mirror.py
blob08e9775e41217a968b5137b7c87acf87dda2b7f7
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2006 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains the SVNRepositoryMirror class."""
20 from __future__ import generators
22 from boolean import *
23 import common
24 import config
25 from context import Ctx
26 from log import Log
27 import key_generator
28 from artifact_manager import artifact_manager
29 import database
30 from tags_database import TagsDatabase
31 from symbolings_reader import SymbolingsReader
32 from fill_source import FillSource
33 from svn_revision_range import SVNRevisionRange
34 from svn_commit_item import SVNCommitItem
35 from svn_commit import SVNCommit
class SVNRepositoryMirror:
  """Mirror a Subversion Repository as it is constructed, one
  SVNCommit at a time.  The mirror is skeletal; it does not contain
  file contents.  The creation of a dumpfile or Subversion repository
  is handled by delegates.  See self.add_delegate method for how to
  set delegates.

  The structure of the repository is kept in two databases and one
  hash.  The revs_db database maps revisions to root node keys, and
  the nodes_db database maps node keys to nodes.  A node is a hash
  from directory names to keys.  Both the revs_db and the nodes_db are
  stored on disk and each access is expensive.

  The nodes_db database only has the keys for old revisions.  The
  revision that is being constructed is kept in memory in the new_nodes
  hash which is cheap to access.

  You must invoke _start_commit between SVNCommits.

  *** WARNING *** All path arguments to methods in this class CANNOT
  have leading or trailing slashes."""

  class SVNRepositoryMirrorPathExistsError(Exception):
    """Exception raised if an attempt is made to add a path to the
    repository mirror and that path already exists in the youngest
    revision of the repository."""

    pass

  class SVNRepositoryMirrorUnexpectedOperationError(Exception):
    """Exception raised if a CVSRevision is found to have an unexpected
    operation (OP) value."""

    pass

  class SVNRepositoryMirrorInvalidFillOperationError(Exception):
    """Exception raised if an empty SymbolicNameFillingGuide is returned
    during a fill where the branch in question already exists."""

    pass

  def __init__(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""

    self.key_generator = key_generator.KeyGenerator()

    self.delegates = [ ]

    # This corresponds to the 'revisions' table in a Subversion fs.
    self.revs_db = database.SDatabase(
        artifact_manager.get_temp_file(config.SVN_MIRROR_REVISIONS_DB),
        database.DB_OPEN_NEW)

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track metadata, not file contents.)
    self.nodes_db = database.Database(
        artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_DB),
        database.DB_OPEN_NEW)

    # Start at revision 0 without a root node.  It will be created
    # by _open_writable_root_node.
    self.youngest = 0
    self.new_root_key = None
    self.new_nodes = { }

    if not Ctx().trunk_only:
      ###PERF IMPT: Suck this into memory.
      self.tags_db = TagsDatabase(database.DB_OPEN_READ)
      self.symbolings_reader = SymbolingsReader()

  def _initialize_repository(self, date):
    """Initialize the repository by creating the directories for
    trunk, tags, and branches.  This method should only be called
    after all delegates are added to the repository mirror.

    DATE is stamped on the synthetic revision 1 that holds these
    directories."""

    # Make a 'fake' SVNCommit so we can take advantage of the revprops
    # magic therein
    svn_commit = SVNCommit("Initialization", 1)
    svn_commit.set_date(date)
    svn_commit.set_log_msg("New repository initialized by cvs2svn.")

    self._start_commit(svn_commit)
    self._mkdir(Ctx().project.trunk_path)
    if not Ctx().trunk_only:
      self._mkdir(Ctx().project.branches_path)
      self._mkdir(Ctx().project.tags_path)

  def _start_commit(self, svn_commit):
    """Start a new commit, first flushing the previous one (if any)
    to the on-disk databases."""

    if self.youngest > 0:
      self._end_commit()

    self.youngest = svn_commit.revnum
    self.new_root_key = None
    self.new_nodes = { }

    self._invoke_delegates('start_commit', svn_commit)

  def _end_commit(self):
    """Called at the end of each commit.  This method copies the newly
    created nodes to the on-disk nodes db and records the root node key
    of the youngest revision in the revs db."""

    if self.new_root_key is None:
      # No changes were made in this revision, so we make the root node
      # of the new revision be the same as the last one.
      self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
    else:
      self.revs_db[str(self.youngest)] = self.new_root_key
      # Copy the new nodes to the nodes_db
      for key, value in self.new_nodes.items():
        self.nodes_db[key] = value

  def _get_node(self, key):
    """Returns the node contents for KEY which may refer to either
    self.nodes_db or self.new_nodes.  The in-memory new_nodes hash is
    consulted first."""

    if key in self.new_nodes:
      return self.new_nodes[key]
    else:
      return self.nodes_db[key]

  def _open_readonly_node(self, path, revnum):
    """Open a readonly node for PATH at revision REVNUM.  Returns the
    node key if the path exists, else None.  (Use self._get_node() to
    obtain the contents for the returned key.)"""

    # Get the root key
    if revnum == self.youngest:
      if self.new_root_key is None:
        # Nothing has been written in the youngest revision yet, so its
        # tree is still identical to that of the previous revision.
        node_key = self.revs_db[str(self.youngest - 1)]
      else:
        node_key = self.new_root_key
    else:
      node_key = self.revs_db[str(revnum)]

    # Descend one path component at a time.
    for component in path.split('/'):
      node_contents = self._get_node(node_key)
      node_key = node_contents.get(component, None)
      if node_key is None:
        return None

    return node_key

  def _open_writable_root_node(self):
    """Open a writable root node.  The current root node is returned
    immediately if it is already writable.  If not, create a new one by
    copying the contents of the root node of the previous version.

    Return the tuple (root key, root contents)."""

    if self.new_root_key is not None:
      return self.new_root_key, self.new_nodes[self.new_root_key]

    if self.youngest < 2:
      # Revision 1 has no predecessor to copy from.
      new_contents = { }
    else:
      new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
    self.new_root_key = self.key_generator.gen_key()
    self.new_nodes = { self.new_root_key: new_contents }

    return self.new_root_key, new_contents

  def _open_writable_node(self, svn_path, create):
    """Open a writable node for the path SVN_PATH, creating SVN_PATH
    and any missing directories if CREATE is True.

    Return the tuple (node key, node contents), or (None, None) if a
    component of SVN_PATH does not exist and CREATE is false.

    The 'mkdir' delegate method is invoked for each missing
    intermediate directory that is created, but not for the last
    component of SVN_PATH; reporting that one is left to the caller."""

    parent_key, parent_contents = self._open_writable_root_node()

    # Walk down the path, one node at a time.
    path_so_far = None
    components = svn_path.split('/')
    last_index = len(components) - 1
    for i in range(len(components)):
      component = components[i]
      path_so_far = common.path_join(path_so_far, component)
      this_key = parent_contents.get(component, None)
      if this_key is not None:
        # The component exists.
        this_contents = self.new_nodes.get(this_key, None)
        if this_contents is None:
          # Suck the node from the nodes_db, but update the key so that
          # the node stored for older revisions stays untouched.
          this_contents = self.nodes_db[this_key]
          this_key = self.key_generator.gen_key()
          self.new_nodes[this_key] = this_contents
          parent_contents[component] = this_key
      elif create:
        # The component does not exist, so we create it.
        this_contents = { }
        this_key = self.key_generator.gen_key()
        self.new_nodes[this_key] = this_contents
        parent_contents[component] = this_key
        if i < last_index:
          self._invoke_delegates('mkdir', path_so_far)
      else:
        # The component does not exist and we are not instructed to
        # create it, so we give up.
        return None, None

      parent_key = this_key
      parent_contents = this_contents

    return this_key, this_contents

  def _path_exists(self, path):
    """If PATH exists in self.youngest of the svn repository mirror,
    return True, else return False.

    PATH must not start with '/'."""

    return self._open_readonly_node(path, self.youngest) is not None

  def _fast_delete_path(self, parent_path, parent_contents, component):
    """Delete COMPONENT from the parent directory PARENT_PATH with the
    contents PARENT_CONTENTS.  Do nothing if COMPONENT does not exist
    in PARENT_CONTENTS.

    PARENT_CONTENTS must already be a writable node; it is modified in
    place."""

    if component in parent_contents:
      del parent_contents[component]
      self._invoke_delegates('delete_path',
                             common.path_join(parent_path, component))

  def _delete_path(self, svn_path, should_prune=False):
    """Delete SVN_PATH from the tree.  If SHOULD_PRUNE is true, then
    delete all ancestor directories that are made empty when SVN_PATH
    is deleted.  In other words, SHOULD_PRUNE is like the -P option to
    'cvs checkout'.

    NOTE: This function ignores requests to delete the root directory
    or any directory for which Ctx().project.is_unremovable() returns
    True, either directly or by pruning."""

    if svn_path == '' or Ctx().project.is_unremovable(svn_path):
      return

    (parent_path, entry,) = common.path_split(svn_path)
    if parent_path:
      parent_key, parent_contents = \
          self._open_writable_node(parent_path, False)
    else:
      parent_key, parent_contents = self._open_writable_root_node()

    if parent_key is not None:
      self._fast_delete_path(parent_path, parent_contents, entry)
      # The following recursion makes pruning an O(n^2) operation in the
      # worst case (where n is the depth of SVN_PATH), but the worst case
      # is probably rare, and the constant cost is pretty low.  Another
      # drawback is that we issue a delete for each path and not just
      # a single delete for the topmost directory pruned.
      if should_prune and len(parent_contents) == 0:
        self._delete_path(parent_path, True)

  def _mkdir(self, path):
    """Create PATH in the repository mirror at the youngest revision."""

    self._open_writable_node(path, True)
    self._invoke_delegates('mkdir', path)

  def _change_path(self, cvs_rev):
    """Register a change in self.youngest for the CVS_REV's svn_path
    in the repository mirror."""

    # We do not have to update the nodes because our mirror is only
    # concerned with the presence or absence of paths, and a file
    # content change does not cause any path changes.
    self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))

  def _add_path(self, cvs_rev):
    """Add the CVS_REV's svn_path to the repository mirror."""

    self._open_writable_node(cvs_rev.svn_path, True)
    self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))

  def _copy_path(self, src_path, dest_path, src_revnum):
    """Copy SRC_PATH at subversion revision number SRC_REVNUM to
    DEST_PATH.  In the youngest revision of the repository, DEST_PATH's
    parent *must* exist, but DEST_PATH *cannot* exist.

    Return the node key and the contents of the new node at DEST_PATH
    as a dictionary.

    Raise SVNRepositoryMirrorPathExistsError if DEST_PATH already
    exists in the youngest revision."""

    # get the contents of the node of our src_path
    src_key = self._open_readonly_node(src_path, src_revnum)
    src_contents = self._get_node(src_key)

    # Get the parent path and the base path of the dest_path
    (dest_parent, dest_basename,) = common.path_split(dest_path)
    if dest_parent:
      dest_parent_key, dest_parent_contents = \
          self._open_writable_node(dest_parent, False)
    else:
      # DEST_PATH lives directly under the root directory.  An empty
      # parent path cannot be looked up via _open_writable_node(), so
      # open the root node itself (as _delete_path() does).
      dest_parent_key, dest_parent_contents = self._open_writable_root_node()

    if dest_basename in dest_parent_contents:
      msg = "Attempt to add path '%s' to repository mirror " % dest_path
      msg += "when it already exists in the mirror."
      raise self.SVNRepositoryMirrorPathExistsError(msg)

    dest_parent_contents[dest_basename] = src_key
    self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

    # Yes sir, src_key and src_contents are also the contents of the
    # destination.  This is a cheap copy, remember!  :-)
    return src_key, src_contents

  def _fill_symbolic_name(self, svn_commit):
    """Performs all copies necessary to create as much of the tag
    or branch SVN_COMMIT.symbolic_name as possible given the current
    revision of the repository mirror.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it.

    Raise SVNRepositoryMirrorInvalidFillOperationError if there are no
    fill sources although the branch already exists."""

    symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
        svn_commit.symbolic_name, self.youngest)
    # Get the list of sources for the symbolic name.
    sources = symbol_fill.get_sources()

    if sources:
      if svn_commit.symbolic_name in self.tags_db:
        dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
      else:
        dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)

      dest_key = self._open_writable_node(dest_prefix, False)[0]
      self._fill(symbol_fill, dest_prefix, dest_key, sources)
    else:
      # We can only get here for a branch whose first commit is an add
      # (as opposed to a copy).
      dest_path = Ctx().project.get_branch_path(symbol_fill.name)
      if not self._path_exists(dest_path):
        # If our symbol_fill was empty, that means that our first
        # commit on the branch was to a file added on the branch, and
        # that this is our first fill of that branch.
        #
        # This case is covered by test 16.
        #
        # ...we create the branch by copying trunk from our
        # current revision number minus 1
        source_path = Ctx().project.trunk_path
        entries = self._copy_path(source_path, dest_path,
                                  svn_commit.revnum - 1)[1]
        # Now since we've just copied trunk to a branch that's
        # *supposed* to be empty, we delete any entries in the
        # copied directory.  Iterate over a snapshot of the names so
        # that the deletions can never disturb the iteration.
        for entry in list(entries.keys()):
          del_path = dest_path + '/' + entry
          # Delete but don't prune.
          self._delete_path(del_path)
      else:
        msg = "Error filling branch '" \
              + common.clean_symbolic_name(symbol_fill.name) + "'.\n"
        msg += "Received an empty SymbolicNameFillingGuide and\n"
        msg += "attempted to create a branch that already exists."
        raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)

  def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
            path = None, parent_source_prefix = None,
            preferred_revnum = None, prune_ok = None):
    """Fill the tag or branch at DEST_PREFIX + PATH with items from
    SOURCES, and recurse into the child items.

    DEST_PREFIX is the prefix of the destination directory, e.g.
    '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
    FillSource classes that are candidates to be copied to the
    destination.  DEST_KEY is the key in self.nodes_db to the
    destination, or None if the destination does not yet exist.

    PATH is the path relative to DEST_PREFIX.  If PATH is None, we
    are at the top level, e.g. '/tags/my_tag'.

    PARENT_SOURCE_PREFIX is the source prefix that was used to copy
    the parent directory, and PREFERRED_REVNUM is an int which is the
    source revision number that the caller (who may have copied KEY's
    parent) used to perform its copy.  If PREFERRED_REVNUM is None,
    then no revision is preferable to any other (which probably means
    that no copies have happened yet).

    PRUNE_OK means that a copy has been made in this recursion, and
    it's safe to prune directories that are not in
    SYMBOL_FILL._node_tree, provided that said directory has a source
    prefix of one of the PARENT_SOURCE_PREFIX.

    PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
    should only be passed in by recursive calls."""

    # Calculate scores and revnums for all sources
    for source in sources:
      src_revnum, score = symbol_fill.get_best_revnum(source.node,
                                                      preferred_revnum)
      source.set_score(score, src_revnum)

    # Sort the sources in descending score order so that we will make
    # an eventual copy from the source with the highest score.  (The
    # ordering itself is defined by FillSource.)
    sources.sort()
    copy_source = sources[0]

    src_path = common.path_join(copy_source.prefix, path)
    dest_path = common.path_join(dest_prefix, path)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    do_copy = False
    if dest_key is None:
      do_copy = True
    elif prune_ok and (parent_source_prefix != copy_source.prefix or
                       copy_source.revnum != preferred_revnum):
      # We are about to replace the destination, so we need to remove
      # it before we perform the copy.
      self._delete_path(dest_path)
      do_copy = True

    if do_copy:
      dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                               copy_source.revnum)
      prune_ok = True
    else:
      dest_entries = self._get_node(dest_key)

    # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
    # elements and the values are lists of FillSource classes where
    # this path element exists.
    src_entries = {}
    for source in sources:
      if isinstance(source.node, SVNRevisionRange):
        # A revision range is a leaf of the fill tree; it has no
        # children to descend into.
        continue
      for entry, node in source.node.items():
        src_entries.setdefault(entry, []).append(
            FillSource(source.prefix, node))

    if prune_ok:
      # Delete the entries in DEST_ENTRIES that are not in src_entries.
      delete_list = [entry for entry in dest_entries
                     if entry not in src_entries]
      if delete_list:
        if dest_key not in self.new_nodes:
          # The destination node is still the shared (copied) one; get
          # a private writable version before modifying it.
          dest_key, dest_entries = self._open_writable_node(dest_path, True)
        # Sort the delete list to get "diffable" dumpfiles.
        delete_list.sort()
        for entry in delete_list:
          self._fast_delete_path(dest_path, dest_entries, entry)

    # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
    src_keys = list(src_entries.keys())
    src_keys.sort()
    for src_key in src_keys:
      next_dest_key = dest_entries.get(src_key, None)
      self._fill(symbol_fill, dest_prefix, next_dest_key,
                 src_entries[src_key], common.path_join(path, src_key),
                 copy_source.prefix, copy_source.revnum, prune_ok)

  def _synchronize_default_branch(self, svn_commit):
    """Propagate any changes that happened on a non-trunk default
    branch to the trunk of the repository.  See
    CVSCommit._post_commit() for details on why this is necessary.

    Raise SVNRepositoryMirrorUnexpectedOperationError if a CVSRevision
    in SVN_COMMIT has an operation other than add, change, or delete."""

    for cvs_rev in svn_commit.cvs_revs:
      svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
      if cvs_rev.op == common.OP_ADD or cvs_rev.op == common.OP_CHANGE:
        if self._path_exists(svn_trunk_path):
          # Delete the path on trunk...
          self._delete_path(svn_trunk_path)
        # ...and copy over from branch
        self._copy_path(cvs_rev.svn_path, svn_trunk_path,
                        svn_commit.motivating_revnum)
      elif cvs_rev.op == common.OP_DELETE:
        # delete trunk path
        self._delete_path(svn_trunk_path)
      else:
        msg = ("Unknown CVSRevision operation '%s' in default branch sync."
               % cvs_rev.op)
        raise self.SVNRepositoryMirrorUnexpectedOperationError(msg)

  def commit(self, svn_commit):
    """Add an SVNCommit to the SVNRepository, incrementing the
    Repository revision number, and changing the repository.  Invoke
    the delegates' start_commit() method.

    If SVN_COMMIT is revision 2, the repository is first initialized
    (as revision 1) with the date of SVN_COMMIT."""

    if svn_commit.revnum == 2:
      self._initialize_repository(svn_commit.get_date())

    self._start_commit(svn_commit)

    if svn_commit.symbolic_name:
      Log().verbose("Filling symbolic name:",
                    common.clean_symbolic_name(svn_commit.symbolic_name))
      self._fill_symbolic_name(svn_commit)
    elif svn_commit.motivating_revnum:
      Log().verbose("Synchronizing default_branch motivated by %d"
                    % svn_commit.motivating_revnum)
      self._synchronize_default_branch(svn_commit)
    else: # This actually commits CVSRevisions
      if len(svn_commit.cvs_revs) > 1:
        plural = "s"
      else:
        plural = ""
      Log().verbose("Committing %d CVSRevision%s"
                    % (len(svn_commit.cvs_revs), plural))
      for cvs_rev in svn_commit.cvs_revs:
        # See comment in CVSCommit._commit() for what this is all
        # about.  Note that although asking self._path_exists() is
        # somewhat expensive, we only do it if the first two (cheap)
        # tests succeed first.
        if (cvs_rev.rev == "1.1.1.1"
            and not cvs_rev.deltatext_exists
            and self._path_exists(cvs_rev.svn_path)):
          # This change can be omitted.
          pass
        else:
          if cvs_rev.op == common.OP_ADD:
            self._add_path(cvs_rev)
          elif cvs_rev.op == common.OP_CHANGE:
            # Fix for Issue #74:
            #
            # Here's the scenario.  You have file FOO that is imported
            # on a non-trunk vendor branch.  So in r1.1 and r1.1.1.1,
            # the file exists.
            #
            # Moving forward in time, FOO is deleted on the default
            # branch (r1.1.1.2).  cvs2svn determines that this delete
            # also needs to happen on trunk, so FOO is deleted on
            # trunk.
            #
            # Along come r1.2, whose op is OP_CHANGE (because r1.1 is
            # not 'dead', we assume it's a change).  However, since
            # our trunk file has been deleted, svnadmin blows up--you
            # can't change a file that doesn't exist!
            #
            # Soooo... we just check the path, and if it doesn't
            # exist, we do an add... if the path does exist, it's
            # business as usual.
            if not self._path_exists(cvs_rev.svn_path):
              self._add_path(cvs_rev)
            else:
              self._change_path(cvs_rev)

        if cvs_rev.op == common.OP_DELETE:
          self._delete_path(cvs_rev.svn_path, Ctx().prune)

  def add_delegate(self, delegate):
    """Adds DELEGATE to self.delegates.

    For every delegate you add, as soon as SVNRepositoryMirror
    performs a repository action method, SVNRepositoryMirror will call
    the delegate's corresponding repository action method.  Multiple
    delegates will be called in the order that they are added.  See
    SVNRepositoryMirrorDelegate for more information."""

    self.delegates.append(delegate)

  def _invoke_delegates(self, method, *args):
    """Iterate through each of our delegates, in the order that they
    were added, and call the delegate's method named METHOD with the
    arguments in ARGS."""

    for delegate in self.delegates:
      getattr(delegate, method)(*args)

  def finish(self):
    """Flush the last commit (if any), call the delegates' finish
    method, and release the databases."""

    # Mirror the guard in _start_commit(): if no commit was ever
    # started there is nothing to flush, and _end_commit() would look
    # up a nonexistent revision -1.
    if self.youngest > 0:
      self._end_commit()
    self._invoke_delegates('finish')
    self.revs_db = None
    self.nodes_db = None
class SVNRepositoryMirrorDelegate:
  """Abstract superclass for any delegate to SVNRepositoryMirror.
  Subclasses must implement all of the methods below.

  For each method, a subclass implements, in its own way, the
  Subversion operation implied by the method's name.  For example, for
  the add_path method, the DumpfileDelegate would write out a
  "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
  would merely print that the path is being added to the repository,
  and the RepositoryDelegate would actually cause the path to be added
  to the Subversion repository that it is creating.

  Every method of this base class raises NotImplementedError.
  """

  def start_commit(self, svn_commit):
    """Perform any actions needed to start SVNCommit SVN_COMMIT;
    see subclass implementation for details."""

    raise NotImplementedError

  def mkdir(self, path):
    """PATH is a string; see subclass implementation for details."""

    raise NotImplementedError

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see subclass implementation for
    details."""

    raise NotImplementedError

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see subclass implementation for
    details."""

    raise NotImplementedError

  def delete_path(self, path):
    """PATH is a string; see subclass implementation for
    details."""

    raise NotImplementedError

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is a
    subversion revision number (int); see subclass implementation for
    details."""

    raise NotImplementedError

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""

    raise NotImplementedError