* cvs2svn: Use gnu_getopt when available (Python >= 2.3) for more flexible
[cvs2svn.git] / cvs2svn_lib / collect_data.py
blob01789dc55ab8658fb847674b4b09b05236163624
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2006 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains the classes that collect data from the RCS files of a CVS repository for cvs2svn."""
20 from __future__ import generators
22 import sys
23 import os
24 import re
25 import time
26 import sha
27 import stat
29 from boolean import *
30 import common
31 from common import warning_prefix
32 from common import error_prefix
33 import config
34 from log import Log
35 from context import Ctx
36 from artifact_manager import artifact_manager
37 from cvs_file import CVSFile
38 import cvs_revision
39 from stats_keeper import StatsKeeper
40 from key_generator import KeyGenerator
41 import database
42 from cvs_file_database import CVSFileDatabase
43 from cvs_revision_database import CVSRevisionDatabase
44 import symbol_database
45 import cvs2svn_rcsparse
# Suffix of a directory path that marks the directory as a CVS 'Attic'.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'

# A trunk revision number: exactly two numeric components, e.g. '1.7'.
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')

# A CVS-style branch tag such as '1.7.0.2'.  Group 1 is everything
# before the '0.' (including the trailing dot, e.g. '1.7.'), and group
# 2 is the final component, so group(1) + group(2) is the RCS branch
# number (e.g. '1.7.2').
cvs_branch_tag = re.compile(r'^((?:[0-9]+\.[0-9]+\.)+)0\.([0-9]+)$')

# A plain RCS branch number with an odd number of components, such as
# '1.7.2'.
rcs_branch_tag = re.compile(r'^(?:[0-9]+\.[0-9]+\.)+[0-9]+$')

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# The quantifier is inside the second group so that group 2 holds the
# whole final component ('12' for '1.1.1.12'), not just its last digit.
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9]+)$')
def is_branch_revision(rev):
  """Tell whether REV lies on a branch rather than on trunk.

  REV is a revision number string.  It is treated as a branch revision
  when it contains at least three dots (i.e., four or more components,
  such as '1.1.2.1')."""

  dot_count = rev.count('.')
  return dot_count > 2
def is_same_line_of_development(rev1, rev2):
  """Tell whether REV1 and REV2 belong to one line of development.

  Two revisions are on the same line of development when both are
  trunk revisions (exactly one dot each), or when they are identical
  up to (but excluding) their last dot, which means that they are on
  the same branch.  If either argument is None, the answer is always
  False."""

  if rev1 is None or rev2 is None:
    return False

  both_on_trunk = rev1.count('.') == 1 and rev2.count('.') == 1
  if both_on_trunk:
    return True

  # Compare everything in front of the final component:
  stem1 = rev1[:rev1.rfind('.')]
  stem2 = rev2[:rev2.rfind('.')]
  return stem1 == stem2
85 class _RevisionData:
86 """We track the state of each revision so that in set_revision_info,
87 we can determine if our op is an add/change/delete. We can do this
88 because in set_revision_info, we'll have all of the _RevisionData
89 for a file at our fingertips, and we need to examine the state of
90 our prev_rev to determine if we're an add or a change. Without the
91 state of the prev_rev, we are unable to distinguish between an add
92 and a change."""
94 def __init__(self, rev, timestamp, author, state, branches):
95 self.rev = rev
96 self.timestamp = timestamp
97 self.author = author
98 self.original_timestamp = timestamp
99 self._adjusted = False
100 self.state = state
102 # Numbers of branch first revisions sprouting from this revision,
103 # as specified by define_revision():
104 self.branches = branches
106 # The revision number of the parent of this revision along the
107 # same line of development, if any.
109 # For the first revision R on a branch, we consider the revision
110 # from which R sprouted to be the 'previous'.
112 # Note that this revision can't be determined arithmetically (due
113 # to cvsadmin -o, which is why this is necessary).
115 # If the key has no previous revision, then this field is None.
116 self.parent = None
118 # The revision numbers of any children that depend on this revision:
119 self.children = []
121 def adjust_timestamp(self, timestamp):
122 self._adjusted = True
123 self.timestamp = timestamp
125 def timestamp_was_adjusted(self):
126 return self._adjusted
class FileDataCollector(cvs2svn_rcsparse.Sink):
  """Class responsible for collecting RCS data for a particular file.

  The methods documented as 'callback method declared in Sink' receive
  the data of one RCS file.  Any collected data that need to be
  remembered are stored into the referenced CollectData instance."""

  def __init__(self, collect_data, filename):
    """Create an object that is prepared to receive data for FILENAME.
    FILENAME is the absolute filesystem path to the file in question.
    COLLECT_DATA is used to store the information collected about the
    file."""

    self.collect_data = collect_data

    (dirname, basename,) = os.path.split(filename)
    if dirname.endswith(OS_SEP_PLUS_ATTIC):
      # drop the 'Attic' portion from the filename for the canonical name:
      canonical_filename = os.path.join(
          dirname[:-len(OS_SEP_PLUS_ATTIC)], basename)
      file_in_attic = True
    else:
      canonical_filename = filename
      file_in_attic = False

    # We calculate and save some file metadata here, where we can do
    # it only once per file, instead of waiting until later where we
    # would have to do the same calculations once per CVS *revision*.

    cvs_path = Ctx().cvs_repository.get_cvs_path(canonical_filename)

    file_stat = os.stat(filename)
    # The size of our file in bytes
    file_size = file_stat[stat.ST_SIZE]

    # Whether or not the executable bit is set.
    file_executable = bool(file_stat[stat.ST_MODE] & stat.S_IXUSR)

    # mode is not known yet (it is filled in by set_expansion()), so
    # we temporarily set it to None.
    # NOTE(review): the leading None is presumably the file id, which
    # CollectData.add_cvs_file() assigns later -- confirm against
    # CVSFile's constructor.
    self.cvs_file = CVSFile(
        None, filename, canonical_filename, cvs_path,
        file_in_attic, file_executable, file_size, None
        )

    # A list [ (name, revision) ] of each known symbol in this file
    # with the revision number that it corresponds to.
    self._symbols = []

    # A map { revision -> c_rev } of the CVSRevision instances for all
    # revisions related to this file.  Note that items in this map
    # might be pre-filled as CVSRevisionIDs for revisions referred to
    # by earlier revisions but not yet processed.  As the revisions
    # are defined, the values are changed into CVSRevision instances.
    self._c_revs = {}

    # { revision : _RevisionData instance }
    self._rev_data = { }

    # A list [ revision ] of the revision numbers seen, in the order
    # they were given to us by rcsparse:
    self._rev_order = []

    # A list [ (parent, child) ] of revision number pairs indicating
    # that child depends on parent.
    self._dependencies = []

    # A map { revision : next revision }, filled in by
    # define_revision() from the RCS 'next' fields, so following
    # key -> value will yield you the next revision number.
    #
    # If a revision has no next revision, then it is not present as a
    # key.
    self.next_rev = { }

    # Hash mapping branch numbers, like '1.7.2', to branch names,
    # like 'Release_1_0_dev'.
    self.branch_names = { }

    # Hash mapping revision numbers, like '1.7', to lists of names
    # indicating which branches sprout from that revision, like
    # ['Release_1_0_dev', 'experimental_driver', ...].
    self.branchlist = { }

    # Like self.branchlist, but the values are lists of tag names that
    # apply to the key revision.
    self.taglist = { }

    # If set, this is an RCS branch number -- rcsparse calls this the
    # "principal branch", but CVS and RCS refer to it as the "default
    # branch", so that's what we call it, even though the rcsparse API
    # setter method is still 'set_principal_branch'.
    self.default_branch = None

    # If the RCS file doesn't have a default branch anymore, but does
    # have vendor revisions, then we make an educated guess that those
    # revisions *were* the head of the default branch up until the
    # commit of 1.2, at which point the file's default branch became
    # trunk.  This records the date at which 1.2 was committed.
    self.first_non_vendor_revision_date = None

  def _get_rev_id(self, revision):
    """Return the id of the entry recorded for REVISION in
    self._c_revs.  If there is no entry yet, create a CVSRevisionID
    with a freshly generated id, record it, and return its id.  If
    REVISION is None, return None."""

    if revision is None:
      return None
    rev_id = self._c_revs.get(revision)
    if rev_id is None:
      rev_id = cvs_revision.CVSRevisionID(
          self.collect_data.key_generator.gen_id(), self.cvs_file, revision)
      self._c_revs[revision] = rev_id
    return rev_id.id

  def set_principal_branch(self, branch):
    """Remember BRANCH as the default branch of this file.

    This is a callback method declared in Sink."""

    self.default_branch = branch

  def set_expansion(self, mode):
    """Store MODE as the mode of this file's CVSFile.

    This is a callback method declared in Sink."""

    self.cvs_file.mode = mode

  def define_tag(self, name, revision):
    """Remember the symbol name and revision, but don't process them yet.

    This is a callback method declared in Sink."""

    self._symbols.append( (name, revision,) )

  def set_branch_name(self, branch_number, name):
    """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and derive and record the revision from which NAME sprouts.
    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').

    If BRANCH_NUMBER already has a name, write a warning to stderr and
    ignore NAME."""

    if branch_number in self.branch_names:
      sys.stderr.write("%s: in '%s':\n"
                       " branch '%s' already has name '%s',\n"
                       " cannot also have name '%s', ignoring the latter\n"
                       % (warning_prefix,
                          self.cvs_file.filename, branch_number,
                          self.branch_names[branch_number], name))
      return

    self.branch_names[branch_number] = name
    # The branchlist is keyed on the revision number from which the
    # branch sprouts, so strip off the odd final component.
    sprout_rev = branch_number[:branch_number.rfind(".")]
    self.branchlist.setdefault(sprout_rev, []).append(name)
    self.collect_data.symbol_db.register_branch_creation(name)

  def set_tag_name(self, revision, name):
    """Record that tag NAME refers to the specified REVISION."""

    self.taglist.setdefault(revision, []).append(name)
    self.collect_data.symbol_db.register_tag_creation(name)

  def rev_to_branch_name(self, revision):
    """Return the name of the branch on which REVISION lies.
    REVISION is a non-branch revision number with an even number of
    components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
    For the convenience of callers, REVISION can also be a trunk
    revision such as '1.2', in which case just return None.  None is
    also returned if no name is recorded for the branch."""

    if trunk_rev.match(revision):
      return None
    return self.branch_names.get(revision[:revision.rindex(".")])

  def _process_symbol(self, name, revision):
    """Record a bidirectional mapping between symbolic NAME and REVISION.
    REVISION is an unprocessed revision number from the RCS file's
    header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
    This function will determine what kind of symbolic name it is by
    inspection, and record it in the right places."""

    m = cvs_branch_tag.match(revision)
    if m:
      # A CVS-style branch tag; drop the '0.' component to obtain the
      # RCS branch number:
      self.set_branch_name(m.group(1) + m.group(2), name)
    elif rcs_branch_tag.match(revision):
      self.set_branch_name(revision, name)
    else:
      self.set_tag_name(revision, name)

  def _transform_symbol(self, name):
    """Transform the symbol NAME using the renaming rules specified
    with --symbol-transform.  Return the transformed symbol name.

    The rules are applied in order, each one to the result of the
    previous one; every rule that changes the name is logged."""

    for (pattern, replacement) in Ctx().symbol_transforms:
      newname = pattern.sub(replacement, name)
      if newname != name:
        Log().warn(" symbol '%s' transformed to '%s'" % (name, newname))
        name = newname

    return name

  def _process_symbols(self):
    """Transform and record all of the symbols that were collected by
    define_tag().  A symbol name that is defined more than once is
    reported on stderr and recorded as a fatal error in collect_data
    (the symbol is still processed)."""

    # The names of all symbols defined so far for the current file
    # (only the keys matter).  Used to detect multiple definitions of
    # a symbol, something which can easily happen when
    # --symbol-transform is used.
    defined_symbols = { }

    for (name, revision,) in self._symbols:
      name = self._transform_symbol(name)

      if name in defined_symbols:
        err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
              % (error_prefix, name, self.cvs_file.filename)
        sys.stderr.write(err + "\n")
        self.collect_data.fatal_errors.append(err)

      defined_symbols[name] = None

      self._process_symbol(name, revision)

    # Free memory:
    self._symbols = None

  def admin_completed(self):
    """Process the collected symbols and hand the CVSFile over to
    collect_data.

    This is a callback method declared in Sink."""

    self._process_symbols()
    self.collect_data.add_cvs_file(self.cvs_file)

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """Record the data for REVISION and its link to NEXT, if any.

    This is a callback method declared in Sink."""

    rev_data = _RevisionData(
        revision, int(timestamp), author, state, branches)
    self._rev_order.append(revision)
    self._rev_data[revision] = rev_data

    # When on trunk, the RCS 'next' revision number points to what
    # humans might consider to be the 'previous' revision number.  For
    # example, 1.3's RCS 'next' is 1.2.
    #
    # However, on a branch, the RCS 'next' revision number really does
    # point to what humans would consider to be the 'next' revision
    # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
    #
    # In other words, in RCS, 'next' always means "where to find the
    # next deltatext that you need this revision to retrieve".
    #
    # That said, we don't *want* RCS's behavior here, so we determine
    # whether we're on trunk or a branch and record both the
    # (parent, child) dependency and the next_rev link accordingly.
    if next:
      if trunk_rev.match(revision):
        # On trunk, NEXT is the parent of REVISION:
        self._dependencies.append( (next, revision,) )
        self.next_rev[next] = revision
      else:
        # On a branch, REVISION is the parent of NEXT:
        self._dependencies.append( (revision, next,) )
        self.next_rev[revision] = next

  def _set_branch_dependencies(self, rev_data):
    """Set any branches sprouting from REV_DATA to depend on it."""

    for b in rev_data.branches:
      self._dependencies.append( (rev_data.rev, b) )

  def _resolve_dependencies(self):
    """Store the dependencies in self._dependencies into the rev_data
    objects.  Each child must not have been given a parent before."""

    for (parent, child,) in self._dependencies:
      self._rev_data[parent].children.append(child)
      child_data = self._rev_data[child]
      assert child_data.parent is None
      child_data.parent = parent

    # Free memory:
    self._dependencies = None

  def _update_default_branch(self, rev_data):
    """Ratchet up the highest vendor head revision based on REV_DATA,
    if necessary."""

    if self.default_branch:
      default_branch_root = self.default_branch + "."
      if (rev_data.rev.startswith(default_branch_root)
          and default_branch_root.count('.') == rev_data.rev.count('.')):
        # This revision is on the default branch, so record that it is
        # the new highest default branch head revision.
        self.collect_data.default_branches_db[self.cvs_file.cvs_path] = \
            rev_data.rev
    elif rev_data.rev == '1.2':
      # No default branch, so make an educated guess.
      #
      # This is probably the time when the file stopped having a
      # default branch, so make a note of it.
      self.first_non_vendor_revision_date = rev_data.timestamp
    else:
      m = vendor_revision.match(rev_data.rev)
      if m and ((not self.first_non_vendor_revision_date)
                or (rev_data.timestamp
                    < self.first_non_vendor_revision_date)):
        # We're looking at a vendor revision, and it wasn't
        # committed after this file lost its default branch, so bump
        # the maximum trunk vendor revision in the permanent record.
        self.collect_data.default_branches_db[self.cvs_file.cvs_path] = \
            rev_data.rev

  def _register_branch_commit(self, rev):
    """Register REV, which is a non-trunk revision number, as a commit
    on the corresponding branch."""

    # Check for unlabeled branches, record them.  We tried to collect
    # all branch names when we parsed the symbolic name header
    # earlier, of course, but that didn't catch unlabeled branches.
    # If a branch is unlabeled, this is our first encounter with it,
    # so we have to record its data now.
    branch_number = rev[:rev.rindex(".")]
    if branch_number not in self.branch_names:
      branch_name = "unlabeled-" + branch_number
      self.set_branch_name(branch_number, branch_name)

    # Register the commit on this non-trunk branch
    branch_name = self.branch_names[branch_number]
    self.collect_data.symbol_db.register_branch_commit(branch_name)

  def _resync_chain(self, rev_data):
    """If the REV_DATA.parent revision exists and it occurred later
    than the REV_DATA revision, then shove the previous revision back
    in time (and any before it that may need to shift).  Return True
    iff any resyncing was done.

    We sync backwards and not forwards because any given CVS Revision
    has only one previous revision.  However, a CVS Revision can *be*
    a previous revision for many other revisions (e.g., a revision
    that is the source of multiple branches).  This becomes relevant
    when we do the secondary synchronization in pass 2--we can make
    certain that we don't resync a revision earlier than its previous
    revision, but it would be non-trivial to make sure that we don't
    resync revision R *after* any revisions that have R as a previous
    revision."""

    resynced = False
    while rev_data.parent is not None:
      prev_rev_data = self._rev_data[rev_data.parent]

      if prev_rev_data.timestamp < rev_data.timestamp:
        # No resyncing needed here.
        return resynced

      old_timestamp = prev_rev_data.timestamp
      # Move the parent to one second before its child:
      prev_rev_data.adjust_timestamp(rev_data.timestamp - 1)
      resynced = True
      delta = prev_rev_data.timestamp - old_timestamp
      Log().verbose(
          "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds"
          % (self.cvs_file.cvs_path, prev_rev_data.rev,
             time.ctime(old_timestamp), delta))
      if abs(delta) > config.COMMIT_THRESHOLD:
        Log().warn(
            "%s: Significant timestamp change for '%s' (%d seconds)"
            % (warning_prefix, self.cvs_file.cvs_path, delta))
      # Continue up the chain, since the parent's new timestamp might
      # in turn conflict with that of its own parent:
      rev_data = prev_rev_data

    return resynced

  def tree_completed(self):
    """The revision tree has been parsed.  Analyze it for consistency.

    This is a callback method declared in Sink."""

    for rev in self._rev_order:
      rev_data = self._rev_data[rev]

      self._set_branch_dependencies(rev_data)

      self._update_default_branch(rev_data)

      if not trunk_rev.match(rev_data.rev):
        self._register_branch_commit(rev_data.rev)

    self._resolve_dependencies()

    # Our algorithm depends upon the timestamps on the revisions occurring
    # monotonically over time.  That is, we want to see rev 1.34 occur in
    # time before rev 1.35.  If we inserted 1.35 *first* (due to the time-
    # sorting), and then tried to insert 1.34, we'd be screwed.
    #
    # To perform the analysis, we'll simply visit all of the 'parent'
    # links that we have recorded and validate that the timestamp on the
    # parent revision is before the specified revision.
    #
    # If we have to resync some nodes, then we restart the scan.  Just
    # keep looping as long as we need to restart.
    while True:
      for rev_data in self._rev_data.values():
        if self._resync_chain(rev_data):
          # Abort for loop, causing the scan to start again:
          break
      else:
        # Finished the for-loop without having to resync anything.
        # We're done.
        return

  def _determine_operation(self, rev_data):
    """Return the operation (common.OP_ADD, common.OP_CHANGE, or
    common.OP_DELETE) that the revision described by REV_DATA
    represents."""

    # How to tell if a CVSRevision is an add, a change, or a deletion:
    #
    # It's a delete if RCS state is 'dead'
    #
    # It's an add if RCS state is not 'dead' and
    #     - we either have no previous revision
    #       or
    #     - we have a previous revision whose state is 'dead'
    #
    # Anything else is a change.
    prev_rev_data = self._rev_data.get(rev_data.parent)

    if rev_data.state == 'dead':
      op = common.OP_DELETE
    elif prev_rev_data is None or prev_rev_data.state == 'dead':
      op = common.OP_ADD
    else:
      op = common.OP_CHANGE

    # There can be an odd situation where the tip revision of a branch
    # is alive, but every predecessor on the branch is in state 'dead',
    # yet the revision from which the branch sprouts is alive.  (This
    # is sort of a mirror image of the more common case of adding a
    # file on a branch, in which the first revision on the branch is
    # alive while the revision from which it sprouts is dead.)
    #
    # In this odd situation, we must mark the first live revision on
    # the branch as an OP_CHANGE instead of an OP_ADD, because it
    # reflects, however indirectly, a change w.r.t. the source
    # revision from which the branch sprouts.
    #
    # This is issue #89.
    if is_branch_revision(rev_data.rev) and rev_data.state != 'dead':
      # Walk the chain of parents until a revision without a parent is
      # reached:
      cur_num = rev_data.rev
      prev_num = self._rev_data[cur_num].parent
      while prev_num:
        if (not is_same_line_of_development(cur_num, prev_num)
            and self._rev_data[cur_num].state == 'dead'
            and self._rev_data[prev_num].state != 'dead'):
          op = common.OP_CHANGE
        cur_num = prev_num
        prev_num = self._rev_data[cur_num].parent

    return op

  def set_revision_info(self, revision, log, text):
    """Create the CVSRevision for REVISION and store it, along with
    the metadata (author and LOG), into collect_data.

    This is a callback method declared in Sink."""

    rev_data = self._rev_data[revision]
    # The digest identifies the combination of log message and author:
    digest = sha.new(log + '\0' + rev_data.author).hexdigest()
    if rev_data.timestamp_was_adjusted():
      # the timestamp on this revision was changed. log it for later
      # resynchronization of other files's revisions that occurred
      # for this time and log message.
      self.collect_data.resync.write(
          '%08lx %s %08lx\n'
          % (rev_data.original_timestamp, digest, rev_data.timestamp))

    # "...Give back one kadam to honor the Hebrew God whose Ark this is."
    #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
    #
    # If revision 1.1 appears to have been created via 'cvs add'
    # instead of 'cvs import', then this file probably never had a
    # default branch, so retroactively remove its record in the
    # default branches db.  The test is that the log message CVS uses
    # for 1.1 in imports is "Initial revision\n" with no period.
    if revision == '1.1' and log != 'Initial revision\n':
      try:
        del self.collect_data.default_branches_db[self.cvs_file.cvs_path]
      except KeyError:
        pass

    # Get the timestamps of the previous and next revisions (0 if
    # there is no such revision):
    prev_rev = rev_data.parent
    prev_rev_data = self._rev_data.get(prev_rev)
    if prev_rev_data is None:
      prev_timestamp = 0
    else:
      prev_timestamp = prev_rev_data.timestamp

    next_rev = self.next_rev.get(revision)
    next_rev_data = self._rev_data.get(next_rev)
    if next_rev_data is None:
      next_timestamp = 0
    else:
      next_timestamp = next_rev_data.timestamp

    c_rev = cvs_revision.CVSRevision(
        self._get_rev_id(revision), self.cvs_file,
        rev_data.timestamp, digest,
        self._get_rev_id(prev_rev), self._get_rev_id(next_rev),
        prev_timestamp, next_timestamp, self._determine_operation(rev_data),
        prev_rev, revision, next_rev,
        bool(text),
        self.rev_to_branch_name(revision),
        self.taglist.get(revision, []), self.branchlist.get(revision, []))
    self._c_revs[revision] = c_rev
    self.collect_data.add_cvs_revision(c_rev)

    # NOTE(review): metadata_db is a database.Database rather than a
    # plain dict; has_key() is kept here because its support for the
    # 'in' operator cannot be seen from this file.
    if not self.collect_data.metadata_db.has_key(digest):
      self.collect_data.metadata_db[digest] = (rev_data.author, log)

  def parse_completed(self):
    """Walk through all branches and tags and register them with their
    parent branch in the symbol database.

    This is a callback method declared in Sink."""

    # Tags are handled first, then branches:
    for symbol_map in (self.taglist, self.branchlist):
      for revision, symbols in symbol_map.items():
        # The branch on which REVISION lies is the same for all of the
        # symbols attached to REVISION:
        name = self.rev_to_branch_name(revision)
        if name is None:
          continue
        for symbol in symbols:
          self.collect_data.symbol_db.register_branch_blocker(name, symbol)

    self.collect_data.num_files += 1
class CollectData:
  """Holder of everything gathered while the CVS repository is parsed.

  The databases and data files that receive the information read from
  the CVS repository files are opened and owned by this class.  The
  information itself is supplied by FileDataCollector instances; one
  such instance is created for each file to be parsed."""

  def __init__(self):
    """Open the databases and data files (all located via the
    artifact manager) and initialize the counters and key
    generators."""

    get_temp_file = artifact_manager.get_temp_file

    self._cvs_file_db = CVSFileDatabase(
        get_temp_file(config.CVS_FILES_DB), database.DB_OPEN_NEW)
    self._cvs_revs_db = CVSRevisionDatabase(
        self._cvs_file_db,
        get_temp_file(config.CVS_REVS_DB), database.DB_OPEN_NEW)

    # Data files, opened for writing:
    self._all_revs = open(get_temp_file(config.ALL_REVS_DATAFILE), 'w')
    self.resync = open(get_temp_file(config.RESYNC_DATAFILE), 'w')

    self.default_branches_db = database.SDatabase(
        get_temp_file(config.DEFAULT_BRANCHES_DB), database.DB_OPEN_NEW)
    self.metadata_db = database.Database(
        get_temp_file(config.METADATA_DB), database.DB_OPEN_NEW)

    # Error messages that collectors append to:
    self.fatal_errors = []

    # Counter that collectors increment:
    self.num_files = 0

    self.symbol_db = symbol_database.SymbolDatabase()

    # 1 if we've collected data for at least one file, None otherwise.
    self.found_valid_file = None

    # Key generator to generate unique keys for each CVSFile object:
    self.file_key_generator = KeyGenerator(1)

    # Key generator to generate unique keys for each CVSRevision object:
    self.key_generator = KeyGenerator()

  def add_cvs_file(self, cvs_file):
    """Give CVS_FILE a newly generated id and log it to _cvs_file_db.

    CVS_FILE must not have an id yet (its id must be None)."""

    assert cvs_file.id is None
    new_id = self.file_key_generator.gen_id()
    cvs_file.id = new_id
    self._cvs_file_db.log_file(cvs_file)

  def add_cvs_revision(self, c_rev):
    """Log C_REV to _cvs_revs_db, append its unique key as a line to
    the all-revisions data file, and record it with the StatsKeeper."""

    self._cvs_revs_db.log_revision(c_rev)
    key_line = '%s\n' % (c_rev.unique_key(),)
    self._all_revs.write(key_line)
    StatsKeeper().record_c_rev(c_rev)

  def write_symbol_db(self):
    """Write out the symbol database."""

    self.symbol_db.write()