Fix bzr-fastimport when used with newer versions of python-fastimport. (Jelmer Vernooij)
[bzr-fastimport.git] / revision_store.py
blob 33b89de6bd922e5ed046d9517194984fc41181b8
# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""An abstraction of a repository providing just the bits importing needs."""

import cStringIO

from bzrlib import (
    errors,
    graph as _mod_graph,
    inventory,
    knit,
    lru_cache,
    osutils,
    revision as _mod_revision,
    trace,
    )


class _TreeShim(object):
    """Fake a Tree implementation.

    This implements just enough of the tree api to make commit builder happy.
    """

    def __init__(self, repo, basis_inv, inv_delta, content_provider):
        self._repo = repo
        self._content_provider = content_provider
        self._basis_inv = basis_inv
        self._inv_delta = inv_delta
        self._new_info_by_id = dict([(file_id, (new_path, ie))
                                     for _, new_path, file_id, ie in inv_delta])

    def id2path(self, file_id):
        if file_id in self._new_info_by_id:
            new_path = self._new_info_by_id[file_id][0]
            if new_path is None:
                raise errors.NoSuchId(self, file_id)
            return new_path
        return self._basis_inv.id2path(file_id)

    def path2id(self, path):
        # CommitBuilder currently only requires access to the root id. We don't
        # build a map of renamed files, etc. One possibility if we ever *do*
        # need more than just root, is to defer to basis_inv.path2id() and then
        # check if the file_id is in our _new_info_by_id dict. And in that
        # case, return _new_info_by_id[file_id][0]
        if path != '':
            raise NotImplementedError(_TreeShim.path2id)
        # TODO: Handle root renames?
        return self._basis_inv.root.file_id

    def get_file_with_stat(self, file_id, path=None):
        content = self.get_file_text(file_id, path)
        sio = cStringIO.StringIO(content)
        return sio, None

    def get_file_text(self, file_id, path=None):
        try:
            return self._content_provider(file_id)
        except KeyError:
            # The content wasn't shown as 'new'. Just validate this fact
            assert file_id not in self._new_info_by_id
            old_ie = self._basis_inv[file_id]
            old_text_key = (file_id, old_ie.revision)
            stream = self._repo.texts.get_record_stream([old_text_key],
                'unordered', True)
            return stream.next().get_bytes_as('fulltext')

    def get_symlink_target(self, file_id):
        if file_id in self._new_info_by_id:
            ie = self._new_info_by_id[file_id][1]
            return ie.symlink_target
        return self._basis_inv[file_id].symlink_target

    def get_reference_revision(self, file_id, path=None):
        raise NotImplementedError(_TreeShim.get_reference_revision)

    def _delta_to_iter_changes(self):
        """Convert the inv_delta into an iter_changes repr."""
        # iter_changes is:
        #    (file_id,
        #     (old_path, new_path),
        #     content_changed,
        #     (old_versioned, new_versioned),
        #     (old_parent_id, new_parent_id),
        #     (old_name, new_name),
        #     (old_kind, new_kind),
        #     (old_exec, new_exec),
        #    )
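        # For example (hypothetical values), a newly added file yields:
        #   ('foo-id', (None, 'foo.txt'), True, (False, True),
        #    (None, 'root-id'), (None, 'foo.txt'), (None, 'file'), (None, False))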
        basis_inv = self._basis_inv
        for old_path, new_path, file_id, ie in self._inv_delta:
            # Perf: Would this be faster if we did 'if file_id in basis_inv'?
            # Since the *very* common case is that the file already exists, it
            # probably is better to optimize for that
            try:
                old_ie = basis_inv[file_id]
            except errors.NoSuchId:
                old_ie = None
                if ie is None:
                    raise AssertionError('How is both old and new None?')
                    change = (file_id,
                              (old_path, new_path),
                              False,
                              (False, False),
                              (None, None),
                              (None, None),
                              (None, None),
                              (None, None),
                              )
                change = (file_id,
                          (old_path, new_path),
                          True,
                          (False, True),
                          (None, ie.parent_id),
                          (None, ie.name),
                          (None, ie.kind),
                          (None, ie.executable),
                          )
            else:
                if ie is None:
                    change = (file_id,
                              (old_path, new_path),
                              True,
                              (True, False),
                              (old_ie.parent_id, None),
                              (old_ie.name, None),
                              (old_ie.kind, None),
                              (old_ie.executable, None),
                              )
                else:
                    content_modified = (ie.text_sha1 != old_ie.text_sha1
                                        or ie.text_size != old_ie.text_size)
                    # TODO: ie.kind != old_ie.kind
                    # TODO: symlinks changing targets, content_modified?
                    change = (file_id,
                              (old_path, new_path),
                              content_modified,
                              (True, True),
                              (old_ie.parent_id, ie.parent_id),
                              (old_ie.name, ie.name),
                              (old_ie.kind, ie.kind),
                              (old_ie.executable, ie.executable),
                              )
            yield change


class AbstractRevisionStore(object):

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repository: the target repository
        """
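        # A sketch of the expected calling pattern (illustrative only; the
        # batching and write-group policy is up to the caller):
        #
        #   repo.lock_write()
        #   try:
        #       repo.start_write_group()
        #       store = RevisionStore2(repo)
        #       for rev, inv, sig in revisions_to_import:   # hypothetical iterable
        #           store.load(rev, inv, sig, text_provider, parents_provider)
        #       repo.commit_write_group()
        #   finally:
        #       repo.unlock()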
        self.repo = repo
        self._graph = None
        self._use_known_graph = True
        self._supports_chks = getattr(repo._format, 'supports_chks', False)

    def expects_rich_root(self):
        """Does this store expect inventories with rich roots?"""
        return self.repo.supports_rich_root()

    def init_inventory(self, revision_id):
        """Generate an inventory for a parentless revision."""
        if self._supports_chks:
            inv = self._init_chk_inventory(revision_id, inventory.ROOT_ID)
        else:
            inv = inventory.Inventory(revision_id=revision_id)
            if self.expects_rich_root():
                # The very first root needs to have the right revision
                inv.root.revision = revision_id
        return inv

    def _init_chk_inventory(self, revision_id, root_id):
        """Generate a CHKInventory for a parentless revision."""
        from bzrlib import chk_map
        # Get the creation parameters
        chk_store = self.repo.chk_bytes
        serializer = self.repo._format._serializer
        search_key_name = serializer.search_key_name
        maximum_size = serializer.maximum_size

        # Maybe the rest of this ought to be part of the CHKInventory API?
        inv = inventory.CHKInventory(search_key_name)
        inv.revision_id = revision_id
        inv.root_id = root_id
        search_key_func = chk_map.search_key_registry.get(search_key_name)
        inv.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
        inv.id_to_entry._root_node.set_maximum_size(maximum_size)
        inv.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
            None, search_key_func)
        inv.parent_id_basename_to_file_id._root_node.set_maximum_size(
            maximum_size)
        inv.parent_id_basename_to_file_id._root_node._key_width = 2
        return inv

    def get_inventory(self, revision_id):
        """Get a stored inventory."""
        return self.repo.get_inventory(revision_id)

    def get_file_text(self, revision_id, file_id):
        """Get the text stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return revtree.get_file_text(file_id)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return osutils.split_lines(revtree.get_file_text(file_id))

    def start_new_revision(self, revision, parents, parent_invs):
        """Init the metadata needed for get_parents_and_revision_for_entry().

        :param revision: a Revision object
        """
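        # Hypothetical calling order: invoke this once per revision, before
        # get_parents_and_revision_for_entry() is queried for any of that
        # revision's inventory entries (the assertions there enforce this).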
        self._current_rev_id = revision.revision_id
        self._rev_parents = parents
        self._rev_parent_invs = parent_invs
        # We don't know what the branch will be so there's no real BranchConfig.
        # That means we won't be triggering any hooks and that's a good thing.
        # Without a config though, we must pass in the committer below so that
        # the commit builder doesn't try to look up the config.
        config = None
        # We can't use self.repo.get_commit_builder() here because it starts a
        # new write group. We want one write group around a batch of imports
        # where the default batch size is currently 10000. IGC 20090312
        self._commit_builder = self.repo._commit_builder_class(self.repo,
            parents, config, timestamp=revision.timestamp,
            timezone=revision.timezone, committer=revision.committer,
            revprops=revision.properties, revision_id=revision.revision_id)

    def get_parents_and_revision_for_entry(self, ie):
        """Get the parents and revision for an inventory entry.

        :param ie: the inventory entry
        :return: (parents, revision_id) where
            parents is the tuple of parent revision_ids for the per-file graph
            revision_id is the revision_id to use for this entry
        """
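        # Example (hypothetical): if the entry's text is unchanged from its
        # sole parent, this returns ((parent_file_rev,), parent_file_rev) and
        # the existing text can be reused; if it changed, it returns
        # ((parent_file_rev,), ie.revision) and a new text must be recorded.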
        # Check for correct API usage
        if self._current_rev_id is None:
            raise AssertionError("start_new_revision() must be called"
                " before get_parents_and_revision_for_entry()")
        if ie.revision != self._current_rev_id:
            raise AssertionError("start_new_revision() registered a different"
                " revision (%s) to that in the inventory entry (%s)" %
                (self._current_rev_id, ie.revision))

        # Find the heads. This code is lifted from
        # repository.CommitBuilder.record_entry_contents().
        parent_candidate_entries = ie.parent_candidates(self._rev_parent_invs)
        head_set = self._commit_builder._heads(ie.file_id,
            parent_candidate_entries.keys())
        heads = []
        for inv in self._rev_parent_invs:
            if inv.has_id(ie.file_id):
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    rev_id = inv[ie.file_id].revision
                    heads.append(rev_id)
                    head_set.remove(rev_id)

        # Find the revision to use. If the content has not changed
        # since the parent, record the parent's revision.
        if len(heads) == 0:
            return (), ie.revision
        parent_entry = parent_candidate_entries[heads[0]]
        changed = False
        if len(heads) > 1:
            changed = True
        elif (parent_entry.name != ie.name or parent_entry.kind != ie.kind or
              parent_entry.parent_id != ie.parent_id):
            changed = True
        elif ie.kind == 'file':
            if (parent_entry.text_sha1 != ie.text_sha1 or
                    parent_entry.executable != ie.executable):
                changed = True
        elif ie.kind == 'symlink':
            if parent_entry.symlink_target != ie.symlink_target:
                changed = True
        if changed:
            rev_id = ie.revision
        else:
            rev_id = parent_entry.revision
        return tuple(heads), rev_id

    def load(self, rev, inv, signature, text_provider, parents_provider,
            inventories_provider=None):
        """Load a revision.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a repository and
            a list of revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-ids,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
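        # A minimal sketch of the provider callables (names are illustrative):
        #
        #   def text_provider(file_id):
        #       return file_texts[file_id]          # content for this revision
        #
        #   def parents_provider(file_id):
        #       return per_file_parents.get(file_id, [])
        #
        #   store.load(rev, inv, None, text_provider, parents_provider)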
        # NOTE: This is bzrlib.repository._install_revision refactored
        # to provide more flexibility in how previous revisions are cached,
        # data is fed in, etc.

        # Get the non-ghost parents and their inventories
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)

        # Load the inventory
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents, parent_invs)
        except errors.RevisionAlreadyPresent:
            pass

        # Load the texts, signature and revision
        entries = self._non_root_entries_iter(inv, rev.revision_id)
        self._load_texts(rev.revision_id, entries, text_provider,
            parents_provider)
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def load_using_delta(self, rev, basis_inv, inv_delta, signature,
            text_provider, parents_provider, inventories_provider=None):
        """Load a revision by applying a delta to a (CHK)Inventory.

        :param rev: the Revision
        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a repository and
            a list of revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-ids,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
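        # The delta uses the standard bzrlib inventory-delta shape: a list of
        # (old_path, new_path, file_id, new_entry) tuples, with new_entry set
        # to None for deletions. A hypothetical single-file addition:
        #
        #   inv_delta = [(None, 'README', 'readme-id',
        #                 inventory.InventoryFile('readme-id', 'README', root_id))]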
        # TODO: set revision_id = rev.revision_id
        builder = self.repo._commit_builder_class(self.repo,
            parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
            timezone=rev.timezone, committer=rev.committer,
            revprops=rev.properties, revision_id=rev.revision_id)
        if self._graph is None and self._use_known_graph:
            if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and
                getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and
                getattr(self.repo, "get_known_graph_ancestry", None)):
                self._graph = self.repo.get_known_graph_ancestry(
                    rev.parent_ids)
            else:
                self._use_known_graph = False
        if self._graph is not None:
            orig_heads = builder._heads
            def thunked_heads(file_id, revision_ids):
                # self._graph thinks in terms of keys, not ids, so translate
                # them
                # old_res = orig_heads(file_id, revision_ids)
                if len(revision_ids) < 2:
                    res = set(revision_ids)
                else:
                    res = set(self._graph.heads(revision_ids))
                # if old_res != res:
                #     import pdb; pdb.set_trace()
                return res
            builder._heads = thunked_heads

        if rev.parent_ids:
            basis_rev_id = rev.parent_ids[0]
        else:
            basis_rev_id = _mod_revision.NULL_REVISION
        tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
        changes = tree._delta_to_iter_changes()
        for (file_id, path, fs_hash) in builder.record_iter_changes(
                tree, basis_rev_id, changes):
            # So far, we don't *do* anything with the result
            pass
        builder.finish_inventory()
        # TODO: This is working around a bug in the bzrlib code base.
        # 'builder.finish_inventory()' ends up doing:
        #   self.inv_sha1 = self.repository.add_inventory_by_delta(...)
        # However, add_inventory_by_delta returns (sha1, inv)
        # And we *want* to keep a handle on both of those objects
        if isinstance(builder.inv_sha1, tuple):
            builder.inv_sha1, builder.new_inventory = builder.inv_sha1
        # This is a duplicate of Builder.commit() since we already have the
        # Revision object, and we *don't* want to call commit_write_group()
        rev.inv_sha1 = builder.inv_sha1
        try:
            config = builder._config_stack
        except AttributeError:  # bzr < 2.5
            config = builder._config
        builder.repository.add_revision(builder._new_revision_id, rev,
            builder.new_inventory)
        if self._graph is not None:
            # TODO: Use StaticTuple and .intern() for these things
            self._graph.add_node(builder._new_revision_id, rev.parent_ids)

        if signature is not None:
            raise AssertionError('signatures not guaranteed yet')
            self.repo.add_signature_text(rev.revision_id, signature)
        # self._add_revision(rev, inv)
        return builder.revision_tree().inventory

    def _non_root_entries_iter(self, inv, revision_id):
        if hasattr(inv, 'iter_non_root_entries'):
            entries = inv.iter_non_root_entries()
        else:
            path_entries = inv.iter_entries()
            # Backwards compatibility hack: skip the root id.
            if not self.repo.supports_rich_root():
                path, root = path_entries.next()
                if root.revision != revision_id:
                    raise errors.IncompatibleRevision(repr(self.repo))
            entries = iter([ie for path, ie in path_entries])
        return entries

    def _load_texts(self, revision_id, entries, text_provider,
            parents_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_inventory_by_delta(self, revision_id, basis_inv, inv_delta,
            parents, parent_invs):
        """Add the inventory to the repository as revision_id.

        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: (validator, inv) where validator is the validator
            (which is a sha1 digest, though what is sha'd is repository format
            specific) of the serialized inventory;
            inv is the generated inventory
        """
        if len(parents):
            if self._supports_chks:
                try:
                    validator, new_inv = self.repo.add_inventory_by_delta(
                        parents[0], inv_delta, revision_id, parents,
                        basis_inv=basis_inv, propagate_caches=False)
                except errors.InconsistentDelta:
                    #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()])
                    trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join(
                        [str(i) for i in inv_delta]))
                    raise
            else:
                validator, new_inv = self.repo.add_inventory_by_delta(
                    parents[0], inv_delta, revision_id, parents)
        else:
            if isinstance(basis_inv, inventory.CHKInventory):
                new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id)
            else:
                new_inv = inventory.Inventory(revision_id=revision_id)
                # This is set in the delta so remove it to prevent a duplicate
                del new_inv[inventory.ROOT_ID]
                new_inv.apply_delta(inv_delta)
            validator = self.repo.add_inventory(revision_id, new_inv, parents)
        return validator, new_inv

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository."""
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories


class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for ie in entries:
            # This test is *really* slow: over 50% of import time
            #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            #if ie.revision in w:
            #    continue
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            file_id = ie.file_id
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        tx = self.repo.get_transaction()
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())


class RevisionStore2(AbstractRevisionStore):
    """A RevisionStore that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        text_keys = {}
        for ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        self._load_texts_for_file_rev_ids(missing_texts, text_provider,
            parents_provider)

    def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,
            parents_provider):
        """Load texts to a repository for (file-id, revision-id) tuples.

        :param file_rev_ids: iterator over the (file_id, revision_id) tuples
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        for file_id, revision_id in file_rev_ids:
            text_key = (file_id, revision_id)
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            #print "adding text for %s\n\tparents:%s" % (text_key, text_parents)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    # This is breaking imports into brisbane-core currently
    #def _add_revision(self, rev, inv):
    #    # There's no need to do everything repo.add_revision does and
    #    # doing so (since bzr.dev 3392) can be pretty slow for long
    #    # delta chains on inventories. Just do the essentials here ...
    #    _mod_revision.check_not_reserved_id(rev.revision_id)
    #    self.repo._add_revision(rev)


class ImportRevisionStore1(RevisionStore1):
    """A RevisionStore (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
            random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repository: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if not None, a function to call to decide
            whether to fulltext the inventory or not. The revision count
            is passed as a parameter and the result is treated as a boolean.
        """
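        # Example (hypothetical): store a full inventory text every 200
        # revisions and deltas otherwise:
        #
        #   store = ImportRevisionStore1(repo,
        #       fulltext_when=lambda count: count % 200 == 0)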
        RevisionStore1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See RevisionStore._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines()."""
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas. This should minimize both
        #    # I/O and the time spent applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content