[bzr-fastimport.git] / revision_store.py
# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""An abstraction of a repository providing just the bits importing needs."""

import cStringIO

from bzrlib import (
    errors,
    graph as _mod_graph,
    inventory,
    knit,
    lru_cache,
    osutils,
    revision as _mod_revision,
    trace,
    )


class _TreeShim(object):
    """Fake a Tree implementation.

    This implements just enough of the tree api to make commit builder happy.
    """

    def __init__(self, repo, basis_inv, inv_delta, content_provider):
        self._repo = repo
        self._content_provider = content_provider
        self._basis_inv = basis_inv
        self._inv_delta = inv_delta
        self._new_info_by_id = dict([(file_id, (new_path, ie))
                                     for _, new_path, file_id, ie in inv_delta])
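
    # Illustrative sketch (hypothetical values): an inv_delta entry of
    #   (None, 'doc/README.txt', 'readme-id', <InventoryFile>)
    # describes a newly added file, so _new_info_by_id maps
    #   'readme-id' -> ('doc/README.txt', <InventoryFile>)
    # while a deletion entry such as ('old.txt', None, 'old-id', None)
    # maps 'old-id' -> (None, None).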

    def id2path(self, file_id):
        if file_id in self._new_info_by_id:
            new_path = self._new_info_by_id[file_id][0]
            if new_path is None:
                raise errors.NoSuchId(self, file_id)
            return new_path
        return self._basis_inv.id2path(file_id)

    def path2id(self, path):
        # CommitBuilder currently only requires access to the root id. We don't
        # build a map of renamed files, etc. One possibility if we ever *do*
        # need more than just root, is to defer to basis_inv.path2id() and then
        # check if the file_id is in our _new_info_by_id dict. And in that
        # case, return _new_info_by_id[file_id][0]
        if path != '':
            raise NotImplementedError(_TreeShim.path2id)
        # TODO: Handle root renames?
        return self._basis_inv.root.file_id

    def get_file_with_stat(self, file_id, path=None):
        content = self.get_file_text(file_id, path)
        sio = cStringIO.StringIO(content)
        return sio, None

    def get_file_text(self, file_id, path=None):
        try:
            return self._content_provider(file_id)
        except KeyError:
            # The content wasn't shown as 'new'. Just validate this fact
            assert file_id not in self._new_info_by_id
            old_ie = self._basis_inv[file_id]
            old_text_key = (file_id, old_ie.revision)
            stream = self._repo.texts.get_record_stream([old_text_key],
                                                        'unordered', True)
            return stream.next().get_bytes_as('fulltext')

    def get_symlink_target(self, file_id):
        if file_id in self._new_info_by_id:
            ie = self._new_info_by_id[file_id][1]
            return ie.symlink_target
        return self._basis_inv[file_id].symlink_target

    def get_reference_revision(self, file_id, path=None):
        raise NotImplementedError(_TreeShim.get_reference_revision)

    def _delta_to_iter_changes(self):
        """Convert the inv_delta into an iter_changes repr."""
        # iter_changes is:
        #    (file_id,
        #     (old_path, new_path),
        #     content_changed,
        #     (old_versioned, new_versioned),
        #     (old_parent_id, new_parent_id),
        #     (old_name, new_name),
        #     (old_kind, new_kind),
        #     (old_exec, new_exec),
        #    )
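        # For example (hypothetical values), adding a new file 'hello.txt'
        # with file-id 'hello-id' under the root 'root-id' would yield:
        #    ('hello-id',
        #     (None, 'hello.txt'),
        #     True,
        #     (False, True),
        #     (None, 'root-id'),
        #     (None, 'hello.txt'),
        #     (None, 'file'),
        #     (None, False),
        #    )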
        basis_inv = self._basis_inv
        for old_path, new_path, file_id, ie in self._inv_delta:
            # Perf: Would this be faster if we did 'if file_id in basis_inv'?
            # Since the *very* common case is that the file already exists, it
            # probably is better to optimize for that
            try:
                old_ie = basis_inv[file_id]
            except errors.NoSuchId:
                old_ie = None
                if ie is None:
                    raise AssertionError('How is both old and new None?')
                change = (file_id,
                          (old_path, new_path),
                          True,
                          (False, True),
                          (None, ie.parent_id),
                          (None, ie.name),
                          (None, ie.kind),
                          (None, ie.executable),
                          )
            else:
                if ie is None:
                    change = (file_id,
                              (old_path, new_path),
                              True,
                              (True, False),
                              (old_ie.parent_id, None),
                              (old_ie.name, None),
                              (old_ie.kind, None),
                              (old_ie.executable, None),
                              )
                else:
                    content_modified = (ie.text_sha1 != old_ie.text_sha1
                                        or ie.text_size != old_ie.text_size)
                    # TODO: ie.kind != old_ie.kind
                    # TODO: symlinks changing targets, content_modified?
                    change = (file_id,
                              (old_path, new_path),
                              content_modified,
                              (True, True),
                              (old_ie.parent_id, ie.parent_id),
                              (old_ie.name, ie.name),
                              (old_ie.kind, ie.kind),
                              (old_ie.executable, ie.executable),
                              )
            yield change


class AbstractRevisionStore(object):

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo
        self._graph = None
        self._use_known_graph = True
        self._supports_chks = getattr(repo._format, 'supports_chks', False)
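
    # Typical usage is roughly (illustrative sketch only, not lifted from the
    # callers in this package):
    #
    #   store = RevisionStore2(repo)
    #   repo.lock_write()
    #   try:
    #       repo.start_write_group()
    #       store.load(rev, inv, None, text_provider, parents_provider)
    #       # ... further revisions ...
    #       repo.commit_write_group()
    #   finally:
    #       repo.unlock()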

    def expects_rich_root(self):
        """Does this store expect inventories with rich roots?"""
        return self.repo.supports_rich_root()

    def init_inventory(self, revision_id):
        """Generate an inventory for a parentless revision."""
        if self._supports_chks:
            inv = self._init_chk_inventory(revision_id, inventory.ROOT_ID)
        else:
            inv = inventory.Inventory(revision_id=revision_id)
            if self.expects_rich_root():
                # The very first root needs to have the right revision
                inv.root.revision = revision_id
        return inv

    def _init_chk_inventory(self, revision_id, root_id):
        """Generate a CHKInventory for a parentless revision."""
        from bzrlib import chk_map
        # Get the creation parameters
        chk_store = self.repo.chk_bytes
        serializer = self.repo._format._serializer
        search_key_name = serializer.search_key_name
        maximum_size = serializer.maximum_size

        # Maybe the rest of this ought to be part of the CHKInventory API?
        inv = inventory.CHKInventory(search_key_name)
        inv.revision_id = revision_id
        inv.root_id = root_id
        search_key_func = chk_map.search_key_registry.get(search_key_name)
        inv.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
        inv.id_to_entry._root_node.set_maximum_size(maximum_size)
        inv.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
            None, search_key_func)
        inv.parent_id_basename_to_file_id._root_node.set_maximum_size(
            maximum_size)
        inv.parent_id_basename_to_file_id._root_node._key_width = 2
        return inv

    def get_inventory(self, revision_id):
        """Get a stored inventory."""
        return self.repo.get_inventory(revision_id)

    def get_file_text(self, revision_id, file_id):
        """Get the text stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return revtree.get_file_text(file_id)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return osutils.split_lines(revtree.get_file_text(file_id))

    def start_new_revision(self, revision, parents, parent_invs):
        """Init the metadata needed for get_parents_and_revision_for_entry().

        :param revision: a Revision object
        """
        self._current_rev_id = revision.revision_id
        self._rev_parents = parents
        self._rev_parent_invs = parent_invs
        # We don't know what the branch will be so there's no real BranchConfig.
        # That means we won't be triggering any hooks and that's a good thing.
        # Without a config though, we must pass in the committer below so that
        # the commit builder doesn't try to look up the config.
        config = None
        # We can't use self.repo.get_commit_builder() here because it starts a
        # new write group. We want one write group around a batch of imports
        # where the default batch size is currently 10000. IGC 20090312
        self._commit_builder = self.repo._commit_builder_class(self.repo,
            parents, config, timestamp=revision.timestamp,
            timezone=revision.timezone, committer=revision.committer,
            revprops=revision.properties, revision_id=revision.revision_id)

    def get_parents_and_revision_for_entry(self, ie):
        """Get the parents and revision for an inventory entry.

        :param ie: the inventory entry
        :return: parents, revision_id where
            parents is the tuple of parent revision_ids for the per-file graph
            revision_id is the revision_id to use for this entry
        """
        # Check for correct API usage
        if self._current_rev_id is None:
            raise AssertionError("start_new_revision() must be called"
                " before get_parents_and_revision_for_entry()")
        if ie.revision != self._current_rev_id:
            raise AssertionError("start_new_revision() registered a different"
                " revision (%s) to that in the inventory entry (%s)" %
                (self._current_rev_id, ie.revision))

        # Find the heads. This code is lifted from
        # repository.CommitBuilder.record_entry_contents().
        parent_candidate_entries = ie.parent_candidates(self._rev_parent_invs)
        head_set = self._commit_builder._heads(ie.file_id,
            parent_candidate_entries.keys())
        heads = []
        for inv in self._rev_parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    rev_id = inv[ie.file_id].revision
                    heads.append(rev_id)
                    head_set.remove(rev_id)

        # Find the revision to use. If the content has not changed
        # since the parent, record the parent's revision.
        if len(heads) == 0:
            return (), ie.revision
        parent_entry = parent_candidate_entries[heads[0]]
        changed = False
        if len(heads) > 1:
            changed = True
        elif (parent_entry.name != ie.name or parent_entry.kind != ie.kind or
            parent_entry.parent_id != ie.parent_id):
            changed = True
        elif ie.kind == 'file':
            if (parent_entry.text_sha1 != ie.text_sha1 or
                parent_entry.executable != ie.executable):
                changed = True
        elif ie.kind == 'symlink':
            if parent_entry.symlink_target != ie.symlink_target:
                changed = True
        if changed:
            rev_id = ie.revision
        else:
            rev_id = parent_entry.revision
        return tuple(heads), rev_id

    def load(self, rev, inv, signature, text_provider, parents_provider,
        inventories_provider=None):
        """Load a revision.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
            * the list of revision-ids present in the repository
            * the list of inventories for the revision-ids,
              including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
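        # Illustrative sketch of the provider callables (hypothetical helpers,
        # not part of this module):
        #
        #   def text_provider(file_id):
        #       # content for file_id in this revision
        #       return blob_store[file_id]
        #
        #   def parents_provider(file_id):
        #       # per-file graph parents of file_id in this revision
        #       return per_file_parents.get(file_id, [])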
        # NOTE: This is bzrlib.repository._install_revision refactored to
        # provide more flexibility in how previous revisions are cached,
        # data is fed in, etc.

        # Get the non-ghost parents and their inventories
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)

        # Load the inventory
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents, parent_invs)
        except errors.RevisionAlreadyPresent:
            pass

        # Load the texts, signature and revision
        entries = self._non_root_entries_iter(inv, rev.revision_id)
        self._load_texts(rev.revision_id, entries, text_provider,
            parents_provider)
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def load_using_delta(self, rev, basis_inv, inv_delta, signature,
        text_provider, parents_provider, inventories_provider=None):
        """Load a revision by applying a delta to a (CHK)Inventory.

        :param rev: the Revision
        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
            * the list of revision-ids present in the repository
            * the list of inventories for the revision-ids,
              including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
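        # An inv_delta is a list of (old_path, new_path, file_id, new_entry)
        # tuples. For example (hypothetical values):
        #
        #   [(None, 'README', 'readme-id',
        #     inventory.InventoryFile('readme-id', 'README', 'root-id')),
        #    ('old.c', None, 'old-c-id', None)]
        #
        # adds README and removes old.c relative to basis_inv.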
        # TODO: set revision_id = rev.revision_id
        builder = self.repo._commit_builder_class(self.repo,
            parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
            timezone=rev.timezone, committer=rev.committer,
            revprops=rev.properties, revision_id=rev.revision_id)
        if self._graph is None and self._use_known_graph:
            if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) is None
                or getattr(_mod_graph.KnownGraph, 'add_node', None) is None):
                self._use_known_graph = False
            else:
                self._graph = self.repo.revisions.get_known_graph_ancestry(
                    [(r,) for r in rev.parent_ids])
        if self._graph is not None:
            orig_heads = builder._heads
            def thunked_heads(file_id, revision_ids):
                # self._graph thinks in terms of keys, not ids, so translate
                # them
                # old_res = orig_heads(file_id, revision_ids)
                if len(revision_ids) < 2:
                    res = set(revision_ids)
                else:
                    res = set([h[0] for h in
                        self._graph.heads([(r,) for r in revision_ids])])
                # if old_res != res:
                #     import pdb; pdb.set_trace()
                return res
            builder._heads = thunked_heads

        if rev.parent_ids:
            basis_rev_id = rev.parent_ids[0]
        else:
            basis_rev_id = _mod_revision.NULL_REVISION
        tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
        changes = tree._delta_to_iter_changes()

        for (file_id, path, fs_hash) in builder.record_iter_changes(
                tree, basis_rev_id, changes):
            # So far, we don't *do* anything with the result
            pass
        builder.finish_inventory()
        # TODO: This is working around a bug in the bzrlib code base.
        # 'builder.finish_inventory()' ends up doing:
        #   self.inv_sha1 = self.repository.add_inventory_by_delta(...)
        # However, add_inventory_by_delta returns (sha1, inv)
        # And we *want* to keep a handle on both of those objects
        if isinstance(builder.inv_sha1, tuple):
            builder.inv_sha1, builder.new_inventory = builder.inv_sha1
        # This is a duplicate of Builder.commit() since we already have the
        # Revision object, and we *don't* want to call commit_write_group()
        rev.inv_sha1 = builder.inv_sha1
        builder.repository.add_revision(builder._new_revision_id, rev,
            builder.new_inventory, builder._config)
        if self._graph is not None:
            # TODO: Use StaticTuple and .intern() for these things
            self._graph.add_node((builder._new_revision_id,),
                [(p,) for p in rev.parent_ids])

        if signature is not None:
            raise AssertionError('signatures not guaranteed yet')
            self.repo.add_signature_text(rev.revision_id, signature)
        # self._add_revision(rev, inv)
        return builder.revision_tree().inventory

    def _non_root_entries_iter(self, inv, revision_id):
        if hasattr(inv, 'iter_non_root_entries'):
            entries = inv.iter_non_root_entries()
        else:
            path_entries = inv.iter_entries()
            # Backwards compatibility hack: skip the root id.
            if not self.repo.supports_rich_root():
                path, root = path_entries.next()
                if root.revision != revision_id:
                    raise errors.IncompatibleRevision(repr(self.repo))
            entries = iter([ie for path, ie in path_entries])
        return entries

    def _load_texts(self, revision_id, entries, text_provider,
        parents_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_inventory_by_delta(self, revision_id, basis_inv, inv_delta,
        parents, parent_invs):
        """Add the inventory to the repository as revision_id.

        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: (validator, inv) where validator is the validator
            (which is a sha1 digest, though what is sha'd is repository format
            specific) of the serialized inventory;
            inv is the generated inventory
        """
        if len(parents):
            if self._supports_chks:
                try:
                    validator, new_inv = self.repo.add_inventory_by_delta(
                        parents[0], inv_delta, revision_id, parents,
                        basis_inv=basis_inv, propagate_caches=False)
                except errors.InconsistentDelta:
                    #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()])
                    trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join(
                        [str(i) for i in inv_delta]))
                    raise
            else:
                validator, new_inv = self.repo.add_inventory_by_delta(
                    parents[0], inv_delta, revision_id, parents)
        else:
            if isinstance(basis_inv, inventory.CHKInventory):
                new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id)
            else:
                new_inv = inventory.Inventory(revision_id=revision_id)
                # This is set in the delta so remove it to prevent a duplicate
                del new_inv[inventory.ROOT_ID]
                new_inv.apply_delta(inv_delta)
            validator = self.repo.add_inventory(revision_id, new_inv, parents)
        return validator, new_inv

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository."""
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories


class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for ie in entries:
            # This test is *really* slow: over 50% of import time
            #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            #if ie.revision in w:
            #    continue
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            file_id = ie.file_id
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        tx = self.repo.get_transaction()
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())


class RevisionStore2(AbstractRevisionStore):
    """A RevisionStore that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        text_keys = {}
        for ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        self._load_texts_for_file_rev_ids(missing_texts, text_provider,
            parents_provider)

    def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,
        parents_provider):
        """Load texts to a repository for (file-id, revision-id) tuples.

        :param file_rev_ids: iterator over the (file_id, revision_id) tuples
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        for file_id, revision_id in file_rev_ids:
            text_key = (file_id, revision_id)
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            #print "adding text for %s\n\tparents:%s" % (text_key, text_parents)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    # This is breaking imports into brisbane-core currently
    #def _add_revision(self, rev, inv):
    #    # There's no need to do everything repo.add_revision does and
    #    # doing so (since bzr.dev 3392) can be pretty slow for long
    #    # delta chains on inventories. Just do the essentials here ...
    #    _mod_revision.check_not_reserved_id(rev.revision_id)
    #    self.repo._add_revision(rev)


class ImportRevisionStore1(RevisionStore1):
    """A RevisionStore (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if not None, a function to call to decide
            whether to fulltext the inventory or not. The revision count
            is passed as a parameter and the result is treated as a boolean.
        """
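        # For example (illustrative only), to store a full inventory text
        # every 200 revisions and deltas otherwise, a caller might pass:
        #
        #   ImportRevisionStore1(repo,
        #       fulltext_when=lambda count: count % 200 == 0)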
        RevisionStore1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See RevisionStore._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
        parent_texts):
        """See Repository._inventory_add_lines()."""
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas. This should minimize both
        #    # I/O and the time spend applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content