# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""An abstraction of a repository providing just the bits importing needs."""

import cStringIO

from bzrlib import (
    errors,
    graph as _mod_graph,
    inventory,
    knit,
    lru_cache,
    osutils,
    revision as _mod_revision,
    trace,
    )


class _TreeShim(object):
    """Fake a Tree implementation.

    This implements just enough of the tree api to make commit builder happy.
    """

    def __init__(self, repo, basis_inv, inv_delta, content_provider):
        self._repo = repo
        self._content_provider = content_provider
        self._basis_inv = basis_inv
        self._inv_delta = inv_delta
        self._new_info_by_id = dict([(file_id, (new_path, ie))
                                     for _, new_path, file_id, ie in inv_delta])

    def id2path(self, file_id):
        if file_id in self._new_info_by_id:
            new_path = self._new_info_by_id[file_id][0]
            if new_path is None:
                raise errors.NoSuchId(self, file_id)
            return new_path
        return self._basis_inv.id2path(file_id)

    def path2id(self, path):
        # CommitBuilder currently only requires access to the root id. We
        # don't build a map of renamed files, etc. One possibility, if we ever
        # *do* need more than just the root, is to defer to
        # basis_inv.path2id() and then check whether the file_id is in our
        # _new_info_by_id dict; in that case, return
        # _new_info_by_id[file_id][0].
        if path != '':
            raise NotImplementedError(_TreeShim.path2id)
        # TODO: Handle root renames?
        return self._basis_inv.root.file_id

    def get_file_with_stat(self, file_id, path=None):
        content = self.get_file_text(file_id, path)
        sio = cStringIO.StringIO(content)
        return sio, None

    def get_file_text(self, file_id, path=None):
        try:
            return self._content_provider(file_id)
        except KeyError:
            # The content wasn't shown as 'new'. Just validate this fact.
            assert file_id not in self._new_info_by_id
            old_ie = self._basis_inv[file_id]
            old_text_key = (file_id, old_ie.revision)
            stream = self._repo.texts.get_record_stream([old_text_key],
                                                        'unordered', True)
            return stream.next().get_bytes_as('fulltext')

    def get_symlink_target(self, file_id):
        if file_id in self._new_info_by_id:
            ie = self._new_info_by_id[file_id][1]
            return ie.symlink_target
        return self._basis_inv[file_id].symlink_target

    def get_reference_revision(self, file_id, path=None):
        raise NotImplementedError(_TreeShim.get_reference_revision)

    def _delta_to_iter_changes(self):
        """Convert the inv_delta into an iter_changes repr."""
        # iter_changes is:
        #    (file_id,
        #     (old_path, new_path),
        #     content_changed,
        #     (old_versioned, new_versioned),
        #     (old_parent_id, new_parent_id),
        #     (old_name, new_name),
        #     (old_kind, new_kind),
        #     (old_exec, new_exec),
        #    )
        basis_inv = self._basis_inv
        for old_path, new_path, file_id, ie in self._inv_delta:
            # Perf: Would this be faster if we did 'if file_id in basis_inv'?
            # Since the *very* common case is that the file already exists, it
            # probably is better to optimize for that.
            try:
                old_ie = basis_inv[file_id]
            except errors.NoSuchId:
                old_ie = None
                if ie is None:
                    raise AssertionError('How are both old and new None?')
                change = (file_id,
                          (old_path, new_path),
                          True,
                          (False, True),
                          (None, ie.parent_id),
                          (None, ie.name),
                          (None, ie.kind),
                          (None, ie.executable),
                          )
            else:
                if ie is None:
                    change = (file_id,
                              (old_path, new_path),
                              True,
                              (True, False),
                              (old_ie.parent_id, None),
                              (old_ie.name, None),
                              (old_ie.kind, None),
                              (old_ie.executable, None),
                              )
                else:
                    content_modified = (ie.text_sha1 != old_ie.text_sha1
                                        or ie.text_size != old_ie.text_size)
                    # TODO: ie.kind != old_ie.kind
                    # TODO: symlinks changing targets, content_modified?
                    change = (file_id,
                              (old_path, new_path),
                              content_modified,
                              (True, True),
                              (old_ie.parent_id, ie.parent_id),
                              (old_ie.name, ie.name),
                              (old_ie.kind, ie.kind),
                              (old_ie.executable, ie.executable),
                              )
            yield change
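
# Illustrative sketch (not part of the original importer): roughly how a
# _TreeShim is driven.  The names 'repo', 'basis_inv', 'inv_delta' and
# 'texts_by_file_id' below are hypothetical placeholders.
#
#   def example_content_provider(file_id):
#       # Raises KeyError for files whose text is unchanged from the basis.
#       return texts_by_file_id[file_id]
#
#   shim = _TreeShim(repo, basis_inv, inv_delta, example_content_provider)
#   for change in shim._delta_to_iter_changes():
#       # Each change tuple is fed to CommitBuilder.record_iter_changes(),
#       # as done in AbstractRevisionStore.load_using_delta() below.
#       pass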


class AbstractRevisionStore(object):

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo
        self._graph = None
        self._use_known_graph = True
        self._supports_chks = getattr(repo._format, 'supports_chks', False)
        # Set by start_new_revision() and checked by
        # get_parents_and_revision_for_entry().
        self._current_rev_id = None

    def expects_rich_root(self):
        """Does this store expect inventories with rich roots?"""
        return self.repo.supports_rich_root()

    def init_inventory(self, revision_id):
        """Generate an inventory for a parentless revision."""
        if self._supports_chks:
            inv = self._init_chk_inventory(revision_id, inventory.ROOT_ID)
        else:
            inv = inventory.Inventory(revision_id=revision_id)
            if self.expects_rich_root():
                # The very first root needs to have the right revision
                inv.root.revision = revision_id
        return inv

    def _init_chk_inventory(self, revision_id, root_id):
        """Generate a CHKInventory for a parentless revision."""
        from bzrlib import chk_map
        # Get the creation parameters
        chk_store = self.repo.chk_bytes
        serializer = self.repo._format._serializer
        search_key_name = serializer.search_key_name
        maximum_size = serializer.maximum_size

        # Maybe the rest of this ought to be part of the CHKInventory API?
        inv = inventory.CHKInventory(search_key_name)
        inv.revision_id = revision_id
        inv.root_id = root_id
        search_key_func = chk_map.search_key_registry.get(search_key_name)
        inv.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
        inv.id_to_entry._root_node.set_maximum_size(maximum_size)
        inv.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
            None, search_key_func)
        inv.parent_id_basename_to_file_id._root_node.set_maximum_size(
            maximum_size)
        inv.parent_id_basename_to_file_id._root_node._key_width = 2
        return inv

    def get_inventory(self, revision_id):
        """Get a stored inventory."""
        return self.repo.get_inventory(revision_id)

    def get_file_text(self, revision_id, file_id):
        """Get the text stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return revtree.get_file_text(file_id)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return osutils.split_lines(revtree.get_file_text(file_id))

    def start_new_revision(self, revision, parents, parent_invs):
        """Init the metadata needed for get_parents_and_revision_for_entry().

        :param revision: a Revision object
        :param parents: the parent revision ids
        :param parent_invs: the parent inventories
        """
        self._current_rev_id = revision.revision_id
        self._rev_parents = parents
        self._rev_parent_invs = parent_invs
        # We don't know what the branch will be so there's no real
        # BranchConfig. That means we won't be triggering any hooks and that's
        # a good thing. Without a config though, we must pass in the committer
        # below so that the commit builder doesn't try to look up the config.
        config = None
        # We can't use self.repo.get_commit_builder() here because it starts a
        # new write group. We want one write group around a batch of imports
        # where the default batch size is currently 10000. IGC 20090312
        self._commit_builder = self.repo._commit_builder_class(self.repo,
            parents, config, timestamp=revision.timestamp,
            timezone=revision.timezone, committer=revision.committer,
            revprops=revision.properties, revision_id=revision.revision_id)
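
    # Expected calling sequence (descriptive note, not enforced beyond the
    # assertions below):
    #
    #   store.start_new_revision(revision, parents, parent_invs)
    #   for ie in inventory_entries:
    #       per_file_parents, text_revision = \
    #           store.get_parents_and_revision_for_entry(ie)
    #       # ... record the entry's text using those values ...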

    def get_parents_and_revision_for_entry(self, ie):
        """Get the parents and revision for an inventory entry.

        :param ie: the inventory entry
        :return: parents, revision_id where
            parents is the tuple of parent revision_ids for the per-file graph
            revision_id is the revision_id to use for this entry
        """
        # Check for correct API usage
        if self._current_rev_id is None:
            raise AssertionError("start_new_revision() must be called"
                " before get_parents_and_revision_for_entry()")
        if ie.revision != self._current_rev_id:
            raise AssertionError("start_new_revision() registered a different"
                " revision (%s) from that in the inventory entry (%s)" %
                (self._current_rev_id, ie.revision))

        # Find the heads. This code is lifted from
        # repository.CommitBuilder.record_entry_contents().
        parent_candidate_entries = ie.parent_candidates(self._rev_parent_invs)
        head_set = self._commit_builder._heads(ie.file_id,
            parent_candidate_entries.keys())
        heads = []
        for inv in self._rev_parent_invs:
            if inv.has_id(ie.file_id):
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    rev_id = inv[ie.file_id].revision
                    heads.append(rev_id)
                    head_set.remove(rev_id)

        # Find the revision to use. If the content has not changed
        # since the parent, record the parent's revision.
        if len(heads) == 0:
            return (), ie.revision
        parent_entry = parent_candidate_entries[heads[0]]
        changed = False
        if len(heads) > 1:
            changed = True
        elif (parent_entry.name != ie.name or parent_entry.kind != ie.kind or
            parent_entry.parent_id != ie.parent_id):
            changed = True
        elif ie.kind == 'file':
            if (parent_entry.text_sha1 != ie.text_sha1 or
                parent_entry.executable != ie.executable):
                changed = True
        elif ie.kind == 'symlink':
            if parent_entry.symlink_target != ie.symlink_target:
                changed = True
        if changed:
            rev_id = ie.revision
        else:
            rev_id = parent_entry.revision
        return tuple(heads), rev_id
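
    # Descriptive note (added, not from the original source): for an entry
    # that is identical to its single per-file parent, the method above
    # returns ((parent_rev,), parent_rev), i.e. the parent's per-file
    # revision is reused and no new text needs to be stored; only when the
    # entry changed (or has multiple heads) does it return the current
    # revision id as the entry's revision.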

    def load(self, rev, inv, signature, text_provider, parents_provider,
        inventories_provider=None):
        """Load a revision.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-ids,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        # NOTE: This is bzrlib.repository._install_revision refactored to
        # provide more flexibility in how previous revisions are cached,
        # data is fed in, etc.

        # Get the non-ghost parents and their inventories
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)

        # Load the inventory
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents, parent_invs)
        except errors.RevisionAlreadyPresent:
            pass

        # Load the texts, signature and revision
        entries = self._non_root_entries_iter(inv, rev.revision_id)
        self._load_texts(rev.revision_id, entries, text_provider,
            parents_provider)
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)
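
    # Sketch of how a caller might drive load() (illustrative only; the
    # 'texts' and 'per_file_parents' dicts are hypothetical, and the caller
    # is assumed to already hold a write lock and a write group):
    #
    #   def text_provider(file_id):
    #       return texts[file_id]
    #
    #   def parents_provider(file_id):
    #       return per_file_parents.get(file_id, [])
    #
    #   store.load(rev, inv, None, text_provider, parents_provider)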

    def load_using_delta(self, rev, basis_inv, inv_delta, signature,
        text_provider, parents_provider, inventories_provider=None):
        """Load a revision by applying a delta to a (CHK)Inventory.

        :param rev: the Revision
        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-ids,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        # TODO: set revision_id = rev.revision_id
        builder = self.repo._commit_builder_class(self.repo,
            parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
            timezone=rev.timezone, committer=rev.committer,
            revprops=rev.properties, revision_id=rev.revision_id)
        if self._graph is None and self._use_known_graph:
            if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and
                getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and
                getattr(self.repo, "get_known_graph_ancestry", None)):
                self._graph = self.repo.get_known_graph_ancestry(
                    rev.parent_ids)
            else:
                self._use_known_graph = False
        if self._graph is not None:
            orig_heads = builder._heads
            def thunked_heads(file_id, revision_ids):
                # self._graph thinks in terms of keys, not ids, so translate
                # them
                # old_res = orig_heads(file_id, revision_ids)
                if len(revision_ids) < 2:
                    res = set(revision_ids)
                else:
                    res = set(self._graph.heads(revision_ids))
                # if old_res != res:
                #     import pdb; pdb.set_trace()
                return res
            builder._heads = thunked_heads

        if rev.parent_ids:
            basis_rev_id = rev.parent_ids[0]
        else:
            basis_rev_id = _mod_revision.NULL_REVISION
        tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
        changes = tree._delta_to_iter_changes()
        for (file_id, path, fs_hash) in builder.record_iter_changes(
            tree, basis_rev_id, changes):
            # So far, we don't *do* anything with the result
            pass
        builder.finish_inventory()
        # TODO: This is working around a bug in the bzrlib code base.
        # 'builder.finish_inventory()' ends up doing:
        #   self.inv_sha1 = self.repository.add_inventory_by_delta(...)
        # However, add_inventory_by_delta returns (sha1, inv)
        # And we *want* to keep a handle on both of those objects
        if isinstance(builder.inv_sha1, tuple):
            builder.inv_sha1, builder.new_inventory = builder.inv_sha1
        # This is a duplicate of Builder.commit() since we already have the
        # Revision object, and we *don't* want to call commit_write_group()
        rev.inv_sha1 = builder.inv_sha1
        builder.repository.add_revision(builder._new_revision_id, rev,
            builder.new_inventory, builder._config)
        if self._graph is not None:
            # TODO: Use StaticTuple and .intern() for these things
            self._graph.add_node(builder._new_revision_id, rev.parent_ids)

        if signature is not None:
            raise AssertionError('signatures not guaranteed yet')
            self.repo.add_signature_text(rev.revision_id, signature)
        # self._add_revision(rev, inv)
        return builder.revision_tree().inventory
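
    # Illustrative example of the inv_delta shape consumed above (each item
    # is (old_path, new_path, file_id, inventory_entry), as unpacked by
    # _TreeShim); the concrete values are hypothetical:
    #
    #   inv_delta = [
    #       (None, 'doc/readme.txt', 'readme-id', new_file_entry),   # added
    #       ('old.txt', 'new.txt', 'old-file-id', renamed_entry),    # renamed
    #       ('junk.txt', None, 'junk-id', None),                     # removed
    #   ]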

    def _non_root_entries_iter(self, inv, revision_id):
        if hasattr(inv, 'iter_non_root_entries'):
            entries = inv.iter_non_root_entries()
        else:
            path_entries = inv.iter_entries()
            # Backwards compatibility hack: skip the root id.
            if not self.repo.supports_rich_root():
                path, root = path_entries.next()
                if root.revision != revision_id:
                    raise errors.IncompatibleRevision(repr(self.repo))
            entries = iter([ie for path, ie in path_entries])
        return entries

    def _load_texts(self, revision_id, entries, text_provider,
        parents_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_inventory_by_delta(self, revision_id, basis_inv, inv_delta,
        parents, parent_invs):
        """Add the inventory to the repository as revision_id.

        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: (validator, inv) where validator is the validator
            (which is a sha1 digest, though what is sha'd is repository
            format specific) of the serialized inventory;
            inv is the generated inventory
        """
        if len(parents):
            if self._supports_chks:
                try:
                    validator, new_inv = self.repo.add_inventory_by_delta(
                        parents[0], inv_delta, revision_id, parents,
                        basis_inv=basis_inv, propagate_caches=False)
                except errors.InconsistentDelta:
                    #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()])
                    trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join(
                        [str(i) for i in inv_delta]))
                    raise
            else:
                validator, new_inv = self.repo.add_inventory_by_delta(
                    parents[0], inv_delta, revision_id, parents)
        else:
            if isinstance(basis_inv, inventory.CHKInventory):
                new_inv = basis_inv.create_by_apply_delta(inv_delta,
                    revision_id)
            else:
                new_inv = inventory.Inventory(revision_id=revision_id)
                # This is set in the delta so remove it to prevent a duplicate
                del new_inv[inventory.ROOT_ID]
                new_inv.apply_delta(inv_delta)
            validator = self.repo.add_inventory(revision_id, new_inv, parents)
        return validator, new_inv

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository."""
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
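
    # Sketch of an alternative inventories_provider matching the contract
    # documented in load()/load_using_delta() (illustrative; 'inv_cache' is a
    # hypothetical dict caching inventories by revision id):
    #
    #   def caching_inventories_provider(revision_ids):
    #       present, inventories = [], []
    #       for revision_id in revision_ids:
    #           if revision_id in inv_cache:
    #               present.append(revision_id)
    #               inventories.append(inv_cache[revision_id])
    #           elif repo.has_revision(revision_id):
    #               present.append(revision_id)
    #               inv = repo.revision_tree(revision_id).inventory
    #               inv_cache[revision_id] = inv
    #               inventories.append(inv)
    #           else:
    #               inventories.append(repo.revision_tree(None).inventory)
    #       return present, inventories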


class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for ie in entries:
            # This test is *really* slow: over 50% of import time
            #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            #if ie.revision in w:
            #    continue
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            file_id = ie.file_id
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        tx = self.repo.get_transaction()
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())


class RevisionStore2(AbstractRevisionStore):
    """A RevisionStore that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        text_keys = {}
        for ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        self._load_texts_for_file_rev_ids(missing_texts, text_provider,
            parents_provider)

    def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,
        parents_provider):
        """Load texts to a repository for (file-id, revision-id) tuples.

        :param file_rev_ids: iterator over the (file_id, revision_id) tuples
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        for file_id, revision_id in file_rev_ids:
            text_key = (file_id, revision_id)
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            #print "adding text for %s\n\tparents:%s" % (text_key,text_parents)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    # This is breaking imports into brisbane-core currently
    #def _add_revision(self, rev, inv):
    #    # There's no need to do everything repo.add_revision does and
    #    # doing so (since bzr.dev 3392) can be pretty slow for long
    #    # delta chains on inventories. Just do the essentials here ...
    #    _mod_revision.check_not_reserved_id(rev.revision_id)
    #    self.repo._add_revision(rev)


class ImportRevisionStore1(RevisionStore1):
    """A RevisionStore (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if not None, a function to call to decide
            whether to fulltext the inventory or not. The revision count
            is passed as a parameter and the result is treated as a boolean.
        """
        RevisionStore1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See RevisionStore._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
        parent_texts):
        """See Repository._inventory_add_lines()."""
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas. This should minimize both
        #    # I/O and the time spent applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content