Fix bzr-fastimport when used with newer versions of python-fastimport. (Jelmer Vernooij)
[bzr-fastimport.git] / revision_store.py
blob 33b89de6bd922e5ed046d9517194984fc41181b8
# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""An abstraction of a repository providing just the bits importing needs."""

import cStringIO

from bzrlib import (
    errors,
    graph as _mod_graph,
    inventory,
    knit,
    lru_cache,
    osutils,
    revision as _mod_revision,
    trace,
    )


class _TreeShim(object):
    """Fake a Tree implementation.

    This implements just enough of the tree api to make commit builder happy.
    """

    def __init__(self, repo, basis_inv, inv_delta, content_provider):
        self._repo = repo
        self._content_provider = content_provider
        self._basis_inv = basis_inv
        self._inv_delta = inv_delta
        self._new_info_by_id = dict([(file_id, (new_path, ie))
                                     for _, new_path, file_id, ie in inv_delta])

    def id2path(self, file_id):
        if file_id in self._new_info_by_id:
            new_path = self._new_info_by_id[file_id][0]
            if new_path is None:
                raise errors.NoSuchId(self, file_id)
            return new_path
        return self._basis_inv.id2path(file_id)

    def path2id(self, path):
        # CommitBuilder currently only requires access to the root id. We don't
        # build a map of renamed files, etc. One possibility if we ever *do*
        # need more than just root, is to defer to basis_inv.path2id() and then
        # check if the file_id is in our _new_info_by_id dict. And in that
        # case, return _new_info_by_id[file_id][0]
        if path != '':
            raise NotImplementedError(_TreeShim.path2id)
        # TODO: Handle root renames?
        return self._basis_inv.root.file_id

    def get_file_with_stat(self, file_id, path=None):
        content = self.get_file_text(file_id, path)
        sio = cStringIO.StringIO(content)
        return sio, None

    def get_file_text(self, file_id, path=None):
        try:
            return self._content_provider(file_id)
        except KeyError:
            # The content wasn't shown as 'new'. Just validate this fact
            assert file_id not in self._new_info_by_id
            old_ie = self._basis_inv[file_id]
            old_text_key = (file_id, old_ie.revision)
            stream = self._repo.texts.get_record_stream([old_text_key],
                'unordered', True)
            return stream.next().get_bytes_as('fulltext')

    def get_symlink_target(self, file_id):
        if file_id in self._new_info_by_id:
            ie = self._new_info_by_id[file_id][1]
            return ie.symlink_target
        return self._basis_inv[file_id].symlink_target

    def get_reference_revision(self, file_id, path=None):
        raise NotImplementedError(_TreeShim.get_reference_revision)

    def _delta_to_iter_changes(self):
        """Convert the inv_delta into an iter_changes repr."""
        # iter_changes is:
        #    (file_id,
        #     (old_path, new_path),
        #     content_changed,
        #     (old_versioned, new_versioned),
        #     (old_parent_id, new_parent_id),
        #     (old_name, new_name),
        #     (old_kind, new_kind),
        #     (old_exec, new_exec),
        #    )
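        # For example (hypothetical values), a newly added file yields:
        #   ('foo-id', (None, 'foo.txt'), True, (False, True),
        #    (None, 'root-id'), (None, 'foo.txt'), (None, 'file'), (None, False))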
        basis_inv = self._basis_inv
        for old_path, new_path, file_id, ie in self._inv_delta:
            # Perf: Would this be faster if we did 'if file_id in basis_inv'?
            # Since the *very* common case is that the file already exists, it
            # probably is better to optimize for that
            try:
                old_ie = basis_inv[file_id]
            except errors.NoSuchId:
                old_ie = None
                if ie is None:
                    raise AssertionError('How is both old and new None?')
                    change = (file_id,
                              (old_path, new_path),
                              False,
                              (False, False),
                              (None, None),
                              (None, None),
                              (None, None),
                              (None, None),
                              )
                change = (file_id,
                          (old_path, new_path),
                          True,
                          (False, True),
                          (None, ie.parent_id),
                          (None, ie.name),
                          (None, ie.kind),
                          (None, ie.executable),
                          )
            else:
                if ie is None:
                    change = (file_id,
                              (old_path, new_path),
                              True,
                              (True, False),
                              (old_ie.parent_id, None),
                              (old_ie.name, None),
                              (old_ie.kind, None),
                              (old_ie.executable, None),
                              )
                else:
                    content_modified = (ie.text_sha1 != old_ie.text_sha1
                                        or ie.text_size != old_ie.text_size)
                    # TODO: ie.kind != old_ie.kind
                    # TODO: symlinks changing targets, content_modified?
                    change = (file_id,
                              (old_path, new_path),
                              content_modified,
                              (True, True),
                              (old_ie.parent_id, ie.parent_id),
                              (old_ie.name, ie.name),
                              (old_ie.kind, ie.kind),
                              (old_ie.executable, ie.executable),
                              )
            yield change


class AbstractRevisionStore(object):

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repository: the target repository
        """
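        # A sketch of the expected calling pattern (illustrative only; the
        # batching and write-group policy is up to the caller):
        #
        #   repo.lock_write()
        #   try:
        #       repo.start_write_group()
        #       store = RevisionStore2(repo)
        #       for rev, inv, sig in revisions_to_import:   # hypothetical iterable
        #           store.load(rev, inv, sig, text_provider, parents_provider)
        #       repo.commit_write_group()
        #   finally:
        #       repo.unlock()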
        self.repo = repo
        self._graph = None
        self._use_known_graph = True
        self._supports_chks = getattr(repo._format, 'supports_chks', False)

    def expects_rich_root(self):
        """Does this store expect inventories with rich roots?"""
        return self.repo.supports_rich_root()

    def init_inventory(self, revision_id):
        """Generate an inventory for a parentless revision."""
        if self._supports_chks:
            inv = self._init_chk_inventory(revision_id, inventory.ROOT_ID)
        else:
            inv = inventory.Inventory(revision_id=revision_id)
            if self.expects_rich_root():
                # The very first root needs to have the right revision
                inv.root.revision = revision_id
        return inv

    def _init_chk_inventory(self, revision_id, root_id):
        """Generate a CHKInventory for a parentless revision."""
        from bzrlib import chk_map
        # Get the creation parameters
        chk_store = self.repo.chk_bytes
        serializer = self.repo._format._serializer
        search_key_name = serializer.search_key_name
        maximum_size = serializer.maximum_size

        # Maybe the rest of this ought to be part of the CHKInventory API?
        inv = inventory.CHKInventory(search_key_name)
        inv.revision_id = revision_id
        inv.root_id = root_id
        search_key_func = chk_map.search_key_registry.get(search_key_name)
        inv.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
        inv.id_to_entry._root_node.set_maximum_size(maximum_size)
        inv.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
            None, search_key_func)
        inv.parent_id_basename_to_file_id._root_node.set_maximum_size(
            maximum_size)
        inv.parent_id_basename_to_file_id._root_node._key_width = 2
        return inv

    def get_inventory(self, revision_id):
        """Get a stored inventory."""
        return self.repo.get_inventory(revision_id)

    def get_file_text(self, revision_id, file_id):
        """Get the text stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return revtree.get_file_text(file_id)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return osutils.split_lines(revtree.get_file_text(file_id))

    def start_new_revision(self, revision, parents, parent_invs):
        """Init the metadata needed for get_parents_and_revision_for_entry().

        :param revision: a Revision object
        """
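        # Hypothetical calling order: invoke this once per revision, before
        # get_parents_and_revision_for_entry() is queried for any of that
        # revision's inventory entries (the assertions there enforce this).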
        self._current_rev_id = revision.revision_id
        self._rev_parents = parents
        self._rev_parent_invs = parent_invs
        # We don't know what the branch will be so there's no real BranchConfig.
        # That means we won't be triggering any hooks and that's a good thing.
        # Without a config though, we must pass in the committer below so that
        # the commit builder doesn't try to look up the config.
        config = None
        # We can't use self.repo.get_commit_builder() here because it starts a
        # new write group. We want one write group around a batch of imports
        # where the default batch size is currently 10000. IGC 20090312
        self._commit_builder = self.repo._commit_builder_class(self.repo,
            parents, config, timestamp=revision.timestamp,
            timezone=revision.timezone, committer=revision.committer,
            revprops=revision.properties, revision_id=revision.revision_id)

    def get_parents_and_revision_for_entry(self, ie):
        """Get the parents and revision for an inventory entry.

        :param ie: the inventory entry
        :return: (parents, revision_id) where
            parents is the tuple of parent revision_ids for the per-file graph
            revision_id is the revision_id to use for this entry
        """
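        # Example (hypothetical): if the entry's text is unchanged from its
        # sole parent, this returns ((parent_file_rev,), parent_file_rev) and
        # the existing text can be reused; if it changed, it returns
        # ((parent_file_rev,), ie.revision) and a new text must be recorded.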
        # Check for correct API usage
        if self._current_rev_id is None:
            raise AssertionError("start_new_revision() must be called"
                " before get_parents_and_revision_for_entry()")
        if ie.revision != self._current_rev_id:
            raise AssertionError("start_new_revision() registered a different"
                " revision (%s) to that in the inventory entry (%s)" %
                (self._current_rev_id, ie.revision))

        # Find the heads. This code is lifted from
        # repository.CommitBuilder.record_entry_contents().
        parent_candidate_entries = ie.parent_candidates(self._rev_parent_invs)
        head_set = self._commit_builder._heads(ie.file_id,
            parent_candidate_entries.keys())
        heads = []
        for inv in self._rev_parent_invs:
            if inv.has_id(ie.file_id):
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    rev_id = inv[ie.file_id].revision
                    heads.append(rev_id)
                    head_set.remove(rev_id)

        # Find the revision to use. If the content has not changed
        # since the parent, record the parent's revision.
        if len(heads) == 0:
            return (), ie.revision
        parent_entry = parent_candidate_entries[heads[0]]
        changed = False
        if len(heads) > 1:
            changed = True
        elif (parent_entry.name != ie.name or parent_entry.kind != ie.kind or
              parent_entry.parent_id != ie.parent_id):
            changed = True
        elif ie.kind == 'file':
            if (parent_entry.text_sha1 != ie.text_sha1 or
                    parent_entry.executable != ie.executable):
                changed = True
        elif ie.kind == 'symlink':
            if parent_entry.symlink_target != ie.symlink_target:
                changed = True
        if changed:
            rev_id = ie.revision
        else:
            rev_id = parent_entry.revision
        return tuple(heads), rev_id

    def load(self, rev, inv, signature, text_provider, parents_provider,
            inventories_provider=None):
        """Load a revision.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a repository and
            a list of revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-ids,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
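        # A minimal sketch of the provider callables (names are illustrative):
        #
        #   def text_provider(file_id):
        #       return file_texts[file_id]          # content for this revision
        #
        #   def parents_provider(file_id):
        #       return per_file_parents.get(file_id, [])
        #
        #   store.load(rev, inv, None, text_provider, parents_provider)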
        # NOTE: This is bzrlib.repository._install_revision refactored
        # to provide more flexibility in how previous revisions are cached,
        # data is fed in, etc.

        # Get the non-ghost parents and their inventories
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)

        # Load the inventory
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents, parent_invs)
        except errors.RevisionAlreadyPresent:
            pass

        # Load the texts, signature and revision
        entries = self._non_root_entries_iter(inv, rev.revision_id)
        self._load_texts(rev.revision_id, entries, text_provider,
            parents_provider)
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def load_using_delta(self, rev, basis_inv, inv_delta, signature,
            text_provider, parents_provider, inventories_provider=None):
        """Load a revision by applying a delta to a (CHK)Inventory.

        :param rev: the Revision
        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        :param inventories_provider: a callable expecting a repository and
            a list of revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-ids,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
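        # The delta uses the standard bzrlib inventory-delta shape: a list of
        # (old_path, new_path, file_id, new_entry) tuples, with new_entry set
        # to None for deletions. A hypothetical single-file addition:
        #
        #   inv_delta = [(None, 'README', 'readme-id',
        #                 inventory.InventoryFile('readme-id', 'README', root_id))]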
        # TODO: set revision_id = rev.revision_id
        builder = self.repo._commit_builder_class(self.repo,
            parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
            timezone=rev.timezone, committer=rev.committer,
            revprops=rev.properties, revision_id=rev.revision_id)
        if self._graph is None and self._use_known_graph:
            if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and
                getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and
                getattr(self.repo, "get_known_graph_ancestry", None)):
                self._graph = self.repo.get_known_graph_ancestry(
                    rev.parent_ids)
            else:
                self._use_known_graph = False
        if self._graph is not None:
            orig_heads = builder._heads
            def thunked_heads(file_id, revision_ids):
                # self._graph thinks in terms of keys, not ids, so translate
                # them
                # old_res = orig_heads(file_id, revision_ids)
                if len(revision_ids) < 2:
                    res = set(revision_ids)
                else:
                    res = set(self._graph.heads(revision_ids))
                # if old_res != res:
                #     import pdb; pdb.set_trace()
                return res
            builder._heads = thunked_heads

        if rev.parent_ids:
            basis_rev_id = rev.parent_ids[0]
        else:
            basis_rev_id = _mod_revision.NULL_REVISION
        tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
        changes = tree._delta_to_iter_changes()
        for (file_id, path, fs_hash) in builder.record_iter_changes(
                tree, basis_rev_id, changes):
            # So far, we don't *do* anything with the result
            pass
        builder.finish_inventory()
        # TODO: This is working around a bug in the bzrlib code base.
        # 'builder.finish_inventory()' ends up doing:
        #   self.inv_sha1 = self.repository.add_inventory_by_delta(...)
        # However, add_inventory_by_delta returns (sha1, inv)
        # And we *want* to keep a handle on both of those objects
        if isinstance(builder.inv_sha1, tuple):
            builder.inv_sha1, builder.new_inventory = builder.inv_sha1
        # This is a duplicate of Builder.commit() since we already have the
        # Revision object, and we *don't* want to call commit_write_group()
        rev.inv_sha1 = builder.inv_sha1
        try:
            config = builder._config_stack
        except AttributeError:  # bzr < 2.5
            config = builder._config
        builder.repository.add_revision(builder._new_revision_id, rev,
            builder.new_inventory)
        if self._graph is not None:
            # TODO: Use StaticTuple and .intern() for these things
            self._graph.add_node(builder._new_revision_id, rev.parent_ids)

        if signature is not None:
            raise AssertionError('signatures not guaranteed yet')
            self.repo.add_signature_text(rev.revision_id, signature)
        # self._add_revision(rev, inv)
        return builder.revision_tree().inventory

    def _non_root_entries_iter(self, inv, revision_id):
        if hasattr(inv, 'iter_non_root_entries'):
            entries = inv.iter_non_root_entries()
        else:
            path_entries = inv.iter_entries()
            # Backwards compatibility hack: skip the root id.
            if not self.repo.supports_rich_root():
                path, root = path_entries.next()
                if root.revision != revision_id:
                    raise errors.IncompatibleRevision(repr(self.repo))
            entries = iter([ie for path, ie in path_entries])
        return entries

    def _load_texts(self, revision_id, entries, text_provider,
            parents_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_inventory_by_delta(self, revision_id, basis_inv, inv_delta,
            parents, parent_invs):
        """Add the inventory to the repository as revision_id.

        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: (validator, inv) where validator is the validator
            (which is a sha1 digest, though what is sha'd is repository format
            specific) of the serialized inventory;
            inv is the generated inventory
        """
        if len(parents):
            if self._supports_chks:
                try:
                    validator, new_inv = self.repo.add_inventory_by_delta(
                        parents[0], inv_delta, revision_id, parents,
                        basis_inv=basis_inv, propagate_caches=False)
                except errors.InconsistentDelta:
                    #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()])
                    trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join(
                        [str(i) for i in inv_delta]))
                    raise
            else:
                validator, new_inv = self.repo.add_inventory_by_delta(
                    parents[0], inv_delta, revision_id, parents)
        else:
            if isinstance(basis_inv, inventory.CHKInventory):
                new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id)
            else:
                new_inv = inventory.Inventory(revision_id=revision_id)
                # This is set in the delta so remove it to prevent a duplicate
                del new_inv[inventory.ROOT_ID]
                new_inv.apply_delta(inv_delta)
            validator = self.repo.add_inventory(revision_id, new_inv, parents)
        return validator, new_inv

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository."""
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories


class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for ie in entries:
            # This test is *really* slow: over 50% of import time
            #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            #if ie.revision in w:
            #    continue
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            file_id = ie.file_id
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        tx = self.repo.get_transaction()
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())


class RevisionStore2(AbstractRevisionStore):
    """A RevisionStore that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        text_keys = {}
        for ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        self._load_texts_for_file_rev_ids(missing_texts, text_provider,
            parents_provider)

    def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,
            parents_provider):
        """Load texts to a repository for (file-id, revision-id) tuples.

        :param file_rev_ids: iterator over the (file_id, revision_id) tuples
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        for file_id, revision_id in file_rev_ids:
            text_key = (file_id, revision_id)
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            #print "adding text for %s\n\tparents:%s" % (text_key, text_parents)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    # This is breaking imports into brisbane-core currently
    #def _add_revision(self, rev, inv):
    #    # There's no need to do everything repo.add_revision does and
    #    # doing so (since bzr.dev 3392) can be pretty slow for long
    #    # delta chains on inventories. Just do the essentials here ...
    #    _mod_revision.check_not_reserved_id(rev.revision_id)
    #    self.repo._add_revision(rev)


class ImportRevisionStore1(RevisionStore1):
    """A RevisionStore (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
            random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repository: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if not None, a function to call to decide
            whether to fulltext the inventory or not. The revision count
            is passed as a parameter and the result is treated as a boolean.
        """
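        # Example (hypothetical): store a full inventory text every 200
        # revisions and deltas otherwise:
        #
        #   store = ImportRevisionStore1(repo,
        #       fulltext_when=lambda count: count % 200 == 0)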
        RevisionStore1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See RevisionStore._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines()."""
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas. This should minimize both
        #    # I/O and the time spent applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, bytes = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content