Cope with non-ascii characters in symbolic links.
[bzr-fastimport.git] / bzr_commit_handler.py
blobc47a39d4be73c8b702513b979b2b7dbe53000450
1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 """CommitHandlers that build and save revisions & their inventories."""
20 from bzrlib import (
21 debug,
22 errors,
23 generate_ids,
24 inventory,
25 osutils,
26 revision,
27 serializer,
29 from bzrlib.trace import (
30 mutter,
31 note,
32 warning,
34 from fastimport import (
35 helpers,
36 processor,
39 from bzrlib.plugins.fastimport.helpers import (
40 mode_to_kind,
# True when the installed bzrlib serializer escapes XML-invalid characters
# itself; on older bzrlib versions we must escape commit messages ourselves
# (see build_revision below).
_serializer_handles_escaping = hasattr(serializer.Serializer,
    'squashes_xml_invalid_characters')
def copy_inventory(inv):
    """Return a mutable copy of an inventory.

    Note: inv._get_mutable_inventory() would be cheaper but currently
    breaks revision-id matching, so the public copy() API is used
    instead.

    TODO: a shallow copy would do - deep inventory copying is expensive.
    """
    return inv.copy()
class GenericCommitHandler(processor.CommitHandler):
    """Base class for Bazaar CommitHandlers.

    Provides the shared bookkeeping for a single commit: revision-id
    generation, parent and inventory lookup, and per-commit tracking of
    added/modified/deleted paths. Subclasses implement the record_*
    methods and the file-command handlers.
    """

    def __init__(self, command, cache_mgr, rev_store, verbose=False,
            prune_empty_dirs=True):
        super(GenericCommitHandler, self).__init__(command)
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.branch_ref = command.ref
        self.prune_empty_dirs = prune_empty_dirs
        # This tracks path->file-id for things we're creating this commit.
        # If the same path is created multiple times, we need to warn the
        # user and add it just once.
        # If a path is added then renamed or copied, we need to handle that.
        self._new_file_ids = {}
        # This tracks path->file-id for things we're modifying this commit.
        # If a path is modified then renamed or copied, we need to make
        # sure we grab the new content.
        self._modified_file_ids = {}
        # This tracks the paths for things we're deleting this commit.
        # If the same path is added or the destination of a rename say,
        # then a fresh file-id is required.
        self._paths_deleted_this_commit = set()

    def mutter(self, msg, *args):
        """Output a mutter but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        mutter(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.data_for_commit = {}
        #if self.rev_store.expects_rich_root():
        self.data_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = self.cache_mgr.reftracker.track_heads(self.command)

        # Convert the parent commit-ids to bzr revision-ids
        if parents:
            self.parents = [self.cache_mgr.revision_ids[p]
                for p in parents]
        else:
            self.parents = []
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(p) for p in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # cache of per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if len(self.parents) == 0:
            self.basis_inventory = self._init_inventory()
        else:
            self.basis_inventory = self.get_inventory(self.parents[0])
        if hasattr(self.basis_inventory, "root_id"):
            self.inventory_root_id = self.basis_inventory.root_id
        else:
            self.inventory_root_id = self.basis_inventory.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}

    def _init_inventory(self):
        """Create an empty starting inventory for this revision."""
        return self.rev_store.init_inventory(self.revision_id)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.mutter("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from the RevisionStore
            inv = self.rev_store.get_inventory(revision_id)
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_data(self, file_id):
        """Get the data bytes for a file-id."""
        return self.data_for_commit[file_id]

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return osutils.split_lines(self._get_data(file_id))

    def _get_per_file_parents(self, file_id):
        """Get the per-file parents recorded for a file-id."""
        return self.per_file_parents_for_commit[file_id]

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryStore to
        speed up inventory reconstruction.

        :return: (present, inventories) where present is the subset of
            revision_ids actually found; an empty inventory is substituted
            for any that could not be reconstructed.
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the revision store
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from the revision store
                try:
                    inv = self.get_inventory(revision_id)
                    present.append(revision_id)
                except Exception:
                    # Best-effort fallback: substitute an empty inventory.
                    # (Was a bare except; narrowed so KeyboardInterrupt and
                    # other BaseExceptions still propagate.)
                    inv = self._init_inventory()
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
            is_new = True if the file_id is newly created
        """
        if path not in self._paths_deleted_this_commit:
            # Try file-ids renamed in this commit
            id = self._modified_file_ids.get(path)
            if id is not None:
                return id, False

            # Try the basis inventory
            id = self.basis_inventory.path2id(path)
            if id is not None:
                return id, False

            # Try the other inventories
            if len(self.parents) > 1:
                for inv in self.parent_invs[1:]:
                    # Bug fix: look the path up in *this* parent inventory.
                    # Previously the basis inventory was (redundantly)
                    # consulted again, so merge parents were never checked.
                    id = inv.path2id(path)
                    if id is not None:
                        return id, False

        # Doesn't exist yet so create it
        dirname, basename = osutils.split(path)
        id = generate_ids.gen_file_id(basename)
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
            id, path, self.revision_id)
        self._new_file_ids[path] = id
        return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def _format_name_email(self, name, email):
        """Format name & email as a string."""
        if email:
            return "%s <%s>" % (name, email)
        else:
            return name

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email(committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def build_revision(self):
        """Build the Revision object for this commit."""
        rev_props = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in rev_props:
            rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr(
                self.branch_ref)
        self._save_author_info(rev_props)
        committer = self.command.committer
        who = self._format_name_email(committer[0], committer[1])
        message = self.command.message
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)
        return revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)

    def _legal_revision_properties(self, props):
        """Clean-up any revision properties we can't handle."""
        # For now, we just check for None because that's not allowed in 2.0rc1
        result = {}
        if props is not None:
            for name, value in props.items():
                if value is None:
                    self.warning(
                        "converting None to empty string for property %s"
                        % (name,))
                    result[name] = ''
                else:
                    result[name] = value
        return result

    def _save_author_info(self, rev_props):
        """Record author(s) in rev_props when they differ from the committer."""
        author = self.command.author
        if author is None:
            return
        if self.command.more_authors:
            authors = [author] + self.command.more_authors
            author_ids = [self._format_name_email(a[0], a[1]) for a in authors]
        elif author != self.command.committer:
            author_ids = [self._format_name_email(author[0], author[1])]
        else:
            return
        # If we reach here, there are authors worth storing
        rev_props['authors'] = "\n".join(author_ids)

    def _modify_item(self, path, kind, is_executable, data, inv):
        """Add to or change an item in the inventory."""
        # If we've already added this, warn the user that we're ignoring it.
        # In the future, it might be nice to double check that the new data
        # is the same as the old but, frankly, exporters should be fixed
        # not to produce bad data streams in the first place ...
        existing = self._new_file_ids.get(path)
        if existing:
            # We don't warn about directories because it's fine for them
            # to be created already by a previous rename
            if kind != 'directory':
                self.warning("%s already added in this commit - ignoring" %
                    (path,))
            return

        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path, inv)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = is_executable
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            self.data_for_commit[file_id] = data
        elif kind == 'directory':
            self.directory_entries[path] = ie
            # There are no lines stored for a directory so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        elif kind == 'symlink':
            # Cope with non-ascii characters in symbolic links
            ie.symlink_target = data.decode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        else:
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
                % (kind, path))
            return
        # Record it
        if file_id in inv:
            old_ie = inv[file_id]
            if old_ie.kind == 'directory':
                self.record_delete(path, old_ie)
            self.record_changed(path, ie, parent_id)
        else:
            try:
                self.record_new(path, ie)
            except:
                # Parenthesized single-argument prints parse on both
                # Python 2 and 3.
                print("failed to add path '%s' with entry '%s' in command %s"
                    % (path, ie, self.command.id))
                # Bug fix: ie.parent_id is a file-id string with no
                # 'children' attribute - look the parent entry up instead
                # so this diagnostic doesn't mask the original error.
                print("parent's children are:\n%r\n"
                    % (inv[ie.parent_id].children,))
                raise

    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'"""
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id
        try:
            ie = self._get_directory_entry(inv, dirname)
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname, inv)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
            dir_basename, parent_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[dir_file_id] = ''

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if dir_file_id in inv:
            self.record_delete(dirname, ie)
        self.record_new(dirname, ie)
        return basename, ie.file_id

    def _get_directory_entry(self, inv, dirname):
        """Get the inventory entry for a directory.

        Raises KeyError if dirname is not a directory in inv.
        """
        result = self.directory_entries.get(dirname)
        if result is None:
            if dirname in self._paths_deleted_this_commit:
                raise KeyError
            try:
                file_id = inv.path2id(dirname)
            except errors.NoSuchId:
                # In a CHKInventory, this is raised if there's no root yet
                raise KeyError
            if file_id is None:
                raise KeyError
            result = inv[file_id]
            # dirname must be a directory for us to return it
            if result.kind == 'directory':
                self.directory_entries[dirname] = result
            else:
                raise KeyError
        return result

    def _delete_item(self, path, inv):
        """Record the deletion of path, if it exists."""
        newly_added = self._new_file_ids.get(path)
        if newly_added:
            # We've only just added this path earlier in this commit.
            file_id = newly_added
            # note: delta entries look like (old, new, file-id, ie)
            ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(path)
            if file_id is None:
                self.mutter("ignoring delete of %s as not in inventory", path)
                return
            try:
                ie = inv[file_id]
            except errors.NoSuchId:
                self.mutter("ignoring delete of %s as not in inventory", path)
                return
        self.record_delete(path, ie)

    def _copy_item(self, src_path, dest_path, inv):
        """Record the copy of src_path's content to dest_path."""
        newly_changed = self._new_file_ids.get(src_path) or \
            self._modified_file_ids.get(src_path)
        if newly_changed:
            # We've only just added/changed this path earlier in this commit.
            file_id = newly_changed
            # note: delta entries look like (old, new, file-id, ie)
            ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(src_path)
            if file_id is None:
                self.warning("ignoring copy of %s to %s - source does not exist",
                    src_path, dest_path)
                return
            ie = inv[file_id]
        kind = ie.kind
        if kind == 'file':
            if newly_changed:
                content = self.data_for_commit[file_id]
            else:
                content = self.rev_store.get_file_text(self.parents[0], file_id)
            self._modify_item(dest_path, kind, ie.executable, content, inv)
        elif kind == 'symlink':
            self._modify_item(dest_path, kind, False,
                ie.symlink_target.encode("utf-8"), inv)
        else:
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, dest_path)

    def _rename_item(self, old_path, new_path, inv):
        """Record the rename of old_path to new_path."""
        existing = self._new_file_ids.get(old_path) or \
            self._modified_file_ids.get(old_path)
        if existing:
            # We've only just added/modified this path earlier in this commit.
            # Change the add/modify of old_path to an add of new_path
            self._rename_pending_change(old_path, new_path, existing)
            return

        file_id = inv.path2id(old_path)
        if file_id is None:
            self.warning(
                "ignoring rename of %s to %s - old path does not exist" %
                (old_path, new_path))
            return
        ie = inv[file_id]
        rev_id = ie.revision
        new_file_id = inv.path2id(new_path)
        if new_file_id is not None:
            self.record_delete(new_path, inv[new_file_id])
        self.record_rename(old_path, new_path, file_id, ie)

        # The revision-id for this entry will be/has been updated and
        # that means the loader then needs to know what the "new" text is.
        # We therefore must go back to the revision store to get it.
        lines = self.rev_store.get_file_lines(rev_id, file_id)
        self.data_for_commit[file_id] = ''.join(lines)

    def _delete_all_items(self, inv):
        """Remove every top-level entry (and hence everything) from inv."""
        for name, root_item in inv.root.children.iteritems():
            inv.remove_recursive_id(root_item.file_id)

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about deleting fileid unless a merge parent contains it."""
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects."""

    def pre_process_files(self):
        """Prepare a mutable inventory seeded from the basis inventory."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if len(self.parents) == 0:
            self.inventory = self.basis_inventory
        else:
            self.inventory = copy_inventory(self.basis_inventory)
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        # Cache first so later commits can find this inventory quickly.
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(self.revision, self.inventory, None,
            lambda file_id: self._get_data(file_id),
            lambda file_id: self._get_per_file_parents(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))

    def record_new(self, path, ie):
        """Add a brand new entry to the inventory."""
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[ie.file_id] = per_file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink
            del self.inventory[ie.file_id]
            # Try again
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace an existing entry with a changed one in-place."""
        # HACK: no API for this (del+add does more than it needs to)
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        # Poke the new entry directly into the inventory's internal maps.
        self.inventory._byid[ie.file_id] = ie
        parent_ie = self.inventory._byid[parent_id]
        parent_ie.children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove an entry (and any children) from the inventory."""
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Move an existing entry to a new path."""
        # For a rename, the revision-id is always the new one so
        # no need to change/set it here
        ie.revision = self.revision_id
        per_file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = per_file_parents
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        (kind, is_executable) = mode_to_kind(filecmd.mode)
        self._modify_item(filecmd.path, kind,
            is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        self.debug("deleting %s", filecmd.path)
        self._delete_item(filecmd.path, self.inventory)

    def copy_handler(self, filecmd):
        """Handle a filecopy command."""
        src_path = filecmd.src_path
        dest_path = filecmd.dest_path
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.inventory)

    def rename_handler(self, filecmd):
        """Handle a filerename command."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.inventory)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
class InventoryDeltaCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds Inventories by applying a delta."""

    def pre_process_files(self):
        """Initialise the delta accumulator (and the root entry if needed)."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}
        if len(self.parents) == 0 or not self.rev_store.expects_rich_root():
            if self.parents:
                old_path = ''
            else:
                old_path = None
            # Need to explicitly add the root entry for the first revision
            # and for non rich-root inventories
            root_id = inventory.ROOT_ID
            root_ie = inventory.InventoryDirectory(root_id, u'', None)
            root_ie.revision = self.revision_id
            self._add_entry((old_path, '', root_id, root_ie))

    def post_process_files(self):
        """Save the revision."""
        delta = self._get_final_delta()
        inv = self.rev_store.load_using_delta(self.revision,
            self.basis_inventory, delta, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = inv
        #print "committed %s" % self.revision_id

    def _get_final_delta(self):
        """Generate the final delta.

        Smart post-processing of changes, e.g. pruning of directories
        that would become empty, goes here.
        """
        delta = list(self._delta_entries_by_fileid.values())
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
            candidates = self._dirs_that_might_become_empty
            # Deleting a directory may empty its parent, so iterate until
            # no new candidates appear.
            while candidates:
                never_born = set()
                parent_dirs_that_might_become_empty = set()
                for path, file_id in self._empty_after_delta(delta, candidates):
                    newly_added = self._new_file_ids.get(path)
                    if newly_added:
                        never_born.add(newly_added)
                    else:
                        delta.append((path, None, file_id, None))
                    parent_dir = osutils.dirname(path)
                    if parent_dir:
                        parent_dirs_that_might_become_empty.add(parent_dir)
                candidates = parent_dirs_that_might_become_empty

            # Clean up entries that got deleted before they were ever added
            if never_born:
                delta = [de for de in delta if de[2] not in never_born]
        return delta

    def _empty_after_delta(self, delta, candidates):
        """Return [(path, file_id)] for candidate dirs empty after delta."""
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
        new_inv = self._get_proposed_inventory(delta)
        result = []
        for dir in candidates:
            file_id = new_inv.path2id(dir)
            if file_id is None:
                continue
            ie = new_inv[file_id]
            if ie.kind != 'directory':
                continue
            if len(ie.children) == 0:
                result.append((dir, file_id))
                if self.verbose:
                    self.note("pruning empty directory %s" % (dir,))
        return result

    def _get_proposed_inventory(self, delta):
        """Build a throwaway inventory showing the result of applying delta."""
        if len(self.parents):
            # new_inv = self.basis_inventory._get_mutable_inventory()
            # Note that this will create unreferenced chk pages if we end up
            # deleting entries, because this 'test' inventory won't end up
            # used. However, it is cheaper than having to create a full copy of
            # the inventory for every commit.
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
                'not-a-valid-revision-id:')
        else:
            new_inv = inventory.Inventory(revision_id=self.revision_id)
            # This is set in the delta so remove it to prevent a duplicate
            del new_inv[inventory.ROOT_ID]
            try:
                new_inv.apply_delta(delta)
            except errors.InconsistentDelta:
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
                raise
        return new_inv

    def _add_entry(self, entry):
        """Accumulate one delta entry, merging with any earlier entry for
        the same file-id and recording per-file parents.

        entry is (old_path, new_path, file_id, inventory_entry).
        """
        # We need to combine the data if multiple entries have the same file-id.
        # For example, a rename followed by a modification looks like:
        #
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
        #
        # Likewise, a modification followed by a rename looks like:
        #
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
        #
        # Here's a rename followed by a delete and a modification followed by
        # a delete:
        #
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
        #
        # In summary, we use the original old-path, new new-path and new ie
        # when combining entries.
        old_path = entry[0]
        new_path = entry[1]
        file_id = entry[2]
        ie = entry[3]
        existing = self._delta_entries_by_fileid.get(file_id, None)
        if existing is not None:
            old_path = existing[0]
            entry = (old_path, new_path, file_id, ie)
            if new_path is None and old_path is None:
                # This is a delete cancelling a previous add
                del self._delta_entries_by_fileid[file_id]
                parent_dir = osutils.dirname(existing[1])
                self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
                if parent_dir:
                    self._dirs_that_might_become_empty.add(parent_dir)
                return
            else:
                self._delta_entries_by_fileid[file_id] = entry

        # Collect parent directories that might become empty
        if new_path is None:
            # delete
            parent_dir = osutils.dirname(old_path)
            # note: no need to check the root
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
        elif old_path is not None and old_path != new_path:
            # rename
            old_parent_dir = osutils.dirname(old_path)
            new_parent_dir = osutils.dirname(new_path)
            if old_parent_dir and old_parent_dir != new_parent_dir:
                self._dirs_that_might_become_empty.add(old_parent_dir)

        # Calculate the per-file parents, if not already done
        if file_id in self.per_file_parents_for_commit:
            return
        if old_path is None:
            # add
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
        elif new_path is None:
            # delete
            pass
        elif old_path != new_path:
            # rename
            per_file_parents, _ = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
        else:
            # modify
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents

    def record_new(self, path, ie):
        """Record the addition of a new entry."""
        self._add_entry((None, path, ie.file_id, ie))

    def record_changed(self, path, ie, parent_id=None):
        """Record an in-place modification of an entry."""
        self._add_entry((path, path, ie.file_id, ie))
        self._modified_file_ids[path] = ie.file_id

    def record_delete(self, path, ie):
        """Record the deletion of an entry and, recursively, its children."""
        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind == 'directory':
            try:
                del self.directory_entries[path]
            except KeyError:
                pass
            # Recursively delete everything under this directory in the
            # basis inventory.
            for child_relpath, entry in \
                self.basis_inventory.iter_entries_by_dir(from_dir=ie):
                child_path = osutils.pathjoin(path, child_relpath)
                self._add_entry((child_path, None, entry.file_id, None))
                self._paths_deleted_this_commit.add(child_path)
                if entry.kind == 'directory':
                    try:
                        del self.directory_entries[child_path]
                    except KeyError:
                        pass

    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record a rename of an entry already present in the basis."""
        new_ie = old_ie.copy()
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        new_ie.name = new_basename
        new_ie.parent_id = new_parent_id
        new_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, new_ie))
        self._modified_file_ids[new_path] = file_id
        self._paths_deleted_this_commit.discard(new_path)
        if new_ie.kind == 'directory':
            self.directory_entries[new_path] = new_ie

    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead."""
        # note: delta entries look like (old, new, file-id, ie)
        old_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, old_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            del self._new_file_ids[old_path]
        else:
            del self._modified_file_ids[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = old_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = old_ie.executable
            ie.text_sha1 = old_ie.text_sha1
            ie.text_size = old_ie.text_size
        elif kind == 'symlink':
            ie.symlink_target = old_ie.symlink_target

        # Record it
        self.record_new(new_path, ie)

    def modify_handler(self, filecmd):
        """Handle a filemodify command."""
        (kind, executable) = mode_to_kind(filecmd.mode)
        if filecmd.dataref is not None:
            if kind == "directory":
                data = None
            elif kind == "tree-reference":
                data = filecmd.dataref
            else:
                data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_item(filecmd.path, kind,
            executable, data, self.basis_inventory)

    def delete_handler(self, filecmd):
        """Handle a filedelete command."""
        self.debug("deleting %s", filecmd.path)
        self._delete_item(filecmd.path, self.basis_inventory)

    def copy_handler(self, filecmd):
        """Handle a filecopy command (paths arrive as UTF-8 bytes)."""
        src_path = filecmd.src_path.decode("utf8")
        dest_path = filecmd.dest_path.decode("utf8")
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.basis_inventory)

    def rename_handler(self, filecmd):
        """Handle a filerename command (paths arrive as UTF-8 bytes)."""
        old_path = filecmd.old_path.decode("utf8")
        new_path = filecmd.new_path.decode("utf8")
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.basis_inventory)

    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command."""
        self.debug("deleting all files (and also all directories)")
        # I'm not 100% sure this will work in the delta case.
        # But clearing out the basis inventory so that everything
        # is added sounds ok in theory ...
        # We grab a copy as the basis is likely to be cached and
        # we don't want to destroy the cached version
        self.basis_inventory = copy_inventory(self.basis_inventory)
        self._delete_all_items(self.basis_inventory)