Fix bzr-fastimport when used with newer versions of python-fastimport.(Jelmer Vernooij)
[bzr-fastimport.git] / bzr_commit_handler.py
blob f72b4012d9dd5b9d852535ed7809cc03cbdc315b
1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 """CommitHandlers that build and save revisions & their inventories."""
from bzrlib import (
    debug,
    errors,
    generate_ids,
    inventory,
    osutils,
    revision,
    serializer,
    )
from bzrlib.trace import (
    mutter,
    note,
    warning,
    )
from fastimport import (
    helpers,
    processor,
    )
from bzrlib.plugins.fastimport.helpers import (
    mode_to_kind,
    )
43 _serializer_handles_escaping = hasattr(serializer.Serializer,
44 'squashes_xml_invalid_characters')
def copy_inventory(inv):
    """Return a mutable copy of an inventory.

    Each entry is copied so that mutating the result cannot corrupt
    the original (possibly cached) inventory.
    """
    new_inv = inventory.Inventory(None, inv.revision_id)
    for path, entry in inv.iter_entries_by_dir():
        new_inv.add(entry.copy())
    return new_inv
class GenericCommitHandler(processor.CommitHandler):
    """Base class for Bazaar CommitHandlers."""

    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Create a handler for one fast-import commit command.

        :param command: the fastimport CommitCommand to process
        :param cache_mgr: cache manager tracking heads, blobs & inventories
        :param rev_store: the revision store revisions are written to
        :param verbose: if True, emit extra diagnostics
        :param prune_empty_dirs: if True, directories left empty by this
            commit's changes are implicitly deleted
        """
        super(GenericCommitHandler, self).__init__(command)
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.branch_ref = command.ref
        self.prune_empty_dirs = prune_empty_dirs
        # This tracks path->file-id for things we're creating this commit.
        # If the same path is created multiple times, we need to warn the
        # user and add it just once.
        # If a path is added then renamed or copied, we need to handle that.
        self._new_file_ids = {}
        # This tracks path->file-id for things we're modifying this commit.
        # If a path is modified then renamed or copied, we need to make
        # sure we grab the new content.
        self._modified_file_ids = {}
        # This tracks the paths for things we're deleting this commit.
        # If the same path is added or the destination of a rename say,
        # then a fresh file-id is required.
        self._paths_deleted_this_commit = set()
80 def mutter(self, msg, *args):
81 """Output a mutter but add context."""
82 msg = "%s (%s)" % (msg, self.command.id)
83 mutter(msg, *args)
85 def debug(self, msg, *args):
86 """Output a mutter if the appropriate -D option was given."""
87 if "fast-import" in debug.debug_flags:
88 msg = "%s (%s)" % (msg, self.command.id)
89 mutter(msg, *args)
91 def note(self, msg, *args):
92 """Output a note but add context."""
93 msg = "%s (%s)" % (msg, self.command.id)
94 note(msg, *args)
96 def warning(self, msg, *args):
97 """Output a warning but add context."""
98 msg = "%s (%s)" % (msg, self.command.id)
99 warning(msg, *args)
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves parents, builds the Revision
        object and caches the basis inventory. Must run before any file
        commands are handled.
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.data_for_commit = {}
        #if self.rev_store.expects_rich_root():
        self.data_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = self.cache_mgr.reftracker.track_heads(self.command)

        # Convert the parent commit-ids to bzr revision-ids
        if parents:
            self.parents = [self.cache_mgr.lookup_committish(p)
                for p in parents]
        else:
            self.parents = []
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(p) for p in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # cache of per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if len(self.parents) == 0:
            self.basis_inventory = self._init_inventory()
        else:
            self.basis_inventory = self.get_inventory(self.parents[0])
        # Newer inventory implementations expose root_id directly; older
        # ones require going via the root entry.
        if hasattr(self.basis_inventory, "root_id"):
            self.inventory_root_id = self.basis_inventory.root_id
        else:
            self.inventory_root_id = self.basis_inventory.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
    def _init_inventory(self):
        """Create an empty starting inventory for self.revision_id."""
        return self.rev_store.init_inventory(self.revision_id)
148 def get_inventory(self, revision_id):
149 """Get the inventory for a revision id."""
150 try:
151 inv = self.cache_mgr.inventories[revision_id]
152 except KeyError:
153 if self.verbose:
154 self.mutter("get_inventory cache miss for %s", revision_id)
155 # Not cached so reconstruct from the RevisionStore
156 inv = self.rev_store.get_inventory(revision_id)
157 self.cache_mgr.inventories[revision_id] = inv
158 return inv
    def _get_data(self, file_id):
        """Get the data bytes for a file-id.

        The content must have been cached earlier in this commit
        (see _modify_item and _rename_item).
        """
        return self.data_for_commit[file_id]
164 def _get_lines(self, file_id):
165 """Get the lines for a file-id."""
166 return osutils.split_lines(self._get_data(file_id))
    def _get_per_file_parents(self, file_id):
        """Get the per-file parents recorded for a file-id."""
        return self.per_file_parents_for_commit[file_id]
172 def _get_inventories(self, revision_ids):
173 """Get the inventories for revision-ids.
175 This is a callback used by the RepositoryStore to
176 speed up inventory reconstruction.
178 present = []
179 inventories = []
180 # If an inventory is in the cache, we assume it was
181 # successfully loaded into the revision store
182 for revision_id in revision_ids:
183 try:
184 inv = self.cache_mgr.inventories[revision_id]
185 present.append(revision_id)
186 except KeyError:
187 if self.verbose:
188 self.note("get_inventories cache miss for %s", revision_id)
189 # Not cached so reconstruct from the revision store
190 try:
191 inv = self.get_inventory(revision_id)
192 present.append(revision_id)
193 except:
194 inv = self._init_inventory()
195 self.cache_mgr.inventories[revision_id] = inv
196 inventories.append(inv)
197 return present, inventories
199 def bzr_file_id_and_new(self, path):
200 """Get a Bazaar file identifier and new flag for a path.
202 :return: file_id, is_new where
203 is_new = True if the file_id is newly created
205 if path not in self._paths_deleted_this_commit:
206 # Try file-ids renamed in this commit
207 id = self._modified_file_ids.get(path)
208 if id is not None:
209 return id, False
211 # Try the basis inventory
212 id = self.basis_inventory.path2id(path)
213 if id is not None:
214 return id, False
216 # Try the other inventories
217 if len(self.parents) > 1:
218 for inv in self.parent_invs[1:]:
219 id = self.basis_inventory.path2id(path)
220 if id is not None:
221 return id, False
223 # Doesn't exist yet so create it
224 dirname, basename = osutils.split(path)
225 id = generate_ids.gen_file_id(basename)
226 self.debug("Generated new file id %s for '%s' in revision-id '%s'",
227 id, path, self.revision_id)
228 self._new_file_ids[path] = id
229 return id, True
231 def bzr_file_id(self, path):
232 """Get a Bazaar file identifier for a path."""
233 return self.bzr_file_id_and_new(path)[0]
235 def _utf8_decode(self, field, value):
236 try:
237 return value.decode('utf-8')
238 except UnicodeDecodeError:
239 # The spec says fields are *typically* utf8 encoded
240 # but that isn't enforced by git-fast-export (at least)
241 self.warning("%s not in utf8 - replacing unknown "
242 "characters" % (field,))
243 return value.decode('utf-8', 'replace')
245 def _decode_path(self, path):
246 try:
247 return path.decode('utf-8')
248 except UnicodeDecodeError:
249 # The spec says fields are *typically* utf8 encoded
250 # but that isn't enforced by git-fast-export (at least)
251 self.warning("path %r not in utf8 - replacing unknown "
252 "characters" % (path,))
253 return path.decode('utf-8', 'replace')
255 def _format_name_email(self, section, name, email):
256 """Format name & email as a string."""
257 name = self._utf8_decode("%s name" % section, name)
258 email = self._utf8_decode("%s email" % section, email)
260 if email:
261 return "%s <%s>" % (name, email)
262 else:
263 return name
    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email("committer", committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)
    def build_revision(self):
        """Build and return the bzr Revision for this commit.

        The committer and message are decoded as utf-8 (replacing any
        invalid characters); branch nick and author properties are
        filled in.
        """
        rev_props = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in rev_props:
            rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr(
                self.branch_ref)
        self._save_author_info(rev_props)
        committer = self.command.committer
        who = self._format_name_email("committer", committer[0], committer[1])
        try:
            message = self.command.message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = self.command.message.decode('utf-8', 'replace')
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)
        return revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
304 def _legal_revision_properties(self, props):
305 """Clean-up any revision properties we can't handle."""
306 # For now, we just check for None because that's not allowed in 2.0rc1
307 result = {}
308 if props is not None:
309 for name, value in props.items():
310 if value is None:
311 self.warning(
312 "converting None to empty string for property %s"
313 % (name,))
314 result[name] = ''
315 else:
316 result[name] = value
317 return result
319 def _save_author_info(self, rev_props):
320 author = self.command.author
321 if author is None:
322 return
323 if self.command.more_authors:
324 authors = [author] + self.command.more_authors
325 author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
326 elif author != self.command.committer:
327 author_ids = [self._format_name_email("author", author[0], author[1])]
328 else:
329 return
330 # If we reach here, there are authors worth storing
331 rev_props['authors'] = "\n".join(author_ids)
333 def _modify_item(self, path, kind, is_executable, data, inv):
334 """Add to or change an item in the inventory."""
335 # If we've already added this, warn the user that we're ignoring it.
336 # In the future, it might be nice to double check that the new data
337 # is the same as the old but, frankly, exporters should be fixed
338 # not to produce bad data streams in the first place ...
339 existing = self._new_file_ids.get(path)
340 if existing:
341 # We don't warn about directories because it's fine for them
342 # to be created already by a previous rename
343 if kind != 'directory':
344 self.warning("%s already added in this commit - ignoring" %
345 (path,))
346 return
348 # Create the new InventoryEntry
349 basename, parent_id = self._ensure_directory(path, inv)
350 file_id = self.bzr_file_id(path)
351 ie = inventory.make_entry(kind, basename, parent_id, file_id)
352 ie.revision = self.revision_id
353 if kind == 'file':
354 ie.executable = is_executable
355 # lines = osutils.split_lines(data)
356 ie.text_sha1 = osutils.sha_string(data)
357 ie.text_size = len(data)
358 self.data_for_commit[file_id] = data
359 elif kind == 'directory':
360 self.directory_entries[path] = ie
361 # There are no lines stored for a directory so
362 # make sure the cache used by get_lines knows that
363 self.data_for_commit[file_id] = ''
364 elif kind == 'symlink':
365 ie.symlink_target = self._decode_path(data)
366 # There are no lines stored for a symlink so
367 # make sure the cache used by get_lines knows that
368 self.data_for_commit[file_id] = ''
369 else:
370 self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
371 % (kind, path))
372 return
373 # Record it
374 if inv.has_id(file_id):
375 old_ie = inv[file_id]
376 if old_ie.kind == 'directory':
377 self.record_delete(path, old_ie)
378 self.record_changed(path, ie, parent_id)
379 else:
380 try:
381 self.record_new(path, ie)
382 except:
383 print "failed to add path '%s' with entry '%s' in command %s" \
384 % (path, ie, self.command.id)
385 print "parent's children are:\n%r\n" % (ie.parent_id.children,)
386 raise
    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'

        Any missing parent directories are created recursively and
        recorded as new entries.

        :return: (basename, parent_file_id) for path
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id
        try:
            ie = self._get_directory_entry(inv, dirname)
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname, inv)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
            dir_basename, parent_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[dir_file_id] = ''

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if inv.has_id(dir_file_id):
            self.record_delete(dirname, ie)
        self.record_new(dirname, ie)
        return basename, ie.file_id
    def _get_directory_entry(self, inv, dirname):
        """Get the inventory entry for a directory.

        Raises KeyError if dirname is not a directory in inv.
        """
        result = self.directory_entries.get(dirname)
        if result is None:
            if dirname in self._paths_deleted_this_commit:
                raise KeyError
            try:
                file_id = inv.path2id(dirname)
            except errors.NoSuchId:
                # In a CHKInventory, this is raised if there's no root yet
                raise KeyError
            if file_id is None:
                raise KeyError
            result = inv[file_id]
            # dirname must be a directory for us to return it
            if result.kind == 'directory':
                self.directory_entries[dirname] = result
            else:
                raise KeyError
        return result
    def _delete_item(self, path, inv):
        """Record deletion of path, ignoring paths not in the inventory."""
        newly_added = self._new_file_ids.get(path)
        if newly_added:
            # We've only just added this path earlier in this commit.
            file_id = newly_added
            # note: delta entries look like (old, new, file-id, ie)
            # NOTE(review): _delta_entries_by_fileid is only defined on the
            # delta-based subclass - confirm this branch is unreachable for
            # InventoryCommitHandler.
            ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(path)
            if file_id is None:
                self.mutter("ignoring delete of %s as not in inventory", path)
                return
            try:
                ie = inv[file_id]
            except errors.NoSuchId:
                self.mutter("ignoring delete of %s as not in inventory", path)
                return
        self.record_delete(path, ie)
    def _copy_item(self, src_path, dest_path, inv):
        """Copy the entry at src_path to dest_path, including content.

        Only files and symlinks are supported; other kinds are warned
        about and skipped.
        """
        newly_changed = self._new_file_ids.get(src_path) or \
            self._modified_file_ids.get(src_path)
        if newly_changed:
            # We've only just added/changed this path earlier in this commit.
            file_id = newly_changed
            # note: delta entries look like (old, new, file-id, ie)
            ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(src_path)
            if file_id is None:
                self.warning("ignoring copy of %s to %s - source does not exist",
                    src_path, dest_path)
                return
            ie = inv[file_id]
        kind = ie.kind
        if kind == 'file':
            if newly_changed:
                # Content was set earlier in this commit - use the cache
                content = self.data_for_commit[file_id]
            else:
                content = self.rev_store.get_file_text(self.parents[0], file_id)
            self._modify_item(dest_path, kind, ie.executable, content, inv)
        elif kind == 'symlink':
            self._modify_item(dest_path, kind, False,
                ie.symlink_target.encode("utf-8"), inv)
        else:
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, dest_path)
    def _rename_item(self, old_path, new_path, inv):
        """Rename old_path to new_path, replacing anything at new_path."""
        existing = self._new_file_ids.get(old_path) or \
            self._modified_file_ids.get(old_path)
        if existing:
            # We've only just added/modified this path earlier in this commit.
            # Change the add/modify of old_path to an add of new_path
            self._rename_pending_change(old_path, new_path, existing)
            return

        file_id = inv.path2id(old_path)
        if file_id is None:
            self.warning(
                "ignoring rename of %s to %s - old path does not exist" %
                (old_path, new_path))
            return
        ie = inv[file_id]
        rev_id = ie.revision
        new_file_id = inv.path2id(new_path)
        if new_file_id is not None:
            # The destination is occupied - the rename replaces it
            self.record_delete(new_path, inv[new_file_id])
        self.record_rename(old_path, new_path, file_id, ie)

        # The revision-id for this entry will be/has been updated and
        # that means the loader then needs to know what the "new" text is.
        # We therefore must go back to the revision store to get it.
        lines = self.rev_store.get_file_lines(rev_id, file_id)
        self.data_for_commit[file_id] = ''.join(lines)
521 def _delete_all_items(self, inv):
522 if len(inv) == 0:
523 return
524 for path, ie in inv.iter_entries_by_dir():
525 if path != "":
526 self.record_delete(path, ie)
528 def _warn_unless_in_merges(self, fileid, path):
529 if len(self.parents) <= 1:
530 return
531 for parent in self.parents[1:]:
532 if fileid in self.get_inventory(parent):
533 return
534 self.warning("ignoring delete of %s as not in parent inventories", path)
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects."""

    def pre_process_files(self):
        """Prepare for committing by seeding a mutable inventory."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if len(self.parents) == 0:
            self.inventory = self.basis_inventory
        else:
            self.inventory = copy_inventory(self.basis_inventory)
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id
566 def post_process_files(self):
567 """Save the revision."""
568 self.cache_mgr.inventories[self.revision_id] = self.inventory
569 self.rev_store.load(self.revision, self.inventory, None,
570 lambda file_id: self._get_data(file_id),
571 lambda file_id: self._get_per_file_parents(file_id),
572 lambda revision_ids: self._get_inventories(revision_ids))
    def record_new(self, path, ie):
        """Add a new entry, replacing any entry already using its file-id."""
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[ie.file_id] = per_file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink
            del self.inventory[ie.file_id]
            # Try again
            self.inventory.add(ie)
    def record_changed(self, path, ie, parent_id):
        """Replace the existing entry for ie.file_id in-place."""
        # HACK: no API for this (del+add does more than it needs to)
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        self.inventory._byid[ie.file_id] = ie
        parent_ie = self.inventory._byid[parent_id]
        parent_ie.children[ie.name] = ie
    def record_delete(self, path, ie):
        """Remove ie - and any children it has - from the inventory."""
        self.inventory.remove_recursive_id(ie.file_id)
    def record_rename(self, old_path, new_path, file_id, ie):
        """Move an existing entry to its new location."""
        # For a rename, the revision-id is always the new one so
        # no need to change/set it here
        ie.revision = self.revision_id
        per_file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = per_file_parents
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)
612 def modify_handler(self, filecmd):
613 if filecmd.dataref is not None:
614 data = self.cache_mgr.fetch_blob(filecmd.dataref)
615 else:
616 data = filecmd.data
617 self.debug("modifying %s", filecmd.path)
618 (kind, is_executable) = mode_to_kind(filecmd.mode)
619 self._modify_item(self._decode_path(filecmd.path), kind,
620 is_executable, data, self.inventory)
622 def delete_handler(self, filecmd):
623 self.debug("deleting %s", filecmd.path)
624 self._delete_item(self._decode_path(filecmd.path), self.inventory)
626 def copy_handler(self, filecmd):
627 src_path = self._decode_path(filecmd.src_path)
628 dest_path = self._decode_path(filecmd.dest_path)
629 self.debug("copying %s to %s", src_path, dest_path)
630 self._copy_item(src_path, dest_path, self.inventory)
632 def rename_handler(self, filecmd):
633 old_path = self._decode_path(filecmd.old_path)
634 new_path = self._decode_path(filecmd.new_path)
635 self.debug("renaming %s to %s", old_path, new_path)
636 self._rename_item(old_path, new_path, self.inventory)
    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command by emptying the inventory."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
class InventoryDeltaCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds Inventories by applying a delta."""

    def pre_process_files(self):
        """Prepare for committing by initialising the delta accumulators."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}
        if len(self.parents) == 0 or not self.rev_store.expects_rich_root():
            if self.parents:
                old_path = ''
            else:
                old_path = None
            # Need to explicitly add the root entry for the first revision
            # and for non rich-root inventories
            root_id = inventory.ROOT_ID
            root_ie = inventory.InventoryDirectory(root_id, u'', None)
            root_ie.revision = self.revision_id
            self._add_entry((old_path, '', root_id, root_ie))
665 def post_process_files(self):
666 """Save the revision."""
667 delta = self._get_final_delta()
668 inv = self.rev_store.load_using_delta(self.revision,
669 self.basis_inventory, delta, None,
670 self._get_data,
671 self._get_per_file_parents,
672 self._get_inventories)
673 self.cache_mgr.inventories[self.revision_id] = inv
674 #print "committed %s" % self.revision_id
    def _get_final_delta(self):
        """Generate the final delta.

        Smart post-processing of changes, e.g. pruning of directories
        that would become empty, goes here.
        """
        delta = list(self._delta_entries_by_fileid.values())
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
            candidates = self._dirs_that_might_become_empty
            while candidates:
                never_born = set()
                parent_dirs_that_might_become_empty = set()
                for path, file_id in self._empty_after_delta(delta, candidates):
                    newly_added = self._new_file_ids.get(path)
                    if newly_added:
                        never_born.add(newly_added)
                    else:
                        delta.append((path, None, file_id, None))
                    # Deleting this directory may in turn empty its parent
                    parent_dir = osutils.dirname(path)
                    if parent_dir:
                        parent_dirs_that_might_become_empty.add(parent_dir)
                candidates = parent_dirs_that_might_become_empty
                # Clean up entries that got deleted before they were ever added
                if never_born:
                    delta = [de for de in delta if de[2] not in never_born]
        return delta
    def _empty_after_delta(self, delta, candidates):
        """Return [(path, file_id)] for candidate dirs empty after delta."""
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
        new_inv = self._get_proposed_inventory(delta)
        result = []
        for dir in candidates:
            file_id = new_inv.path2id(dir)
            if file_id is None:
                continue
            ie = new_inv[file_id]
            if ie.kind != 'directory':
                continue
            if len(ie.children) == 0:
                result.append((dir, file_id))
                if self.verbose:
                    self.note("pruning empty directory %s" % (dir,))
        return result
    def _get_proposed_inventory(self, delta):
        """Apply delta to a throw-away inventory to preview the result."""
        if len(self.parents):
            # new_inv = self.basis_inventory._get_mutable_inventory()
            # Note that this will create unreferenced chk pages if we end up
            # deleting entries, because this 'test' inventory won't end up
            # used. However, it is cheaper than having to create a full copy of
            # the inventory for every commit.
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
                'not-a-valid-revision-id:')
        else:
            new_inv = inventory.Inventory(revision_id=self.revision_id)
            # This is set in the delta so remove it to prevent a duplicate
            del new_inv[inventory.ROOT_ID]
            try:
                new_inv.apply_delta(delta)
            except errors.InconsistentDelta:
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
                raise
        return new_inv
    def _add_entry(self, entry):
        """Accumulate a (old_path, new_path, file_id, ie) delta entry,
        merging it with any earlier entry for the same file-id.
        """
        # We need to combine the data if multiple entries have the same file-id.
        # For example, a rename followed by a modification looks like:
        #
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
        #
        # Likewise, a modification followed by a rename looks like:
        #
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
        #
        # Here's a rename followed by a delete and a modification followed by
        # a delete:
        #
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
        #
        # In summary, we use the original old-path, new new-path and new ie
        # when combining entries.
        old_path = entry[0]
        new_path = entry[1]
        file_id = entry[2]
        ie = entry[3]
        existing = self._delta_entries_by_fileid.get(file_id, None)
        if existing is not None:
            old_path = existing[0]
            entry = (old_path, new_path, file_id, ie)
        if new_path is None and old_path is None:
            # This is a delete cancelling a previous add
            del self._delta_entries_by_fileid[file_id]
            parent_dir = osutils.dirname(existing[1])
            self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
            return
        else:
            self._delta_entries_by_fileid[file_id] = entry

        # Collect parent directories that might become empty
        if new_path is None:
            # delete
            parent_dir = osutils.dirname(old_path)
            # note: no need to check the root
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
        elif old_path is not None and old_path != new_path:
            # rename
            old_parent_dir = osutils.dirname(old_path)
            new_parent_dir = osutils.dirname(new_path)
            if old_parent_dir and old_parent_dir != new_parent_dir:
                self._dirs_that_might_become_empty.add(old_parent_dir)

        # Calculate the per-file parents, if not already done
        if file_id in self.per_file_parents_for_commit:
            return
        if old_path is None:
            # add
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
        elif new_path is None:
            # delete
            pass
        elif old_path != new_path:
            # rename
            per_file_parents, _ = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
        else:
            # modify
            per_file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[file_id] = per_file_parents
    def record_new(self, path, ie):
        """Record an add of path as a delta entry."""
        self._add_entry((None, path, ie.file_id, ie))
    def record_changed(self, path, ie, parent_id=None):
        """Record an in-place modification of path as a delta entry."""
        self._add_entry((path, path, ie.file_id, ie))
        self._modified_file_ids[path] = ie.file_id
    def record_delete(self, path, ie):
        """Record deletion of path and, for directories, of all children."""
        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind == 'directory':
            try:
                del self.directory_entries[path]
            except KeyError:
                pass
            # Children come from the basis inventory since the delta for
            # this commit has not been applied yet
            for child_relpath, entry in \
                self.basis_inventory.iter_entries_by_dir(from_dir=ie):
                child_path = osutils.pathjoin(path, child_relpath)
                self._add_entry((child_path, None, entry.file_id, None))
                self._paths_deleted_this_commit.add(child_path)
                if entry.kind == 'directory':
                    try:
                        del self.directory_entries[child_path]
                    except KeyError:
                        pass
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record a rename as a delta entry built from a copied entry."""
        new_ie = old_ie.copy()
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        new_ie.name = new_basename
        new_ie.parent_id = new_parent_id
        new_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, new_ie))
        self._modified_file_ids[new_path] = file_id
        self._paths_deleted_this_commit.discard(new_path)
        if new_ie.kind == 'directory':
            self.directory_entries[new_path] = new_ie
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead.

        Used when a path added or modified earlier in this commit is
        subsequently renamed.
        """
        # note: delta entries look like (old, new, file-id, ie)
        old_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, old_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            del self._new_file_ids[old_path]
        else:
            del self._modified_file_ids[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry, carrying over the content
        # attributes from the pending entry
        kind = old_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = old_ie.executable
            ie.text_sha1 = old_ie.text_sha1
            ie.text_size = old_ie.text_size
        elif kind == 'symlink':
            ie.symlink_target = old_ie.symlink_target

        # Record it
        self.record_new(new_path, ie)
888 def modify_handler(self, filecmd):
889 (kind, executable) = mode_to_kind(filecmd.mode)
890 if filecmd.dataref is not None:
891 if kind == "directory":
892 data = None
893 elif kind == "tree-reference":
894 data = filecmd.dataref
895 else:
896 data = self.cache_mgr.fetch_blob(filecmd.dataref)
897 else:
898 data = filecmd.data
899 self.debug("modifying %s", filecmd.path)
900 decoded_path = self._decode_path(filecmd.path)
901 self._modify_item(decoded_path, kind,
902 executable, data, self.basis_inventory)
904 def delete_handler(self, filecmd):
905 self.debug("deleting %s", filecmd.path)
906 self._delete_item(
907 self._decode_path(filecmd.path), self.basis_inventory)
909 def copy_handler(self, filecmd):
910 src_path = self._decode_path(filecmd.src_path)
911 dest_path = self._decode_path(filecmd.dest_path)
912 self.debug("copying %s to %s", src_path, dest_path)
913 self._copy_item(src_path, dest_path, self.basis_inventory)
915 def rename_handler(self, filecmd):
916 old_path = self._decode_path(filecmd.old_path)
917 new_path = self._decode_path(filecmd.new_path)
918 self.debug("renaming %s to %s", old_path, new_path)
919 self._rename_item(old_path, new_path, self.basis_inventory)
    def deleteall_handler(self, filecmd):
        """Handle a filedeleteall command by deleting every entry."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.basis_inventory)