The get_ancestry() method was removed from bzr sometime in the past.
[bzr-fastimport.git] / cmds.py
blob5f93105aea4e7ae56b2ece5c3485f6f7e54134e2
1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 """Fastimport/fastexport commands."""
18 from bzrlib import bzrdir
19 from bzrlib.commands import Command
20 from bzrlib.option import Option, ListOption, RegistryOption
22 from bzrlib.plugins.fastimport import load_fastimport
def _run(source, processor_factory, verbose=False, user_map=None, **kwargs):
    """Create and run a processor.

    :param source: a filename or '-' for standard input. If the
      filename ends in .gz, it will be opened as a gzip file and
      the stream will be implicitly uncompressed
    :param processor_factory: a callable for creating a processor; it is
      called as ``processor_factory(verbose=verbose, **kwargs)``
    :param verbose: passed to both the processor and the parser
    :param user_map: if not None, the file containing the user map.
    :param kwargs: extra keyword arguments forwarded to processor_factory
    :return: whatever the processor's ``process()`` call returns
    :raises BzrCommandError: if the parser reports a ParsingError
    """
    # Imported lazily, matching the rest of this module.
    from fastimport.errors import ParsingError
    from bzrlib.errors import BzrCommandError
    from fastimport import parser
    stream = _get_source_stream(source)
    user_mapper = _get_user_mapper(user_map)
    proc = processor_factory(verbose=verbose, **kwargs)
    p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper)
    try:
        return proc.process(p.iter_commands)
    except ParsingError as e:
        # Re-raise as a command error, prefixed with the offending
        # line number of the input stream.
        raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
47 def _get_source_stream(source):
48 if source == '-' or source is None:
49 import sys
50 from fastimport import helpers
51 stream = helpers.binary_stream(sys.stdin)
52 elif source.endswith('.gz'):
53 import gzip
54 stream = gzip.open(source, "rb")
55 else:
56 stream = open(source, "rb")
57 return stream
60 def _get_user_mapper(filename):
61 import user_mapper
62 if filename is None:
63 return None
64 f = open(filename)
65 lines = f.readlines()
66 f.close()
67 return user_mapper.UserMapper(lines)
class cmd_fast_import(Command):
    """Backend for fast Bazaar data importers.

    This command reads a mixed command/data stream and creates
    branches in a Bazaar repository accordingly. The preferred
    recipe is::

      bzr fast-import project.fi project.bzr

    Numerous commands are provided for generating a fast-import file
    to use as input.
    To specify standard input as the input stream, use a
    source name of '-' (instead of project.fi). If the source name
    ends in '.gz', it is assumed to be compressed in gzip format.

    project.bzr will be created if it doesn't exist. If it exists
    already, it should be empty or be an existing Bazaar repository
    or branch. If not specified, the current directory is assumed.

    fast-import will intelligently select the format to use when
    creating a repository or branch. If you are running Bazaar 1.17
    up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
    Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
    is used. If you wish to specify a custom format, use the `--format`
    option.

    .. note::

       To maintain backwards compatibility, fast-import lets you
       create the target repository or standalone branch yourself.
       It is recommended though that you let fast-import create
       these for you instead.

    :Branch mapping rules:

     Git reference names are mapped to Bazaar branch names as follows:

     * refs/heads/foo is mapped to foo
     * refs/remotes/origin/foo is mapped to foo.remote
     * refs/tags/foo is mapped to foo.tag
     * */master is mapped to trunk, trunk.remote, etc.
     * */trunk is mapped to git-trunk, git-trunk.remote, etc.

    :Branch creation rules:

     When a shared repository is created or found at the destination,
     branches are created inside it. In the simple case of a single
     branch (refs/heads/master) inside the input file, the branch is
     project.bzr/trunk.

     When a standalone branch is found at the destination, the trunk
     is imported there and warnings are output about any other branches
     found in the input file.

     When a branch in a shared repository is found at the destination,
     that branch is made the trunk and other branches, if any, are
     created in sister directories.

    :Working tree updates:

     The working tree is generated for the trunk branch. If multiple
     branches are created, a message is output on completion explaining
     how to create the working trees for other branches.

    :Custom exporters:

     The fast-export-from-xxx commands typically call more advanced
     xxx-fast-export scripts. You are welcome to use the advanced
     scripts if you prefer.

     If you wish to write a custom exporter for your project, see
     http://bazaar-vcs.org/BzrFastImport for the detailed protocol
     specification. In many cases, exporters can be written quite
     quickly using whatever scripting/programming language you like.

    :User mapping:

     Some source repositories store just the user name while Bazaar
     prefers a full email address. You can adjust user-ids while
     importing by using the --user-map option. The argument is a
     text file with lines in the format::

       old-id = new-id

     Blank lines and lines beginning with # are ignored.
     If old-id has the special value '@', then users without an
     email address will get one created by using the matching new-id
     as the domain, unless a more explicit address is given for them.
     For example, given the user-map of::

       @ = example.com
       bill = William Jones <bill@example.com>

     then user-ids are mapped as follows::

       maria => maria <maria@example.com>
       bill => William Jones <bill@example.com>

     .. note::

        User mapping is supported by both the fast-import and
        fast-import-filter commands.

    :Blob tracking:

     As some exporters (like git-fast-export) reuse blob data across
     commits, fast-import makes two passes over the input file by
     default. In the first pass, it collects data about what blobs are
     used when, along with some other statistics (e.g. total number of
     commits). In the second pass, it generates the repository and
     branches.

     .. note::

        The initial pass isn't done if the --info option is used
        to explicitly pass in information about the input stream.
        It also isn't done if the source is standard input. In the
        latter case, memory consumption may be higher than otherwise
        because some blobs may be kept in memory longer than necessary.

    :Restarting an import:

     At checkpoints and on completion, the commit-id -> revision-id
     map is saved to a file called 'fastimport-id-map' in the control
     directory for the repository (e.g. .bzr/repository). If the import
     is interrupted or unexpectedly crashes, it can be started again
     and this file will be used to skip over already loaded revisions.
     As long as subsequent exports from the original source begin
     with exactly the same revisions, you can use this feature to
     maintain a mirror of a repository managed by a foreign tool.
     If and when Bazaar is used to manage the repository, this file
     can be safely deleted.

    :Examples:

     Import a Subversion repository into Bazaar::

       svn-fast-export /svn/repo/path > project.fi
       bzr fast-import project.fi project.bzr

     Import a CVS repository into Bazaar::

       cvs2git /cvs/repo/path > project.fi
       bzr fast-import project.fi project.bzr

     Import a Git repository into Bazaar::

       cd /git/repo/path
       git fast-export --all > project.fi
       bzr fast-import project.fi project.bzr

     Import a Mercurial repository into Bazaar::

       cd /hg/repo/path
       hg fast-export > project.fi
       bzr fast-import project.fi project.bzr

     Import a Darcs repository into Bazaar::

       cd /darcs/repo/path
       darcs-fast-export > project.fi
       bzr fast-import project.fi project.bzr
    """
    hidden = False
    _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
    takes_args = ['source', 'destination?']
    takes_options = ['verbose',
        Option('user-map', type=str,
            help="Path to file containing a map of user-ids.",
            ),
        Option('info', type=str,
            help="Path to file containing caching hints.",
            ),
        Option('trees',
            help="Update all working trees, not just trunk's.",
            ),
        Option('count', type=int,
            help="Import this many revisions then exit.",
            ),
        Option('checkpoint', type=int,
            help="Checkpoint automatically every N revisions."
                 " The default is 10000.",
            ),
        Option('autopack', type=int,
            help="Pack every N checkpoints. The default is 4.",
            ),
        Option('inv-cache', type=int,
            help="Number of inventories to cache.",
            ),
        RegistryOption.from_kwargs('mode',
            'The import algorithm to use.',
            title='Import Algorithm',
            default='Use the preferred algorithm (inventory deltas).',
            classic="Use the original algorithm (mutable inventories).",
            experimental="Enable experimental features.",
            value_switches=True, enum_switch=False,
            ),
        Option('import-marks', type=str,
            help="Import marks from file."
            ),
        Option('export-marks', type=str,
            help="Export marks to file."
            ),
        RegistryOption('format',
            help='Specify a format for the created repository. See'
                 ' "bzr help formats" for details.',
            lazy_registry=('bzrlib.bzrdir', 'format_registry'),
            converter=lambda name: bzrdir.format_registry.make_bzrdir(name),
            value_switches=False, title='Repository format'),
        ]

    def run(self, source, destination='.', verbose=False, info=None,
        trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
        mode=None, import_marks=None, export_marks=None, format=None,
        user_map=None):
        """Import the stream named by ``source`` into ``destination``.

        The command-line options are collected into a ``params`` dict and
        handed, together with the opened destination control directory,
        to a GenericProcessor via ``_run``.

        :return: the value returned by ``_run``
        """
        load_fastimport()
        from bzrlib.plugins.fastimport.processors import generic_processor
        from bzrlib.plugins.fastimport.helpers import (
            open_destination_directory,
            )
        control = open_destination_directory(destination, format=format)

        # If no information file was given and the source isn't stdin,
        # generate the information by reading the source file as a first pass
        if info is None and source != '-':
            info = self._generate_info(source)

        # Do the work
        if mode is None:
            mode = 'default'
        params = {
            'info': info,
            'trees': trees,
            'count': count,
            'checkpoint': checkpoint,
            'autopack': autopack,
            'inv-cache': inv_cache,
            'mode': mode,
            'import-marks': import_marks,
            'export-marks': export_marks,
            }
        return _run(source, generic_processor.GenericProcessor,
                bzrdir=control, params=params, verbose=verbose,
                user_map=user_map)

    def _generate_info(self, source):
        """Run an InfoProcessor over ``source`` and return its output.

        :param source: name of the input, as accepted by
          ``_get_source_stream``
        :return: the verbose InfoProcessor output as a list of lines
        :raises BzrCommandError: if the parser reports a ParsingError
        """
        from cStringIO import StringIO
        from fastimport import parser
        from fastimport.errors import ParsingError
        from bzrlib.errors import BzrCommandError
        from bzrlib.plugins.fastimport.processors import info_processor
        stream = _get_source_stream(source)
        output = StringIO()
        try:
            proc = info_processor.InfoProcessor(verbose=True, outf=output)
            p = parser.ImportParser(stream)
            try:
                proc.process(p.iter_commands)
            except ParsingError as e:
                raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
            lines = output.getvalue().splitlines()
        finally:
            output.close()
            if source == '-' or source is None:
                # Standard input is shared with the caller: rewind it as
                # before rather than closing it.
                stream.seek(0)
            else:
                # This handle is private to the info pass (the import pass
                # opens the source again), so release it here.
                stream.close()
        return lines
class cmd_fast_import_filter(Command):
    """Filter a fast-import stream to include/exclude files & directories.

    This command is useful for splitting a subdirectory or bunch of
    files out from a project to create a new project complete with history
    for just those files. It can also be used to create a new project
    repository that removes all references to files that should not have
    been committed, e.g. security-related information (like passwords),
    commercially sensitive material, files with an incompatible license or
    large binary files like CD images.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    :File/directory filtering:

     This is supported by the -i and -x options. Excludes take precedence
     over includes.

     When filtering out a subdirectory (or file), the new stream uses the
     subdirectory (or subdirectory containing the file) as the root. As
     fast-import doesn't know in advance whether a path is a file or
     directory in the stream, you need to specify a trailing '/' on
     directories passed to the -i option. If multiple files or
     directories are given, the new root is the deepest common directory.

     Note: If a path has been renamed, take care to specify the *original*
     path name, not the final name that it ends up with.

    :User mapping:

     Some source repositories store just the user name while Bazaar
     prefers a full email address. You can adjust user-ids
     by using the --user-map option. The argument is a
     text file with lines in the format::

       old-id = new-id

     Blank lines and lines beginning with # are ignored.
     If old-id has the special value '@', then users without an
     email address will get one created by using the matching new-id
     as the domain, unless a more explicit address is given for them.
     For example, given the user-map of::

       @ = example.com
       bill = William Jones <bill@example.com>

     then user-ids are mapped as follows::

       maria => maria <maria@example.com>
       bill => William Jones <bill@example.com>

     .. note::

        User mapping is supported by both the fast-import and
        fast-import-filter commands.

    :History rewriting:

     By default fast-import-filter does quite aggressive history rewriting.
     Empty commits (or commits which had all their content filtered out) will
     be removed, and so are the references to commits not included in the
     stream.

     Flag --dont-squash-empty-commits reverses this behavior and makes it
     possible to use fast-import-filter on incremental streams.

    :Examples:

     Create a new project from a library (note the trailing / on the
     directory name of the library)::

       front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
       bzr fast-import xxx.fi mylibrary.bzr
       (lib/xxx/foo is now foo)

     Create a new repository without a sensitive file::

       front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
       bzr fast-import clean.fi clean.bzr
    """
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source?']
    takes_options = ['verbose',
        ListOption('include_paths', short_name='i', type=str,
            help="Only include commits affecting these paths."
                 " Directories should have a trailing /."
            ),
        ListOption('exclude_paths', short_name='x', type=str,
            help="Exclude these paths from commits."
            ),
        Option('user-map', type=str,
            help="Path to file containing a map of user-ids.",
            ),
        Option('dont-squash-empty-commits',
            help="Preserve all commits and links between them"
            ),
        ]
    encoding_type = 'exact'

    def run(self, source=None, verbose=False, include_paths=None,
        exclude_paths=None, user_map=None, dont_squash_empty_commits=False):
        """Filter the stream named by ``source`` through a FilterProcessor.

        :return: the value returned by ``_run``
        :raises BzrCommandError: if --dont-squash-empty-commits is requested
          but the installed FilterProcessor does not list
          'squash_empty_commits' among its known parameters, or if the
          stream cannot be parsed
        """
        from bzrlib.errors import BzrCommandError
        load_fastimport()
        from fastimport.processors import filter_processor
        params = {
            'include_paths': include_paths,
            'exclude_paths': exclude_paths,
            }
        # Only pass 'squash_empty_commits' to processors that declare it.
        if ('squash_empty_commits' in
                filter_processor.FilterProcessor.known_params):
            params['squash_empty_commits'] = (not dont_squash_empty_commits)
        elif dont_squash_empty_commits:
            raise BzrCommandError("installed python-fastimport does not "
                "support not squashing empty commits. Please install "
                "a newer python-fastimport to use "
                "--dont-squash-empty-commits")

        # Same open/parse/process/error-translation sequence as the other
        # commands in this module, so share the helper.
        return _run(source, filter_processor.FilterProcessor, params=params,
            verbose=verbose, user_map=user_map)
class cmd_fast_import_info(Command):
    """Output information about a fast-import stream.

    This command reads a fast-import stream and outputs
    statistics and interesting properties about what it finds.
    When run in verbose mode, the information is output as a
    configuration file that can be passed to fast-import to
    assist it in intelligently caching objects.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    :Examples:

     Display statistics about the import stream produced by front-end::

      front-end | bzr fast-import-info -

     Create a hints file for running fast-import on a large repository::

       front-end | bzr fast-import-info -v - > front-end.cfg
    """
    # Command metadata.
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source']
    takes_options = ['verbose']

    def run(self, source, verbose=False):
        # Hand the stream to an InfoProcessor through the shared helper.
        load_fastimport()
        from bzrlib.plugins.fastimport.processors import info_processor
        processor_class = info_processor.InfoProcessor
        return _run(source, processor_class, verbose=verbose)
class cmd_fast_import_query(Command):
    """Query a fast-import stream displaying selected commands.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    To specify a commit to display, give its mark using the
    --commit-mark option. The commit will be displayed with
    file-commands included but with inline blobs hidden.

    To specify the commands to display, use the -C option one or
    more times. To specify just some fields for a command, use the
    syntax::

      command=field1,...

    By default, the nominated fields for the nominated commands
    are displayed tab separated. To see the information in
    a name:value format, use verbose mode.

    Note: Binary fields (e.g. data for blobs) are masked out
    so it is generally safe to view the output in a terminal.

    :Examples:

     Show the commit with mark 429::

      bzr fast-import-query xxx.fi -m429

     Show all the fields of the reset and tag commands::

      bzr fast-import-query xxx.fi -Creset -Ctag

     Show the mark and merge fields of the commit commands::

      bzr fast-import-query xxx.fi -Ccommit=mark,merge
    """
    # Command metadata.
    hidden = True
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source']
    takes_options = ['verbose',
        Option('commit-mark', short_name='m', type=str,
            help="Mark of the commit to display."
            ),
        ListOption('commands', short_name='C', type=str,
            help="Display fields for these commands."
            ),
        ]

    def run(self, source, verbose=False, commands=None, commit_mark=None):
        # Turn the -C definitions into processor parameters, add the
        # optional commit mark, and run a QueryProcessor over the stream.
        load_fastimport()
        from fastimport.processors import query_processor
        from bzrlib.plugins.fastimport import helpers
        query_params = helpers.defines_to_dict(commands)
        if not query_params:
            query_params = {}
        if commit_mark:
            query_params['commit-mark'] = commit_mark
        return _run(source, query_processor.QueryProcessor,
            params=query_params, verbose=verbose)
class cmd_fast_export(Command):
    """Generate a fast-import stream from a Bazaar branch.

    This program generates a stream from a Bazaar branch in fast-import
    format used by tools such as bzr fast-import, git-fast-import and
    hg-fast-import.

    It takes two optional arguments: the source bzr branch to export and
    the destination file to write the fast-import stream to.

    If no source is specified, it will search for a branch in the
    current directory.

    If no destination is given or the destination is '-', standard output
    is used. Otherwise, the destination is the name of a file. If the
    destination ends in '.gz', the output will be compressed into gzip
    format.

    :Round-tripping:

     Recent versions of the fast-import specification support features
     that allow effective round-tripping most of the metadata in Bazaar
     branches. As such, fast-exporting a branch and fast-importing the data
     produced will create a new repository with roughly equivalent history,
     i.e. "bzr log -v -p --include-merges --forward" on the old branch and
     new branch should produce similar, if not identical, results.

     .. note::

        Be aware that the new repository may appear to have similar history
        but internally it is quite different with new revision-ids and
        file-ids assigned. As a consequence, the ability to easily merge
        with branches based on the old repository is lost. Depending on your
        reasons for producing a new repository, this may or may not be an
        issue.

    :Interoperability:

     fast-export can use the following "extended features" to
     produce a richer data stream:

     * *multiple-authors* - if a commit has multiple authors (as commonly
       occurs in pair-programming), all authors will be included in the
       output, not just the first author

     * *commit-properties* - custom metadata per commit that Bazaar stores
       in revision properties (e.g. branch-nick and bugs fixed by this
       change) will be included in the output.

     * *empty-directories* - directories, even the empty ones, will be
       included in the output.

     To disable these features and produce output acceptable to git 1.6,
     use the --plain option. To enable these features, use --no-plain.
     Currently, --plain is the default but that will change in the near
     future once the feature names and definitions are formally agreed
     to by the broader fast-import developer community.

     Git has stricter naming rules for tags and fast-export --plain
     will skip tags which can't be imported into git. To replace characters
     unsupported in git with an underscore instead, specify
     --rewrite-tag-names.

    :History truncation:

     It is sometimes convenient to simply truncate the revision history at a
     certain point. The --baseline option, to be used in conjunction with -r,
     emits a baseline commit containing the state of the entire source tree
     at the first requested revision. This allows a user to produce a tree
     identical to the original without munging multiple exports.

    :Examples:

     To produce data destined for import into Bazaar::

       bzr fast-export --no-plain my-bzr-branch my.fi.gz

     To produce data destined for Git 1.6::

       bzr fast-export --plain my-bzr-branch my.fi

     To import several unmerged but related branches into the same
     repository, use the --{export,import}-marks options, and specify a
     name for the git branch like this::

       bzr fast-export --export-marks=marks.bzr project.dev |
             GIT_DIR=project/.git git-fast-import --export-marks=marks.git

       bzr fast-export --import-marks=marks.bzr -b other project.other |
             GIT_DIR=project/.git git-fast-import --import-marks=marks.git

     If you get a "Missing space after source" error from git-fast-import,
     see the top of the commands.py module for a work-around.
    """
    hidden = False
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source?', 'destination?']
    takes_options = ['verbose', 'revision',
        # The value is a branch name, not a file, so label it accordingly.
        Option('git-branch', short_name='b', type=str,
            argname='NAME',
            help='Name of the git branch to create (default=master).'
            ),
        Option('checkpoint', type=int, argname='N',
            help="Checkpoint every N revisions (default=10000)."
            ),
        Option('marks', type=str, argname='FILE',
            help="Import marks from and export marks to file."
            ),
        Option('import-marks', type=str, argname='FILE',
            help="Import marks from file."
            ),
        Option('export-marks', type=str, argname='FILE',
            help="Export marks to file."
            ),
        Option('plain',
            help="Exclude metadata to maximise interoperability."
            ),
        Option('rewrite-tag-names',
            help="Replace characters invalid in git with '_'"
                 " (plain mode only).",
            ),
        Option('baseline',
            help="Export an 'absolute' baseline commit prior to"
                 " the first relative commit",
            ),
        ]
    encoding_type = 'exact'

    def run(self, source=None, destination=None, verbose=False,
        git_branch="master", checkpoint=10000, marks=None,
        import_marks=None, export_marks=None, revision=None,
        plain=True, rewrite_tag_names=False, baseline=False):
        """Export the branch containing ``source`` as a fast-import stream.

        :param source: location of the branch; defaults to "."
        :param destination: passed to the exporter module's
          ``_get_output_stream`` to obtain the output stream
        :param marks: if set, used as both the import and the export
          marks file, overriding ``import_marks`` and ``export_marks``
        :return: the value returned by the BzrFastExporter's ``run()``
        """
        load_fastimport()
        from bzrlib.branch import Branch
        from bzrlib.plugins.fastimport import exporter

        if marks:
            import_marks = export_marks = marks

        # Open the source
        if source is None:
            source = "."
        branch = Branch.open_containing(source)[0]
        outf = exporter._get_output_stream(destination)
        # Use a distinct name so the imported 'exporter' module is not
        # rebound to the exporter instance.
        fast_exporter = exporter.BzrFastExporter(branch,
            outf=outf, ref="refs/heads/%s" % git_branch, checkpoint=checkpoint,
            import_marks_file=import_marks, export_marks_file=export_marks,
            revision=revision, verbose=verbose, plain_format=plain,
            rewrite_tags=rewrite_tag_names, baseline=baseline)
        return fast_exporter.run()