Fix bzr-fastimport when used with newer versions of python-fastimport. (Jelmer Vernooij)
[bzr-fastimport.git] / cmds.py
blobc4a5a8a51706851f220ac7cb43a186ec026223f2
1 # Copyright (C) 2008 Canonical Ltd
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 """Fastimport/fastexport commands."""
18 from bzrlib import bzrdir
19 from bzrlib.commands import Command
20 from bzrlib.option import Option, ListOption, RegistryOption
22 from bzrlib.plugins.fastimport import (
23 helpers,
24 load_fastimport,
def _run(source, processor_factory, verbose=False, user_map=None, **kwargs):
    """Build a processor and feed it the commands parsed from a stream.

    :param source: a filename or '-' for standard input. If the
      filename ends in .gz, it will be opened as a gzip file and
      the stream will be implicitly uncompressed
    :param processor_factory: a callable for creating a processor; it is
      invoked as ``processor_factory(verbose=verbose, **kwargs)``
    :param verbose: passed to both the processor factory and the parser
    :param user_map: if not None, the file containing the user map.
    :param kwargs: extra keyword arguments forwarded to processor_factory
    :return: whatever the processor's process() method returns
    :raises BzrCommandError: if the parser reports a ParsingError
    """
    from fastimport.errors import ParsingError
    from bzrlib.errors import BzrCommandError
    from fastimport import parser
    input_stream = _get_source_stream(source)
    mapper = _get_user_mapper(user_map)
    processor = processor_factory(verbose=verbose, **kwargs)
    command_parser = parser.ImportParser(
        input_stream, verbose=verbose, user_mapper=mapper)
    try:
        result = processor.process(command_parser.iter_commands)
    except ParsingError as err:
        # Report parse failures as a command error carrying the line number.
        raise BzrCommandError("%d: Parse error: %s" % (err.lineno, err))
    return result
50 def _get_source_stream(source):
51 if source == '-' or source is None:
52 import sys
53 stream = helpers.binary_stream(sys.stdin)
54 elif source.endswith('.gz'):
55 import gzip
56 stream = gzip.open(source, "rb")
57 else:
58 stream = open(source, "rb")
59 return stream
62 def _get_user_mapper(filename):
63 import user_mapper
64 if filename is None:
65 return None
66 f = open(filename)
67 lines = f.readlines()
68 f.close()
69 return user_mapper.UserMapper(lines)
class cmd_fast_import(Command):
    """Backend for fast Bazaar data importers.

    This command reads a mixed command/data stream and creates
    branches in a Bazaar repository accordingly. The preferred
    recipe is::

      bzr fast-import project.fi project.bzr

    Numerous commands are provided for generating a fast-import file
    to use as input.
    To specify standard input as the input stream, use a
    source name of '-' (instead of project.fi). If the source name
    ends in '.gz', it is assumed to be compressed in gzip format.

    project.bzr will be created if it doesn't exist. If it exists
    already, it should be empty or be an existing Bazaar repository
    or branch. If not specified, the current directory is assumed.

    fast-import will intelligently select the format to use when
    creating a repository or branch. If you are running Bazaar 1.17
    up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used.
    Otherwise, the current default format ("pack-0.92" for Bazaar 1.x)
    is used. If you wish to specify a custom format, use the `--format`
    option.

     .. note::

        To maintain backwards compatibility, fast-import lets you
        create the target repository or standalone branch yourself.
        It is recommended though that you let fast-import create
        these for you instead.

    :Branch mapping rules:

     Git reference names are mapped to Bazaar branch names as follows:

     * refs/heads/foo is mapped to foo
     * refs/remotes/origin/foo is mapped to foo.remote
     * refs/tags/foo is mapped to foo.tag
     * */master is mapped to trunk, trunk.remote, etc.
     * */trunk is mapped to git-trunk, git-trunk.remote, etc.

    :Branch creation rules:

     When a shared repository is created or found at the destination,
     branches are created inside it. In the simple case of a single
     branch (refs/heads/master) inside the input file, the branch is
     project.bzr/trunk.

     When a standalone branch is found at the destination, the trunk
     is imported there and warnings are output about any other branches
     found in the input file.

     When a branch in a shared repository is found at the destination,
     that branch is made the trunk and other branches, if any, are
     created in sister directories.

    :Working tree updates:

     The working tree is generated for the trunk branch. If multiple
     branches are created, a message is output on completion explaining
     how to create the working trees for other branches.

    :Custom exporters:

     The fast-export-from-xxx commands typically call more advanced
     xxx-fast-export scripts. You are welcome to use the advanced
     scripts if you prefer.

     If you wish to write a custom exporter for your project, see
     http://bazaar-vcs.org/BzrFastImport for the detailed protocol
     specification. In many cases, exporters can be written quite
     quickly using whatever scripting/programming language you like.

    :User mapping:

     Some source repositories store just the user name while Bazaar
     prefers a full email address. You can adjust user-ids while
     importing by using the --user-map option. The argument is a
     text file with lines in the format::

       old-id = new-id

     Blank lines and lines beginning with # are ignored.
     If old-id has the special value '@', then users without an
     email address will get one created by using the matching new-id
     as the domain, unless a more explicit address is given for them.
     For example, given the user-map of::

       @ = example.com
       bill = William Jones <bill@example.com>

     then user-ids are mapped as follows::

       maria => maria <maria@example.com>
       bill => William Jones <bill@example.com>

     .. note::

        User mapping is supported by both the fast-import and
        fast-import-filter commands.

    :Blob tracking:

     As some exporters (like git-fast-export) reuse blob data across
     commits, fast-import makes two passes over the input file by
     default. In the first pass, it collects data about what blobs are
     used when, along with some other statistics (e.g. total number of
     commits). In the second pass, it generates the repository and
     branches.

     .. note::

        The initial pass isn't done if the --info option is used
        to explicitly pass in information about the input stream.
        It also isn't done if the source is standard input. In the
        latter case, memory consumption may be higher than otherwise
        because some blobs may be kept in memory longer than necessary.

    :Restarting an import:

     At checkpoints and on completion, the commit-id -> revision-id
     map is saved to a file called 'fastimport-id-map' in the control
     directory for the repository (e.g. .bzr/repository). If the import
     is interrupted or unexpectedly crashes, it can be started again
     and this file will be used to skip over already loaded revisions.
     As long as subsequent exports from the original source begin
     with exactly the same revisions, you can use this feature to
     maintain a mirror of a repository managed by a foreign tool.
     If and when Bazaar is used to manage the repository, this file
     can be safely deleted.

    :Examples:

     Import a Subversion repository into Bazaar::

       svn-fast-export /svn/repo/path > project.fi
       bzr fast-import project.fi project.bzr

     Import a CVS repository into Bazaar::

       cvs2git /cvs/repo/path > project.fi
       bzr fast-import project.fi project.bzr

     Import a Git repository into Bazaar::

       cd /git/repo/path
       git fast-export --all > project.fi
       bzr fast-import project.fi project.bzr

     Import a Mercurial repository into Bazaar::

       cd /hg/repo/path
       hg fast-export > project.fi
       bzr fast-import project.fi project.bzr

     Import a Darcs repository into Bazaar::

       cd /darcs/repo/path
       darcs-fast-export > project.fi
       bzr fast-import project.fi project.bzr
    """
    hidden = False
    _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info']
    takes_args = ['source', 'destination?']
    takes_options = ['verbose',
                    Option('user-map', type=str,
                        help="Path to file containing a map of user-ids.",
                        ),
                    Option('info', type=str,
                        help="Path to file containing caching hints.",
                        ),
                    Option('trees',
                        help="Update all working trees, not just trunk's.",
                        ),
                    Option('count', type=int,
                        help="Import this many revisions then exit.",
                        ),
                    Option('checkpoint', type=int,
                        help="Checkpoint automatically every N revisions."
                             " The default is 10000.",
                        ),
                    Option('autopack', type=int,
                        help="Pack every N checkpoints. The default is 4.",
                        ),
                    Option('inv-cache', type=int,
                        help="Number of inventories to cache.",
                        ),
                    RegistryOption.from_kwargs('mode',
                        'The import algorithm to use.',
                        title='Import Algorithm',
                        default='Use the preferred algorithm (inventory deltas).',
                        classic="Use the original algorithm (mutable inventories).",
                        experimental="Enable experimental features.",
                        value_switches=True, enum_switch=False,
                        ),
                    Option('import-marks', type=str,
                        help="Import marks from file."
                        ),
                    Option('export-marks', type=str,
                        help="Export marks to file."
                        ),
                    RegistryOption('format',
                        help='Specify a format for the created repository. See'
                             ' "bzr help formats" for details.',
                        lazy_registry=('bzrlib.bzrdir', 'format_registry'),
                        converter=lambda name: bzrdir.format_registry.make_bzrdir(name),
                        value_switches=False, title='Repository format'),
                    ]

    def run(self, source, destination='.', verbose=False, info=None,
        trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1,
        mode=None, import_marks=None, export_marks=None, format=None,
        user_map=None):
        """Import the fast-import stream in source into destination.

        Unless hints were supplied through info, or the source is
        standard input ('-'), the source is first scanned by
        _generate_info() to produce them. The option values are then
        handed to a GenericProcessor through _run().

        :return: the value returned by _run()
        """
        load_fastimport()
        from bzrlib.plugins.fastimport.processors import generic_processor
        from bzrlib.plugins.fastimport.helpers import (
            open_destination_directory,
            )
        control = open_destination_directory(destination, format=format)

        # If no information file was given and the source isn't stdin,
        # generate the information by reading the source file as a first pass
        if info is None and source != '-':
            info = self._generate_info(source)

        # Do the work
        if mode is None:
            mode = 'default'
        params = {
            'info': info,
            'trees': trees,
            'count': count,
            'checkpoint': checkpoint,
            'autopack': autopack,
            'inv-cache': inv_cache,
            'mode': mode,
            'import-marks': import_marks,
            'export-marks': export_marks,
            }
        return _run(source, generic_processor.GenericProcessor,
                bzrdir=control, params=params, verbose=verbose,
                user_map=user_map)

    def _generate_info(self, source):
        """Scan source with an InfoProcessor and return its output lines.

        :param source: name of the input, opened via _get_source_stream()
        :return: the verbose InfoProcessor output, split into lines
        :raises BzrCommandError: if the parser reports a ParsingError
        """
        from cStringIO import StringIO
        from fastimport import parser
        from fastimport.errors import ParsingError
        from bzrlib.errors import BzrCommandError
        from bzrlib.plugins.fastimport.processors import info_processor
        stream = _get_source_stream(source)
        output = StringIO()
        try:
            proc = info_processor.InfoProcessor(verbose=True, outf=output)
            p = parser.ImportParser(stream)
            try:
                proc.process(p.iter_commands)
            except ParsingError as e:
                raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e))
            lines = output.getvalue().splitlines()
        finally:
            output.close()
            # run() hands the source *name* to _run(), which opens its own
            # stream, so this handle is finished with: close it instead of
            # rewinding it and leaving it open.
            stream.close()
        return lines
class cmd_fast_import_filter(Command):
    """Filter a fast-import stream to include/exclude files & directories.

    This command is useful for splitting a subdirectory or bunch of
    files out from a project to create a new project complete with history
    for just those files. It can also be used to create a new project
    repository that removes all references to files that should not have
    been committed, e.g. security-related information (like passwords),
    commercially sensitive material, files with an incompatible license or
    large binary files like CD images.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    :File/directory filtering:

     This is supported by the -i and -x options. Excludes take precedence
     over includes.

     When filtering out a subdirectory (or file), the new stream uses the
     subdirectory (or subdirectory containing the file) as the root. As
     fast-import doesn't know in advance whether a path is a file or
     directory in the stream, you need to specify a trailing '/' on
     directories passed to the -i option. If multiple files or
     directories are given, the new root is the deepest common directory.

     Note: If a path has been renamed, take care to specify the *original*
     path name, not the final name that it ends up with.

    :User mapping:

     Some source repositories store just the user name while Bazaar
     prefers a full email address. You can adjust user-ids
     by using the --user-map option. The argument is a
     text file with lines in the format::

       old-id = new-id

     Blank lines and lines beginning with # are ignored.
     If old-id has the special value '@', then users without an
     email address will get one created by using the matching new-id
     as the domain, unless a more explicit address is given for them.
     For example, given the user-map of::

       @ = example.com
       bill = William Jones <bill@example.com>

     then user-ids are mapped as follows::

       maria => maria <maria@example.com>
       bill => William Jones <bill@example.com>

     .. note::

        User mapping is supported by both the fast-import and
        fast-import-filter commands.

    :History rewriting:

     By default fast-import-filter does quite aggressive history rewriting.
     Empty commits (or commits which had all their content filtered out) will
     be removed, and so are the references to commits not included in the stream.

     Flag --dont-squash-empty-commits reverses this behavior and makes it possible to
     use fast-import-filter on incremental streams.

    :Examples:

     Create a new project from a library (note the trailing / on the
     directory name of the library)::

       front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi
       bzr fast-import xxx.fi mylibrary.bzr
       (lib/xxx/foo is now foo)

     Create a new repository without a sensitive file::

       front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi
       bzr fast-import clean.fi clean.bzr
    """
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source?']
    takes_options = ['verbose',
                    ListOption('include_paths', short_name='i', type=str,
                        help="Only include commits affecting these paths."
                             " Directories should have a trailing /."
                        ),
                    ListOption('exclude_paths', short_name='x', type=str,
                        help="Exclude these paths from commits."
                        ),
                    Option('user-map', type=str,
                        help="Path to file containing a map of user-ids.",
                        ),
                    Option('dont-squash-empty-commits',
                        help="Preserve all commits and links between them"
                        ),
                    ]
    encoding_type = 'exact'

    def run(self, source=None, verbose=False, include_paths=None,
        exclude_paths=None, user_map=None, dont_squash_empty_commits=False):
        """Filter the stream in source through a FilterProcessor.

        :return: the value returned by _run()
        :raises BzrCommandError: if --dont-squash-empty-commits is requested
          but the installed FilterProcessor does not list
          'squash_empty_commits' among its known parameters, or if the
          stream fails to parse
        """
        from bzrlib.errors import BzrCommandError
        load_fastimport()
        from fastimport.processors import filter_processor
        params = {
            'include_paths': include_paths,
            'exclude_paths': exclude_paths,
            }
        # Only pass squash_empty_commits to processors that declare it;
        # otherwise the flag can't be honoured, so refuse it explicitly.
        if ('squash_empty_commits' in
                filter_processor.FilterProcessor.known_params):
            params['squash_empty_commits'] = (not dont_squash_empty_commits)
        else:
            if dont_squash_empty_commits:
                raise BzrCommandError("installed python-fastimport does not "
                    "support not squashing empty commits. Please install "
                    "a newer python-fastimport to use "
                    "--dont-squash-empty-commits")

        # _run() opens the stream, loads the user map, builds the parser and
        # converts ParsingError into BzrCommandError, exactly as the other
        # commands in this module do.
        return _run(source, filter_processor.FilterProcessor, params=params,
            verbose=verbose, user_map=user_map)
class cmd_fast_import_info(Command):
    """Output information about a fast-import stream.

    This command reads a fast-import stream and outputs
    statistics and interesting properties about what it finds.
    When run in verbose mode, the information is output as a
    configuration file that can be passed to fast-import to
    assist it in intelligently caching objects.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    :Examples:

     Display statistics about the import stream produced by front-end::

      front-end | bzr fast-import-info -

     Create a hints file for running fast-import on a large repository::

       front-end | bzr fast-import-info -v - > front-end.cfg
    """
    hidden = False
    _see_also = ['fast-import']
    takes_args = ['source']
    takes_options = ['verbose']

    def run(self, source, verbose=False):
        """Run an InfoProcessor over source and return the result of _run()."""
        load_fastimport()
        from bzrlib.plugins.fastimport.processors import info_processor
        processor_factory = info_processor.InfoProcessor
        return _run(source, processor_factory, verbose=verbose)
class cmd_fast_import_query(Command):
    """Query a fast-import stream displaying selected commands.

    To specify standard input as the input stream, use a source name
    of '-'. If the source name ends in '.gz', it is assumed to be
    compressed in gzip format.

    To specify a commit to display, give its mark using the
    --commit-mark option. The commit will be displayed with
    file-commands included but with inline blobs hidden.

    To specify the commands to display, use the -C option one or
    more times. To specify just some fields for a command, use the
    syntax::

      command=field1,...

    By default, the nominated fields for the nominated commands
    are displayed tab separated. To see the information in
    a name:value format, use verbose mode.

    Note: Binary fields (e.g. data for blobs) are masked out
    so it is generally safe to view the output in a terminal.

    :Examples:

     Show the commit with mark 429::

      bzr fast-import-query xxx.fi -m429

     Show all the fields of the reset and tag commands::

      bzr fast-import-query xxx.fi -Creset -Ctag

     Show the mark and merge fields of the commit commands::

      bzr fast-import-query xxx.fi -Ccommit=mark,merge
    """
    hidden = True
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source']
    takes_options = ['verbose',
                    Option('commit-mark', short_name='m', type=str,
                        help="Mark of the commit to display."
                        ),
                    ListOption('commands', short_name='C', type=str,
                        help="Display fields for these commands."
                        ),
                    ]

    def run(self, source, verbose=False, commands=None, commit_mark=None):
        """Run a QueryProcessor over source and return the result of _run().

        The processor params are derived from the -C values via
        helpers.defines_to_dict(); a truthy commit_mark is added under
        the 'commit-mark' key.
        """
        load_fastimport()
        from fastimport.processors import query_processor
        from bzrlib.plugins.fastimport import helpers
        query_params = helpers.defines_to_dict(commands)
        if not query_params:
            # Fall back to an empty dict so a commit mark can still be added.
            query_params = {}
        if commit_mark:
            query_params['commit-mark'] = commit_mark
        processor_factory = query_processor.QueryProcessor
        return _run(source, processor_factory, params=query_params,
            verbose=verbose)
class cmd_fast_export(Command):
    """Generate a fast-import stream from a Bazaar branch.

    This program generates a stream from a Bazaar branch in fast-import
    format used by tools such as bzr fast-import, git-fast-import and
    hg-fast-import.

    It takes two optional arguments: the source bzr branch to export and
    the destination to write the fastimport stream to.

    If no source is specified, it will search for a branch in the
    current directory.

    If no destination is given or the destination is '-', standard output
    is used. Otherwise, the destination is the name of a file. If the
    destination ends in '.gz', the output will be compressed into gzip
    format.

    :Round-tripping:

     Recent versions of the fast-import specification support features
     that allow effective round-tripping most of the metadata in Bazaar
     branches. As such, fast-exporting a branch and fast-importing the data
     produced will create a new repository with roughly equivalent history, i.e.
     "bzr log -v -p --include-merges --forward" on the old branch and
     new branch should produce similar, if not identical, results.

     .. note::

        Be aware that the new repository may appear to have similar history
        but internally it is quite different with new revision-ids and
        file-ids assigned. As a consequence, the ability to easily merge
        with branches based on the old repository is lost. Depending on your
        reasons for producing a new repository, this may or may not be an
        issue.

    :Interoperability:

     fast-export can use the following "extended features" to
     produce a richer data stream:

     * *multiple-authors* - if a commit has multiple authors (as commonly
       occurs in pair-programming), all authors will be included in the
       output, not just the first author

     * *commit-properties* - custom metadata per commit that Bazaar stores
       in revision properties (e.g. branch-nick and bugs fixed by this
       change) will be included in the output.

     * *empty-directories* - directories, even the empty ones, will be
       included in the output.

     To disable these features and produce output acceptable to git 1.6,
     use the --plain option. To enable these features, use --no-plain.
     Currently, --plain is the default but that will change in the near
     future once the feature names and definitions are formally agreed
     to by the broader fast-import developer community.

     Git has stricter naming rules for tags and fast-export --plain
     will skip tags which can't be imported into git. To replace characters
     unsupported in git with an underscore instead, specify
     --rewrite-tag-names.

    :History truncation:

     It is sometimes convenient to simply truncate the revision history at a
     certain point. The --baseline option, to be used in conjunction with -r,
     emits a baseline commit containing the state of the entire source tree at
     the first requested revision. This allows a user to produce a tree
     identical to the original without munging multiple exports.

    :Examples:

     To produce data destined for import into Bazaar::

       bzr fast-export --no-plain my-bzr-branch my.fi.gz

     To produce data destined for Git 1.6::

       bzr fast-export --plain my-bzr-branch my.fi

     To import several unmerged but related branches into the same repository,
     use the --{export,import}-marks options, and specify a name for the git
     branch like this::

       bzr fast-export --export-marks=marks.bzr project.dev |
             GIT_DIR=project/.git git-fast-import --export-marks=marks.git

       bzr fast-export --import-marks=marks.bzr -b other project.other |
             GIT_DIR=project/.git git-fast-import --import-marks=marks.git

     If you get a "Missing space after source" error from git-fast-import,
     see the top of the commands.py module for a work-around.

     Since bzr uses per-branch tags and git/hg use per-repo tags, the
     way bzr fast-export presently emits tags (unconditional reset &
     new ref) may result in clashes when several different branches
     are imported into single git/hg repo. If this occurs, use the
     bzr fast-export option --no-tags during the export of one or more
     branches to avoid the issue.
    """
    hidden = False
    _see_also = ['fast-import', 'fast-import-filter']
    takes_args = ['source?', 'destination?']
    takes_options = ['verbose', 'revision',
                    # The value is a branch name, not a file.
                    Option('git-branch', short_name='b', type=str,
                        argname='NAME',
                        help='Name of the git branch to create (default=master).'
                        ),
                    Option('checkpoint', type=int, argname='N',
                        help="Checkpoint every N revisions (default=10000)."
                        ),
                    Option('marks', type=str, argname='FILE',
                        help="Import marks from and export marks to file."
                        ),
                    Option('import-marks', type=str, argname='FILE',
                        help="Import marks from file."
                        ),
                    Option('export-marks', type=str, argname='FILE',
                        help="Export marks to file."
                        ),
                    Option('plain',
                        help="Exclude metadata to maximise interoperability."
                        ),
                    Option('rewrite-tag-names',
                        help="Replace characters invalid in git with '_'"
                             " (plain mode only).",
                        ),
                    Option('baseline',
                        help="Export an 'absolute' baseline commit prior to"
                             " the first relative commit",
                        ),
                    Option('no-tags',
                        help="Don't export tags"
                        ),
                    ]
    encoding_type = 'exact'

    def run(self, source=None, destination=None, verbose=False,
        git_branch="master", checkpoint=10000, marks=None,
        import_marks=None, export_marks=None, revision=None,
        plain=True, rewrite_tag_names=False, no_tags=False, baseline=False):
        """Export the branch containing source as a fast-import stream.

        :param source: location to search for the branch; None means "."
        :param destination: passed to exporter._get_output_stream() to
          obtain the output stream
        :return: the value returned by BzrFastExporter.run()
        """
        load_fastimport()
        from bzrlib.branch import Branch
        from bzrlib.plugins.fastimport import exporter

        # --marks names one file used for both importing and exporting marks.
        if marks:
            import_marks = export_marks = marks

        # Open the source
        if source is None:
            source = "."
        branch = Branch.open_containing(source)[0]
        outf = exporter._get_output_stream(destination)
        # Use a distinct local name so the exporter module isn't shadowed.
        fast_exporter = exporter.BzrFastExporter(branch,
            outf=outf, ref="refs/heads/%s" % git_branch, checkpoint=checkpoint,
            import_marks_file=import_marks, export_marks_file=export_marks,
            revision=revision, verbose=verbose, plain_format=plain,
            rewrite_tags=rewrite_tag_names, no_tags=no_tags, baseline=baseline)
        return fast_exporter.run()