filter-branch: fix behaviour of '-k'
[git.git] / git-filter-branch.sh
blobf2b0e273ba993b6102066b6fd9e9c52096424037
1 #!/bin/sh
3 # Rewrite revision history
4 # Copyright (c) Petr Baudis, 2006
5 # Minimal changes to "port" it to core-git (c) Johannes Schindelin, 2007
7 # Lets you rewrite GIT revision history by creating a new branch from
8 # your current branch by applying custom filters on each revision.
9 # Those filters can modify each tree (e.g. removing a file or running
10 # a perl rewrite on all files) or information about each commit.
11 # Otherwise, all information (including original commit times or merge
12 # information) will be preserved.
14 # The command takes the new branch name as a mandatory argument and
15 # the filters as optional arguments. If you specify no filters, the
16 # commits will be recommitted without any changes, which would normally
17 # have no effect and result in the new branch pointing to the same
18 # commit as your current branch. (Nevertheless, this may be useful in
19 # the future for compensating for some Git bugs or such, therefore
20 # such a usage is permitted.)
22 # WARNING! The rewritten history will have different ids for all the
23 # objects and will not converge with the original branch. You will not
24 # be able to easily push and distribute the rewritten branch. Please do
25 # not use this command if you do not know the full implications, and
26 # avoid using it anyway - do not do what a simple single commit on top
27 # of the current version would fix.
29 # Always verify that the rewritten version is correct before disposing
30 # the original branch.
32 # Note that since this operation is very I/O intensive, it might
33 # be a good idea to do it off-disk, e.g. on tmpfs. Reportedly the speedup
34 # is very noticeable.
36 # OPTIONS
37 # -------
38 # -d TEMPDIR:: The path to the temporary tree used for rewriting
39 # When applying a tree filter, the command needs to temporarily
40 # check out the tree to some directory, which may consume
41 # considerable space in case of large projects. By default it
42 # does this in the '.git-rewrite/' directory but you can override
43 # that choice by this parameter.
45 # -r STARTREV:: The commit id to start the rewrite at
46 # Normally, the command will rewrite the entire history. If you
47 # pass this argument, though, this will be the first commit it
48 # will rewrite and keep the previous commits intact.
50 # -k KEEPREV:: A commit id until which _not_ to rewrite history
51 # If you pass this argument, this commit and all of its
52 # predecessors are kept intact.
54 # Filters
55 # ~~~~~~~
56 # The filters are applied in the order as listed below. The COMMAND
57 # argument is always evaluated in shell using the 'eval' command.
58 # The $GIT_COMMIT environment variable is permanently set to contain
59 # the id of the commit being rewritten. The author/committer environment
60 # variables are set before the first filter is run.
62 # A 'map' function is available that takes an "original sha1 id" argument
63 # and outputs a "rewritten sha1 id" if the commit has been already
64 # rewritten, fails otherwise; the 'map' function can return several
65 # ids on separate lines if your commit filter emitted multiple commits
66 # (see below).
68 # --env-filter COMMAND:: The filter for modifying environment
69 # This is the filter for modifying the environment in which
70 # the commit will be performed. Specifically, you might want
71 # to rewrite the author/committer name/email/time environment
72 # variables (see `git-commit` for details). Do not forget to
73 # re-export the variables.
75 # --tree-filter COMMAND:: The filter for rewriting tree (and its contents)
76 # This is the filter for rewriting the tree and its contents.
77 # The COMMAND argument is evaluated in shell with the working
78 # directory set to the root of the checked out tree. The new tree
79 # is then used as-is (new files are auto-added, disappeared files
80 # are auto-removed - neither .gitignore files nor any other ignore rules
81 # HAVE ANY EFFECT!).
83 # --index-filter COMMAND:: The filter for rewriting index
84 # This is the filter for rewriting the Git's directory index.
85 # It is similar to the tree filter but does not check out the
86 # tree, which makes it much faster. However, you must use the
87 # lowlevel Git index manipulation commands to do your work.
89 # --parent-filter COMMAND:: The filter for rewriting parents
90 # This is the filter for rewriting the commit's parent list.
91 # It will receive the parent string on stdin and shall output
92 # the new parent string on stdout. The parent string is in
93 # format accepted by `git-commit-tree`: empty for initial
94 # commit, "-p parent" for a normal commit and "-p parent1
95 # -p parent2 -p parent3 ..." for a merge commit.
97 # --msg-filter COMMAND:: The filter for rewriting commit message
98 # This is the filter for rewriting the commit messages.
99 # The COMMAND argument is evaluated in shell with the original
100 # commit message on standard input; its standard output is
101 # used as the new commit message.
103 # --commit-filter COMMAND:: The filter for performing the commit
104 # If this filter is passed, it will be called instead of the
105 # `git-commit-tree` command, with those arguments:
107 # TREE_ID [-p PARENT_COMMIT_ID]...
109 # and the log message on stdin. The commit id is expected on
110 # stdout. As a special extension, the commit filter may emit
111 # multiple commit ids; in that case, all of them will be used
112 # as parents instead of the original commit in further commits.
114 # --tag-name-filter COMMAND:: The filter for rewriting tag names.
115 # If this filter is passed, it will be called for every tag ref
116 # that points to a rewritten object (or to a tag object which
117 # points to a rewritten object). The original tag name is passed
118 # via standard input, and the new tag name is expected on standard
119 # output.
121 # The original tags are not deleted, but can be overwritten;
122 # use "--tag-name-filter=cat" to simply update the tags. In this
123 # case, be very careful and make sure you have the old tags
124 # backed up in case the conversion has run afoul.
126 # Note that there is currently no support for proper rewriting of
127 # tag objects; in layman terms, if the tag has a message or signature
128 # attached, the rewritten tag won't have it. Sorry. (It is by
129 # definition impossible to preserve signatures at any rate, though.)
131 # EXAMPLE USAGE
132 # -------------
133 # Suppose you want to remove a file (containing confidential information
134 # or copyright violation) from all commits:
136 # git-filter-branch --tree-filter 'rm filename' newbranch
138 # A significantly faster version:
140 # git-filter-branch --index-filter 'git-update-index --remove filename' newbranch
142 # Now, you will get the rewritten history saved in the branch 'newbranch'
143 # (your current branch is left untouched).
145 # To "etch-graft" a commit to the revision history (set a commit to be
146 # the parent of the current initial commit and propagate that):
148 # git-filter-branch --parent-filter sed\ 's/^$/-p graftcommitid/' newbranch
150 # (if the parent string is empty - therefore we are dealing with the
151 # initial commit - add graftcommit as a parent). Note that this assumes
152 # history with a single root (that is, no git-merge without common ancestors
153 # happened). If this is not the case, use:
155 # git-filter-branch --parent-filter 'cat; [ "$GIT_COMMIT" = "COMMIT" ] && echo "-p GRAFTCOMMIT"' newbranch
157 # To remove commits authored by "Darl McBribe" from the history:
159 # git-filter-branch --commit-filter 'if [ "$GIT_AUTHOR_NAME" = "Darl McBribe" ]; then shift; while [ -n "$1" ]; do shift; echo "$1"; shift; done; else git-commit-tree "$@"; fi' newbranch
161 # (the shift magic first throws away the tree id and then the -p
162 # parameters). Note that this handles merges properly! In case Darl
163 # committed a merge between P1 and P2, it will be propagated properly
164 # and all children of the merge will become merge commits with P1,P2
165 # as their parents instead of the merge commit.
167 # To restrict rewriting to only part of the history, use -r or -k or both.
168 # Consider this history:
170 #      D--E--F--G--H
171 #     /     /
172 # A--B-----C
174 # To rewrite only commits F,G,H, use:
176 # git-filter-branch -r F ...
178 # To rewrite commits E,F,G,H, use one of these:
180 # git-filter-branch -r E -k C ...
181 # git-filter-branch -k D -k C ...
183 # Testsuite: TODO
# Abort as soon as any command fails unexpectedly.
set -e

# Synopsis of the accepted command line.
USAGE="git-filter-branch [-d TEMPDIR] [-r STARTREV]... [-k KEEPREV]... [-s SRCBRANCH] [FILTERS] DESTBRANCH"
# Source git's shell helper library.  The usage and die helpers called
# later in this script are not defined in this file; presumably they
# come from here.
. git-sh-setup
# Print the rewritten commit id(s) recorded for the original commit id $1,
# one per line, as stored in "$workdir/../map/$1".
# Returns 1 and prints nothing when no readable entry exists for that id.
map()
{
	# an empty id would name the map directory itself, not an entry
	[ -n "$1" ] || return 1
	[ -r "$workdir/../map/$1" ] || return 1
	cat "$workdir/../map/$1"
}
# When piped a commit, output a script to set the ident of either
# "author" or "committer".
#
# $1 selects the header and may be given in either case (AUTHOR or
# COMMITTER).  The raw commit object is read from stdin.  The output is
# shell code meant to be passed to eval: it exports GIT_<$1>_NAME,
# GIT_<$1>_EMAIL and GIT_<$1>_DATE taken from the first matching header
# line, then falls back to the local part of the email address when the
# name is empty.
#
# The sed program keeps the header line in the hold space (h) and
# fetches it back (g) for each of the three fields, then quits (q) so
# that later lines (e.g. the commit message) are never looked at.
# Single quotes in the values are escaped so the emitted assignments
# survive eval.
set_ident () {
	lid="$(echo "$1" | tr "A-Z" "a-z")"
	uid="$(echo "$1" | tr "a-z" "A-Z")"
	pick_id_script='
		/^'$lid' /{
			s/'\''/'\''\\'\'\''/g
			h
			s/^'$lid' \([^<]*\) <[^>]*> .*$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/export GIT_'$uid'_NAME='\''&'\''/p
			g
			s/^'$lid' [^<]* <\([^>]*\)> .*$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/export GIT_'$uid'_EMAIL='\''&'\''/p
			g
			s/^'$lid' [^<]* <[^>]*> \(.*\)$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/export GIT_'$uid'_DATE='\''&'\''/p
			q
		}
	'

	LANG=C LC_ALL=C sed -ne "$pick_id_script"
	# Ensure non-empty id name.
	echo "[ -n \"\$GIT_${uid}_NAME\" ] || export GIT_${uid}_NAME=\"\${GIT_${uid}_EMAIL%%@*}\""
}
# List the object names of all parents of the commit $1.
# The names are printed on one line, each preceded by a space (the
# commit's own id is stripped from the front of the rev-list output);
# a commit without parents yields an empty line.
get_parents () {
	git-rev-list -1 --parents "$1" | sed "s/^[0-9a-f]*//"
}
# Defaults for everything the command-line switches can override.
tempdir=.git-rewrite			# -d: scratch directory
unchanged=" "				# -r / -k: commits excluded from rewriting
filter_env=				# --env-filter
filter_tree=				# --tree-filter
filter_index=				# --index-filter
filter_parent=				# --parent-filter
filter_msg=cat				# --msg-filter: keep the message as is
filter_commit='git-commit-tree "$@"'	# --commit-filter: plain commit
filter_tag_name=			# --tag-name-filter
srcbranch=HEAD				# -s: branch whose history is rewritten
# Parse the command line.  Every switch takes exactly one argument.
# Parsing stops at "--" (which is consumed) or at the first word that
# does not start with "-"; that word is left in $1 as the destination
# branch.  usage is called when the arguments run out before a
# destination branch is seen, when a switch lacks its argument, or when
# a switch is unknown.
while case "$#" in 0) usage;; esac
do
	case "$1" in
	--)
		shift
		break
		;;
	-*)
		;;
	*)
		break
		;;
	esac

	# all switches take one argument
	ARG="$1"
	case "$#" in 1) usage ;; esac
	shift
	OPTARG="$1"
	shift

	case "$ARG" in
	-d)
		tempdir="$OPTARG"
		;;
	-r)
		# rewriting starts AT this commit, so only its parents are kept
		unchanged="$(get_parents "$OPTARG") $unchanged"
		;;
	-k)
		# this commit itself (and so its ancestry) is kept
		unchanged="$(git-rev-parse "$OPTARG"^{commit}) $unchanged"
		;;
	--env-filter)
		filter_env="$OPTARG"
		;;
	--tree-filter)
		filter_tree="$OPTARG"
		;;
	--index-filter)
		filter_index="$OPTARG"
		;;
	--parent-filter)
		filter_parent="$OPTARG"
		;;
	--msg-filter)
		filter_msg="$OPTARG"
		;;
	--commit-filter)
		filter_commit="$OPTARG"
		;;
	--tag-name-filter)
		filter_tag_name="$OPTARG"
		;;
	-s)
		srcbranch="$OPTARG"
		;;
	*)
		usage
		;;
	esac
done
# Validate the destination branch, create the scratch area and work out
# which commits have to be rewritten.
dstbranch="$1"
test -n "$dstbranch" || die "missing branch name"
git-show-ref "refs/heads/$dstbranch" 2> /dev/null &&
	die "branch $dstbranch already exists"

test ! -e "$tempdir" || die "$tempdir already exists, please remove it"
# Remember where we started: a relative GIT_DIR is relative to this
# directory, wherever the scratch directory happens to live.
orig_dir="$(pwd)"
mkdir -p "$tempdir/t"
cd "$tempdir/t"
workdir="$(pwd)"	# checkout area; map() finds its data in $workdir/../map

case "$GIT_DIR" in
/*)
	;;
*)
	# Anchor to the starting directory.  Going up two levels from the
	# scratch tree is only right when -d names a direct child of the
	# starting directory, and wrong for e.g. a tmpfs path.
	GIT_DIR="$orig_dir/$GIT_DIR"
	export GIT_DIR
	;;
esac

export GIT_INDEX_FILE="$(pwd)/../index"
git-read-tree # seed the index file

ret=0

mkdir ../map # map old->new commit ids for rewriting parents

# $srcbranch and $unchanged are deliberately unquoted: $unchanged is a
# blank-separated list of commit ids to exclude.
git-rev-list --reverse --topo-order $srcbranch --not $unchanged >../revs
commits=$(wc -l <../revs | tr -d " ")

test "$commits" -eq 0 && die "Found nothing to rewrite"
# Rewrite every commit listed in ../revs, oldest first.  For each commit:
# load its tree into the index, export its author/committer identity,
# run the env, tree and index filters, translate its parents through
# ../map, run the parent filter, then create the new commit from the
# filtered message and record the resulting id(s) in ../map/<old id>.
i=0
while read commit; do
	i=$(($i+1))
	# pass the progress text as data, never as the printf format
	printf '%s (%s/%s) ' "$commit" "$i" "$commits"

	git-read-tree -i -m "$commit"

	export GIT_COMMIT="$commit"
	git-cat-file commit "$commit" >../commit

	eval "$(set_ident AUTHOR <../commit)"
	eval "$(set_ident COMMITTER <../commit)"
	# filters get /dev/null as stdin so they cannot swallow ../revs,
	# which is the stdin of this loop
	eval "$filter_env" < /dev/null

	if [ "$filter_tree" ]; then
		git-checkout-index -f -u -a
		# files that $commit removed are now still in the working tree;
		# remove them, else they would be added again
		git-ls-files -z --others | xargs -0 rm -f
		eval "$filter_tree" < /dev/null
		git-diff-index -r "$commit" | cut -f 2- | tr '\n' '\0' | \
			xargs -0 git-update-index --add --replace --remove
		git-ls-files -z --others | \
			xargs -0 git-update-index --add --replace --remove
	fi

	eval "$filter_index" < /dev/null

	parentstr=
	for parent in $(get_parents "$commit"); do
		if [ -r "../map/$parent" ]; then
			# a rewritten parent may have expanded to several commits
			for reparent in $(cat "../map/$parent"); do
				parentstr="$parentstr -p $reparent"
			done
		else
			# if it was not rewritten, take the original
			parentstr="$parentstr -p $parent"
		fi
	done
	if [ "$filter_parent" ]; then
		parentstr="$(echo "$parentstr" | eval "$filter_parent")"
	fi

	# Run the message and commit filters as separate steps.  Chained
	# in one pipeline ending in tee, only tee's exit status would be
	# seen, so a failing filter would silently leave an empty map
	# entry and every later commit would lose this parent.
	sed -e '1,/^$/d' <../commit |
		eval "$filter_msg" >../message ||
		die "msg filter failed: $filter_msg"
	tree=$(git-write-tree) || die "could not write tree for $commit"
	# $parentstr is deliberately unquoted: it is a list of "-p <id>" words
	sh -c "$filter_commit" git-commit-tree "$tree" $parentstr \
		<../message >"../map/$commit" ||
		die "could not write rewritten commit for $commit"
	# show the new id(s) after the progress text
	cat "../map/$commit"
done <../revs
# Point the destination branch at the rewritten version of the last
# commit in ../revs.  When that commit expanded to several rewritten
# commits, only the first becomes the branch head; the others are listed
# on stderr and ret is set to 1, which becomes the script's exit status.
src_head=$(tail -n 1 ../revs)
target_head=$(head -n 1 "../map/$src_head")
case "$target_head" in
'')
	echo Nothing rewritten
	;;
*)
	git-update-ref refs/heads/"$dstbranch" "$target_head"
	if [ "$(wc -l <"../map/$src_head")" -gt 1 ]; then
		echo "WARNING: Your commit filter caused the head commit to expand to several rewritten commits. Only the first such commit was recorded as the current $dstbranch head but you will need to resolve the situation now (probably by manually merging the other commits). These are all the commits:" >&2
		sed 's/^/ /' "../map/$src_head" >&2
		ret=1
	fi
	;;
esac
# With a tag name filter, re-point every tag under refs/tags whose
# commit was rewritten.  The filter receives the old tag name (without
# the refs/tags/ prefix) on stdin and prints the new name.  Tag objects
# are not recreated: the new ref points straight at the rewritten commit.
if [ "$filter_tag_name" ]; then
	git-for-each-ref --format='%(objectname) %(objecttype) %(refname)' refs/tags |
	while read sha1 type ref; do
		ref="${ref#refs/tags/}"
		# XXX: Rewrite tagged trees as well?
		if [ "$type" != "commit" ] && [ "$type" != "tag" ]; then
			continue
		fi

		if [ "$type" = "tag" ]; then
			# Dereference to a commit
			sha1t="$sha1"
			sha1="$(git-rev-parse "$sha1"^{commit} 2>/dev/null)" || continue
		fi

		# only tags on rewritten commits have a map entry
		[ -f "../map/$sha1" ] || continue
		new_sha1="$(cat "../map/$sha1")"
		export GIT_COMMIT="$sha1"
		new_ref="$(echo "$ref" | eval "$filter_tag_name")" ||
			die "tag name filter failed: $filter_tag_name"

		echo "$ref -> $new_ref ($sha1 -> $new_sha1)"

		if [ "$type" = "tag" ]; then
			# Warn that we are not rewriting the tag object itself.
			# NOTE(review): warn is not defined in this file; confirm
			# that git-sh-setup provides it.
			warn "unreferencing tag object $sha1t"
		fi

		git-update-ref "refs/tags/$new_ref" "$new_sha1"
	done
fi
# Leave the scratch area, delete it and report.
cd ../..
# $workdir is the absolute path of "$tempdir/t", so stripping its last
# component names the scratch directory regardless of where we are now.
# "$tempdir" itself may be a relative path of any depth, which would no
# longer resolve after the cd above.
rm -rf "${workdir%/*}"
echo "Rewritten history saved to the $dstbranch branch"

# ret is 1 when the head commit expanded to several commits, else 0
exit $ret