From deb95f4bba848de03b8cd73a3a23f1c6ddf34dce Mon Sep 17 00:00:00 2001
From: Karsten Blees
Date: Sun, 15 Jan 2012 23:04:58 +0100
Subject: [PATCH] Add recodetree script to convert non-ASCII file names to UTF-8

Add a script that checks a repository for non-ASCII file names and
converts the HEAD commit or the entire history from the current system
encoding to UTF-8.

Signed-off-by: Karsten Blees
---
Reviewer notes (ignored by git am, not part of the commit message):
 - recode_tree now verifies that its argument resolves to a tree before
   the index is cleared, and quotes that argument.
 - The blob id on the "index" line of bin/recodetree predates these
   changes.

 bin/recodetree             | 83 ++++++++++++++++++++++++++++++++++++++++++++++
 share/WinGit/copy-files.sh |  2 +-
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 bin/recodetree

diff --git a/bin/recodetree b/bin/recodetree
new file mode 100644
index 00000000..7cc82576
--- /dev/null
+++ b/bin/recodetree
@@ -0,0 +1,83 @@
+#!/bin/sh
+# reencode file names in git tree objects from Windows system encoding to UTF-8
+
+# print the disclaimer and the list of supported commands
+usage()
+{
+	echo "WARNING this script is not intended as a full-fledged and fool-proof
+migration utility, but rather as a proof of concept or collection of examples.
+Use at your own risk.
+
+In particular, the script doesn't check or fix any non-ascii names in config
+files (such as .git/config, .gitignore or .gitmodules). For specific migration
+tasks, some of the commands in this script may require tweaking.
+
+Usage: recodetree <command>
+recodetree check    Checks git repository for non-ascii file names.
+recodetree preview  Same as 'check', but prints file names as if converted
+                    from cp$(getcp) to UTF-8.
+recodetree head     Converts the current HEAD from cp$(getcp) to UTF-8.
+                    Does not commit, use git status to check results first.
+recodetree history  Converts entire repository history from cp$(getcp) to UTF-8.
+                    WARNING: this rewrites the history of the repository.
+                    - Make a BACKUP copy of the repository before using this!
+                    - Read 'git help filter-branch' for implications of
+                      rewriting history."
+}
+
+# print the non-ascii file names found on 'M' lines of a fast-export of all refs
+check()
+{
+	# use fast-export to dump all file names in the history
+	git fast-export --no-data --signed-tags=strip --tag-of-filtered-object=drop --all |
+	# use awk to filter for non-ascii names
+	awk --posix '/^M [0-9]{6} [0-9a-f]{40} .*[\200-\377]/{print substr($0,51);}'
+}
+
+# filter: reencode stdin from the system code page (cp$(getcp)) to UTF-8
+recode()
+{
+	# convert from system encoding to UTF-8
+	iconv -c -f cp$(getcp) -t utf-8
+}
+
+# replace the index with the tree of commit $1, file names reencoded to UTF-8
+recode_tree()
+{
+	# bail out before the index is cleared if $1 does not resolve to a tree
+	git rev-parse --verify -q "$1^{tree}" >/dev/null || {
+		echo "recodetree: cannot resolve '$1' to a tree" >&2
+		return 1
+	}
+	# clear the index
+	git rm -f -r -q --cached --ignore-unmatch \*
+	# list specified commit, reencode and add to index
+	git ls-tree -z -r "$1" | recode | git update-index -z --index-info
+}
+
+case $1 in
+	check)
+		check
+		;;
+
+	preview)
+		check | recode
+		;;
+
+	head)
+		recode_tree HEAD
+		;;
+
+	history)
+		git filter-branch --index-filter 'recodetree filter' -- --all
+		;;
+
+	filter)
+		# used internally by recodetree history
+		recode_tree "$GIT_COMMIT"
+		;;
+
+	*)
+		usage
+		;;
+esac
diff --git a/share/WinGit/copy-files.sh b/share/WinGit/copy-files.sh
index 1b488549..ac7cc626 100755
--- a/share/WinGit/copy-files.sh
+++ b/share/WinGit/copy-files.sh
@@ -45,7 +45,7 @@ dirname.exe,\
 du.exe,echo,egrep,env.exe,expr.exe,false.exe,find.exe,flex.exe,gawk.exe,grep.exe,\
 head.exe,id.exe,kill.exe,less.exe,libW11.dll,ln.exe,\
 ls.exe,m4.exe,md5sum.exe,mkdir.exe,msys-1.0.dll,msysltdl-3.dll,mv.exe,patch.exe,\
-patch.exe.manifest,perl.exe,printf,ps.exe,pwd,rm.exe,rmdir.exe,rxvt.exe,\
+patch.exe.manifest,perl.exe,printf,ps.exe,pwd,recodetree,rm.exe,rmdir.exe,rxvt.exe,\
 scp.exe,sed.exe,sh.exe,sleep.exe,sort.exe,split.exe,\
 ssh-agent.exe,ssh.exe,ssh-add.exe,ssh-keygen.exe,ssh-keyscan.exe,\
 tail.exe,tar.exe,tee.exe,touch.exe,tr.exe,true.exe,uname.exe,uniq.exe,\
-- 
2.11.4.GIT