From 04a7ec1351e7e7ae7a8e666e064a1e3c41c65946 Mon Sep 17 00:00:00 2001
From: "Kyle J. McKay"
Date: Fri, 4 Apr 2014 05:13:28 -0700
Subject: [PATCH] hg-fast-export.py: do not generate invalid ref names

Git has various rules about what is and is not a valid ref name.
These are summarized in the 'git help check-ref-format' output.

Update the ref name conversion code to take into account all the
rules.  Also compile the regexes only once and eliminate invalid
index exceptions.

These reference names no longer cause exceptions:

  /a
  a//b

These reference names no longer generate invalid names:

  a@{b}
  a.lock

All ref names that were previously converted to valid Git ref names
should continue to be converted to the same ref name now.
---
 hg-fast-export.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/hg-fast-export.py b/hg-fast-export.py
index d255c6d..e52da83 100755
--- a/hg-fast-export.py
+++ b/hg-fast-export.py
@@ -25,6 +25,13 @@ sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
 cfg_checkpoint_count=0
 # write some progress message every this many file contents written
 cfg_export_boundary=1000
+# ref manipulation regexs
+ref_crud_re = re.compile(r'[[\x00-\x1f\x7f ~^:\\*?]+', re.S)
+ref_dotdot_re = re.compile(r'\.\.')
+ref_atbrace_re = re.compile(r'@\{')
+ref_dotlock_re = re.compile(r'.*\.lock$', re.I)
+ref_separators_re = re.compile(r'/+')
+ref_collapse_re = re.compile(r'_+')
 
 def gitmode(flags):
   return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
@@ -141,16 +148,24 @@ def sanitize_name(name,what="branch"):
   """Sanitize input roughly according to git-check-ref-format(1)"""
 
   def dot(name):
-    if name[0] == '.': return '_'+name[1:]
+    if len(name) >= 1 and name[0] == '.': return '_'+name[1:]
     return name
 
-  n=name
-  p=re.compile('([[ ~^:?\\\\*]|\.\.)')
-  n=p.sub('_', n)
+  if name == '':
+    # be paranoid just in case
+    n = '_'
+  else:
+    n = name
+  n = ref_crud_re.sub('_', n)
+  n = ref_dotdot_re.sub('_', n)
+  n = ref_atbrace_re.sub('_{', n)
+  if ref_dotlock_re.match(n):
+    n = n[:-5] + '_' + n[-4:]
   if n[-1] in ('/', '.'): n=n[:-1]+'_'
   n='/'.join(map(dot,n.split('/')))
-  p=re.compile('_+')
-  n=p.sub('_', n)
+  if n[0] == '/': n='_'+n[1:]
+  n = ref_separators_re.sub('/', n)
+  n = ref_collapse_re.sub('_', n)
 
   if n!=name:
     sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
-- 
2.11.4.GIT