From cc8fefe0083bc38c781341a86a1779dc5941f6e2 Mon Sep 17 00:00:00 2001 From: Frej Drejhammar Date: Sat, 30 Sep 2017 14:51:24 +0200 Subject: [PATCH] Change syntax of mapping files This is done to allow escape sequences in the key and value strings. --- README.md | 12 +++++++++--- hg-fast-export.py | 21 ++++++++++++++++----- hg-fast-export.sh | 1 + 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 649fc4a..e09b271 100644 --- a/README.md +++ b/README.md @@ -65,12 +65,18 @@ As mercurial appears to be much less picky about the syntax of the author information than git, an author mapping file can be given to hg-fast-export to fix up malformed author strings. The file is specified using the -A option. The file should contain lines of the -form `FromAuthor=ToAuthor`. The example authors.map below will -translate `User ` to `User `. +form `"<key>"="<value>"`. Inside the key and value strings, all escape +sequences understood by the python `string_escape` encoding are +supported. (Versions of fast-export prior to v171002 had a different +syntax, the old syntax can be enabled by the flag +`--mappings-are-raw`.) + +The example authors.map below will translate `User +` to `User `. 
``` -- Start of authors.map -- -User =User +"User "="User " -- End of authors.map -- ``` diff --git a/hg-fast-export.py b/hg-fast-export.py index c9780f8..47290df 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -294,8 +294,10 @@ def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap): count=checkpoint(count) return count -def load_mapping(name, filename): +def load_mapping(name, filename, mapping_is_raw): raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$') + string_regexp='"(((\\.)|(\\")|[^"])*)"' + quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$') def parse_raw_line(line): m=raw_regexp.match(line) @@ -303,6 +305,13 @@ def load_mapping(name, filename): return None return (m.group(1).strip(), m.group(2).strip()) + def parse_quoted_line(line): + m=quoted_regexp.match(line) + if m==None: + return None + return (m.group(1).decode('string_escape'), + m.group(5).decode('string_escape')) + cache={} if not os.path.exists(filename): sys.stderr.write('Could not open mapping file [%s]\n' % (filename)) @@ -317,7 +326,7 @@ def load_mapping(name, filename): continue elif line=='' or line[0]=='#': continue - m=parse_raw_line(line) + m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line) if m==None: sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) continue @@ -469,6 +478,8 @@ if __name__=='__main__': help="Assume commit and author strings retrieved from Mercurial are encoded in ") parser.add_option("--fe",dest="fn_encoding", help="Assume file names from Mercurial are encoded in ") + parser.add_option("--mappings-are-raw",dest="raw_mappings", default=False, + help="Assume mappings are raw = lines") (options,args)=parser.parse_args() @@ -483,15 +494,15 @@ if __name__=='__main__': a={} if options.authorfile!=None: - a=load_mapping('authors', options.authorfile) + a=load_mapping('authors', options.authorfile, options.raw_mappings) b={} if options.branchesfile!=None: - b=load_mapping('branches', 
options.branchesfile) + b=load_mapping('branches', options.branchesfile, options.raw_mappings) t={} if options.tagsfile!=None: - t=load_mapping('tags', options.tagsfile) + t=load_mapping('tags', options.tagsfile, True) if options.default_branch!=None: set_default_branch(options.default_branch) diff --git a/hg-fast-export.sh b/hg-fast-export.sh index a762ee6..7c36a50 100755 --- a/hg-fast-export.sh +++ b/hg-fast-export.sh @@ -55,6 +55,7 @@ Options: Mercurial are encoded in --fe Assume filenames from Mercurial are encoded in + --mappings-are-raw Assume mappings are raw = lines " case "$1" in -h|--help) -- 2.11.4.GIT