From cadcfcbe9020ddfb0a97b53fc0df323f1259cb56 Mon Sep 17 00:00:00 2001 From: Johan Henkens Date: Wed, 5 Dec 2018 09:24:56 -0800 Subject: [PATCH] Move filter_contents to plugin system --- hg-fast-export.py | 30 +++++++------------------- plugins/shell_filter_file_contents/README.md | 30 ++++++++++++++++++++++++++ plugins/shell_filter_file_contents/__init__.py | 28 ++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 22 deletions(-) create mode 100644 plugins/shell_filter_file_contents/README.md create mode 100644 plugins/shell_filter_file_contents/__init__.py diff --git a/hg-fast-export.py b/hg-fast-export.py index 253055d..e53b5dd 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -124,7 +124,7 @@ def get_author(logmessage,committer,authors): return r return committer -def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None,plugins={}): +def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): count=0 max=len(files) for file in files: @@ -138,18 +138,6 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=N filename=file file_ctx=ctx.filectx(file) d=file_ctx.data() - if filter_contents: - import subprocess - filter_cmd=filter_contents + [filename,node.hex(file_ctx.filenode()),'1' if file_ctx.isbinary() else '0'] - try: - filter_proc=subprocess.Popen(filter_cmd,stdin=subprocess.PIPE,stdout=subprocess.PIPE) - d,_=filter_proc.communicate(d) - except: - sys.stderr.write('Running filter-contents %s:\n' % filter_cmd) - raise - filter_ret=filter_proc.poll() - if filter_ret: - raise subprocess.CalledProcessError(filter_ret,filter_cmd) if plugins and plugins['file_data_filters']: file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} @@ -208,7 +196,7 @@ def strip_leading_slash(filename): return filename def export_commit(ui,repo,revision,old_marks,max,count,authors, - branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None, + 
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='', plugins={}): def get_branchname(name): if brmap.has_key(name): @@ -280,8 +268,8 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, removed=[strip_leading_slash(x) for x in removed] map(lambda r: wr('D %s' % r),removed) - export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents,plugins) - export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents,plugins) + export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) + export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) wr() return checkpoint(count) @@ -417,7 +405,7 @@ def verify_heads(ui,repo,cache,force,branchesmap): def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, authors={},branchesmap={},tagsmap={}, - sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None, + sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='', plugins={}): def check_cache(filename, contents): if len(contents) == 0: @@ -460,7 +448,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, brmap={} for rev in range(min,max): c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, - sob,brmap,hgtags,encoding,fn_encoding,filter_contents, + sob,brmap,hgtags,encoding,fn_encoding, plugins) if notes: for rev in range(min,max): @@ -569,10 +557,8 @@ if __name__=='__main__': if options.plugins!=None: plugins+=options.plugins - filter_contents=None if options.filter_contents!=None: - import shlex - filter_contents=shlex.split(options.filter_contents) + plugins+=['shell_filter_file_contents='+options.filter_contents] plugins_dict={} plugins_dict['commit_message_filters']=[] @@ -596,5 +582,5 @@ if __name__=='__main__': options.headsfile, options.statusfile, authors=a,branchesmap=b,tagsmap=t, sob=options.sob,force=options.force,hgtags=options.hgtags, - notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents, + 
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding, plugins=plugins_dict)) diff --git a/plugins/shell_filter_file_contents/README.md b/plugins/shell_filter_file_contents/README.md new file mode 100644 index 0000000..108cd2a --- /dev/null +++ b/plugins/shell_filter_file_contents/README.md @@ -0,0 +1,30 @@ +## Shell Script File Filter + +This plugin uses shell scripts in order to perform filtering of files. +If your preferred scripting is done via shell, this tool is for you. +Note, though, that this method can cause an order of magnitude +slowdown. For small repositories, this won't be an issue. + +To use the plugin, add +`--plugin shell_filter_file_contents=path/to/shell/script.sh`. +The filter script is supplied to the plugin option after the plugin name, +which is in turn passed to the plugin initialization. hg-fast-export +runs the filter for each exported file, pipes its content to the filter's +standard input, and uses the filter's standard output in place +of the file's original content. An example use of this feature +is to convert line endings in text files from CRLF to git's preferred LF, +although this task is performed faster using the native plugin. 
+ +The script is called with the following syntax: +`FILTER_CONTENTS <file-path> <hg-hash> <is-binary>` + +``` +-- Start of crlf-filter.sh -- +#!/bin/sh +# $1 = pathname of exported file relative to the root of the repo +# $2 = Mercurial's hash of the file +# $3 = "1" if Mercurial reports the file as binary, otherwise "0" + +if [ "$3" == "1" ]; then cat; else dos2unix; fi +-- End of crlf-filter.sh -- +``` diff --git a/plugins/shell_filter_file_contents/__init__.py b/plugins/shell_filter_file_contents/__init__.py new file mode 100644 index 0000000..84fd938 --- /dev/null +++ b/plugins/shell_filter_file_contents/__init__.py @@ -0,0 +1,28 @@ +#Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary> +import subprocess +import shlex +import sys +from mercurial import node + +def build_filter(args): + return Filter(args) + +class Filter: + def __init__(self, args): + self.filter_contents = shlex.split(args) + + def file_data_filter(self,file_data): + d = file_data['data'] + file_ctx = file_data['file_ctx'] + filename = file_data['filename'] + filter_cmd = self.filter_contents + [filename, node.hex(file_ctx.filenode()), '1' if file_ctx.isbinary() else '0'] + try: + filter_proc = subprocess.Popen(filter_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + d, _ = filter_proc.communicate(d) + except: + sys.stderr.write('Running filter-contents %s:\n' % filter_cmd) + raise + filter_ret = filter_proc.poll() + if filter_ret: + raise subprocess.CalledProcessError(filter_ret, filter_cmd) + file_data['data'] = d -- 2.11.4.GIT