From 4c7c39d5e041cd44da3c2bb49eb1e5730bccb63b Mon Sep 17 00:00:00 2001 From: mhagger Date: Mon, 3 Feb 2014 12:51:50 +0000 Subject: [PATCH] cvs2git: Make the --blobfile argument optional. If it is not specified, then write the blobs to a temporary file in FilterSymbolsPass, then in OutputPass copy it to the start of the dumpfile. Please note that this increases the disk usage, because the blobfile is stored twice. git-svn-id: http://cvs2svn.tigris.org/svn/cvs2svn/trunk@5443 be7e6eca-30d4-0310-a8e5-ac0d63af7087 --- cvs2git-example.options | 12 +++++++--- cvs2svn_lib/config.py | 3 +++ cvs2svn_lib/external_blob_generator.py | 17 ++++++++++++-- cvs2svn_lib/git_output_option.py | 22 +++++++++++++++++ cvs2svn_lib/git_revision_collector.py | 15 ++++++++++-- cvs2svn_lib/git_run_options.py | 10 ++++---- run-tests.py | 43 ++++++++++++++++++++++++++++++---- 7 files changed, 107 insertions(+), 15 deletions(-) diff --git a/cvs2git-example.options b/cvs2git-example.options index 4d1f8e53..17f26c8c 100644 --- a/cvs2git-example.options +++ b/cvs2git-example.options @@ -160,13 +160,19 @@ ctx.revision_collector = GitRevisionCollector( CVSRevisionReader(cvs_executable=r'cvs'), # The file in which to write the git-fast-import stream that - # contains the file revision contents: + # contains the file revision contents. If None, it will be + # written to a temporary file then streamed to stdout in + # OutputPass: blob_filename='cvs2git-tmp/git-blob.dat', ) # This second alternative is vastly faster than the version above. It # uses an external Python program to reconstruct the contents of CVS -# file revisions: -#ctx.revision_collector = ExternalBlobGenerator('cvs2git-tmp/git-blob.dat') +# file revisions and write it to the specified file. 
If blob_filename +# is None, the blobs will be written to a temporary file then copied +# to the start of the dumpfile in OutputPass: +#ctx.revision_collector = ExternalBlobGenerator( +# blob_filename='cvs2git-tmp/git-blob.dat', +# ) # cvs2git doesn't need a revision reader because OutputPass only # refers to blobs that were output during CollectRevsPass, so leave diff --git a/cvs2svn_lib/config.py b/cvs2svn_lib/config.py index 416326c1..84dd421a 100644 --- a/cvs2svn_lib/config.py +++ b/cvs2svn_lib/config.py @@ -204,6 +204,9 @@ CVS_CHECKOUT_DB = 'cvs-checkout.db' # End of DBs related to --use-internal-co. +# Hold the generated blob content for the git back end. +GIT_BLOB_DATAFILE = "git-blobs.dat" + # flush a commit if a 5 minute gap occurs. COMMIT_THRESHOLD = 5 * 60 diff --git a/cvs2svn_lib/external_blob_generator.py b/cvs2svn_lib/external_blob_generator.py index 10a75e02..31958492 100644 --- a/cvs2svn_lib/external_blob_generator.py +++ b/cvs2svn_lib/external_blob_generator.py @@ -40,27 +40,40 @@ import os import subprocess import cPickle as pickle +from cvs2svn_lib import config from cvs2svn_lib.common import FatalError from cvs2svn_lib.log import logger from cvs2svn_lib.cvs_item import CVSRevisionDelete from cvs2svn_lib.revision_manager import RevisionCollector from cvs2svn_lib.key_generator import KeyGenerator +from cvs2svn_lib.artifact_manager import artifact_manager class ExternalBlobGenerator(RevisionCollector): """Have generate_blobs.py output file revisions to a blob file.""" - def __init__(self, blob_filename): + def __init__(self, blob_filename=None): self.blob_filename = blob_filename + def register_artifacts(self, which_pass): + RevisionCollector.register_artifacts(self, which_pass) + if self.blob_filename is None: + artifact_manager.register_temp_file( + config.GIT_BLOB_DATAFILE, which_pass, + ) + def start(self): self._mark_generator = KeyGenerator() logger.normal('Starting generate_blobs.py...') + if self.blob_filename is None: + blob_filename = 
artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE) + else: + blob_filename = self.blob_filename self._pipe = subprocess.Popen( [ sys.executable, os.path.join(os.path.dirname(__file__), 'generate_blobs.py'), - self.blob_filename, + blob_filename, ], stdin=subprocess.PIPE, ) diff --git a/cvs2svn_lib/git_output_option.py b/cvs2svn_lib/git_output_option.py index f6ed4880..935f8d90 100644 --- a/cvs2svn_lib/git_output_option.py +++ b/cvs2svn_lib/git_output_option.py @@ -24,7 +24,9 @@ For information about the format allowed by git-fast-import, see: import bisect import time +import shutil +from cvs2svn_lib import config from cvs2svn_lib.common import InternalError from cvs2svn_lib.log import logger from cvs2svn_lib.context import Ctx @@ -35,6 +37,7 @@ from cvs2svn_lib.cvs_item import CVSSymbol from cvs2svn_lib.dvcs_common import DVCSOutputOption from cvs2svn_lib.dvcs_common import MirrorUpdater from cvs2svn_lib.key_generator import KeyGenerator +from cvs2svn_lib.artifact_manager import artifact_manager class GitRevisionWriter(MirrorUpdater): @@ -68,6 +71,25 @@ class GitRevisionWriter(MirrorUpdater): class GitRevisionMarkWriter(GitRevisionWriter): + def register_artifacts(self, which_pass): + GitRevisionWriter.register_artifacts(self, which_pass) + if Ctx().revision_collector.blob_filename is None: + artifact_manager.register_temp_file_needed( + config.GIT_BLOB_DATAFILE, which_pass, + ) + + def start(self, mirror, f): + GitRevisionWriter.start(self, mirror, f) + if Ctx().revision_collector.blob_filename is None: + # The revision collector wrote the blobs to a temporary file; + # copy them into f: + logger.normal('Copying blob data to output') + blobf = open( + artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'rb', + ) + shutil.copyfileobj(blobf, f) + blobf.close() + def _modify_file(self, cvs_item, post_commit): if cvs_item.cvs_file.executable: mode = '100755' diff --git a/cvs2svn_lib/git_revision_collector.py b/cvs2svn_lib/git_revision_collector.py index 
8b9cc32e..70cd2975 100644 --- a/cvs2svn_lib/git_revision_collector.py +++ b/cvs2svn_lib/git_revision_collector.py @@ -16,24 +16,35 @@ """Write file contents to a stream of git-fast-import blobs.""" +from cvs2svn_lib import config from cvs2svn_lib.cvs_item import CVSRevisionDelete from cvs2svn_lib.revision_manager import RevisionCollector from cvs2svn_lib.key_generator import KeyGenerator +from cvs2svn_lib.artifact_manager import artifact_manager class GitRevisionCollector(RevisionCollector): """Output file revisions to git-fast-import.""" - def __init__(self, revision_reader, blob_filename): + def __init__(self, revision_reader, blob_filename=None): self.revision_reader = revision_reader self.blob_filename = blob_filename def register_artifacts(self, which_pass): self.revision_reader.register_artifacts(which_pass) + if self.blob_filename is None: + artifact_manager.register_temp_file( + config.GIT_BLOB_DATAFILE, which_pass, + ) def start(self): self.revision_reader.start() - self.dump_file = open(self.blob_filename, 'wb') + if self.blob_filename is None: + self.dump_file = open( + artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'wb', + ) + else: + self.dump_file = open(self.blob_filename, 'wb') self._mark_generator = KeyGenerator() def _process_revision(self, cvs_rev): diff --git a/cvs2svn_lib/git_run_options.py b/cvs2svn_lib/git_run_options.py index 4504dfe4..9c12db72 100644 --- a/cvs2svn_lib/git_run_options.py +++ b/cvs2svn_lib/git_run_options.py @@ -165,11 +165,13 @@ A directory under \\fI%s\\fR (or the directory specified by ctx.revision_collector = NullRevisionCollector() return - if not (options.blobfile and options.dumpfile): - raise FatalError("must pass '--blobfile' and '--dumpfile' options.") + if not (options.dumpfile): + raise FatalError('must pass \'--dumpfile\' option.') if options.use_external_blob_generator: - ctx.revision_collector = ExternalBlobGenerator(options.blobfile) + ctx.revision_collector = ExternalBlobGenerator( + 
blob_filename=options.blobfile, + ) else: if options.use_rcs: revision_reader = RCSRevisionReader( @@ -181,7 +183,7 @@ A directory under \\fI%s\\fR (or the directory specified by cvs_executable=options.cvs_executable ) ctx.revision_collector = GitRevisionCollector( - revision_reader, options.blobfile, + revision_reader, blob_filename=options.blobfile, ) def process_output_options(self): diff --git a/run-tests.py b/run-tests.py index d53e123f..edf7bbd1 100755 --- a/run-tests.py +++ b/run-tests.py @@ -3527,6 +3527,39 @@ def main_git2(): @Cvs2SvnTestFunction +def main_git_merged(): + "cvs2git with no blobfile" + + # Note: To test importing into git, do + # + # ./run-tests + # rm -rf cvs2svn-tmp/main.git + # git init --bare cvs2svn-tmp/main.git + # cd cvs2svn-tmp/main.git + # cat ../git-dump.dat | git fast-import + + conv = GitConversion('main', None, [ + '--dumpfile=cvs2svn-tmp/git-dump.dat', + '--username=cvs2git', + 'test-data/main-cvsrepos', + ]) + + +@Cvs2SvnTestFunction +def main_git2_merged(): + "cvs2git external with no blobfile" + + # See comment in main_git_merged() for more information. 
+ + conv = GitConversion('main', None, [ + '--use-external-blob-generator', + '--dumpfile=cvs2svn-tmp/dumpfile.out', + '--username=cvs2git', + 'test-data/main-cvsrepos', + ]) + + +@Cvs2SvnTestFunction def git_options(): "test cvs2git using options file" @@ -4195,8 +4228,10 @@ test_list = [ add_on_branch, main_git, main_git2, - git_options, + main_git_merged, # 150: + main_git2_merged, + git_options, main_hg, invalid_symbol, invalid_symbol_ignore, @@ -4205,9 +4240,9 @@ test_list = [ EOLVariants('CR'), EOLVariants('CRLF'), EOLVariants('native'), +# 160: no_revs_file, mirror_keyerror_test, -# 160: exclude_ntdb_test, mirror_keyerror2_test, mirror_keyerror3_test, @@ -4216,9 +4251,9 @@ test_list = [ transform_unlabeled_branch_name, ignore_unlabeled_branch, exclude_unlabeled_branch, +# 170: unlabeled_branch_name_collision, collision_with_unlabeled_branch_name, -# 170: many_deletes, cvs_description, include_empty_directories, @@ -4227,9 +4262,9 @@ test_list = [ add_on_branch2, branch_from_vendor_branch, strange_default_branch, +# 180: move_parent, log_message_eols, -# 180: missing_vendor_branch, newphrases, ] -- 2.11.4.GIT