From fe72467bffb6e5b45db1b2059d1c9b2c5f01a0df Mon Sep 17 00:00:00 2001 From: Sverre Rabbelier Date: Sun, 22 Aug 2010 01:22:14 -0500 Subject: [PATCH] git_remote_helpers: add fastimport library --- git_remote_helpers/fastimport/__init__.py | 0 git_remote_helpers/fastimport/commands.py | 469 +++++++++++++++++++ git_remote_helpers/fastimport/dates.py | 79 ++++ git_remote_helpers/fastimport/errors.py | 182 ++++++++ git_remote_helpers/fastimport/head_tracker.py | 47 ++ git_remote_helpers/fastimport/helpers.py | 88 ++++ git_remote_helpers/fastimport/idmapfile.py | 65 +++ git_remote_helpers/fastimport/parser.py | 621 ++++++++++++++++++++++++++ git_remote_helpers/fastimport/processor.py | 222 +++++++++ git_remote_helpers/setup.py | 3 +- 10 files changed, 1775 insertions(+), 1 deletion(-) create mode 100644 git_remote_helpers/fastimport/__init__.py create mode 100644 git_remote_helpers/fastimport/commands.py create mode 100644 git_remote_helpers/fastimport/dates.py create mode 100644 git_remote_helpers/fastimport/errors.py create mode 100644 git_remote_helpers/fastimport/head_tracker.py create mode 100644 git_remote_helpers/fastimport/helpers.py create mode 100644 git_remote_helpers/fastimport/idmapfile.py create mode 100644 git_remote_helpers/fastimport/parser.py create mode 100644 git_remote_helpers/fastimport/processor.py diff --git a/git_remote_helpers/fastimport/__init__.py b/git_remote_helpers/fastimport/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/git_remote_helpers/fastimport/commands.py b/git_remote_helpers/fastimport/commands.py new file mode 100644 index 0000000000..b3c86c4910 --- /dev/null +++ b/git_remote_helpers/fastimport/commands.py @@ -0,0 +1,469 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later 
version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Import command classes.""" + +import os + +# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes +# one extra character. Set this variable to True to work-around it. It only +# happens when renaming a file whose name contains spaces and/or quotes, and +# the symptom is: +# % git-fast-import +# fatal: Missing space after source: R "file 1.txt" file 2.txt +# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584 +GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False + + +# Lists of command names +COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'feature', 'progress', + 'reset', 'tag'] +FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename', + 'filedeleteall'] + + +# Feature names +MULTIPLE_AUTHORS_FEATURE = "multiple-authors" +COMMIT_PROPERTIES_FEATURE = "commit-properties" +EMPTY_DIRS_FEATURE = "empty-directories" +FEATURE_NAMES = [ + MULTIPLE_AUTHORS_FEATURE, + COMMIT_PROPERTIES_FEATURE, + EMPTY_DIRS_FEATURE, + ] + + +# for classes with no meaningful __str__() +def _simplerepr(self): + return "<%s at 0x%x>" % (self.__class__.__name__, id(self)) + +# classes that define __str__() should use this instead +def _detailrepr(self): + return ("<%s at 0x%x: %s>" + % (self.__class__.__name__, id(self), str(self))) + + +class ImportCommand(object): + """Base class for import commands.""" + + def __init__(self, name): + self.name = name + # List of field names not to display + self._binary = [] + + __repr__ = _simplerepr + + def 
format(self): + """Format this command as a fastimport dump fragment. + + Returns a (possibly multiline) string that, if seen in a + fastimport stream, would parse to an equivalent command object. + """ + raise NotImplementedError("abstract method") + + def dump_str(self, names=None, child_lists=None, verbose=False): + """Dump fields as a string. + + :param names: the list of fields to include or + None for all public fields + :param child_lists: dictionary of child command names to + fields for that child command to include + :param verbose: if True, prefix each line with the command class and + display fields as a dictionary; if False, dump just the field + values with tabs between them + """ + interesting = {} + if names is None: + fields = [k for k in self.__dict__.keys() if not k.startswith('_')] + else: + fields = names + for field in fields: + value = self.__dict__.get(field) + if field in self._binary and value is not None: + value = '(...)' + interesting[field] = value + if verbose: + return "%s: %s" % (self.__class__.__name__, interesting) + else: + return "\t".join([repr(interesting[k]) for k in fields]) + + +class _MarkMixin(object): + """mixin for fastimport commands with a mark: blob, commit.""" + def __init__(self, mark, location): + self.mark= mark + self.location = location + + # Provide a unique id in case the mark is missing + if mark is None: + self.id = '%s@%d' % (os.path.basename(location[0]), location[1]) + else: + self.id = ':%s' % mark + + def __str__(self): + return self.id + + __repr__ = _detailrepr + + +class BlobCommand(ImportCommand, _MarkMixin): + + def __init__(self, mark, data, location): + ImportCommand.__init__(self, 'blob') + _MarkMixin.__init__(self, mark, location) + self.data = data + self._binary = ['data'] + + def format(self): + if self.mark is None: + mark_line = "" + else: + mark_line = "\nmark :%s" % self.mark + return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data) + + +class 
CheckpointCommand(ImportCommand): + + def __init__(self): + ImportCommand.__init__(self, 'checkpoint') + + def format(self): + return "checkpoint" + + +class CommitCommand(ImportCommand, _MarkMixin): + + def __init__(self, ref, mark, author, committer, message, from_, + merges, file_cmds, location=None, more_authors=None, properties=None): + ImportCommand.__init__(self, 'commit') + _MarkMixin.__init__(self, mark, location) + self.ref = ref + self.author = author + self.committer = committer + self.message = message + self.from_ = from_ + self.merges = merges + self.file_cmds = file_cmds + self.more_authors = more_authors + self.properties = properties + self._binary = ['file_cmds'] + + def format(self, use_features=True, include_file_contents=True): + if self.mark is None: + mark_line = "" + else: + mark_line = "\nmark :%s" % self.mark + if self.author is None: + author_section = "" + else: + author_section = "\nauthor %s" % format_who_when(self.author) + if use_features and self.more_authors: + for author in self.more_authors: + author_section += "\nauthor %s" % format_who_when(author) + committer = "committer %s" % format_who_when(self.committer) + if self.message is None: + msg_section = "" + else: + msg = self.message.encode('utf8') + msg_section = "\ndata %d\n%s" % (len(msg), msg) + if self.from_ is None: + from_line = "" + else: + from_line = "\nfrom %s" % self.from_ + if self.merges is None: + merge_lines = "" + else: + merge_lines = "".join(["\nmerge %s" % (m,) + for m in self.merges]) + if use_features and self.properties: + property_lines = [] + for name in sorted(self.properties): + value = self.properties[name] + property_lines.append("\n" + format_property(name, value)) + properties_section = "".join(property_lines) + else: + properties_section = "" + if self.file_cmds is None: + filecommands = "" + else: + if include_file_contents: + format_str = "\n%r" + else: + format_str = "\n%s" + filecommands = "".join( + ["\n" + fc.format() for fc in 
self.file_cmds]) + return "commit %s%s%s\n%s%s%s%s%s%s" % (self.ref, mark_line, + author_section, committer, msg_section, from_line, merge_lines, + properties_section, filecommands) + + def dump_str(self, names=None, child_lists=None, verbose=False): + result = [ImportCommand.dump_str(self, names, verbose=verbose)] + for f in self.file_cmds: + if child_lists is None: + continue + try: + child_names = child_lists[f.name] + except KeyError: + continue + result.append("\t%s" % f.dump_str(child_names, verbose=verbose)) + return '\n'.join(result) + + +class FeatureCommand(ImportCommand): + + def __init__(self, feature_name, value=None, location=None): + ImportCommand.__init__(self, 'feature') + self.feature_name = feature_name + self.value = value + self.location = location + + def format(self): + if self.value is None: + value_text = "" + else: + value_text = "=%s" % self.value + return "feature %s%s" % (self.feature_name, value_text) + + +class ProgressCommand(ImportCommand): + + def __init__(self, message): + ImportCommand.__init__(self, 'progress') + self.message = message + + def format(self): + return "progress %s" % (self.message,) + + +class ResetCommand(ImportCommand): + + def __init__(self, ref, from_): + ImportCommand.__init__(self, 'reset') + self.ref = ref + self.from_ = from_ + + def format(self): + if self.from_ is None: + from_line = "" + else: + # According to git-fast-import(1), the extra LF is optional here; + # however, versions of git up to 1.5.4.3 had a bug by which the LF + # was needed. Always emit it, since it doesn't hurt and maintains + # compatibility with older versions. 
+ # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab + from_line = "\nfrom %s\n" % self.from_ + return "reset %s%s" % (self.ref, from_line) + + +class TagCommand(ImportCommand): + + def __init__(self, id, from_, tagger, message): + ImportCommand.__init__(self, 'tag') + self.id = id + self.from_ = from_ + self.tagger = tagger + self.message = message + + def __str__(self): + return self.id + + __repr__ = _detailrepr + + def format(self): + if self.from_ is None: + from_line = "" + else: + from_line = "\nfrom %s" % self.from_ + if self.tagger is None: + tagger_line = "" + else: + tagger_line = "\ntagger %s" % format_who_when(self.tagger) + if self.message is None: + msg_section = "" + else: + msg = self.message.encode('utf8') + msg_section = "\ndata %d\n%s" % (len(msg), msg) + return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section) + + +class FileCommand(ImportCommand): + """Base class for file commands.""" + pass + + +class FileModifyCommand(FileCommand): + + def __init__(self, path, mode, dataref, data): + # Either dataref or data should be null + FileCommand.__init__(self, 'filemodify') + self.path = check_path(path) + self.mode = mode + self.dataref = dataref + self.data = data + self._binary = ['data'] + + def __str__(self): + return self.path + + __repr__ = _detailrepr + + def format(self, include_file_contents=True): + datastr = "" + if self.dataref is None: + dataref = "inline" + if include_file_contents: + datastr = "\ndata %d\n%s" % (len(self.data), self.data) + else: + dataref = "%s" % (self.dataref,) + path = format_path(self.path) + return "M %s %s %s%s" % (self.mode, dataref, path, datastr) + + def is_regular(self): + """Return true if this is a regular file (mode 644).""" + return self.mode.endswith("644") + + def is_executable(self): + """Return true if this is an executable file (mode 755).""" + return self.mode.endswith("755") + + def is_symlink(self): + """Return true if this is a symlink (mode 
120000).""" + return self.mode == "120000" + + def is_gitlink(self): + """Return true if this is a gitlink (mode 160000).""" + return self.mode == "160000" + + +class FileDeleteCommand(FileCommand): + + def __init__(self, path): + FileCommand.__init__(self, 'filedelete') + self.path = check_path(path) + + def __str__(self): + return self.path + + __repr__ = _detailrepr + + def format(self): + return "D %s" % (format_path(self.path),) + + +class FileCopyCommand(FileCommand): + + def __init__(self, src_path, dest_path): + FileCommand.__init__(self, 'filecopy') + self.src_path = check_path(src_path) + self.dest_path = check_path(dest_path) + + def __str__(self): + return "%s -> %s" % (self.src_path, self.dest_path) + + __repr__ = _detailrepr + + def format(self): + return "C %s %s" % ( + format_path(self.src_path, quote_spaces=True), + format_path(self.dest_path)) + + +class FileRenameCommand(FileCommand): + + def __init__(self, old_path, new_path): + FileCommand.__init__(self, 'filerename') + self.old_path = check_path(old_path) + self.new_path = check_path(new_path) + + def __str__(self): + return "%s -> %s" % (self.old_path, self.new_path) + + __repr__ = _detailrepr + + def format(self): + return "R %s %s" % ( + format_path(self.old_path, quote_spaces=True), + format_path(self.new_path)) + + +class FileDeleteAllCommand(FileCommand): + + def __init__(self): + FileCommand.__init__(self, 'filedeleteall') + + def format(self): + return "deleteall" + + +def check_path(path): + """Check that a path is legal. 
+ + :return: the path if all is OK + :raise ValueError: if the path is illegal + """ + if path is None or path == '': + raise ValueError("illegal path '%s'" % path) + return path + + +def format_path(p, quote_spaces=False): + """Format a path in utf8, quoting it if necessary.""" + if '\n' in p: + import re + p = re.sub('\n', '\\n', p) + quote = True + else: + quote = p[0] == '"' or (quote_spaces and ' ' in p) + if quote: + extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or '' + p = '"%s"%s' % (p, extra) + return p.encode('utf8') + + +def format_who_when(fields): + """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string.""" + offset = fields[3] + if offset < 0: + offset_sign = '-' + offset = abs(offset) + else: + offset_sign = '+' + offset_hours = offset / 3600 + offset_minutes = offset / 60 - offset_hours * 60 + offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes) + name = fields[0] + if name == '': + sep = '' + else: + sep = ' ' + if isinstance(name, unicode): + name = name.encode('utf8') + email = fields[1] + if isinstance(email, unicode): + email = email.encode('utf8') + result = "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str) + return result + + +def format_property(name, value): + """Format the name and value (both unicode) of a property as a string.""" + utf8_name = name.encode('utf8') + if value is not None: + utf8_value = value.encode('utf8') + result = "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value) + else: + result = "property %s" % (utf8_name,) + return result diff --git a/git_remote_helpers/fastimport/dates.py b/git_remote_helpers/fastimport/dates.py new file mode 100644 index 0000000000..f532b2e249 --- /dev/null +++ b/git_remote_helpers/fastimport/dates.py @@ -0,0 +1,79 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free 
Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Date parsing routines. + +Each routine returns timestamp,timezone where + +* timestamp is seconds since epoch +* timezone is the offset from UTC in seconds. +""" + + +import time + +from git_remote_helpers.fastimport import errors + + +def parse_raw(s, lineno=0): + """Parse a date from a raw string. + + The format must be exactly "seconds-since-epoch offset-utc". + See the spec for details. + """ + timestamp_str, timezone_str = s.split(' ', 1) + timestamp = float(timestamp_str) + timezone = _parse_tz(timezone_str, lineno) + return timestamp, timezone + + +def _parse_tz(tz, lineno): + """Parse a timezone specification in the [+|-]HHMM format. + + :return: the timezone offset in seconds. + """ + # from git_repository.py in bzr-git + if len(tz) != 5: + raise errors.InvalidTimezone(lineno, tz) + sign = {'+': +1, '-': -1}[tz[0]] + hours = int(tz[1:3]) + minutes = int(tz[3:]) + return sign * 60 * (60 * hours + minutes) + + +def parse_rfc2822(s, lineno=0): + """Parse a date from a rfc2822 string. + + See the spec for details. + """ + raise NotImplementedError(parse_rfc2822) + + +def parse_now(s, lineno=0): + """Parse a date from a string. + + The format must be exactly "now". + See the spec for details. 
+ """ + return time.time(), 0 + + +# Lookup table of date parsing routines +DATE_PARSERS_BY_NAME = { + 'raw': parse_raw, + 'rfc2822': parse_rfc2822, + 'now': parse_now, + } diff --git a/git_remote_helpers/fastimport/errors.py b/git_remote_helpers/fastimport/errors.py new file mode 100644 index 0000000000..b8cf26fd09 --- /dev/null +++ b/git_remote_helpers/fastimport/errors.py @@ -0,0 +1,182 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Exception classes for fastimport""" + + +class FastImportError(StandardError): + """The base exception class for all import processing exceptions.""" + + _fmt = "Unknown Import Error" + + def __str__(self): + return self._fmt % self.__dict__ + +class ParsingError(FastImportError): + """The base exception class for all import parsing exceptions.""" + + _fmt = "Unknown Import Parsing Error" + + def __init__(self, filename, lineno): + FastImportError.__init__(self) + self.filename = filename + self.lineno = lineno + + def __str__(self): + result = [] + if self.filename: + result.append(self.filename) + result.append(", ") + result.append("line ") + result.append(str(self.lineno)) + result.append(": ") + result.append(FastImportError.__str__(self)) + return "".join(result) + + +class MissingBytes(ParsingError): + """Raised when EOF
encountered while expecting to find more bytes.""" + + _fmt = ("Unexpected EOF - expected %(expected)d bytes," + " found %(found)d") + + def __init__(self, filename, lineno, expected, found): + ParsingError.__init__(self, filename, lineno) + self.expected = expected + self.found = found + + +class MissingTerminator(ParsingError): + """Raised when EOF encountered while expecting to find a terminator.""" + + _fmt = "Unexpected EOF - expected '%(terminator)s' terminator" + + def __init__(self, filename, lineno, terminator): + ParsingError.__init__(self, filename, lineno) + self.terminator = terminator + + +class InvalidCommand(ParsingError): + """Raised when an unknown command found.""" + + _fmt = ("Invalid command '%(cmd)s'") + + def __init__(self, filename, lineno, cmd): + ParsingError.__init__(self, filename, lineno) + self.cmd = cmd + + +class MissingSection(ParsingError): + """Raised when a section is required in a command but not present.""" + + _fmt = ("Command %(cmd)s is missing section %(section)s") + + def __init__(self, filename, lineno, cmd, section): + ParsingError.__init__(self, filename, lineno) + self.cmd = cmd + self.section = section + + +class BadFormat(ParsingError): + """Raised when a section is formatted incorrectly.""" + + _fmt = ("Bad format for section %(section)s in " + "command %(cmd)s: found '%(text)s'") + + def __init__(self, filename, lineno, cmd, section, text): + ParsingError.__init__(self, filename, lineno) + self.cmd = cmd + self.section = section + self.text = text + + +class InvalidTimezone(ParsingError): + """Raised when converting a string timezone to a seconds offset.""" + + _fmt = "Timezone %(timezone)r could not be converted.%(reason)s" + + def __init__(self, filename, lineno, timezone, reason=None): + ParsingError.__init__(self, filename, lineno) + self.timezone = timezone + if reason: + self.reason = ' ' + reason + else: + self.reason = '' + + +class UnknownDateFormat(FastImportError): + """Raised when an unknown date format 
is given.""" + + _fmt = ("Unknown date format '%(format)s'") + + def __init__(self, format): + FastImportError.__init__(self) + self.format = format + + +class MissingHandler(FastImportError): + """Raised when a processor can't handle a command.""" + + _fmt = ("Missing handler for command %(cmd)s") + + def __init__(self, cmd): + FastImportError.__init__(self) + self.cmd = cmd + + +class UnknownParameter(FastImportError): + """Raised when an unknown parameter is passed to a processor.""" + + _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s") + + def __init__(self, param, knowns): + FastImportError.__init__(self) + self.param = param + self.knowns = knowns + + +class BadRepositorySize(FastImportError): + """Raised when the repository has an incorrect number of revisions.""" + + _fmt = ("Bad repository size - %(found)d revisions found, " + "%(expected)d expected") + + def __init__(self, expected, found): + FastImportError.__init__(self) + self.expected = expected + self.found = found + + +class BadRestart(FastImportError): + """Raised when the import stream and id-map do not match up.""" + + _fmt = ("Bad restart - attempted to skip commit %(commit_id)s " + "but matching revision-id is unknown") + + def __init__(self, commit_id): + FastImportError.__init__(self) + self.commit_id = commit_id + + +class UnknownFeature(FastImportError): + """Raised when an unknown feature is given in the input stream.""" + + _fmt = ("Unknown feature '%(feature)s' - try a later importer or " + "an earlier data format") + + def __init__(self, feature): + FastImportError.__init__(self) + self.feature = feature diff --git a/git_remote_helpers/fastimport/head_tracker.py b/git_remote_helpers/fastimport/head_tracker.py new file mode 100644 index 0000000000..ad6b48c8b8 --- /dev/null +++ b/git_remote_helpers/fastimport/head_tracker.py @@ -0,0 +1,47 @@ + + +class HeadTracker(object): + """ + Keep track of the heads in a fastimport stream. 
+ """ + def __init__(self): + self.last_ref = None + + # map git ref name (e.g. "refs/heads/master") to id of last + # commit with that ref + self.last_ids = {} + + # the set of heads seen so far in the stream, as a mapping + # from commit id of the head to set of ref names + self.heads = {} + + def track_heads(self, cmd): + """Track the repository heads given a CommitCommand. + + :param cmd: the CommitCommand + :return: the list of parents in terms of commit-ids + """ + # Get the true set of parents + if cmd.from_ is not None: + parents = [cmd.from_] + else: + last_id = self.last_ids.get(cmd.ref) + if last_id is not None: + parents = [last_id] + else: + parents = [] + parents.extend(cmd.merges) + + # Track the heads + self.track_heads_for_ref(cmd.ref, cmd.id, parents) + return parents + + def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): + if parents is not None: + for parent in parents: + if parent in self.heads: + del self.heads[parent] + self.heads.setdefault(cmd_id, set()).add(cmd_ref) + self.last_ids[cmd_ref] = cmd_id + self.last_ref = cmd_ref + diff --git a/git_remote_helpers/fastimport/helpers.py b/git_remote_helpers/fastimport/helpers.py new file mode 100644 index 0000000000..3ce5a98e17 --- /dev/null +++ b/git_remote_helpers/fastimport/helpers.py @@ -0,0 +1,88 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Miscellaneous useful stuff.""" + +import os + +def single_plural(n, single, plural): + """Return a single or plural form of a noun based on number.""" + if n == 1: + return single + else: + return plural + + +def invert_dict(d): + """Invert a dictionary with keys matching each value turned into a list.""" + # Based on recipe from ASPN + result = {} + for k, v in d.iteritems(): + keys = result.setdefault(v, []) + keys.append(k) + return result + + +def invert_dictset(d): + """Invert a dictionary with keys matching a set of values, turned into lists.""" + # Based on recipe from ASPN + result = {} + for k, c in d.iteritems(): + for v in c: + keys = result.setdefault(v, []) + keys.append(k) + return result + + +def _common_path_and_rest(l1, l2, common=[]): + # From http://code.activestate.com/recipes/208993/ + if len(l1) < 1: return (common, l1, l2) + if len(l2) < 1: return (common, l1, l2) + if l1[0] != l2[0]: return (common, l1, l2) + return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]]) + + +def common_path(path1, path2): + """Find the common bit of 2 paths.""" + return ''.join(_common_path_and_rest(path1, path2)[0]) + + +def common_directory(paths): + """Find the deepest common directory of a list of paths. + + :return: if no paths are provided, None is returned; + if there is no common directory, '' is returned; + otherwise the common directory with a trailing / is returned. 
+ """ + def get_dir_with_slash(path): + if path == '' or path.endswith('/'): + return path + else: + dirname, basename = os.path.split(path) + if dirname == '': + return dirname + else: + return dirname + '/' + + if not paths: + return None + elif len(paths) == 1: + return get_dir_with_slash(paths[0]) + else: + common = common_path(paths[0], paths[1]) + for path in paths[2:]: + common = common_path(common, path) + return get_dir_with_slash(common) diff --git a/git_remote_helpers/fastimport/idmapfile.py b/git_remote_helpers/fastimport/idmapfile.py new file mode 100644 index 0000000000..7b4ccf4afe --- /dev/null +++ b/git_remote_helpers/fastimport/idmapfile.py @@ -0,0 +1,65 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Routines for saving and loading the id-map file.""" + +import os + + +def save_id_map(filename, revision_ids): + """Save the mapping of commit ids to revision ids to a file. + + Throws the usual exceptions if the file cannot be opened, + written to or closed. + + :param filename: name of the file to save the data to + :param revision_ids: a dictionary of commit ids to revision ids. 
+ """ + f = open(filename, 'wb') + try: + for commit_id, rev_id in revision_ids.iteritems(): + f.write("%s %s\n" % (commit_id, rev_id)) + f.flush() + finally: + f.close() + + +def load_id_map(filename): + """Load the mapping of commit ids to revision ids from a file. + + If the file does not exist, an empty result is returned. + If the file does exist but cannot be opened, read or closed, + the normal exceptions are thrown. + + NOTE: It is assumed that commit-ids do not have embedded spaces. + + :param filename: name of the file to load the data from + :result: map, count where: + map = a dictionary of commit ids to revision ids; + count = the number of keys in map + """ + result = {} + count = 0 + if os.path.exists(filename): + f = open(filename) + try: + for line in f: + parts = line[:-1].split(' ', 1) + result[parts[0]] = parts[1] + count += 1 + finally: + f.close() + return result, count diff --git a/git_remote_helpers/fastimport/parser.py b/git_remote_helpers/fastimport/parser.py new file mode 100644 index 0000000000..f9c2655913 --- /dev/null +++ b/git_remote_helpers/fastimport/parser.py @@ -0,0 +1,621 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import warnings + +"""Parser of import data into command objects.
+ +In order to reuse existing front-ends, the stream format is a subset of +the one used by git-fast-import (as of the 1.5.4 release of git at least). +The grammar is: + + stream ::= cmd*; + + cmd ::= new_blob + | new_commit + | new_tag + | reset_branch + | checkpoint + | progress + ; + + new_blob ::= 'blob' lf + mark? + file_content; + file_content ::= data; + + new_commit ::= 'commit' sp ref_str lf + mark? + ('author' sp name '<' email '>' when lf)? + 'committer' sp name '<' email '>' when lf + commit_msg + ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? + ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* + file_change* + lf?; + commit_msg ::= data; + + file_change ::= file_clr + | file_del + | file_rnm + | file_cpy + | file_obm + | file_inm; + file_clr ::= 'deleteall' lf; + file_del ::= 'D' sp path_str lf; + file_rnm ::= 'R' sp path_str sp path_str lf; + file_cpy ::= 'C' sp path_str sp path_str lf; + file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; + file_inm ::= 'M' sp mode sp 'inline' sp path_str lf + data; + + new_tag ::= 'tag' sp tag_str lf + 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf + 'tagger' sp name '<' email '>' when lf + tag_msg; + tag_msg ::= data; + + reset_branch ::= 'reset' sp ref_str lf + ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? + lf?; + + checkpoint ::= 'checkpoint' lf + lf?; + + progress ::= 'progress' sp not_lf* lf + lf?; + + # note: the first idnum in a stream should be 1 and subsequent + # idnums should not have gaps between values as this will cause + # the stream parser to reserve space for the gapped values. An + # idnum can be updated in the future to a new object by issuing + # a new mark directive with the old idnum. + # + mark ::= 'mark' sp idnum lf; + data ::= (delimited_data | exact_data) + lf?; + + # note: delim may be any string but must not contain lf. + # data_line may contain any data but must not be exactly + # delim. 
The lf after the final data_line is included in + # the data. + delimited_data ::= 'data' sp '<<' delim lf + (data_line lf)* + delim lf; + + # note: declen indicates the length of binary_data in bytes. + # declen does not include the lf preceding the binary data. + # + exact_data ::= 'data' sp declen lf + binary_data; + + # note: quoted strings are C-style quoting supporting \c for + # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn + # is the signed byte value in octal. Note that the only + # characters which must actually be escaped to protect the + # stream formatting are: \, \" and LF. Otherwise these values + # are UTF8. + # + ref_str ::= ref; + sha1exp_str ::= sha1exp; + tag_str ::= tag; + path_str ::= path | '"' quoted(path) '"' ; + mode ::= '100644' | '644' + | '100755' | '755' + | '120000' + ; + + declen ::= # unsigned 32 bit value, ascii base10 notation; + bigint ::= # unsigned integer value, ascii base10 notation; + binary_data ::= # file content, not interpreted; + + when ::= raw_when | rfc2822_when; + raw_when ::= ts sp tz; + rfc2822_when ::= # Valid RFC 2822 date and time; + + sp ::= # ASCII space character; + lf ::= # ASCII newline (LF) character; + + # note: a colon (':') must precede the numerical value assigned to + # an idnum. This is to distinguish it from a ref or tag name as + # GIT does not permit ':' in ref or tag strings. + # + idnum ::= ':' bigint; + path ::= # GIT style file path, e.g. \"a/b/c\"; + ref ::= # GIT ref name, e.g. \"refs/heads/MOZ_GECKO_EXPERIMENT\"; + tag ::= # GIT tag name, e.g. \"FIREFOX_1_5\"; + sha1exp ::= # Any valid GIT SHA1 expression; + hexsha1 ::= # SHA1 in hexadecimal format; + + # note: name and email are UTF8 strings, however name must not + # contain '<' or lf and email must not contain any of the + # following: '<', '>', lf.
+ # + name ::= # valid GIT author/committer name; + email ::= # valid GIT author/committer email; + ts ::= # time since the epoch in seconds, ascii base10 notation; + tz ::= # GIT style timezone; + + # note: comments may appear anywhere in the input, except + # within a data command. Any form of the data command + # always escapes the related input from comment processing. + # + # In case it is not clear, the '#' that starts the comment + # must be the first character on that line (an lf must have + # preceded it). + # + comment ::= '#' not_lf* lf; + not_lf ::= # Any byte that is not ASCII newline (LF); +""" + + +import re +import sys + +from git_remote_helpers.fastimport import ( + commands, + dates, + errors + ) + + +## Stream parsing ## + +class LineBasedParser(object): + + def __init__(self, input, filename=None): + """A Parser that keeps track of line numbers. + + :param input: the file-like object to read from + """ + self.input = input + if filename is None: + try: + self.filename = input.name + except AttributeError: + self.filename = "(unknown)" + else: + self.filename = filename + self.lineno = 0 + # Lines pushed back onto the input stream + self._buffer = [] + + def abort(self, exception, *args): + """Raise an exception providing line number information.""" + raise exception(self.filename, self.lineno, *args) + + def readline(self): + """Get the next line including the newline or '' on EOF.""" + self.lineno += 1 + if self._buffer: + return self._buffer.pop() + else: + return self.input.readline() + + def next_line(self): + """Get the next line without the newline or None on EOF.""" + line = self.readline() + if line: + return line[:-1] + else: + return None + + def push_line(self, line): + """Push line back onto the line buffer. + + :param line: the line with no trailing newline + """ + self.lineno -= 1 + self._buffer.append(line + "\n") + + def read_bytes(self, count): + """Read a given number of bytes from the input stream.
+ + Throws MissingBytes if the bytes are not found. + + Note: This method does not read from the line buffer. + + :return: a string + """ + result = self.input.read(count) + found = len(result) + self.lineno += result.count("\n") + if found != count: + self.abort(errors.MissingBytes, count, found) + return result + + def read_until(self, terminator): + """Read the input stream until the terminator is found. + + Throws MissingTerminator if the terminator is not found. + + Note: This method does not read from the line buffer. + + :return: the bytes read up to but excluding the terminator. + """ + + lines = [] + term = terminator + '\n' + while True: + line = self.input.readline() + if line == term: + break + else: + lines.append(line) + return ''.join(lines) + + +# Regular expression used for parsing. (Note: The spec states that the name +# part should be non-empty but git-fast-export doesn't always do that so +# the first bit is \w*, not \w+.) Also git-fast-import code says the +# space before the email is optional. +_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)') +_WHO_RE = re.compile(r'([^<]*)<(.*)>') + + +class ImportParser(LineBasedParser): + + def __init__(self, input, filename=None): + """A Parser of import commands. + + :param input: the file-like object to read from + :param verbose: display extra information of not + """ + LineBasedParser.__init__(self, input, filename) + + # We auto-detect the date format when a date is first encountered + self.date_parser = None + + def warning(self, msg): + sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg)) + + def parse(self): + """Parse the input stream, yielding a sequence of ImportCommand + objects. Iteration terminates on EOF. 
Raises InvalidCommand on + parse error.""" + while True: + line = self.next_line() + if line is None: + break + elif len(line) == 0 or line.startswith('#'): + continue + # Search for commands in order of likelihood + elif line.startswith('commit '): + yield self._parse_commit(line[len('commit '):]) + elif line.startswith('blob'): + yield self._parse_blob() + elif line.startswith('done'): + break + elif line.startswith('progress '): + yield commands.ProgressCommand(line[len('progress '):]) + elif line.startswith('reset '): + yield self._parse_reset(line[len('reset '):]) + elif line.startswith('tag '): + yield self._parse_tag(line[len('tag '):]) + elif line.startswith('checkpoint'): + yield commands.CheckpointCommand() + elif line.startswith('feature'): + yield self._parse_feature(line[len('feature '):]) + else: + self.abort(errors.InvalidCommand, line) + + def iter_commands(self): + warnings.warn("iter_commands() deprecated: use parse()", + DeprecationWarning, stacklevel=2) + return self.parse() + + def iter_file_commands(self): + """Iterator returning FileCommand objects. + + If an invalid file command is found, the line is silently + pushed back and iteration ends. 
+ """ + while True: + line = self.next_line() + if line is None: + break + elif len(line) == 0 or line.startswith('#'): + continue + # Search for file commands in order of likelihood + elif line.startswith('M '): + yield self._parse_file_modify(line[2:]) + elif line.startswith('D '): + path = self._path(line[2:]) + yield commands.FileDeleteCommand(path) + elif line.startswith('R '): + old, new = self._path_pair(line[2:]) + yield commands.FileRenameCommand(old, new) + elif line.startswith('C '): + src, dest = self._path_pair(line[2:]) + yield commands.FileCopyCommand(src, dest) + elif line.startswith('deleteall'): + yield commands.FileDeleteAllCommand() + else: + self.push_line(line) + break + + def _parse_blob(self): + """Parse a blob command.""" + location = (self.filename, self.lineno) + mark = self._get_mark_if_any() + data = self._get_data('blob') + return commands.BlobCommand(mark, data, location) + + def _parse_commit(self, ref): + """Parse a commit command.""" + location = (self.filename, self.lineno) + mark = self._get_mark_if_any() + author = self._get_user_info('commit', 'author', False) + more_authors = [] + while True: + another_author = self._get_user_info('commit', 'author', False) + if another_author is not None: + more_authors.append(another_author) + else: + break + committer = self._get_user_info('commit', 'committer') + message = self._get_data('commit', 'message') + try: + message = message.decode('utf_8') + except UnicodeDecodeError: + self.warning( + "commit message not in utf8 - replacing unknown characters") + message = message.decode('utf_8', 'replace') + from_ = self._get_from() + merges = [] + while True: + merge = self._get_merge() + if merge is not None: + # while the spec suggests it's illegal, git-fast-export + # outputs multiple merges on the one line, e.g. 
+ # merge :x :y :z + these_merges = merge.split(" ") + merges.extend(these_merges) + else: + break + properties = {} + while True: + name_value = self._get_property() + if name_value is not None: + name, value = name_value + properties[name] = value + else: + break + file_cmds = list(self.iter_file_commands()) + return commands.CommitCommand(ref, mark, author, committer, message, + from_, merges, file_cmds, location, + more_authors=more_authors, properties=properties) + + def _parse_feature(self, info): + """Parse a feature command.""" + parts = info.split("=", 1) + name = parts[0] + if len(parts) > 1: + value = self._path(parts[1]) + else: + value = None + location = (self.filename, self.lineno) + return commands.FeatureCommand(name, value, location=location) + + + def _parse_file_modify(self, info): + """Parse a filemodify command within a commit. + + :param info: a string in the format "mode dataref path" + (where dataref might be the hard-coded literal 'inline'). + """ + params = info.split(' ', 2) + path = self._path(params[2]) + mode = params[0] + if params[1] == 'inline': + dataref = None + data = self._get_data('filemodify') + else: + dataref = params[1] + data = None + return commands.FileModifyCommand(path, mode, dataref, data) + + def _parse_reset(self, ref): + """Parse a reset command.""" + from_ = self._get_from() + return commands.ResetCommand(ref, from_) + + def _parse_tag(self, name): + """Parse a tag command.""" + from_ = self._get_from('tag') + tagger = self._get_user_info('tag', 'tagger', accept_just_who=True) + message = self._get_data('tag', 'message').decode('utf_8') + return commands.TagCommand(name, from_, tagger, message) + + def _get_mark_if_any(self): + """Parse a mark section.""" + line = self.next_line() + if line.startswith('mark :'): + return line[len('mark :'):] + else: + self.push_line(line) + return None + + def _get_from(self, required_for=None): + """Parse a from section.""" + line = self.next_line() + if line is None: + return 
None + elif line.startswith('from '): + return line[len('from '):] + elif required_for: + self.abort(errors.MissingSection, required_for, 'from') + else: + self.push_line(line) + return None + + def _get_merge(self): + """Parse a merge section.""" + line = self.next_line() + if line is None: + return None + elif line.startswith('merge '): + return line[len('merge '):] + else: + self.push_line(line) + return None + + def _get_property(self): + """Parse a property section.""" + line = self.next_line() + if line is None: + return None + elif line.startswith('property '): + return self._name_value(line[len('property '):]) + else: + self.push_line(line) + return None + + def _get_user_info(self, cmd, section, required=True, + accept_just_who=False): + """Parse a user section.""" + line = self.next_line() + if line.startswith(section + ' '): + return self._who_when(line[len(section + ' '):], cmd, section, + accept_just_who=accept_just_who) + elif required: + self.abort(errors.MissingSection, cmd, section) + else: + self.push_line(line) + return None + + def _get_data(self, required_for, section='data'): + """Parse a data section.""" + line = self.next_line() + if line.startswith('data '): + rest = line[len('data '):] + if rest.startswith('<<'): + return self.read_until(rest[2:]) + else: + size = int(rest) + read_bytes = self.read_bytes(size) + # optional LF after data. + next = self.input.readline() + self.lineno += 1 + if len(next) > 1 or next != "\n": + self.push_line(next[:-1]) + return read_bytes + else: + self.abort(errors.MissingSection, required_for, section) + + def _who_when(self, s, cmd, section, accept_just_who=False): + """Parse who and when information from a string. + + :return: a tuple of (name,email,timestamp,timezone). name may be + the empty string if only an email address was given. 
+ """ + match = _WHO_AND_WHEN_RE.search(s) + if match: + datestr = match.group(3) + if self.date_parser is None: + # auto-detect the date format + if len(datestr.split(' ')) == 2: + format = 'raw' + elif datestr == 'now': + format = 'now' + else: + format = 'rfc2822' + self.date_parser = dates.DATE_PARSERS_BY_NAME[format] + when = self.date_parser(datestr, self.lineno) + else: + match = _WHO_RE.search(s) + if accept_just_who and match: + # HACK around missing time + # TODO: output a warning here + when = dates.DATE_PARSERS_BY_NAME['now']('now') + else: + self.abort(errors.BadFormat, cmd, section, s) + + # Do not attempt to decode name or email address; they are just + # bytes. (Everything will work out better if they are in UTF-8, + # but that's not guaranteed.) + name = match.group(1).rstrip() + email = match.group(2) + return (name, email, when[0], when[1]) + + def _name_value(self, s): + """Parse a (name,value) tuple from 'name value-length value'.""" + parts = s.split(' ', 2) + name = parts[0] + if len(parts) == 1: + value = None + else: + size = int(parts[1]) + value = parts[2] + still_to_read = size - len(value) + if still_to_read == 1: + value += "\n" + elif still_to_read > 0: + read_bytes = self.read_bytes(still_to_read - 1) + value += "\n" + read_bytes + value = value.decode('utf8') + return (name, value) + + def _path(self, s): + """Parse a path.""" + if s.startswith('"'): + if s[-1] != '"': + self.abort(errors.BadFormat, '?', '?', s) + else: + return _unquote_c_string(s[1:-1]) + + # Do *not* decode the path to a Unicode string: filenames on + # Unix are just bytes. Git and Mercurial, at least, inherit + # this stance. git-fast-import(1) merely says "It is + # recommended that always be encoded using UTF-8.", which + # is good advice ... but not something we can count on here. 
+ return s + + def _path_pair(self, s): + """Parse two paths separated by a space.""" + # TODO: handle a space in the first path + if s.startswith('"'): + parts = s[1:].split('" ', 1) + else: + parts = s.split(' ', 1) + if len(parts) != 2: + self.abort(errors.BadFormat, '?', '?', s) + elif parts[1].startswith('"') and parts[1].endswith('"'): + parts[1] = parts[1][1:-1] + elif parts[1].startswith('"') or parts[1].endswith('"'): + self.abort(errors.BadFormat, '?', '?', s) + return map(_unquote_c_string, parts) + + def _mode(self, s): + """Parse a file mode into executable and symlink flags. + + :return (is_executable, is_symlink) + """ + # Note: Output from git-fast-export slightly different to spec + if s in ['644', '100644', '0100644']: + return False, False + elif s in ['755', '100755', '0100755']: + return True, False + elif s in ['120000', '0120000']: + return False, True + else: + self.abort(errors.BadFormat, 'filemodify', 'mode', s) + + +def _unquote_c_string(s): + """replace C-style escape sequences (\n, \", etc.) with real chars.""" + # HACK: Python strings are close enough + return s.decode('string_escape', 'replace') diff --git a/git_remote_helpers/fastimport/processor.py b/git_remote_helpers/fastimport/processor.py new file mode 100644 index 0000000000..bfb4226a46 --- /dev/null +++ b/git_remote_helpers/fastimport/processor.py @@ -0,0 +1,222 @@ +# Copyright (C) 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Processor of import commands. + +This module provides core processing functionality including an abstract class +for basing real processors on. See the processors package for examples. +""" + +import sys +import time +import logging + +from git_remote_helpers.fastimport import errors + +log = logging.getLogger(__name__) + + +class ImportProcessor(object): + """Base class for import processors. + + Subclasses should override the pre_*, post_* and *_handler + methods as appropriate. + """ + + known_params = [] + + def __init__(self, params=None, verbose=False, outf=None): + if outf is None: + self.outf = sys.stdout + else: + self.outf = outf + self.verbose = verbose + if params is None: + self.params = {} + else: + self.params = params + self.validate_parameters() + + # Handlers can set this to request exiting cleanly without + # iterating through the remaining commands + self.finished = False + + def validate_parameters(self): + """Validate that the parameters are correctly specified.""" + for p in self.params: + if p not in self.known_params: + raise errors.UnknownParameter(p, self.known_params) + + def process(self, commands): + """Process a stream of fast-import commands from a parser. + + :param commands: a sequence of commands.ImportCommand objects + """ + self.pre_process() + for cmd in commands: + try: + handler = self.__class__.__dict__[cmd.name + "_handler"] + except KeyError: + raise errors.MissingHandler(cmd.name) + else: + self.pre_handler(cmd) + handler(self, cmd) + self.post_handler(cmd) + if self.finished: + break + self.post_process() + + def pre_process(self): + """Hook for logic at start of processing. + + Called just before process() starts iterating over its sequence + of commands. 
+ """ + pass + + def post_process(self): + """Hook for logic at end of successful processing. + + Called after process() finishes successfully iterating over its + sequence of commands (i.e. not called if an exception is raised + while processing commands). + """ + pass + + def pre_handler(self, cmd): + """Hook for logic before each handler starts.""" + pass + + def post_handler(self, cmd): + """Hook for logic after each handler finishes.""" + pass + + def progress_handler(self, cmd): + """Process a ProgressCommand.""" + raise NotImplementedError(self.progress_handler) + + def blob_handler(self, cmd): + """Process a BlobCommand.""" + raise NotImplementedError(self.blob_handler) + + def checkpoint_handler(self, cmd): + """Process a CheckpointCommand.""" + raise NotImplementedError(self.checkpoint_handler) + + def commit_handler(self, cmd): + """Process a CommitCommand.""" + raise NotImplementedError(self.commit_handler) + + def reset_handler(self, cmd): + """Process a ResetCommand.""" + raise NotImplementedError(self.reset_handler) + + def tag_handler(self, cmd): + """Process a TagCommand.""" + raise NotImplementedError(self.tag_handler) + + def feature_handler(self, cmd): + """Process a FeatureCommand.""" + raise NotImplementedError(self.feature_handler) + + +class CommitHandler(object): + """Base class for commit handling. + + Subclasses should override the pre_*, post_* and *_handler + methods as appropriate. + """ + + def __init__(self, command): + self.command = command + + def process(self): + self.pre_process_files() + for fc in self.command.file_cmds: + try: + handler = self.__class__.__dict__[fc.name[4:] + "_handler"] + except KeyError: + raise errors.MissingHandler(fc.name) + else: + handler(self, fc) + self.post_process_files() + + def _log(self, level, msg, *args): + log.log(level, msg + " (%s)", *(args + (self.command.id,))) + + # Logging methods: unused in this library, but used by + # bzr-fastimport. Could be useful for other subclasses. 
+ + def note(self, msg, *args): + """log.info() with context about the command""" + self._log(logging.INFO, msg, *args) + + def warning(self, msg, *args): + """log.warning() with context about the command""" + self._log(logging.WARNING, msg, *args) + + def debug(self, msg, *args): + """log.debug() with context about the command""" + self._log(logging.DEBUG, msg, *args) + + def pre_process_files(self): + """Prepare for committing.""" + pass + + def post_process_files(self): + """Save the revision.""" + pass + + def modify_handler(self, filecmd): + """Handle a filemodify command.""" + raise NotImplementedError(self.modify_handler) + + def delete_handler(self, filecmd): + """Handle a filedelete command.""" + raise NotImplementedError(self.delete_handler) + + def copy_handler(self, filecmd): + """Handle a filecopy command.""" + raise NotImplementedError(self.copy_handler) + + def rename_handler(self, filecmd): + """Handle a filerename command.""" + raise NotImplementedError(self.rename_handler) + + def deleteall_handler(self, filecmd): + """Handle a filedeleteall command.""" + raise NotImplementedError(self.deleteall_handler) + + +def parseMany(filenames, parser_factory, processor): + """Parse multiple input files, sending the results all to + 'processor'. parser_factory must be a callable that takes one input + file and returns an ImportParser instance, e.g. the ImportParser + class object itself. Each file in 'filenames' is opened, parsed, + and closed in turn. For filename \"-\", reads stdin. 
+ """ + for filename in filenames: + if filename == "-": + infile = sys.stdin + else: + infile = open(filename, "rb") + + try: + parser = parser_factory(infile) + processor.process(parser.parse()) + finally: + if filename != "-": + infile.close() diff --git a/git_remote_helpers/setup.py b/git_remote_helpers/setup.py index 4d434b65cb..a19c061fdf 100644 --- a/git_remote_helpers/setup.py +++ b/git_remote_helpers/setup.py @@ -13,5 +13,6 @@ setup( author_email = 'git@vger.kernel.org', url = 'http://www.git-scm.com/', package_dir = {'git_remote_helpers': ''}, - packages = ['git_remote_helpers', 'git_remote_helpers.git'], + packages = ['git_remote_helpers', 'git_remote_helpers.git', + 'git_remote_helpers.fastimport'], ) -- 2.11.4.GIT