From fe72467bffb6e5b45db1b2059d1c9b2c5f01a0df Mon Sep 17 00:00:00 2001
From: Sverre Rabbelier <srabbelier@gmail.com>
Date: Sun, 22 Aug 2010 01:22:14 -0500
Subject: [PATCH] git_remote_helpers: add fastimport library

---
 git_remote_helpers/fastimport/__init__.py     |   0
 git_remote_helpers/fastimport/commands.py     | 469 +++++++++++++++++++
 git_remote_helpers/fastimport/dates.py        |  79 ++++
 git_remote_helpers/fastimport/errors.py       | 182 ++++++++
 git_remote_helpers/fastimport/head_tracker.py |  47 ++
 git_remote_helpers/fastimport/helpers.py      |  88 ++++
 git_remote_helpers/fastimport/idmapfile.py    |  65 +++
 git_remote_helpers/fastimport/parser.py       | 621 ++++++++++++++++++++++++++
 git_remote_helpers/fastimport/processor.py    | 222 +++++++++
 git_remote_helpers/setup.py                   |   3 +-
 10 files changed, 1775 insertions(+), 1 deletion(-)
 create mode 100644 git_remote_helpers/fastimport/__init__.py
 create mode 100644 git_remote_helpers/fastimport/commands.py
 create mode 100644 git_remote_helpers/fastimport/dates.py
 create mode 100644 git_remote_helpers/fastimport/errors.py
 create mode 100644 git_remote_helpers/fastimport/head_tracker.py
 create mode 100644 git_remote_helpers/fastimport/helpers.py
 create mode 100644 git_remote_helpers/fastimport/idmapfile.py
 create mode 100644 git_remote_helpers/fastimport/parser.py
 create mode 100644 git_remote_helpers/fastimport/processor.py

diff --git a/git_remote_helpers/fastimport/__init__.py b/git_remote_helpers/fastimport/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/git_remote_helpers/fastimport/commands.py b/git_remote_helpers/fastimport/commands.py
new file mode 100644
index 0000000000..b3c86c4910
--- /dev/null
+++ b/git_remote_helpers/fastimport/commands.py
@@ -0,0 +1,469 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Import command classes."""
+
+import os
+
+# There is a bug in git 1.5.4.3 and older by which unquoting a string consumes
+# one extra character. Set this variable to True to work-around it. It only
+# happens when renaming a file whose name contains spaces and/or quotes, and
+# the symptom is:
+#   % git-fast-import
+#   fatal: Missing space after source: R "file 1.txt" file 2.txt
+# http://git.kernel.org/?p=git/git.git;a=commit;h=c8744d6a8b27115503565041566d97c21e722584
+GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE = False
+
+
+# Lists of command names
+COMMAND_NAMES = ['blob', 'checkpoint', 'commit', 'feature', 'progress',
+    'reset', 'tag']
+FILE_COMMAND_NAMES = ['filemodify', 'filedelete', 'filecopy', 'filerename',
+    'filedeleteall']
+
+
+# Feature names
+MULTIPLE_AUTHORS_FEATURE = "multiple-authors"
+COMMIT_PROPERTIES_FEATURE = "commit-properties"
+EMPTY_DIRS_FEATURE = "empty-directories"
+FEATURE_NAMES = [
+    MULTIPLE_AUTHORS_FEATURE,
+    COMMIT_PROPERTIES_FEATURE,
+    EMPTY_DIRS_FEATURE,
+    ]
+
+
+# for classes with no meaningful __str__()
+def _simplerepr(self):
+    return "<%s at 0x%x>" % (self.__class__.__name__, id(self))
+
+# classes that define __str__() should use this instead
+def _detailrepr(self):
+    return ("<%s at 0x%x: %s>"
+            % (self.__class__.__name__, id(self), str(self)))
+
+
+class ImportCommand(object):
+    """Base class for import commands."""
+
+    def __init__(self, name):
+        self.name = name
+        # List of field names not to display
+        self._binary = []
+
+    __repr__ = _simplerepr
+
+    def format(self):
+        """Format this command as a fastimport dump fragment.
+
+        Returns a (possibly multiline) string that, if seen in a
+        fastimport stream, would parse to an equivalent command object.
+        """
+        raise NotImplementedError("abstract method")
+
+    def dump_str(self, names=None, child_lists=None, verbose=False):
+        """Dump fields as a string.
+
+        :param names: the list of fields to include or
+            None for all public fields
+        :param child_lists: dictionary of child command names to
+            fields for that child command to include
+        :param verbose: if True, prefix each line with the command class and
+            display fields as a dictionary; if False, dump just the field
+            values with tabs between them
+        """
+        interesting = {}
+        if names is None:
+            fields = [k for k in self.__dict__.keys() if not k.startswith('_')]
+        else:
+            fields = names
+        for field in fields:
+            value = self.__dict__.get(field)
+            if field in self._binary and value is not None:
+                value = '(...)'
+            interesting[field] = value
+        if verbose:
+            return "%s: %s" % (self.__class__.__name__, interesting)
+        else:
+            return "\t".join([repr(interesting[k]) for k in fields])
+
+
+class _MarkMixin(object):
+    """mixin for fastimport commands with a mark: blob, commit."""
+    def __init__(self, mark, location):
+        self.mark= mark
+        self.location = location
+
+        # Provide a unique id in case the mark is missing
+        if mark is None:
+            self.id = '%s@%d' % (os.path.basename(location[0]), location[1])
+        else:
+            self.id = ':%s' % mark
+
+    def __str__(self):
+        return self.id
+
+    __repr__ = _detailrepr
+
+
+class BlobCommand(ImportCommand, _MarkMixin):
+
+    def __init__(self, mark, data, location):
+        ImportCommand.__init__(self, 'blob')
+        _MarkMixin.__init__(self, mark, location)
+        self.data = data
+        self._binary = ['data']
+
+    def format(self):
+        if self.mark is None:
+            mark_line = ""
+        else:
+            mark_line = "\nmark :%s" % self.mark
+        return "blob%s\ndata %d\n%s" % (mark_line, len(self.data), self.data)
+
+
+class CheckpointCommand(ImportCommand):
+
+    def __init__(self):
+        ImportCommand.__init__(self, 'checkpoint')
+
+    def format(self):
+        return "checkpoint"
+
+
+class CommitCommand(ImportCommand, _MarkMixin):
+
+    def __init__(self, ref, mark, author, committer, message, from_,
+        merges, file_cmds, location=None, more_authors=None, properties=None):
+        ImportCommand.__init__(self, 'commit')
+        _MarkMixin.__init__(self, mark, location)
+        self.ref = ref
+        self.author = author
+        self.committer = committer
+        self.message = message
+        self.from_ = from_
+        self.merges = merges
+        self.file_cmds = file_cmds
+        self.more_authors = more_authors
+        self.properties = properties
+        self._binary = ['file_cmds']
+
+    def format(self, use_features=True, include_file_contents=True):
+        if self.mark is None:
+            mark_line = ""
+        else:
+            mark_line = "\nmark :%s" % self.mark
+        if self.author is None:
+            author_section = ""
+        else:
+            author_section = "\nauthor %s" % format_who_when(self.author)
+            if use_features and self.more_authors:
+                for author in self.more_authors:
+                    author_section += "\nauthor %s" % format_who_when(author)
+        committer = "committer %s" % format_who_when(self.committer)
+        if self.message is None:
+            msg_section = ""
+        else:
+            msg = self.message.encode('utf8')
+            msg_section = "\ndata %d\n%s" % (len(msg), msg)
+        if self.from_ is None:
+            from_line = ""
+        else:
+            from_line = "\nfrom %s" % self.from_
+        if self.merges is None:
+            merge_lines = ""
+        else:
+            merge_lines = "".join(["\nmerge %s" % (m,)
+                for m in self.merges])
+        if use_features and self.properties:
+            property_lines = []
+            for name in sorted(self.properties):
+                value = self.properties[name]
+                property_lines.append("\n" + format_property(name, value))
+            properties_section = "".join(property_lines)
+        else:
+            properties_section = ""
+        if self.file_cmds is None:
+            filecommands = ""
+        else:
+            if include_file_contents:
+                format_str = "\n%r"
+            else:
+                format_str = "\n%s"
+            filecommands = "".join(
+                ["\n" + fc.format() for fc in self.file_cmds])
+        return "commit %s%s%s\n%s%s%s%s%s%s" % (self.ref, mark_line,
+            author_section, committer, msg_section, from_line, merge_lines,
+            properties_section, filecommands)
+
+    def dump_str(self, names=None, child_lists=None, verbose=False):
+        result = [ImportCommand.dump_str(self, names, verbose=verbose)]
+        for f in self.file_cmds:
+            if child_lists is None:
+                continue
+            try:
+                child_names = child_lists[f.name]
+            except KeyError:
+                continue
+            result.append("\t%s" % f.dump_str(child_names, verbose=verbose))
+        return '\n'.join(result)
+
+
+class FeatureCommand(ImportCommand):
+
+    def __init__(self, feature_name, value=None, location=None):
+        ImportCommand.__init__(self, 'feature')
+        self.feature_name = feature_name
+        self.value = value
+        self.location = location
+
+    def format(self):
+        if self.value is None:
+            value_text = ""
+        else:
+            value_text = "=%s" % self.value
+        return "feature %s%s" % (self.feature_name, value_text)
+
+
+class ProgressCommand(ImportCommand):
+
+    def __init__(self, message):
+        ImportCommand.__init__(self, 'progress')
+        self.message = message
+
+    def format(self):
+        return "progress %s" % (self.message,)
+
+
+class ResetCommand(ImportCommand):
+
+    def __init__(self, ref, from_):
+        ImportCommand.__init__(self, 'reset')
+        self.ref = ref
+        self.from_ = from_
+
+    def format(self):
+        if self.from_ is None:
+            from_line = ""
+        else:
+            # According to git-fast-import(1), the extra LF is optional here;
+            # however, versions of git up to 1.5.4.3 had a bug by which the LF
+            # was needed. Always emit it, since it doesn't hurt and maintains
+            # compatibility with older versions.
+            # http://git.kernel.org/?p=git/git.git;a=commit;h=655e8515f279c01f525745d443f509f97cd805ab
+            from_line = "\nfrom %s\n" % self.from_
+        return "reset %s%s" % (self.ref, from_line)
+
+
+class TagCommand(ImportCommand):
+
+    def __init__(self, id, from_, tagger, message):
+        ImportCommand.__init__(self, 'tag')
+        self.id = id
+        self.from_ = from_
+        self.tagger = tagger
+        self.message = message
+
+    def __str__(self):
+        return self.id
+
+    __repr__ = _detailrepr
+
+    def format(self):
+        if self.from_ is None:
+            from_line = ""
+        else:
+            from_line = "\nfrom %s" % self.from_
+        if self.tagger is None:
+            tagger_line = ""
+        else:
+            tagger_line = "\ntagger %s" % format_who_when(self.tagger)
+        if self.message is None:
+            msg_section = ""
+        else:
+            msg = self.message.encode('utf8')
+            msg_section = "\ndata %d\n%s" % (len(msg), msg)
+        return "tag %s%s%s%s" % (self.id, from_line, tagger_line, msg_section)
+
+
+class FileCommand(ImportCommand):
+    """Base class for file commands."""
+    pass
+
+
+class FileModifyCommand(FileCommand):
+
+    def __init__(self, path, mode, dataref, data):
+        # Either dataref or data should be null
+        FileCommand.__init__(self, 'filemodify')
+        self.path = check_path(path)
+        self.mode = mode
+        self.dataref = dataref
+        self.data = data
+        self._binary = ['data']
+
+    def __str__(self):
+        return self.path
+
+    __repr__ = _detailrepr
+
+    def format(self, include_file_contents=True):
+        datastr = ""
+        if self.dataref is None:
+            dataref = "inline"
+            if include_file_contents:
+                datastr = "\ndata %d\n%s" % (len(self.data), self.data)
+        else:
+            dataref = "%s" % (self.dataref,)
+        path = format_path(self.path)
+        return "M %s %s %s%s" % (self.mode, dataref, path, datastr)
+
+    def is_regular(self):
+        """Return true if this is a regular file (mode 644)."""
+        return self.mode.endswith("644")
+
+    def is_executable(self):
+        """Return true if this is an executable file (mode 755)."""
+        return self.mode.endswith("755")
+
+    def is_symlink(self):
+        """Return true if this is a symlink (mode 120000)."""
+        return self.mode == "120000"
+
+    def is_gitlink(self):
+        """Return true if this is a gitlink (mode 160000)."""
+        return self.mode == "160000"
+
+
+class FileDeleteCommand(FileCommand):
+
+    def __init__(self, path):
+        FileCommand.__init__(self, 'filedelete')
+        self.path = check_path(path)
+
+    def __str__(self):
+        return self.path
+
+    __repr__ = _detailrepr
+
+    def format(self):
+        return "D %s" % (format_path(self.path),)
+
+
+class FileCopyCommand(FileCommand):
+
+    def __init__(self, src_path, dest_path):
+        FileCommand.__init__(self, 'filecopy')
+        self.src_path = check_path(src_path)
+        self.dest_path = check_path(dest_path)
+
+    def __str__(self):
+        return "%s -> %s" % (self.src_path, self.dest_path)
+
+    __repr__ = _detailrepr
+
+    def format(self):
+        return "C %s %s" % (
+            format_path(self.src_path, quote_spaces=True),
+            format_path(self.dest_path))
+
+
+class FileRenameCommand(FileCommand):
+
+    def __init__(self, old_path, new_path):
+        FileCommand.__init__(self, 'filerename')
+        self.old_path = check_path(old_path)
+        self.new_path = check_path(new_path)
+
+    def __str__(self):
+        return "%s -> %s" % (self.old_path, self.new_path)
+
+    __repr__ = _detailrepr
+
+    def format(self):
+        return "R %s %s" % (
+            format_path(self.old_path, quote_spaces=True),
+            format_path(self.new_path))
+
+
+class FileDeleteAllCommand(FileCommand):
+
+    def __init__(self):
+        FileCommand.__init__(self, 'filedeleteall')
+
+    def format(self):
+        return "deleteall"
+
+
+def check_path(path):
+    """Check that a path is legal.
+
+    :return: the path if all is OK
+    :raise ValueError: if the path is illegal
+    """
+    if path is None or path == '':
+        raise ValueError("illegal path '%s'" % path)
+    return path
+
+
+def format_path(p, quote_spaces=False):
+    """Format a path in utf8, escaping newlines and quoting it if necessary."""
+    if '\n' in p:
+        # str.replace, not re.sub: re.sub expands a '\\n' template back to LF
+        p = p.replace('\n', '\\n')
+        quote = True
+    else:
+        quote = p[0] == '"' or (quote_spaces and ' ' in p)
+    if quote:
+        extra = GIT_FAST_IMPORT_NEEDS_EXTRA_SPACE_AFTER_QUOTE and ' ' or ''
+        p = '"%s"%s' % (p, extra)
+    return p.encode('utf8')
+
+
+def format_who_when(fields):
+    """Format a tuple of name,email,secs-since-epoch,utc-offset-secs as a string."""
+    offset = fields[3]
+    if offset < 0:
+        offset_sign = '-'
+        offset = abs(offset)
+    else:
+        offset_sign = '+'
+    offset_hours = offset / 3600
+    offset_minutes = offset / 60 - offset_hours * 60
+    offset_str = "%s%02d%02d" % (offset_sign, offset_hours, offset_minutes)
+    name = fields[0]
+    if name == '':
+        sep = ''
+    else:
+        sep = ' '
+    if isinstance(name, unicode):
+        name = name.encode('utf8')
+    email = fields[1]
+    if isinstance(email, unicode):
+        email = email.encode('utf8')
+    result = "%s%s<%s> %d %s" % (name, sep, email, fields[2], offset_str)
+    return result
+
+
+def format_property(name, value):
+    """Format the name and value (both unicode) of a property as a string."""
+    utf8_name = name.encode('utf8')
+    if value is not None:
+        utf8_value = value.encode('utf8')
+        result = "property %s %d %s" % (utf8_name, len(utf8_value), utf8_value)
+    else:
+        result = "property %s" % (utf8_name,)
+    return result
diff --git a/git_remote_helpers/fastimport/dates.py b/git_remote_helpers/fastimport/dates.py
new file mode 100644
index 0000000000..f532b2e249
--- /dev/null
+++ b/git_remote_helpers/fastimport/dates.py
@@ -0,0 +1,79 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Date parsing routines.
+
+Each routine returns timestamp,timezone where
+
+* timestamp is seconds since epoch
+* timezone is the offset from UTC in seconds.
+"""
+
+
+import time
+
+from git_remote_helpers.fastimport import errors
+
+
+def parse_raw(s, lineno=0):
+    """Parse a date from a raw string.
+    
+    The format must be exactly "seconds-since-epoch offset-utc".
+    See the spec for details.
+    """
+    timestamp_str, timezone_str = s.split(' ', 1)
+    timestamp = float(timestamp_str)
+    timezone = _parse_tz(timezone_str, lineno)
+    return timestamp, timezone
+
+
+def _parse_tz(tz, lineno):
+    """Parse a timezone specification in the [+|-]HHMM format.
+
+    :return: the timezone offset in seconds.
+    """
+    # from git_repository.py in bzr-git
+    if len(tz) != 5:
+        raise errors.InvalidTimezone(None, lineno, tz)
+    sign = {'+': +1, '-': -1}[tz[0]]
+    hours = int(tz[1:3])
+    minutes = int(tz[3:])
+    return sign * 60 * (60 * hours + minutes)
+
+
+def parse_rfc2822(s, lineno=0):
+    """Parse a date from a rfc2822 string.
+    
+    See the spec for details.
+    """
+    raise NotImplementedError(parse_rfc2822)
+
+
+def parse_now(s, lineno=0):
+    """Parse a date from a string.
+
+    The format must be exactly "now".
+    See the spec for details.
+    """
+    return time.time(), 0
+
+
+# Lookup table of date parsing routines
+DATE_PARSERS_BY_NAME = {
+    'raw':      parse_raw,
+    'rfc2822':  parse_rfc2822,
+    'now':      parse_now,
+    }
diff --git a/git_remote_helpers/fastimport/errors.py b/git_remote_helpers/fastimport/errors.py
new file mode 100644
index 0000000000..b8cf26fd09
--- /dev/null
+++ b/git_remote_helpers/fastimport/errors.py
@@ -0,0 +1,182 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Exception classes for fastimport"""
+
+
+class FastImportError(StandardError):
+    """The base exception class for all import processing exceptions."""
+
+    _fmt = "Unknown Import Error"
+
+    def __str__(self):
+        return self._fmt % self.__dict__
+
+class ParsingError(FastImportError):
+    """The base exception class for all import parsing exceptions."""
+
+    _fmt = "Unknown Import Parsing Error"
+
+    def __init__(self, filename, lineno):
+        FastImportError.__init__(self)
+        self.filename = filename
+        self.lineno = lineno
+
+    def __str__(self):
+        result = []
+        if self.filename:
+            result.append(self.filename)
+            result.append(", ")
+        result.append("line ")
+        result.append(str(self.lineno))
+        result.append(": ")
+        result.append(FastImportError.__str__(self))
+        return "".join(result)
+
+
+class MissingBytes(ParsingError):
+    """Raised when EOF encountered while expecting to find more bytes."""
+
+    _fmt = ("Unexpected EOF - expected %(expected)d bytes,"
+        " found %(found)d")
+
+    def __init__(self, filename, lineno, expected, found):
+        ParsingError.__init__(self, filename, lineno)
+        self.expected = expected
+        self.found = found
+
+
+class MissingTerminator(ParsingError):
+    """Raised when EOF encountered while expecting to find a terminator."""
+
+    _fmt = "Unexpected EOF - expected '%(terminator)s' terminator"
+
+    def __init__(self, filename, lineno, terminator):
+        ParsingError.__init__(self, filename, lineno)
+        self.terminator = terminator
+
+
+class InvalidCommand(ParsingError):
+    """Raised when an unknown command found."""
+
+    _fmt = ("Invalid command '%(cmd)s'")
+
+    def __init__(self, filename, lineno, cmd):
+        ParsingError.__init__(self, filename, lineno)
+        self.cmd = cmd
+
+
+class MissingSection(ParsingError):
+    """Raised when a section is required in a command but not present."""
+
+    _fmt = ("Command %(cmd)s is missing section %(section)s")
+
+    def __init__(self, filename, lineno, cmd, section):
+        ParsingError.__init__(self, filename, lineno)
+        self.cmd = cmd
+        self.section = section
+
+
+class BadFormat(ParsingError):
+    """Raised when a section is formatted incorrectly."""
+
+    _fmt = ("Bad format for section %(section)s in "
+            "command %(cmd)s: found '%(text)s'")
+
+    def __init__(self, filename, lineno, cmd, section, text):
+        ParsingError.__init__(self, filename, lineno)
+        self.cmd = cmd
+        self.section = section
+        self.text = text
+
+
+class InvalidTimezone(ParsingError):
+    """Raised when converting a string timezone to a seconds offset."""
+
+    _fmt = "Timezone %(timezone)r could not be converted.%(reason)s"
+
+    def __init__(self, filename, lineno, timezone, reason=None):
+        ParsingError.__init__(self, filename, lineno)
+        self.timezone = timezone
+        if reason:
+            self.reason = ' ' + reason
+        else:
+            self.reason = ''
+
+
+class UnknownDateFormat(FastImportError):
+    """Raised when an unknown date format is given."""
+
+    _fmt = ("Unknown date format '%(format)s'")
+
+    def __init__(self, format):
+        FastImportError.__init__(self)
+        self.format = format
+
+
+class MissingHandler(FastImportError):
+    """Raised when a processor can't handle a command."""
+
+    _fmt = ("Missing handler for command %(cmd)s")
+
+    def __init__(self, cmd):
+        FastImportError.__init__(self)
+        self.cmd = cmd
+
+
+class UnknownParameter(FastImportError):
+    """Raised when an unknown parameter is passed to a processor."""
+
+    _fmt = ("Unknown parameter - '%(param)s' not in %(knowns)s")
+
+    def __init__(self, param, knowns):
+        FastImportError.__init__(self)
+        self.param = param
+        self.knowns = knowns
+
+
+class BadRepositorySize(FastImportError):
+    """Raised when the repository has an incorrect number of revisions."""
+
+    _fmt = ("Bad repository size - %(found)d revisions found, "
+        "%(expected)d expected")
+
+    def __init__(self, expected, found):
+        FastImportError.__init__(self)
+        self.expected = expected
+        self.found = found
+
+
+class BadRestart(FastImportError):
+    """Raised when the import stream and id-map do not match up."""
+
+    _fmt = ("Bad restart - attempted to skip commit %(commit_id)s "
+        "but matching revision-id is unknown")
+
+    def __init__(self, commit_id):
+        FastImportError.__init__(self)
+        self.commit_id = commit_id
+
+
+class UnknownFeature(FastImportError):
+    """Raised when an unknown feature is given in the input stream."""
+
+    _fmt = ("Unknown feature '%(feature)s' - try a later importer or "
+        "an earlier data format")
+
+    def __init__(self, feature):
+        FastImportError.__init__(self)
+        self.feature = feature
diff --git a/git_remote_helpers/fastimport/head_tracker.py b/git_remote_helpers/fastimport/head_tracker.py
new file mode 100644
index 0000000000..ad6b48c8b8
--- /dev/null
+++ b/git_remote_helpers/fastimport/head_tracker.py
@@ -0,0 +1,47 @@
+
+
+class HeadTracker(object):
+    """
+    Keep track of the heads in a fastimport stream.
+    """
+    def __init__(self):
+        self.last_ref = None
+
+        # map git ref name (e.g. "refs/heads/master") to id of last
+        # commit with that ref
+        self.last_ids = {}
+
+        # the set of heads seen so far in the stream, as a mapping
+        # from commit id of the head to set of ref names
+        self.heads = {}
+
+    def track_heads(self, cmd):
+        """Track the repository heads given a CommitCommand.
+        
+        :param cmd: the CommitCommand
+        :return: the list of parents in terms of commit-ids
+        """
+        # Get the true set of parents
+        if cmd.from_ is not None:
+            parents = [cmd.from_]
+        else:
+            last_id = self.last_ids.get(cmd.ref)
+            if last_id is not None:
+                parents = [last_id]
+            else:
+                parents = []
+        parents.extend(cmd.merges)
+
+        # Track the heads
+        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
+        return parents
+
+    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
+        if parents is not None:
+            for parent in parents:
+                if parent in self.heads:
+                    del self.heads[parent]
+        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
+        self.last_ids[cmd_ref] = cmd_id
+        self.last_ref = cmd_ref
+    
diff --git a/git_remote_helpers/fastimport/helpers.py b/git_remote_helpers/fastimport/helpers.py
new file mode 100644
index 0000000000..3ce5a98e17
--- /dev/null
+++ b/git_remote_helpers/fastimport/helpers.py
@@ -0,0 +1,88 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Miscellaneous useful stuff."""
+
+import os
+
+def single_plural(n, single, plural):
+    """Return a single or plural form of a noun based on number."""
+    if n == 1:
+        return single
+    else:
+        return plural
+
+
+def invert_dict(d):
+    """Invert a dictionary with keys matching each value turned into a list."""
+    # Based on recipe from ASPN
+    result = {}
+    for k, v in d.iteritems():
+        keys = result.setdefault(v, [])
+        keys.append(k)
+    return result
+
+
+def invert_dictset(d):
+    """Invert a dictionary with keys matching a set of values, turned into lists."""
+    # Based on recipe from ASPN
+    result = {}
+    for k, c in d.iteritems():
+        for v in c:
+            keys = result.setdefault(v, [])
+            keys.append(k)
+    return result
+
+
+def _common_path_and_rest(l1, l2, common=[]):
+    # From http://code.activestate.com/recipes/208993/
+    if len(l1) < 1: return (common, l1, l2)
+    if len(l2) < 1: return (common, l1, l2)
+    if l1[0] != l2[0]: return (common, l1, l2)
+    return _common_path_and_rest(l1[1:], l2[1:], common+[l1[0]])
+
+
+def common_path(path1, path2):
+    """Find the common bit of 2 paths."""
+    return ''.join(_common_path_and_rest(path1, path2)[0])
+
+
+def common_directory(paths):
+    """Find the deepest common directory of a list of paths.
+    
+    :return: if no paths are provided, None is returned;
+      if there is no common directory, '' is returned;
+      otherwise the common directory with a trailing / is returned.
+    """
+    def get_dir_with_slash(path):
+        if path == '' or path.endswith('/'):
+            return path
+        else:
+            dirname, basename = os.path.split(path)
+            if dirname == '':
+                return dirname
+            else:
+                return dirname + '/'
+
+    if not paths:
+        return None
+    elif len(paths) == 1:
+        return get_dir_with_slash(paths[0])
+    else:
+        common = common_path(paths[0], paths[1])
+        for path in paths[2:]:
+            common = common_path(common, path)
+        return get_dir_with_slash(common)
diff --git a/git_remote_helpers/fastimport/idmapfile.py b/git_remote_helpers/fastimport/idmapfile.py
new file mode 100644
index 0000000000..7b4ccf4afe
--- /dev/null
+++ b/git_remote_helpers/fastimport/idmapfile.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Routines for saving and loading the id-map file."""
+
+import os
+
+
+def save_id_map(filename, revision_ids):
+    """Save the mapping of commit ids to revision ids to a file.
+
+    Throws the usual exceptions if the file cannot be opened,
+    written to or closed.
+
+    :param filename: name of the file to save the data to
+    :param revision_ids: a dictionary of commit ids to revision ids.
+    """
+    f = open(filename, 'wb')
+    try:
+        for commit_id, rev_id in revision_ids.iteritems():
+            f.write("%s %s\n" % (commit_id, rev_id))
+        f.flush()
+    finally:
+        f.close()
+
+
+def load_id_map(filename):
+    """Load the mapping of commit ids to revision ids from a file.
+
+    If the file does not exist, an empty result is returned.
+    If the file does exist but cannot be opened, read or closed,
+    the normal exceptions are thrown.
+
+    NOTE: It is assumed that commit-ids do not have embedded spaces.
+
+    :param filename: name of the file to load the data from
+    :result: map, count where:
+      map = a dictionary of commit ids to revision ids;
+      count = the number of keys in map
+    """
+    result = {}
+    count = 0
+    if os.path.exists(filename):
+        f = open(filename)
+        try:
+            for line in f:
+                parts = line[:-1].split(' ', 1)
+                result[parts[0]] = parts[1]
+                count += 1
+        finally:
+            f.close()
+    return result, count
diff --git a/git_remote_helpers/fastimport/parser.py b/git_remote_helpers/fastimport/parser.py
new file mode 100644
index 0000000000..f9c2655913
--- /dev/null
+++ b/git_remote_helpers/fastimport/parser.py
@@ -0,0 +1,621 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import warnings
+
+"""Parser of import data into command objects.
+
+In order to reuse existing front-ends, the stream format is a subset of
+the one used by git-fast-import (as of the 1.5.4 release of git at least).
+The grammar is:
+
+  stream ::= cmd*;
+
+  cmd ::= new_blob
+        | new_commit
+        | new_tag
+        | reset_branch
+        | checkpoint
+        | progress
+        ;
+
+  new_blob ::= 'blob' lf
+    mark?
+    file_content;
+  file_content ::= data;
+
+  new_commit ::= 'commit' sp ref_str lf
+    mark?
+    ('author' sp name '<' email '>' when lf)?
+    'committer' sp name '<' email '>' when lf
+    commit_msg
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
+    file_change*
+    lf?;
+  commit_msg ::= data;
+
+  file_change ::= file_clr
+    | file_del
+    | file_rnm
+    | file_cpy
+    | file_obm
+    | file_inm;
+  file_clr ::= 'deleteall' lf;
+  file_del ::= 'D' sp path_str lf;
+  file_rnm ::= 'R' sp path_str sp path_str lf;
+  file_cpy ::= 'C' sp path_str sp path_str lf;
+  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
+  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
+    data;
+
+  new_tag ::= 'tag' sp tag_str lf
+    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
+    'tagger' sp name '<' email '>' when lf
+    tag_msg;
+  tag_msg ::= data;
+
+  reset_branch ::= 'reset' sp ref_str lf
+    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+    lf?;
+
+  checkpoint ::= 'checkpoint' lf
+    lf?;
+
+  progress ::= 'progress' sp not_lf* lf
+    lf?;
+
+     # note: the first idnum in a stream should be 1 and subsequent
+     # idnums should not have gaps between values as this will cause
+     # the stream parser to reserve space for the gapped values.  An
+     # idnum can be updated in the future to a new object by issuing
+     # a new mark directive with the old idnum.
+     #
+  mark ::= 'mark' sp idnum lf;
+  data ::= (delimited_data | exact_data)
+    lf?;
+
+    # note: delim may be any string but must not contain lf.
+    # data_line may contain any data but must not be exactly
+    # delim. The lf after the final data_line is included in
+    # the data.
+  delimited_data ::= 'data' sp '<<' delim lf
+    (data_line lf)*
+    delim lf;
+
+     # note: declen indicates the length of binary_data in bytes.
+     # declen does not include the lf preceding the binary data.
+     #
+  exact_data ::= 'data' sp declen lf
+    binary_data;
+
+     # note: quoted strings are C-style quoting supporting \c for
+     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
+     # is the signed byte value in octal.  Note that the only
+     # characters which must actually be escaped to protect the
+     # stream formatting is: \, \" and LF.  Otherwise these values
+     # are UTF8.
+     #
+  ref_str     ::= ref;
+  sha1exp_str ::= sha1exp;
+  tag_str     ::= tag;
+  path_str    ::= path    | '"' quoted(path)    '"' ;
+  mode        ::= '100644' | '644'
+                | '100755' | '755'
+                | '120000'
+                ;
+
+  declen ::= # unsigned 32 bit value, ascii base10 notation;
+  bigint ::= # unsigned integer value, ascii base10 notation;
+  binary_data ::= # file content, not interpreted;
+
+  when         ::= raw_when | rfc2822_when;
+  raw_when     ::= ts sp tz;
+  rfc2822_when ::= # Valid RFC 2822 date and time;
+
+  sp ::= # ASCII space character;
+  lf ::= # ASCII newline (LF) character;
+
+     # note: a colon (':') must precede the numerical value assigned to
+     # an idnum.  This is to distinguish it from a ref or tag name as
+     # GIT does not permit ':' in ref or tag strings.
+     #
+  idnum   ::= ':' bigint;
+  path    ::= # GIT style file path, e.g. \"a/b/c\";
+  ref     ::= # GIT ref name, e.g. \"refs/heads/MOZ_GECKO_EXPERIMENT\";
+  tag     ::= # GIT tag name, e.g. \"FIREFOX_1_5\";
+  sha1exp ::= # Any valid GIT SHA1 expression;
+  hexsha1 ::= # SHA1 in hexadecimal format;
+
+     # note: name and email are UTF8 strings, however name must not
+     # contain '<' or lf and email must not contain any of the
+     # following: '<', '>', lf.
+     #
+  name  ::= # valid GIT author/committer name;
+  email ::= # valid GIT author/committer email;
+  ts    ::= # time since the epoch in seconds, ascii base10 notation;
+  tz    ::= # GIT style timezone;
+
+     # note: comments may appear anywhere in the input, except
+     # within a data command.  Any form of the data command
+     # always escapes the related input from comment processing.
+     #
+     # In case it is not clear, the '#' that starts the comment
+     # must be the first character on that line (an lf must have
+     # preceded it).
+     #
+  comment ::= '#' not_lf* lf;
+  not_lf  ::= # Any byte that is not ASCII newline (LF);
+"""
+
+
+import re
+import sys
+
+from git_remote_helpers.fastimport import (
+    commands,
+    dates,
+    errors
+    )
+
+
+## Stream parsing ##
+
+class LineBasedParser(object):
+
+    def __init__(self, input, filename=None):
+        """A Parser that keeps track of line numbers.
+
+        :param input: the file-like object to read from
+        :param filename: the name to report in error messages; when
+            omitted, input.name is used if the stream has one and
+            "(unknown)" otherwise
+        """
+        self.input = input
+        if filename is None:
+            # fall back to the stream's own name, if it has one
+            filename = getattr(input, 'name', "(unknown)")
+        self.filename = filename
+        self.lineno = 0
+        # Lines pushed back onto the input stream
+        self._buffer = []
+
+    def abort(self, exception, *args):
+        """Raise an exception providing line number information."""
+        raise exception(self.filename, self.lineno, *args)
+
+    def readline(self):
+        """Get the next line including the newline or '' on EOF."""
+        self.lineno += 1
+        if self._buffer:
+            return self._buffer.pop()
+        else:
+            return self.input.readline()
+
+    def next_line(self):
+        """Get the next line without the newline or None on EOF."""
+        line = self.readline()
+        if not line:
+            return None
+        # only drop a real terminator: the last line may lack one
+        return line.rstrip('\n')
+
+    def push_line(self, line):
+        """Push line back onto the line buffer.
+
+        :param line: the line with no trailing newline
+        """
+        self.lineno -= 1
+        self._buffer.append(line + "\n")
+
+    def read_bytes(self, count):
+        """Read a given number of bytes from the input stream.
+
+        Throws MissingBytes if the bytes are not found.
+
+        Note: This method does not read from the line buffer.
+
+        :return: a string
+        """
+        result = self.input.read(count)
+        found = len(result)
+        self.lineno += result.count("\n")
+        if found != count:
+            self.abort(errors.MissingBytes, count, found)
+        return result
+
+    def read_until(self, terminator):
+        """Read the input stream until the terminator is found.
+
+        Throws MissingTerminator if EOF is reached first.
+        Note: This method does not read from the line buffer.
+
+        :return: the bytes read up to but excluding the terminator.
+        """
+        lines = []
+        term = terminator + '\n'
+        while True:
+            line = self.input.readline()
+            if not line:
+                # readline() returns '' forever at EOF: fail, don't spin
+                self.abort(errors.MissingTerminator, terminator)
+            self.lineno += 1
+            if line == term:
+                return ''.join(lines)
+            lines.append(line)
+
+
+# Regular expressions used for parsing who/when lines. (Note: The spec
+# states that the name part should be non-empty but git-fast-export
+# doesn't always do that, so the name group [^<]* may match nothing.)
+# Also git-fast-import code says the space before the email is optional.
+_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
+_WHO_RE = re.compile(r'([^<]*)<(.*)>')
+
+
+class ImportParser(LineBasedParser):
+
+    def __init__(self, input, filename=None):
+        """A Parser of import commands.
+
+        :param input: the file-like object to read from
+        :param filename: the name to report in error messages
+        """
+        LineBasedParser.__init__(self, input, filename)
+
+        # We auto-detect the date format when a date is first encountered
+        self.date_parser = None
+
+    def warning(self, msg):
+        sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg))
+
+    def _decode_message(self, message, what):
+        """Decode a message from UTF-8, replacing undecodable bytes."""
+        try:
+            return message.decode('utf_8')
+        except UnicodeDecodeError:
+            self.warning("%s message not in utf8 - replacing unknown "
+                "characters" % what)
+            return message.decode('utf_8', 'replace')
+
+    def parse(self):
+        """Parse the input stream, yielding a sequence of ImportCommand
+        objects.  Iteration terminates on EOF.  Raises InvalidCommand on
+        parse error."""
+        while True:
+            line = self.next_line()
+            if line is None:
+                break
+            elif len(line) == 0 or line.startswith('#'):
+                continue
+            # Search for commands in order of likelihood
+            elif line.startswith('commit '):
+                yield self._parse_commit(line[len('commit '):])
+            elif line.startswith('blob'):
+                yield self._parse_blob()
+            elif line.startswith('done'):
+                break
+            elif line.startswith('progress '):
+                yield commands.ProgressCommand(line[len('progress '):])
+            elif line.startswith('reset '):
+                yield self._parse_reset(line[len('reset '):])
+            elif line.startswith('tag '):
+                yield self._parse_tag(line[len('tag '):])
+            elif line.startswith('checkpoint'):
+                yield commands.CheckpointCommand()
+            elif line.startswith('feature'):
+                yield self._parse_feature(line[len('feature '):])
+            else:
+                self.abort(errors.InvalidCommand, line)
+
+    def iter_commands(self):
+        warnings.warn("iter_commands() deprecated: use parse()",
+                      DeprecationWarning, stacklevel=2)
+        return self.parse()
+
+    def iter_file_commands(self):
+        """Iterator returning FileCommand objects.
+
+        If an invalid file command is found, the line is silently
+        pushed back and iteration ends.
+        """
+        while True:
+            line = self.next_line()
+            if line is None:
+                break
+            elif len(line) == 0 or line.startswith('#'):
+                continue
+            # Search for file commands in order of likelihood
+            elif line.startswith('M '):
+                yield self._parse_file_modify(line[2:])
+            elif line.startswith('D '):
+                path = self._path(line[2:])
+                yield commands.FileDeleteCommand(path)
+            elif line.startswith('R '):
+                old, new = self._path_pair(line[2:])
+                yield commands.FileRenameCommand(old, new)
+            elif line.startswith('C '):
+                src, dest = self._path_pair(line[2:])
+                yield commands.FileCopyCommand(src, dest)
+            elif line.startswith('deleteall'):
+                yield commands.FileDeleteAllCommand()
+            else:
+                self.push_line(line)
+                break
+
+    def _parse_blob(self):
+        """Parse a blob command."""
+        location = (self.filename, self.lineno)
+        mark = self._get_mark_if_any()
+        data = self._get_data('blob')
+        return commands.BlobCommand(mark, data, location)
+
+    def _parse_commit(self, ref):
+        """Parse a commit command."""
+        location = (self.filename, self.lineno)
+        mark = self._get_mark_if_any()
+        author = self._get_user_info('commit', 'author', False)
+        more_authors = []
+        while True:
+            another_author = self._get_user_info('commit', 'author', False)
+            if another_author is not None:
+                more_authors.append(another_author)
+            else:
+                break
+        committer = self._get_user_info('commit', 'committer')
+        message = self._decode_message(
+            self._get_data('commit', 'message'), 'commit')
+        from_ = self._get_from()
+        merges = []
+        while True:
+            merge = self._get_merge()
+            if merge is not None:
+                # while the spec suggests it's illegal, git-fast-export
+                # outputs multiple merges on the one line, e.g.
+                # merge :x :y :z
+                these_merges = merge.split(" ")
+                merges.extend(these_merges)
+            else:
+                break
+        properties = {}
+        while True:
+            name_value = self._get_property()
+            if name_value is not None:
+                name, value = name_value
+                properties[name] = value
+            else:
+                break
+        file_cmds = list(self.iter_file_commands())
+        return commands.CommitCommand(ref, mark, author, committer, message,
+            from_, merges, file_cmds, location,
+            more_authors=more_authors, properties=properties)
+
+    def _parse_feature(self, info):
+        """Parse a feature command."""
+        parts = info.split("=", 1)
+        name = parts[0]
+        if len(parts) > 1:
+            value = self._path(parts[1])
+        else:
+            value = None
+        location = (self.filename, self.lineno)
+        return commands.FeatureCommand(name, value, location=location)
+
+    def _parse_file_modify(self, info):
+        """Parse a filemodify command within a commit.
+
+        :param info: a string in the format "mode dataref path"
+          (where dataref might be the hard-coded literal 'inline').
+        """
+        params = info.split(' ', 2)
+        path = self._path(params[2])
+        mode = params[0]
+        if params[1] == 'inline':
+            dataref = None
+            data = self._get_data('filemodify')
+        else:
+            dataref = params[1]
+            data = None
+        return commands.FileModifyCommand(path, mode, dataref, data)
+
+    def _parse_reset(self, ref):
+        """Parse a reset command."""
+        from_ = self._get_from()
+        return commands.ResetCommand(ref, from_)
+
+    def _parse_tag(self, name):
+        """Parse a tag command."""
+        from_ = self._get_from('tag')
+        tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
+        message = self._decode_message(
+            self._get_data('tag', 'message'), 'tag')
+        return commands.TagCommand(name, from_, tagger, message)
+
+    def _get_mark_if_any(self):
+        """Parse a mark section, if any; None if absent or at EOF."""
+        line = self.next_line()
+        if line is not None:
+            if line.startswith('mark :'):
+                return line[len('mark :'):]
+            self.push_line(line)
+        return None
+
+    def _get_from(self, required_for=None):
+        """Parse a from section; abort if it is required but missing."""
+        line = self.next_line()
+        if line is not None and line.startswith('from '):
+            return line[len('from '):]
+        elif required_for:
+            self.abort(errors.MissingSection, required_for, 'from')
+        elif line is not None:
+            self.push_line(line)
+        return None
+
+    def _get_merge(self):
+        """Parse a merge section."""
+        line = self.next_line()
+        if line is None:
+            return None
+        elif line.startswith('merge '):
+            return line[len('merge '):]
+        else:
+            self.push_line(line)
+            return None
+
+    def _get_property(self):
+        """Parse a property section."""
+        line = self.next_line()
+        if line is None:
+            return None
+        elif line.startswith('property '):
+            return self._name_value(line[len('property '):])
+        else:
+            self.push_line(line)
+            return None
+
+    def _get_user_info(self, cmd, section, required=True,
+        accept_just_who=False):
+        """Parse a user section; None if optional and absent (or at EOF)."""
+        line = self.next_line()
+        if line is not None and line.startswith(section + ' '):
+            return self._who_when(line[len(section + ' '):], cmd, section,
+                accept_just_who=accept_just_who)
+        elif required:
+            self.abort(errors.MissingSection, cmd, section)
+        elif line is not None:
+            self.push_line(line)
+        return None
+
+    def _get_data(self, required_for, section='data'):
+        """Parse a data section; abort (MissingSection) if absent or EOF."""
+        line = self.next_line()
+        if line is None or not line.startswith('data '):
+            self.abort(errors.MissingSection, required_for, section)
+        rest = line[len('data '):]
+        if rest.startswith('<<'):
+            return self.read_until(rest[2:])
+        read_bytes = self.read_bytes(int(rest))
+        # The LF after the payload is optional: swallow it if present,
+        # otherwise hand the line back (at EOF there is nothing to push).
+        following = self.input.readline()
+        if following == "\n":
+            self.lineno += 1
+        elif following:
+            self._buffer.append(following)
+        return read_bytes
+
+    def _who_when(self, s, cmd, section, accept_just_who=False):
+        """Parse who and when information from a string.
+
+        :return: a tuple of (name,email,timestamp,timezone). name may be
+            the empty string if only an email address was given.
+        """
+        match = _WHO_AND_WHEN_RE.search(s)
+        if match:
+            datestr = match.group(3)
+            if self.date_parser is None:
+                # auto-detect the date format
+                if len(datestr.split(' ')) == 2:
+                    format = 'raw'
+                elif datestr == 'now':
+                    format = 'now'
+                else:
+                    format = 'rfc2822'
+                self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
+            when = self.date_parser(datestr, self.lineno)
+        else:
+            match = _WHO_RE.search(s)
+            if accept_just_who and match:
+                # HACK around missing time
+                # TODO: output a warning here
+                when = dates.DATE_PARSERS_BY_NAME['now']('now')
+            else:
+                self.abort(errors.BadFormat, cmd, section, s)
+
+        # Do not attempt to decode name or email address; they are just
+        # bytes.  (Everything will work out better if they are in UTF-8,
+        # but that's not guaranteed.)
+        name = match.group(1).rstrip()
+        email = match.group(2)
+        return (name, email, when[0], when[1])
+
+    def _name_value(self, s):
+        """Parse a (name,value) tuple from 'name value-length value'."""
+        parts = s.split(' ', 2)
+        name = parts[0]
+        if len(parts) == 1:
+            value = None
+        else:
+            size = int(parts[1])
+            value = parts[2]
+            still_to_read = size - len(value)
+            if still_to_read == 1:
+                value += "\n"
+            elif still_to_read > 0:
+                read_bytes = self.read_bytes(still_to_read - 1)
+                value += "\n" + read_bytes
+            value = value.decode('utf8')
+        return (name, value)
+
+    def _path(self, s):
+        """Parse a path."""
+        if s.startswith('"'):
+            if len(s) < 2 or s[-1] != '"':
+                self.abort(errors.BadFormat, '?', '?', s)
+            return _unquote_c_string(s[1:-1])
+
+        # Do *not* decode the path to a Unicode string: filenames on
+        # Unix are just bytes.  Git and Mercurial, at least, inherit
+        # this stance.  git-fast-import(1) merely says "It is
+        # recommended that <path> always be encoded using UTF-8.", which
+        # is good advice ... but not something we can count on here.
+        return s
+
+    def _path_pair(self, s):
+        """Parse two paths separated by a space."""
+        # TODO: handle a space in the first path
+        if s.startswith('"'):
+            parts = s[1:].split('" ', 1)
+        else:
+            parts = s.split(' ', 1)
+        if len(parts) != 2:
+            self.abort(errors.BadFormat, '?', '?', s)
+        elif parts[1].startswith('"') and parts[1].endswith('"'):
+            parts[1] = parts[1][1:-1]
+        elif parts[1].startswith('"') or parts[1].endswith('"'):
+            self.abort(errors.BadFormat, '?', '?', s)
+        return map(_unquote_c_string, parts)
+
+    def _mode(self, s):
+        """Parse a file mode into executable and symlink flags.
+
+        :return (is_executable, is_symlink)
+        """
+        # Note: Output from git-fast-export slightly different to spec
+        if s in ['644', '100644', '0100644']:
+            return False, False
+        elif s in ['755', '100755', '0100755']:
+            return True, False
+        elif s in ['120000', '0120000']:
+            return False, True
+        else:
+            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
+
+
+def _unquote_c_string(s):
+    r"""replace C-style escape sequences (\n, \", etc.) with real chars."""
+    # HACK: Python's own 'string_escape' codec is close enough to C quoting
+    return s.decode('string_escape', 'replace')
diff --git a/git_remote_helpers/fastimport/processor.py b/git_remote_helpers/fastimport/processor.py
new file mode 100644
index 0000000000..bfb4226a46
--- /dev/null
+++ b/git_remote_helpers/fastimport/processor.py
@@ -0,0 +1,222 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Processor of import commands.
+
+This module provides core processing functionality including an abstract class
+for basing real processors on. See the processors package for examples.
+"""
+
+import sys
+import time
+import logging
+
+from git_remote_helpers.fastimport import errors
+
+log = logging.getLogger(__name__)
+
+
+class ImportProcessor(object):
+    """Base class for import processors.
+
+    Subclasses override the pre_*/post_* hooks and supply one
+    "<command name>_handler" method per command they support.
+    """
+
+    known_params = []
+
+    def __init__(self, params=None, verbose=False, outf=None):
+        if outf is not None:
+            self.outf = outf
+        else:
+            self.outf = sys.stdout
+        self.verbose = verbose
+        if params is not None:
+            self.params = params
+            self.validate_parameters()
+        else:
+            self.params = {}
+
+        # A handler may set this flag to make process() stop cleanly
+        # before the remaining commands are looked at
+        self.finished = False
+
+    def validate_parameters(self):
+        """Check every supplied parameter name against known_params."""
+        for name in self.params:
+            if name not in self.known_params:
+                raise errors.UnknownParameter(name, self.known_params)
+
+    def process(self, commands):
+        """Dispatch each fast-import command to its "<name>_handler".
+
+        Handlers are looked up in the concrete class's own __dict__, so
+        a missing one raises MissingHandler even if a parent defines it.
+
+        :param commands: a sequence of commands.ImportCommand objects
+        """
+        self.pre_process()
+        handlers = self.__class__.__dict__
+        for cmd in commands:
+            try:
+                handler = handlers[cmd.name + "_handler"]
+            except KeyError:
+                raise errors.MissingHandler(cmd.name)
+            self.pre_handler(cmd)
+            handler(self, cmd)
+            self.post_handler(cmd)
+            if self.finished:
+                break
+        self.post_process()
+
+    def pre_process(self):
+        """Hook for start-of-run logic.
+
+        Called once by process(), before it iterates over the commands.
+        """
+        pass
+
+    def post_process(self):
+        """Hook for end-of-run logic.
+
+        Called once by process() after its loop over the commands ends;
+        it is skipped when an exception escapes from that loop.
+        """
+        pass
+
+    def pre_handler(self, cmd):
+        """Hook called with each command just before its handler runs."""
+        pass
+
+    def post_handler(self, cmd):
+        """Hook called with each command right after its handler ran."""
+        pass
+
+    def progress_handler(self, cmd):
+        """Handle a ProgressCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.progress_handler)
+
+    def blob_handler(self, cmd):
+        """Handle a BlobCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.blob_handler)
+
+    def checkpoint_handler(self, cmd):
+        """Handle a CheckpointCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.checkpoint_handler)
+
+    def commit_handler(self, cmd):
+        """Handle a CommitCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.commit_handler)
+
+    def reset_handler(self, cmd):
+        """Handle a ResetCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.reset_handler)
+
+    def tag_handler(self, cmd):
+        """Handle a TagCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.tag_handler)
+
+    def feature_handler(self, cmd):
+        """Handle a FeatureCommand (stub: raises NotImplementedError)."""
+        raise NotImplementedError(self.feature_handler)
+
+
+class CommitHandler(object):
+    """Base class for handling the file commands of one commit.
+
+    Subclasses override pre_process_files/post_process_files and the
+    *_handler methods for the file commands they support.
+    """
+
+    def __init__(self, command):
+        self.command = command
+
+    def process(self):
+        self.pre_process_files()
+        handlers = self.__class__.__dict__
+        for fc in self.command.file_cmds:
+            try:
+                handler = handlers[fc.name[4:] + "_handler"]
+            except KeyError:
+                raise errors.MissingHandler(fc.name)
+            handler(self, fc)
+        self.post_process_files()
+
+    def _log(self, level, msg, *args):
+        log.log(level, msg + " (%s)", *(args + (self.command.id,)))
+
+    # Logging helpers: nothing in this library calls them, but
+    # bzr-fastimport does, and other subclasses may find them handy.
+
+    def note(self, msg, *args):
+        """Log at INFO level, appending the command's id as context."""
+        self._log(logging.INFO, msg, *args)
+
+    def warning(self, msg, *args):
+        """Log at WARNING level, appending the command's id as context."""
+        self._log(logging.WARNING, msg, *args)
+
+    def debug(self, msg, *args):
+        """Log at DEBUG level, appending the command's id as context."""
+        self._log(logging.DEBUG, msg, *args)
+
+    def pre_process_files(self):
+        """Hook run by process() before any file command is handled."""
+        pass
+
+    def post_process_files(self):
+        """Hook run by process() after every file command was handled."""
+        pass
+
+    def modify_handler(self, filecmd):
+        """Handle a filemodify command (stub: NotImplementedError)."""
+        raise NotImplementedError(self.modify_handler)
+
+    def delete_handler(self, filecmd):
+        """Handle a filedelete command (stub: NotImplementedError)."""
+        raise NotImplementedError(self.delete_handler)
+
+    def copy_handler(self, filecmd):
+        """Handle a filecopy command (stub: NotImplementedError)."""
+        raise NotImplementedError(self.copy_handler)
+
+    def rename_handler(self, filecmd):
+        """Handle a filerename command (stub: NotImplementedError)."""
+        raise NotImplementedError(self.rename_handler)
+
+    def deleteall_handler(self, filecmd):
+        """Handle a filedeleteall command (stub: NotImplementedError)."""
+        raise NotImplementedError(self.deleteall_handler)
+
+
+def parseMany(filenames, parser_factory, processor):
+    """Run every input file through a parser and into 'processor'.
+
+    parser_factory must be a callable that takes one open input file
+    and returns an ImportParser instance, e.g. the ImportParser class
+    object itself.  Each file in 'filenames' is opened, parsed, and
+    closed in turn; the filename "-" means stdin, which is left open.
+    """
+    for filename in filenames:
+        from_stdin = (filename == "-")
+        if from_stdin:
+            infile = sys.stdin
+        else:
+            infile = open(filename, "rb")
+        try:
+            processor.process(parser_factory(infile).parse())
+        finally:
+            if not from_stdin:
+                infile.close()
diff --git a/git_remote_helpers/setup.py b/git_remote_helpers/setup.py
index 4d434b65cb..a19c061fdf 100644
--- a/git_remote_helpers/setup.py
+++ b/git_remote_helpers/setup.py
@@ -13,5 +13,6 @@ setup(
     author_email = 'git@vger.kernel.org',
     url = 'http://www.git-scm.com/',
     package_dir = {'git_remote_helpers': ''},
-    packages = ['git_remote_helpers', 'git_remote_helpers.git'],
+    packages = ['git_remote_helpers', 'git_remote_helpers.git',
+                'git_remote_helpers.fastimport'],
 )
-- 
2.11.4.GIT