added files from the gitrb project, which seems abandoned, but which is great code
author: scott Chacon <schacon@agadorsparticus.corp.reactrix.com>
Tue, 20 Nov 2007 20:07:46 +0000 (20 12:07 -0800)
committer: scott Chacon <schacon@agadorsparticus.corp.reactrix.com>
Tue, 20 Nov 2007 20:07:46 +0000 (20 12:07 -0800)
lib/git/raw/git.rb [new file with mode: 0644]
lib/git/raw/internal/loose.rb [new file with mode: 0644]
lib/git/raw/internal/mmap.rb [new file with mode: 0644]
lib/git/raw/internal/object.rb [new file with mode: 0644]
lib/git/raw/internal/pack.rb [new file with mode: 0644]
lib/git/raw/object.rb [new file with mode: 0644]
tests/units/test_raw_internals.rb [new file with mode: 0644]

diff --git a/lib/git/raw/git.rb b/lib/git/raw/git.rb
new file mode 100644 (file)
index 0000000..004e795
--- /dev/null
@@ -0,0 +1,63 @@
+require 'git/internal/object'
+require 'git/internal/pack'
+require 'git/internal/loose'
+require 'git/object'
+
+module Git
+  # Read access to the object database stored below a git directory.
+  # Lookups try the pack files first, then loose storage, then the packs
+  # once more after re-scanning the pack directory.
+  class Repository
+    # git_dir:: path of the git directory; "objects" and "objects/pack/" are
+    #           resolved relative to it.
+    def initialize(git_dir)
+      @git_dir = git_dir
+      # The storage classes live in Git::Raw::Internal.
+      @loose = Git::Raw::Internal::LooseStorage.new(git_path("objects"))
+      @packs = []
+      initpacks
+    end
+
+    # Returns +path+ prefixed with the git directory.
+    def git_path(path)
+      return "#@git_dir/#{path}"
+    end
+
+    # Returns the parsed object for a hex SHA-1, or nil when it is not found.
+    def get_object_by_sha1(sha1)
+      r = get_raw_object_by_sha1(sha1)
+      return nil if !r
+      # Fully qualified on purpose: a bare Object inside Git::Repository
+      # resolves to Ruby's ::Object, which has no from_raw.
+      Git::Raw::Object.from_raw(r, self)
+    end
+
+    # Returns the raw object for a hex SHA-1, or nil when neither the packs
+    # nor loose storage contain it.
+    def get_raw_object_by_sha1(sha1)
+      sha1 = [sha1].pack("H*")
+
+      # try packs
+      o = object_from_packs(sha1)
+      return o if o
+
+      # try loose storage
+      o = @loose[sha1]
+      return o if o
+
+      # try packs again, maybe the object got packed in the meantime
+      initpacks
+      object_from_packs(sha1)
+    end
+
+    # Closes every open pack and re-opens all "*.pack" files found in
+    # objects/pack/.
+    def initpacks
+      @packs.each do |pack|
+        pack.close
+      end
+      @packs = []
+      Dir.open(git_path("objects/pack/")) do |dir|
+        dir.each do |entry|
+          if entry =~ /\.pack$/i
+            @packs << Git::Raw::Internal::PackStorage.new(git_path("objects/pack/" \
+                                                                   + entry))
+          end
+        end
+      end
+    end
+
+    # Returns the first hit for the binary +sha1+ among the open packs, or nil.
+    def object_from_packs(sha1)
+      @packs.each do |pack|
+        o = pack[sha1]
+        return o if o
+      end
+      nil
+    end
+    private :object_from_packs
+  end
+end
diff --git a/lib/git/raw/internal/loose.rb b/lib/git/raw/internal/loose.rb
new file mode 100644 (file)
index 0000000..0e4020c
--- /dev/null
@@ -0,0 +1,96 @@
+require 'zlib'
+require 'digest/sha1'
+
+require 'git/raw/internal/object'
+
+module Git module Raw module Internal
+  # Raised when a loose object file is too short, has a malformed header or
+  # does not inflate to the size its header announces.
+  class LooseObjectError < StandardError
+  end
+
+  # Reads loose objects (one zlib-compressed file per object) from an
+  # objects directory laid out as "<first 2 hex digits>/<remaining 38>".
+  class LooseStorage
+    # directory:: path of the objects directory.
+    def initialize(directory)
+      @directory = directory
+    end
+
+    # Looks an object up by its binary SHA-1.
+    # Returns a RawObject, or nil when no file exists for that id.
+    def [](sha1)
+      sha1 = sha1.unpack("H*")[0]
+
+      path = @directory+'/'+sha1[0...2]+'/'+sha1[2..40]
+      begin
+        # Binary mode: the file is compressed data, not text.
+        get_raw_object(File.open(path, 'rb') { |f| f.read })
+      rescue Errno::ENOENT
+        nil
+      end
+    end
+
+    # Decodes the contents of a loose object file into a RawObject.
+    # Raises LooseObjectError on a malformed header or a size mismatch.
+    def get_raw_object(buf)
+      if buf.length < 2
+        raise LooseObjectError, "object file too small"
+      end
+
+      if legacy_loose_object?(buf)
+        # Whole file is one zlib stream: "<type> <size>\0<content>".
+        content = Zlib::Inflate.inflate(buf)
+        header, content = content.split(/\0/, 2)
+        if !header || !content
+          raise LooseObjectError, "invalid object header"
+        end
+        type, size = header.split(/ /, 2)
+        if !%w(blob tree commit tag).include?(type) || size !~ /\A\d+\z/
+          raise LooseObjectError, "invalid object header"
+        end
+        type = type.to_sym
+        size = size.to_i
+      else
+        # Binary header in front of the zlib stream.
+        type, size, used = unpack_object_header_gently(buf)
+        content = Zlib::Inflate.inflate(buf[used..-1])
+      end
+      raise LooseObjectError, "size mismatch" if content.bytesize != size
+      return RawObject.new(type, content)
+    end
+
+    # Parses the variable-length binary header at the start of +buf+: the
+    # first byte holds the type in bits 4-6 and the low 4 size bits; while
+    # bit 7 is set, each following byte contributes 7 more size bits.
+    # Returns [type symbol, size, number of header bytes consumed].
+    def unpack_object_header_gently(buf)
+      used = 0
+      # getbyte yields an Integer; String#[] with an index returns a
+      # one-character String on Ruby 1.9 and later.
+      c = buf.getbyte(used)
+      used += 1
+
+      type = (c >> 4) & 7
+      size = c & 15
+      shift = 4
+      while c & 0x80 != 0
+        if buf.length <= used
+          raise LooseObjectError, "object file too short"
+        end
+        c = buf.getbyte(used)
+        used += 1
+
+        size += (c & 0x7f) << shift
+        shift += 7
+      end
+      type = OBJ_TYPES[type]
+      if ![:blob, :tree, :commit, :tag].include?(type)
+        raise LooseObjectError, "invalid loose object type"
+      end
+      return [type, size, used]
+    end
+    private :unpack_object_header_gently
+
+    # True when the first two bytes look like a zlib stream header: first
+    # byte 0x78 and the 16-bit big-endian word divisible by 31.
+    def legacy_loose_object?(buf)
+      first = buf.getbyte(0)
+      word = (first << 8) + buf.getbyte(1)
+      first == 0x78 && word % 31 == 0
+    end
+    private :legacy_loose_object?
+  end
+end end end
+
+# Run directly: for every directory given on the command line, print the
+# SHA-1 and type of each loose object file found below it.
+if $0 == __FILE__
+  require 'find'
+  ARGV.each do |path|
+    # The class is defined in Git::Raw::Internal.
+    storage = Git::Raw::Internal::LooseStorage.new(path)
+    Find.find(path) do |p|
+      next if !/\/([0-9a-f]{2})\/([0-9a-f]{38})$/.match(p)
+      obj = storage[[$1+$2].pack("H*")]
+      puts "%s %s" % [obj.sha1.unpack("H*")[0], obj.type]
+    end
+  end
+end
diff --git a/lib/git/raw/internal/mmap.rb b/lib/git/raw/internal/mmap.rb
new file mode 100644 (file)
index 0000000..d7390b1
--- /dev/null
@@ -0,0 +1,44 @@
+begin
+  require 'mmap'
+rescue LoadError
+
+module Git module Raw module Internal
+  # Stand-in used only when `require 'mmap'` fails: serves indexed reads
+  # from an already opened file with seek/read.
+  class Mmap
+    # file:: an open, seekable IO object.
+    def initialize(file)
+      @file = file
+      # Position the file is expected to be at after the last read.
+      @offset = nil
+    end
+
+    # Drops the reference to the file; the caller closes it.
+    def unmap
+      @file = nil
+    end
+
+    # Reads from the file. Accepted forms:
+    #   map[offset]       -> Integer byte value at offset
+    #   map[offset, len]  -> String of len bytes starting at offset
+    #   map[range]        -> String covering the range
+    # Raises RuntimeError for any other index.
+    def [](*idx)
+      idx = idx[0] if idx.length == 1
+      case idx
+      when Range
+        offset = idx.first
+        # Parenthesised: `a + b ? x : y` parses as `(a + b) ? x : y`.
+        len = idx.last - idx.first + (idx.exclude_end? ? 0 : 1)
+      when Integer
+        offset = idx
+        len = nil
+      when Array
+        offset, len = idx
+      else
+        raise RuntimeError, "invalid index param: #{idx.class}"
+      end
+      # Skip the seek when this read continues where the previous one ended.
+      if @offset != offset
+        @file.seek(offset)
+      end
+      @offset = offset + (len ? len : 1)
+      if not len
+        @file.read(1).getbyte(0)
+      else
+        @file.read(len)
+      end
+    end
+  end
+end end end
+
+end     # rescue LoadError
+
diff --git a/lib/git/raw/internal/object.rb b/lib/git/raw/internal/object.rb
new file mode 100644 (file)
index 0000000..b81df2b
--- /dev/null
@@ -0,0 +1,23 @@
+require 'digest/sha1'
+
+module Git module Raw module Internal
+  # Numeric object type codes.
+  OBJ_NONE = 0
+  OBJ_COMMIT = 1
+  OBJ_TREE = 2
+  OBJ_BLOB = 3
+  OBJ_TAG = 4
+
+  # Maps a numeric type code (array index) to its symbol.
+  OBJ_TYPES = [nil, :commit, :tree, :blob, :tag].freeze
+
+  # An undecoded object: its type symbol plus its content bytes.
+  class RawObject
+    attr_accessor :type, :content
+
+    # type::    object type, e.g. :blob
+    # content:: the object's content as a String
+    def initialize(type, content)
+      @type = type
+      @content = content
+    end
+
+    # Returns the binary SHA-1 digest of "<type> <size>\0" followed by the
+    # content. The size is the byte length, so multi-byte content hashes
+    # correctly as well.
+    def sha1
+      Digest::SHA1.digest("%s %d\0" % [@type, @content.bytesize] + @content)
+    end
+  end
+end end end
diff --git a/lib/git/raw/internal/pack.rb b/lib/git/raw/internal/pack.rb
new file mode 100644 (file)
index 0000000..edfeada
--- /dev/null
@@ -0,0 +1,240 @@
+require 'zlib'
+require 'git/raw/internal/object'
+require 'git/raw/internal/mmap'
+
+module Git module Raw module Internal
+  # Raised when a pack file or its index is malformed or truncated.
+  class PackFormatError < StandardError
+  end
+
+  # Reads objects from one pack file, locating them through the ".idx" file
+  # stored next to it.
+  class PackStorage
+    # Pack entry types whose data is a delta against another object.
+    OBJ_OFS_DELTA = 6
+    OBJ_REF_DELTA = 7
+
+    # Index layout assumed by this class: a fan-out table of FanOutCount
+    # 4-byte big-endian counters, followed by fixed-size entries made of a
+    # 4-byte pack offset and a 20-byte SHA-1.
+    FanOutCount = 256
+    SHA1Size = 20
+    IdxOffsetSize = 4
+    OffsetSize = 4
+    OffsetStart = FanOutCount * IdxOffsetSize
+    SHA1Start = OffsetStart + OffsetSize
+    EntrySize = OffsetSize + SHA1Size
+
+    # file:: path of the ".pack" file, or of its ".idx" file.
+    # Raises PackFormatError when the fan-out table is not monotonic.
+    def initialize(file)
+      if file =~ /\.idx$/
+        file = file[0...-3] + 'pack'
+      end
+
+      @name = file
+      @packfile = File.open(file, 'rb')
+      begin
+        @idxfile = File.open(file[0...-4]+'idx', 'rb')
+      rescue
+        # Do not leak the pack handle when the index cannot be opened.
+        @packfile.close
+        raise
+      end
+      @idx = Mmap.new(@idxfile)
+
+      # @offsets[b] is the number of entries whose first SHA-1 byte is < b,
+      # so @offsets[b, 2] brackets the entries starting with byte b.
+      @offsets = [0]
+      FanOutCount.times do |i|
+        pos = @idx[i * IdxOffsetSize,IdxOffsetSize].unpack('N')[0]
+        if pos < @offsets[i]
+          raise PackFormatError, "pack #@name has discontinuous index #{i}"
+        end
+        @offsets << pos
+      end
+
+      @size = @offsets[-1]
+    end
+
+    # Closes the pack file and the index.
+    def close
+      @packfile.close
+      @idx.unmap
+      @idxfile.close
+    end
+
+    # Returns the RawObject stored under the binary +sha1+, or nil when the
+    # index has no such entry.
+    def [](sha1)
+      offset = find_object(sha1)
+      return nil if !offset
+      return parse_object(offset)
+    end
+
+    # Yields (binary sha1, pack offset) for every index entry.
+    def each_entry
+      pos = OffsetStart
+      @size.times do
+        offset = @idx[pos,OffsetSize].unpack('N')[0]
+        sha1 = @idx[pos+OffsetSize,SHA1Size]
+        pos += EntrySize
+        yield sha1, offset
+      end
+    end
+
+    # Yields the binary SHA-1 of every index entry.
+    def each_sha1
+      # unpacking the offset is quite expensive, so
+      # we avoid using #each
+      pos = SHA1Start
+      @size.times do
+        sha1 = @idx[pos,SHA1Size]
+        pos += EntrySize
+        yield sha1
+      end
+    end
+
+    # Binary search of the index range selected by the first SHA-1 byte.
+    # Returns the object's offset in the pack, or nil.
+    def find_object(sha1)
+      slot = sha1.getbyte(0)
+      first, last = @offsets[slot,2]
+      while first < last
+        mid = (first + last) / 2
+        midsha1 = @idx[SHA1Start + mid * EntrySize,SHA1Size]
+        cmp = midsha1 <=> sha1
+
+        if cmp < 0
+          first = mid + 1
+        elsif cmp > 0
+          last = mid
+        else
+          pos = OffsetStart + mid * EntrySize
+          offset = @idx[pos,OffsetSize].unpack('N')[0]
+          return offset
+        end
+      end
+
+      nil
+    end
+    private :find_object
+
+    # Builds the RawObject for the entry at +offset+.
+    def parse_object(offset)
+      data, type = unpack_object(offset)
+      RawObject.new(OBJ_TYPES[type], data)
+    end
+    protected :parse_object
+
+    # Decodes the entry at +offset+ and returns [data, numeric type]; delta
+    # entries are resolved against their base.
+    # Raises PackFormatError on an unknown entry type.
+    def unpack_object(offset)
+      obj_offset = offset
+      @packfile.seek(offset)
+
+      # Entry header: type in bits 4-6 and low 4 size bits in the first
+      # byte; while bit 7 is set, 7 more size bits follow per byte.
+      c = read_pack_byte
+      size = c & 0xf
+      type = (c >> 4) & 7
+      shift = 4
+      offset += 1
+      while c & 0x80 != 0
+        c = read_pack_byte
+        size |= ((c & 0x7f) << shift)
+        shift += 7
+        offset += 1
+      end
+
+      case type
+      when OBJ_OFS_DELTA, OBJ_REF_DELTA
+        data, type = unpack_deltified(type, offset, obj_offset, size)
+      when OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG
+        data = unpack_compressed(offset, size)
+      else
+        raise PackFormatError, "invalid type #{type}"
+      end
+      [data, type]
+    end
+    private :unpack_object
+
+    # Reads one byte from the pack file as an Integer.
+    # Raises PackFormatError at end of file.
+    def read_pack_byte
+      byte = @packfile.read(1)
+      if byte.nil? || byte.empty?
+        raise PackFormatError, 'error reading pack data'
+      end
+      byte.getbyte(0)
+    end
+    private :read_pack_byte
+
+    # Returns the byte at +pos+ of +str+ as an Integer.
+    # Raises PackFormatError with +message+ when +pos+ is past the end.
+    def byte_at(str, pos, message = 'invalid delta data')
+      byte = str.getbyte(pos)
+      raise PackFormatError, message if byte.nil?
+      byte
+    end
+    private :byte_at
+
+    # Resolves a delta entry whose base reference starts at +offset+.
+    # type::       OBJ_OFS_DELTA (base given as a distance back from this
+    #              entry) or OBJ_REF_DELTA (base given by its SHA-1)
+    # obj_offset:: offset of the delta entry itself
+    # size::       inflated size of the delta data
+    # Returns [patched data, numeric type of the base object].
+    def unpack_deltified(type, offset, obj_offset, size)
+      @packfile.seek(offset)
+      data = @packfile.read(SHA1Size) || ''
+
+      if type == OBJ_OFS_DELTA
+        i = 0
+        c = byte_at(data, i)
+        base_offset = c & 0x7f
+        while c & 0x80 != 0
+          c = byte_at(data, i += 1)
+          base_offset += 1
+          base_offset <<= 7
+          base_offset |= c & 0x7f
+        end
+        base_offset = obj_offset - base_offset
+        offset += i + 1
+      else
+        if data.size != SHA1Size
+          raise PackFormatError, 'error reading pack data'
+        end
+        base_offset = find_object(data)
+        if !base_offset
+          raise PackFormatError, 'delta base not found in pack'
+        end
+        offset += SHA1Size
+      end
+
+      base, type = unpack_object(base_offset)
+      delta = unpack_compressed(offset, size)
+      [patch_delta(base, delta), type]
+    end
+    private :unpack_deltified
+
+    # Inflates the zlib stream starting at +offset+ and returns exactly
+    # +destsize+ bytes. Raises PackFormatError when the file ends early or
+    # the stream inflates to more than +destsize+ bytes.
+    def unpack_compressed(offset, destsize)
+      outdata = String.new
+      @packfile.seek(offset)
+      zstr = Zlib::Inflate.new
+      begin
+        while outdata.size < destsize
+          indata = @packfile.read(4096)
+          if indata.nil? || indata.size == 0
+            raise PackFormatError, 'error reading pack data'
+          end
+          outdata << zstr.inflate(indata)
+        end
+        if outdata.size > destsize
+          raise PackFormatError, 'error reading pack data'
+        end
+      ensure
+        # Release the inflater on the error paths too.
+        zstr.close
+      end
+      outdata
+    end
+    private :unpack_compressed
+
+    # Applies +delta+ to +base+ and returns the result.
+    # The delta starts with the expected base size and the result size;
+    # then each instruction byte either copies a range of +base+ (bit 7
+    # set; bits 0-3 select which offset bytes follow, bits 4-6 which size
+    # bytes) or inserts that many literal bytes taken from the delta.
+    # Raises PackFormatError on any inconsistency.
+    def patch_delta(base, delta)
+      src_size, pos = patch_delta_header_size(delta, 0)
+      if src_size != base.size
+        raise PackFormatError, 'invalid delta data'
+      end
+
+      dest_size, pos = patch_delta_header_size(delta, pos)
+      dest = String.new
+      while pos < delta.size
+        c = byte_at(delta, pos)
+        pos += 1
+        if c & 0x80 != 0
+          cp_off = cp_size = 0
+          4.times do |i|
+            next if (c & (0x01 << i)) == 0
+            cp_off |= byte_at(delta, pos) << (8 * i)
+            pos += 1
+          end
+          3.times do |i|
+            next if (c & (0x10 << i)) == 0
+            cp_size |= byte_at(delta, pos) << (8 * i)
+            pos += 1
+          end
+          cp_size = 0x10000 if cp_size == 0
+          if cp_off + cp_size > base.size
+            raise PackFormatError, 'invalid delta data'
+          end
+          dest << base[cp_off,cp_size]
+        elsif c != 0
+          dest << delta[pos,c]
+          pos += c
+        else
+          raise PackFormatError, 'invalid delta data'
+        end
+      end
+      if dest.size != dest_size
+        raise PackFormatError, 'invalid delta data'
+      end
+      dest
+    end
+    private :patch_delta
+
+    # Reads a little-endian base-128 number from +delta+ starting at +pos+.
+    # Returns [value, position after the number].
+    def patch_delta_header_size(delta, pos)
+      size = 0
+      shift = 0
+      begin
+        c = byte_at(delta, pos, 'invalid delta header')
+        pos += 1
+        size |= (c & 0x7f) << shift
+        shift += 7
+      end while c & 0x80 != 0
+      [size, pos]
+    end
+    private :patch_delta_header_size
+  end
+end end end
+
+# Run directly: print the SHA-1 and type of every object in each pack file
+# given on the command line.
+if $0 == __FILE__
+  ARGV.each do |path|
+    # The class is defined in Git::Raw::Internal.
+    storage = Git::Raw::Internal::PackStorage.new(path)
+    storage.each_sha1 do |sha1|
+      obj = storage[sha1]
+      # unpack returns an array; take its only element for the hex string.
+      puts "%s %s" % [obj.sha1.unpack('H*')[0], obj.type]
+    end
+  end
+end
diff --git a/lib/git/raw/object.rb b/lib/git/raw/object.rb
new file mode 100644 (file)
index 0000000..7e3e618
--- /dev/null
@@ -0,0 +1,268 @@
+require 'digest/sha1'
+
+module Git
+  module Raw
+
+  # class for author/committer/tagger lines
+  #
+  # Parses "Name <email> <unix time> <+/-zone>" into its parts. The zone is
+  # kept as a signed integer of its digits (for example "-0800" => -800).
+  class UserInfo
+    attr_accessor :name, :email, :date, :offset
+
+    # str:: the header value, e.g. "A U Thor <a@example.org> 1195589266 -0800"
+    # Raises RuntimeError when +str+ does not have that shape.
+    def initialize(str)
+      m = /^(.*?) <(.*)> (\d+) ([+-])0*(\d+?)$/.match(str)
+      if !m
+        # Only the value is known here; the header name is not passed in.
+        raise RuntimeError, "invalid user info header in commit: %s" % str.inspect
+      end
+      @name = m[1]
+      @email = m[2]
+      @date = Time.at(Integer(m[3]))
+      @offset = (m[4] == "-" ? -1 : 1)*Integer(m[5])
+    end
+
+    # Formats the value back into header form.
+    def to_s
+      "%s <%s> %s %+05d" % [@name, @email, @date.to_i, @offset]
+    end
+  end
+
+  # base class for all git objects (blob, tree, commit, tag)
+  #
+  # Subclasses provide #type and #raw_content; #sha1 is derived from them.
+  class Object
+    attr_accessor :repository
+
+    # Builds the matching subclass instance for a raw object.
+    # rawobject::  responds to #type (:blob, :tree, :commit or :tag) and
+    #              #content
+    # repository:: passed on to the subclass
+    # Raises RuntimeError for any other type.
+    def Object.from_raw(rawobject, repository = nil)
+      case rawobject.type
+      when :blob
+        return Blob.from_raw(rawobject, repository)
+      when :tree
+        return Tree.from_raw(rawobject, repository)
+      when :commit
+        return Commit.from_raw(rawobject, repository)
+      when :tag
+        return Tag.from_raw(rawobject, repository)
+      else
+        raise RuntimeError, "got invalid object-type"
+      end
+    end
+
+    # NotImplementedError is Ruby's exception class for abstract methods.
+    def initialize
+      raise NotImplementedError, "abstract class"
+    end
+
+    # Object type as a symbol; provided by subclasses.
+    def type
+      raise NotImplementedError, "abstract class"
+    end
+
+    # Serialized content as a String; provided by subclasses.
+    def raw_content
+      raise NotImplementedError, "abstract class"
+    end
+
+    # Returns the hex SHA-1 of "<type> <byte size>\0" followed by the raw
+    # content.
+    def sha1
+      content = self.raw_content
+      header = "%s %d\0" % [self.type, content.bytesize]
+      Digest::SHA1.hexdigest(header + content)
+    end
+  end
+
+  # A blob: file content stored verbatim.
+  class Blob < Object
+    attr_accessor :content
+
+    # Builds a Blob from a raw object, keeping the repository reference.
+    def self.from_raw(rawobject, repository)
+      new(rawobject.content, repository)
+    end
+
+    # content::    the blob's data
+    # repository:: optional owning repository
+    def initialize(content, repository=nil)
+      @content = content
+      @repository = repository
+    end
+
+    def type
+      :blob
+    end
+
+    # A blob serializes to its content unchanged.
+    def raw_content
+      @content
+    end
+  end
+
+  # One entry of a tree object: "<octal mode> <name>\0<20-byte sha1>".
+  class DirectoryEntry
+    # Mask and values of the file-type bits of the mode.
+    S_IFMT  = 00170000
+    S_IFLNK =  0120000
+    S_IFREG =  0100000
+    S_IFDIR =  0040000
+
+    # mode is an Integer, name a String, sha1 a 40-character hex String.
+    attr_accessor :mode, :name, :sha1
+
+    # buf:: one raw tree entry.
+    # Raises RuntimeError when the entry is malformed or its type bits are
+    # not link, directory or regular file.
+    def initialize(buf)
+      m = /^([0-7]+) (.*)\0(.{20})$/m.match(buf)
+      if !m
+        raise RuntimeError, "invalid directory entry"
+      end
+      @mode = m[1].oct
+      @name = m[2]
+      @sha1 = m[3].unpack("H*")[0]
+
+      if ![S_IFLNK, S_IFDIR, S_IFREG].include?(@mode & S_IFMT)
+        raise RuntimeError, "unknown type for directory entry"
+      end
+    end
+
+    # Returns :link, :directory or :file according to the mode's type bits.
+    def type
+      case @mode & S_IFMT
+      when S_IFLNK
+        @type = :link
+      when S_IFDIR
+        @type = :directory
+      when S_IFREG
+        @type = :file
+      else
+        raise RuntimeError, "unknown type for directory entry"
+      end
+    end
+
+    # Replaces the mode's type bits; the remaining bits are kept.
+    # type:: :link, :directory or :file; anything else raises RuntimeError.
+    def type=(type)
+      # Dispatch on the argument, not on the cached @type.
+      case type
+      when :link
+        @mode = (@mode & ~S_IFMT) | S_IFLNK
+      when :directory
+        @mode = (@mode & ~S_IFMT) | S_IFDIR
+      when :file
+        @mode = (@mode & ~S_IFMT) | S_IFREG
+      else
+        raise RuntimeError, "invalid type"
+      end
+    end
+
+    # Serializes the entry back to its raw form.
+    def raw
+      "%o %s\0%s" % [@mode, @name, [@sha1].pack("H*")]
+    end
+  end
+
+  # A tree: an ordered list of DirectoryEntry objects.
+  class Tree < Object
+    attr_accessor :entry
+
+    # Splits the raw content into entries and wraps each one in a
+    # DirectoryEntry.
+    def self.from_raw(rawobject, repository=nil)
+      chunks = rawobject.content.scan(/\d+ .*?\0.{20}/m)
+      parsed = chunks.map { |chunk| DirectoryEntry.new(chunk) }
+      new(parsed, repository)
+    end
+
+    # entries::    array of entries, kept in the given order
+    # repository:: optional owning repository
+    def initialize(entries=[], repository = nil)
+      @repository = repository
+      @entry = entries
+    end
+
+    def type
+      :tree
+    end
+
+    # Concatenation of every entry's raw form, in stored order.
+    def raw_content
+      # TODO: sort correctly
+      #@entry.sort { |a,b| a.name <=> b.name }.
+      pieces = @entry.map { |item| item.raw }
+      pieces.join
+    end
+  end
+
+  # A commit: tree id, parent ids, author, committer and message.
+  class Commit < Object
+    attr_accessor :author, :committer, :tree, :parent, :message
+
+    # Parses the raw content: header lines up to the first blank line, the
+    # message after it. Unknown headers are reported with warn.
+    # Raises RuntimeError when tree, author or committer is missing.
+    def self.from_raw(rawobject, repository=nil)
+      tree = nil
+      author = nil
+      committer = nil
+      parent = []
+
+      head, message = rawobject.content.split(/\n\n/, 2)
+      head.split(/\n/).each do |line|
+        key, value = line.split(/ /, 2)
+        if key == "tree"
+          tree = value
+        elsif key == "parent"
+          parent << value
+        elsif key == "author"
+          author = UserInfo.new(value)
+        elsif key == "committer"
+          committer = UserInfo.new(value)
+        else
+          warn "unknown header '%s' in commit %s" % \
+            [key, rawobject.sha1.unpack("H*")[0]]
+        end
+      end
+
+      unless tree && author && committer
+        raise RuntimeError, "incomplete raw commit object"
+      end
+      new(tree, parent, author, committer, message, repository)
+    end
+
+    # tree::      id of the tree
+    # parent::    array of parent ids
+    # author::    author info
+    # committer:: committer info
+    # message::   commit message
+    def initialize(tree, parent, author, committer, message, repository=nil)
+      @tree, @parent = tree, parent
+      @author, @committer = author, committer
+      @message = message
+      @repository = repository
+    end
+
+    def type
+      :commit
+    end
+
+    # Serializes the headers (one "parent" line per parent) and the message.
+    def raw_content
+      parent_lines = @parent.map { |id| "parent %s\n" % id }.join
+      headers = "tree %s\n%sauthor %s\ncommitter %s\n\n" % [
+        @tree, parent_lines, @author, @committer]
+      headers + @message
+    end
+  end
+
+  # An annotated tag: the tagged object's id and type, the tag name, the
+  # tagger and a message.
+  class Tag < Object
+    attr_accessor :object, :tag, :tagger, :message
+    # Only the writer is generated: #type below always reports :tag, while
+    # the tagged object's type is held in @type and used by #raw_content.
+    attr_writer :type
+
+    # Parses the raw content: header lines up to the first blank line, the
+    # message after it. Unknown headers are reported with warn.
+    # Raises RuntimeError on an unknown "type" value or when object, type,
+    # tag or tagger is missing.
+    def self.from_raw(rawobject, repository=nil)
+      # Declared before the block so the assignments inside it are visible
+      # afterwards.
+      object = type = tag = tagger = nil
+
+      headers, message = rawobject.content.split(/\n\n/, 2)
+      headers.split(/\n/).each do |line|
+        key, value = line.split(/ /, 2)
+        case key
+        when "object"
+          object = value
+        when "type"
+          if !["blob", "tree", "commit", "tag"].include?(value)
+            raise RuntimeError, "invalid type in tag"
+          end
+          type = value.to_sym
+        when "tag"
+          tag = value
+        when "tagger"
+          tagger = UserInfo.new(value)
+        else
+          warn "unknown header '%s' in tag %s" % \
+            [key, rawobject.sha1.unpack("H*")[0]]
+        end
+      end
+      # Checked once all headers have been read.
+      unless object && type && tag && tagger
+        raise RuntimeError, "incomplete raw tag object"
+      end
+      new(object, type, tag, tagger, repository, message)
+    end
+
+    # object::     id of the tagged object
+    # type::       type of the tagged object
+    # tag::        tag name
+    # tagger::     tagger info
+    # repository:: optional owning repository
+    # message::    optional tag message; placed last so existing
+    #              five-argument calls keep working
+    def initialize(object, type, tag, tagger, repository=nil, message=nil)
+      @object = object
+      @type = type
+      @tag = tag
+      @tagger = tagger
+      @repository = repository
+      @message = message
+    end
+
+    # Serializes the headers and the message (empty when none was set).
+    def raw_content
+      "object %s\ntype %s\ntag %s\ntagger %s\n\n" % \
+        [@object, @type, @tag, @tagger] + @message.to_s
+    end
+
+    def type
+      :tag
+    end
+  end
+  
+end
+end
\ No newline at end of file
diff --git a/tests/units/test_raw_internals.rb b/tests/units/test_raw_internals.rb
new file mode 100644 (file)
index 0000000..b135e52
--- /dev/null
@@ -0,0 +1,15 @@
+#!/usr/bin/env ruby
+
+require File.dirname(__FILE__) + '/../test_helper'
+
+# Test case skeleton for the raw object-access code; it has no assertions yet.
+class TestRawInternals < Test::Unit::TestCase
+  
+  # Runs before each test: calls set_file_paths and opens @wdir with Git.open.
+  # NOTE(review): set_file_paths presumably comes from test_helper and sets
+  # @wdir - confirm there.
+  def setup
+    set_file_paths
+    @git = Git.open(@wdir)
+  end
+  
+  # Placeholder: intentionally empty for now.
+  def test_raw_log
+  end
+
+end
\ No newline at end of file