Update documentation to reflect the updated links in the RFC
[VCS-Git-Torrent.git] / lib / VCS / Git / Torrent / CommitReel / Index.pm
blob33d6cda71f4c6251a3cddf2a704744309924b8d6
2 package VCS::Git::Torrent::CommitReel::Index;
4 =head1 NAME
6 VCS::Git::Torrent::CommitReel::Index
8 =cut
10 use DB_File;
11 use Moose;
12 use Storable qw( freeze thaw );
14 use VCS::Git::Torrent::CommitReel::Entry;
16 has 'index' =>
17 isa => 'HashRef',
18 is => 'rw',
19 default => sub {
20 my $self = shift;
21 $self->open_index;
24 has 'reel' =>
25 isa => "VCS::Git::Torrent::CommitReel",
26 is => "rw",
27 weak_ref => 1,
28 handles => [ 'git' ];
30 has 'cat_file' =>
31 isa => 'ArrayRef',
32 is => 'ro',
33 default => sub {
34 [ Git::command_bidi_pipe('cat-file', '--batch-check') ]
37 =head2 open_index
39 Open, and possibly create, the file that stores the commit reel index.
40 DB_File in DB_BTREE mode is currently used, with a numeric sort to the keys,
41 which are the offsets in the commit reel. The values are frozen
42 VCS::Git::Torrent::CommitReel::Entry objects; see L<Storable>.
44 =cut
46 sub open_index {
47 my $self = shift;
48 my %index;
49 my $x;
51 # define the sort function; the offset is the hash index, so we
52 # need to force numeric sorting not string compare
53 $DB_BTREE->{'compare'} = sub { $_[0] <=> $_[1] };
54 $x = tie %index, 'DB_File' => 'reel.idx',
55 O_CREAT|O_RDWR, 0666, $DB_BTREE;
57 $self->{db} = $x;
59 \%index;
62 =head2 update_index
64 Update ourself to contain the latest entries from the commit reel. This is
65 done by calling L<reel_revlist_iter> and freezing (see L<Storable>) the
66 resulting list into the index.
68 =cut
70 sub update_index {
71 my $self = shift;
72 my ($key, $val);
73 my $last_sha1;
74 my @revlist;
75 my $rev;
76 my $offset;
78 if ( $self->{db}->seq($key, $val, R_LAST) ) { # assume empty
79 $last_sha1 = undef;
81 else {
82 my $last_entry = thaw $val;
83 $last_sha1 = $last_entry->objectid;
86 my $iter = $self->reel_revlist_iter;
88 while ( my $rev = $iter->() ) {
89 $self->index->{$rev->offset} = freeze $rev;
93 =head2 reel_revlist_iter() returns VCS::Git::Torrent::CommitReel::Entry
95 Using the currently available references in the index, determine what other
96 references are needed to bring us up to date.
98 Commits are ordered according to the RFC.
99 L<http://gittorrent.utsl.gen.nz/rfc.html#org-reels>
101 =cut
103 sub reel_revlist_iter {
104 my $self = shift;
106 my @refs = values %{ $self->reel->end->refs };
107 my @not;
108 if ( $self->reel->start ) {
109 @not = values %{ $self->reel->start->refs };
112 my ($rev_list, $ctx) = $self->git->command_output_pipe
113 ("rev-list", "--date-order", "--reverse",
114 "--pretty=format:%P %ct",
115 @refs, @not ? ( "--not" => @not ) : () );
117 my @peek;
118 my $get_rev_list = sub {
119 if ( @_ ) {
120 unshift @peek, @_;
121 return;
123 elsif ( @peek ) {
124 return shift @peek;
126 return if not defined $rev_list;
127 if (eof($rev_list)) {
128 $self->git->command_close_pipe($rev_list, $ctx);
129 undef($rev_list);
130 return;
132 my ($commitid) = <$rev_list> =~ m{^commit (.*)} or die;
133 my ($parents, $when) = <$rev_list> =~ m{^(.*) (\d+)$}
134 or die;
135 my %parents = map { $_ => 1 } split /\s+/, $parents;
136 ({ commitid => $commitid,
137 parents => \%parents,
138 when => $when });
141 # note: the fact we need to pre-populate this %seen may be a
142 # good reason to say that reels should always include all the
143 # objects required for the first revisions in them; it
144 # introduces one point of bad scalability
145 my %seen;
146 if ( @not ) {
147 my ($base, $ctx) = $self->git->command_output_pipe
148 ("rev-list", "--objects", @not );
149 while ( my $rev = <$base> ) {
150 chomp($rev);
151 $seen{$rev}++;
153 $self->git->command_close_pipe($base, $ctx);
156 my @ready;
157 my $commit_iter = sub {
158 # keep grabbing items off the list, until we see one that
159 # a) has a different commit date, or
160 # b) has a parent which has not been written to the
161 # reel yet ("seen")
162 my $next = $get_rev_list->() or return shift @ready;
163 while ( $next and
164 ( !@ready or
165 $next->{when} == $ready[0]->{when} ) and
166 not grep { not $seen{$_} }
167 keys %{ $next->{parents} }
169 push @ready, $next;
170 $next = $get_rev_list->();
172 # put one back
173 $get_rev_list->($next) if $next;
175 # nothing left on @ready is not "ready to go"
176 @ready = sort { $a->{commitid} cmp $b->{commitid} }
177 @ready;
179 return shift @ready;
182 my @objects;
183 my $offset = 0;
184 my $git = $self->git;
185 my $object_iter = sub {
186 if ( !@objects ) {
187 my $next_commit = $commit_iter->()
188 or return;
190 my $id = $next_commit->{commitid};
192 @objects = grep { !$seen{$_->[0]}++ }
193 $self->_commit_objects($id);
196 my $x = shift @objects;
198 my $rev = VCS::Git::Torrent::CommitReel::Entry->new(
199 offset => $offset,
200 type => $x->[2],
201 size => $x->[1],
202 objectid => $x->[0],
203 ($x->[3] ?
204 ( path => $x->[3] ) : ()),
207 $offset += $rev->size;
209 return $rev;
212 return $object_iter;
215 sub _commit_objects {
216 my $self = shift;
217 my $commitid = shift;
219 my $pipe_read = $self->cat_file->[1];
220 my $pipe_write = $self->cat_file->[2];
222 my $git = $self->git;
224 my $commit_size = do {
225 print $pipe_write $commitid . "\n";
226 (split(/ /, <$pipe_read>))[2];
228 my @deps = $git->command (qw(rev-list --objects), $commitid.'^!');
230 # get information for all the objects between these two commits;
231 # use git-cat-file --batch-check, asynchronously.
232 my @x;
233 my $x_item = 0;
234 my $flush_pipe = sub {
235 while ($x[$x_item] and
236 defined(my $line = <$pipe_read>)) {
237 my $x = $x[$x_item++];
238 (undef, $x->[2], $x->[1]) = split(/ /, $line);
242 $pipe_write->autoflush(0);
243 $pipe_read->blocking(0);
245 # the sort order in the RFC is quite specific - objects must
246 # have the objects they refer to come first.
247 foreach my $d (sort { $a cmp $b } @deps) {
248 my ($d_hash, $d_what) = split /\s+/, $d, 2;
249 next if $d_hash eq $commitid;
250 print $pipe_write $d_hash . "\n";
251 push @x, [ $d_hash, undef, undef, $d_what ];
253 # more than 42 or so answers backed up might end up
254 # with us getting SIGPIPE
255 $flush_pipe->() if (@x - $x_item > 42);
258 $pipe_write->autoflush(1);
259 $pipe_read->blocking(1);
260 $flush_pipe->();
262 my @rfc_ordered;
264 # an object is ready if:
265 # - it is a blob, or
266 # - it is a tree, but there are no other trees or
267 # blobs under a sub-path of this one.
268 while ( @x ) {
269 for (my $i = 0; $i <= $#x; $i++ ) {
270 my $ready;
271 if ( $x[$i][2] eq "blob" ) {
272 $ready = 1;
274 elsif ( $x[$i][2] eq "tree" ) {
275 my $path = $x[$i][3];
276 my $pat = $path ? qr{^\Q$path\E/} : qr{.};
277 $ready = !grep { $_->[3] && $_->[3] =~ m{$pat} }
278 @x[$i+1..$#x];
280 else {
281 confess "encountered strange object "
282 ."'$x[$i][2]'";
284 if ( $ready ) {
285 push @rfc_ordered, $x[$i];
286 @x=(@x[0..$i-1], @x[$i+1..$#x]);
287 $i = -1;
292 push @rfc_ordered, [ $commitid, $commit_size, "commit" ];
293 @rfc_ordered;