2 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 #include <sys/types.h>
27 #include "got_compat.h"
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_cancel.h"
32 #include "got_commit_graph.h"
35 #include "got_lib_delta.h"
36 #include "got_lib_inflate.h"
37 #include "got_lib_object.h"
38 #include "got_lib_object_idset.h"
40 struct got_commit_graph_node
{
41 struct got_object_id id
;
43 /* Used only during iteration. */
45 TAILQ_ENTRY(got_commit_graph_node
) entry
;
48 TAILQ_HEAD(got_commit_graph_iter_list
, got_commit_graph_node
);
50 struct got_commit_graph_branch_tip
{
51 struct got_object_id
*commit_id
;
52 struct got_commit_object
*commit
;
53 struct got_commit_graph_node
*new_node
;
56 struct got_commit_graph
{
57 /* The set of all commits we have traversed. */
58 struct got_object_idset
*node_ids
;
61 #define GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL 0x01
64 * A set of object IDs of known parent commits which we have not yet
65 * traversed. Each commit ID in this set represents a branch in commit
66 * history: Either the first-parent branch of the head node, or another
67 * branch corresponding to a traversed merge commit for which we have
68 * not traversed a branch point commit yet.
70 * Whenever we add a commit with a matching ID to the graph, we remove
71 * its corresponding element from this set, and add new elements for
72 * each of that commit's parent commits which were not traversed yet.
74 * When API users ask us to fetch more commits, we fetch commits from
75 * all currently open branches. This allows API users to process
76 * commits in linear order even though the history contains branches.
78 struct got_object_idset
*open_branches
;
80 /* Array of branch tips for fetch_commits_from_open_branches(). */
81 struct got_commit_graph_branch_tip
*tips
;
84 /* Path of tree entry of interest to the API user. */
88 * Nodes which will be passed to the API user next, sorted by
91 struct got_commit_graph_iter_list iter_list
;
94 static const struct got_error
*
95 detect_changed_path(int *changed
, struct got_commit_object
*commit
,
96 struct got_object_id
*commit_id
, const char *path
,
97 struct got_repository
*repo
)
99 const struct got_error
*err
= NULL
;
100 struct got_commit_object
*pcommit
= NULL
;
101 struct got_tree_object
*tree
= NULL
, *ptree
= NULL
;
102 struct got_object_qid
*pid
;
104 if (got_path_is_root_dir(path
)) {
111 pid
= STAILQ_FIRST(&commit
->parent_ids
);
113 struct got_object_id
*obj_id
;
114 err
= got_object_id_by_path(&obj_id
, repo
, commit_id
, path
);
116 if (err
->code
== GOT_ERR_NO_TREE_ENTRY
)
119 *changed
= 1; /* The path was created in this commit. */
124 err
= got_object_open_as_tree(&tree
, repo
, commit
->tree_id
);
128 err
= got_object_open_as_commit(&pcommit
, repo
, pid
->id
);
132 err
= got_object_open_as_tree(&ptree
, repo
, pcommit
->tree_id
);
136 err
= got_object_tree_path_changed(changed
, tree
, ptree
, path
, repo
);
139 got_object_tree_close(tree
);
141 got_object_tree_close(ptree
);
143 got_object_commit_close(pcommit
);
148 add_node_to_iter_list(struct got_commit_graph
*graph
,
149 struct got_commit_graph_node
*node
, time_t committer_time
)
151 struct got_commit_graph_node
*n
, *next
;
153 node
->timestamp
= committer_time
;
155 n
= TAILQ_FIRST(&graph
->iter_list
);
157 next
= TAILQ_NEXT(n
, entry
);
158 if (next
&& node
->timestamp
>= next
->timestamp
) {
159 TAILQ_INSERT_BEFORE(next
, node
, entry
);
164 TAILQ_INSERT_TAIL(&graph
->iter_list
, node
, entry
);
167 static const struct got_error
*
168 add_node(struct got_commit_graph_node
**new_node
,
169 struct got_commit_graph
*graph
, struct got_object_id
*commit_id
,
170 struct got_repository
*repo
)
172 const struct got_error
*err
= NULL
;
173 struct got_commit_graph_node
*node
;
177 node
= calloc(1, sizeof(*node
));
179 return got_error_from_errno("calloc");
181 memcpy(&node
->id
, commit_id
, sizeof(node
->id
));
182 err
= got_object_idset_add(graph
->node_ids
, &node
->id
, NULL
);
191 * Ask got-read-pack to traverse first-parent history until a commit is
192 * encountered which modified graph->path, or until the pack file runs
193 * out of relevant commits. This is faster than sending an individual
194 * request for each commit stored in the pack file.
196 static const struct got_error
*
197 packed_first_parent_traversal(int *ncommits_traversed
,
198 struct got_commit_graph
*graph
, struct got_object_id
*commit_id
,
199 struct got_repository
*repo
)
201 const struct got_error
*err
= NULL
;
202 struct got_object_id_queue traversed_commits
;
203 struct got_object_qid
*qid
;
205 STAILQ_INIT(&traversed_commits
);
206 *ncommits_traversed
= 0;
208 err
= got_traverse_packed_commits(&traversed_commits
,
209 commit_id
, graph
->path
, repo
);
213 /* Add all traversed commits to the graph... */
214 STAILQ_FOREACH(qid
, &traversed_commits
, entry
) {
215 struct got_commit_graph_node
*node
;
217 if (got_object_idset_contains(graph
->open_branches
, qid
->id
))
219 if (got_object_idset_contains(graph
->node_ids
, qid
->id
))
222 (*ncommits_traversed
)++;
224 /* ... except the last commit is the new branch tip. */
225 if (STAILQ_NEXT(qid
, entry
) == NULL
) {
226 err
= got_object_idset_add(graph
->open_branches
,
231 err
= add_node(&node
, graph
, qid
->id
, repo
);
236 got_object_id_queue_free(&traversed_commits
);
240 static const struct got_error
*
241 close_branch(struct got_commit_graph
*graph
, struct got_object_id
*commit_id
)
243 const struct got_error
*err
;
245 err
= got_object_idset_remove(NULL
, graph
->open_branches
, commit_id
);
246 if (err
&& err
->code
!= GOT_ERR_NO_OBJ
)
251 static const struct got_error
*
252 advance_branch(struct got_commit_graph
*graph
, struct got_object_id
*commit_id
,
253 struct got_commit_object
*commit
, struct got_repository
*repo
)
255 const struct got_error
*err
;
256 struct got_object_qid
*qid
;
258 err
= close_branch(graph
, commit_id
);
262 if (graph
->flags
& GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL
) {
263 qid
= STAILQ_FIRST(&commit
->parent_ids
);
265 got_object_idset_contains(graph
->open_branches
, qid
->id
))
268 * The root directory always changes by definition, and when
269 * logging the root we want to traverse consecutive commits
270 * even if they point at the same tree.
271 * But if we are looking for a specific path then we can avoid
272 * fetching packed commits which did not modify the path and
273 * only fetch their IDs. This speeds up 'got blame'.
275 if (!got_path_is_root_dir(graph
->path
) &&
276 (commit
->flags
& GOT_COMMIT_FLAG_PACKED
)) {
278 err
= packed_first_parent_traversal(&ncommits
,
279 graph
, qid
->id
, repo
);
280 if (err
|| ncommits
> 0)
283 return got_object_idset_add(graph
->open_branches
,
288 * If we are graphing commits for a specific path, skip branches
289 * which do not contribute any content to this path.
291 if (commit
->nparents
> 1 && !got_path_is_root_dir(graph
->path
)) {
292 struct got_object_id
*merged_id
, *prev_id
= NULL
;
293 int branches_differ
= 0;
295 err
= got_object_id_by_path(&merged_id
, repo
, commit_id
,
300 STAILQ_FOREACH(qid
, &commit
->parent_ids
, entry
) {
301 struct got_object_id
*id
;
303 if (got_object_idset_contains(graph
->open_branches
,
307 err
= got_object_id_by_path(&id
, repo
, qid
->id
,
310 if (err
->code
== GOT_ERR_NO_TREE_ENTRY
) {
320 if (!branches_differ
&&
321 got_object_id_cmp(id
, prev_id
) != 0)
328 * If a branch has created the merged content we can
329 * skip any other branches.
331 if (got_object_id_cmp(merged_id
, id
) == 0) {
332 err
= got_object_idset_add(graph
->open_branches
,
346 * If the path's content is the same on all branches,
347 * follow the first parent only.
349 if (!branches_differ
) {
350 qid
= STAILQ_FIRST(&commit
->parent_ids
);
353 if (got_object_idset_contains(graph
->open_branches
,
356 if (got_object_idset_contains(graph
->node_ids
,
358 return NULL
; /* parent already traversed */
359 return got_object_idset_add(graph
->open_branches
,
364 STAILQ_FOREACH(qid
, &commit
->parent_ids
, entry
) {
365 if (got_object_idset_contains(graph
->open_branches
, qid
->id
))
367 if (got_object_idset_contains(graph
->node_ids
, qid
->id
))
368 continue; /* parent already traversed */
369 err
= got_object_idset_add(graph
->open_branches
, qid
->id
, NULL
);
377 const struct got_error
*
378 got_commit_graph_open(struct got_commit_graph
**graph
,
379 const char *path
, int first_parent_traversal
)
381 const struct got_error
*err
= NULL
;
383 *graph
= calloc(1, sizeof(**graph
));
385 return got_error_from_errno("calloc");
387 TAILQ_INIT(&(*graph
)->iter_list
);
389 (*graph
)->path
= strdup(path
);
390 if ((*graph
)->path
== NULL
) {
391 err
= got_error_from_errno("strdup");
395 (*graph
)->node_ids
= got_object_idset_alloc();
396 if ((*graph
)->node_ids
== NULL
) {
397 err
= got_error_from_errno("got_object_idset_alloc");
401 (*graph
)->open_branches
= got_object_idset_alloc();
402 if ((*graph
)->open_branches
== NULL
) {
403 err
= got_error_from_errno("got_object_idset_alloc");
407 if (first_parent_traversal
)
408 (*graph
)->flags
|= GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL
;
411 got_commit_graph_close(*graph
);
417 struct add_branch_tip_arg
{
418 struct got_commit_graph_branch_tip
*tips
;
420 struct got_repository
*repo
;
421 struct got_commit_graph
*graph
;
424 static const struct got_error
*
425 add_branch_tip(struct got_object_id
*commit_id
, void *data
, void *arg
)
427 const struct got_error
*err
;
428 struct add_branch_tip_arg
*a
= arg
;
429 struct got_commit_graph_node
*new_node
;
430 struct got_commit_object
*commit
;
432 err
= got_object_open_as_commit(&commit
, a
->repo
, commit_id
);
436 err
= add_node(&new_node
, a
->graph
, commit_id
, a
->repo
);
440 a
->tips
[a
->ntips
].commit_id
= new_node
? &new_node
->id
: NULL
;
441 a
->tips
[a
->ntips
].commit
= commit
;
442 a
->tips
[a
->ntips
].new_node
= new_node
;
448 static const struct got_error
*
449 fetch_commits_from_open_branches(struct got_commit_graph
*graph
,
450 struct got_repository
*repo
, got_cancel_cb cancel_cb
, void *cancel_arg
)
452 const struct got_error
*err
;
453 struct add_branch_tip_arg arg
;
456 ntips
= got_object_idset_num_elements(graph
->open_branches
);
460 /* (Re-)allocate branch tips array if necessary. */
461 if (graph
->ntips
< ntips
) {
462 struct got_commit_graph_branch_tip
*tips
;
463 tips
= recallocarray(graph
->tips
, graph
->ntips
, ntips
,
466 return got_error_from_errno("recallocarray");
468 graph
->ntips
= ntips
;
470 arg
.tips
= graph
->tips
;
471 arg
.ntips
= 0; /* add_branch_tip() will increment */
474 err
= got_object_idset_for_each(graph
->open_branches
, add_branch_tip
,
479 for (i
= 0; i
< arg
.ntips
; i
++) {
480 struct got_object_id
*commit_id
;
481 struct got_commit_object
*commit
;
482 struct got_commit_graph_node
*new_node
;
486 err
= (*cancel_cb
)(cancel_arg
);
491 commit_id
= arg
.tips
[i
].commit_id
;
492 commit
= arg
.tips
[i
].commit
;
493 new_node
= arg
.tips
[i
].new_node
;
495 err
= detect_changed_path(&changed
, commit
, commit_id
,
498 if (err
->code
!= GOT_ERR_NO_OBJ
)
501 * History of the path stops here on the current
502 * branch. Keep going on other branches.
504 err
= close_branch(graph
, commit_id
);
510 add_node_to_iter_list(graph
, new_node
,
511 got_object_commit_get_committer_time(commit
));
512 err
= advance_branch(graph
, commit_id
, commit
, repo
);
517 for (i
= 0; i
< arg
.ntips
; i
++)
518 got_object_commit_close(arg
.tips
[i
].commit
);
523 got_commit_graph_close(struct got_commit_graph
*graph
)
525 if (graph
->open_branches
)
526 got_object_idset_free(graph
->open_branches
);
528 got_object_idset_free(graph
->node_ids
);
534 const struct got_error
*
535 got_commit_graph_iter_start(struct got_commit_graph
*graph
,
536 struct got_object_id
*id
, struct got_repository
*repo
,
537 got_cancel_cb cancel_cb
, void *cancel_arg
)
539 const struct got_error
*err
= NULL
;
541 if (!TAILQ_EMPTY(&graph
->iter_list
))
542 return got_error(GOT_ERR_ITER_BUSY
);
544 err
= got_object_idset_add(graph
->open_branches
, id
, NULL
);
548 /* Locate first commit which changed graph->path. */
549 while (TAILQ_EMPTY(&graph
->iter_list
) &&
550 got_object_idset_num_elements(graph
->open_branches
) > 0) {
551 err
= fetch_commits_from_open_branches(graph
, repo
,
552 cancel_cb
, cancel_arg
);
557 if (TAILQ_EMPTY(&graph
->iter_list
)) {
559 if (got_path_is_root_dir(graph
->path
))
560 return got_error_no_obj(id
);
562 while (path
[0] == '/')
564 return got_error_path(path
, GOT_ERR_NO_TREE_ENTRY
);
570 const struct got_error
*
571 got_commit_graph_iter_next(struct got_object_id
**id
,
572 struct got_commit_graph
*graph
, struct got_repository
*repo
,
573 got_cancel_cb cancel_cb
, void *cancel_arg
)
575 const struct got_error
*err
= NULL
;
576 struct got_commit_graph_node
*node
;
580 node
= TAILQ_FIRST(&graph
->iter_list
);
582 /* We are done iterating, or iteration was not started. */
583 return got_error(GOT_ERR_ITER_COMPLETED
);
586 while (TAILQ_NEXT(node
, entry
) == NULL
&&
587 got_object_idset_num_elements(graph
->open_branches
) > 0) {
588 err
= fetch_commits_from_open_branches(graph
, repo
,
589 cancel_cb
, cancel_arg
);
595 TAILQ_REMOVE(&graph
->iter_list
, node
, entry
);
599 const struct got_error
*
600 got_commit_graph_find_youngest_common_ancestor(struct got_object_id
**yca_id
,
601 struct got_object_id
*commit_id
, struct got_object_id
*commit_id2
,
602 int first_parent_traversal
,
603 struct got_repository
*repo
, got_cancel_cb cancel_cb
, void *cancel_arg
)
605 const struct got_error
*err
= NULL
;
606 struct got_commit_graph
*graph
= NULL
, *graph2
= NULL
;
607 int completed
= 0, completed2
= 0;
608 struct got_object_idset
*commit_ids
;
612 commit_ids
= got_object_idset_alloc();
613 if (commit_ids
== NULL
)
614 return got_error_from_errno("got_object_idset_alloc");
616 err
= got_commit_graph_open(&graph
, "/", first_parent_traversal
);
620 err
= got_commit_graph_open(&graph2
, "/", first_parent_traversal
);
624 err
= got_commit_graph_iter_start(graph
, commit_id
, repo
,
625 cancel_cb
, cancel_arg
);
629 err
= got_commit_graph_iter_start(graph2
, commit_id2
, repo
,
630 cancel_cb
, cancel_arg
);
635 struct got_object_id
*id
= NULL
, *id2
= NULL
;
638 err
= (*cancel_cb
)(cancel_arg
);
644 err
= got_commit_graph_iter_next(&id
, graph
, repo
,
645 cancel_cb
, cancel_arg
);
647 if (err
->code
!= GOT_ERR_ITER_COMPLETED
)
655 err
= got_commit_graph_iter_next(&id2
, graph2
, repo
,
656 cancel_cb
, cancel_arg
);
658 if (err
->code
!= GOT_ERR_ITER_COMPLETED
)
666 if (got_object_idset_contains(commit_ids
, id
)) {
667 *yca_id
= got_object_id_dup(id
);
670 err
= got_error_from_errno("got_object_id_dup");
674 err
= got_object_idset_add(commit_ids
, id
, NULL
);
679 if (got_object_idset_contains(commit_ids
, id2
)) {
680 *yca_id
= got_object_id_dup(id2
);
683 err
= got_error_from_errno("got_object_id_dup");
687 err
= got_object_idset_add(commit_ids
, id2
, NULL
);
692 if (completed
&& completed2
) {
693 err
= got_error(GOT_ERR_ANCESTRY
);
699 got_object_idset_free(commit_ids
);
701 got_commit_graph_close(graph
);
703 got_commit_graph_close(graph2
);