install.sh: verify $Girocco::Config::nc_openbsd_bin supports -U
[girocco.git] / src / list_packs.c
blob903201ea889c3971e9177d3cb45b369683a8948e
1 /*
3 list_packs.c -- list_packs utility to count Git packs and their objects
4 Copyright (C) 2016 Kyle J. McKay. All rights reserved.
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 This utility is intended to be used by a script to assist in determining
24 whether or not it's time to run gc in the case where gc.auto=0 and when
25 it is to provide a convenient mechanism to feed selected pack names to
26 a script for further processing at gc time.
28 Various options are available to select which .pack files to inspect
29 including supplying the names. This utility is intended to be able
30 to read the pack names from the --export-pack-edges file that may be
31 produced by git fast-import without needing any preprocessing.
33 See the list_packs.txt file or run the command with no arguments for help.
36 #define _XOPEN_SOURCE 600
37 #undef _FILE_OFFSET_BITS
38 #define _FILE_OFFSET_BITS 64
39 #include <stdarg.h>
40 #include <stddef.h>
41 #include <stdint.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <sys/types.h> /* in some cases required before dirent.h or sys/stat.h */
45 #include <dirent.h>
46 #include <inttypes.h>
47 #include <limits.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <arpa/inet.h>
51 #include <sys/stat.h>
/* Fall back to a conservative PATH_MAX when the platform does not define one */
#ifndef PATH_MAX
#define PATH_MAX 1024
#endif

/* PATH_BUFF is the size of the filename work buffer: PATH_MAX, but never
 * smaller than 4096 bytes */
#if PATH_MAX >= 4096
#define PATH_BUFF PATH_MAX
#else
#define PATH_BUFF 4096
#endif
/* Forward declarations for helpers defined after main() */

/* read the next pack name from f into b (buffer of s bytes) */
static int fgetfn(FILE *f, char *b, size_t s);

/* apply the exclusion filters to one pack; bl is the base name length */
static void process_pack_filename(char *fn, size_t bl, int hasps);

/* count or print one accepted pack */
static void process_pack(const char *fn, uint32_t objcnt);

/* flush any packs that were queued for --object-limit processing */
static void process_packs_finish(void);
/*
 * One accepted pack queued for later sorting (only used when an object
 * limit is active).  Entries are allocated with extra trailing bytes so
 * that filename holds the complete NUL-terminated pack path.
 */
struct pack_info {
	struct pack_info *next;	/* singly linked list, most recent first */
	uint32_t objcount;	/* object count read from the pack header */
	char filename[1];	/* over-allocated: strlen(name) + 1 bytes */
};
/*
 * One sibling-file extension filter.  A pack is excluded depending on
 * whether a file with the same base name and this extension exists.
 */
struct ext_info {
	struct ext_info *next;
	size_t extlen;	/* strlen(ext) */
	char ext[12];	/* includes leading '.' and trailing '\0' */
	int xxt;	/* >0 excludes if ext present, 0 excludes if ext NOT present */
};

/* Longest extension name a user may supply (without the '.' and the '\0') */
#define MAX_EXT_LEN (sizeof(((const struct ext_info *)0)->ext) - 2)

/* Characters that may not appear anywhere in a user supplied extension */
#define BAD_EXT_CHARS ":./\\ \t\n\v\f\r"
83 const char USAGE[] =
84 #include "list_packs.inc"
87 static int opt_q = 0;
88 static int opt_xix = -1;
89 static long opt_limit = 0;
90 static int opt_count = 0;
91 static uint64_t count = 0;
92 static uint64_t objlimit = 0;
93 static int opt_desc = 0;
94 static int opt_boundary = 0;
95 static uint64_t maxlimit = 0;
96 static uint64_t processed = 0;
98 static struct pack_info *packlist = NULL;
99 static size_t packcount = 0;
101 static struct ext_info *extlist = NULL;
103 static char fnbuff[PATH_BUFF];
105 static void die(const char *fmt, ...)
107 va_list args;
108 va_start(args, fmt);
109 fflush(stdout);
110 if (!opt_q)
111 vfprintf(stderr, fmt, args);
112 va_end(args);
113 exit(EXIT_FAILURE);
/*
 * Report an out of memory condition via die().
 *
 * what names the thing that could not be allocated; a NULL or empty
 * string produces the bare "out of memory" message.
 */
static void dienomem(const char *what)
{
	const char *joiner = "";
	const char *subject = "";

	if (what != NULL && *what != '\0') {
		joiner = " for ";
		subject = what;
	}
	die("list_packs: error: out of memory%s%s\n", joiner, subject);
}
/* Out of memory while growing the queued pack list: report and exit */
static void dienopackmem(void)
{
	dienomem("pack list");
}
/* Out of memory while growing the extension filter list: report and exit */
static void dienoextmem(void)
{
	dienomem("ext list");
}
134 static void dieusage(int err)
136 FILE *f = err ? stderr : stdout;
137 fflush(stdout);
138 if (!err || !opt_q)
139 fprintf(f, "%s", USAGE);
140 exit(err);
/*
 * Return 1 if the first l bytes of s end with the b byte suffix x AND at
 * least one character precedes the suffix, otherwise return 0.
 *
 * NULL pointers and an empty suffix never match.
 */
static int has_suffix(const char *s, size_t l, const char *x, size_t b)
{
	if (s == NULL || x == NULL)
		return 0;
	if (b == 0)
		return 0;
	if (l < (b + 1)) /* need ?<suffix>, not just <suffix> */
		return 0;
	return strncmp(s + (l - b), x, b) == 0;
}

/* Convenience wrappers for the two suffixes this utility cares about */
#define has_idx_suffix(s,l) has_suffix((s),(l),".idx",4)
#define has_pack_suffix(s,l) has_suffix((s),(l),".pack",5)
/*
 * Return 1 if s (of length l) has the exact form
 * "pack-<40 hexadecimal digits>.pack", otherwise return 0.
 */
static int is_pack_sha1_name(const char *s, size_t l)
{
	static const char hexdigits[] = "0123456789abcdefABCDEF";
	enum { PREFIX_LEN = 5, HASH_LEN = 40, SUFFIX_LEN = 5 };

	if (s == NULL || l != PREFIX_LEN + HASH_LEN + SUFFIX_LEN)
		return 0;
	if (strncmp(s, "pack-", PREFIX_LEN) != 0)
		return 0;
	if (strncmp(s + l - SUFFIX_LEN, ".pack", SUFFIX_LEN) != 0)
		return 0;
	/* the hex run must stop exactly at the '.' of the suffix */
	return strspn(s + PREFIX_LEN, hexdigits) == HASH_LEN;
}
161 static struct ext_info *find_add_ext(const char *ext)
163 size_t elen = strlen(ext);
164 struct ext_info *result = extlist;
166 while (result && strcmp(result->ext, ext)) {
167 result = result->next;
169 if (!result) {
170 result = (struct ext_info *)malloc(sizeof(struct ext_info));
171 if (!result)
172 dienoextmem();
173 result->extlen = elen + 1;
174 result->ext[0] = '.';
175 memcpy(&result->ext[1], ext, elen + 1);
176 if (elen + 2 < sizeof(result->ext))
177 memset(&result->ext[elen + 2], 0, sizeof(result->ext) - (elen + 2));
178 result->xxt = -1;
179 result->next = extlist;
180 extlist = result;
182 return result;
185 void handle_ext_option(const char *ext, int v)
187 size_t elen;
188 struct ext_info *einfo;
190 if (!ext || !*ext || !(elen = strlen(ext)) ||
191 elen > MAX_EXT_LEN || strcspn(ext, BAD_EXT_CHARS) != elen)
192 dieusage(EXIT_FAILURE);
193 if (!strcmp(ext, "idx")) {
194 opt_xix = v;
195 } else {
196 einfo = find_add_ext(ext);
197 einfo->xxt = v;
201 int main(int argc, char *argv[])
203 int argn;
204 int opt_a = 0;
205 const char *only = NULL;
206 const char *dir = NULL;
207 int is_stdin = 0;
208 FILE *in = NULL;
210 for (argn = 1; argn < argc; ++argn) {
211 if (!strcmp(argv[argn], "-h") || !strcmp(argv[argn], "--help")) {
212 dieusage(EXIT_SUCCESS);
213 } else if (!strcmp(argv[argn], "-q") || !strcmp(argv[argn], "--quiet")) {
214 opt_q = 1;
215 } else if (!strcmp(argv[argn], "-a") || !strcmp(argv[argn], "--all")) {
216 opt_a = 1;
217 } else if (!strcmp(argv[argn], "--exclude-idx")) {
218 opt_xix = 1;
219 } else if (!strcmp(argv[argn], "--exclude-no-idx")) {
220 opt_xix = 0;
221 } else if (!strcmp(argv[argn], "--exclude-keep")) {
222 handle_ext_option("keep", 1);
223 } else if (!strcmp(argv[argn], "--exclude-no-keep")) {
224 handle_ext_option("keep", 0);
225 } else if (!strcmp(argv[argn], "--exclude-bitmap")) {
226 handle_ext_option("bitmap", 1);
227 } else if (!strcmp(argv[argn], "--exclude-no-bitmap")) {
228 handle_ext_option("bitmap", 0);
229 } else if (!strcmp(argv[argn], "--exclude-bndl")) {
230 handle_ext_option("bndl", 1);
231 } else if (!strcmp(argv[argn], "--exclude-no-bndl")) {
232 handle_ext_option("bndl", 0);
233 } else if (!strcmp(argv[argn], "--count")) {
234 opt_count = 1;
235 } else if (!strcmp(argv[argn], "--count-objects")) {
236 opt_count = 2;
237 } else if (!strcmp(argv[argn], "--include-boundary")) {
238 opt_boundary = 1;
239 } else if (!strcmp(argv[argn], "--exclude-ext")) {
240 if (++argn >= argc)
241 dieusage(EXIT_FAILURE);
242 handle_ext_option(argv[argn], 1);
243 } else if (!strcmp(argv[argn], "--exclude-no-ext")) {
244 if (++argn >= argc)
245 dieusage(EXIT_FAILURE);
246 handle_ext_option(argv[argn], 0);
247 } else if (!strcmp(argv[argn], "--exclude-limit")) {
248 char *end;
249 long limit = 0;
251 if (++argn >= argc)
252 dieusage(EXIT_FAILURE);
253 limit = strtol(argv[argn], &end, 0);
254 if (!*argv[argn] || *end || !limit)
255 dieusage(EXIT_FAILURE);
256 opt_limit = limit;
257 } else if (!strcmp(argv[argn], "--object-limit")) {
258 char *end;
259 long limit = 0;
261 if (++argn >= argc)
262 dieusage(EXIT_FAILURE);
263 limit = strtol(argv[argn], &end, 0);
264 if (!*argv[argn] || *end || !limit)
265 dieusage(EXIT_FAILURE);
266 if (limit < 0) {
267 opt_desc = 1;
268 objlimit = (uint64_t)-limit;
269 } else {
270 objlimit = (uint64_t)limit;
272 } else if (!strcmp(argv[argn], "--max-matches")) {
273 char *end;
274 long limit = 0;
276 if (++argn >= argc)
277 dieusage(EXIT_FAILURE);
278 limit = strtol(argv[argn], &end, 0);
279 if (!*argv[argn] || *end || limit <= 0)
280 dieusage(EXIT_FAILURE);
281 maxlimit = (uint64_t)limit;
282 } else if (!strcmp(argv[argn], "--only")) {
283 if (++argn >= argc || !*argv[argn])
284 dieusage(EXIT_FAILURE);
285 only = argv[argn];
286 } else if (!strcmp(argv[argn], "-C")) {
287 if (++argn >= argc || !*argv[argn])
288 dieusage(EXIT_FAILURE);
289 if (chdir(argv[argn])) {
290 if (!opt_q)
291 fprintf(stderr, "list_packs: error: "
292 "chdir '%s' failed\n", argv[argn]);
293 exit(EXIT_FAILURE);
295 } else if (!strcmp(argv[argn], "--")) {
296 ++argn;
297 break;
298 } else if (argv[argn][0] == '-' && argv[argn][1]) {
299 dieusage(EXIT_FAILURE);
300 } else {
301 break;
304 if (argn < argc && *argv[argn])
305 dir = argv[argn++];
306 if (argn != argc || (!only && !dir) || (only && dir) || (only && opt_a))
307 dieusage(EXIT_FAILURE);
308 if (only) {
309 if (!strcmp(only, "-")) {
310 is_stdin = 1;
311 in = stdin;
312 } else {
313 in = fopen(only, "r");
314 if (!in)
315 die("list_packs: error: could not open %s\n", only);
317 while (fgetfn(in, fnbuff, sizeof(fnbuff) - (MAX_EXT_LEN + 1))) {
318 char *fn = fnbuff;
319 size_t l = strlen(fn);
320 int ips;
322 if (!l)
323 continue;
324 if (l > 2 && !strncmp(fn, "./", 2)) {
325 fn += 2;
326 l -= 2;
328 ips = has_pack_suffix(fn, l);
329 process_pack_filename(fn, (ips ? l - 5 : l), ips);
331 if (!is_stdin)
332 fclose(in);
333 } else {
334 size_t l;
335 DIR *d;
336 struct dirent *e;
338 l = strlen(dir);
339 while (l > 1 && dir[l-1] == '/') {
340 --l;
342 if (l > 2 && !strncmp(dir, "./", 2)) {
343 dir += 2;
344 l -= 2;
346 if (l + 10 /* "/?.bitmap\0" */ > PATH_BUFF)
347 die("list_packs: error: dirname too long\n");
348 memcpy(fnbuff, dir, l);
349 fnbuff[l] = '\0';
350 d = opendir(fnbuff);
351 if (!d)
352 die("list_packs: error: could not read directory %s\n", fnbuff);
353 if (!strcmp(fnbuff, ".")) {
354 l = 0;
355 fnbuff[0] = '\0';
357 if (l && fnbuff[l-1] != '/')
358 fnbuff[l++] = '/';
359 while ((e = readdir(d)) != NULL) {
360 /* d_namlen is a nice, but non-POSIX extension */
361 size_t el = strlen(e->d_name);
363 if (has_pack_suffix(e->d_name, el) &&
364 (opt_a || is_pack_sha1_name(e->d_name, el))) {
365 if (l + el + 3 /* "ap\0" */ > PATH_BUFF) {
366 if (!opt_q)
367 fprintf(stderr, "list_packs: warning: "
368 "ignored input filename greater "
369 "than %d characters long\n",
370 PATH_BUFF - 3);
371 continue;
373 memcpy(fnbuff + l, e->d_name, el + 1 /* \0 */);
374 process_pack_filename(fnbuff, l + el - 5 /* .pack */, 1);
377 closedir(d);
379 process_packs_finish();
380 if (opt_count)
381 printf("%"PRIu64"\n", count);
383 return EXIT_SUCCESS;
386 #define FNDELIM "\t\n\v\f\r :"
388 static int fgetfn(FILE *f, char *b, size_t s)
390 size_t l, fnl;
391 int trunc;
393 if (!fgets(b, (int)s, f)) {
394 if (ferror(f)) {
395 if (!opt_q)
396 fprintf(stderr, "list_packs: error: an error "
397 "occurred reading pack name list file\n");
398 exit(EXIT_FAILURE);
400 return 0;
402 if (!*b)
403 return 1;
404 l = strlen(b);
405 fnl = strcspn(b, FNDELIM);
406 if (b[l-1] != '\n' && !feof(f)) {
407 int ch;
408 flockfile(f);
409 while ((ch = getc_unlocked(f)) != EOF && ch != '\n') {
410 /* loop */
412 funlockfile(f);
413 trunc = 1;
414 } else {
415 trunc = 0;
417 if (fnl < l || (!ferror(f) && !trunc)) {
418 b[fnl] = '\0';
419 return 1;
421 if (ferror(f)) {
422 if (!opt_q)
423 fprintf(stderr, "list_packs: error: an error "
424 "occurred reading pack name list file\n");
425 exit(EXIT_FAILURE);
427 if (!opt_q)
428 fprintf(stderr, "list_packs: warning: ignored input filename "
429 "greater than %d characters long\n", (int)s - 2);
430 *b = '\0';
431 return 1;
434 static int file_exists(const char *fn, struct stat *s)
436 if (!stat(fn, s)) {
437 if (S_ISREG(s->st_mode))
438 return 1;
439 if (!opt_q)
440 fprintf(stderr, "list_packs: warning: ignoring "
441 "non-file '%s'\n", fn);
443 return 0;
446 static void process_pack_filename(char *fn, size_t bl, int hasps)
448 struct stat ps, es;
449 FILE *f;
450 union {
451 uint32_t u[3];
452 char c[12];
453 } hdr;
454 uint32_t packver;
455 uint32_t objcnt;
456 const struct ext_info *einfo;
458 if (stat(fn, &ps) || !S_ISREG(ps.st_mode)) {
459 if (!opt_q)
460 fprintf(stderr, "list_packs: warning: ignoring "
461 "non-file '%s'\n", fn);
462 return;
464 if (ps.st_size < 32) {
465 if (!opt_q)
466 fprintf(stderr, "list_packs: warning: ignoring "
467 "invalid pack file '%s'\n", fn);
468 return;
470 einfo = extlist;
471 while (einfo) {
472 if (einfo->xxt >= 0) {
473 int hext;
475 memcpy(fn + bl, einfo->ext, einfo->extlen + 1);
476 hext = file_exists(fn, &es);
477 if ((einfo->xxt && hext) || (!einfo->xxt && !hext))
478 return;
480 einfo = einfo->next;
482 if (opt_xix >= 0) {
483 int hx;
485 memcpy(fn + bl, ".idx", 5);
486 hx = file_exists(fn, &es);
487 if ((opt_xix && hx) || (!opt_xix && !hx))
488 return;
490 if (hasps)
491 memcpy(fn + bl, ".pack", 6);
492 else
493 fn[bl] = '\0';
494 f = fopen(fn, "rb");
495 if (!f) {
496 if (!opt_q)
497 fprintf(stderr, "list_packs: warning: ignoring "
498 "unopenable pack file '%s'\n", fn);
499 return;
501 if (fread(&hdr, 12, 1, f) != 1) {
502 fclose(f);
503 if (!opt_q)
504 fprintf(stderr, "list_packs: warning: ignoring "
505 "unreadable pack file '%s'\n", fn);
506 return;
508 fclose(f);
509 packver = ntohl(hdr.u[1]);
510 objcnt = ntohl(hdr.u[2]);
511 if (memcmp(hdr.c, "PACK", 4) || (packver != 2 && packver != 3) ||
512 ps.st_size < ((off_t)objcnt + 32)) {
513 if (!opt_q)
514 fprintf(stderr, "list_packs: warning: ignoring "
515 "invalid pack file '%s'\n", fn);
516 return;
518 if (!opt_xix && es.st_size < ((off_t)objcnt * 28 + 1072)) {
519 if (!opt_q)
520 fprintf(stderr, "list_packs: warning: ignoring pack "
521 "with invalid idx file '%.*s.idx'\n", (int)bl, fn);
522 return;
524 if (opt_limit) {
525 if ((opt_limit > 0 && objcnt >= (uint32_t)opt_limit) ||
526 (opt_limit < 0 && objcnt < (uint32_t)-opt_limit))
527 return;
529 /* the PACK file passed all the checks, process it */
530 if (objlimit) {
531 size_t fnlen = strlen(fn);
532 struct pack_info *info = (struct pack_info *)
533 malloc(sizeof(struct pack_info) + fnlen);
535 if (!info)
536 dienopackmem();
537 info->objcount = objcnt;
538 memcpy(info->filename, fn, fnlen + 1);
539 info->next = packlist;
540 packlist = info;
541 ++packcount;
542 } else {
543 process_pack(fn, objcnt);
547 static void process_pack(const char *fn, uint32_t objcnt)
549 if (maxlimit && processed >= maxlimit)
550 return;
551 if (opt_count) {
552 if (opt_count > 1)
553 count += objcnt;
554 else
555 ++count;
556 } else {
557 printf("%s\n", fn);
559 ++processed;
562 static void process_pack_info(const struct pack_info *pack)
564 process_pack(pack->filename, pack->objcount);
567 static int sort_asc(const void *p, const void *q)
569 const struct pack_info **a = (const struct pack_info **)p;
570 const struct pack_info **b = (const struct pack_info **)q;
571 if ((*a)->objcount < (*b)->objcount)
572 return -1;
573 if ((*a)->objcount > (*b)->objcount)
574 return 1;
575 return strcmp((*a)->filename, (*b)->filename);
/* qsort comparator: exact reverse of sort_asc (arguments swapped) */
static int sort_dsc(const void *p, const void *q)
{
	return sort_asc(q, p);
}
583 static void process_packs_finish(void)
585 struct pack_info **table, *p;
586 size_t i;
587 uint64_t tally;
589 if (!objlimit || !packlist || !packcount)
590 return;
591 table = (struct pack_info **)malloc(sizeof(struct pack_info *) * packcount);
592 if (!table)
593 dienopackmem();
594 i = 0;
595 p = packlist;
596 do {
597 table[i++] = p;
598 p = p->next;
599 } while (p);
600 qsort(table, packcount, sizeof(struct pack_info *), (opt_desc ? sort_dsc : sort_asc));
601 tally = 0;
602 for (i=0; i < packcount; ++i) {
603 tally += table[i]->objcount;
604 if (tally <= objlimit) {
605 process_pack_info(table[i]);
606 } else {
607 if (opt_boundary)
608 process_pack_info(table[i]);
609 break;