update-all-config.pl: warn about questionable fetch specs
[girocco.git] / src / list_packs.c
blob67e181d2c9b82922c93776346fa1bd898a2d90bf
1 /*
3 list_packs.c -- list_packs utility to count Git packs and their objects
4 Copyright (C) 2016,2017 Kyle J. McKay.
5 All rights reserved.
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License
9 as published by the Free Software Foundation; either version 2
10 of the License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 This utility is intended to be used by a script to assist in determining
25 whether or not it's time to run gc in the case where gc.auto=0 and when
26 it is to provide a convenient mechanism to feed selected pack names to
27 a script for further processing at gc time.
29 Various options are available to select which .pack files to inspect
30 including supplying the names. This utility is intended to be able
31 to read the pack names from the --export-pack-edges file that may be
32 produced by git fast-import without needing any preprocessing.
34 See the list_packs.txt file or run the command with no arguments for help.
37 #define _XOPEN_SOURCE 600
38 #undef _FILE_OFFSET_BITS
39 #define _FILE_OFFSET_BITS 64
40 #include <stdarg.h>
41 #include <stddef.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <sys/types.h> /* in some cases required before dirent.h or sys/stat.h */
46 #include <dirent.h>
47 #include <inttypes.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <arpa/inet.h>
52 #include <sys/stat.h>
54 #ifndef PATH_MAX
55 #define PATH_MAX 1024
56 #endif
57 #if PATH_MAX < 4096
58 #define PATH_BUFF 4096
59 #else
60 #define PATH_BUFF PATH_MAX
61 #endif
/* Forward declarations for helpers that are defined after main(). */
static int fgetfn(FILE *f, char *b, size_t s);
static void process_pack_filename(char *fn, size_t bl, int hasps);
static void process_pack(const char *fn, uint32_t objcnt);
static void process_packs_finish(void);
/*
 * One pack queued for deferred processing (singly linked list node).
 *
 * filename is the start of a variable-length, NUL-terminated path: nodes
 * are allocated as sizeof(struct pack_info) + strlen(path) bytes, so the
 * one declared byte accounts for the terminating NUL.
 */
struct pack_info {
	struct pack_info *next;	/* next queued pack or NULL */
	uint32_t objcount;	/* object count read from the pack header */
	char filename[1];	/* actually strlen(path) + 1 bytes */
};
/*
 * One --exclude-[no-]ext / --exclude-[no-]sfx rule (singly linked list node).
 */
struct ext_info {
	struct ext_info *next;
	size_t extlen; /* strlen(ext) */
	char ext[12]; /* includes leading '.' and trailing '\0' */
	int xxt; /* >0 excludes if ext present, 0 excludes if ext NOT present */
};

/* Longest user-supplied extension: ext[] minus the leading '.' and the NUL */
#define MAX_EXT_LEN (sizeof(((const struct ext_info *)0)->ext) - 2)
/* Longest user-supplied suffix: ext[] minus the NUL (no leading '.') */
#define MAX_SFX_LEN (sizeof(((const struct ext_info *)0)->ext) - 1)
/* Characters that may not appear in an extension or suffix argument */
#define BAD_EXT_CHARS ":./\\ \t\n\v\f\r"
85 const char USAGE[] =
86 #include "list_packs.inc"
89 static int opt_q = 0;
90 static int opt_xix = -1;
91 static long opt_limit = 0;
92 static int opt_count = 0;
93 static uint64_t count = 0;
94 static uint64_t objlimit = 0;
95 static int opt_desc = 0;
96 static int opt_boundary = 0;
97 static uint64_t maxlimit = 0;
98 static uint64_t processed = 0;
100 static struct pack_info *packlist = NULL;
101 static size_t packcount = 0;
103 static struct ext_info *extlist = NULL;
104 static struct ext_info *sfxlist = NULL;
106 static char fnbuff[PATH_BUFF];
108 static void die(const char *fmt, ...)
110 va_list args;
111 va_start(args, fmt);
112 fflush(stdout);
113 if (!opt_q)
114 vfprintf(stderr, fmt, args);
115 va_end(args);
116 exit(EXIT_FAILURE);
/*
 * Die with an out-of-memory message.  what optionally names the thing that
 * could not be allocated; NULL and "" both mean "unspecified".
 */
static void dienomem(const char *what)
{
	if (what && !*what)
		what = NULL;
	die("list_packs: error: out of memory%s%s\n", (what ? " for " : ""),
	    (what ? what : ""));
}
/* Die because memory for the pack list could not be allocated. */
static void dienopackmem(void)
{
	dienomem("pack list");
}
/* Die because memory for the extension/suffix list could not be allocated. */
static void dienoextmem(void)
{
	dienomem("ext list");
}
137 static void dieusage(int err)
139 FILE *f = err ? stderr : stdout;
140 fflush(stdout);
141 if (!err || !opt_q)
142 fprintf(f, "%s", USAGE);
143 exit(err);
/*
 * Return 1 if the l-byte string s ends with the b-byte string x AND has at
 * least one character in front of it, otherwise 0.  NULL pointers and an
 * empty suffix never match.
 */
static int has_suffix(const char *s, size_t l, const char *x, size_t b)
{
	if (!s || !x || !b || l < (b + 1) /* ?<suffix> */)
		return 0;
	return !strncmp(s + l - b, x, b);
}
#define has_idx_suffix(s,l) has_suffix((s),(l),".idx",4)
#define has_pack_suffix(s,l) has_suffix((s),(l),".pack",5)
/*
 * Return 1 if the l-byte, NUL-terminated name s looks like
 * "pack-<at least 40 hex digits>...\.pack", otherwise 0.
 * Only the leading "pack-", the run of hex digits that follows it and the
 * trailing ".pack" are examined.
 */
static int is_pack_sha1_name(const char *s, size_t l)
{
	/* "pack-" (5) + 40 hex digits + ".pack" (5) */
	if (!s || l < 50)
		return 0;
	if (strncmp(s, "pack-", 5) || strncmp(s + l - 5, ".pack", 5))
		return 0;
	return strspn(s + 5, "0123456789abcdefABCDEF") >= 40;
}
164 #define find_add_ext(ext) find_add_extsfx(&extlist, ext, '.')
165 #define find_add_sfx(sfx) find_add_extsfx(&sfxlist, sfx, 0)
166 static struct ext_info *find_add_extsfx(struct ext_info **list, const char *extsfx, char ch)
168 size_t elen = strlen(extsfx);
169 size_t b = ch ? 1 : 0;
170 struct ext_info *result = *list;
172 while (result && strcmp(&result->ext[b], extsfx)) {
173 result = result->next;
175 if (!result) {
176 result = (struct ext_info *)malloc(sizeof(struct ext_info));
177 if (!result)
178 dienoextmem();
179 result->extlen = elen + b;
180 if (b)
181 result->ext[0] = ch;
182 memcpy(&result->ext[b], extsfx, elen + 1);
183 if (elen + b + 1 < sizeof(result->ext))
184 memset(&result->ext[elen + b + 1], 0,
185 sizeof(result->ext) - (elen + b + 1));
186 result->xxt = -1;
187 result->next = *list;
188 *list = result;
190 return result;
193 void handle_ext_option(const char *ext, int v)
195 size_t elen;
196 struct ext_info *einfo;
198 if (!ext || !*ext || !(elen = strlen(ext)) ||
199 elen > MAX_EXT_LEN || strcspn(ext, BAD_EXT_CHARS) != elen)
200 dieusage(EXIT_FAILURE);
201 if (!strcmp(ext, "idx")) {
202 opt_xix = v;
203 } else {
204 einfo = find_add_ext(ext);
205 einfo->xxt = v;
209 void handle_sfx_option(const char *sfx, int v)
211 size_t elen;
212 struct ext_info *sinfo;
214 if (!sfx || !*sfx || !(elen = strlen(sfx)) ||
215 elen > MAX_SFX_LEN || strcspn(sfx, BAD_EXT_CHARS) != elen ||
216 strchr("0123456789abcdefABCDEF", sfx[0]))
217 dieusage(EXIT_FAILURE);
218 sinfo = find_add_sfx(sfx);
219 sinfo->xxt = v;
222 int main(int argc, char *argv[])
224 int argn;
225 int opt_a = 0;
226 const char *only = NULL;
227 const char *dir = NULL;
228 int is_stdin = 0;
229 FILE *in = NULL;
231 for (argn = 1; argn < argc; ++argn) {
232 if (!strcmp(argv[argn], "-h") || !strcmp(argv[argn], "--help")) {
233 dieusage(EXIT_SUCCESS);
234 } else if (!strcmp(argv[argn], "-q") || !strcmp(argv[argn], "--quiet")) {
235 opt_q = 1;
236 } else if (!strcmp(argv[argn], "-a") || !strcmp(argv[argn], "--all")) {
237 opt_a = 1;
238 } else if (!strcmp(argv[argn], "--exclude-idx")) {
239 opt_xix = 1;
240 } else if (!strcmp(argv[argn], "--exclude-no-idx")) {
241 opt_xix = 0;
242 } else if (!strcmp(argv[argn], "--exclude-keep")) {
243 handle_ext_option("keep", 1);
244 } else if (!strcmp(argv[argn], "--exclude-no-keep")) {
245 handle_ext_option("keep", 0);
246 } else if (!strcmp(argv[argn], "--exclude-bitmap")) {
247 handle_ext_option("bitmap", 1);
248 } else if (!strcmp(argv[argn], "--exclude-no-bitmap")) {
249 handle_ext_option("bitmap", 0);
250 } else if (!strcmp(argv[argn], "--exclude-bndl")) {
251 handle_ext_option("bndl", 1);
252 } else if (!strcmp(argv[argn], "--exclude-no-bndl")) {
253 handle_ext_option("bndl", 0);
254 } else if (!strcmp(argv[argn], "--count")) {
255 opt_count = 1;
256 } else if (!strcmp(argv[argn], "--count-objects")) {
257 opt_count = 2;
258 } else if (!strcmp(argv[argn], "--include-boundary")) {
259 opt_boundary = 1;
260 } else if (!strcmp(argv[argn], "--exclude-ext")) {
261 if (++argn >= argc)
262 dieusage(EXIT_FAILURE);
263 handle_ext_option(argv[argn], 1);
264 } else if (!strcmp(argv[argn], "--exclude-no-ext")) {
265 if (++argn >= argc)
266 dieusage(EXIT_FAILURE);
267 handle_ext_option(argv[argn], 0);
268 } else if (!strcmp(argv[argn], "--exclude-sfx")) {
269 if (++argn >= argc)
270 dieusage(EXIT_FAILURE);
271 handle_sfx_option(argv[argn], 1);
272 } else if (!strcmp(argv[argn], "--exclude-no-sfx")) {
273 if (++argn >= argc)
274 dieusage(EXIT_FAILURE);
275 handle_sfx_option(argv[argn], 0);
276 } else if (!strcmp(argv[argn], "--exclude-limit")) {
277 char *end;
278 long limit = 0;
280 if (++argn >= argc)
281 dieusage(EXIT_FAILURE);
282 limit = strtol(argv[argn], &end, 0);
283 if (!*argv[argn] || *end || !limit)
284 dieusage(EXIT_FAILURE);
285 opt_limit = limit;
286 } else if (!strcmp(argv[argn], "--object-limit")) {
287 char *end;
288 long limit = 0;
290 if (++argn >= argc)
291 dieusage(EXIT_FAILURE);
292 limit = strtol(argv[argn], &end, 0);
293 if (!*argv[argn] || *end || !limit)
294 dieusage(EXIT_FAILURE);
295 if (limit < 0) {
296 opt_desc = 1;
297 objlimit = (uint64_t)-limit;
298 } else {
299 objlimit = (uint64_t)limit;
301 } else if (!strcmp(argv[argn], "--max-matches")) {
302 char *end;
303 long limit = 0;
305 if (++argn >= argc)
306 dieusage(EXIT_FAILURE);
307 limit = strtol(argv[argn], &end, 0);
308 if (!*argv[argn] || *end || limit <= 0)
309 dieusage(EXIT_FAILURE);
310 maxlimit = (uint64_t)limit;
311 } else if (!strcmp(argv[argn], "--only")) {
312 if (++argn >= argc || !*argv[argn])
313 dieusage(EXIT_FAILURE);
314 only = argv[argn];
315 } else if (!strcmp(argv[argn], "-C")) {
316 if (++argn >= argc || !*argv[argn])
317 dieusage(EXIT_FAILURE);
318 if (chdir(argv[argn])) {
319 if (!opt_q)
320 fprintf(stderr, "list_packs: error: "
321 "chdir '%s' failed\n", argv[argn]);
322 exit(EXIT_FAILURE);
324 } else if (!strcmp(argv[argn], "--")) {
325 ++argn;
326 break;
327 } else if (argv[argn][0] == '-' && argv[argn][1]) {
328 dieusage(EXIT_FAILURE);
329 } else {
330 break;
333 if (argn < argc && *argv[argn])
334 dir = argv[argn++];
335 if (argn != argc || (!only && !dir) || (only && dir) || (only && opt_a))
336 dieusage(EXIT_FAILURE);
337 if (only) {
338 if (!strcmp(only, "-")) {
339 is_stdin = 1;
340 in = stdin;
341 } else {
342 in = fopen(only, "r");
343 if (!in)
344 die("list_packs: error: could not open %s\n", only);
346 while (fgetfn(in, fnbuff, sizeof(fnbuff) - (MAX_EXT_LEN + 1))) {
347 char *fn = fnbuff;
348 size_t l = strlen(fn);
349 int ips;
351 if (!l)
352 continue;
353 if (l > 2 && !strncmp(fn, "./", 2)) {
354 fn += 2;
355 l -= 2;
357 ips = has_pack_suffix(fn, l);
358 process_pack_filename(fn, (ips ? l - 5 : l), ips);
360 if (!is_stdin)
361 fclose(in);
362 } else {
363 size_t l;
364 DIR *d;
365 struct dirent *e;
367 l = strlen(dir);
368 while (l > 1 && dir[l-1] == '/') {
369 --l;
371 if (l > 2 && !strncmp(dir, "./", 2)) {
372 dir += 2;
373 l -= 2;
375 if (l + 10 /* "/?.bitmap\0" */ > PATH_BUFF)
376 die("list_packs: error: dirname too long\n");
377 memcpy(fnbuff, dir, l);
378 fnbuff[l] = '\0';
379 d = opendir(fnbuff);
380 if (!d)
381 die("list_packs: error: could not read directory %s\n", fnbuff);
382 if (!strcmp(fnbuff, ".")) {
383 l = 0;
384 fnbuff[0] = '\0';
386 if (l && fnbuff[l-1] != '/')
387 fnbuff[l++] = '/';
388 while ((e = readdir(d)) != NULL) {
389 /* d_namlen is a nice, but non-POSIX extension */
390 size_t el = strlen(e->d_name);
392 if (has_pack_suffix(e->d_name, el) &&
393 (opt_a || is_pack_sha1_name(e->d_name, el))) {
394 if (l + el + 3 /* "ap\0" */ > PATH_BUFF) {
395 if (!opt_q)
396 fprintf(stderr, "list_packs: warning: "
397 "ignored input filename greater "
398 "than %d characters long\n",
399 PATH_BUFF - 3);
400 continue;
402 memcpy(fnbuff + l, e->d_name, el + 1 /* \0 */);
403 process_pack_filename(fnbuff, l + el - 5 /* .pack */, 1);
406 closedir(d);
408 process_packs_finish();
409 if (opt_count)
410 printf("%"PRIu64"\n", count);
412 return EXIT_SUCCESS;
415 #define FNDELIM "\t\n\v\f\r :"
417 static int fgetfn(FILE *f, char *b, size_t s)
419 size_t l, fnl;
420 int trunc;
422 if (!fgets(b, (int)s, f)) {
423 if (ferror(f)) {
424 if (!opt_q)
425 fprintf(stderr, "list_packs: error: an error "
426 "occurred reading pack name list file\n");
427 exit(EXIT_FAILURE);
429 return 0;
431 if (!*b)
432 return 1;
433 l = strlen(b);
434 fnl = strcspn(b, FNDELIM);
435 if (b[l-1] != '\n' && !feof(f)) {
436 int ch;
437 flockfile(f);
438 while ((ch = getc_unlocked(f)) != EOF && ch != '\n') {
439 /* loop */
441 funlockfile(f);
442 trunc = 1;
443 } else {
444 trunc = 0;
446 if (fnl < l || (!ferror(f) && !trunc)) {
447 b[fnl] = '\0';
448 return 1;
450 if (ferror(f)) {
451 if (!opt_q)
452 fprintf(stderr, "list_packs: error: an error "
453 "occurred reading pack name list file\n");
454 exit(EXIT_FAILURE);
456 if (!opt_q)
457 fprintf(stderr, "list_packs: warning: ignored input filename "
458 "greater than %d characters long\n", (int)s - 2);
459 *b = '\0';
460 return 1;
463 static int file_exists(const char *fn, struct stat *s)
465 if (!stat(fn, s)) {
466 if (S_ISREG(s->st_mode))
467 return 1;
468 if (!opt_q)
469 fprintf(stderr, "list_packs: warning: ignoring "
470 "non-file '%s'\n", fn);
472 return 0;
475 static void process_pack_filename(char *fn, size_t bl, int hasps)
477 struct stat ps, es;
478 FILE *f;
479 union {
480 uint32_t u[3];
481 char c[12];
482 } hdr;
483 uint32_t packver;
484 uint32_t objcnt;
485 const struct ext_info *einfo;
486 int sfxor;
488 if (stat(fn, &ps) || !S_ISREG(ps.st_mode)) {
489 if (!opt_q)
490 fprintf(stderr, "list_packs: warning: ignoring "
491 "non-file '%s'\n", fn);
492 return;
494 if (ps.st_size < 32) {
495 if (!opt_q)
496 fprintf(stderr, "list_packs: warning: ignoring "
497 "invalid pack file '%s'\n", fn);
498 return;
500 einfo = sfxlist;
501 sfxor = -1;
502 while (einfo) {
503 if (einfo->xxt >= 0) {
504 int hsfx;
506 hsfx = (bl >= einfo->extlen) &&
507 !strncmp(fn + bl - einfo->extlen, einfo->ext, einfo->extlen);
508 if (einfo->xxt) {
509 if (hsfx)
510 return;
511 } else if ((sfxor = hsfx)) {
512 break;
515 einfo = einfo->next;
517 if (!sfxor)
518 return;
519 einfo = extlist;
520 while (einfo) {
521 if (einfo->xxt >= 0) {
522 int hext;
524 memcpy(fn + bl, einfo->ext, einfo->extlen + 1);
525 hext = file_exists(fn, &es);
526 if ((einfo->xxt && hext) || (!einfo->xxt && !hext))
527 return;
529 einfo = einfo->next;
531 if (opt_xix >= 0) {
532 int hx;
534 memcpy(fn + bl, ".idx", 5);
535 hx = file_exists(fn, &es);
536 if ((opt_xix && hx) || (!opt_xix && !hx))
537 return;
539 if (hasps)
540 memcpy(fn + bl, ".pack", 6);
541 else
542 fn[bl] = '\0';
543 f = fopen(fn, "rb");
544 if (!f) {
545 if (!opt_q)
546 fprintf(stderr, "list_packs: warning: ignoring "
547 "unopenable pack file '%s'\n", fn);
548 return;
550 if (fread(&hdr, 12, 1, f) != 1) {
551 fclose(f);
552 if (!opt_q)
553 fprintf(stderr, "list_packs: warning: ignoring "
554 "unreadable pack file '%s'\n", fn);
555 return;
557 fclose(f);
558 packver = ntohl(hdr.u[1]);
559 objcnt = ntohl(hdr.u[2]);
560 if (memcmp(hdr.c, "PACK", 4) || (packver != 2 && packver != 3) ||
561 ps.st_size < ((off_t)objcnt + 32)) {
562 if (!opt_q)
563 fprintf(stderr, "list_packs: warning: ignoring "
564 "invalid pack file '%s'\n", fn);
565 return;
567 if (!opt_xix && es.st_size < ((off_t)objcnt * 28 + 1072)) {
568 if (!opt_q)
569 fprintf(stderr, "list_packs: warning: ignoring pack "
570 "with invalid idx file '%.*s.idx'\n", (int)bl, fn);
571 return;
573 if (opt_limit) {
574 if ((opt_limit > 0 && objcnt >= (uint32_t)opt_limit) ||
575 (opt_limit < 0 && objcnt < (uint32_t)-opt_limit))
576 return;
578 /* the PACK file passed all the checks, process it */
579 if (objlimit) {
580 size_t fnlen = strlen(fn);
581 struct pack_info *info = (struct pack_info *)
582 malloc(sizeof(struct pack_info) + fnlen);
584 if (!info)
585 dienopackmem();
586 info->objcount = objcnt;
587 memcpy(info->filename, fn, fnlen + 1);
588 info->next = packlist;
589 packlist = info;
590 ++packcount;
591 } else {
592 process_pack(fn, objcnt);
596 static void process_pack(const char *fn, uint32_t objcnt)
598 if (maxlimit && processed >= maxlimit)
599 return;
600 if (opt_count) {
601 if (opt_count > 1)
602 count += objcnt;
603 else
604 ++count;
605 } else {
606 printf("%s\n", fn);
608 ++processed;
611 static void process_pack_info(const struct pack_info *pack)
613 process_pack(pack->filename, pack->objcount);
616 static int sort_asc(const void *p, const void *q)
618 const struct pack_info **a = (const struct pack_info **)p;
619 const struct pack_info **b = (const struct pack_info **)q;
620 if ((*a)->objcount < (*b)->objcount)
621 return -1;
622 if ((*a)->objcount > (*b)->objcount)
623 return 1;
624 return strcmp((*a)->filename, (*b)->filename);
/* qsort() comparator: the exact reverse of sort_asc(). */
static int sort_dsc(const void *p, const void *q)
{
	return sort_asc(q, p);
}
/*
 * Deferred pass used when an object limit is in effect (objlimit != 0).
 * Copies the queued packlist into an array, sorts it by object count
 * (sort_dsc when opt_desc is set, otherwise sort_asc) and hands packs to
 * process_pack_info() for as long as the running object tally stays
 * <= objlimit.  The first pack that takes the tally past the limit is also
 * processed when opt_boundary is set; the loop stops there either way.
 * NOTE(review): no free() of table or of the list nodes is visible in this
 * excerpt - presumably left to process exit; confirm against the full file.
 */
632 static void process_packs_finish(void)
634 struct pack_info **table, *p;
635 size_t i;
636 uint64_t tally;
/* nothing was queued unless --object-limit was given */
638 if (!objlimit || !packlist || !packcount)
639 return;
640 table = (struct pack_info **)malloc(sizeof(struct pack_info *) * packcount);
641 if (!table)
642 dienopackmem();
/* flatten the linked list into table[] so qsort() can be used */
643 i = 0;
644 p = packlist;
645 do {
646 table[i++] = p;
647 p = p->next;
648 } while (p);
649 qsort(table, packcount, sizeof(struct pack_info *), (opt_desc ? sort_dsc : sort_asc));
/* take packs in sorted order until their combined object count exceeds objlimit */
650 tally = 0;
651 for (i=0; i < packcount; ++i) {
652 tally += table[i]->objcount;
653 if (tally <= objlimit) {
654 process_pack_info(table[i]);
655 } else {
/* the pack that crosses the limit is included only with --include-boundary */
656 if (opt_boundary)
657 process_pack_info(table[i]);
658 break;