projlist: improve text browser project list formatting
[girocco.git] / src / list_packs.c
blobf8a3e99035d0c47898951c84b8d668e2702e1436
1 /*
3 list_packs.c -- list_packs utility to count Git packs and their objects
4 Copyright (C) 2016,2017 Kyle J. McKay.
5 All rights reserved.
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License
9 as published by the Free Software Foundation; either version 2
10 of the License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 This utility is intended to be used by a script to assist in determining
25 whether or not it's time to run gc in the case where gc.auto=0 and when
26 it is to provide a convenient mechanism to feed selected pack names to
27 a script for further processing at gc time.
29 Various options are available to select which .pack files to inspect
30 including supplying the names. This utility is intended to be able
31 to read the pack names from the --export-pack-edges file that may be
32 produced by git fast-import without needing any preprocessing.
34 See the list_packs.txt file or run the command with no arguments for help.
37 #define _XOPEN_SOURCE 600
38 #undef _FILE_OFFSET_BITS
39 #define _FILE_OFFSET_BITS 64
40 #include <stdarg.h>
41 #include <stddef.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <sys/types.h> /* in some cases required before dirent.h or sys/stat.h */
46 #include <dirent.h>
47 #include <inttypes.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <arpa/inet.h>
52 #include <sys/stat.h>
54 #ifndef PATH_MAX
55 #define PATH_MAX 1024
56 #endif
57 #if PATH_MAX < 4096
58 #define PATH_BUFF 4096
59 #else
60 #define PATH_BUFF PATH_MAX
61 #endif
63 static int fgetfn(FILE *f, char *b, size_t s);
64 static void process_pack_filename(char *fn, size_t bl, int hasps);
65 static void process_pack(const char *fn, uint32_t objcnt);
66 static void process_packs_finish(void);
/*
 * One pack that passed all filters, remembered for later sorting when an
 * --object-limit is in effect.  Entries are allocated with extra room so
 * that the whole NUL-terminated name fits in the trailing filename array.
 */
struct pack_info {
	struct pack_info *next;	/* next entry in the singly linked pack list */
	uint32_t objcount;	/* object count taken from the pack header */
	char filename[1];	/* pack name; storage extends past the struct */
};
/*
 * One companion-file extension filter (for example ".keep" or ".bitmap").
 * A pack is excluded depending on whether the file formed by replacing
 * ".pack" with this extension exists.
 */
struct ext_info {
	struct ext_info *next;	/* next filter in the singly linked list */
	size_t extlen;		/* strlen(ext) */
	char ext[12];		/* includes leading '.' and trailing '\0' */
	int xxt;		/* >0 excludes if ext present, 0 excludes if ext NOT present */
};
81 #define MAX_EXT_LEN (sizeof(((const struct ext_info *)0)->ext) - 2)
82 #define BAD_EXT_CHARS ":./\\ \t\n\v\f\r"
84 const char USAGE[] =
85 #include "list_packs.inc"
88 static int opt_q = 0;
89 static int opt_xix = -1;
90 static long opt_limit = 0;
91 static int opt_count = 0;
92 static uint64_t count = 0;
93 static uint64_t objlimit = 0;
94 static int opt_desc = 0;
95 static int opt_boundary = 0;
96 static uint64_t maxlimit = 0;
97 static uint64_t processed = 0;
99 static struct pack_info *packlist = NULL;
100 static size_t packcount = 0;
102 static struct ext_info *extlist = NULL;
104 static char fnbuff[PATH_BUFF];
106 static void die(const char *fmt, ...)
108 va_list args;
109 va_start(args, fmt);
110 fflush(stdout);
111 if (!opt_q)
112 vfprintf(stderr, fmt, args);
113 va_end(args);
114 exit(EXIT_FAILURE);
/*
 * Die with an out-of-memory error.  When `what` is a non-empty string the
 * message names it ("out of memory for <what>"); NULL or "" omits that part.
 */
static void dienomem(const char *what)
{
	const char *glue = "";
	const char *subject = "";

	if (what != NULL && *what != '\0') {
		glue = " for ";
		subject = what;
	}
	die("list_packs: error: out of memory%s%s\n", glue, subject);
}
/* Die because memory for the pack list could not be allocated. */
static void dienopackmem(void)
{
	static const char subject[] = "pack list";

	dienomem(subject);
}
/* Die because memory for the extension filter list could not be allocated. */
static void dienoextmem(void)
{
	static const char subject[] = "ext list";

	dienomem(subject);
}
135 static void dieusage(int err)
137 FILE *f = err ? stderr : stdout;
138 fflush(stdout);
139 if (!err || !opt_q)
140 fprintf(f, "%s", USAGE);
141 exit(err);
/*
 * Return nonzero when the string `s` of length `l` ends with the `b`-byte
 * suffix `x` AND has at least one character in front of that suffix.
 * NULL pointers and an empty suffix never match.
 */
static int has_suffix(const char *s, size_t l, const char *x, size_t b)
{
	const char *tail;

	if (s == NULL || x == NULL)
		return 0;
	if (b == 0)
		return 0;
	if (l < b + 1)	/* need "?<suffix>": something must precede the suffix */
		return 0;
	tail = s + l - b;
	return strncmp(tail, x, b) == 0;
}

#define has_idx_suffix(s,l) has_suffix((s),(l),".idx",4)
#define has_pack_suffix(s,l) has_suffix((s),(l),".pack",5)
/*
 * Return nonzero when the name `s` of length `l` looks like
 * "pack-<at least 40 hex digits>...pack": it must be at least 50 bytes
 * long, start with "pack-", end with ".pack" and have 40 or more
 * hexadecimal digits (either case) directly after the prefix.
 */
static int is_pack_sha1_name(const char *s, size_t l)
{
	static const char hexdigits[] = "0123456789abcdefABCDEF";

	if (s == NULL)
		return 0;
	if (l < 50)
		return 0;
	if (strncmp(s, "pack-", 5) != 0)
		return 0;
	if (strncmp(s + l - 5, ".pack", 5) != 0)
		return 0;
	return strspn(s + 5, hexdigits) >= 40;
}
162 static struct ext_info *find_add_ext(const char *ext)
164 size_t elen = strlen(ext);
165 struct ext_info *result = extlist;
167 while (result && strcmp(result->ext, ext)) {
168 result = result->next;
170 if (!result) {
171 result = (struct ext_info *)malloc(sizeof(struct ext_info));
172 if (!result)
173 dienoextmem();
174 result->extlen = elen + 1;
175 result->ext[0] = '.';
176 memcpy(&result->ext[1], ext, elen + 1);
177 if (elen + 2 < sizeof(result->ext))
178 memset(&result->ext[elen + 2], 0, sizeof(result->ext) - (elen + 2));
179 result->xxt = -1;
180 result->next = extlist;
181 extlist = result;
183 return result;
186 void handle_ext_option(const char *ext, int v)
188 size_t elen;
189 struct ext_info *einfo;
191 if (!ext || !*ext || !(elen = strlen(ext)) ||
192 elen > MAX_EXT_LEN || strcspn(ext, BAD_EXT_CHARS) != elen)
193 dieusage(EXIT_FAILURE);
194 if (!strcmp(ext, "idx")) {
195 opt_xix = v;
196 } else {
197 einfo = find_add_ext(ext);
198 einfo->xxt = v;
202 int main(int argc, char *argv[])
204 int argn;
205 int opt_a = 0;
206 const char *only = NULL;
207 const char *dir = NULL;
208 int is_stdin = 0;
209 FILE *in = NULL;
211 for (argn = 1; argn < argc; ++argn) {
212 if (!strcmp(argv[argn], "-h") || !strcmp(argv[argn], "--help")) {
213 dieusage(EXIT_SUCCESS);
214 } else if (!strcmp(argv[argn], "-q") || !strcmp(argv[argn], "--quiet")) {
215 opt_q = 1;
216 } else if (!strcmp(argv[argn], "-a") || !strcmp(argv[argn], "--all")) {
217 opt_a = 1;
218 } else if (!strcmp(argv[argn], "--exclude-idx")) {
219 opt_xix = 1;
220 } else if (!strcmp(argv[argn], "--exclude-no-idx")) {
221 opt_xix = 0;
222 } else if (!strcmp(argv[argn], "--exclude-keep")) {
223 handle_ext_option("keep", 1);
224 } else if (!strcmp(argv[argn], "--exclude-no-keep")) {
225 handle_ext_option("keep", 0);
226 } else if (!strcmp(argv[argn], "--exclude-bitmap")) {
227 handle_ext_option("bitmap", 1);
228 } else if (!strcmp(argv[argn], "--exclude-no-bitmap")) {
229 handle_ext_option("bitmap", 0);
230 } else if (!strcmp(argv[argn], "--exclude-bndl")) {
231 handle_ext_option("bndl", 1);
232 } else if (!strcmp(argv[argn], "--exclude-no-bndl")) {
233 handle_ext_option("bndl", 0);
234 } else if (!strcmp(argv[argn], "--count")) {
235 opt_count = 1;
236 } else if (!strcmp(argv[argn], "--count-objects")) {
237 opt_count = 2;
238 } else if (!strcmp(argv[argn], "--include-boundary")) {
239 opt_boundary = 1;
240 } else if (!strcmp(argv[argn], "--exclude-ext")) {
241 if (++argn >= argc)
242 dieusage(EXIT_FAILURE);
243 handle_ext_option(argv[argn], 1);
244 } else if (!strcmp(argv[argn], "--exclude-no-ext")) {
245 if (++argn >= argc)
246 dieusage(EXIT_FAILURE);
247 handle_ext_option(argv[argn], 0);
248 } else if (!strcmp(argv[argn], "--exclude-limit")) {
249 char *end;
250 long limit = 0;
252 if (++argn >= argc)
253 dieusage(EXIT_FAILURE);
254 limit = strtol(argv[argn], &end, 0);
255 if (!*argv[argn] || *end || !limit)
256 dieusage(EXIT_FAILURE);
257 opt_limit = limit;
258 } else if (!strcmp(argv[argn], "--object-limit")) {
259 char *end;
260 long limit = 0;
262 if (++argn >= argc)
263 dieusage(EXIT_FAILURE);
264 limit = strtol(argv[argn], &end, 0);
265 if (!*argv[argn] || *end || !limit)
266 dieusage(EXIT_FAILURE);
267 if (limit < 0) {
268 opt_desc = 1;
269 objlimit = (uint64_t)-limit;
270 } else {
271 objlimit = (uint64_t)limit;
273 } else if (!strcmp(argv[argn], "--max-matches")) {
274 char *end;
275 long limit = 0;
277 if (++argn >= argc)
278 dieusage(EXIT_FAILURE);
279 limit = strtol(argv[argn], &end, 0);
280 if (!*argv[argn] || *end || limit <= 0)
281 dieusage(EXIT_FAILURE);
282 maxlimit = (uint64_t)limit;
283 } else if (!strcmp(argv[argn], "--only")) {
284 if (++argn >= argc || !*argv[argn])
285 dieusage(EXIT_FAILURE);
286 only = argv[argn];
287 } else if (!strcmp(argv[argn], "-C")) {
288 if (++argn >= argc || !*argv[argn])
289 dieusage(EXIT_FAILURE);
290 if (chdir(argv[argn])) {
291 if (!opt_q)
292 fprintf(stderr, "list_packs: error: "
293 "chdir '%s' failed\n", argv[argn]);
294 exit(EXIT_FAILURE);
296 } else if (!strcmp(argv[argn], "--")) {
297 ++argn;
298 break;
299 } else if (argv[argn][0] == '-' && argv[argn][1]) {
300 dieusage(EXIT_FAILURE);
301 } else {
302 break;
305 if (argn < argc && *argv[argn])
306 dir = argv[argn++];
307 if (argn != argc || (!only && !dir) || (only && dir) || (only && opt_a))
308 dieusage(EXIT_FAILURE);
309 if (only) {
310 if (!strcmp(only, "-")) {
311 is_stdin = 1;
312 in = stdin;
313 } else {
314 in = fopen(only, "r");
315 if (!in)
316 die("list_packs: error: could not open %s\n", only);
318 while (fgetfn(in, fnbuff, sizeof(fnbuff) - (MAX_EXT_LEN + 1))) {
319 char *fn = fnbuff;
320 size_t l = strlen(fn);
321 int ips;
323 if (!l)
324 continue;
325 if (l > 2 && !strncmp(fn, "./", 2)) {
326 fn += 2;
327 l -= 2;
329 ips = has_pack_suffix(fn, l);
330 process_pack_filename(fn, (ips ? l - 5 : l), ips);
332 if (!is_stdin)
333 fclose(in);
334 } else {
335 size_t l;
336 DIR *d;
337 struct dirent *e;
339 l = strlen(dir);
340 while (l > 1 && dir[l-1] == '/') {
341 --l;
343 if (l > 2 && !strncmp(dir, "./", 2)) {
344 dir += 2;
345 l -= 2;
347 if (l + 10 /* "/?.bitmap\0" */ > PATH_BUFF)
348 die("list_packs: error: dirname too long\n");
349 memcpy(fnbuff, dir, l);
350 fnbuff[l] = '\0';
351 d = opendir(fnbuff);
352 if (!d)
353 die("list_packs: error: could not read directory %s\n", fnbuff);
354 if (!strcmp(fnbuff, ".")) {
355 l = 0;
356 fnbuff[0] = '\0';
358 if (l && fnbuff[l-1] != '/')
359 fnbuff[l++] = '/';
360 while ((e = readdir(d)) != NULL) {
361 /* d_namlen is a nice, but non-POSIX extension */
362 size_t el = strlen(e->d_name);
364 if (has_pack_suffix(e->d_name, el) &&
365 (opt_a || is_pack_sha1_name(e->d_name, el))) {
366 if (l + el + 3 /* "ap\0" */ > PATH_BUFF) {
367 if (!opt_q)
368 fprintf(stderr, "list_packs: warning: "
369 "ignored input filename greater "
370 "than %d characters long\n",
371 PATH_BUFF - 3);
372 continue;
374 memcpy(fnbuff + l, e->d_name, el + 1 /* \0 */);
375 process_pack_filename(fnbuff, l + el - 5 /* .pack */, 1);
378 closedir(d);
380 process_packs_finish();
381 if (opt_count)
382 printf("%"PRIu64"\n", count);
384 return EXIT_SUCCESS;
387 #define FNDELIM "\t\n\v\f\r :"
389 static int fgetfn(FILE *f, char *b, size_t s)
391 size_t l, fnl;
392 int trunc;
394 if (!fgets(b, (int)s, f)) {
395 if (ferror(f)) {
396 if (!opt_q)
397 fprintf(stderr, "list_packs: error: an error "
398 "occurred reading pack name list file\n");
399 exit(EXIT_FAILURE);
401 return 0;
403 if (!*b)
404 return 1;
405 l = strlen(b);
406 fnl = strcspn(b, FNDELIM);
407 if (b[l-1] != '\n' && !feof(f)) {
408 int ch;
409 flockfile(f);
410 while ((ch = getc_unlocked(f)) != EOF && ch != '\n') {
411 /* loop */
413 funlockfile(f);
414 trunc = 1;
415 } else {
416 trunc = 0;
418 if (fnl < l || (!ferror(f) && !trunc)) {
419 b[fnl] = '\0';
420 return 1;
422 if (ferror(f)) {
423 if (!opt_q)
424 fprintf(stderr, "list_packs: error: an error "
425 "occurred reading pack name list file\n");
426 exit(EXIT_FAILURE);
428 if (!opt_q)
429 fprintf(stderr, "list_packs: warning: ignored input filename "
430 "greater than %d characters long\n", (int)s - 2);
431 *b = '\0';
432 return 1;
435 static int file_exists(const char *fn, struct stat *s)
437 if (!stat(fn, s)) {
438 if (S_ISREG(s->st_mode))
439 return 1;
440 if (!opt_q)
441 fprintf(stderr, "list_packs: warning: ignoring "
442 "non-file '%s'\n", fn);
444 return 0;
447 static void process_pack_filename(char *fn, size_t bl, int hasps)
449 struct stat ps, es;
450 FILE *f;
451 union {
452 uint32_t u[3];
453 char c[12];
454 } hdr;
455 uint32_t packver;
456 uint32_t objcnt;
457 const struct ext_info *einfo;
459 if (stat(fn, &ps) || !S_ISREG(ps.st_mode)) {
460 if (!opt_q)
461 fprintf(stderr, "list_packs: warning: ignoring "
462 "non-file '%s'\n", fn);
463 return;
465 if (ps.st_size < 32) {
466 if (!opt_q)
467 fprintf(stderr, "list_packs: warning: ignoring "
468 "invalid pack file '%s'\n", fn);
469 return;
471 einfo = extlist;
472 while (einfo) {
473 if (einfo->xxt >= 0) {
474 int hext;
476 memcpy(fn + bl, einfo->ext, einfo->extlen + 1);
477 hext = file_exists(fn, &es);
478 if ((einfo->xxt && hext) || (!einfo->xxt && !hext))
479 return;
481 einfo = einfo->next;
483 if (opt_xix >= 0) {
484 int hx;
486 memcpy(fn + bl, ".idx", 5);
487 hx = file_exists(fn, &es);
488 if ((opt_xix && hx) || (!opt_xix && !hx))
489 return;
491 if (hasps)
492 memcpy(fn + bl, ".pack", 6);
493 else
494 fn[bl] = '\0';
495 f = fopen(fn, "rb");
496 if (!f) {
497 if (!opt_q)
498 fprintf(stderr, "list_packs: warning: ignoring "
499 "unopenable pack file '%s'\n", fn);
500 return;
502 if (fread(&hdr, 12, 1, f) != 1) {
503 fclose(f);
504 if (!opt_q)
505 fprintf(stderr, "list_packs: warning: ignoring "
506 "unreadable pack file '%s'\n", fn);
507 return;
509 fclose(f);
510 packver = ntohl(hdr.u[1]);
511 objcnt = ntohl(hdr.u[2]);
512 if (memcmp(hdr.c, "PACK", 4) || (packver != 2 && packver != 3) ||
513 ps.st_size < ((off_t)objcnt + 32)) {
514 if (!opt_q)
515 fprintf(stderr, "list_packs: warning: ignoring "
516 "invalid pack file '%s'\n", fn);
517 return;
519 if (!opt_xix && es.st_size < ((off_t)objcnt * 28 + 1072)) {
520 if (!opt_q)
521 fprintf(stderr, "list_packs: warning: ignoring pack "
522 "with invalid idx file '%.*s.idx'\n", (int)bl, fn);
523 return;
525 if (opt_limit) {
526 if ((opt_limit > 0 && objcnt >= (uint32_t)opt_limit) ||
527 (opt_limit < 0 && objcnt < (uint32_t)-opt_limit))
528 return;
530 /* the PACK file passed all the checks, process it */
531 if (objlimit) {
532 size_t fnlen = strlen(fn);
533 struct pack_info *info = (struct pack_info *)
534 malloc(sizeof(struct pack_info) + fnlen);
536 if (!info)
537 dienopackmem();
538 info->objcount = objcnt;
539 memcpy(info->filename, fn, fnlen + 1);
540 info->next = packlist;
541 packlist = info;
542 ++packcount;
543 } else {
544 process_pack(fn, objcnt);
548 static void process_pack(const char *fn, uint32_t objcnt)
550 if (maxlimit && processed >= maxlimit)
551 return;
552 if (opt_count) {
553 if (opt_count > 1)
554 count += objcnt;
555 else
556 ++count;
557 } else {
558 printf("%s\n", fn);
560 ++processed;
563 static void process_pack_info(const struct pack_info *pack)
565 process_pack(pack->filename, pack->objcount);
568 static int sort_asc(const void *p, const void *q)
570 const struct pack_info **a = (const struct pack_info **)p;
571 const struct pack_info **b = (const struct pack_info **)q;
572 if ((*a)->objcount < (*b)->objcount)
573 return -1;
574 if ((*a)->objcount > (*b)->objcount)
575 return 1;
576 return strcmp((*a)->filename, (*b)->filename);
/* qsort() comparator giving the exact reverse of sort_asc()'s ordering. */
static int sort_dsc(const void *p, const void *q)
{
	const void *first = q;
	const void *second = p;

	return sort_asc(first, second);
}
584 static void process_packs_finish(void)
586 struct pack_info **table, *p;
587 size_t i;
588 uint64_t tally;
590 if (!objlimit || !packlist || !packcount)
591 return;
592 table = (struct pack_info **)malloc(sizeof(struct pack_info *) * packcount);
593 if (!table)
594 dienopackmem();
595 i = 0;
596 p = packlist;
597 do {
598 table[i++] = p;
599 p = p->next;
600 } while (p);
601 qsort(table, packcount, sizeof(struct pack_info *), (opt_desc ? sort_dsc : sort_asc));
602 tally = 0;
603 for (i=0; i < packcount; ++i) {
604 tally += table[i]->objcount;
605 if (tally <= objlimit) {
606 process_pack_info(table[i]);
607 } else {
608 if (opt_boundary)
609 process_pack_info(table[i]);
610 break;