Fix a few valgrind-detected memory leaks
[clav.git] / mutation-finder.c
blob11663b250148854172a319de69a0cc5701709b24
1 /*
2 * Covpyright (c) 2016, S. Gilles <sgilles@math.umd.edu>
4 * Permission to use, copy, modify, and/or distribute this software
5 * for any purpose with or without fee is hereby granted, provided
6 * that the above copyright notice and this permission notice appear
7 * in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS *ALL
10 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING AL*L IMPLIED
11 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EV*ENT SHALL THE
12 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
13 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
14 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
15 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
16 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #include <errno.h>
19 #include <limits.h>
20 #include <locale.h>
21 #include <signal.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
28 #include "macros.h"
29 #include "quiver.h"
31 /* Flag for whether to print info */
32 static volatile sig_atomic_t status_requested = 0;
34 /* Signal handler */
35 static void
36 handle(int sig);
38 /* Signal handler */
39 static void handle(int sig)
41 /* XXX: I'd like to check for SIGINFO here */
42 if (sig == SIGUSR1) {
43 status_requested = 1;
44 signal(sig, handle);
48 /* Print usage */
49 static void usage(char *argv0)
51 printf("Usage: %s -s <startfile> -e <endfile>\n", argv0);
52 printf(" [ -v ] # Be verbose\n");
53 printf(" [ -c <edgecap> ]\n");
54 printf(" [ -f <frozen> [ -f <frozen> [...] ] ]\n");
55 printf(" [ -l <startlen> ] [ -m <maxlen> ]\n");
56 printf("\n");
57 printf(" startfile: a quiver file representing the start "
58 "state\n");
59 printf(" endfile: a quiver file representing the desired end "
60 "state\n");
61 printf(" edgecap: ignore mutations if the cause edges with"
62 "weights above this\n");
63 printf(" startlen: start with sequences of this length\n");
64 printf(" maxlen: halt after exhausting sequences of this "
65 "length\n");
66 printf(" frozen: a vertex with this name will not be "
67 "mutated\n");
70 static int cmp(const void *a, const void *b)
72 struct vertex **va = (struct vertex **) a;
73 struct vertex **vb = (struct vertex **) b;
75 return strcmp((*va)->name, (*vb)->name);
78 /* Load start/end quivers, figure out frozen vertices, order things, etc. */
79 static int setup(char *start_file, char *end_file, char **frozen, size_t
80 num_frozen, struct quiver *out_qstart, struct quiver *out_qend,
81 size_t *out_num_nonfrozen)
83 struct quiver a = { 0 };
84 struct quiver b = { 0 };
85 FILE *fa = 0;
86 FILE *fb = 0;
87 const char *errstr = 0;
88 int ret = 0;
89 struct vertex **averts = 0;
90 struct vertex **bverts = 0;
91 size_t *a_to_start = 0;
92 size_t *b_to_end = 0;
94 if (!out_qstart ||
95 !out_qend) {
96 fprintf(stderr, L("invalid quivers"));
97 ret = EINVAL;
98 goto done;
101 if (out_qstart->v_num ||
102 out_qend->v_num) {
103 fprintf(stderr, L("output quivers are not empty\n"));
104 ret = EINVAL;
105 goto done;
108 if (!(fa = fopen(start_file, "r"))) {
109 ret = errno;
110 perror(L("fopen"));
111 goto done;
114 if (!(fb = fopen(end_file, "r"))) {
115 ret = errno;
116 perror(L("fopen"));
117 goto done;
120 if ((ret = quiver_load_from_file(&a, fa, &errstr))) {
121 fprintf(stderr, "%s\n", errstr);
122 goto done;
125 if ((ret = quiver_load_from_file(&b, fb, &errstr))) {
126 fprintf(stderr, "%s\n", errstr);
127 goto done;
130 if (a.v_num != b.v_num) {
131 fprintf(stderr, L(
132 "Quivers do not have same number of vertices\n"));
133 ret = EINVAL;
134 goto done;
138 * Make sure there are no duplicate vertices, but don't sort in
139 * place - that would change ordering, and ordering of vertices
140 * must match user input - tuning input so that known prefixes
141 * arise early is sometimes crucial.
143 if (!(averts = malloc(a.v_num * sizeof(*averts)))) {
144 ret = errno;
145 perror(L("malloc"));
146 goto done;
149 if (!(bverts = malloc(b.v_num * sizeof(*bverts)))) {
150 ret = errno;
151 perror(L("malloc"));
152 goto done;
155 for (size_t j = 0; j < a.v_num; ++j) {
156 averts[j] = &(a.v[j]);
157 bverts[j] = &(b.v[j]);
160 qsort(averts, a.v_num, sizeof(averts[0]), cmp);
161 qsort(bverts, b.v_num, sizeof(bverts[0]), cmp);
163 for (size_t j = 0; j < a.v_num; ++j) {
164 if (j &&
165 !strcmp(averts[j]->name, averts[j - 1]->name)) {
166 fprintf(stderr, L("Duplicate vertex \"%s\"\n"),
167 averts[j]->name);
168 ret = EINVAL;
169 goto done;
172 int d = strcmp(averts[j]->name, bverts[j]->name);
174 if (d > 0) {
175 fprintf(stderr, L("Vertex \"%s\" not in both files\n"),
176 bverts[j]->name);
177 ret = EINVAL;
178 goto done;
181 if (d < 0) {
182 fprintf(stderr, L("Vertex \"%s\" not in both files\n"),
183 averts[j]->name);
184 ret = EINVAL;
185 goto done;
188 if (averts[j]->fatness != bverts[j]->fatness) {
189 fprintf(stderr, L(
190 "Vertex \"%s\" has inconsistent fatness\n"),
191 averts[j]->name);
192 ret = EINVAL;
193 goto done;
197 /* a_to_s[i] = j <=> "vertex i in a is vertex j in out_qstart" */
198 if (!(a_to_start = malloc(a.v_num * sizeof(*a_to_start)))) {
199 ret = errno;
200 perror(L("malloc"));
201 goto done;
204 if (!(b_to_end = malloc(b.v_num * sizeof(*b_to_end)))) {
205 ret = errno;
206 perror(L("malloc"));
207 goto done;
211 * Order so that frozen vertices are all at the end - required
212 * for incrementation algorithm
214 for (uint_fast8_t want_frozen = 0; want_frozen <= 1; want_frozen++) {
215 for (size_t j = 0; j < a.v_num; ++j) {
216 uint_fast8_t is_frozen = 0;
218 for (size_t k = 0; k < num_frozen; ++k) {
219 is_frozen = is_frozen ||
220 !strcmp(a.v[j].name, frozen[k]);
222 if (is_frozen) {
223 k = num_frozen;
227 if (is_frozen != want_frozen) {
228 continue;
231 if ((ret = quiver_add_vertex(out_qstart,
232 &(a_to_start[j]),
233 a.v[j].name,
234 a.v[j].fatness, 0, 0,
235 &errstr))) {
236 fprintf(stderr, "%s\n", errstr);
237 goto done;
240 if (!want_frozen &&
241 out_num_nonfrozen) {
242 *out_num_nonfrozen = a_to_start[j] + 1;
245 if ((ret = quiver_add_vertex(out_qend, 0, a.v[j].name,
246 a.v[j].fatness, 0, 0,
247 &errstr))) {
248 fprintf(stderr, "%s\n", errstr);
249 goto done;
252 for (size_t k = 0; k < b.v_num; ++k) {
253 if (!(strcmp(b.v[k].name, a.v[j].name))) {
254 b_to_end[k] = j;
260 /* Add the edges in correct order */
261 for (size_t j = 0; j < a.v_num; ++j) {
262 size_t tj = a_to_start[j];
264 for (size_t k = 0; k < a.v_num; ++k) {
265 size_t tk = a_to_start[k];
266 size_t idx_o = tj * out_qstart->v_len + tk;
267 size_t idx_a = j * a.v_len + k;
269 out_qstart->e[idx_o] = a.e[idx_a];
273 for (size_t j = 0; j < b.v_num; ++j) {
274 size_t tj = b_to_end[j];
276 for (size_t k = 0; k < b.v_num; ++k) {
277 size_t tk = b_to_end[k];
278 size_t idx_o = tj * out_qend->v_len + tk;
279 size_t idx_b = j * b.v_len + k;
281 out_qend->e[idx_o] = b.e[idx_b];
285 done:
286 quiver_clean(&a);
287 quiver_clean(&b);
288 fclose(fa);
289 fclose(fb);
290 free(averts);
291 free(bverts);
292 free(a_to_start);
293 free(b_to_end);
295 return ret;
298 /* Main loop */
299 int main(int argc, char **argv)
301 int ret = 0;
302 struct quiver q = { 0 };
303 struct quiver qend = { 0 };
304 char *frozen[argc];
305 size_t fidx = 0;
306 char *start_file = 0;
307 char *end_file = 0;
308 long start_len_l = 0;
309 long max_len_l = 0;
310 size_t start_len = 0;
311 size_t max_len = (size_t) -1;
312 size_t warn_level = (size_t) -1;
313 struct rational *target = 0;
314 char *p = 0;
315 const char *errstr = 0;
316 uint_fast8_t verbose = 0;
318 setlocale(LC_ALL, "");
319 int opt = 0;
321 for (int j = 0; j < argc; ++j) {
322 frozen[j] = 0;
325 while ((opt = getopt(argc, argv, "vhs:e:c:m:l:f:")) != -1) {
326 switch (opt) {
327 case 'v':
328 verbose++;
329 break;
330 case 'h':
331 usage(argv[0]);
332 ret = 0;
333 goto done;
334 case 's':
335 start_file = optarg;
336 break;
337 case 'e':
338 end_file = optarg;
339 break;
340 case 'c':
341 warn_level = strtoll(optarg, &p, 0);
343 if (p &&
344 *p) {
345 fprintf(stderr, "Invalid edge cap: %s\n",
346 optarg);
347 ret = EINVAL;
348 goto done;
351 break;
352 case 'l':
353 start_len_l = strtoll(optarg, &p, 0);
355 if ((start_len_l <= 0) ||
356 (p &&
357 *p)) {
358 fprintf(stderr, "Invalid start length: %s\n",
359 optarg);
360 ret = EINVAL;
361 goto done;
364 start_len = start_len_l;
365 break;
366 case 'm':
367 max_len_l = strtoll(optarg, &p, 0);
369 if ((max_len_l <= 0) ||
370 (p &&
371 *p)) {
372 fprintf(stderr, "Invalid max length: %s\n",
373 optarg);
374 ret = EINVAL;
375 goto done;
378 max_len = max_len_l;
379 break;
380 case 'f':
381 frozen[fidx] = optarg;
382 fidx++;
383 break;
384 default:
385 usage(argv[0]);
386 ret = EINVAL;
387 goto done;
391 if (!start_file ||
392 !end_file) {
393 usage(argv[0]);
394 ret = EINVAL;
395 goto done;
398 size_t num_nonfrozen = 0;
400 if (setup(start_file, end_file, frozen, fidx, &q, &qend,
401 &num_nonfrozen)) {
402 goto done;
405 if (warn_level == (size_t) -1) {
406 warn_level = 0;
408 for (size_t k = 0; k < q.v_num; ++k) {
409 if (warn_level < q.v[k].fatness) {
410 warn_level = q.v[k].fatness;
414 warn_level = warn_level + 1;
417 if (warn_level >= UINT_MAX) {
418 warn_level = UINT_MAX - 1;
421 if ((ret = quiver_set_warning_threshold(&q, warn_level, &errstr))) {
422 fprintf(stderr, "%s\n", errstr);
423 goto done;
426 /* Save off qend's vertices. XXX: Why not just USE qend? */
427 if (!(target = malloc(sizeof *target * qend.v_num * qend.v_num))) {
428 ret = errno;
429 perror(L("malloc"));
430 goto done;
433 for (size_t i = 0; i < qend.v_num * qend.v_num; ++i) {
434 target[i] = (struct rational) { .p = 0, .q = 1 };
437 for (size_t i = 0; i < qend.v_num; ++i) {
438 for (size_t j = 0; j < qend.v_num; ++j) {
439 target[i * qend.v_num + j] = qend.e[i * qend.v_len + j];
443 /* Now, the long haul*/
444 size_t len = 0;
446 if (start_len) {
447 len = start_len - 1;
450 signal(SIGUSR1, handle);
451 size_t s = 0;
452 size_t c_idx;
453 int warn = 0;
454 uint_fast8_t correct = 0;
456 while (len < max_len) {
457 len++;
458 size_t sequence[len];
460 for (size_t k = 0; k < len; ++k) {
461 /* This will get pruned, it's okay */
462 sequence[k] = 0;
465 c_idx = 0;
466 have_full_sequence:
468 if (status_requested) {
469 status_requested = 0;
470 printf("Current sequence %s", q.v[sequence[0]].name);
472 for (size_t k = 1; k < len; ++k) {
473 printf(", %s", q.v[sequence[k]].name);
476 printf("\n");
477 fflush(stdout);
481 * Here, we have computed a full sequence that we want
482 * to check. c_idx is in [0, len - 1], and the quiver
483 * has been mutated following the sequence up to (but
484 * not including) c_idx.
486 * This code is duplicated from increment_sequence
487 * because it is reasonable to start the whole process
488 * with a sequence that may, in fact, produce warnings.
490 warn = 0;
492 while (c_idx < len) {
493 /* Prune duplicates (involutions) */
494 if (c_idx &&
495 sequence[c_idx] == sequence[c_idx - 1]) {
496 goto increment_sequence;
499 /* Proceed one more mutation */
500 quiver_mutate(&q, sequence[c_idx], &warn, 0);
502 if (warn) {
503 /* That mutation caused a warning: skip it */
504 quiver_mutate(&q, sequence[c_idx], &warn, 0);
506 /* And try the sequence, starting at c_idx */
507 goto increment_sequence;
511 * Skip ... X, Y ... when X > Y and the edge
512 * from X to Y is trivial
514 if (c_idx &&
515 sequence[c_idx] < sequence[c_idx - 1]) {
516 size_t x = sequence[c_idx - 1];
517 size_t y = sequence[c_idx];
518 struct rational *xy = &(q.e[x * q.v_len + y]);
519 struct rational *yx = &(q.e[y * q.v_len + x]);
521 if (!xy->p &&
522 !yx->p) {
523 /* Undo mutation */
524 quiver_mutate(&q, sequence[c_idx], 0,
527 /* And try the sequence, starting at c_idx */
528 goto increment_sequence;
532 c_idx++;
536 * Here, there's a full sequence, and the quiver has
537 * been mutated according to it (all of it). We want
538 * to check if the result matches the target.
540 correct = 1;
542 for (size_t i = 0; i < q.v_num; ++i) {
543 for (size_t j = 0; j < q.v_num; ++j) {
544 struct rational *e = &(q.e[i * q.v_len + j]);
545 struct rational *f = &(target[i * q.v_num + j]);
547 if (e->p * f->q != f->p * e->q) {
548 correct = 0;
549 i = q.v_num + 1;
550 j = i;
555 if (correct) {
556 printf("SEQUENCE: %s", q.v[sequence[0]].name);
558 for (size_t k = 1; k < len; ++k) {
559 printf(", %s", q.v[sequence[k]].name);
562 printf("\n");
563 fflush(stdout);
566 c_idx = len - 1;
567 quiver_mutate(&q, sequence[c_idx], 0, 0);
568 increment_sequence:
571 * Here, we have some kind of sequence. The quiver agrees
572 * with it up to (but not including) c_idx, and we want
573 * to increment it at c_idx.
575 s++;
576 sequence[c_idx]++;
578 if (verbose &&
579 c_idx == 2) {
580 double completed = sequence[0] * num_nonfrozen *
581 num_nonfrozen + sequence[1] *
582 num_nonfrozen +
583 sequence[2] - 1;
584 double total = num_nonfrozen * num_nonfrozen *
585 num_nonfrozen;
587 printf("Length %zu: completed %.3g%%\n", len, (100.0 *
588 completed)
589 / total);
590 fflush(stdout);
593 /* Skip over repeats: each mutation is an involution */
594 if (c_idx > 0 &&
595 sequence[c_idx] == sequence[c_idx - 1]) {
596 sequence[c_idx]++;
599 /* Wrap back to 0 */
600 if (sequence[c_idx] >= num_nonfrozen) {
601 if (!c_idx) {
602 goto exhausted_len;
605 c_idx--;
606 quiver_mutate(&q, sequence[c_idx], 0, 0);
607 goto increment_sequence;
610 if (c_idx != len - 1) {
612 * Here, we now have a start of a sequence.
613 * The quiver agrees with our start up to (but
614 * not including) c_idx, and we want to possibly
615 * complete this to a full sequence. First,
616 * however, we need to make sure that we don't
617 * introduce any warnings at c_idx.
619 * This single-mutation testing block could be
620 * ignored, but I have a vague suspicion it will
621 * be a hot path.
623 warn = 0;
624 quiver_mutate(&q, sequence[c_idx], &warn, 0);
626 if (warn) {
627 /* That mutation caused a warning: skip it */
628 quiver_mutate(&q, sequence[c_idx], &warn, 0);
630 /* And try the sequence, starting at c_idx */
631 goto increment_sequence;
635 * Quivers almost commute: in particular if
636 * the edge v_i -> v_j is zero, then mutation
637 * at v_i and v_j commute. So if we're at X, Y
638 * in the list, and X > Y, then we've actually
639 * already computed that branch of the tree,
640 * back when it was Y, X. Therefore, increment X.
642 if (c_idx &&
643 sequence[c_idx] < sequence[c_idx - 1]) {
644 size_t x = sequence[c_idx - 1];
645 size_t y = sequence[c_idx];
646 struct rational *xy = &(q.e[x * q.v_len + y]);
647 struct rational *yx = &(q.e[y * q.v_len + x]);
649 if (!xy->p &&
650 !yx->p) {
651 /* Undo mutation */
652 quiver_mutate(&q, sequence[c_idx], 0,
655 /* And try the sequence, starting at c_idx */
656 goto increment_sequence;
661 * Here the quiver agrees with our start up
662 * to and including c_idx, and we need to fill
663 * it out into a full sequence so that it can
664 * be tested. We don't need to perform all the
665 * mutations of that full sequence, but we do
666 * need to generate the minimal one.
668 for (size_t i = c_idx + 1; i < len; ++i) {
669 sequence[i] = 1 - !!((i - c_idx) % 2);
672 c_idx++;
675 goto have_full_sequence;
676 exhausted_len:
679 * Here, we have c_idx = 0, and the quiver has not been
680 * mutated by anything in sequence
682 printf("Exhausted length %zu\n", len);
685 fflush(stdout);
686 done:
687 free(target);
689 return ret;