mutation-finder.c

   1 /*
   2  * Covpyright (c) 2016, S. Gilles <sgilles@math.umd.edu>
   3  *
   4  * Permission to use, copy, modify, and/or distribute this software
   5  * for any purpose with or without fee is hereby granted, provided
   6  * that the above copyright notice and this permission notice appear
   7  * in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS *ALL
  10  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING AL*L IMPLIED
  11  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EV*ENT SHALL THE
  12  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
  13  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
  14  * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  15  * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  16  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #include <errno.h>
  19 #include <limits.h>
  20 #include <locale.h>
  21 #include <signal.h>
  22 #include <stdint.h>
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <unistd.h>
  27
  28 #include "macros.h"
  29 #include "quiver.h"
  30
  31 /* Flag for whether to print info */
  32 static volatile sig_atomic_t status_requested = 0;
  33
  34 /* Signal handler */
  35 static void
  36 handle(int sig);
  37
  38 /* Signal handler */
  39 static void handle(int sig)
  40 {
  41         /* XXX: I'd like to check for SIGINFO here */
  42         if (sig == SIGUSR1) {
  43                 status_requested = 1;
  44                 signal(sig, handle);
  45         }
  46 }
  47
  48 /* Print usage */
  49 static void usage(char *argv0)
  50 {
  51         printf("Usage: %s -s <startfile> -e <endfile>\n", argv0);
  52         printf("       [ -v ] # Be verbose\n");
  53         printf("       [ -c <edgecap> ]\n");
  54         printf("       [ -f <frozen> [ -f <frozen> [...] ] ]\n");
  55         printf("       [ -l <startlen> ] [ -m <maxlen> ]\n");
  56         printf("\n");
  57         printf("       startfile: a quiver file representing the start "
  58                "state\n");
  59         printf("       endfile:   a quiver file representing the desired end "
  60                "state\n");
  61         printf("       edgecap:   ignore mutations if the cause edges with"
  62                "weights above this\n");
  63         printf("       startlen:  start with sequences of this length\n");
  64         printf("       maxlen:    halt after exhausting sequences of this "
  65                "length\n");
  66         printf("       frozen:    a vertex with this name will not be "
  67                "mutated\n");
  68 }
  69
  70 static int cmp(const void *a, const void *b)
  71 {
  72         struct vertex **va = (struct vertex **) a;
  73         struct vertex **vb = (struct vertex **) b;
  74
  75         return strcmp((*va)->name, (*vb)->name);
  76 }
  77
  78 /* Load start/end quivers, figure out frozen vertices, order things, etc. */
  79 static int setup(char *start_file, char *end_file, char **frozen, size_t
  80                  num_frozen, struct quiver *out_qstart, struct quiver *out_qend,
  81                  size_t *out_num_nonfrozen)
  82 {
  83         struct quiver a = { 0 };
  84         struct quiver b = { 0 };
  85         FILE *fa = 0;
  86         FILE *fb = 0;
  87         const char *errstr = 0;
  88         int ret = 0;
  89         struct vertex **averts = 0;
  90         struct vertex **bverts = 0;
  91         size_t *a_to_start = 0;
  92         size_t *b_to_end = 0;
  93
  94         if (!out_qstart ||
  95             !out_qend) {
  96                 fprintf(stderr, L("invalid quivers"));
  97                 ret = EINVAL;
  98                 goto done;
  99         }
 100
 101         if (out_qstart->v_num ||
 102             out_qend->v_num) {
 103                 fprintf(stderr, L("output quivers are not empty\n"));
 104                 ret = EINVAL;
 105                 goto done;
 106         }
 107
 108         if (!(fa = fopen(start_file, "r"))) {
 109                 ret = errno;
 110                 perror(L("fopen"));
 111                 goto done;
 112         }
 113
 114         if (!(fb = fopen(end_file, "r"))) {
 115                 ret = errno;
 116                 perror(L("fopen"));
 117                 goto done;
 118         }
 119
 120         if ((ret = quiver_load_from_file(&a, fa, &errstr))) {
 121                 fprintf(stderr, "%s\n", errstr);
 122                 goto done;
 123         }
 124
 125         if ((ret = quiver_load_from_file(&b, fb, &errstr))) {
 126                 fprintf(stderr, "%s\n", errstr);
 127                 goto done;
 128         }
 129
 130         if (a.v_num != b.v_num) {
 131                 fprintf(stderr, L(
 132                                 "Quivers do not have same number of vertices\n"));
 133                 ret = EINVAL;
 134                 goto done;
 135         }
 136
 137         /*
 138          * Make sure there are no duplicate vertices, but don't sort in
 139          * place - that would change ordering, and ordering of vertices
 140          * must match user input - tuning input so that known prefixes
 141          * arise early is sometimes crucial.
 142          */
 143         if (!(averts = malloc(a.v_num * sizeof(*averts)))) {
 144                 ret = errno;
 145                 perror(L("malloc"));
 146                 goto done;
 147         }
 148
 149         if (!(bverts = malloc(b.v_num * sizeof(*bverts)))) {
 150                 ret = errno;
 151                 perror(L("malloc"));
 152                 goto done;
 153         }
 154
 155         for (size_t j = 0; j < a.v_num; ++j) {
 156                 averts[j] = &(a.v[j]);
 157                 bverts[j] = &(b.v[j]);
 158         }
 159
 160         qsort(averts, a.v_num, sizeof(averts[0]), cmp);
 161         qsort(bverts, b.v_num, sizeof(bverts[0]), cmp);
 162
 163         for (size_t j = 0; j < a.v_num; ++j) {
 164                 if (j &&
 165                     !strcmp(averts[j]->name, averts[j - 1]->name)) {
 166                         fprintf(stderr, L("Duplicate vertex \"%s\"\n"),
 167                                 averts[j]->name);
 168                         ret = EINVAL;
 169                         goto done;
 170                 }
 171
 172                 int d = strcmp(averts[j]->name, bverts[j]->name);
 173
 174                 if (d > 0) {
 175                         fprintf(stderr, L("Vertex \"%s\" not in both files\n"),
 176                                 bverts[j]->name);
 177                         ret = EINVAL;
 178                         goto done;
 179                 }
 180
 181                 if (d < 0) {
 182                         fprintf(stderr, L("Vertex \"%s\" not in both files\n"),
 183                                 averts[j]->name);
 184                         ret = EINVAL;
 185                         goto done;
 186                 }
 187
 188                 if (averts[j]->fatness != bverts[j]->fatness) {
 189                         fprintf(stderr, L(
 190                                         "Vertex \"%s\" has inconsistent fatness\n"),
 191                                 averts[j]->name);
 192                         ret = EINVAL;
 193                         goto done;
 194                 }
 195         }
 196
 197         /* a_to_s[i] = j <=> "vertex i in a is vertex j in out_qstart" */
 198         if (!(a_to_start = malloc(a.v_num * sizeof(*a_to_start)))) {
 199                 ret = errno;
 200                 perror(L("malloc"));
 201                 goto done;
 202         }
 203
 204         if (!(b_to_end = malloc(b.v_num * sizeof(*b_to_end)))) {
 205                 ret = errno;
 206                 perror(L("malloc"));
 207                 goto done;
 208         }
 209
 210         /*
 211          * Order so that frozen vertices are all at the end - required
 212          * for incrementation algorithm
 213          */
 214         for (uint_fast8_t want_frozen = 0; want_frozen <= 1; want_frozen++) {
 215                 for (size_t j = 0; j < a.v_num; ++j) {
 216                         uint_fast8_t is_frozen = 0;
 217
 218                         for (size_t k = 0; k < num_frozen; ++k) {
 219                                 is_frozen = is_frozen ||
 220                                             !strcmp(a.v[j].name, frozen[k]);
 221
 222                                 if (is_frozen) {
 223                                         k = num_frozen;
 224                                 }
 225                         }
 226
 227                         if (is_frozen != want_frozen) {
 228                                 continue;
 229                         }
 230
 231                         if ((ret = quiver_add_vertex(out_qstart,
 232                                                      &(a_to_start[j]),
 233                                                      a.v[j].name,
 234                                                      a.v[j].fatness, 0, 0,
 235                                                      &errstr))) {
 236                                 fprintf(stderr, "%s\n", errstr);
 237                                 goto done;
 238                         }
 239
 240                         if (!want_frozen &&
 241                             out_num_nonfrozen) {
 242                                 *out_num_nonfrozen = a_to_start[j] + 1;
 243                         }
 244
 245                         if ((ret = quiver_add_vertex(out_qend, 0, a.v[j].name,
 246                                                      a.v[j].fatness, 0, 0,
 247                                                      &errstr))) {
 248                                 fprintf(stderr, "%s\n", errstr);
 249                                 goto done;
 250                         }
 251
 252                         for (size_t k = 0; k < b.v_num; ++k) {
 253                                 if (!(strcmp(b.v[k].name, a.v[j].name))) {
 254                                         b_to_end[k] = j;
 255                                 }
 256                         }
 257                 }
 258         }
 259
 260         /* Add the edges in correct order */
 261         for (size_t j = 0; j < a.v_num; ++j) {
 262                 size_t tj = a_to_start[j];
 263
 264                 for (size_t k = 0; k < a.v_num; ++k) {
 265                         size_t tk = a_to_start[k];
 266                         size_t idx_o = tj * out_qstart->v_len + tk;
 267                         size_t idx_a = j * a.v_len + k;
 268
 269                         out_qstart->e[idx_o] = a.e[idx_a];
 270                 }
 271         }
 272
 273         for (size_t j = 0; j < b.v_num; ++j) {
 274                 size_t tj = b_to_end[j];
 275
 276                 for (size_t k = 0; k < b.v_num; ++k) {
 277                         size_t tk = b_to_end[k];
 278                         size_t idx_o = tj * out_qend->v_len + tk;
 279                         size_t idx_b = j * b.v_len + k;
 280
 281                         out_qend->e[idx_o] = b.e[idx_b];
 282                 }
 283         }
 284
 285 done:
 286         quiver_clean(&a);
 287         quiver_clean(&b);
 288         fclose(fa);
 289         fclose(fb);
 290         free(averts);
 291         free(bverts);
 292         free(a_to_start);
 293         free(b_to_end);
 294
 295         return ret;
 296 }
 297
 298 /* Main loop */
 299 int main(int argc, char **argv)
 300 {
 301         int ret = 0;
 302         struct quiver q = { 0 };
 303         struct quiver qend = { 0 };
 304         char *frozen[argc];
 305         size_t fidx = 0;
 306         char *start_file = 0;
 307         char *end_file = 0;
 308         long start_len_l = 0;
 309         long max_len_l = 0;
 310         size_t start_len = 0;
 311         size_t max_len = (size_t) -1;
 312         size_t warn_level = (size_t) -1;
 313         struct rational *target = 0;
 314         char *p = 0;
 315         const char *errstr = 0;
 316         uint_fast8_t verbose = 0;
 317
 318         setlocale(LC_ALL, "");
 319         int opt = 0;
 320
 321         for (int j = 0; j < argc; ++j) {
 322                 frozen[j] = 0;
 323         }
 324
 325         while ((opt = getopt(argc, argv, "vhs:e:c:m:l:f:")) != -1) {
 326                 switch (opt) {
 327                 case 'v':
 328                         verbose++;
 329                         break;
 330                 case 'h':
 331                         usage(argv[0]);
 332                         ret = 0;
 333                         goto done;
 334                 case 's':
 335                         start_file = optarg;
 336                         break;
 337                 case 'e':
 338                         end_file = optarg;
 339                         break;
 340                 case 'c':
 341                         warn_level = strtoll(optarg, &p, 0);
 342
 343                         if (p &&
 344                             *p) {
 345                                 fprintf(stderr, "Invalid edge cap: %s\n",
 346                                         optarg);
 347                                 ret = EINVAL;
 348                                 goto done;
 349                         }
 350
 351                         break;
 352                 case 'l':
 353                         start_len_l = strtoll(optarg, &p, 0);
 354
 355                         if ((start_len_l <= 0) ||
 356                             (p &&
 357                              *p)) {
 358                                 fprintf(stderr, "Invalid start length: %s\n",
 359                                         optarg);
 360                                 ret = EINVAL;
 361                                 goto done;
 362                         }
 363
 364                         start_len = start_len_l;
 365                         break;
 366                 case 'm':
 367                         max_len_l = strtoll(optarg, &p, 0);
 368
 369                         if ((max_len_l <= 0) ||
 370                             (p &&
 371                              *p)) {
 372                                 fprintf(stderr, "Invalid max length: %s\n",
 373                                         optarg);
 374                                 ret = EINVAL;
 375                                 goto done;
 376                         }
 377
 378                         max_len = max_len_l;
 379                         break;
 380                 case 'f':
 381                         frozen[fidx] = optarg;
 382                         fidx++;
 383                         break;
 384                 default:
 385                         usage(argv[0]);
 386                         ret = EINVAL;
 387                         goto done;
 388                 }
 389         }
 390
 391         if (!start_file ||
 392             !end_file) {
 393                 usage(argv[0]);
 394                 ret = EINVAL;
 395                 goto done;
 396         }
 397
 398         size_t num_nonfrozen = 0;
 399
 400         if (setup(start_file, end_file, frozen, fidx, &q, &qend,
 401                   &num_nonfrozen)) {
 402                 goto done;
 403         }
 404
 405         if (warn_level == (size_t) -1) {
 406                 warn_level = 0;
 407
 408                 for (size_t k = 0; k < q.v_num; ++k) {
 409                         if (warn_level < q.v[k].fatness) {
 410                                 warn_level = q.v[k].fatness;
 411                         }
 412                 }
 413
 414                 warn_level = warn_level + 1;
 415         }
 416
 417         if (warn_level >= UINT_MAX) {
 418                 warn_level = UINT_MAX - 1;
 419         }
 420
 421         if ((ret = quiver_set_warning_threshold(&q, warn_level, &errstr))) {
 422                 fprintf(stderr, "%s\n", errstr);
 423                 goto done;
 424         }
 425
 426         /* Save off qend's vertices.  XXX: Why not just USE qend? */
 427         if (!(target = malloc(sizeof *target * qend.v_num * qend.v_num))) {
 428                 ret = errno;
 429                 perror(L("malloc"));
 430                 goto done;
 431         }
 432
 433         for (size_t i = 0; i < qend.v_num * qend.v_num; ++i) {
 434                 target[i] = (struct rational) { .p = 0, .q = 1 };
 435         }
 436
 437         for (size_t i = 0; i < qend.v_num; ++i) {
 438                 for (size_t j = 0; j < qend.v_num; ++j) {
 439                         target[i * qend.v_num + j] = qend.e[i * qend.v_len + j];
 440                 }
 441         }
 442
 443         /* Now, the long haul*/
 444         size_t len = 0;
 445
 446         if (start_len) {
 447                 len = start_len - 1;
 448         }
 449
 450         signal(SIGUSR1, handle);
 451         size_t s = 0;
 452         size_t c_idx;
 453         int warn = 0;
 454         uint_fast8_t correct = 0;
 455
 456         while (len < max_len) {
 457                 len++;
 458                 size_t sequence[len];
 459
 460                 for (size_t k = 0; k < len; ++k) {
 461                         /* This will get pruned, it's okay */
 462                         sequence[k] = 0;
 463                 }
 464
 465                 c_idx = 0;
 466 have_full_sequence:
 467
 468                 if (status_requested) {
 469                         status_requested = 0;
 470                         printf("Current sequence %s", q.v[sequence[0]].name);
 471
 472                         for (size_t k = 1; k < len; ++k) {
 473                                 printf(", %s", q.v[sequence[k]].name);
 474                         }
 475
 476                         printf("\n");
 477                         fflush(stdout);
 478                 }
 479
 480                 /*
 481                  * Here, we have computed a full sequence that we want
 482                  * to check.  c_idx is in [0, len - 1], and the quiver
 483                  * has been mutated following the sequence up to (but
 484                  * not including) c_idx.
 485                  *
 486                  * This code is duplicated from increment_sequence
 487                  * because it is reasonable to start the whole process
 488                  * with a sequence that may, in fact, produce warnings.
 489                  */
 490                 warn = 0;
 491
 492                 while (c_idx < len) {
 493                         /* Prune duplicates (involutions) */
 494                         if (c_idx &&
 495                             sequence[c_idx] == sequence[c_idx - 1]) {
 496                                 goto increment_sequence;
 497                         }
 498
 499                         /* Proceed one more mutation */
 500                         quiver_mutate(&q, sequence[c_idx], &warn, 0);
 501
 502                         if (warn) {
 503                                 /* That mutation caused a warning: skip it */
 504                                 quiver_mutate(&q, sequence[c_idx], &warn, 0);
 505
 506                                 /* And try the sequence, starting at c_idx */
 507                                 goto increment_sequence;
 508                         }
 509
 510                         /*
 511                          * Skip ... X, Y ... when X > Y and the edge
 512                          * from X to Y is trivial
 513                          */
 514                         if (c_idx &&
 515                             sequence[c_idx] < sequence[c_idx - 1]) {
 516                                 size_t x = sequence[c_idx - 1];
 517                                 size_t y = sequence[c_idx];
 518                                 struct rational *xy = &(q.e[x * q.v_len + y]);
 519                                 struct rational *yx = &(q.e[y * q.v_len + x]);
 520
 521                                 if (!xy->p &&
 522                                     !yx->p) {
 523                                         /* Undo mutation */
 524                                         quiver_mutate(&q, sequence[c_idx], 0,
 525                                                       0);
 526
 527                                         /* And try the sequence, starting at c_idx */
 528                                         goto increment_sequence;
 529                                 }
 530                         }
 531
 532                         c_idx++;
 533                 }
 534
 535                 /*
 536                  * Here, there's a full sequence, and the quiver has
 537                  * been mutated according to it (all of it).  We want
 538                  * to check if the result matches the target.
 539                  */
 540                 correct = 1;
 541
 542                 for (size_t i = 0; i < q.v_num; ++i) {
 543                         for (size_t j = 0; j < q.v_num; ++j) {
 544                                 struct rational *e = &(q.e[i * q.v_len + j]);
 545                                 struct rational *f = &(target[i * q.v_num + j]);
 546
 547                                 if (e->p * f->q != f->p * e->q) {
 548                                         correct = 0;
 549                                         i = q.v_num + 1;
 550                                         j = i;
 551                                 }
 552                         }
 553                 }
 554
 555                 if (correct) {
 556                         printf("SEQUENCE:     %s", q.v[sequence[0]].name);
 557
 558                         for (size_t k = 1; k < len; ++k) {
 559                                 printf(", %s", q.v[sequence[k]].name);
 560                         }
 561
 562                         printf("\n");
 563                         fflush(stdout);
 564                 }
 565
 566                 c_idx = len - 1;
 567                 quiver_mutate(&q, sequence[c_idx], 0, 0);
 568 increment_sequence:
 569
 570                 /*
 571                  * Here, we have some kind of sequence. The quiver agrees
 572                  * with it up to (but not including) c_idx, and we want
 573                  * to increment it at c_idx.
 574                  */
 575                 s++;
 576                 sequence[c_idx]++;
 577
 578                 if (verbose &&
 579                     c_idx == 2) {
 580                         double completed = sequence[0] * num_nonfrozen *
 581                                            num_nonfrozen + sequence[1] *
 582                                            num_nonfrozen +
 583                                            sequence[2] - 1;
 584                         double total = num_nonfrozen * num_nonfrozen *
 585                                        num_nonfrozen;
 586
 587                         printf("Length %zu: completed %.3g%%\n", len, (100.0 *
 588                                                                        completed)
 589                                / total);
 590                         fflush(stdout);
 591                 }
 592
 593                 /* Skip over repeats: each mutation is an involution */
 594                 if (c_idx > 0 &&
 595                     sequence[c_idx] == sequence[c_idx - 1]) {
 596                         sequence[c_idx]++;
 597                 }
 598
 599                 /* Wrap back to 0 */
 600                 if (sequence[c_idx] >= num_nonfrozen) {
 601                         if (!c_idx) {
 602                                 goto exhausted_len;
 603                         }
 604
 605                         c_idx--;
 606                         quiver_mutate(&q, sequence[c_idx], 0, 0);
 607                         goto increment_sequence;
 608                 }
 609
 610                 if (c_idx != len - 1) {
 611                         /*
 612                          * Here, we now have a start of a sequence.
 613                          * The quiver agrees with our start up to (but
 614                          * not including) c_idx, and we want to possibly
 615                          * complete this to a full sequence. First,
 616                          * however, we need to make sure that we don't
 617                          * introduce any warnings at c_idx.
 618                          *
 619                          * This single-mutation testing block could be
 620                          * ignored, but I have a vague suspicion it will
 621                          * be a hot path.
 622                          */
 623                         warn = 0;
 624                         quiver_mutate(&q, sequence[c_idx], &warn, 0);
 625
 626                         if (warn) {
 627                                 /* That mutation caused a warning: skip it */
 628                                 quiver_mutate(&q, sequence[c_idx], &warn, 0);
 629
 630                                 /* And try the sequence, starting at c_idx */
 631                                 goto increment_sequence;
 632                         }
 633
 634                         /*
 635                          * Quivers almost commute: in particular if
 636                          * the edge v_i -> v_j is zero, then mutation
 637                          * at v_i and v_j commute. So if we're at X, Y
 638                          * in the list, and X > Y, then we've actually
 639                          * already computed that branch of the tree,
 640                          * back when it was Y, X. Therefore, increment X.
 641                          */
 642                         if (c_idx &&
 643                             sequence[c_idx] < sequence[c_idx - 1]) {
 644                                 size_t x = sequence[c_idx - 1];
 645                                 size_t y = sequence[c_idx];
 646                                 struct rational *xy = &(q.e[x * q.v_len + y]);
 647                                 struct rational *yx = &(q.e[y * q.v_len + x]);
 648
 649                                 if (!xy->p &&
 650                                     !yx->p) {
 651                                         /* Undo mutation */
 652                                         quiver_mutate(&q, sequence[c_idx], 0,
 653                                                       0);
 654
 655                                         /* And try the sequence, starting at c_idx */
 656                                         goto increment_sequence;
 657                                 }
 658                         }
 659
 660                         /*
 661                          * Here the quiver agrees with our start up
 662                          * to and including c_idx, and we need to fill
 663                          * it out into a full sequence so that it can
 664                          * be tested.  We don't need to perform all the
 665                          * mutations of that full sequence, but we do
 666                          * need to generate the minimal one.
 667                          */
 668                         for (size_t i = c_idx + 1; i < len; ++i) {
 669                                 sequence[i] = 1 - !!((i - c_idx) % 2);
 670                         }
 671
 672                         c_idx++;
 673                 }
 674
 675                 goto have_full_sequence;
 676 exhausted_len:
 677
 678                 /*
 679                  * Here, we have c_idx = 0, and the quiver has not been
 680                  * mutated by anything in sequence
 681                  */
 682                 printf("Exhausted length %zu\n", len);
 683         }
 684
 685         fflush(stdout);
 686 done:
 687         free(target);
 688
 689         return ret;
 690 }