bin/tsort/tsort.c

   1 /* $OpenBSD: tsort.c,v 1.36 2017/05/20 09:31:19 espie Exp $ */
   2 /* ex:ts=8 sw=4:
   3  *
   4  * Copyright (c) 1999-2004 Marc Espie <espie@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18
  19 #include <assert.h>
  20 #include <ctype.h>
  21 #include <err.h>
  22 #include <limits.h>
  23 #include <stddef.h>
  24 #include <stdio.h>
  25 #include <stdint.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <unistd.h>
  29 #include "compat_ohash.h"
  30 #include "compat.h"
  31
  32 /* The complexity of topological sorting is O(e), where e is the
  33  * size of input.  While reading input, vertices have to be identified,
  34  * thus add the complexity of e keys retrieval among v keys using
  35  * an appropriate data structure.  This program uses open double hashing
  36  * for that purpose.  See Knuth for the expected complexity of double
  37  * hashing (Brent variation should probably be used if v << e, as a user
  38  * option).
  39  *
  40  * The algorithm used for longest cycle reporting is accurate, but somewhat
  41  * expensive.  It may need to build all free paths of the graph (a free
  42  * path is a path that never goes twice through the same node), whose
  43  * number can be as high as O(2^e).  Usually, the number of free paths is
  44  * much smaller though.  This program's author does not believe that a
  45  * significantly better worst-case complexity algorithm exists.
  46  *
  47  * In case of a hints file, the set of minimal nodes is maintained as a
  48  * heap.  The resulting complexity is O(e+v log v) for the worst case.
  49  * The average should actually be near O(e).
  50  *
  51  * If the hints file is incomplete, there is some extra complexity incurred
  52  * by make_transparent, which does propagate order values to unmarked
  53  * nodes. In the worst case, make_transparent is  O(e u),
  54  * where u is the number of originally unmarked nodes.
  55  * In practice, it is much faster.
  56  *
  57  * The simple topological sort algorithm detects cycles.  This program
  58  * goes further, breaking cycles through the use of simple heuristics.
  59  * Each cycle break checks the whole set of nodes, hence if c cycles break
  60  * are needed, this is an extra cost of O(c v).
  61  *
  62  * Possible heuristics are as follows:
  63  * - break cycle at node with lowest number of predecessors (default case),
  64  * - break longest cycle at node with lowest number of predecessors,
  65  * - break cycle at next node from the hints file.
  66  *
  67  * Except for the hints file case, which sets an explicit constraint on
  68  * which cycle to break, those heuristics locally result in the smallest
  69  * number of broken edges.
  70  *
  71  * Those are admittedly greedy strategies, as is the selection of the next
  72  * node from the hints file amongst equivalent candidates that is used for
  73  * `stable' topological sorting.
  74  */
  75
  76 #ifdef __GNUC__
  77 #define UNUSED  __attribute__((unused))
  78 #else
  79 #define UNUSED
  80 #endif
  81
  82 struct node;
  83
  84 /* The set of arcs from a given node is stored as a linked list.  */
  85 struct link {
  86         struct link *next;
  87         struct node *node;
  88 };
  89
  90 #define NO_ORDER        UINT_MAX
  91
  92 struct node {
  93         unsigned int refs;      /* Number of arcs left, coming into this node.
  94                                  * Note that nodes with a null count can't
  95                                  * be part of cycles.  */
  96         unsigned int order;     /* Order of nodes according to a hint file.  */
  97
  98         struct link  *arcs;     /* List of forward arcs.  */
  99
 100         /* Cycle detection algorithms build a free path of nodes.  */
 101         struct node  *from;     /* Previous node in the current path.  */
 102         struct link  *traverse; /* Next link to traverse when backtracking.  */
 103         unsigned int mark;      /* Mark processed nodes in cycle discovery.  */
 104
 105         char         k[1];      /* Name of this node.  */
 106 };
 107
 108 #define HASH_START 9
 109
 110 struct array {
 111         unsigned int entries;
 112         struct node  **t;
 113 };
 114
 115 static void nodes_init(struct ohash *);
 116 static struct node *node_lookup(struct ohash *, const char *, const char *);
 117 static __dead void usage(void);
 118 static struct node *new_node(const char *, const char *);
 119
 120 static unsigned int read_pairs(FILE *, struct ohash *, int,
 121     const char *, unsigned int, int);
 122 static void split_nodes(struct ohash *, struct array *, struct array *);
 123 static void make_transparent(struct ohash *);
 124 static void insert_arc(struct node *, struct node *);
 125
 126 #ifdef DEBUG
 127 static void dump_node(struct node *);
 128 static void dump_array(struct array *);
 129 static void dump_hash(struct ohash *);
 130 #endif
 131 static unsigned int read_hints(FILE *, struct ohash *, int,
 132     const char *, unsigned int);
 133 static struct node *find_smallest_node(struct array *);
 134 static struct node *find_good_cycle_break(struct array *);
 135 static void print_cycle(struct array *);
 136 static struct node *find_cycle_from(struct node *, struct array *);
 137 static struct node *find_predecessor(struct array *, struct node *);
 138 static unsigned int traverse_node(struct node *, unsigned int, struct array *);
 139 static struct node *find_longest_cycle(struct array *, struct array *);
 140 static struct node *find_normal_cycle(struct array *, struct array *);
 141
 142 static void heap_down(struct array *, unsigned int);
 143 static void heapify(struct array *, int);
 144 static struct node *dequeue(struct array *);
 145 static void enqueue(struct array *, struct node *);
 146
 147
 148
 149 static void *hash_calloc(size_t, size_t, void *);
 150 static void hash_free(void *, void *);
 151 static void* entry_alloc(size_t, void *);
 152 static void *ereallocarray(void *, size_t, size_t);
 153 static void *emem(void *);
 154 #define DEBUG_TRAVERSE 0
 155 static struct ohash_info node_info = {
 156         offsetof(struct node, k), NULL, hash_calloc, hash_free, entry_alloc };
 157 static void parse_args(int, char *[], struct ohash *);
 158 static int tsort(struct ohash *);
 159
 160 static int              quiet_flag, long_flag,
 161                         warn_flag, hints_flag, verbose_flag;
 162
 163
 164 int main(int, char *[]);
 165
 166 /***
 167  *** Memory handling.
 168  ***/
 169
 170 static void *
 171 emem(void *p)
 172 {
 173         if (p)
 174                 return p;
 175         else
 176                 errx(1, "Memory exhausted");
 177 }
 178
 179 static void *
 180 hash_calloc(size_t n, size_t s, void *u UNUSED)
 181 {
 182         return emem(calloc(n, s));
 183 }
 184
 185 static void
 186 hash_free(void *p, void *u UNUSED)
 187 {
 188         free(p);
 189 }
 190
 191 static void *
 192 entry_alloc(size_t s, void *u UNUSED)
 193 {
 194         return ereallocarray(NULL, 1, s);
 195 }
 196
 197 static void *
 198 ereallocarray(void *p, size_t n, size_t s)
 199 {
 200         return emem(reallocarray(p, n, s));
 201 }
 202
 203 \f
 204 /***
 205  *** Hash table.
 206  ***/
 207
 208 /* Inserting and finding nodes in the hash structure.
 209  * We handle interval strings for efficiency wrt fgetln.  */
 210 static struct node *
 211 new_node(const char *start, const char *end)
 212 {
 213         struct node     *n;
 214
 215         n = ohash_create_entry(&node_info, start, &end);
 216         n->from = NULL;
 217         n->arcs = NULL;
 218         n->refs = 0;
 219         n->mark = 0;
 220         n->order = NO_ORDER;
 221         n->traverse = NULL;
 222         return n;
 223 }
 224
 225
 226 static void
 227 nodes_init(struct ohash *h)
 228 {
 229         ohash_init(h, HASH_START, &node_info);
 230 }
 231
 232 static struct node *
 233 node_lookup(struct ohash *h, const char *start, const char *end)
 234 {
 235         unsigned int    i;
 236         struct node *   n;
 237
 238         i = ohash_qlookupi(h, start, &end);
 239
 240         n = ohash_find(h, i);
 241         if (n == NULL)
 242                 n = ohash_insert(h, i, new_node(start, end));
 243         return n;
 244 }
 245
 246 #ifdef DEBUG
 247 static void
 248 dump_node(struct node *n)
 249 {
 250         struct link     *l;
 251
 252         if (n->refs == 0)
 253                 return;
 254         printf("%s (%u/%u): ", n->k, n->order, n->refs);
 255         for (l = n->arcs; l != NULL; l = l->next)
 256                 if (n->refs != 0)
 257                 printf("%s(%u/%u) ", l->node->k, l->node->order, l->node->refs);
 258         putchar('\n');
 259 }
 260
 261 static void
 262 dump_array(struct array *a)
 263 {
 264         unsigned int    i;
 265
 266         for (i = 0; i < a->entries; i++)
 267                 dump_node(a->t[i]);
 268 }
 269
 270 static void
 271 dump_hash(struct ohash *h)
 272 {
 273         unsigned int    i;
 274         struct node     *n;
 275
 276         for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))
 277                 dump_node(n);
 278 }
 279 #endif
 280
 281 /***
 282  *** Reading data.
 283  ***/
 284
 285 static void
 286 insert_arc(struct node *a, struct node *b)
 287 {
 288         struct link     *l;
 289
 290         /* Check that this arc is not already present.  */
 291         for (l = a->arcs; l != NULL; l = l->next) {
 292                 if (l->node == b)
 293                         return;
 294         }
 295         b->refs++;
 296         l = ereallocarray(NULL, 1, sizeof(struct link));
 297         l->node = b;
 298         l->next = a->arcs;
 299         a->arcs = l;
 300 }
 301
 302 static unsigned int
 303 read_pairs(FILE *f, struct ohash *h, int reverse, const char *name,
 304     unsigned int order, int hint)
 305 {
 306         int             toggle;
 307         struct node     *a;
 308         size_t          size;
 309         char            *str;
 310
 311         toggle = 1;
 312         a = NULL;
 313
 314         while ((str = fgetln(f, &size)) != NULL) {
 315                 char *sentinel;
 316
 317                 sentinel = str + size;
 318                 for (;;) {
 319                         char *e;
 320
 321                         while (str < sentinel &&
 322                             isspace((unsigned char)*str))
 323                                 str++;
 324                         if (str == sentinel)
 325                                 break;
 326                         for (e = str;
 327                             e < sentinel && !isspace((unsigned char)*e); e++)
 328                                 continue;
 329                         if (toggle) {
 330                                 a = node_lookup(h, str, e);
 331                                 if (a->order == NO_ORDER && hint)
 332                                         a->order = order++;
 333                         } else {
 334                                 struct node *b;
 335
 336                                 b = node_lookup(h, str, e);
 337                                 assert(a != NULL);
 338                                 if (b != a) {
 339                                         if (reverse)
 340                                                 insert_arc(b, a);
 341                                         else
 342                                                 insert_arc(a, b);
 343                                 }
 344                         }
 345                         toggle = !toggle;
 346                         str = e;
 347                 }
 348         }
 349         if (toggle == 0)
 350                 errx(1, "odd number of node names in %s", name);
 351         if (!feof(f))
 352                 err(1, "error reading %s", name);
 353         return order;
 354 }
 355
 356 static unsigned int
 357 read_hints(FILE *f, struct ohash *h, int quiet, const char *name,
 358     unsigned int order)
 359 {
 360         char            *str;
 361         size_t          size;
 362
 363         while ((str = fgetln(f, &size)) != NULL) {
 364                 char *sentinel;
 365
 366                 sentinel = str + size;
 367                 for (;;) {
 368                         char *e;
 369                         struct node *a;
 370
 371                         while (str < sentinel && isspace((unsigned char)*str))
 372                                 str++;
 373                         if (str == sentinel)
 374                                 break;
 375                         for (e = str;
 376                             e < sentinel && !isspace((unsigned char)*e); e++)
 377                                 continue;
 378                         a = node_lookup(h, str, e);
 379                         if (a->order != NO_ORDER) {
 380                                 if (!quiet)
 381                                     warnx(
 382                                         "duplicate node %s in hints file %s",
 383                                         a->k, name);
 384                         } else
 385                                 a->order = order++;
 386                         str = e;
 387                 }
 388         }
 389         if (!feof(f))
 390                 err(1, "error reading %s", name);
 391         return order;
 392 }
 393
 394 \f
 395 /***
 396  *** Standard heap handling routines.
 397  ***/
 398
 399 static void
 400 heap_down(struct array *h, unsigned int i)
 401 {
 402         unsigned int    j;
 403         struct node     *swap;
 404
 405         for (; (j=2*i+1) < h->entries; i = j) {
 406                 if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order)
 407                         j++;
 408                 if (h->t[i]->order <= h->t[j]->order)
 409                         break;
 410                 swap = h->t[i];
 411                 h->t[i] = h->t[j];
 412                 h->t[j] = swap;
 413         }
 414 }
 415
 416 static void
 417 heapify(struct array *h, int verbose)
 418 {
 419         unsigned int    i;
 420
 421         for (i = h->entries; i != 0;) {
 422                 if (h->t[--i]->order == NO_ORDER && verbose)
 423                         warnx("node %s absent from hints file", h->t[i]->k);
 424                 heap_down(h, i);
 425         }
 426 }
 427
 428 #define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )
 429
 430 static struct node *
 431 dequeue(struct array *h)
 432 {
 433         struct node     *n;
 434
 435         if (h->entries == 0)
 436                 n = NULL;
 437         else {
 438                 n = h->t[0];
 439                 if (--h->entries != 0) {
 440                     h->t[0] = h->t[h->entries];
 441                     heap_down(h, 0);
 442                 }
 443         }
 444         return n;
 445 }
 446
 447 #define ENQUEUE(h, n) do {                      \
 448         if (hints_flag)                         \
 449                 enqueue((h), (n));              \
 450         else                                    \
 451                 (h)->t[(h)->entries++] = (n);   \
 452         } while(0);
 453
 454 static void
 455 enqueue(struct array *h, struct node *n)
 456 {
 457         unsigned int    i, j;
 458         struct node     *swap;
 459
 460         h->t[h->entries++] = n;
 461         for (i = h->entries-1; i > 0; i = j) {
 462                 j = (i-1)/2;
 463                 if (h->t[j]->order < h->t[i]->order)
 464                         break;
 465                 swap = h->t[j];
 466                 h->t[j] = h->t[i];
 467                 h->t[i] = swap;
 468         }
 469 }
 470
 471 /* Nodes without order should not hinder direct dependencies.
 472  * Iterate until no nodes are left.
 473  */
 474 static void
 475 make_transparent(struct ohash *hash)
 476 {
 477         struct node     *n;
 478         unsigned int    i;
 479         struct link     *l;
 480         int             adjusted;
 481         int             bad;
 482         unsigned int    min;
 483
 484         /* first try to solve complete nodes */
 485         do {
 486                 adjusted = 0;
 487                 bad = 0;
 488                 for (n = ohash_first(hash, &i); n != NULL;
 489                     n = ohash_next(hash, &i)) {
 490                         if (n->order == NO_ORDER) {
 491                                 min = NO_ORDER;
 492
 493                                 for (l = n->arcs; l != NULL; l = l->next) {
 494                                         /* unsolved node -> delay resolution */
 495                                         if (l->node->order == NO_ORDER) {
 496                                                 bad = 1;
 497                                                 break;
 498                                         } else if (l->node->order < min)
 499                                                 min = l->node->order;
 500                                 }
 501                                 if (min < NO_ORDER && l == NULL) {
 502                                         n->order = min;
 503                                         adjusted = 1;
 504                                 }
 505                         }
 506                 }
 507
 508         } while (adjusted);
 509
 510         /* then, if incomplete nodes are left, do them */
 511         if (bad) do {
 512                 adjusted = 0;
 513                 for (n = ohash_first(hash, &i); n != NULL;
 514                     n = ohash_next(hash, &i))
 515                         if (n->order == NO_ORDER)
 516                                 for (l = n->arcs; l != NULL; l = l->next)
 517                                         if (l->node->order < n->order) {
 518                                                 n->order = l->node->order;
 519                                                 adjusted = 1;
 520                                         }
 521         } while (adjusted);
 522 }
 523
 524 \f
 525 /***
 526  *** Search through hash array for nodes.
 527  ***/
 528
 529 /* Split nodes into unrefed nodes/live nodes.  */
 530 static void
 531 split_nodes(struct ohash *hash, struct array *heap, struct array *remaining)
 532 {
 533
 534         struct node *n;
 535         unsigned int i;
 536
 537         heap->t = ereallocarray(NULL, ohash_entries(hash),
 538             sizeof(struct node *));
 539         remaining->t = ereallocarray(NULL, ohash_entries(hash),
 540             sizeof(struct node *));
 541         heap->entries = 0;
 542         remaining->entries = 0;
 543
 544         for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) {
 545                 if (n->refs == 0)
 546                         heap->t[heap->entries++] = n;
 547                 else
 548                         remaining->t[remaining->entries++] = n;
 549         }
 550 }
 551
 552 /* Good point to break a cycle: live node with as few refs as possible. */
 553 static struct node *
 554 find_good_cycle_break(struct array *h)
 555 {
 556         unsigned int    i;
 557         unsigned int    best;
 558         struct node     *u;
 559
 560         best = UINT_MAX;
 561         u = NULL;
 562
 563         assert(h->entries != 0);
 564         for (i = 0; i < h->entries; i++) {
 565                 struct node *n = h->t[i];
 566                 /* No need to look further. */
 567                 if (n->refs == 1)
 568                         return n;
 569                 if (n->refs != 0 && n->refs < best) {
 570                         best = n->refs;
 571                         u = n;
 572                 }
 573         }
 574         assert(u != NULL);
 575         return u;
 576 }
 577
 578 /*  Retrieve the node with the smallest order.  */
 579 static struct node *
 580 find_smallest_node(struct array *h)
 581 {
 582         unsigned int    i;
 583         unsigned int    best;
 584         struct node     *u;
 585
 586         best = UINT_MAX;
 587         u = NULL;
 588
 589         assert(h->entries != 0);
 590         for (i = 0; i < h->entries; i++) {
 591                 struct node *n = h->t[i];
 592                 if (n->refs != 0 && n->order < best) {
 593                         best = n->order;
 594                         u = n;
 595                 }
 596         }
 597         assert(u != NULL);
 598         return u;
 599 }
 600
 601 \f
 602 /***
 603  *** Graph algorithms.
 604  ***/
 605
 606 /* Explore the nodes reachable from i to find a cycle, store it in c.
 607  * This may fail.  */
 608 static struct node *
 609 find_cycle_from(struct node *i, struct array *c)
 610 {
 611         struct node     *n;
 612
 613         n = i;
 614         /* XXX Previous cycle findings may have left this pointer non-null.  */
 615         i->from = NULL;
 616
 617         for (;;) {
 618                 /* Note that all marks are reversed before this code exits.  */
 619                 n->mark = 1;
 620                 if (n->traverse)
 621                         n->traverse = n->traverse->next;
 622                 else
 623                         n->traverse = n->arcs;
 624                 /* Skip over dead nodes.  */
 625                 while (n->traverse && n->traverse->node->refs == 0)
 626                         n->traverse = n->traverse->next;
 627                 if (n->traverse) {
 628                         struct node *go = n->traverse->node;
 629
 630                         if (go->mark) {
 631                                 c->entries = 0;
 632                                 for (; n != NULL && n != go; n = n->from) {
 633                                         c->t[c->entries++] = n;
 634                                         n->mark = 0;
 635                                 }
 636                                 for (; n != NULL; n = n->from)
 637                                         n->mark = 0;
 638                                 c->t[c->entries++] = go;
 639                                 return go;
 640                         } else {
 641                             go->from = n;
 642                             n = go;
 643                         }
 644                 } else {
 645                         n->mark = 0;
 646                         n = n->from;
 647                         if (n == NULL)
 648                                 return NULL;
 649                 }
 650         }
 651 }
 652
 653 /* Find a live predecessor of node n.  This is a slow routine, as it needs
 654  * to go through the whole array, but it is not needed often.
 655  */
 656 static struct node *
 657 find_predecessor(struct array *a, struct node *n)
 658 {
 659         unsigned int i;
 660
 661         for (i = 0; i < a->entries; i++) {
 662                 struct node *m;
 663
 664                 m = a->t[i];
 665                 if (m->refs != 0) {
 666                         struct link *l;
 667
 668                         for (l = m->arcs; l != NULL; l = l->next)
 669                                 if (l->node == n)
 670                                         return m;
 671                 }
 672         }
 673         assert(1 == 0);
 674         return NULL;
 675 }
 676
 677 /* Traverse all strongly connected components reachable from node n.
 678    Start numbering them at o. Return the maximum order reached.
 679    Update the largest cycle found so far.
 680  */
 681 static unsigned int
 682 traverse_node(struct node *n, unsigned int o, struct array *c)
 683 {
 684         unsigned int    min, max;
 685
 686         n->from = NULL;
 687         min = o;
 688         max = ++o;
 689
 690         for (;;) {
 691                 n->mark = o;
 692                 if (DEBUG_TRAVERSE)
 693                         printf("%s(%u) ", n->k, n->mark);
 694                 /* Find next arc to explore.  */
 695                 if (n->traverse)
 696                         n->traverse = n->traverse->next;
 697                 else
 698                         n->traverse = n->arcs;
 699                 /* Skip over dead nodes.  */
 700                 while (n->traverse && n->traverse->node->refs == 0)
 701                         n->traverse = n->traverse->next;
 702                 /* If arc left.  */
 703                 if (n->traverse) {
 704                         struct node     *go;
 705
 706                         go = n->traverse->node;
 707                         /* Optimisation: if go->mark < min, we already
 708                          * visited this strongly-connected component in
 709                          * a previous pass.  Hence, this can yield no new
 710                          * cycle.  */
 711
 712                         /* Not part of the current path: go for it.  */
 713                         if (go->mark == 0 || go->mark == min) {
 714                                 go->from = n;
 715                                 n = go;
 716                                 o++;
 717                                 if (o > max)
 718                                         max = o;
 719                         /* Part of the current path: check cycle length.  */
 720                         } else if (go->mark > min) {
 721                                 if (DEBUG_TRAVERSE)
 722                                         printf("%d\n", o - go->mark + 1);
 723                                 if (o - go->mark + 1 > c->entries) {
 724                                         struct node *t;
 725                                         unsigned int i;
 726
 727                                         c->entries = o - go->mark + 1;
 728                                         i = 0;
 729                                         c->t[i++] = go;
 730                                         for (t = n; t != go; t = t->from)
 731                                                 c->t[i++] = t;
 732                                 }
 733                         }
 734
 735                 /* No arc left: backtrack.  */
 736                 } else {
 737                         n->mark = min;
 738                         n = n->from;
 739                         if (!n)
 740                                 return max;
 741                         o--;
 742                 }
 743         }
 744 }
 745
 746 static void
 747 print_cycle(struct array *c)
 748 {
 749         unsigned int    i;
 750
 751         /* Printing in reverse order, since cycle discoveries finds reverse
 752          * edges.  */
 753         for (i = c->entries; i != 0;) {
 754                 i--;
 755                 warnx("%s", c->t[i]->k);
 756         }
 757 }
 758
 759 static struct node *
 760 find_longest_cycle(struct array *h, struct array *c)
 761 {
 762         unsigned int    i;
 763         unsigned int    o;
 764         unsigned int    best;
 765         struct node     *n;
 766         static int      notfirst = 0;
 767
 768         assert(h->entries != 0);
 769
 770         /* No cycle found yet.  */
 771         c->entries = 0;
 772
 773         /* Reset the set of marks, except the first time around.  */
 774         if (notfirst) {
 775                 for (i = 0; i < h->entries; i++)
 776                         h->t[i]->mark = 0;
 777         } else
 778                 notfirst = 1;
 779
 780         o = 0;
 781
 782         /* Traverse the array.  Each unmarked, live node heralds a
 783          * new set of strongly connected components.  */
 784         for (i = 0; i < h->entries; i++) {
 785                 n = h->t[i];
 786                 if (n->refs != 0 && n->mark == 0) {
 787                         /* Each call to traverse_node uses a separate
 788                          * interval of numbers to mark nodes.  */
 789                         o++;
 790                         o = traverse_node(n, o, c);
 791                 }
 792         }
 793
 794         assert(c->entries != 0);
 795         n = c->t[0];
 796         best = n->refs;
 797         for (i = 0; i < c->entries; i++) {
 798                 if (c->t[i]->refs < best) {
 799                         n = c->t[i];
 800                         best = n->refs;
 801                 }
 802         }
 803         return n;
 804 }
 805
 806 static struct node *
 807 find_normal_cycle(struct array *h, struct array *c)
 808 {
 809         struct node *b, *n;
 810
 811         if (hints_flag)
 812                 n = find_smallest_node(h);
 813         else
 814                 n = find_good_cycle_break(h);
 815         while ((b = find_cycle_from(n, c)) == NULL)
 816                 n = find_predecessor(h, n);
 817         return b;
 818 }
 819
 820 \f
 821 #define plural(n) ((n) > 1 ? "s" : "")
 822
 823 static void
 824 parse_args(int argc, char *argv[], struct ohash *pairs)
 825 {
 826         int c;
 827         unsigned int    order;
 828         int reverse_flag;
 829         const char **files;
 830         int i, j;
 831
 832         i = 0;
 833
 834         reverse_flag = quiet_flag = long_flag =
 835                 warn_flag = hints_flag = verbose_flag = 0;
 836         /* argc is good enough, as we start at argv[1] */
 837         files = ereallocarray(NULL, argc, sizeof (char *));
 838         while ((c = getopt(argc, argv, "h:flqrvw")) != -1) {
 839                 switch(c) {
 840                 case 'h':
 841                         files[i++] = optarg;
 842                         hints_flag = 1;
 843                         break;
 844                         /*FALLTHRU*/
 845                 case 'f':
 846                         hints_flag = 2;
 847                         break;
 848                 case 'l':
 849                         long_flag = 1;
 850                         break;
 851                 case 'q':
 852                         quiet_flag = 1;
 853                         break;
 854                 case 'r':
 855                         reverse_flag = 1;
 856                         break;
 857                 case 'v':
 858                         verbose_flag = 1;
 859                         break;
 860                 case 'w':
 861                         warn_flag = 1;
 862                         break;
 863                 default:
 864                         usage();
 865                 }
 866         }
 867
 868         argc -= optind;
 869         argv += optind;
 870
 871         switch(argc) {
 872         case 1:
 873                 files[i++] = argv[0];
 874                 break;
 875         case 0:
 876                 break;
 877         default:
 878                 usage();
 879         }
 880
 881         files[i] = NULL;
 882
 883 /*      if (pledge("stdio rpath", files) == -1) */
 884         if (pledge("stdio rpath", NULL) == -1)
 885                 err(1, "pledge");
 886
 887         nodes_init(pairs);
 888         order = 0;
 889
 890         for (j = 0; j != i-argc; j++) {
 891                 FILE *f;
 892
 893                 f = fopen(files[j], "r");
 894                 if (f == NULL)
 895                         err(1, "Can't open hint file %s", files[i]);
 896                 order = read_hints(f, pairs, quiet_flag, files[i], order);
 897                 fclose(f);
 898         }
 899         free(files);
 900
 901         if (argc == 1) {
 902                 FILE *f;
 903
 904                 f = fopen(argv[0], "r");
 905                 if (f == NULL)
 906                         err(1, "Can't open file %s", argv[0]);
 907                 order = read_pairs(f, pairs, reverse_flag, argv[0], order,
 908                     hints_flag == 2);
 909                 fclose(f);
 910         } else {
 911                 order = read_pairs(stdin, pairs, reverse_flag, "stdin",
 912                     order, hints_flag == 2);
 913         }
 914
 915         if (pledge("stdio", NULL) == -1)
 916                 err(1, "pledge");
 917 }
 918
 919 static int
 920 tsort(struct ohash *pairs)
 921 {
 922             struct array        aux;    /* Unrefed nodes/cycle reporting.  */
 923             struct array        remaining;
 924             unsigned int        broken_arcs, broken_cycles;
 925             unsigned int        left;
 926
 927             broken_arcs = 0;
 928             broken_cycles = 0;
 929
 930             if (hints_flag)
 931                     make_transparent(pairs);
 932             split_nodes(pairs, &aux, &remaining);
 933             ohash_delete(pairs);
 934
 935             if (hints_flag)
 936                     heapify(&aux, verbose_flag);
 937
 938             left = remaining.entries + aux.entries;
 939             while (left != 0) {
 940
 941                     /* Standard topological sort.  */
 942                     while (aux.entries) {
 943                             struct link *l;
 944                             struct node *n;
 945
 946                             n = DEQUEUE(&aux);
 947                             printf("%s\n", n->k);
 948                             left--;
 949                             /* We can't free nodes, as we don't know which
 950                              * entry we can remove in the hash table.  We
 951                              * rely on refs == 0 to recognize live nodes.
 952                              * Decrease ref count of live nodes, enter new
 953                              * candidates into the unrefed list.  */
 954                             for (l = n->arcs; l != NULL; l = l->next)
 955                                     if (l->node->refs != 0 &&
 956                                         --l->node->refs == 0) {
 957                                             ENQUEUE(&aux, l->node);
 958                                     }
 959                     }
 960                     /* There are still cycles to break.  */
 961                     if (left != 0) {
 962                             struct node *n;
 963
 964                             broken_cycles++;
 965                             /* XXX Simple cycle detection and long cycle
 966                              * detection are mutually exclusive.  */
 967
 968                             if (long_flag)
 969                                     n = find_longest_cycle(&remaining, &aux);
 970                             else
 971                                     n = find_normal_cycle(&remaining, &aux);
 972
 973                             if (!quiet_flag) {
 974                                     warnx("cycle in data");
 975                                     print_cycle(&aux);
 976                             }
 977
 978                             if (verbose_flag)
 979                                     warnx("%u edge%s broken", n->refs,
 980                                         plural(n->refs));
 981                             broken_arcs += n->refs;
 982                             n->refs = 0;
 983                             /* Reinitialization, cycle reporting uses aux.  */
 984                             aux.t[0] = n;
 985                             aux.entries = 1;
 986                     }
 987             }
 988             if (verbose_flag && broken_cycles != 0)
 989                     warnx("%u cycle%s broken, for a total of %u edge%s",
 990                         broken_cycles, plural(broken_cycles),
 991                         broken_arcs, plural(broken_arcs));
 992             if (warn_flag)
 993                     return (broken_cycles < 256 ? broken_cycles : 255);
 994             else
 995                     return (0);
 996 }
 997
 998 int
 999 main(int argc, char *argv[])
1000 {
1001         struct ohash    pairs;
1002
1003         if (pledge("stdio rpath", NULL) == -1)
1004                 err(1, "pledge");
1005
1006         parse_args(argc, argv, &pairs);
1007         return tsort(&pairs);
1008 }
1009
1010
1011 extern char *__progname;
1012
1013 static void
1014 usage(void)
1015 {
1016         fprintf(stderr, "Usage: %s [-flqrvw] [-h file] [file]\n", __progname);
1017         exit(1);
1018 }