1 /* $OpenBSD: tsort.c,v 1.36 2017/05/20 09:31:19 espie Exp $ */
4 * Copyright (c) 1999-2004 Marc Espie <espie@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29 #include "compat_ohash.h"
32 /* The complexity of topological sorting is O(e), where e is the
33 * size of input. While reading input, vertices have to be identified,
34 * thus add the complexity of e keys retrieval among v keys using
35 * an appropriate data structure. This program uses open double hashing
36 * for that purpose. See Knuth for the expected complexity of double
37 * hashing (Brent variation should probably be used if v << e, as a user
40 * The algorithm used for longest cycle reporting is accurate, but somewhat
41 * expensive. It may need to build all free paths of the graph (a free
42 * path is a path that never goes twice through the same node), whose
43 * number can be as high as O(2^e). Usually, the number of free paths is
44 * much smaller though. This program's author does not believe that a
45 * significantly better worst-case complexity algorithm exists.
47 * In case of a hints file, the set of minimal nodes is maintained as a
48 * heap. The resulting complexity is O(e+v log v) for the worst case.
49 * The average should actually be near O(e).
51 * If the hints file is incomplete, there is some extra complexity incurred
52 * by make_transparent, which does propagate order values to unmarked
53 * nodes. In the worst case, make_transparent is O(e u),
54 * where u is the number of originally unmarked nodes.
55 * In practice, it is much faster.
57 * The simple topological sort algorithm detects cycles. This program
58 * goes further, breaking cycles through the use of simple heuristics.
59 * Each cycle break checks the whole set of nodes, hence if c cycles break
60 * are needed, this is an extra cost of O(c v).
62 * Possible heuristics are as follows:
63 * - break cycle at node with lowest number of predecessors (default case),
64 * - break longest cycle at node with lowest number of predecessors,
65 * - break cycle at next node from the hints file.
67 * Except for the hints file case, which sets an explicit constraint on
68 * which cycle to break, those heuristics locally result in the smallest
69 * number of broken edges.
71 * Those are admittedly greedy strategies, as is the selection of the next
72 * node from the hints file amongst equivalent candidates that is used for
73 * `stable' topological sorting.
77 #define UNUSED __attribute__((unused))
84 /* The set of arcs from a given node is stored as a linked list. */
90 #define NO_ORDER UINT_MAX
93 unsigned int refs
; /* Number of arcs left, coming into this node.
94 * Note that nodes with a null count can't
95 * be part of cycles. */
96 unsigned int order
; /* Order of nodes according to a hint file. */
98 struct link
*arcs
; /* List of forward arcs. */
100 /* Cycle detection algorithms build a free path of nodes. */
101 struct node
*from
; /* Previous node in the current path. */
102 struct link
*traverse
; /* Next link to traverse when backtracking. */
103 unsigned int mark
; /* Mark processed nodes in cycle discovery. */
105 char k
[1]; /* Name of this node. */
111 unsigned int entries
;
115 static void nodes_init(struct ohash
*);
116 static struct node
*node_lookup(struct ohash
*, const char *, const char *);
117 static __dead
void usage(void);
118 static struct node
*new_node(const char *, const char *);
120 static unsigned int read_pairs(FILE *, struct ohash
*, int,
121 const char *, unsigned int, int);
122 static void split_nodes(struct ohash
*, struct array
*, struct array
*);
123 static void make_transparent(struct ohash
*);
124 static void insert_arc(struct node
*, struct node
*);
127 static void dump_node(struct node
*);
128 static void dump_array(struct array
*);
129 static void dump_hash(struct ohash
*);
131 static unsigned int read_hints(FILE *, struct ohash
*, int,
132 const char *, unsigned int);
133 static struct node
*find_smallest_node(struct array
*);
134 static struct node
*find_good_cycle_break(struct array
*);
135 static void print_cycle(struct array
*);
136 static struct node
*find_cycle_from(struct node
*, struct array
*);
137 static struct node
*find_predecessor(struct array
*, struct node
*);
138 static unsigned int traverse_node(struct node
*, unsigned int, struct array
*);
139 static struct node
*find_longest_cycle(struct array
*, struct array
*);
140 static struct node
*find_normal_cycle(struct array
*, struct array
*);
142 static void heap_down(struct array
*, unsigned int);
143 static void heapify(struct array
*, int);
144 static struct node
*dequeue(struct array
*);
145 static void enqueue(struct array
*, struct node
*);
149 static void *hash_calloc(size_t, size_t, void *);
150 static void hash_free(void *, void *);
151 static void* entry_alloc(size_t, void *);
152 static void *ereallocarray(void *, size_t, size_t);
153 static void *emem(void *);
154 #define DEBUG_TRAVERSE 0
155 static struct ohash_info node_info
= {
156 offsetof(struct node
, k
), NULL
, hash_calloc
, hash_free
, entry_alloc
};
157 static void parse_args(int, char *[], struct ohash
*);
158 static int tsort(struct ohash
*);
160 static int quiet_flag
, long_flag
,
161 warn_flag
, hints_flag
, verbose_flag
;
164 int main(int, char *[]);
176 errx(1, "Memory exhausted");
180 hash_calloc(size_t n
, size_t s
, void *u UNUSED
)
182 return emem(calloc(n
, s
));
186 hash_free(void *p
, void *u UNUSED
)
192 entry_alloc(size_t s
, void *u UNUSED
)
194 return ereallocarray(NULL
, 1, s
);
198 ereallocarray(void *p
, size_t n
, size_t s
)
200 return emem(reallocarray(p
, n
, s
));
208 /* Inserting and finding nodes in the hash structure.
209 * We handle interval strings for efficiency wrt fgetln. */
211 new_node(const char *start
, const char *end
)
215 n
= ohash_create_entry(&node_info
, start
, &end
);
227 nodes_init(struct ohash
*h
)
229 ohash_init(h
, HASH_START
, &node_info
);
233 node_lookup(struct ohash
*h
, const char *start
, const char *end
)
238 i
= ohash_qlookupi(h
, start
, &end
);
240 n
= ohash_find(h
, i
);
242 n
= ohash_insert(h
, i
, new_node(start
, end
));
248 dump_node(struct node
*n
)
254 printf("%s (%u/%u): ", n
->k
, n
->order
, n
->refs
);
255 for (l
= n
->arcs
; l
!= NULL
; l
= l
->next
)
257 printf("%s(%u/%u) ", l
->node
->k
, l
->node
->order
, l
->node
->refs
);
262 dump_array(struct array
*a
)
266 for (i
= 0; i
< a
->entries
; i
++)
271 dump_hash(struct ohash
*h
)
276 for (n
= ohash_first(h
, &i
); n
!= NULL
; n
= ohash_next(h
, &i
))
286 insert_arc(struct node
*a
, struct node
*b
)
290 /* Check that this arc is not already present. */
291 for (l
= a
->arcs
; l
!= NULL
; l
= l
->next
) {
296 l
= ereallocarray(NULL
, 1, sizeof(struct link
));
303 read_pairs(FILE *f
, struct ohash
*h
, int reverse
, const char *name
,
304 unsigned int order
, int hint
)
314 while ((str
= fgetln(f
, &size
)) != NULL
) {
317 sentinel
= str
+ size
;
321 while (str
< sentinel
&&
322 isspace((unsigned char)*str
))
327 e
< sentinel
&& !isspace((unsigned char)*e
); e
++)
330 a
= node_lookup(h
, str
, e
);
331 if (a
->order
== NO_ORDER
&& hint
)
336 b
= node_lookup(h
, str
, e
);
350 errx(1, "odd number of node names in %s", name
);
352 err(1, "error reading %s", name
);
357 read_hints(FILE *f
, struct ohash
*h
, int quiet
, const char *name
,
363 while ((str
= fgetln(f
, &size
)) != NULL
) {
366 sentinel
= str
+ size
;
371 while (str
< sentinel
&& isspace((unsigned char)*str
))
376 e
< sentinel
&& !isspace((unsigned char)*e
); e
++)
378 a
= node_lookup(h
, str
, e
);
379 if (a
->order
!= NO_ORDER
) {
382 "duplicate node %s in hints file %s",
390 err(1, "error reading %s", name
);
396 *** Standard heap handling routines.
400 heap_down(struct array
*h
, unsigned int i
)
405 for (; (j
=2*i
+1) < h
->entries
; i
= j
) {
406 if (j
+1 < h
->entries
&& h
->t
[j
+1]->order
< h
->t
[j
]->order
)
408 if (h
->t
[i
]->order
<= h
->t
[j
]->order
)
417 heapify(struct array
*h
, int verbose
)
421 for (i
= h
->entries
; i
!= 0;) {
422 if (h
->t
[--i
]->order
== NO_ORDER
&& verbose
)
423 warnx("node %s absent from hints file", h
->t
[i
]->k
);
428 #define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )
431 dequeue(struct array
*h
)
439 if (--h
->entries
!= 0) {
440 h
->t
[0] = h
->t
[h
->entries
];
447 #define ENQUEUE(h, n) do { \
451 (h)->t[(h)->entries++] = (n); \
455 enqueue(struct array
*h
, struct node
*n
)
460 h
->t
[h
->entries
++] = n
;
461 for (i
= h
->entries
-1; i
> 0; i
= j
) {
463 if (h
->t
[j
]->order
< h
->t
[i
]->order
)
471 /* Nodes without order should not hinder direct dependencies.
472 * Iterate until no nodes are left.
475 make_transparent(struct ohash
*hash
)
484 /* first try to solve complete nodes */
488 for (n
= ohash_first(hash
, &i
); n
!= NULL
;
489 n
= ohash_next(hash
, &i
)) {
490 if (n
->order
== NO_ORDER
) {
493 for (l
= n
->arcs
; l
!= NULL
; l
= l
->next
) {
494 /* unsolved node -> delay resolution */
495 if (l
->node
->order
== NO_ORDER
) {
498 } else if (l
->node
->order
< min
)
499 min
= l
->node
->order
;
501 if (min
< NO_ORDER
&& l
== NULL
) {
510 /* then, if incomplete nodes are left, do them */
513 for (n
= ohash_first(hash
, &i
); n
!= NULL
;
514 n
= ohash_next(hash
, &i
))
515 if (n
->order
== NO_ORDER
)
516 for (l
= n
->arcs
; l
!= NULL
; l
= l
->next
)
517 if (l
->node
->order
< n
->order
) {
518 n
->order
= l
->node
->order
;
526 *** Search through hash array for nodes.
529 /* Split nodes into unrefed nodes/live nodes. */
531 split_nodes(struct ohash
*hash
, struct array
*heap
, struct array
*remaining
)
537 heap
->t
= ereallocarray(NULL
, ohash_entries(hash
),
538 sizeof(struct node
*));
539 remaining
->t
= ereallocarray(NULL
, ohash_entries(hash
),
540 sizeof(struct node
*));
542 remaining
->entries
= 0;
544 for (n
= ohash_first(hash
, &i
); n
!= NULL
; n
= ohash_next(hash
, &i
)) {
546 heap
->t
[heap
->entries
++] = n
;
548 remaining
->t
[remaining
->entries
++] = n
;
552 /* Good point to break a cycle: live node with as few refs as possible. */
554 find_good_cycle_break(struct array
*h
)
563 assert(h
->entries
!= 0);
564 for (i
= 0; i
< h
->entries
; i
++) {
565 struct node
*n
= h
->t
[i
];
566 /* No need to look further. */
569 if (n
->refs
!= 0 && n
->refs
< best
) {
578 /* Retrieve the node with the smallest order. */
580 find_smallest_node(struct array
*h
)
589 assert(h
->entries
!= 0);
590 for (i
= 0; i
< h
->entries
; i
++) {
591 struct node
*n
= h
->t
[i
];
592 if (n
->refs
!= 0 && n
->order
< best
) {
603 *** Graph algorithms.
606 /* Explore the nodes reachable from i to find a cycle, store it in c.
609 find_cycle_from(struct node
*i
, struct array
*c
)
614 /* XXX Previous cycle findings may have left this pointer non-null. */
618 /* Note that all marks are reversed before this code exits. */
621 n
->traverse
= n
->traverse
->next
;
623 n
->traverse
= n
->arcs
;
624 /* Skip over dead nodes. */
625 while (n
->traverse
&& n
->traverse
->node
->refs
== 0)
626 n
->traverse
= n
->traverse
->next
;
628 struct node
*go
= n
->traverse
->node
;
632 for (; n
!= NULL
&& n
!= go
; n
= n
->from
) {
633 c
->t
[c
->entries
++] = n
;
636 for (; n
!= NULL
; n
= n
->from
)
638 c
->t
[c
->entries
++] = go
;
653 /* Find a live predecessor of node n. This is a slow routine, as it needs
654 * to go through the whole array, but it is not needed often.
657 find_predecessor(struct array
*a
, struct node
*n
)
661 for (i
= 0; i
< a
->entries
; i
++) {
668 for (l
= m
->arcs
; l
!= NULL
; l
= l
->next
)
677 /* Traverse all strongly connected components reachable from node n.
678 Start numbering them at o. Return the maximum order reached.
679 Update the largest cycle found so far.
682 traverse_node(struct node
*n
, unsigned int o
, struct array
*c
)
684 unsigned int min
, max
;
693 printf("%s(%u) ", n
->k
, n
->mark
);
694 /* Find next arc to explore. */
696 n
->traverse
= n
->traverse
->next
;
698 n
->traverse
= n
->arcs
;
699 /* Skip over dead nodes. */
700 while (n
->traverse
&& n
->traverse
->node
->refs
== 0)
701 n
->traverse
= n
->traverse
->next
;
706 go
= n
->traverse
->node
;
707 /* Optimisation: if go->mark < min, we already
708 * visited this strongly-connected component in
709 * a previous pass. Hence, this can yield no new
712 /* Not part of the current path: go for it. */
713 if (go
->mark
== 0 || go
->mark
== min
) {
719 /* Part of the current path: check cycle length. */
720 } else if (go
->mark
> min
) {
722 printf("%d\n", o
- go
->mark
+ 1);
723 if (o
- go
->mark
+ 1 > c
->entries
) {
727 c
->entries
= o
- go
->mark
+ 1;
730 for (t
= n
; t
!= go
; t
= t
->from
)
735 /* No arc left: backtrack. */
747 print_cycle(struct array
*c
)
751 /* Printing in reverse order, since cycle discoveries finds reverse
753 for (i
= c
->entries
; i
!= 0;) {
755 warnx("%s", c
->t
[i
]->k
);
760 find_longest_cycle(struct array
*h
, struct array
*c
)
766 static int notfirst
= 0;
768 assert(h
->entries
!= 0);
770 /* No cycle found yet. */
773 /* Reset the set of marks, except the first time around. */
775 for (i
= 0; i
< h
->entries
; i
++)
782 /* Traverse the array. Each unmarked, live node heralds a
783 * new set of strongly connected components. */
784 for (i
= 0; i
< h
->entries
; i
++) {
786 if (n
->refs
!= 0 && n
->mark
== 0) {
787 /* Each call to traverse_node uses a separate
788 * interval of numbers to mark nodes. */
790 o
= traverse_node(n
, o
, c
);
794 assert(c
->entries
!= 0);
797 for (i
= 0; i
< c
->entries
; i
++) {
798 if (c
->t
[i
]->refs
< best
) {
807 find_normal_cycle(struct array
*h
, struct array
*c
)
812 n
= find_smallest_node(h
);
814 n
= find_good_cycle_break(h
);
815 while ((b
= find_cycle_from(n
, c
)) == NULL
)
816 n
= find_predecessor(h
, n
);
821 #define plural(n) ((n) > 1 ? "s" : "")
824 parse_args(int argc
, char *argv
[], struct ohash
*pairs
)
834 reverse_flag
= quiet_flag
= long_flag
=
835 warn_flag
= hints_flag
= verbose_flag
= 0;
836 /* argc is good enough, as we start at argv[1] */
837 files
= ereallocarray(NULL
, argc
, sizeof (char *));
838 while ((c
= getopt(argc
, argv
, "h:flqrvw")) != -1) {
873 files
[i
++] = argv
[0];
883 /* if (pledge("stdio rpath", files) == -1) */
884 if (pledge("stdio rpath", NULL
) == -1)
890 for (j
= 0; j
!= i
-argc
; j
++) {
893 f
= fopen(files
[j
], "r");
895 err(1, "Can't open hint file %s", files
[i
]);
896 order
= read_hints(f
, pairs
, quiet_flag
, files
[i
], order
);
904 f
= fopen(argv
[0], "r");
906 err(1, "Can't open file %s", argv
[0]);
907 order
= read_pairs(f
, pairs
, reverse_flag
, argv
[0], order
,
911 order
= read_pairs(stdin
, pairs
, reverse_flag
, "stdin",
912 order
, hints_flag
== 2);
915 if (pledge("stdio", NULL
) == -1)
920 tsort(struct ohash
*pairs
)
922 struct array aux
; /* Unrefed nodes/cycle reporting. */
923 struct array remaining
;
924 unsigned int broken_arcs
, broken_cycles
;
931 make_transparent(pairs
);
932 split_nodes(pairs
, &aux
, &remaining
);
936 heapify(&aux
, verbose_flag
);
938 left
= remaining
.entries
+ aux
.entries
;
941 /* Standard topological sort. */
942 while (aux
.entries
) {
947 printf("%s\n", n
->k
);
949 /* We can't free nodes, as we don't know which
950 * entry we can remove in the hash table. We
951 * rely on refs == 0 to recognize live nodes.
952 * Decrease ref count of live nodes, enter new
953 * candidates into the unrefed list. */
954 for (l
= n
->arcs
; l
!= NULL
; l
= l
->next
)
955 if (l
->node
->refs
!= 0 &&
956 --l
->node
->refs
== 0) {
957 ENQUEUE(&aux
, l
->node
);
960 /* There are still cycles to break. */
965 /* XXX Simple cycle detection and long cycle
966 * detection are mutually exclusive. */
969 n
= find_longest_cycle(&remaining
, &aux
);
971 n
= find_normal_cycle(&remaining
, &aux
);
974 warnx("cycle in data");
979 warnx("%u edge%s broken", n
->refs
,
981 broken_arcs
+= n
->refs
;
983 /* Reinitialization, cycle reporting uses aux. */
988 if (verbose_flag
&& broken_cycles
!= 0)
989 warnx("%u cycle%s broken, for a total of %u edge%s",
990 broken_cycles
, plural(broken_cycles
),
991 broken_arcs
, plural(broken_arcs
));
993 return (broken_cycles
< 256 ? broken_cycles
: 255);
999 main(int argc
, char *argv
[])
1003 if (pledge("stdio rpath", NULL
) == -1)
1006 parse_args(argc
, argv
, &pairs
);
1007 return tsort(&pairs
);
1011 extern char *__progname
;
1016 fprintf(stderr
, "Usage: %s [-flqrvw] [-h file] [file]\n", __progname
);