id: fix infinite loop on some systems
[coreutils/ericb.git] / src / tsort.c
blob76865b95ff19bf624fbab4f7063b18b9fbbe340e
1 /* tsort - topological sort.
2 Copyright (C) 1998-2005, 2007-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
23 #include <config.h>
25 #include <stdio.h>
26 #include <assert.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #include "system.h"
31 #include "long-options.h"
32 #include "error.h"
33 #include "quote.h"
34 #include "readtokens.h"
36 /* The official name of this program (e.g., no `g' prefix). */
37 #define PROGRAM_NAME "tsort"
39 #define AUTHORS proper_name ("Mark Kettenis")
41 /* Token delimiters when reading from a file. */
42 #define DELIM " \t\n"
44 /* Members of the list of successors. */
45 struct successor
47 struct item *suc;
48 struct successor *next;
51 /* Each string is held in core as the head of a list of successors. */
52 struct item
54 const char *str;
55 struct item *left, *right;
56 int balance; /* -1, 0, or +1 */
57 size_t count;
58 struct item *qlink;
59 struct successor *top;
62 /* The head of the sorted list. */
63 static struct item *head = NULL;
65 /* The tail of the list of `zeros', strings that have no predecessors. */
66 static struct item *zeros = NULL;
68 /* Used for loop detection. */
69 static struct item *loop = NULL;
71 /* The number of strings to sort. */
72 static size_t n_strings = 0;
74 void
75 usage (int status)
77 if (status != EXIT_SUCCESS)
78 fprintf (stderr, _("Try `%s --help' for more information.\n"),
79 program_name);
80 else
82 printf (_("\
83 Usage: %s [OPTION] [FILE]\n\
84 Write totally ordered list consistent with the partial ordering in FILE.\n\
85 With no FILE, or when FILE is -, read standard input.\n\
86 \n\
87 "), program_name);
88 fputs (HELP_OPTION_DESCRIPTION, stdout);
89 fputs (VERSION_OPTION_DESCRIPTION, stdout);
90 emit_bug_reporting_address ();
93 exit (status);
96 /* Create a new item/node for STR. */
97 static struct item *
98 new_item (const char *str)
100 struct item *k = xmalloc (sizeof *k);
102 k->str = (str ? xstrdup (str): NULL);
103 k->left = k->right = NULL;
104 k->balance = 0;
106 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
107 k->count = 0;
108 k->qlink = NULL;
109 k->top = NULL;
111 return k;
114 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
115 *ROOT is NULL. Insert a node/item for STR if not found. Return
116 the node/item found/created for STR.
118 This is done according to Algorithm A (Balanced tree search and
119 insertion) in Donald E. Knuth, The Art of Computer Programming,
120 Volume 3/Searching and Sorting, pages 455--457. */
122 static struct item *
123 search_item (struct item *root, const char *str)
125 struct item *p, *q, *r, *s, *t;
126 int a;
128 assert (root);
130 /* Make sure the tree is not empty, since that is what the algorithm
131 below expects. */
132 if (root->right == NULL)
133 return (root->right = new_item (str));
135 /* A1. Initialize. */
136 t = root;
137 s = p = root->right;
139 for (;;)
141 /* A2. Compare. */
142 a = strcmp (str, p->str);
143 if (a == 0)
144 return p;
146 /* A3 & A4. Move left & right. */
147 if (a < 0)
148 q = p->left;
149 else
150 q = p->right;
152 if (q == NULL)
154 /* A5. Insert. */
155 q = new_item (str);
157 /* A3 & A4. (continued). */
158 if (a < 0)
159 p->left = q;
160 else
161 p->right = q;
163 /* A6. Adjust balance factors. */
164 assert (!STREQ (str, s->str));
165 if (strcmp (str, s->str) < 0)
167 r = p = s->left;
168 a = -1;
170 else
172 r = p = s->right;
173 a = 1;
176 while (p != q)
178 assert (!STREQ (str, p->str));
179 if (strcmp (str, p->str) < 0)
181 p->balance = -1;
182 p = p->left;
184 else
186 p->balance = 1;
187 p = p->right;
191 /* A7. Balancing act. */
192 if (s->balance == 0 || s->balance == -a)
194 s->balance += a;
195 return q;
198 if (r->balance == a)
200 /* A8. Single Rotation. */
201 p = r;
202 if (a < 0)
204 s->left = r->right;
205 r->right = s;
207 else
209 s->right = r->left;
210 r->left = s;
212 s->balance = r->balance = 0;
214 else
216 /* A9. Double rotation. */
217 if (a < 0)
219 p = r->right;
220 r->right = p->left;
221 p->left = r;
222 s->left = p->right;
223 p->right = s;
225 else
227 p = r->left;
228 r->left = p->right;
229 p->right = r;
230 s->right = p->left;
231 p->left = s;
234 s->balance = 0;
235 r->balance = 0;
236 if (p->balance == a)
237 s->balance = -a;
238 else if (p->balance == -a)
239 r->balance = a;
240 p->balance = 0;
243 /* A10. Finishing touch. */
244 if (s == t->right)
245 t->right = p;
246 else
247 t->left = p;
249 return q;
252 /* A3 & A4. (continued). */
253 if (q->balance)
255 t = p;
256 s = q;
259 p = q;
262 /* NOTREACHED */
265 /* Record the fact that J precedes K. */
267 static void
268 record_relation (struct item *j, struct item *k)
270 struct successor *p;
272 if (!STREQ (j->str, k->str))
274 k->count++;
275 p = xmalloc (sizeof *p);
276 p->suc = k;
277 p->next = j->top;
278 j->top = p;
282 static bool
283 count_items (struct item *unused ATTRIBUTE_UNUSED)
285 n_strings++;
286 return false;
289 static bool
290 scan_zeros (struct item *k)
292 /* Ignore strings that have already been printed. */
293 if (k->count == 0 && k->str)
295 if (head == NULL)
296 head = k;
297 else
298 zeros->qlink = k;
300 zeros = k;
303 return false;
306 /* Try to detect the loop. If we have detected that K is part of a
307 loop, print the loop on standard error, remove a relation to break
308 the loop, and return true.
310 The loop detection strategy is as follows: Realise that what we're
311 dealing with is essentially a directed graph. If we find an item
312 that is part of a graph that contains a cycle we traverse the graph
313 in backwards direction. In general there is no unique way to do
314 this, but that is no problem. If we encounter an item that we have
315 encountered before, we know that we've found a cycle. All we have
316 to do now is retrace our steps, printing out the items until we
317 encounter that item again. (This is not necessarily the item that
318 we started from originally.) Since the order in which the items
319 are stored in the tree is not related to the specified partial
320 ordering, we may need to walk the tree several times before the
321 loop has completely been constructed. If the loop was found, the
322 global variable LOOP will be NULL. */
324 static bool
325 detect_loop (struct item *k)
327 if (k->count > 0)
329 /* K does not have to be part of a cycle. It is however part of
330 a graph that contains a cycle. */
332 if (loop == NULL)
333 /* Start traversing the graph at K. */
334 loop = k;
335 else
337 struct successor **p = &k->top;
339 while (*p)
341 if ((*p)->suc == loop)
343 if (k->qlink)
345 /* We have found a loop. Retrace our steps. */
346 while (loop)
348 struct item *tmp = loop->qlink;
350 fprintf (stderr, "%s: %s\n", program_name,
351 loop->str);
353 /* Until we encounter K again. */
354 if (loop == k)
356 /* Remove relation. */
357 (*p)->suc->count--;
358 *p = (*p)->next;
359 break;
362 /* Tidy things up since we might have to
363 detect another loop. */
364 loop->qlink = NULL;
365 loop = tmp;
368 while (loop)
370 struct item *tmp = loop->qlink;
372 loop->qlink = NULL;
373 loop = tmp;
376 /* Since we have found the loop, stop walking
377 the tree. */
378 return true;
380 else
382 k->qlink = loop;
383 loop = k;
384 break;
388 p = &(*p)->next;
393 return false;
396 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
397 Stop when ACTION returns true. */
399 static bool
400 recurse_tree (struct item *root, bool (*action) (struct item *))
402 if (root->left == NULL && root->right == NULL)
403 return (*action) (root);
404 else
406 if (root->left != NULL)
407 if (recurse_tree (root->left, action))
408 return true;
409 if ((*action) (root))
410 return true;
411 if (root->right != NULL)
412 if (recurse_tree (root->right, action))
413 return true;
416 return false;
419 /* Walk the tree specified by the head ROOT, calling ACTION for
420 each node. */
422 static void
423 walk_tree (struct item *root, bool (*action) (struct item *))
425 if (root->right)
426 recurse_tree (root->right, action);
429 /* Do a topological sort on FILE. Return true if successful. */
431 static bool
432 tsort (const char *file)
434 bool ok = true;
435 struct item *root;
436 struct item *j = NULL;
437 struct item *k = NULL;
438 token_buffer tokenbuffer;
439 bool is_stdin = STREQ (file, "-");
441 /* Intialize the head of the tree will hold the strings we're sorting. */
442 root = new_item (NULL);
444 if (!is_stdin && ! freopen (file, "r", stdin))
445 error (EXIT_FAILURE, errno, "%s", file);
447 init_tokenbuffer (&tokenbuffer);
449 while (1)
451 /* T2. Next Relation. */
452 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
453 if (len == (size_t) -1)
454 break;
456 assert (len != 0);
458 k = search_item (root, tokenbuffer.buffer);
459 if (j)
461 /* T3. Record the relation. */
462 record_relation (j, k);
463 k = NULL;
466 j = k;
469 if (k != NULL)
470 error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
471 file);
473 /* T1. Initialize (N <- n). */
474 walk_tree (root, count_items);
476 while (n_strings > 0)
478 /* T4. Scan for zeros. */
479 walk_tree (root, scan_zeros);
481 while (head)
483 struct successor *p = head->top;
485 /* T5. Output front of queue. */
486 puts (head->str);
487 head->str = NULL; /* Avoid printing the same string twice. */
488 n_strings--;
490 /* T6. Erase relations. */
491 while (p)
493 p->suc->count--;
494 if (p->suc->count == 0)
496 zeros->qlink = p->suc;
497 zeros = p->suc;
500 p = p->next;
503 /* T7. Remove from queue. */
504 head = head->qlink;
507 /* T8. End of process. */
508 if (n_strings > 0)
510 /* The input contains a loop. */
511 error (0, 0, _("%s: input contains a loop:"), file);
512 ok = false;
514 /* Print the loop and remove a relation to break it. */
516 walk_tree (root, detect_loop);
517 while (loop);
521 if (fclose (stdin) != 0)
522 error (EXIT_FAILURE, errno, "%s",
523 is_stdin ? _("standard input") : quote (file));
525 return ok;
529 main (int argc, char **argv)
531 bool ok;
533 initialize_main (&argc, &argv);
534 set_program_name (argv[0]);
535 setlocale (LC_ALL, "");
536 bindtextdomain (PACKAGE, LOCALEDIR);
537 textdomain (PACKAGE);
539 atexit (close_stdout);
541 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, Version,
542 usage, AUTHORS, (char const *) NULL);
543 if (getopt_long (argc, argv, "", NULL, NULL) != -1)
544 usage (EXIT_FAILURE);
546 if (1 < argc - optind)
548 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
549 usage (EXIT_FAILURE);
552 ok = tsort (optind == argc ? "-" : argv[optind]);
554 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);