all: update gnulib submodule to latest
[coreutils.git] / src / tsort.c
blob4aa403fbe965003dd38cf29c0386f2dd89994618
1 /* tsort - topological sort.
2 Copyright (C) 1998-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
23 #include <config.h>
25 #include <assert.h>
26 #include <getopt.h>
27 #include <sys/types.h>
29 #include "system.h"
30 #include "long-options.h"
31 #include "die.h"
32 #include "error.h"
33 #include "fadvise.h"
34 #include "readtokens.h"
35 #include "stdio--.h"
36 #include "quote.h"
38 /* The official name of this program (e.g., no 'g' prefix). */
39 #define PROGRAM_NAME "tsort"
41 #define AUTHORS proper_name ("Mark Kettenis")
43 /* Token delimiters when reading from a file. */
44 #define DELIM " \t\n"
46 /* Members of the list of successors. */
47 struct successor
49 struct item *suc;
50 struct successor *next;
53 /* Each string is held in core as the head of a list of successors. */
54 struct item
56 const char *str;
57 struct item *left, *right;
58 int balance; /* -1, 0, or +1 */
59 size_t count;
60 struct item *qlink;
61 struct successor *top;
64 /* The head of the sorted list. */
65 static struct item *head = NULL;
67 /* The tail of the list of 'zeros', strings that have no predecessors. */
68 static struct item *zeros = NULL;
70 /* Used for loop detection. */
71 static struct item *loop = NULL;
73 /* The number of strings to sort. */
74 static size_t n_strings = 0;
76 void
77 usage (int status)
79 if (status != EXIT_SUCCESS)
80 emit_try_help ();
81 else
83 printf (_("\
84 Usage: %s [OPTION] [FILE]\n\
85 Write totally ordered list consistent with the partial ordering in FILE.\n\
86 "), program_name);
88 emit_stdin_note ();
90 fputs (_("\
91 \n\
92 "), stdout);
93 fputs (HELP_OPTION_DESCRIPTION, stdout);
94 fputs (VERSION_OPTION_DESCRIPTION, stdout);
95 emit_ancillary_info (PROGRAM_NAME);
98 exit (status);
101 /* Create a new item/node for STR. */
102 static struct item *
103 new_item (const char *str)
105 struct item *k = xmalloc (sizeof *k);
107 k->str = (str ? xstrdup (str): NULL);
108 k->left = k->right = NULL;
109 k->balance = 0;
111 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
112 k->count = 0;
113 k->qlink = NULL;
114 k->top = NULL;
116 return k;
119 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
120 *ROOT is NULL. Insert a node/item for STR if not found. Return
121 the node/item found/created for STR.
123 This is done according to Algorithm A (Balanced tree search and
124 insertion) in Donald E. Knuth, The Art of Computer Programming,
125 Volume 3/Searching and Sorting, pages 455--457. */
127 static struct item *
128 search_item (struct item *root, const char *str)
130 struct item *p, *q, *r, *s, *t;
131 int a;
133 assert (root);
135 /* Make sure the tree is not empty, since that is what the algorithm
136 below expects. */
137 if (root->right == NULL)
138 return (root->right = new_item (str));
140 /* A1. Initialize. */
141 t = root;
142 s = p = root->right;
144 while (true)
146 /* A2. Compare. */
147 a = strcmp (str, p->str);
148 if (a == 0)
149 return p;
151 /* A3 & A4. Move left & right. */
152 if (a < 0)
153 q = p->left;
154 else
155 q = p->right;
157 if (q == NULL)
159 /* A5. Insert. */
160 q = new_item (str);
162 /* A3 & A4. (continued). */
163 if (a < 0)
164 p->left = q;
165 else
166 p->right = q;
168 /* A6. Adjust balance factors. */
169 assert (!STREQ (str, s->str));
170 if (strcmp (str, s->str) < 0)
172 r = p = s->left;
173 a = -1;
175 else
177 r = p = s->right;
178 a = 1;
181 while (p != q)
183 assert (!STREQ (str, p->str));
184 if (strcmp (str, p->str) < 0)
186 p->balance = -1;
187 p = p->left;
189 else
191 p->balance = 1;
192 p = p->right;
196 /* A7. Balancing act. */
197 if (s->balance == 0 || s->balance == -a)
199 s->balance += a;
200 return q;
203 if (r->balance == a)
205 /* A8. Single Rotation. */
206 p = r;
207 if (a < 0)
209 s->left = r->right;
210 r->right = s;
212 else
214 s->right = r->left;
215 r->left = s;
217 s->balance = r->balance = 0;
219 else
221 /* A9. Double rotation. */
222 if (a < 0)
224 p = r->right;
225 r->right = p->left;
226 p->left = r;
227 s->left = p->right;
228 p->right = s;
230 else
232 p = r->left;
233 r->left = p->right;
234 p->right = r;
235 s->right = p->left;
236 p->left = s;
239 s->balance = 0;
240 r->balance = 0;
241 if (p->balance == a)
242 s->balance = -a;
243 else if (p->balance == -a)
244 r->balance = a;
245 p->balance = 0;
248 /* A10. Finishing touch. */
249 if (s == t->right)
250 t->right = p;
251 else
252 t->left = p;
254 return q;
257 /* A3 & A4. (continued). */
258 if (q->balance)
260 t = p;
261 s = q;
264 p = q;
267 /* NOTREACHED */
270 /* Record the fact that J precedes K. */
272 static void
273 record_relation (struct item *j, struct item *k)
275 struct successor *p;
277 if (!STREQ (j->str, k->str))
279 k->count++;
280 p = xmalloc (sizeof *p);
281 p->suc = k;
282 p->next = j->top;
283 j->top = p;
287 static bool
288 count_items (struct item *unused _GL_UNUSED)
290 n_strings++;
291 return false;
294 static bool
295 scan_zeros (struct item *k)
297 /* Ignore strings that have already been printed. */
298 if (k->count == 0 && k->str)
300 if (head == NULL)
301 head = k;
302 else
303 zeros->qlink = k;
305 zeros = k;
308 return false;
311 /* Try to detect the loop. If we have detected that K is part of a
312 loop, print the loop on standard error, remove a relation to break
313 the loop, and return true.
315 The loop detection strategy is as follows: Realise that what we're
316 dealing with is essentially a directed graph. If we find an item
317 that is part of a graph that contains a cycle we traverse the graph
318 in backwards direction. In general there is no unique way to do
319 this, but that is no problem. If we encounter an item that we have
320 encountered before, we know that we've found a cycle. All we have
321 to do now is retrace our steps, printing out the items until we
322 encounter that item again. (This is not necessarily the item that
323 we started from originally.) Since the order in which the items
324 are stored in the tree is not related to the specified partial
325 ordering, we may need to walk the tree several times before the
326 loop has completely been constructed. If the loop was found, the
327 global variable LOOP will be NULL. */
329 static bool
330 detect_loop (struct item *k)
332 if (k->count > 0)
334 /* K does not have to be part of a cycle. It is however part of
335 a graph that contains a cycle. */
337 if (loop == NULL)
338 /* Start traversing the graph at K. */
339 loop = k;
340 else
342 struct successor **p = &k->top;
344 while (*p)
346 if ((*p)->suc == loop)
348 if (k->qlink)
350 /* We have found a loop. Retrace our steps. */
351 while (loop)
353 struct item *tmp = loop->qlink;
355 error (0, 0, "%s", (loop->str));
357 /* Until we encounter K again. */
358 if (loop == k)
360 /* Remove relation. */
361 (*p)->suc->count--;
362 *p = (*p)->next;
363 break;
366 /* Tidy things up since we might have to
367 detect another loop. */
368 loop->qlink = NULL;
369 loop = tmp;
372 while (loop)
374 struct item *tmp = loop->qlink;
376 loop->qlink = NULL;
377 loop = tmp;
380 /* Since we have found the loop, stop walking
381 the tree. */
382 return true;
384 else
386 k->qlink = loop;
387 loop = k;
388 break;
392 p = &(*p)->next;
397 return false;
400 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
401 Stop when ACTION returns true. */
403 static bool
404 recurse_tree (struct item *root, bool (*action) (struct item *))
406 if (root->left == NULL && root->right == NULL)
407 return (*action) (root);
408 else
410 if (root->left != NULL)
411 if (recurse_tree (root->left, action))
412 return true;
413 if ((*action) (root))
414 return true;
415 if (root->right != NULL)
416 if (recurse_tree (root->right, action))
417 return true;
420 return false;
423 /* Walk the tree specified by the head ROOT, calling ACTION for
424 each node. */
426 static void
427 walk_tree (struct item *root, bool (*action) (struct item *))
429 if (root->right)
430 recurse_tree (root->right, action);
433 /* Do a topological sort on FILE. Return true if successful. */
435 static bool
436 tsort (const char *file)
438 bool ok = true;
439 struct item *root;
440 struct item *j = NULL;
441 struct item *k = NULL;
442 token_buffer tokenbuffer;
443 bool is_stdin = STREQ (file, "-");
445 /* Intialize the head of the tree will hold the strings we're sorting. */
446 root = new_item (NULL);
448 if (!is_stdin && ! freopen (file, "r", stdin))
449 die (EXIT_FAILURE, errno, "%s", quotef (file));
451 fadvise (stdin, FADVISE_SEQUENTIAL);
453 init_tokenbuffer (&tokenbuffer);
455 while (1)
457 /* T2. Next Relation. */
458 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
459 if (len == (size_t) -1)
460 break;
462 assert (len != 0);
464 k = search_item (root, tokenbuffer.buffer);
465 if (j)
467 /* T3. Record the relation. */
468 record_relation (j, k);
469 k = NULL;
472 j = k;
475 if (k != NULL)
476 die (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
477 quotef (file));
479 /* T1. Initialize (N <- n). */
480 walk_tree (root, count_items);
482 while (n_strings > 0)
484 /* T4. Scan for zeros. */
485 walk_tree (root, scan_zeros);
487 while (head)
489 struct successor *p = head->top;
491 /* T5. Output front of queue. */
492 puts (head->str);
493 #ifdef lint
494 /* suppress valgrind "definitely lost" warnings. */
495 void *head_str = (void *) head->str;
496 free (head_str);
497 #endif
498 head->str = NULL; /* Avoid printing the same string twice. */
499 n_strings--;
501 /* T6. Erase relations. */
502 while (p)
504 p->suc->count--;
505 if (p->suc->count == 0)
507 zeros->qlink = p->suc;
508 zeros = p->suc;
511 p = p->next;
514 /* T7. Remove from queue. */
515 head = head->qlink;
518 /* T8. End of process. */
519 if (n_strings > 0)
521 /* The input contains a loop. */
522 error (0, 0, _("%s: input contains a loop:"), quotef (file));
523 ok = false;
525 /* Print the loop and remove a relation to break it. */
527 walk_tree (root, detect_loop);
528 while (loop);
532 IF_LINT (free (root));
534 if (fclose (stdin) != 0)
535 die (EXIT_FAILURE, errno, "%s",
536 is_stdin ? _("standard input") : quotef (file));
538 return ok;
542 main (int argc, char **argv)
544 bool ok;
546 initialize_main (&argc, &argv);
547 set_program_name (argv[0]);
548 setlocale (LC_ALL, "");
549 bindtextdomain (PACKAGE, LOCALEDIR);
550 textdomain (PACKAGE);
552 atexit (close_stdout);
554 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, Version,
555 usage, AUTHORS, (char const *) NULL);
556 if (getopt_long (argc, argv, "", NULL, NULL) != -1)
557 usage (EXIT_FAILURE);
559 if (1 < argc - optind)
561 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
562 usage (EXIT_FAILURE);
565 ok = tsort (optind == argc ? "-" : argv[optind]);
567 return ok ? EXIT_SUCCESS : EXIT_FAILURE;