doc: remove colon from node name
[coreutils.git] / src / tsort.c
blobe3dd29acb1c67049e5d85d612df087e9da631551
1 /* tsort - topological sort.
2 Copyright (C) 1998-2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
23 #include <config.h>
25 #include <assert.h>
26 #include <sys/types.h>
28 #include "system.h"
29 #include "long-options.h"
30 #include "die.h"
31 #include "error.h"
32 #include "fadvise.h"
33 #include "readtokens.h"
34 #include "stdio--.h"
35 #include "quote.h"
37 /* The official name of this program (e.g., no 'g' prefix). */
38 #define PROGRAM_NAME "tsort"
40 #define AUTHORS proper_name ("Mark Kettenis")
42 /* Token delimiters when reading from a file. */
43 #define DELIM " \t\n"
45 /* Members of the list of successors. */
46 struct successor
48 struct item *suc;
49 struct successor *next;
52 /* Each string is held in core as the head of a list of successors. */
53 struct item
55 const char *str;
56 struct item *left, *right;
57 int balance; /* -1, 0, or +1 */
58 size_t count;
59 struct item *qlink;
60 struct successor *top;
63 /* The head of the sorted list. */
64 static struct item *head = NULL;
66 /* The tail of the list of 'zeros', strings that have no predecessors. */
67 static struct item *zeros = NULL;
69 /* Used for loop detection. */
70 static struct item *loop = NULL;
72 /* The number of strings to sort. */
73 static size_t n_strings = 0;
75 void
76 usage (int status)
78 if (status != EXIT_SUCCESS)
79 emit_try_help ();
80 else
82 printf (_("\
83 Usage: %s [OPTION] [FILE]\n\
84 Write totally ordered list consistent with the partial ordering in FILE.\n\
85 "), program_name);
87 emit_stdin_note ();
89 fputs (_("\
90 \n\
91 "), stdout);
92 fputs (HELP_OPTION_DESCRIPTION, stdout);
93 fputs (VERSION_OPTION_DESCRIPTION, stdout);
94 emit_ancillary_info (PROGRAM_NAME);
97 exit (status);
100 /* Create a new item/node for STR. */
101 static struct item *
102 new_item (const char *str)
104 struct item *k = xmalloc (sizeof *k);
106 k->str = (str ? xstrdup (str): NULL);
107 k->left = k->right = NULL;
108 k->balance = 0;
110 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
111 k->count = 0;
112 k->qlink = NULL;
113 k->top = NULL;
115 return k;
118 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
119 *ROOT is NULL. Insert a node/item for STR if not found. Return
120 the node/item found/created for STR.
122 This is done according to Algorithm A (Balanced tree search and
123 insertion) in Donald E. Knuth, The Art of Computer Programming,
124 Volume 3/Searching and Sorting, pages 455--457. */
126 static struct item *
127 search_item (struct item *root, const char *str)
129 struct item *p, *q, *r, *s, *t;
130 int a;
132 assert (root);
134 /* Make sure the tree is not empty, since that is what the algorithm
135 below expects. */
136 if (root->right == NULL)
137 return (root->right = new_item (str));
139 /* A1. Initialize. */
140 t = root;
141 s = p = root->right;
143 while (true)
145 /* A2. Compare. */
146 a = strcmp (str, p->str);
147 if (a == 0)
148 return p;
150 /* A3 & A4. Move left & right. */
151 if (a < 0)
152 q = p->left;
153 else
154 q = p->right;
156 if (q == NULL)
158 /* A5. Insert. */
159 q = new_item (str);
161 /* A3 & A4. (continued). */
162 if (a < 0)
163 p->left = q;
164 else
165 p->right = q;
167 /* A6. Adjust balance factors. */
168 assert (!STREQ (str, s->str));
169 if (strcmp (str, s->str) < 0)
171 r = p = s->left;
172 a = -1;
174 else
176 r = p = s->right;
177 a = 1;
180 while (p != q)
182 assert (!STREQ (str, p->str));
183 if (strcmp (str, p->str) < 0)
185 p->balance = -1;
186 p = p->left;
188 else
190 p->balance = 1;
191 p = p->right;
195 /* A7. Balancing act. */
196 if (s->balance == 0 || s->balance == -a)
198 s->balance += a;
199 return q;
202 if (r->balance == a)
204 /* A8. Single Rotation. */
205 p = r;
206 if (a < 0)
208 s->left = r->right;
209 r->right = s;
211 else
213 s->right = r->left;
214 r->left = s;
216 s->balance = r->balance = 0;
218 else
220 /* A9. Double rotation. */
221 if (a < 0)
223 p = r->right;
224 r->right = p->left;
225 p->left = r;
226 s->left = p->right;
227 p->right = s;
229 else
231 p = r->left;
232 r->left = p->right;
233 p->right = r;
234 s->right = p->left;
235 p->left = s;
238 s->balance = 0;
239 r->balance = 0;
240 if (p->balance == a)
241 s->balance = -a;
242 else if (p->balance == -a)
243 r->balance = a;
244 p->balance = 0;
247 /* A10. Finishing touch. */
248 if (s == t->right)
249 t->right = p;
250 else
251 t->left = p;
253 return q;
256 /* A3 & A4. (continued). */
257 if (q->balance)
259 t = p;
260 s = q;
263 p = q;
266 /* NOTREACHED */
269 /* Record the fact that J precedes K. */
271 static void
272 record_relation (struct item *j, struct item *k)
274 struct successor *p;
276 if (!STREQ (j->str, k->str))
278 k->count++;
279 p = xmalloc (sizeof *p);
280 p->suc = k;
281 p->next = j->top;
282 j->top = p;
286 static bool
287 count_items (struct item *unused _GL_UNUSED)
289 n_strings++;
290 return false;
293 static bool
294 scan_zeros (struct item *k)
296 /* Ignore strings that have already been printed. */
297 if (k->count == 0 && k->str)
299 if (head == NULL)
300 head = k;
301 else
302 zeros->qlink = k;
304 zeros = k;
307 return false;
310 /* Try to detect the loop. If we have detected that K is part of a
311 loop, print the loop on standard error, remove a relation to break
312 the loop, and return true.
314 The loop detection strategy is as follows: Realise that what we're
315 dealing with is essentially a directed graph. If we find an item
316 that is part of a graph that contains a cycle we traverse the graph
317 in backwards direction. In general there is no unique way to do
318 this, but that is no problem. If we encounter an item that we have
319 encountered before, we know that we've found a cycle. All we have
320 to do now is retrace our steps, printing out the items until we
321 encounter that item again. (This is not necessarily the item that
322 we started from originally.) Since the order in which the items
323 are stored in the tree is not related to the specified partial
324 ordering, we may need to walk the tree several times before the
325 loop has completely been constructed. If the loop was found, the
326 global variable LOOP will be NULL. */
328 static bool
329 detect_loop (struct item *k)
331 if (k->count > 0)
333 /* K does not have to be part of a cycle. It is however part of
334 a graph that contains a cycle. */
336 if (loop == NULL)
337 /* Start traversing the graph at K. */
338 loop = k;
339 else
341 struct successor **p = &k->top;
343 while (*p)
345 if ((*p)->suc == loop)
347 if (k->qlink)
349 /* We have found a loop. Retrace our steps. */
350 while (loop)
352 struct item *tmp = loop->qlink;
354 error (0, 0, "%s", (loop->str));
356 /* Until we encounter K again. */
357 if (loop == k)
359 /* Remove relation. */
360 (*p)->suc->count--;
361 *p = (*p)->next;
362 break;
365 /* Tidy things up since we might have to
366 detect another loop. */
367 loop->qlink = NULL;
368 loop = tmp;
371 while (loop)
373 struct item *tmp = loop->qlink;
375 loop->qlink = NULL;
376 loop = tmp;
379 /* Since we have found the loop, stop walking
380 the tree. */
381 return true;
383 else
385 k->qlink = loop;
386 loop = k;
387 break;
391 p = &(*p)->next;
396 return false;
399 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
400 Stop when ACTION returns true. */
402 static bool
403 recurse_tree (struct item *root, bool (*action) (struct item *))
405 if (root->left == NULL && root->right == NULL)
406 return (*action) (root);
407 else
409 if (root->left != NULL)
410 if (recurse_tree (root->left, action))
411 return true;
412 if ((*action) (root))
413 return true;
414 if (root->right != NULL)
415 if (recurse_tree (root->right, action))
416 return true;
419 return false;
422 /* Walk the tree specified by the head ROOT, calling ACTION for
423 each node. */
425 static void
426 walk_tree (struct item *root, bool (*action) (struct item *))
428 if (root->right)
429 recurse_tree (root->right, action);
432 /* Do a topological sort on FILE. Return true if successful. */
434 static bool
435 tsort (const char *file)
437 bool ok = true;
438 struct item *root;
439 struct item *j = NULL;
440 struct item *k = NULL;
441 token_buffer tokenbuffer;
442 bool is_stdin = STREQ (file, "-");
444 /* Intialize the head of the tree will hold the strings we're sorting. */
445 root = new_item (NULL);
447 if (!is_stdin && ! freopen (file, "r", stdin))
448 die (EXIT_FAILURE, errno, "%s", quotef (file));
450 fadvise (stdin, FADVISE_SEQUENTIAL);
452 init_tokenbuffer (&tokenbuffer);
454 while (1)
456 /* T2. Next Relation. */
457 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
458 if (len == (size_t) -1)
459 break;
461 assert (len != 0);
463 k = search_item (root, tokenbuffer.buffer);
464 if (j)
466 /* T3. Record the relation. */
467 record_relation (j, k);
468 k = NULL;
471 j = k;
474 if (k != NULL)
475 die (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
476 quotef (file));
478 /* T1. Initialize (N <- n). */
479 walk_tree (root, count_items);
481 while (n_strings > 0)
483 /* T4. Scan for zeros. */
484 walk_tree (root, scan_zeros);
486 while (head)
488 struct successor *p = head->top;
490 /* T5. Output front of queue. */
491 puts (head->str);
492 #ifdef lint
493 /* suppress valgrind "definitely lost" warnings. */
494 void *head_str = (void *) head->str;
495 free (head_str);
496 #endif
497 head->str = NULL; /* Avoid printing the same string twice. */
498 n_strings--;
500 /* T6. Erase relations. */
501 while (p)
503 p->suc->count--;
504 if (p->suc->count == 0)
506 zeros->qlink = p->suc;
507 zeros = p->suc;
510 p = p->next;
513 /* T7. Remove from queue. */
514 head = head->qlink;
517 /* T8. End of process. */
518 if (n_strings > 0)
520 /* The input contains a loop. */
521 error (0, 0, _("%s: input contains a loop:"), quotef (file));
522 ok = false;
524 /* Print the loop and remove a relation to break it. */
526 walk_tree (root, detect_loop);
527 while (loop);
531 IF_LINT (free (root));
533 if (fclose (stdin) != 0)
534 die (EXIT_FAILURE, errno, "%s",
535 is_stdin ? _("standard input") : quotef (file));
537 return ok;
541 main (int argc, char **argv)
543 bool ok;
545 initialize_main (&argc, &argv);
546 set_program_name (argv[0]);
547 setlocale (LC_ALL, "");
548 bindtextdomain (PACKAGE, LOCALEDIR);
549 textdomain (PACKAGE);
551 atexit (close_stdout);
553 parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME,
554 Version, true, usage, AUTHORS,
555 (char const *) NULL);
557 if (1 < argc - optind)
559 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
560 usage (EXIT_FAILURE);
563 ok = tsort (optind == argc ? "-" : argv[optind]);
565 return ok ? EXIT_SUCCESS : EXIT_FAILURE;