split: be more careful about buffer sizes
[coreutils.git] / src / tsort.c
blob0032cb5d0b6bc7f7f7e4fb2b30a40c50a87d6327
1 /* tsort - topological sort.
2 Copyright (C) 1998-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
23 #include <config.h>
25 #include <assert.h>
26 #include <sys/types.h>
28 #include "system.h"
29 #include "long-options.h"
30 #include "die.h"
31 #include "error.h"
32 #include "fadvise.h"
33 #include "readtokens.h"
34 #include "stdio--.h"
35 #include "quote.h"
37 /* The official name of this program (e.g., no 'g' prefix). */
38 #define PROGRAM_NAME "tsort"
40 #define AUTHORS proper_name ("Mark Kettenis")
42 /* Token delimiters when reading from a file. */
43 #define DELIM " \t\n"
45 /* Members of the list of successors. */
46 struct successor
48 struct item *suc;
49 struct successor *next;
52 /* Each string is held in memory as the head of a list of successors. */
53 struct item
55 char const *str;
56 struct item *left, *right;
57 signed char balance; /* -1, 0, or +1 */
58 bool printed;
59 size_t count;
60 struct item *qlink;
61 struct successor *top;
64 /* The head of the sorted list. */
65 static struct item *head = NULL;
67 /* The tail of the list of 'zeros', strings that have no predecessors. */
68 static struct item *zeros = NULL;
70 /* Used for loop detection. */
71 static struct item *loop = NULL;
73 /* The number of strings to sort. */
74 static size_t n_strings = 0;
76 void
77 usage (int status)
79 if (status != EXIT_SUCCESS)
80 emit_try_help ();
81 else
83 printf (_("\
84 Usage: %s [OPTION] [FILE]\n\
85 Write totally ordered list consistent with the partial ordering in FILE.\n\
86 "), program_name);
88 emit_stdin_note ();
90 fputs (_("\
91 \n\
92 "), stdout);
93 fputs (HELP_OPTION_DESCRIPTION, stdout);
94 fputs (VERSION_OPTION_DESCRIPTION, stdout);
95 emit_ancillary_info (PROGRAM_NAME);
98 exit (status);
101 /* Create a new item/node for STR. */
102 static struct item *
103 new_item (char const *str)
105 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
106 struct item *k = xzalloc (sizeof *k);
107 if (str)
108 k->str = xstrdup (str);
109 return k;
112 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
113 *ROOT is NULL. Insert a node/item for STR if not found. Return
114 the node/item found/created for STR.
116 This is done according to Algorithm A (Balanced tree search and
117 insertion) in Donald E. Knuth, The Art of Computer Programming,
118 Volume 3/Searching and Sorting, pages 455--457. */
120 static struct item *
121 search_item (struct item *root, char const *str)
123 struct item *p, *q, *r, *s, *t;
124 int a;
126 assert (root);
128 /* Make sure the tree is not empty, since that is what the algorithm
129 below expects. */
130 if (root->right == NULL)
131 return (root->right = new_item (str));
133 /* A1. Initialize. */
134 t = root;
135 s = p = root->right;
137 while (true)
139 /* A2. Compare. */
140 assert (str && p && p->str);
141 a = strcmp (str, p->str);
142 if (a == 0)
143 return p;
145 /* A3 & A4. Move left & right. */
146 if (a < 0)
147 q = p->left;
148 else
149 q = p->right;
151 if (q == NULL)
153 /* A5. Insert. */
154 q = new_item (str);
156 /* A3 & A4. (continued). */
157 if (a < 0)
158 p->left = q;
159 else
160 p->right = q;
162 /* A6. Adjust balance factors. */
163 assert (str && s && s->str && !STREQ (str, s->str));
164 if (strcmp (str, s->str) < 0)
166 r = p = s->left;
167 a = -1;
169 else
171 r = p = s->right;
172 a = 1;
175 while (p != q)
177 assert (str && p && p->str && !STREQ (str, p->str));
178 if (strcmp (str, p->str) < 0)
180 p->balance = -1;
181 p = p->left;
183 else
185 p->balance = 1;
186 p = p->right;
190 /* A7. Balancing act. */
191 if (s->balance == 0 || s->balance == -a)
193 s->balance += a;
194 return q;
197 if (r->balance == a)
199 /* A8. Single Rotation. */
200 p = r;
201 if (a < 0)
203 s->left = r->right;
204 r->right = s;
206 else
208 s->right = r->left;
209 r->left = s;
211 s->balance = r->balance = 0;
213 else
215 /* A9. Double rotation. */
216 if (a < 0)
218 p = r->right;
219 r->right = p->left;
220 p->left = r;
221 s->left = p->right;
222 p->right = s;
224 else
226 p = r->left;
227 r->left = p->right;
228 p->right = r;
229 s->right = p->left;
230 p->left = s;
233 s->balance = 0;
234 r->balance = 0;
235 if (p->balance == a)
236 s->balance = -a;
237 else if (p->balance == -a)
238 r->balance = a;
239 p->balance = 0;
242 /* A10. Finishing touch. */
243 if (s == t->right)
244 t->right = p;
245 else
246 t->left = p;
248 return q;
251 /* A3 & A4. (continued). */
252 if (q->balance)
254 t = p;
255 s = q;
258 p = q;
261 /* NOTREACHED */
264 /* Record the fact that J precedes K. */
266 static void
267 record_relation (struct item *j, struct item *k)
269 struct successor *p;
271 if (!STREQ (j->str, k->str))
273 k->count++;
274 p = xmalloc (sizeof *p);
275 p->suc = k;
276 p->next = j->top;
277 j->top = p;
281 static bool
282 count_items (MAYBE_UNUSED struct item *unused)
284 n_strings++;
285 return false;
288 static bool
289 scan_zeros (struct item *k)
291 /* Ignore strings that have already been printed. */
292 if (k->count == 0 && !k->printed)
294 if (head == NULL)
295 head = k;
296 else
297 zeros->qlink = k;
299 zeros = k;
302 return false;
305 /* Try to detect the loop. If we have detected that K is part of a
306 loop, print the loop on standard error, remove a relation to break
307 the loop, and return true.
309 The loop detection strategy is as follows: Realise that what we're
310 dealing with is essentially a directed graph. If we find an item
311 that is part of a graph that contains a cycle we traverse the graph
312 in backwards direction. In general there is no unique way to do
313 this, but that is no problem. If we encounter an item that we have
314 encountered before, we know that we've found a cycle. All we have
315 to do now is retrace our steps, printing out the items until we
316 encounter that item again. (This is not necessarily the item that
317 we started from originally.) Since the order in which the items
318 are stored in the tree is not related to the specified partial
319 ordering, we may need to walk the tree several times before the
320 loop has completely been constructed. If the loop was found, the
321 global variable LOOP will be NULL. */
323 static bool
324 detect_loop (struct item *k)
326 if (k->count > 0)
328 /* K does not have to be part of a cycle. It is however part of
329 a graph that contains a cycle. */
331 if (loop == NULL)
332 /* Start traversing the graph at K. */
333 loop = k;
334 else
336 struct successor **p = &k->top;
338 while (*p)
340 if ((*p)->suc == loop)
342 if (k->qlink)
344 /* We have found a loop. Retrace our steps. */
345 while (loop)
347 struct item *tmp = loop->qlink;
349 error (0, 0, "%s", (loop->str));
351 /* Until we encounter K again. */
352 if (loop == k)
354 /* Remove relation. */
355 struct successor *s = *p;
356 s->suc->count--;
357 *p = s->next;
358 IF_LINT (free (s));
359 break;
362 /* Tidy things up since we might have to
363 detect another loop. */
364 loop->qlink = NULL;
365 loop = tmp;
368 while (loop)
370 struct item *tmp = loop->qlink;
372 loop->qlink = NULL;
373 loop = tmp;
376 /* Since we have found the loop, stop walking
377 the tree. */
378 return true;
380 else
382 k->qlink = loop;
383 loop = k;
384 break;
388 p = &(*p)->next;
393 return false;
396 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
397 Stop when ACTION returns true. */
399 static bool
400 recurse_tree (struct item *root, bool (*action) (struct item *))
402 if (root->left == NULL && root->right == NULL)
403 return (*action) (root);
404 else
406 if (root->left != NULL)
407 if (recurse_tree (root->left, action))
408 return true;
409 if ((*action) (root))
410 return true;
411 if (root->right != NULL)
412 if (recurse_tree (root->right, action))
413 return true;
416 return false;
419 /* Walk the tree specified by the head ROOT, calling ACTION for
420 each node. */
422 static void
423 walk_tree (struct item *root, bool (*action) (struct item *))
425 if (root->right)
426 recurse_tree (root->right, action);
429 /* Do a topological sort on FILE. Exit with appropriate exit status. */
431 static _Noreturn void
432 tsort (char const *file)
434 bool ok = true;
435 struct item *root;
436 struct item *j = NULL;
437 struct item *k = NULL;
438 token_buffer tokenbuffer;
439 bool is_stdin = STREQ (file, "-");
441 /* Intialize the head of the tree will hold the strings we're sorting. */
442 root = new_item (NULL);
444 if (!is_stdin && ! freopen (file, "r", stdin))
445 die (EXIT_FAILURE, errno, "%s", quotef (file));
447 fadvise (stdin, FADVISE_SEQUENTIAL);
449 init_tokenbuffer (&tokenbuffer);
451 while (true)
453 /* T2. Next Relation. */
454 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
455 if (len == (size_t) -1)
456 break;
458 assert (len != 0);
460 k = search_item (root, tokenbuffer.buffer);
461 if (j)
463 /* T3. Record the relation. */
464 record_relation (j, k);
465 k = NULL;
468 j = k;
471 if (k != NULL)
472 die (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
473 quotef (file));
475 /* T1. Initialize (N <- n). */
476 walk_tree (root, count_items);
478 while (n_strings > 0)
480 /* T4. Scan for zeros. */
481 walk_tree (root, scan_zeros);
483 while (head)
485 struct successor *p = head->top;
487 /* T5. Output front of queue. */
488 puts (head->str);
489 head->printed = true;
490 n_strings--;
492 /* T6. Erase relations. */
493 while (p)
495 p->suc->count--;
496 if (p->suc->count == 0)
498 zeros->qlink = p->suc;
499 zeros = p->suc;
502 p = p->next;
505 /* T7. Remove from queue. */
506 head = head->qlink;
509 /* T8. End of process. */
510 if (n_strings > 0)
512 /* The input contains a loop. */
513 error (0, 0, _("%s: input contains a loop:"), quotef (file));
514 ok = false;
516 /* Print the loop and remove a relation to break it. */
518 walk_tree (root, detect_loop);
519 while (loop);
523 if (fclose (stdin) != 0)
524 die (EXIT_FAILURE, errno, "%s",
525 is_stdin ? _("standard input") : quotef (file));
527 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
531 main (int argc, char **argv)
533 initialize_main (&argc, &argv);
534 set_program_name (argv[0]);
535 setlocale (LC_ALL, "");
536 bindtextdomain (PACKAGE, LOCALEDIR);
537 textdomain (PACKAGE);
539 atexit (close_stdout);
541 parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME,
542 Version, true, usage, AUTHORS,
543 (char const *) NULL);
545 if (1 < argc - optind)
547 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
548 usage (EXIT_FAILURE);
551 tsort (optind == argc ? "-" : argv[optind]);