1 /* tsort - topological sort.
2 Copyright (C) 1998-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
26 #include <sys/types.h>
29 #include "long-options.h"
33 #include "readtokens.h"
37 /* The official name of this program (e.g., no 'g' prefix). */
38 #define PROGRAM_NAME "tsort"
40 #define AUTHORS proper_name ("Mark Kettenis")
42 /* Token delimiters when reading from a file. */
45 /* Members of the list of successors. */
49 struct successor
*next
;
52 /* Each string is held in memory as the head of a list of successors. */
56 struct item
*left
, *right
;
57 signed char balance
; /* -1, 0, or +1 */
61 struct successor
*top
;
64 /* The head of the sorted list. */
65 static struct item
*head
= nullptr;
67 /* The tail of the list of 'zeros', strings that have no predecessors. */
68 static struct item
*zeros
= nullptr;
70 /* Used for loop detection. */
71 static struct item
*loop
= nullptr;
73 /* The number of strings to sort. */
74 static size_t n_strings
= 0;
79 if (status
!= EXIT_SUCCESS
)
84 Usage: %s [OPTION] [FILE]\n\
85 Write totally ordered list consistent with the partial ordering in FILE.\n\
93 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
94 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
95 emit_ancillary_info (PROGRAM_NAME
);
101 /* Create a new item/node for STR. */
103 new_item (char const *str
)
105 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
106 struct item
*k
= xzalloc (sizeof *k
);
108 k
->str
= xstrdup (str
);
112 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
113 *ROOT is null. Insert a node/item for STR if not found. Return
114 the node/item found/created for STR.
116 This is done according to Algorithm A (Balanced tree search and
117 insertion) in Donald E. Knuth, The Art of Computer Programming,
118 Volume 3/Searching and Sorting, pages 455--457. */
121 search_item (struct item
*root
, char const *str
)
123 struct item
*p
, *q
, *r
, *s
, *t
;
128 /* Make sure the tree is not empty, since that is what the algorithm
130 if (root
->right
== nullptr)
131 return (root
->right
= new_item (str
));
133 /* A1. Initialize. */
140 assert (str
&& p
&& p
->str
);
141 a
= strcmp (str
, p
->str
);
145 /* A3 & A4. Move left & right. */
156 /* A3 & A4. (continued). */
162 /* A6. Adjust balance factors. */
163 assert (str
&& s
&& s
->str
&& !STREQ (str
, s
->str
));
164 if (strcmp (str
, s
->str
) < 0)
177 assert (str
&& p
&& p
->str
&& !STREQ (str
, p
->str
));
178 if (strcmp (str
, p
->str
) < 0)
190 /* A7. Balancing act. */
191 if (s
->balance
== 0 || s
->balance
== -a
)
199 /* A8. Single Rotation. */
211 s
->balance
= r
->balance
= 0;
215 /* A9. Double rotation. */
237 else if (p
->balance
== -a
)
242 /* A10. Finishing touch. */
251 /* A3 & A4. (continued). */
264 /* Record the fact that J precedes K. */
267 record_relation (struct item
*j
, struct item
*k
)
271 if (!STREQ (j
->str
, k
->str
))
274 p
= xmalloc (sizeof *p
);
282 count_items (MAYBE_UNUSED
struct item
*unused
)
289 scan_zeros (struct item
*k
)
291 /* Ignore strings that have already been printed. */
292 if (k
->count
== 0 && !k
->printed
)
305 /* Try to detect the loop. If we have detected that K is part of a
306 loop, print the loop on standard error, remove a relation to break
307 the loop, and return true.
309 The loop detection strategy is as follows: Realise that what we're
310 dealing with is essentially a directed graph. If we find an item
311 that is part of a graph that contains a cycle we traverse the graph
312 in backwards direction. In general there is no unique way to do
313 this, but that is no problem. If we encounter an item that we have
314 encountered before, we know that we've found a cycle. All we have
315 to do now is retrace our steps, printing out the items until we
316 encounter that item again. (This is not necessarily the item that
317 we started from originally.) Since the order in which the items
318 are stored in the tree is not related to the specified partial
319 ordering, we may need to walk the tree several times before the
320 loop has completely been constructed. If the loop was found, the
321 global variable LOOP will be null. */
324 detect_loop (struct item
*k
)
328 /* K does not have to be part of a cycle. It is however part of
329 a graph that contains a cycle. */
332 /* Start traversing the graph at K. */
336 struct successor
**p
= &k
->top
;
340 if ((*p
)->suc
== loop
)
344 /* We have found a loop. Retrace our steps. */
347 struct item
*tmp
= loop
->qlink
;
349 error (0, 0, "%s", (loop
->str
));
351 /* Until we encounter K again. */
354 /* Remove relation. */
355 struct successor
*s
= *p
;
362 /* Tidy things up since we might have to
363 detect another loop. */
364 loop
->qlink
= nullptr;
370 struct item
*tmp
= loop
->qlink
;
372 loop
->qlink
= nullptr;
376 /* Since we have found the loop, stop walking
396 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
397 Stop when ACTION returns true. */
400 recurse_tree (struct item
*root
, bool (*action
) (struct item
*))
402 if (root
->left
== nullptr && root
->right
== nullptr)
403 return (*action
) (root
);
406 if (root
->left
!= nullptr)
407 if (recurse_tree (root
->left
, action
))
409 if ((*action
) (root
))
411 if (root
->right
!= nullptr)
412 if (recurse_tree (root
->right
, action
))
419 /* Walk the tree specified by the head ROOT, calling ACTION for
423 walk_tree (struct item
*root
, bool (*action
) (struct item
*))
426 recurse_tree (root
->right
, action
);
429 /* Do a topological sort on FILE. Exit with appropriate exit status. */
431 static _Noreturn
void
432 tsort (char const *file
)
436 struct item
*j
= nullptr;
437 struct item
*k
= nullptr;
438 token_buffer tokenbuffer
;
439 bool is_stdin
= STREQ (file
, "-");
441 /* Intialize the head of the tree will hold the strings we're sorting. */
442 root
= new_item (nullptr);
444 if (!is_stdin
&& ! freopen (file
, "r", stdin
))
445 die (EXIT_FAILURE
, errno
, "%s", quotef (file
));
447 fadvise (stdin
, FADVISE_SEQUENTIAL
);
449 init_tokenbuffer (&tokenbuffer
);
453 /* T2. Next Relation. */
454 size_t len
= readtoken (stdin
, DELIM
, sizeof (DELIM
) - 1, &tokenbuffer
);
455 if (len
== (size_t) -1)
458 die (EXIT_FAILURE
, errno
, _("%s: read error"), quotef (file
));
464 k
= search_item (root
, tokenbuffer
.buffer
);
467 /* T3. Record the relation. */
468 record_relation (j
, k
);
476 die (EXIT_FAILURE
, 0, _("%s: input contains an odd number of tokens"),
479 /* T1. Initialize (N <- n). */
480 walk_tree (root
, count_items
);
482 while (n_strings
> 0)
484 /* T4. Scan for zeros. */
485 walk_tree (root
, scan_zeros
);
489 struct successor
*p
= head
->top
;
491 /* T5. Output front of queue. */
493 head
->printed
= true;
496 /* T6. Erase relations. */
500 if (p
->suc
->count
== 0)
502 zeros
->qlink
= p
->suc
;
509 /* T7. Remove from queue. */
513 /* T8. End of process. */
516 /* The input contains a loop. */
517 error (0, 0, _("%s: input contains a loop:"), quotef (file
));
520 /* Print the loop and remove a relation to break it. */
522 walk_tree (root
, detect_loop
);
527 if (fclose (stdin
) != 0)
528 die (EXIT_FAILURE
, errno
, "%s",
529 is_stdin
? _("standard input") : quotef (file
));
531 exit (ok
? EXIT_SUCCESS
: EXIT_FAILURE
);
535 main (int argc
, char **argv
)
537 initialize_main (&argc
, &argv
);
538 set_program_name (argv
[0]);
539 setlocale (LC_ALL
, "");
540 bindtextdomain (PACKAGE
, LOCALEDIR
);
541 textdomain (PACKAGE
);
543 atexit (close_stdout
);
545 parse_gnu_standard_options_only (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
,
546 Version
, true, usage
, AUTHORS
,
547 (char const *) nullptr);
549 if (1 < argc
- optind
)
551 error (0, 0, _("extra operand %s"), quote (argv
[optind
+ 1]));
552 usage (EXIT_FAILURE
);
555 tsort (optind
== argc
? "-" : argv
[optind
]);