1 /* tsort - topological sort.
2 Copyright (C) 1998-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
25 #include <sys/types.h>
29 #include "long-options.h"
31 #include "readtokens.h"
35 /* The official name of this program (e.g., no 'g' prefix). */
36 #define PROGRAM_NAME "tsort"
38 #define AUTHORS proper_name ("Mark Kettenis")
40 /* Token delimiters when reading from a file. */
43 /* Members of the list of successors. */
47 struct successor
*next
;
50 /* Each string is held in memory as the head of a list of successors. */
54 struct item
*left
, *right
;
55 signed char balance
; /* -1, 0, or +1 */
59 struct successor
*top
;
62 /* The head of the sorted list. */
63 static struct item
*head
= nullptr;
65 /* The tail of the list of 'zeros', strings that have no predecessors. */
66 static struct item
*zeros
= nullptr;
68 /* Used for loop detection. */
69 static struct item
*loop
= nullptr;
71 /* The number of strings to sort. */
72 static size_t n_strings
= 0;
77 if (status
!= EXIT_SUCCESS
)
82 Usage: %s [OPTION] [FILE]\n\
83 Write totally ordered list consistent with the partial ordering in FILE.\n\
91 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
92 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
93 emit_ancillary_info (PROGRAM_NAME
);
99 /* Create a new item/node for STR. */
101 new_item (char const *str
)
103 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
104 struct item
*k
= xzalloc (sizeof *k
);
106 k
->str
= xstrdup (str
);
110 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
111 *ROOT is null. Insert a node/item for STR if not found. Return
112 the node/item found/created for STR.
114 This is done according to Algorithm A (Balanced tree search and
115 insertion) in Donald E. Knuth, The Art of Computer Programming,
116 Volume 3/Searching and Sorting, pages 455--457. */
119 search_item (struct item
*root
, char const *str
)
121 struct item
*p
, *q
, *r
, *s
, *t
;
124 /* Make sure the tree is not empty, since that is what the algorithm
126 if (root
->right
== nullptr)
127 return (root
->right
= new_item (str
));
129 /* A1. Initialize. */
136 a
= strcmp (str
, p
->str
);
140 /* A3 & A4. Move left & right. */
151 /* A3 & A4. (continued). */
157 /* A6. Adjust balance factors. */
158 a
= strcmp (str
, s
->str
);
173 int cmp
= strcmp (str
, p
->str
);
187 /* A7. Balancing act. */
188 if (s
->balance
== 0 || s
->balance
== -a
)
196 /* A8. Single Rotation. */
208 s
->balance
= r
->balance
= 0;
212 /* A9. Double rotation. */
234 else if (p
->balance
== -a
)
239 /* A10. Finishing touch. */
248 /* A3 & A4. (continued). */
261 /* Record the fact that J precedes K. */
264 record_relation (struct item
*j
, struct item
*k
)
268 if (!STREQ (j
->str
, k
->str
))
271 p
= xmalloc (sizeof *p
);
279 count_items (MAYBE_UNUSED
struct item
*unused
)
286 scan_zeros (struct item
*k
)
288 /* Ignore strings that have already been printed. */
289 if (k
->count
== 0 && !k
->printed
)
302 /* Try to detect the loop. If we have detected that K is part of a
303 loop, print the loop on standard error, remove a relation to break
304 the loop, and return true.
306 The loop detection strategy is as follows: Realize that what we're
307 dealing with is essentially a directed graph. If we find an item
308 that is part of a graph that contains a cycle we traverse the graph
309 in backwards direction. In general there is no unique way to do
310 this, but that is no problem. If we encounter an item that we have
311 encountered before, we know that we've found a cycle. All we have
312 to do now is retrace our steps, printing out the items until we
313 encounter that item again. (This is not necessarily the item that
314 we started from originally.) Since the order in which the items
315 are stored in the tree is not related to the specified partial
316 ordering, we may need to walk the tree several times before the
317 loop has completely been constructed. If the loop was found, the
318 global variable LOOP will be null. */
321 detect_loop (struct item
*k
)
325 /* K does not have to be part of a cycle. It is however part of
326 a graph that contains a cycle. */
329 /* Start traversing the graph at K. */
333 struct successor
**p
= &k
->top
;
337 if ((*p
)->suc
== loop
)
341 /* We have found a loop. Retrace our steps. */
344 struct item
*tmp
= loop
->qlink
;
346 error (0, 0, "%s", (loop
->str
));
348 /* Until we encounter K again. */
351 /* Remove relation. */
352 struct successor
*s
= *p
;
359 /* Tidy things up since we might have to
360 detect another loop. */
361 loop
->qlink
= nullptr;
367 struct item
*tmp
= loop
->qlink
;
369 loop
->qlink
= nullptr;
373 /* Since we have found the loop, stop walking
393 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
394 Stop when ACTION returns true. */
397 recurse_tree (struct item
*root
, bool (*action
) (struct item
*))
399 if (root
->left
== nullptr && root
->right
== nullptr)
400 return (*action
) (root
);
403 if (root
->left
!= nullptr)
404 if (recurse_tree (root
->left
, action
))
406 if ((*action
) (root
))
408 if (root
->right
!= nullptr)
409 if (recurse_tree (root
->right
, action
))
416 /* Walk the tree specified by the head ROOT, calling ACTION for
420 walk_tree (struct item
*root
, bool (*action
) (struct item
*))
423 recurse_tree (root
->right
, action
);
426 /* Do a topological sort on FILE. Exit with appropriate exit status. */
428 static _Noreturn
void
429 tsort (char const *file
)
432 struct item
*j
= nullptr;
433 struct item
*k
= nullptr;
434 token_buffer tokenbuffer
;
435 bool is_stdin
= STREQ (file
, "-");
437 /* Initialize the head of the tree holding the strings we're sorting. */
438 struct item
*root
= new_item (nullptr);
440 if (!is_stdin
&& ! freopen (file
, "r", stdin
))
441 error (EXIT_FAILURE
, errno
, "%s", quotef (file
));
443 fadvise (stdin
, FADVISE_SEQUENTIAL
);
445 init_tokenbuffer (&tokenbuffer
);
449 /* T2. Next Relation. */
450 size_t len
= readtoken (stdin
, DELIM
, sizeof (DELIM
) - 1, &tokenbuffer
);
451 if (len
== (size_t) -1)
454 error (EXIT_FAILURE
, errno
, _("%s: read error"), quotef (file
));
460 k
= search_item (root
, tokenbuffer
.buffer
);
463 /* T3. Record the relation. */
464 record_relation (j
, k
);
472 error (EXIT_FAILURE
, 0, _("%s: input contains an odd number of tokens"),
475 /* T1. Initialize (N <- n). */
476 walk_tree (root
, count_items
);
478 while (n_strings
> 0)
480 /* T4. Scan for zeros. */
481 walk_tree (root
, scan_zeros
);
485 struct successor
*p
= head
->top
;
487 /* T5. Output front of queue. */
489 head
->printed
= true;
492 /* T6. Erase relations. */
496 if (p
->suc
->count
== 0)
498 zeros
->qlink
= p
->suc
;
505 /* T7. Remove from queue. */
509 /* T8. End of process. */
512 /* The input contains a loop. */
513 error (0, 0, _("%s: input contains a loop:"), quotef (file
));
516 /* Print the loop and remove a relation to break it. */
518 walk_tree (root
, detect_loop
);
523 if (fclose (stdin
) != 0)
524 error (EXIT_FAILURE
, errno
, "%s",
525 is_stdin
? _("standard input") : quotef (file
));
527 exit (ok
? EXIT_SUCCESS
: EXIT_FAILURE
);
531 main (int argc
, char **argv
)
533 initialize_main (&argc
, &argv
);
534 set_program_name (argv
[0]);
535 setlocale (LC_ALL
, "");
536 bindtextdomain (PACKAGE
, LOCALEDIR
);
537 textdomain (PACKAGE
);
539 atexit (close_stdout
);
541 parse_gnu_standard_options_only (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
,
542 Version
, true, usage
, AUTHORS
,
543 (char const *) nullptr);
545 if (1 < argc
- optind
)
547 error (0, 0, _("extra operand %s"), quote (argv
[optind
+ 1]));
548 usage (EXIT_FAILURE
);
551 tsort (optind
== argc
? "-" : argv
[optind
]);