* src/dd.c (flags): noatime and nofollow now depend on
[coreutils/bo.git] / src / sort.c
blob7f9566d5722524c14b2dfd893585fee6f5f1f87d
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation.
22 Ørn E. Hansen added NLS support in 1997. */
24 #include <config.h>
26 #include <getopt.h>
27 #include <sys/types.h>
28 #include <signal.h>
29 #include "system.h"
30 #include "error.h"
31 #include "hard-locale.h"
32 #include "inttostr.h"
33 #include "md5.h"
34 #include "physmem.h"
35 #include "posixver.h"
36 #include "quote.h"
37 #include "randread.h"
38 #include "stdio--.h"
39 #include "stdlib--.h"
40 #include "strnumcmp.h"
41 #include "xmemcoll.h"
42 #include "xmemxfrm.h"
43 #include "xstrtol.h"
45 #if HAVE_SYS_RESOURCE_H
46 # include <sys/resource.h>
47 #endif
48 #ifndef RLIMIT_DATA
49 struct rlimit { size_t rlim_cur; };
50 # define getrlimit(Resource, Rlp) (-1)
51 #endif
53 /* The official name of this program (e.g., no `g' prefix). */
54 #define PROGRAM_NAME "sort"
56 #define AUTHORS "Mike Haertel", "Paul Eggert"
58 #if HAVE_LANGINFO_CODESET
59 # include <langinfo.h>
60 #endif
62 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
63 present. */
64 #ifndef SA_NOCLDSTOP
65 # define SA_NOCLDSTOP 0
66 # define sigprocmask(How, Set, Oset) /* empty */
67 # define sigset_t int
68 # if ! HAVE_SIGINTERRUPT
69 # define siginterrupt(sig, flag) /* empty */
70 # endif
71 #endif
73 #ifndef STDC_HEADERS
74 double strtod ();
75 #endif
77 #define UCHAR_LIM (UCHAR_MAX + 1)
79 #ifndef DEFAULT_TMPDIR
80 # define DEFAULT_TMPDIR "/tmp"
81 #endif
83 /* Exit statuses. */
84 enum
86 /* POSIX says to exit with status 1 if invoked with -c and the
87 input is not properly sorted. */
88 SORT_OUT_OF_ORDER = 1,
90 /* POSIX says any other irregular exit must exit with a status
91 code greater than 1. */
92 SORT_FAILURE = 2
95 /* The representation of the decimal point in the current locale. */
96 static int decimal_point;
98 /* Thousands separator; if -1, then there isn't one. */
99 static int thousands_sep;
101 /* Nonzero if the corresponding locales are hard. */
102 static bool hard_LC_COLLATE;
103 #if HAVE_NL_LANGINFO
104 static bool hard_LC_TIME;
105 #endif
107 #define NONZERO(x) ((x) != 0)
109 /* The kind of blanks for '-b' to skip in various options. */
110 enum blanktype { bl_start, bl_end, bl_both };
112 /* The character marking end of line. Default to \n. */
113 static char eolchar = '\n';
115 /* Lines are held in core as counted strings. */
116 struct line
118 char *text; /* Text of the line. */
119 size_t length; /* Length including final newline. */
120 char *keybeg; /* Start of first key. */
121 char *keylim; /* Limit of first key. */
124 /* Input buffers. */
125 struct buffer
127 char *buf; /* Dynamically allocated buffer,
128 partitioned into 3 regions:
129 - input data;
130 - unused area;
131 - an array of lines, in reverse order. */
132 size_t used; /* Number of bytes used for input data. */
133 size_t nlines; /* Number of lines in the line array. */
134 size_t alloc; /* Number of bytes allocated. */
135 size_t left; /* Number of bytes left from previous reads. */
136 size_t line_bytes; /* Number of bytes to reserve for each line. */
137 bool eof; /* An EOF has been read. */
140 struct keyfield
142 size_t sword; /* Zero-origin 'word' to start at. */
143 size_t schar; /* Additional characters to skip. */
144 size_t eword; /* Zero-origin first word after field. */
145 size_t echar; /* Additional characters in field. */
146 bool const *ignore; /* Boolean array of characters to ignore. */
147 char const *translate; /* Translation applied to characters. */
148 bool skipsblanks; /* Skip leading blanks when finding start. */
149 bool skipeblanks; /* Skip leading blanks when finding end. */
150 bool numeric; /* Flag for numeric comparison. Handle
151 strings of digits with optional decimal
152 point, but no exponential notation. */
153 bool random; /* Sort by random hash of key. */
154 bool general_numeric; /* Flag for general, numeric comparison.
155 Handle numbers in exponential notation. */
156 bool month; /* Flag for comparison by month name. */
157 bool reverse; /* Reverse the sense of comparison. */
158 struct keyfield *next; /* Next keyfield to try. */
161 struct month
163 char const *name;
164 int val;
167 /* The name this program was run with. */
168 char *program_name;
170 /* FIXME: None of these tables work with multibyte character sets.
171 Also, there are many other bugs when handling multibyte characters.
172 One way to fix this is to rewrite `sort' to use wide characters
173 internally, but doing this with good performance is a bit
174 tricky. */
176 /* Table of blanks. */
177 static bool blanks[UCHAR_LIM];
179 /* Table of non-printing characters. */
180 static bool nonprinting[UCHAR_LIM];
182 /* Table of non-dictionary characters (not letters, digits, or blanks). */
183 static bool nondictionary[UCHAR_LIM];
185 /* Translation table folding lower case to upper. */
186 static char fold_toupper[UCHAR_LIM];
188 #define MONTHS_PER_YEAR 12
190 /* Table mapping month names to integers.
191 Alphabetic order allows binary search. */
192 static struct month monthtab[] =
194 {"APR", 4},
195 {"AUG", 8},
196 {"DEC", 12},
197 {"FEB", 2},
198 {"JAN", 1},
199 {"JUL", 7},
200 {"JUN", 6},
201 {"MAR", 3},
202 {"MAY", 5},
203 {"NOV", 11},
204 {"OCT", 10},
205 {"SEP", 9}
208 /* During the merge phase, the number of files to merge at once. */
209 #define NMERGE 16
211 /* Minimum size for a merge or check buffer. */
212 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
214 /* Minimum sort size; the code might not work with smaller sizes. */
215 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
217 /* The number of bytes needed for a merge or check buffer, which can
218 function relatively efficiently even if it holds only one line. If
219 a longer line is seen, this value is increased. */
220 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
222 /* The approximate maximum number of bytes of main memory to use, as
223 specified by the user. Zero if the user has not specified a size. */
224 static size_t sort_size;
226 /* The guessed size for non-regular files. */
227 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
229 /* Array of directory names in which any temporary files are to be created. */
230 static char const **temp_dirs;
232 /* Number of temporary directory names used. */
233 static size_t temp_dir_count;
235 /* Number of allocated slots in temp_dirs. */
236 static size_t temp_dir_alloc;
238 /* Flag to reverse the order of all comparisons. */
239 static bool reverse;
241 /* Flag for stable sort. This turns off the last ditch bytewise
242 comparison of lines, and instead leaves lines in the same order
243 they were read if all keys compare equal. */
244 static bool stable;
246 /* If TAB has this value, blanks separate fields. */
247 enum { TAB_DEFAULT = CHAR_MAX + 1 };
249 /* Tab character separating fields. If TAB_DEFAULT, then fields are
250 separated by the empty string between a non-blank character and a blank
251 character. */
252 static int tab = TAB_DEFAULT;
254 /* Flag to remove consecutive duplicate lines from the output.
255 Only the last of a sequence of equal lines will be output. */
256 static bool unique;
258 /* Nonzero if any of the input files are the standard input. */
259 static bool have_read_stdin;
261 /* List of key field comparisons to be tried. */
262 static struct keyfield *keylist;
264 static void sortlines_temp (struct line *, size_t, struct line *);
266 /* Report MESSAGE for FILE, then clean up and exit.
267 If FILE is null, it represents standard output. */
269 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
270 static void
271 die (char const *message, char const *file)
273 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
274 exit (SORT_FAILURE);
277 void
278 usage (int status)
280 if (status != EXIT_SUCCESS)
281 fprintf (stderr, _("Try `%s --help' for more information.\n"),
282 program_name);
283 else
285 printf (_("\
286 Usage: %s [OPTION]... [FILE]...\n\
288 program_name);
289 fputs (_("\
290 Write sorted concatenation of all FILE(s) to standard output.\n\
292 "), stdout);
293 fputs (_("\
294 Mandatory arguments to long options are mandatory for short options too.\n\
295 "), stdout);
296 fputs (_("\
297 Ordering options:\n\
299 "), stdout);
300 fputs (_("\
301 -b, --ignore-leading-blanks ignore leading blanks\n\
302 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
303 -f, --ignore-case fold lower case to upper case characters\n\
304 "), stdout);
305 fputs (_("\
306 -g, --general-numeric-sort compare according to general numerical value\n\
307 -i, --ignore-nonprinting consider only printable characters\n\
308 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
309 -n, --numeric-sort compare according to string numerical value\n\
310 -R, --random-sort sort by random hash of keys\n\
311 --random-source=FILE get random bytes from FILE (default /dev/urandom)\n\
312 -r, --reverse reverse the result of comparisons\n\
314 "), stdout);
315 fputs (_("\
316 Other options:\n\
318 -c, --check check whether input is sorted; do not sort\n\
319 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\
320 -m, --merge merge already sorted files; do not sort\n\
321 -o, --output=FILE write result to FILE instead of standard output\n\
322 -s, --stable stabilize sort by disabling last-resort comparison\n\
323 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
324 "), stdout);
325 printf (_("\
326 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
327 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
328 multiple options specify multiple directories\n\
329 -u, --unique with -c, check for strict ordering;\n\
330 without -c, output only the first of an equal run\n\
331 "), DEFAULT_TMPDIR);
332 fputs (_("\
333 -z, --zero-terminated end lines with 0 byte, not newline\n\
334 "), stdout);
335 fputs (HELP_OPTION_DESCRIPTION, stdout);
336 fputs (VERSION_OPTION_DESCRIPTION, stdout);
337 fputs (_("\
339 POS is F[.C][OPTS], where F is the field number and C the character position\n\
340 in the field. If neither the -t nor the -b option is in effect, the characters\n\
341 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
342 one or more single-letter ordering options, which override global ordering\n\
343 options for that key. If no key is given, use the entire line as the key.\n\
345 SIZE may be followed by the following multiplicative suffixes:\n\
346 "), stdout);
347 fputs (_("\
348 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
350 With no FILE, or when FILE is -, read standard input.\n\
352 *** WARNING ***\n\
353 The locale specified by the environment affects sort order.\n\
354 Set LC_ALL=C to get the traditional sort order that uses\n\
355 native byte values.\n\
356 "), stdout );
357 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
360 exit (status);
363 /* For long options that have no equivalent short option, use a
364 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
365 enum
367 RANDOM_SOURCE_OPTION = CHAR_MAX + 1
370 static char const short_options[] = "-bcdfgik:mMno:rRsS:t:T:uy:z";
372 static struct option const long_options[] =
374 {"ignore-leading-blanks", no_argument, NULL, 'b'},
375 {"check", no_argument, NULL, 'c'},
376 {"dictionary-order", no_argument, NULL, 'd'},
377 {"ignore-case", no_argument, NULL, 'f'},
378 {"general-numeric-sort", no_argument, NULL, 'g'},
379 {"ignore-nonprinting", no_argument, NULL, 'i'},
380 {"key", required_argument, NULL, 'k'},
381 {"merge", no_argument, NULL, 'm'},
382 {"month-sort", no_argument, NULL, 'M'},
383 {"numeric-sort", no_argument, NULL, 'n'},
384 {"random-sort", no_argument, NULL, 'R'},
385 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
386 {"output", required_argument, NULL, 'o'},
387 {"reverse", no_argument, NULL, 'r'},
388 {"stable", no_argument, NULL, 's'},
389 {"buffer-size", required_argument, NULL, 'S'},
390 {"field-separator", required_argument, NULL, 't'},
391 {"temporary-directory", required_argument, NULL, 'T'},
392 {"unique", no_argument, NULL, 'u'},
393 {"zero-terminated", no_argument, NULL, 'z'},
394 {GETOPT_HELP_OPTION_DECL},
395 {GETOPT_VERSION_OPTION_DECL},
396 {NULL, 0, NULL, 0},
399 /* The set of signals that are caught. */
400 static sigset_t caught_signals;
402 /* The list of temporary files. */
403 struct tempnode
405 struct tempnode *volatile next;
406 char name[1]; /* Actual size is 1 + file name length. */
408 static struct tempnode *volatile temphead;
409 static struct tempnode *volatile *temptail = &temphead;
411 /* Clean up any remaining temporary files. */
413 static void
414 cleanup (void)
416 struct tempnode const *node;
418 for (node = temphead; node; node = node->next)
419 unlink (node->name);
422 /* Create a new temporary file, returning its newly allocated name.
423 Store into *PFP a stream open for writing. */
425 static char *
426 create_temp_file (FILE **pfp)
428 static char const slashbase[] = "/sortXXXXXX";
429 static size_t temp_dir_index;
430 sigset_t oldset;
431 int fd;
432 int saved_errno;
433 char const *temp_dir = temp_dirs[temp_dir_index];
434 size_t len = strlen (temp_dir);
435 struct tempnode *node =
436 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
437 char *file = node->name;
439 memcpy (file, temp_dir, len);
440 memcpy (file + len, slashbase, sizeof slashbase);
441 node->next = NULL;
442 if (++temp_dir_index == temp_dir_count)
443 temp_dir_index = 0;
445 /* Create the temporary file in a critical section, to avoid races. */
446 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
447 fd = mkstemp (file);
448 if (0 <= fd)
450 *temptail = node;
451 temptail = &node->next;
453 saved_errno = errno;
454 sigprocmask (SIG_SETMASK, &oldset, NULL);
455 errno = saved_errno;
457 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
458 die (_("cannot create temporary file"), file);
460 return file;
463 /* Return a stream for FILE, opened with mode HOW. A null FILE means
464 standard output; HOW should be "w". When opening for input, "-"
465 means standard input. To avoid confusion, do not return file
466 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
467 opening an ordinary FILE. */
469 static FILE *
470 xfopen (const char *file, const char *how)
472 FILE *fp;
474 if (!file)
475 fp = stdout;
476 else if (STREQ (file, "-") && *how == 'r')
478 have_read_stdin = true;
479 fp = stdin;
481 else
483 fp = fopen (file, how);
484 if (! fp)
485 die (_("open failed"), file);
488 return fp;
491 /* Close FP, whose name is FILE, and report any errors. */
493 static void
494 xfclose (FILE *fp, char const *file)
496 switch (fileno (fp))
498 case STDIN_FILENO:
499 /* Allow reading stdin from tty more than once. */
500 if (feof (fp))
501 clearerr (fp);
502 break;
504 case STDOUT_FILENO:
505 /* Don't close stdout just yet. close_stdout does that. */
506 if (fflush (fp) != 0)
507 die (_("fflush failed"), file);
508 break;
510 default:
511 if (fclose (fp) != 0)
512 die (_("close failed"), file);
513 break;
517 static void
518 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
520 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
521 die (_("write failed"), output_file);
524 /* Append DIR to the array of temporary directory names. */
525 static void
526 add_temp_dir (char const *dir)
528 if (temp_dir_count == temp_dir_alloc)
529 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
531 temp_dirs[temp_dir_count++] = dir;
534 /* Remove NAME from the list of temporary files. */
536 static void
537 zaptemp (const char *name)
539 struct tempnode *volatile *pnode;
540 struct tempnode *node;
541 struct tempnode *next;
542 sigset_t oldset;
543 int unlink_status;
544 int unlink_errno = 0;
546 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
547 continue;
549 /* Unlink the temporary file in a critical section to avoid races. */
550 next = node->next;
551 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
552 unlink_status = unlink (name);
553 unlink_errno = errno;
554 *pnode = next;
555 sigprocmask (SIG_SETMASK, &oldset, NULL);
557 if (unlink_status != 0)
558 error (0, unlink_errno, _("warning: cannot remove: %s"), name);
559 if (! next)
560 temptail = pnode;
561 free (node);
564 #if HAVE_NL_LANGINFO
566 static int
567 struct_month_cmp (const void *m1, const void *m2)
569 struct month const *month1 = m1;
570 struct month const *month2 = m2;
571 return strcmp (month1->name, month2->name);
574 #endif
576 /* Initialize the character class tables. */
578 static void
579 inittables (void)
581 size_t i;
583 for (i = 0; i < UCHAR_LIM; ++i)
585 blanks[i] = !! isblank (i);
586 nonprinting[i] = ! isprint (i);
587 nondictionary[i] = ! isalnum (i) && ! isblank (i);
588 fold_toupper[i] = toupper (i);
591 #if HAVE_NL_LANGINFO
592 /* If we're not in the "C" locale, read different names for months. */
593 if (hard_LC_TIME)
595 for (i = 0; i < MONTHS_PER_YEAR; i++)
597 char const *s;
598 size_t s_len;
599 size_t j;
600 char *name;
602 s = (char *) nl_langinfo (ABMON_1 + i);
603 s_len = strlen (s);
604 monthtab[i].name = name = xmalloc (s_len + 1);
605 monthtab[i].val = i + 1;
607 for (j = 0; j < s_len; j++)
608 name[j] = fold_toupper[to_uchar (s[j])];
609 name[j] = '\0';
611 qsort ((void *) monthtab, MONTHS_PER_YEAR,
612 sizeof *monthtab, struct_month_cmp);
614 #endif
617 /* Specify the amount of main memory to use when sorting. */
618 static void
619 specify_sort_size (char const *s)
621 uintmax_t n;
622 char *suffix;
623 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
625 /* The default unit is KiB. */
626 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
628 if (n <= UINTMAX_MAX / 1024)
629 n *= 1024;
630 else
631 e = LONGINT_OVERFLOW;
634 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
635 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
636 switch (suffix[0])
638 case 'b':
639 e = LONGINT_OK;
640 break;
642 case '%':
644 double mem = physmem_total () * n / 100;
646 /* Use "<", not "<=", to avoid problems with rounding. */
647 if (mem < UINTMAX_MAX)
649 n = mem;
650 e = LONGINT_OK;
652 else
653 e = LONGINT_OVERFLOW;
655 break;
658 if (e == LONGINT_OK)
660 /* If multiple sort sizes are specified, take the maximum, so
661 that option order does not matter. */
662 if (n < sort_size)
663 return;
665 sort_size = n;
666 if (sort_size == n)
668 sort_size = MAX (sort_size, MIN_SORT_SIZE);
669 return;
672 e = LONGINT_OVERFLOW;
675 STRTOL_FATAL_ERROR (s, _("sort size"), e);
678 /* Return the default sort size. */
679 static size_t
680 default_sort_size (void)
682 /* Let MEM be available memory or 1/8 of total memory, whichever
683 is greater. */
684 double avail = physmem_available ();
685 double total = physmem_total ();
686 double mem = MAX (avail, total / 8);
687 struct rlimit rlimit;
689 /* Let SIZE be MEM, but no more than the maximum object size or
690 system resource limits. Avoid the MIN macro here, as it is not
691 quite right when only one argument is floating point. Don't
692 bother to check for values like RLIM_INFINITY since in practice
693 they are not much less than SIZE_MAX. */
694 size_t size = SIZE_MAX;
695 if (mem < size)
696 size = mem;
697 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
698 size = rlimit.rlim_cur;
699 #ifdef RLIMIT_AS
700 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
701 size = rlimit.rlim_cur;
702 #endif
704 /* Leave a large safety margin for the above limits, as failure can
705 occur when they are exceeded. */
706 size /= 2;
708 #ifdef RLIMIT_RSS
709 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
710 Exceeding RSS is not fatal, but can be quite slow. */
711 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
712 size = rlimit.rlim_cur / 16 * 15;
713 #endif
715 /* Use no less than the minimum. */
716 return MAX (size, MIN_SORT_SIZE);
719 /* Return the sort buffer size to use with the input files identified
720 by FPS and FILES, which are alternate names of the same files.
721 NFILES gives the number of input files; NFPS may be less. Assume
722 that each input line requires LINE_BYTES extra bytes' worth of line
723 information. Do not exceed the size bound specified by the user
724 (or a default size bound, if the user does not specify one). */
726 static size_t
727 sort_buffer_size (FILE *const *fps, size_t nfps,
728 char *const *files, size_t nfiles,
729 size_t line_bytes)
731 /* A bound on the input size. If zero, the bound hasn't been
732 determined yet. */
733 static size_t size_bound;
735 /* In the worst case, each input byte is a newline. */
736 size_t worst_case_per_input_byte = line_bytes + 1;
738 /* Keep enough room for one extra input line and an extra byte.
739 This extra room might be needed when preparing to read EOF. */
740 size_t size = worst_case_per_input_byte + 1;
742 size_t i;
744 for (i = 0; i < nfiles; i++)
746 struct stat st;
747 off_t file_size;
748 size_t worst_case;
750 if ((i < nfps ? fstat (fileno (fps[i]), &st)
751 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
752 : stat (files[i], &st))
753 != 0)
754 die (_("stat failed"), files[i]);
756 if (S_ISREG (st.st_mode))
757 file_size = st.st_size;
758 else
760 /* The file has unknown size. If the user specified a sort
761 buffer size, use that; otherwise, guess the size. */
762 if (sort_size)
763 return sort_size;
764 file_size = INPUT_FILE_SIZE_GUESS;
767 if (! size_bound)
769 size_bound = sort_size;
770 if (! size_bound)
771 size_bound = default_sort_size ();
774 /* Add the amount of memory needed to represent the worst case
775 where the input consists entirely of newlines followed by a
776 single non-newline. Check for overflow. */
777 worst_case = file_size * worst_case_per_input_byte + 1;
778 if (file_size != worst_case / worst_case_per_input_byte
779 || size_bound - size <= worst_case)
780 return size_bound;
781 size += worst_case;
784 return size;
787 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
788 must be at least sizeof (struct line). Allocate ALLOC bytes
789 initially. */
791 static void
792 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
794 /* Ensure that the line array is properly aligned. If the desired
795 size cannot be allocated, repeatedly halve it until allocation
796 succeeds. The smaller allocation may hurt overall performance,
797 but that's better than failing. */
798 for (;;)
800 alloc += sizeof (struct line) - alloc % sizeof (struct line);
801 buf->buf = malloc (alloc);
802 if (buf->buf)
803 break;
804 alloc /= 2;
805 if (alloc <= line_bytes + 1)
806 xalloc_die ();
809 buf->line_bytes = line_bytes;
810 buf->alloc = alloc;
811 buf->used = buf->left = buf->nlines = 0;
812 buf->eof = false;
815 /* Return one past the limit of the line array. */
817 static inline struct line *
818 buffer_linelim (struct buffer const *buf)
820 return (struct line *) (buf->buf + buf->alloc);
823 /* Return a pointer to the first character of the field specified
824 by KEY in LINE. */
826 static char *
827 begfield (const struct line *line, const struct keyfield *key)
829 char *ptr = line->text, *lim = ptr + line->length - 1;
830 size_t sword = key->sword;
831 size_t schar = key->schar;
832 size_t remaining_bytes;
834 /* The leading field separator itself is included in a field when -t
835 is absent. */
837 if (tab != TAB_DEFAULT)
838 while (ptr < lim && sword--)
840 while (ptr < lim && *ptr != tab)
841 ++ptr;
842 if (ptr < lim)
843 ++ptr;
845 else
846 while (ptr < lim && sword--)
848 while (ptr < lim && blanks[to_uchar (*ptr)])
849 ++ptr;
850 while (ptr < lim && !blanks[to_uchar (*ptr)])
851 ++ptr;
854 if (key->skipsblanks)
855 while (ptr < lim && blanks[to_uchar (*ptr)])
856 ++ptr;
858 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
859 remaining_bytes = lim - ptr;
860 if (schar < remaining_bytes)
861 ptr += schar;
862 else
863 ptr = lim;
865 return ptr;
868 /* Return the limit of (a pointer to the first character after) the field
869 in LINE specified by KEY. */
871 static char *
872 limfield (const struct line *line, const struct keyfield *key)
874 char *ptr = line->text, *lim = ptr + line->length - 1;
875 size_t eword = key->eword, echar = key->echar;
876 size_t remaining_bytes;
878 /* Move PTR past EWORD fields or to one past the last byte on LINE,
879 whichever comes first. If there are more than EWORD fields, leave
880 PTR pointing at the beginning of the field having zero-based index,
881 EWORD. If a delimiter character was specified (via -t), then that
882 `beginning' is the first character following the delimiting TAB.
883 Otherwise, leave PTR pointing at the first `blank' character after
884 the preceding field. */
885 if (tab != TAB_DEFAULT)
886 while (ptr < lim && eword--)
888 while (ptr < lim && *ptr != tab)
889 ++ptr;
890 if (ptr < lim && (eword | echar))
891 ++ptr;
893 else
894 while (ptr < lim && eword--)
896 while (ptr < lim && blanks[to_uchar (*ptr)])
897 ++ptr;
898 while (ptr < lim && !blanks[to_uchar (*ptr)])
899 ++ptr;
902 #ifdef POSIX_UNSPECIFIED
903 /* The following block of code makes GNU sort incompatible with
904 standard Unix sort, so it's ifdef'd out for now.
905 The POSIX spec isn't clear on how to interpret this.
906 FIXME: request clarification.
908 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
909 Date: Thu, 30 May 96 12:20:41 -0400
910 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
912 [...]I believe I've found another bug in `sort'.
914 $ cat /tmp/sort.in
915 a b c 2 d
916 pq rs 1 t
917 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
918 a b c 2 d
919 pq rs 1 t
920 $ /bin/sort -k1.7,1.7 </tmp/sort.in
921 pq rs 1 t
922 a b c 2 d
924 Unix sort produced the answer I expected: sort on the single character
925 in column 7. GNU sort produced different results, because it disagrees
926 on the interpretation of the key-end spec "M.N". Unix sort reads this
927 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
928 "skip M-1 fields, then either N-1 characters or the rest of the current
929 field, whichever comes first". This extra clause applies only to
930 key-ends, not key-starts.
933 /* Make LIM point to the end of (one byte past) the current field. */
934 if (tab != TAB_DEFAULT)
936 char *newlim;
937 newlim = memchr (ptr, tab, lim - ptr);
938 if (newlim)
939 lim = newlim;
941 else
943 char *newlim;
944 newlim = ptr;
945 while (newlim < lim && blanks[to_uchar (*newlim)])
946 ++newlim;
947 while (newlim < lim && !blanks[to_uchar (*newlim)])
948 ++newlim;
949 lim = newlim;
951 #endif
953 /* If we're ignoring leading blanks when computing the End
954 of the field, don't start counting bytes until after skipping
955 past any leading blanks. */
956 if (key->skipeblanks)
957 while (ptr < lim && blanks[to_uchar (*ptr)])
958 ++ptr;
960 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
961 remaining_bytes = lim - ptr;
962 if (echar < remaining_bytes)
963 ptr += echar;
964 else
965 ptr = lim;
967 return ptr;
970 /* Fill BUF reading from FP, moving buf->left bytes from the end
971 of buf->buf to the beginning first. If EOF is reached and the
972 file wasn't terminated by a newline, supply one. Set up BUF's line
973 table too. FILE is the name of the file corresponding to FP.
974 Return true if some input was read. */
976 static bool
977 fillbuf (struct buffer *buf, FILE *fp, char const *file)
979 struct keyfield const *key = keylist;
980 char eol = eolchar;
981 size_t line_bytes = buf->line_bytes;
982 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
984 if (buf->eof)
985 return false;
987 if (buf->used != buf->left)
989 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
990 buf->used = buf->left;
991 buf->nlines = 0;
994 for (;;)
996 char *ptr = buf->buf + buf->used;
997 struct line *linelim = buffer_linelim (buf);
998 struct line *line = linelim - buf->nlines;
999 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1000 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1002 while (line_bytes + 1 < avail)
1004 /* Read as many bytes as possible, but do not read so many
1005 bytes that there might not be enough room for the
1006 corresponding line array. The worst case is when the
1007 rest of the input file consists entirely of newlines,
1008 except that the last byte is not a newline. */
1009 size_t readsize = (avail - 1) / (line_bytes + 1);
1010 size_t bytes_read = fread (ptr, 1, readsize, fp);
1011 char *ptrlim = ptr + bytes_read;
1012 char *p;
1013 avail -= bytes_read;
1015 if (bytes_read != readsize)
1017 if (ferror (fp))
1018 die (_("read failed"), file);
1019 if (feof (fp))
1021 buf->eof = true;
1022 if (buf->buf == ptrlim)
1023 return false;
1024 if (ptrlim[-1] != eol)
1025 *ptrlim++ = eol;
1029 /* Find and record each line in the just-read input. */
1030 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1032 ptr = p + 1;
1033 line--;
1034 line->text = line_start;
1035 line->length = ptr - line_start;
1036 mergesize = MAX (mergesize, line->length);
1037 avail -= line_bytes;
1039 if (key)
1041 /* Precompute the position of the first key for
1042 efficiency. */
1043 line->keylim = (key->eword == SIZE_MAX
1045 : limfield (line, key));
1047 if (key->sword != SIZE_MAX)
1048 line->keybeg = begfield (line, key);
1049 else
1051 if (key->skipsblanks)
1052 while (blanks[to_uchar (*line_start)])
1053 line_start++;
1054 line->keybeg = line_start;
1058 line_start = ptr;
1061 ptr = ptrlim;
1062 if (buf->eof)
1063 break;
1066 buf->used = ptr - buf->buf;
1067 buf->nlines = buffer_linelim (buf) - line;
1068 if (buf->nlines != 0)
1070 buf->left = ptr - line_start;
1071 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1072 return true;
1075 /* The current input line is too long to fit in the buffer.
1076 Double the buffer size and try again. */
1077 buf->buf = X2REALLOC (buf->buf, &buf->alloc);
1081 /* Compare strings A and B as numbers without explicitly converting them to
1082 machine numbers. Comparatively slow for short strings, but asymptotically
1083 hideously fast. */
1085 static int
1086 numcompare (const char *a, const char *b)
1088 while (blanks[to_uchar (*a)])
1089 a++;
1090 while (blanks[to_uchar (*b)])
1091 b++;
1093 return strnumcmp (a, b, decimal_point, thousands_sep);
1096 static int
1097 general_numcompare (const char *sa, const char *sb)
1099 /* FIXME: add option to warn about failed conversions. */
1100 /* FIXME: maybe add option to try expensive FP conversion
1101 only if A and B can't be compared more cheaply/accurately. */
1103 char *ea;
1104 char *eb;
1105 double a = strtod (sa, &ea);
1106 double b = strtod (sb, &eb);
1108 /* Put conversion errors at the start of the collating sequence. */
1109 if (sa == ea)
1110 return sb == eb ? 0 : -1;
1111 if (sb == eb)
1112 return 1;
1114 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1115 conversion errors but before numbers; sort them by internal
1116 bit-pattern, for lack of a more portable alternative. */
1117 return (a < b ? -1
1118 : a > b ? 1
1119 : a == b ? 0
1120 : b == b ? -1
1121 : a == a ? 1
1122 : memcmp ((char *) &a, (char *) &b, sizeof a));
1125 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1126 Return 0 if the name in S is not recognized. */
1128 static int
1129 getmonth (char const *month, size_t len)
1131 size_t lo = 0;
1132 size_t hi = MONTHS_PER_YEAR;
1133 char const *monthlim = month + len;
1135 for (;;)
1137 if (month == monthlim)
1138 return 0;
1139 if (!blanks[to_uchar (*month)])
1140 break;
1141 ++month;
1146 size_t ix = (lo + hi) / 2;
1147 char const *m = month;
1148 char const *n = monthtab[ix].name;
1150 for (;; m++, n++)
1152 if (!*n)
1153 return monthtab[ix].val;
1154 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1156 hi = ix;
1157 break;
1159 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1161 lo = ix + 1;
1162 break;
1166 while (lo < hi);
1168 return 0;
1171 /* A source of random data. */
1172 static struct randread_source *randread_source;
1174 /* Return the Ith randomly-generated state. The caller must invoke
1175 random_state (H) for all H less than I before invoking random_state
1176 (I). */
1178 static struct md5_ctx
1179 random_state (size_t i)
1181 /* An array of states resulting from the random data, and counts of
1182 its used and allocated members. */
1183 static struct md5_ctx *state;
1184 static size_t used;
1185 static size_t allocated;
1187 struct md5_ctx *s = &state[i];
1189 if (used <= i)
1191 unsigned char buf[MD5_DIGEST_SIZE];
1193 used++;
1195 if (allocated <= i)
1197 state = X2NREALLOC (state, &allocated);
1198 s = &state[i];
1201 randread (randread_source, buf, sizeof buf);
1202 md5_init_ctx (s);
1203 md5_process_bytes (buf, sizeof buf, s);
1206 return *s;
1209 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1210 with length LENGTHB. Return negative if less, zero if equal,
1211 positive if greater. */
1213 static int
1214 cmp_hashes (char const *texta, size_t lena,
1215 char const *textb, size_t lenb)
1217 /* Try random hashes until a pair of hashes disagree. But if the
1218 first pair of random hashes agree, check whether the keys are
1219 identical and if so report no difference. */
1220 int diff;
1221 size_t i;
1222 for (i = 0; ; i++)
1224 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1225 struct md5_ctx s[2];
1226 s[0] = s[1] = random_state (i);
1227 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
1228 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
1229 diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1230 if (diff != 0)
1231 break;
1232 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1233 break;
1236 return diff;
1239 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1240 using one or more random hash functions. */
1242 static int
1243 compare_random (char *restrict texta, size_t lena,
1244 char *restrict textb, size_t lenb)
1246 int diff;
1248 if (! hard_LC_COLLATE)
1249 diff = cmp_hashes (texta, lena, textb, lenb);
1250 else
1252 /* Transform the text into the basis of comparison, so that byte
1253 strings that would otherwise considered to be equal are
1254 considered equal here even if their bytes differ. */
1256 char *buf = NULL;
1257 char stackbuf[4000];
1258 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1259 bool a_fits = tlena <= sizeof stackbuf;
1260 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1261 (a_fits ? sizeof stackbuf - tlena : 0),
1262 textb, lenb);
1264 if (a_fits && tlena + tlenb <= sizeof stackbuf)
1265 buf = stackbuf;
1266 else
1268 /* Adding 1 to the buffer size lets xmemxfrm run a bit
1269 faster by avoiding the need for an extra buffer copy. */
1270 buf = xmalloc (tlena + tlenb + 1);
1271 xmemxfrm (buf, tlena + 1, texta, lena);
1272 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1275 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1277 if (buf != stackbuf)
1278 free (buf);
1281 return diff;
1284 /* Compare two lines A and B trying every key in sequence until there
1285 are no more keys or a difference is found. */
1287 static int
1288 keycompare (const struct line *a, const struct line *b)
1290 struct keyfield const *key = keylist;
1292 /* For the first iteration only, the key positions have been
1293 precomputed for us. */
1294 char *texta = a->keybeg;
1295 char *textb = b->keybeg;
1296 char *lima = a->keylim;
1297 char *limb = b->keylim;
1299 int diff;
1301 for (;;)
1303 char const *translate = key->translate;
1304 bool const *ignore = key->ignore;
1306 /* Find the lengths. */
1307 size_t lena = lima <= texta ? 0 : lima - texta;
1308 size_t lenb = limb <= textb ? 0 : limb - textb;
1310 /* Actually compare the fields. */
1312 if (key->random)
1313 diff = compare_random (texta, lena, textb, lenb);
1314 else if (key->numeric | key->general_numeric)
1316 char savea = *lima, saveb = *limb;
1318 *lima = *limb = '\0';
1319 diff = ((key->numeric ? numcompare : general_numcompare)
1320 (texta, textb));
1321 *lima = savea, *limb = saveb;
1323 else if (key->month)
1324 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1325 /* Sorting like this may become slow, so in a simple locale the user
1326 can select a faster sort that is similar to ascii sort. */
1327 else if (hard_LC_COLLATE)
1329 if (ignore || translate)
1331 char buf[4000];
1332 size_t size = lena + 1 + lenb + 1;
1333 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1334 char *copy_b = copy_a + lena + 1;
1335 size_t new_len_a, new_len_b, i;
1337 /* Ignore and/or translate chars before comparing. */
1338 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1340 if (i < lena)
1342 copy_a[new_len_a] = (translate
1343 ? translate[to_uchar (texta[i])]
1344 : texta[i]);
1345 if (!ignore || !ignore[to_uchar (texta[i])])
1346 ++new_len_a;
1348 if (i < lenb)
1350 copy_b[new_len_b] = (translate
1351 ? translate[to_uchar (textb[i])]
1352 : textb [i]);
1353 if (!ignore || !ignore[to_uchar (textb[i])])
1354 ++new_len_b;
1358 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1360 if (sizeof buf < size)
1361 free (copy_a);
1363 else if (lena == 0)
1364 diff = - NONZERO (lenb);
1365 else if (lenb == 0)
1366 goto greater;
1367 else
1368 diff = xmemcoll (texta, lena, textb, lenb);
1370 else if (ignore)
1372 #define CMP_WITH_IGNORE(A, B) \
1373 do \
1375 for (;;) \
1377 while (texta < lima && ignore[to_uchar (*texta)]) \
1378 ++texta; \
1379 while (textb < limb && ignore[to_uchar (*textb)]) \
1380 ++textb; \
1381 if (! (texta < lima && textb < limb)) \
1382 break; \
1383 diff = to_uchar (A) - to_uchar (B); \
1384 if (diff) \
1385 goto not_equal; \
1386 ++texta; \
1387 ++textb; \
1390 diff = (texta < lima) - (textb < limb); \
1392 while (0)
1394 if (translate)
1395 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1396 translate[to_uchar (*textb)]);
1397 else
1398 CMP_WITH_IGNORE (*texta, *textb);
1400 else if (lena == 0)
1401 diff = - NONZERO (lenb);
1402 else if (lenb == 0)
1403 goto greater;
1404 else
1406 if (translate)
1408 while (texta < lima && textb < limb)
1410 diff = (to_uchar (translate[to_uchar (*texta++)])
1411 - to_uchar (translate[to_uchar (*textb++)]));
1412 if (diff)
1413 goto not_equal;
1416 else
1418 diff = memcmp (texta, textb, MIN (lena, lenb));
1419 if (diff)
1420 goto not_equal;
1422 diff = lena < lenb ? -1 : lena != lenb;
1425 if (diff)
1426 goto not_equal;
1428 key = key->next;
1429 if (! key)
1430 break;
1432 /* Find the beginning and limit of the next field. */
1433 if (key->eword != SIZE_MAX)
1434 lima = limfield (a, key), limb = limfield (b, key);
1435 else
1436 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1438 if (key->sword != SIZE_MAX)
1439 texta = begfield (a, key), textb = begfield (b, key);
1440 else
1442 texta = a->text, textb = b->text;
1443 if (key->skipsblanks)
1445 while (texta < lima && blanks[to_uchar (*texta)])
1446 ++texta;
1447 while (textb < limb && blanks[to_uchar (*textb)])
1448 ++textb;
1453 return 0;
1455 greater:
1456 diff = 1;
1457 not_equal:
1458 return key->reverse ? -diff : diff;
1461 /* Compare two lines A and B, returning negative, zero, or positive
1462 depending on whether A compares less than, equal to, or greater than B. */
1464 static int
1465 compare (const struct line *a, const struct line *b)
1467 int diff;
1468 size_t alen, blen;
1470 /* First try to compare on the specified keys (if any).
1471 The only two cases with no key at all are unadorned sort,
1472 and unadorned sort -r. */
1473 if (keylist)
1475 diff = keycompare (a, b);
1476 if (diff | unique | stable)
1477 return diff;
1480 /* If the keys all compare equal (or no keys were specified)
1481 fall through to the default comparison. */
1482 alen = a->length - 1, blen = b->length - 1;
1484 if (alen == 0)
1485 diff = - NONZERO (blen);
1486 else if (blen == 0)
1487 diff = 1;
1488 else if (hard_LC_COLLATE)
1489 diff = xmemcoll (a->text, alen, b->text, blen);
1490 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1491 diff = alen < blen ? -1 : alen != blen;
1493 return reverse ? -diff : diff;
1496 /* Check that the lines read from FILE_NAME come in order. Print a
1497 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1498 false if they are not in order. Otherwise, print no diagnostic
1499 and return true. */
1501 static bool
1502 check (char const *file_name)
1504 FILE *fp = xfopen (file_name, "r");
1505 struct buffer buf; /* Input buffer. */
1506 struct line temp; /* Copy of previous line. */
1507 size_t alloc = 0;
1508 uintmax_t line_number = 0;
1509 struct keyfield const *key = keylist;
1510 bool nonunique = ! unique;
1511 bool ordered = true;
1513 initbuf (&buf, sizeof (struct line),
1514 MAX (merge_buffer_size, sort_size));
1515 temp.text = NULL;
1517 while (fillbuf (&buf, fp, file_name))
1519 struct line const *line = buffer_linelim (&buf);
1520 struct line const *linebase = line - buf.nlines;
1522 /* Make sure the line saved from the old buffer contents is
1523 less than or equal to the first line of the new buffer. */
1524 if (alloc && nonunique <= compare (&temp, line - 1))
1526 found_disorder:
1528 struct line const *disorder_line = line - 1;
1529 uintmax_t disorder_line_number =
1530 buffer_linelim (&buf) - disorder_line + line_number;
1531 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1532 fprintf (stderr, _("%s: %s:%s: disorder: "),
1533 program_name, file_name,
1534 umaxtostr (disorder_line_number, hr_buf));
1535 write_bytes (disorder_line->text, disorder_line->length, stderr,
1536 _("standard error"));
1537 ordered = false;
1538 break;
1542 /* Compare each line in the buffer with its successor. */
1543 while (linebase < --line)
1544 if (nonunique <= compare (line, line - 1))
1545 goto found_disorder;
1547 line_number += buf.nlines;
1549 /* Save the last line of the buffer. */
1550 if (alloc < line->length)
1554 alloc *= 2;
1555 if (! alloc)
1557 alloc = line->length;
1558 break;
1561 while (alloc < line->length);
1563 temp.text = xrealloc (temp.text, alloc);
1565 memcpy (temp.text, line->text, line->length);
1566 temp.length = line->length;
1567 if (key)
1569 temp.keybeg = temp.text + (line->keybeg - line->text);
1570 temp.keylim = temp.text + (line->keylim - line->text);
1574 xfclose (fp, file_name);
1575 free (buf.buf);
1576 free (temp.text);
1577 return ordered;
1580 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
1581 files (all of which are at the start of the FILES array), and
1582 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
1583 Close input and output files before returning.
1584 OUTPUT_FILE gives the name of the output file. If it is NULL,
1585 the output file is standard output. If OFP is NULL, the output
1586 file has not been opened yet (or written to, if standard output). */
1588 static void
1589 mergefps (char **files, size_t ntemps, size_t nfiles,
1590 FILE *ofp, char const *output_file)
1592 FILE *fps[NMERGE]; /* Input streams for each file. */
1593 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1594 struct line saved; /* Saved line storage for unique check. */
1595 struct line const *savedline = NULL;
1596 /* &saved if there is a saved line. */
1597 size_t savealloc = 0; /* Size allocated for the saved line. */
1598 struct line const *cur[NMERGE]; /* Current line in each line table. */
1599 struct line const *base[NMERGE]; /* Base of each line table. */
1600 size_t ord[NMERGE]; /* Table representing a permutation of fps,
1601 such that cur[ord[0]] is the smallest line
1602 and will be next output. */
1603 size_t i;
1604 size_t j;
1605 size_t t;
1606 struct keyfield const *key = keylist;
1607 saved.text = NULL;
1609 /* Read initial lines from each input file. */
1610 for (i = 0; i < nfiles; )
1612 fps[i] = xfopen (files[i], "r");
1613 initbuf (&buffer[i], sizeof (struct line),
1614 MAX (merge_buffer_size, sort_size / nfiles));
1615 if (fillbuf (&buffer[i], fps[i], files[i]))
1617 struct line const *linelim = buffer_linelim (&buffer[i]);
1618 cur[i] = linelim - 1;
1619 base[i] = linelim - buffer[i].nlines;
1620 i++;
1622 else
1624 /* fps[i] is empty; eliminate it from future consideration. */
1625 xfclose (fps[i], files[i]);
1626 if (i < ntemps)
1628 ntemps--;
1629 zaptemp (files[i]);
1631 free (buffer[i].buf);
1632 --nfiles;
1633 for (j = i; j < nfiles; ++j)
1634 files[j] = files[j + 1];
1638 if (! ofp)
1639 ofp = xfopen (output_file, "w");
1641 /* Set up the ord table according to comparisons among input lines.
1642 Since this only reorders two items if one is strictly greater than
1643 the other, it is stable. */
1644 for (i = 0; i < nfiles; ++i)
1645 ord[i] = i;
1646 for (i = 1; i < nfiles; ++i)
1647 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1648 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1650 /* Repeatedly output the smallest line until no input remains. */
1651 while (nfiles)
1653 struct line const *smallest = cur[ord[0]];
1655 /* If uniquified output is turned on, output only the first of
1656 an identical series of lines. */
1657 if (unique)
1659 if (savedline && compare (savedline, smallest))
1661 savedline = NULL;
1662 write_bytes (saved.text, saved.length, ofp, output_file);
1664 if (!savedline)
1666 savedline = &saved;
1667 if (savealloc < smallest->length)
1670 if (! savealloc)
1672 savealloc = smallest->length;
1673 break;
1675 while ((savealloc *= 2) < smallest->length);
1677 saved.text = xrealloc (saved.text, savealloc);
1679 saved.length = smallest->length;
1680 memcpy (saved.text, smallest->text, saved.length);
1681 if (key)
1683 saved.keybeg =
1684 saved.text + (smallest->keybeg - smallest->text);
1685 saved.keylim =
1686 saved.text + (smallest->keylim - smallest->text);
1690 else
1691 write_bytes (smallest->text, smallest->length, ofp, output_file);
1693 /* Check if we need to read more lines into core. */
1694 if (base[ord[0]] < smallest)
1695 cur[ord[0]] = smallest - 1;
1696 else
1698 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1700 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1701 cur[ord[0]] = linelim - 1;
1702 base[ord[0]] = linelim - buffer[ord[0]].nlines;
1704 else
1706 /* We reached EOF on fps[ord[0]]. */
1707 for (i = 1; i < nfiles; ++i)
1708 if (ord[i] > ord[0])
1709 --ord[i];
1710 --nfiles;
1711 xfclose (fps[ord[0]], files[ord[0]]);
1712 if (ord[0] < ntemps)
1714 ntemps--;
1715 zaptemp (files[ord[0]]);
1717 free (buffer[ord[0]].buf);
1718 for (i = ord[0]; i < nfiles; ++i)
1720 fps[i] = fps[i + 1];
1721 files[i] = files[i + 1];
1722 buffer[i] = buffer[i + 1];
1723 cur[i] = cur[i + 1];
1724 base[i] = base[i + 1];
1726 for (i = 0; i < nfiles; ++i)
1727 ord[i] = ord[i + 1];
1728 continue;
1732 /* The new line just read in may be larger than other lines
1733 already in main memory; push it back in the queue until we
1734 encounter a line larger than it. Optimize for the common
1735 case where the new line is smallest. */
1737 size_t lo = 1;
1738 size_t hi = nfiles;
1739 size_t probe = lo;
1740 size_t ord0 = ord[0];
1741 size_t count_of_smaller_lines;
1743 while (lo < hi)
1745 int cmp = compare (cur[ord0], cur[ord[probe]]);
1746 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
1747 hi = probe;
1748 else
1749 lo = probe + 1;
1750 probe = (lo + hi) / 2;
1753 count_of_smaller_lines = lo - 1;
1754 for (j = 0; j < count_of_smaller_lines; j++)
1755 ord[j] = ord[j + 1];
1756 ord[count_of_smaller_lines] = ord0;
1760 if (unique && savedline)
1762 write_bytes (saved.text, saved.length, ofp, output_file);
1763 free (saved.text);
1766 xfclose (ofp, output_file);
1769 /* Merge into T the two sorted arrays of lines LO (with NLO members)
1770 and HI (with NHI members). T, LO, and HI point just past their
1771 respective arrays, and the arrays are in reverse order. NLO and
1772 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
1774 static inline void
1775 mergelines (struct line *t,
1776 struct line const *lo, size_t nlo,
1777 struct line const *hi, size_t nhi)
1779 for (;;)
1780 if (compare (lo - 1, hi - 1) <= 0)
1782 *--t = *--lo;
1783 if (! --nlo)
1785 /* HI - NHI equalled T - (NLO + NHI) when this function
1786 began. Therefore HI must equal T now, and there is no
1787 need to copy from HI to T. */
1788 return;
1791 else
1793 *--t = *--hi;
1794 if (! --nhi)
1797 *--t = *--lo;
1798 while (--nlo);
1800 return;
1805 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
1806 NLINES must be at least 2.
1807 The input and output arrays are in reverse order, and LINES and
1808 TEMP point just past the end of their respective arrays.
1810 Use a recursive divide-and-conquer algorithm, in the style
1811 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
1812 the optimization suggested by exercise 5.2.4-10; this requires room
1813 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
1814 writes that this memory optimization was originally published by
1815 D. A. Bell, Comp J. 1 (1958), 75. */
1817 static void
1818 sortlines (struct line *lines, size_t nlines, struct line *temp)
1820 if (nlines == 2)
1822 if (0 < compare (&lines[-1], &lines[-2]))
1824 struct line tmp = lines[-1];
1825 lines[-1] = lines[-2];
1826 lines[-2] = tmp;
1829 else
1831 size_t nlo = nlines / 2;
1832 size_t nhi = nlines - nlo;
1833 struct line *lo = lines;
1834 struct line *hi = lines - nlo;
1835 struct line *sorted_lo = temp;
1837 sortlines (hi, nhi, temp);
1838 if (1 < nlo)
1839 sortlines_temp (lo, nlo, sorted_lo);
1840 else
1841 sorted_lo[-1] = lo[-1];
1843 mergelines (lines, sorted_lo, nlo, hi, nhi);
1847 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1848 rather than sorting in place. */
1850 static void
1851 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1853 if (nlines == 2)
1855 /* Declare `swap' as int, not bool, to work around a bug
1856 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
1857 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
1858 int swap = (0 < compare (&lines[-1], &lines[-2]));
1859 temp[-1] = lines[-1 - swap];
1860 temp[-2] = lines[-2 + swap];
1862 else
1864 size_t nlo = nlines / 2;
1865 size_t nhi = nlines - nlo;
1866 struct line *lo = lines;
1867 struct line *hi = lines - nlo;
1868 struct line *sorted_hi = temp - nlo;
1870 sortlines_temp (hi, nhi, sorted_hi);
1871 if (1 < nlo)
1872 sortlines (lo, nlo, temp);
1874 mergelines (temp, lo, nlo, sorted_hi, nhi);
1878 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
1879 the same as OUTFILE. If found, merge the found instances (and perhaps
1880 some other files) into a temporary file so that it can in turn be
1881 merged into OUTFILE without destroying OUTFILE before it is completely
1882 read. Return the new value of NFILES, which differs from the old if
1883 some merging occurred.
1885 This test ensures that an otherwise-erroneous use like
1886 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
1887 It's not clear that POSIX requires this nicety.
1888 Detect common error cases, but don't try to catch obscure cases like
1889 "cat ... FILE ... | sort -m -o FILE"
1890 where traditional "sort" doesn't copy the input and where
1891 people should know that they're getting into trouble anyway.
1892 Catching these obscure cases would slow down performance in
1893 common cases. */
1895 static size_t
1896 avoid_trashing_input (char **files, size_t ntemps, size_t nfiles,
1897 char const *outfile)
1899 size_t i;
1900 bool got_outstat = false;
1901 struct stat outstat;
1903 for (i = ntemps; i < nfiles; i++)
1905 bool is_stdin = STREQ (files[i], "-");
1906 bool same;
1907 struct stat instat;
1909 if (outfile && STREQ (outfile, files[i]) && !is_stdin)
1910 same = true;
1911 else
1913 if (! got_outstat)
1915 if ((outfile
1916 ? stat (outfile, &outstat)
1917 : fstat (STDOUT_FILENO, &outstat))
1918 != 0)
1919 break;
1920 got_outstat = true;
1923 same = (((is_stdin
1924 ? fstat (STDIN_FILENO, &instat)
1925 : stat (files[i], &instat))
1926 == 0)
1927 && SAME_INODE (instat, outstat));
1930 if (same)
1932 FILE *tftp;
1933 char *temp = create_temp_file (&tftp);
1934 mergefps (&files[i], 0, nfiles - i, tftp, temp);
1935 files[i] = temp;
1936 return i + 1;
1940 return nfiles;
1943 /* Merge the input FILES. NTEMPS is the number of files at the
1944 start of FILES that are temporary; it is zero at the top level.
1945 NFILES is the total number of files. Put the output in
1946 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
1948 static void
1949 merge (char **files, size_t ntemps, size_t nfiles, char const *output_file)
1951 while (NMERGE < nfiles)
1953 /* Number of input files processed so far. */
1954 size_t in;
1956 /* Number of output files generated so far. */
1957 size_t out;
1959 /* nfiles % NMERGE; this counts input files that are left over
1960 after all full-sized merges have been done. */
1961 size_t remainder;
1963 /* Number of easily-available slots at the next loop iteration. */
1964 size_t cheap_slots;
1966 /* Do as many NMERGE-size merges as possible. */
1967 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
1969 FILE *tfp;
1970 char *temp = create_temp_file (&tfp);
1971 size_t nt = MIN (ntemps, NMERGE);
1972 ntemps -= nt;
1973 mergefps (&files[in], nt, NMERGE, tfp, temp);
1974 files[out] = temp;
1977 remainder = nfiles - in;
1978 cheap_slots = NMERGE - out % NMERGE;
1980 if (cheap_slots < remainder)
1982 /* So many files remain that they can't all be put into the last
1983 NMERGE-sized output window. Do one more merge. Merge as few
1984 files as possible, to avoid needless I/O. */
1985 size_t nshortmerge = remainder - cheap_slots + 1;
1986 FILE *tfp;
1987 char *temp = create_temp_file (&tfp);
1988 size_t nt = MIN (ntemps, nshortmerge);
1989 ntemps -= nt;
1990 mergefps (&files[in], nt, nshortmerge, tfp, temp);
1991 files[out++] = temp;
1992 in += nshortmerge;
1995 /* Put the remaining input files into the last NMERGE-sized output
1996 window, so they will be merged in the next pass. */
1997 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
1998 ntemps += out;
1999 nfiles -= in - out;
2002 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2003 mergefps (files, ntemps, nfiles, NULL, output_file);
2006 /* Sort NFILES FILES onto OUTPUT_FILE. */
2008 static void
2009 sort (char * const *files, size_t nfiles, char const *output_file)
2011 struct buffer buf;
2012 size_t ntemps = 0;
2013 bool output_file_created = false;
2015 buf.alloc = 0;
2017 while (nfiles)
2019 char const *temp_output;
2020 char const *file = *files;
2021 FILE *fp = xfopen (file, "r");
2022 FILE *tfp;
2023 size_t bytes_per_line = (2 * sizeof (struct line)
2024 - sizeof (struct line) / 2);
2026 if (! buf.alloc)
2027 initbuf (&buf, bytes_per_line,
2028 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2029 buf.eof = false;
2030 files++;
2031 nfiles--;
2033 while (fillbuf (&buf, fp, file))
2035 struct line *line;
2036 struct line *linebase;
2038 if (buf.eof && nfiles
2039 && (bytes_per_line + 1
2040 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2042 /* End of file, but there is more input and buffer room.
2043 Concatenate the next input file; this is faster in
2044 the usual case. */
2045 buf.left = buf.used;
2046 break;
2049 line = buffer_linelim (&buf);
2050 linebase = line - buf.nlines;
2051 if (1 < buf.nlines)
2052 sortlines (line, buf.nlines, linebase);
2053 if (buf.eof && !nfiles && !ntemps && !buf.left)
2055 xfclose (fp, file);
2056 tfp = xfopen (output_file, "w");
2057 temp_output = output_file;
2058 output_file_created = true;
2060 else
2062 ++ntemps;
2063 temp_output = create_temp_file (&tfp);
2068 line--;
2069 write_bytes (line->text, line->length, tfp, temp_output);
2070 if (unique)
2071 while (linebase < line && compare (line, line - 1) == 0)
2072 line--;
2074 while (linebase < line);
2076 xfclose (tfp, temp_output);
2078 if (output_file_created)
2079 goto finish;
2081 xfclose (fp, file);
2084 finish:
2085 free (buf.buf);
2087 if (! output_file_created)
2089 size_t i;
2090 struct tempnode *node = temphead;
2091 char **tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2092 for (i = 0; node; i++)
2094 tempfiles[i] = node->name;
2095 node = node->next;
2097 merge (tempfiles, ntemps, ntemps, output_file);
2098 free (tempfiles);
2102 /* Insert key KEY at the end of the key list. */
2104 static void
2105 insertkey (struct keyfield *key)
2107 struct keyfield **p;
2109 for (p = &keylist; *p; p = &(*p)->next)
2110 continue;
2111 *p = key;
2112 key->next = NULL;
2115 /* Report a bad field specification SPEC, with extra info MSGID. */
2117 static void badfieldspec (char const *, char const *)
2118 ATTRIBUTE_NORETURN;
2119 static void
2120 badfieldspec (char const *spec, char const *msgid)
2122 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2123 _(msgid), quote (spec));
2124 abort ();
2127 /* Report incompatible options. */
2129 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2130 static void
2131 incompatible_options (char const *opts)
2133 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2134 abort ();
2137 /* Check compatibility of ordering options. */
2139 static void
2140 check_ordering_compatibility (void)
2142 struct keyfield const *key;
2144 for (key = keylist; key; key = key->next)
2145 if ((1 < (key->random + key->numeric + key->general_numeric + key->month
2146 + !!key->ignore))
2147 || (key->random && key->translate))
2149 char opts[7];
2150 char *p = opts;
2151 if (key->ignore == nondictionary)
2152 *p++ = 'd';
2153 if (key->translate)
2154 *p++ = 'f';
2155 if (key->general_numeric)
2156 *p++ = 'g';
2157 if (key->ignore == nonprinting)
2158 *p++ = 'i';
2159 if (key->month)
2160 *p++ = 'M';
2161 if (key->numeric)
2162 *p++ = 'n';
2163 if (key->random)
2164 *p++ = 'R';
2165 *p = '\0';
2166 incompatible_options (opts);
2170 /* Parse the leading integer in STRING and store the resulting value
2171 (which must fit into size_t) into *VAL. Return the address of the
2172 suffix after the integer. If MSGID is NULL, return NULL after
2173 failure; otherwise, report MSGID and exit on failure. */
2175 static char const *
2176 parse_field_count (char const *string, size_t *val, char const *msgid)
2178 char *suffix;
2179 uintmax_t n;
2181 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2183 case LONGINT_OK:
2184 case LONGINT_INVALID_SUFFIX_CHAR:
2185 *val = n;
2186 if (*val == n)
2187 break;
2188 /* Fall through. */
2189 case LONGINT_OVERFLOW:
2190 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2191 if (msgid)
2192 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2193 _(msgid), (int) (suffix - string), string);
2194 return NULL;
2196 case LONGINT_INVALID:
2197 if (msgid)
2198 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2199 _(msgid), quote (string));
2200 return NULL;
2203 return suffix;
2206 /* Handle interrupts and hangups. */
2208 static void
2209 sighandler (int sig)
2211 if (! SA_NOCLDSTOP)
2212 signal (sig, SIG_IGN);
2214 cleanup ();
2216 signal (sig, SIG_DFL);
2217 raise (sig);
2220 /* Set the ordering options for KEY specified in S.
2221 Return the address of the first character in S that
2222 is not a valid ordering option.
2223 BLANKTYPE is the kind of blanks that 'b' should skip. */
2225 static char *
2226 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2228 while (*s)
2230 switch (*s)
2232 case 'b':
2233 if (blanktype == bl_start || blanktype == bl_both)
2234 key->skipsblanks = true;
2235 if (blanktype == bl_end || blanktype == bl_both)
2236 key->skipeblanks = true;
2237 break;
2238 case 'd':
2239 key->ignore = nondictionary;
2240 break;
2241 case 'f':
2242 key->translate = fold_toupper;
2243 break;
2244 case 'g':
2245 key->general_numeric = true;
2246 break;
2247 case 'i':
2248 /* Option order should not matter, so don't let -i override
2249 -d. -d implies -i, but -i does not imply -d. */
2250 if (! key->ignore)
2251 key->ignore = nonprinting;
2252 break;
2253 case 'M':
2254 key->month = true;
2255 break;
2256 case 'n':
2257 key->numeric = true;
2258 break;
2259 case 'R':
2260 key->random = true;
2261 break;
2262 case 'r':
2263 key->reverse = true;
2264 break;
2265 default:
2266 return (char *) s;
2268 ++s;
2270 return (char *) s;
2273 static struct keyfield *
2274 new_key (void)
2276 struct keyfield *key = xzalloc (sizeof *key);
2277 key->eword = SIZE_MAX;
2278 return key;
2282 main (int argc, char **argv)
2284 struct keyfield *key;
2285 struct keyfield gkey;
2286 char const *s;
2287 int c = 0;
2288 bool checkonly = false;
2289 bool mergeonly = false;
2290 char *random_source = NULL;
2291 bool need_random = false;
2292 size_t nfiles = 0;
2293 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2294 bool obsolete_usage = (posix2_version () < 200112);
2295 char *minus = "-", **files;
2296 char const *outfile = NULL;
2298 initialize_main (&argc, &argv);
2299 program_name = argv[0];
2300 setlocale (LC_ALL, "");
2301 bindtextdomain (PACKAGE, LOCALEDIR);
2302 textdomain (PACKAGE);
2304 atexit (cleanup);
2306 initialize_exit_failure (SORT_FAILURE);
2307 atexit (close_stdout);
2309 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2310 #if HAVE_NL_LANGINFO
2311 hard_LC_TIME = hard_locale (LC_TIME);
2312 #endif
2314 /* Get locale's representation of the decimal point. */
2316 struct lconv const *locale = localeconv ();
2318 /* If the locale doesn't define a decimal point, or if the decimal
2319 point is multibyte, use the C locale's decimal point. FIXME:
2320 add support for multibyte decimal points. */
2321 decimal_point = to_uchar (locale->decimal_point[0]);
2322 if (! decimal_point || locale->decimal_point[1])
2323 decimal_point = '.';
2325 /* FIXME: add support for multibyte thousands separators. */
2326 thousands_sep = to_uchar (*locale->thousands_sep);
2327 if (! thousands_sep || locale->thousands_sep[1])
2328 thousands_sep = -1;
2331 have_read_stdin = false;
2332 inittables ();
2335 size_t i;
2336 static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2337 enum { nsigs = sizeof sig / sizeof sig[0] };
2339 #if SA_NOCLDSTOP
2340 struct sigaction act;
2342 sigemptyset (&caught_signals);
2343 for (i = 0; i < nsigs; i++)
2345 sigaction (sig[i], NULL, &act);
2346 if (act.sa_handler != SIG_IGN)
2347 sigaddset (&caught_signals, sig[i]);
2350 act.sa_handler = sighandler;
2351 act.sa_mask = caught_signals;
2352 act.sa_flags = 0;
2354 for (i = 0; i < nsigs; i++)
2355 if (sigismember (&caught_signals, sig[i]))
2356 sigaction (sig[i], &act, NULL);
2357 #else
2358 for (i = 0; i < nsigs; i++)
2359 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2361 signal (sig[i], sighandler);
2362 siginterrupt (sig[i], 1);
2364 #endif
2367 gkey.sword = gkey.eword = SIZE_MAX;
2368 gkey.ignore = NULL;
2369 gkey.translate = NULL;
2370 gkey.numeric = gkey.general_numeric = gkey.random = false;
2371 gkey.month = gkey.reverse = false;
2372 gkey.skipsblanks = gkey.skipeblanks = false;
2374 files = xnmalloc (argc, sizeof *files);
2376 for (;;)
2378 /* Parse an operand as a file after "--" was seen; or if
2379 pedantic and a file was seen, unless the POSIX version
2380 predates 1003.1-2001 and -c was not seen and the operand is
2381 "-o FILE" or "-oFILE". */
2383 if (c == -1
2384 || (posixly_correct && nfiles != 0
2385 && ! (obsolete_usage
2386 && ! checkonly
2387 && optind != argc
2388 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2389 && (argv[optind][2] || optind + 1 != argc)))
2390 || ((c = getopt_long (argc, argv, short_options,
2391 long_options, NULL))
2392 == -1))
2394 if (argc <= optind)
2395 break;
2396 files[nfiles++] = argv[optind++];
2398 else switch (c)
2400 case 1:
2401 key = NULL;
2402 if (optarg[0] == '+')
2404 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
2405 && ISDIGIT (argv[optind][1]));
2406 obsolete_usage |= minus_pos_usage & ~posixly_correct;
2407 if (obsolete_usage)
2409 /* Treat +POS1 [-POS2] as a key if possible; but silently
2410 treat an operand as a file if it is not a valid +POS1. */
2411 key = new_key ();
2412 s = parse_field_count (optarg + 1, &key->sword, NULL);
2413 if (s && *s == '.')
2414 s = parse_field_count (s + 1, &key->schar, NULL);
2415 if (! (key->sword | key->schar))
2416 key->sword = SIZE_MAX;
2417 if (! s || *set_ordering (s, key, bl_start))
2419 free (key);
2420 key = NULL;
2422 else
2424 if (minus_pos_usage)
2426 char const *optarg1 = argv[optind++];
2427 s = parse_field_count (optarg1 + 1, &key->eword,
2428 N_("invalid number after `-'"));
2429 if (*s == '.')
2430 s = parse_field_count (s + 1, &key->echar,
2431 N_("invalid number after `.'"));
2432 if (*set_ordering (s, key, bl_end))
2433 badfieldspec (optarg1,
2434 N_("stray character in field spec"));
2436 insertkey (key);
2440 if (! key)
2441 files[nfiles++] = optarg;
2442 break;
2444 case 'b':
2445 case 'd':
2446 case 'f':
2447 case 'g':
2448 case 'i':
2449 case 'M':
2450 case 'n':
2451 case 'r':
2452 case 'R':
2454 char str[2];
2455 str[0] = c;
2456 str[1] = '\0';
2457 set_ordering (str, &gkey, bl_both);
2459 break;
2461 case 'c':
2462 checkonly = true;
2463 break;
2465 case 'k':
2466 key = new_key ();
2468 /* Get POS1. */
2469 s = parse_field_count (optarg, &key->sword,
2470 N_("invalid number at field start"));
2471 if (! key->sword--)
2473 /* Provoke with `sort -k0' */
2474 badfieldspec (optarg, N_("field number is zero"));
2476 if (*s == '.')
2478 s = parse_field_count (s + 1, &key->schar,
2479 N_("invalid number after `.'"));
2480 if (! key->schar--)
2482 /* Provoke with `sort -k1.0' */
2483 badfieldspec (optarg, N_("character offset is zero"));
2486 if (! (key->sword | key->schar))
2487 key->sword = SIZE_MAX;
2488 s = set_ordering (s, key, bl_start);
2489 if (*s != ',')
2491 key->eword = SIZE_MAX;
2492 key->echar = 0;
2494 else
2496 /* Get POS2. */
2497 s = parse_field_count (s + 1, &key->eword,
2498 N_("invalid number after `,'"));
2499 if (! key->eword--)
2501 /* Provoke with `sort -k1,0' */
2502 badfieldspec (optarg, N_("field number is zero"));
2504 if (*s == '.')
2505 s = parse_field_count (s + 1, &key->echar,
2506 N_("invalid number after `.'"));
2507 else
2509 /* `-k 2,3' is equivalent to `+1 -3'. */
2510 key->eword++;
2512 s = set_ordering (s, key, bl_end);
2514 if (*s)
2515 badfieldspec (optarg, N_("stray character in field spec"));
2516 insertkey (key);
2517 break;
2519 case 'm':
2520 mergeonly = true;
2521 break;
2523 case 'o':
2524 if (outfile && !STREQ (outfile, optarg))
2525 error (SORT_FAILURE, 0, _("multiple output files specified"));
2526 outfile = optarg;
2527 break;
2529 case RANDOM_SOURCE_OPTION:
2530 if (random_source && !STREQ (random_source, optarg))
2531 error (SORT_FAILURE, 0, _("multiple random sources specified"));
2532 random_source = optarg;
2533 break;
2535 case 's':
2536 stable = true;
2537 break;
2539 case 'S':
2540 specify_sort_size (optarg);
2541 break;
2543 case 't':
2545 char newtab = optarg[0];
2546 if (! newtab)
2547 error (SORT_FAILURE, 0, _("empty tab"));
2548 if (optarg[1])
2550 if (STREQ (optarg, "\\0"))
2551 newtab = '\0';
2552 else
2554 /* Provoke with `sort -txx'. Complain about
2555 "multi-character tab" instead of "multibyte tab", so
2556 that the diagnostic's wording does not need to be
2557 changed once multibyte characters are supported. */
2558 error (SORT_FAILURE, 0, _("multi-character tab %s"),
2559 quote (optarg));
2562 if (tab != TAB_DEFAULT && tab != newtab)
2563 error (SORT_FAILURE, 0, _("incompatible tabs"));
2564 tab = newtab;
2566 break;
2568 case 'T':
2569 add_temp_dir (optarg);
2570 break;
2572 case 'u':
2573 unique = true;
2574 break;
2576 case 'y':
2577 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
2578 through Solaris 7. It is also accepted by many non-Solaris
2579 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
2580 -y is marked as obsolete starting with Solaris 8 (1999), but is
2581 still accepted as of Solaris 10 prerelease (2004).
2583 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
2584 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
2585 and which in general ignores the argument after "-y" if it
2586 consists entirely of digits (it can even be empty). */
2587 if (optarg == argv[optind - 1])
2589 char const *p;
2590 for (p = optarg; ISDIGIT (*p); p++)
2591 continue;
2592 optind -= (*p != '\0');
2594 break;
2596 case 'z':
2597 eolchar = 0;
2598 break;
2600 case_GETOPT_HELP_CHAR;
2602 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2604 default:
2605 usage (SORT_FAILURE);
2609 /* Inheritance of global options to individual keys. */
2610 for (key = keylist; key; key = key->next)
2612 if (! (key->ignore || key->translate
2613 || (key->skipsblanks | key->reverse
2614 | key->skipeblanks | key->month | key->numeric
2615 | key->general_numeric
2616 | key->random)))
2618 key->ignore = gkey.ignore;
2619 key->translate = gkey.translate;
2620 key->skipsblanks = gkey.skipsblanks;
2621 key->skipeblanks = gkey.skipeblanks;
2622 key->month = gkey.month;
2623 key->numeric = gkey.numeric;
2624 key->general_numeric = gkey.general_numeric;
2625 key->random = gkey.random;
2626 key->reverse = gkey.reverse;
2629 need_random |= key->random;
2632 if (!keylist && (gkey.ignore || gkey.translate
2633 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2634 | gkey.numeric | gkey.general_numeric
2635 | gkey.random)))
2637 insertkey (&gkey);
2638 need_random |= gkey.random;
2641 check_ordering_compatibility ();
2643 reverse = gkey.reverse;
2645 if (need_random)
2647 randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
2648 if (! randread_source)
2649 die (_("open failed"), random_source);
2652 if (temp_dir_count == 0)
2654 char const *tmp_dir = getenv ("TMPDIR");
2655 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2658 if (nfiles == 0)
2660 nfiles = 1;
2661 files = &minus;
2664 if (checkonly)
2666 if (nfiles > 1)
2667 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -c"),
2668 quote (files[1]));
2670 if (outfile)
2671 incompatible_options ("co");
2673 /* POSIX requires that sort return 1 IFF invoked with -c and the
2674 input is not properly sorted. */
2675 exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2678 if (mergeonly)
2679 merge (files, 0, nfiles, outfile);
2680 else
2681 sort (files, nfiles, outfile);
2683 if (have_read_stdin && fclose (stdin) == EOF)
2684 die (_("close failed"), "-");
2686 exit (EXIT_SUCCESS);