1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
26 #include <sys/types.h>
32 #include "filevercmp.h"
33 #include "hard-locale.h"
35 #include "ignore-value.h"
42 #include "readtokens0.h"
45 #include "strnumcmp.h"
48 #include "xnanosleep.h"
51 #if HAVE_SYS_RESOURCE_H
52 # include <sys/resource.h>
55 struct rlimit
{ size_t rlim_cur
; };
56 # define getrlimit(Resource, Rlp) (-1)
59 /* The official name of this program (e.g., no `g' prefix). */
60 #define PROGRAM_NAME "sort"
63 proper_name ("Mike Haertel"), \
64 proper_name ("Paul Eggert")
66 #if HAVE_LANGINFO_CODESET
67 # include <langinfo.h>
70 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
73 # define SA_NOCLDSTOP 0
74 /* No sigprocmask. Always 'return' zero. */
75 # define sigprocmask(How, Set, Oset) (0)
77 # if ! HAVE_SIGINTERRUPT
78 # define siginterrupt(sig, flag) /* empty */
82 #if !defined OPEN_MAX && defined NR_OPEN
83 # define OPEN_MAX NR_OPEN
89 #define UCHAR_LIM (UCHAR_MAX + 1)
91 #ifndef DEFAULT_TMPDIR
92 # define DEFAULT_TMPDIR "/tmp"
98 /* POSIX says to exit with status 1 if invoked with -c and the
99 input is not properly sorted. */
100 SORT_OUT_OF_ORDER
= 1,
102 /* POSIX says any other irregular exit must exit with a status
103 code greater than 1. */
109 /* The number of times we should try to fork a compression process
110 (we retry if the fork call fails). We don't _need_ to compress
111 temp files, this is just to reduce disk access, so this number
112 can be small. Each retry doubles in duration. */
113 MAX_FORK_TRIES_COMPRESS
= 4,
115 /* The number of times we should try to fork a decompression process.
116 If we can't fork a decompression process, we can't sort, so this
117 number should be big. Each retry doubles in duration. */
118 MAX_FORK_TRIES_DECOMPRESS
= 9
121 /* The representation of the decimal point in the current locale. */
122 static int decimal_point
;
124 /* Thousands separator; if -1, then there isn't one. */
125 static int thousands_sep
;
127 /* Nonzero if the corresponding locales are hard. */
128 static bool hard_LC_COLLATE
;
130 static bool hard_LC_TIME
;
133 #define NONZERO(x) ((x) != 0)
135 /* The kind of blanks for '-b' to skip in various options. */
136 enum blanktype
{ bl_start
, bl_end
, bl_both
};
138 /* The character marking end of line. Default to \n. */
139 static char eolchar
= '\n';
141 /* Lines are held in core as counted strings. */
144 char *text
; /* Text of the line. */
145 size_t length
; /* Length including final newline. */
146 char *keybeg
; /* Start of first key. */
147 char *keylim
; /* Limit of first key. */
153 char *buf
; /* Dynamically allocated buffer,
154 partitioned into 3 regions:
157 - an array of lines, in reverse order. */
158 size_t used
; /* Number of bytes used for input data. */
159 size_t nlines
; /* Number of lines in the line array. */
160 size_t alloc
; /* Number of bytes allocated. */
161 size_t left
; /* Number of bytes left from previous reads. */
162 size_t line_bytes
; /* Number of bytes to reserve for each line. */
163 bool eof
; /* An EOF has been read. */
168 size_t sword
; /* Zero-origin 'word' to start at. */
169 size_t schar
; /* Additional characters to skip. */
170 size_t eword
; /* Zero-origin first word after field. */
171 size_t echar
; /* Additional characters in field. */
172 bool const *ignore
; /* Boolean array of characters to ignore. */
173 char const *translate
; /* Translation applied to characters. */
174 bool skipsblanks
; /* Skip leading blanks when finding start. */
175 bool skipeblanks
; /* Skip leading blanks when finding end. */
176 bool numeric
; /* Flag for numeric comparison. Handle
177 strings of digits with optional decimal
178 point, but no exponential notation. */
179 bool random
; /* Sort by random hash of key. */
180 bool general_numeric
; /* Flag for general, numeric comparison.
181 Handle numbers in exponential notation. */
182 bool human_numeric
; /* Flag for sorting by human readable
183 units with either SI xor IEC prefixes. */
184 int iec_present
; /* Flag for checking for mixed SI and IEC. */
185 bool month
; /* Flag for comparison by month name. */
186 bool reverse
; /* Reverse the sense of comparison. */
187 bool version
; /* sort by version number */
188 struct keyfield
*next
; /* Next keyfield to try. */
197 /* FIXME: None of these tables work with multibyte character sets.
198 Also, there are many other bugs when handling multibyte characters.
199 One way to fix this is to rewrite `sort' to use wide characters
200 internally, but doing this with good performance is a bit
203 /* Table of blanks. */
204 static bool blanks
[UCHAR_LIM
];
206 /* Table of non-printing characters. */
207 static bool nonprinting
[UCHAR_LIM
];
209 /* Table of non-dictionary characters (not letters, digits, or blanks). */
210 static bool nondictionary
[UCHAR_LIM
];
212 /* Translation table folding lower case to upper. */
213 static unsigned char fold_toupper
[UCHAR_LIM
];
215 #define MONTHS_PER_YEAR 12
217 /* Table mapping month names to integers.
218 Alphabetic order allows binary search. */
219 static struct month monthtab
[] =
235 /* During the merge phase, the number of files to merge at once. */
236 #define NMERGE_DEFAULT 16
238 /* Minimum size for a merge or check buffer. */
239 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
241 /* Minimum sort size; the code might not work with smaller sizes. */
242 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
244 /* The number of bytes needed for a merge or check buffer, which can
245 function relatively efficiently even if it holds only one line. If
246 a longer line is seen, this value is increased. */
247 static size_t merge_buffer_size
= MAX (MIN_MERGE_BUFFER_SIZE
, 256 * 1024);
249 /* The approximate maximum number of bytes of main memory to use, as
250 specified by the user. Zero if the user has not specified a size. */
251 static size_t sort_size
;
253 /* The guessed size for non-regular files. */
254 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
256 /* Array of directory names in which any temporary files are to be created. */
257 static char const **temp_dirs
;
259 /* Number of temporary directory names used. */
260 static size_t temp_dir_count
;
262 /* Number of allocated slots in temp_dirs. */
263 static size_t temp_dir_alloc
;
265 /* Flag to reverse the order of all comparisons. */
268 /* Flag for stable sort. This turns off the last ditch bytewise
269 comparison of lines, and instead leaves lines in the same order
270 they were read if all keys compare equal. */
273 /* If TAB has this value, blanks separate fields. */
274 enum { TAB_DEFAULT
= CHAR_MAX
+ 1 };
276 /* Tab character separating fields. If TAB_DEFAULT, then fields are
277 separated by the empty string between a non-blank character and a blank
279 static int tab
= TAB_DEFAULT
;
281 /* Flag to remove consecutive duplicate lines from the output.
282 Only the last of a sequence of equal lines will be output. */
285 /* Nonzero if any of the input files are the standard input. */
286 static bool have_read_stdin
;
288 /* List of key field comparisons to be tried. */
289 static struct keyfield
*keylist
;
291 /* Program used to (de)compress temp files. Must accept -d. */
292 static char const *compress_program
;
294 /* Maximum number of files to merge in one go. If more than this
295 number are present, temp files will be used. */
296 static unsigned int nmerge
= NMERGE_DEFAULT
;
298 static void sortlines_temp (struct line
*, size_t, struct line
*);
300 /* Report MESSAGE for FILE, then clean up and exit.
301 If FILE is null, it represents standard output. */
303 static void die (char const *, char const *) ATTRIBUTE_NORETURN
;
305 die (char const *message
, char const *file
)
307 error (0, errno
, "%s: %s", message
, file
? file
: _("standard output"));
314 if (status
!= EXIT_SUCCESS
)
315 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
320 Usage: %s [OPTION]... [FILE]...\n\
321 or: %s [OPTION]... --files0-from=F\n\
323 program_name
, program_name
);
325 Write sorted concatenation of all FILE(s) to standard output.\n\
329 Mandatory arguments to long options are mandatory for short options too.\n\
336 -b, --ignore-leading-blanks ignore leading blanks\n\
337 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
338 -f, --ignore-case fold lower case to upper case characters\n\
341 -g, --general-numeric-sort compare according to general numerical value\n\
342 -i, --ignore-nonprinting consider only printable characters\n\
343 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
346 -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\
349 -n, --numeric-sort compare according to string numerical value\n\
350 -R, --random-sort sort by random hash of keys\n\
351 --random-source=FILE get random bytes from FILE\n\
352 -r, --reverse reverse the result of comparisons\n\
355 --sort=WORD sort according to WORD:\n\
356 general-numeric -g, human-numeric -h, month -M,\n\
357 numeric -n, random -R, version -V\n\
358 -V, --version-sort natural sort of (version) numbers within text\n\
366 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
367 for more use temp files\n\
370 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
371 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
372 --compress-program=PROG compress temporaries with PROG;\n\
373 decompress them with PROG -d\n\
374 --files0-from=F read input from the files specified by\n\
375 NUL-terminated names in file F;\n\
376 If F is - then read names from standard input\n\
379 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
380 (default end of line)\n\
381 -m, --merge merge already sorted files; do not sort\n\
384 -o, --output=FILE write result to FILE instead of standard output\n\
385 -s, --stable stabilize sort by disabling last-resort comparison\n\
386 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
389 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
390 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
391 multiple options specify multiple directories\n\
392 -u, --unique with -c, check for strict ordering;\n\
393 without -c, output only the first of an equal run\n\
396 -z, --zero-terminated end lines with 0 byte, not newline\n\
398 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
399 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
402 POS is F[.C][OPTS], where F is the field number and C the character position\n\
403 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
404 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
405 one or more single-letter ordering options, which override global ordering\n\
406 options for that key. If no key is given, use the entire line as the key.\n\
408 SIZE may be followed by the following multiplicative suffixes:\n\
411 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
413 With no FILE, or when FILE is -, read standard input.\n\
416 The locale specified by the environment affects sort order.\n\
417 Set LC_ALL=C to get the traditional sort order that uses\n\
418 native byte values.\n\
420 emit_ancillary_info ();
426 /* For long options that have no equivalent short option, use a
427 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
430 CHECK_OPTION
= CHAR_MAX
+ 1,
431 COMPRESS_PROGRAM_OPTION
,
434 RANDOM_SOURCE_OPTION
,
438 static char const short_options
[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
440 static struct option
const long_options
[] =
442 {"ignore-leading-blanks", no_argument
, NULL
, 'b'},
443 {"check", optional_argument
, NULL
, CHECK_OPTION
},
444 {"compress-program", required_argument
, NULL
, COMPRESS_PROGRAM_OPTION
},
445 {"dictionary-order", no_argument
, NULL
, 'd'},
446 {"ignore-case", no_argument
, NULL
, 'f'},
447 {"files0-from", required_argument
, NULL
, FILES0_FROM_OPTION
},
448 {"general-numeric-sort", no_argument
, NULL
, 'g'},
449 {"ignore-nonprinting", no_argument
, NULL
, 'i'},
450 {"key", required_argument
, NULL
, 'k'},
451 {"merge", no_argument
, NULL
, 'm'},
452 {"month-sort", no_argument
, NULL
, 'M'},
453 {"numeric-sort", no_argument
, NULL
, 'n'},
454 {"human-numeric-sort", no_argument
, NULL
, 'h'},
455 {"version-sort", no_argument
, NULL
, 'V'},
456 {"random-sort", no_argument
, NULL
, 'R'},
457 {"random-source", required_argument
, NULL
, RANDOM_SOURCE_OPTION
},
458 {"sort", required_argument
, NULL
, SORT_OPTION
},
459 {"output", required_argument
, NULL
, 'o'},
460 {"reverse", no_argument
, NULL
, 'r'},
461 {"stable", no_argument
, NULL
, 's'},
462 {"batch-size", required_argument
, NULL
, NMERGE_OPTION
},
463 {"buffer-size", required_argument
, NULL
, 'S'},
464 {"field-separator", required_argument
, NULL
, 't'},
465 {"temporary-directory", required_argument
, NULL
, 'T'},
466 {"unique", no_argument
, NULL
, 'u'},
467 {"zero-terminated", no_argument
, NULL
, 'z'},
468 {GETOPT_HELP_OPTION_DECL
},
469 {GETOPT_VERSION_OPTION_DECL
},
473 #define CHECK_TABLE \
475 _ct_("silent", 'C') \
476 _ct_("diagnose-first", 'c')
478 static char const *const check_args
[] =
480 #define _ct_(_s, _c) _s,
484 static char const check_types
[] =
486 #define _ct_(_s, _c) _c,
492 _st_("general-numeric", 'g') \
493 _st_("human-numeric", 'h') \
495 _st_("numeric", 'n') \
496 _st_("random", 'R') \
499 static char const *const sort_args
[] =
501 #define _st_(_s, _c) _s,
505 static char const sort_types
[] =
507 #define _st_(_s, _c) _c,
512 /* The set of signals that are caught. */
513 static sigset_t caught_signals
;
515 /* Critical section status. */
522 /* Enter a critical section. */
523 static struct cs_status
526 struct cs_status status
;
527 status
.valid
= (sigprocmask (SIG_BLOCK
, &caught_signals
, &status
.sigs
) == 0);
531 /* Leave a critical section. */
533 cs_leave (struct cs_status status
)
537 /* Ignore failure when restoring the signal mask. */
538 sigprocmask (SIG_SETMASK
, &status
.sigs
, NULL
);
542 /* The list of temporary files. */
545 struct tempnode
*volatile next
;
546 pid_t pid
; /* If compressed, the pid of compressor, else zero */
547 char name
[1]; /* Actual size is 1 + file name length. */
549 static struct tempnode
*volatile temphead
;
550 static struct tempnode
*volatile *temptail
= &temphead
;
555 pid_t pid
; /* If compressed, the pid of compressor, else zero */
558 /* A table where we store compression process states. We clean up all
559 processes in a timely manner so as not to exhaust system resources,
560 so we store the info on whether the process is still running, or has
562 static Hash_table
*proctab
;
564 enum { INIT_PROCTAB_SIZE
= 47 };
566 enum procstate
{ ALIVE
, ZOMBIE
};
568 /* A proctab entry. The COUNT field is there in case we fork a new
569 compression process that has the same PID as an old zombie process
570 that is still in the table (because the process to decompress the
571 temp file it was associated with hasn't started yet). */
575 enum procstate state
;
580 proctab_hasher (const void *entry
, size_t tabsize
)
582 const struct procnode
*node
= entry
;
583 return node
->pid
% tabsize
;
587 proctab_comparator (const void *e1
, const void *e2
)
589 const struct procnode
*n1
= e1
, *n2
= e2
;
590 return n1
->pid
== n2
->pid
;
593 /* The total number of forked processes (compressors and decompressors)
594 that have not been reaped yet. */
595 static size_t nprocs
;
597 /* The number of child processes we'll allow before we try to reap some. */
598 enum { MAX_PROCS_BEFORE_REAP
= 2 };
600 /* If 0 < PID, wait for the child process with that PID to exit.
601 If PID is -1, clean up a random child process which has finished and
602 return the process ID of that child. If PID is -1 and no processes
603 have quit yet, return 0 without waiting. */
609 pid_t cpid
= waitpid (pid
, &status
, pid
< 0 ? WNOHANG
: 0);
612 error (SORT_FAILURE
, errno
, _("waiting for %s [-d]"),
616 if (! WIFEXITED (status
) || WEXITSTATUS (status
))
617 error (SORT_FAILURE
, 0, _("%s [-d] terminated abnormally"),
625 /* Add the PID of a running compression process to proctab, or update
626 the entry COUNT and STATE fields if it's already there. This also
627 creates the table for us the first time it's called. */
630 register_proc (pid_t pid
)
632 struct procnode test
, *node
;
636 proctab
= hash_initialize (INIT_PROCTAB_SIZE
, NULL
,
645 node
= hash_lookup (proctab
, &test
);
653 node
= xmalloc (sizeof *node
);
657 if (hash_insert (proctab
, node
) == NULL
)
662 /* This is called when we reap a random process. We don't know
663 whether we have reaped a compression process or a decompression
664 process until we look in the table. If there's an ALIVE entry for
665 it, then we have reaped a compression process, so change the state
666 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
669 update_proc (pid_t pid
)
671 struct procnode test
, *node
;
674 node
= hash_lookup (proctab
, &test
);
676 node
->state
= ZOMBIE
;
679 /* This is for when we need to wait for a compression process to exit.
680 If it has a ZOMBIE entry in the table then it's already dead and has
681 been reaped. Note that if there's an ALIVE entry for it, it still may
682 already have died and been reaped if a second process was created with
683 the same PID. This is probably exceedingly rare, but to be on the safe
684 side we will have to wait for any compression process with this PID. */
687 wait_proc (pid_t pid
)
689 struct procnode test
, *node
;
692 node
= hash_lookup (proctab
, &test
);
693 if (node
->state
== ALIVE
)
696 node
->state
= ZOMBIE
;
699 hash_delete (proctab
, node
);
704 /* Keep reaping finished children as long as there are more to reap.
705 This doesn't block waiting for any of them, it only reaps those
706 that are already dead. */
713 while (0 < nprocs
&& (pid
= reap (-1)))
717 /* Clean up any remaining temporary files. */
722 struct tempnode
const *node
;
724 for (node
= temphead
; node
; node
= node
->next
)
729 /* Cleanup actions to take when exiting. */
736 /* Clean up any remaining temporary files in a critical section so
737 that a signal handler does not try to clean them too. */
738 struct cs_status cs
= cs_enter ();
746 /* Create a new temporary file, returning its newly allocated tempnode.
747 Store into *PFD the file descriptor open for writing.
748 If the creation fails, return NULL and store -1 into *PFD if the
749 failure is due to file descriptor exhaustion and
750 SURVIVE_FD_EXHAUSTION; otherwise, die. */
752 static struct tempnode
*
753 create_temp_file (int *pfd
, bool survive_fd_exhaustion
)
755 static char const slashbase
[] = "/sortXXXXXX";
756 static size_t temp_dir_index
;
759 char const *temp_dir
= temp_dirs
[temp_dir_index
];
760 size_t len
= strlen (temp_dir
);
761 struct tempnode
*node
=
762 xmalloc (offsetof (struct tempnode
, name
) + len
+ sizeof slashbase
);
763 char *file
= node
->name
;
766 memcpy (file
, temp_dir
, len
);
767 memcpy (file
+ len
, slashbase
, sizeof slashbase
);
770 if (++temp_dir_index
== temp_dir_count
)
773 /* Create the temporary file in a critical section, to avoid races. */
779 temptail
= &node
->next
;
787 if (! (survive_fd_exhaustion
&& errno
== EMFILE
))
788 error (SORT_FAILURE
, errno
, _("cannot create temporary file in %s"),
798 /* Predeclare an access pattern for input files.
799 Ignore any errors -- this is only advisory.
801 There are a few hints we could possibly provide,
802 and after careful testing it was decided that
803 specifying POSIX_FADV_SEQUENTIAL was not detrimental
804 to any cases. On Linux 2.6.31, this option doubles
805 the size of read ahead performed and thus was seen to
808 Sorting with a smaller internal buffer
809 Reading from faster flash devices
811 In _addition_ one could also specify other hints...
813 POSIX_FADV_WILLNEED was tested, but Linux 2.6.31
814 at least uses that to _synchronously_ prepopulate the cache
815 with the specified range. While sort does need to
816 read all of its input before outputting, a synchronous
817 read of the whole file up front precludes any processing
818 that sort could do in parallel with the system doing
819 read ahead of the data. This was seen to have negative effects
820 in a couple of cases:
822 Sorting with a smaller internal buffer
823 Note this option was seen to shorten the runtime for sort
824 on a multicore system with lots of RAM and other processes
825 competing for CPU. It could be argued that more explicit
826 scheduling hints with `nice` et. al. are more appropriate
829 POSIX_FADV_NOREUSE is a possibility as it could lower
830 the priority of input data in the cache as sort will
831 only need to process it once. However its functionality
832 has changed over Linux kernel versions and as of 2.6.31
833 it does nothing and thus we can't depend on what it might
836 POSIX_FADV_DONTNEED is not appropriate for user specified
837 input files, but for temp files we do want to drop the
838 cache immediately after processing. This is done implicitly
839 however when the files are unlinked. */
842 fadvise_input (FILE *fp
)
844 #if HAVE_POSIX_FADVISE
847 int fd
= fileno (fp
);
848 ignore_value (posix_fadvise (fd
, 0, 0, POSIX_FADV_SEQUENTIAL
));
853 /* Return a stream for FILE, opened with mode HOW. A null FILE means
854 standard output; HOW should be "w". When opening for input, "-"
855 means standard input. To avoid confusion, do not return file
856 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
857 opening an ordinary FILE. Return NULL if unsuccessful. */
860 stream_open (const char *file
, const char *how
)
867 if (STREQ (file
, "-"))
869 have_read_stdin
= true;
873 fp
= fopen (file
, how
);
877 return fopen (file
, how
);
880 /* Same as stream_open, except always return a non-null value; die on
884 xfopen (const char *file
, const char *how
)
886 FILE *fp
= stream_open (file
, how
);
888 die (_("open failed"), file
);
892 /* Close FP, whose name is FILE, and report any errors. */
895 xfclose (FILE *fp
, char const *file
)
900 /* Allow reading stdin from tty more than once. */
906 /* Don't close stdout just yet. close_stdout does that. */
907 if (fflush (fp
) != 0)
908 die (_("fflush failed"), file
);
912 if (fclose (fp
) != 0)
913 die (_("close failed"), file
);
919 dup2_or_die (int oldfd
, int newfd
)
921 if (dup2 (oldfd
, newfd
) < 0)
922 error (SORT_FAILURE
, errno
, _("dup2 failed"));
925 /* Fork a child process for piping to and do common cleanup. The
926 TRIES parameter tells us how many times to try to fork before
927 giving up. Return the PID of the child, or -1 (setting errno)
931 pipe_fork (int pipefds
[2], size_t tries
)
933 #if HAVE_WORKING_FORK
934 struct tempnode
*saved_temphead
;
936 double wait_retry
= 0.25;
937 pid_t pid
IF_LINT (= -1);
940 if (pipe (pipefds
) < 0)
945 /* This is so the child process won't delete our temp files
946 if it receives a signal before exec-ing. */
948 saved_temphead
= temphead
;
954 temphead
= saved_temphead
;
959 if (0 <= pid
|| errno
!= EAGAIN
)
963 xnanosleep (wait_retry
);
978 close (STDIN_FILENO
);
979 close (STDOUT_FILENO
);
986 #else /* ! HAVE_WORKING_FORK */
991 /* Create a temporary file and start a compression program to filter output
992 to that file. Set *PFP to the file handle and if PPID is non-NULL,
993 set *PPID to the PID of the newly-created process. If the creation
994 fails, return NULL if the failure is due to file descriptor
995 exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */
998 maybe_create_temp (FILE **pfp
, pid_t
*ppid
, bool survive_fd_exhaustion
)
1001 struct tempnode
*node
= create_temp_file (&tempfd
, survive_fd_exhaustion
);
1009 if (compress_program
)
1013 node
->pid
= pipe_fork (pipefds
, MAX_FORK_TRIES_COMPRESS
);
1018 tempfd
= pipefds
[1];
1020 register_proc (node
->pid
);
1022 else if (node
->pid
== 0)
1025 dup2_or_die (tempfd
, STDOUT_FILENO
);
1027 dup2_or_die (pipefds
[0], STDIN_FILENO
);
1030 if (execlp (compress_program
, compress_program
, (char *) NULL
) < 0)
1031 error (SORT_FAILURE
, errno
, _("couldn't execute %s"),
1038 *pfp
= fdopen (tempfd
, "w");
1040 die (_("couldn't create temporary file"), name
);
1048 /* Create a temporary file and start a compression program to filter output
1049 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
1050 set it to the PID of the newly-created process. Die on failure. */
1053 create_temp (FILE **pfp
, pid_t
*ppid
)
1055 return maybe_create_temp (pfp
, ppid
, false);
1058 /* Open a compressed temp file and start a decompression process through
1059 which to filter the input. PID must be the valid processes ID of the
1060 process used to compress the file. Return NULL (setting errno to
1061 EMFILE) if we ran out of file descriptors, and die on any other
1065 open_temp (const char *name
, pid_t pid
)
1067 int tempfd
, pipefds
[2];
1072 tempfd
= open (name
, O_RDONLY
);
1076 switch (pipe_fork (pipefds
, MAX_FORK_TRIES_DECOMPRESS
))
1079 if (errno
!= EMFILE
)
1080 error (SORT_FAILURE
, errno
, _("couldn't create process for %s -d"),
1088 dup2_or_die (tempfd
, STDIN_FILENO
);
1090 dup2_or_die (pipefds
[1], STDOUT_FILENO
);
1093 execlp (compress_program
, compress_program
, "-d", (char *) NULL
);
1094 error (SORT_FAILURE
, errno
, _("couldn't execute %s -d"),
1101 fp
= fdopen (pipefds
[0], "r");
1104 int saved_errno
= errno
;
1106 errno
= saved_errno
;
1115 write_bytes (const char *buf
, size_t n_bytes
, FILE *fp
, const char *output_file
)
1117 if (fwrite (buf
, 1, n_bytes
, fp
) != n_bytes
)
1118 die (_("write failed"), output_file
);
1121 /* Append DIR to the array of temporary directory names. */
1123 add_temp_dir (char const *dir
)
1125 if (temp_dir_count
== temp_dir_alloc
)
1126 temp_dirs
= X2NREALLOC (temp_dirs
, &temp_dir_alloc
);
1128 temp_dirs
[temp_dir_count
++] = dir
;
1131 /* Remove NAME from the list of temporary files. */
1134 zaptemp (const char *name
)
1136 struct tempnode
*volatile *pnode
;
1137 struct tempnode
*node
;
1138 struct tempnode
*next
;
1140 int unlink_errno
= 0;
1141 struct cs_status cs
;
1143 for (pnode
= &temphead
; (node
= *pnode
)->name
!= name
; pnode
= &node
->next
)
1146 /* Unlink the temporary file in a critical section to avoid races. */
1149 unlink_status
= unlink (name
);
1150 unlink_errno
= errno
;
1154 if (unlink_status
!= 0)
1155 error (0, unlink_errno
, _("warning: cannot remove: %s"), name
);
1161 #if HAVE_NL_LANGINFO
1164 struct_month_cmp (const void *m1
, const void *m2
)
1166 struct month
const *month1
= m1
;
1167 struct month
const *month2
= m2
;
1168 return strcmp (month1
->name
, month2
->name
);
1173 /* Initialize the character class tables. */
1180 for (i
= 0; i
< UCHAR_LIM
; ++i
)
1182 blanks
[i
] = !! isblank (i
);
1183 nonprinting
[i
] = ! isprint (i
);
1184 nondictionary
[i
] = ! isalnum (i
) && ! isblank (i
);
1185 fold_toupper
[i
] = toupper (i
);
1188 #if HAVE_NL_LANGINFO
1189 /* If we're not in the "C" locale, read different names for months. */
1192 for (i
= 0; i
< MONTHS_PER_YEAR
; i
++)
1199 s
= (char *) nl_langinfo (ABMON_1
+ i
);
1201 monthtab
[i
].name
= name
= xmalloc (s_len
+ 1);
1202 monthtab
[i
].val
= i
+ 1;
1204 for (j
= k
= 0; j
< s_len
; j
++)
1205 if (! isblank (to_uchar (s
[j
])))
1206 name
[k
++] = fold_toupper
[to_uchar (s
[j
])];
1209 qsort ((void *) monthtab
, MONTHS_PER_YEAR
,
1210 sizeof *monthtab
, struct_month_cmp
);
1215 /* Specify how many inputs may be merged at once.
1216 This may be set on the command-line with the
1217 --batch-size option. */
1219 specify_nmerge (int oi
, char c
, char const *s
)
1222 struct rlimit rlimit
;
1223 enum strtol_error e
= xstrtoumax (s
, NULL
, 10, &n
, NULL
);
1225 /* Try to find out how many file descriptors we'll be able
1226 to open. We need at least nmerge + 3 (STDIN_FILENO,
1227 STDOUT_FILENO and STDERR_FILENO). */
1228 unsigned int max_nmerge
= ((getrlimit (RLIMIT_NOFILE
, &rlimit
) == 0
1233 if (e
== LONGINT_OK
)
1237 e
= LONGINT_OVERFLOW
;
1242 error (0, 0, _("invalid --%s argument %s"),
1243 long_options
[oi
].name
, quote(s
));
1244 error (SORT_FAILURE
, 0,
1245 _("minimum --%s argument is %s"),
1246 long_options
[oi
].name
, quote("2"));
1248 else if (max_nmerge
< nmerge
)
1250 e
= LONGINT_OVERFLOW
;
1257 if (e
== LONGINT_OVERFLOW
)
1259 char max_nmerge_buf
[INT_BUFSIZE_BOUND (unsigned int)];
1260 error (0, 0, _("--%s argument %s too large"),
1261 long_options
[oi
].name
, quote(s
));
1262 error (SORT_FAILURE
, 0,
1263 _("maximum --%s argument with current rlimit is %s"),
1264 long_options
[oi
].name
,
1265 uinttostr (max_nmerge
, max_nmerge_buf
));
1268 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1271 /* Specify the amount of main memory to use when sorting. */
1273 specify_sort_size (int oi
, char c
, char const *s
)
1277 enum strtol_error e
= xstrtoumax (s
, &suffix
, 10, &n
, "EgGkKmMPtTYZ");
1279 /* The default unit is KiB. */
1280 if (e
== LONGINT_OK
&& ISDIGIT (suffix
[-1]))
1282 if (n
<= UINTMAX_MAX
/ 1024)
1285 e
= LONGINT_OVERFLOW
;
1288 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1289 if (e
== LONGINT_INVALID_SUFFIX_CHAR
&& ISDIGIT (suffix
[-1]) && ! suffix
[1])
1298 double mem
= physmem_total () * n
/ 100;
1300 /* Use "<", not "<=", to avoid problems with rounding. */
1301 if (mem
< UINTMAX_MAX
)
1307 e
= LONGINT_OVERFLOW
;
1312 if (e
== LONGINT_OK
)
1314 /* If multiple sort sizes are specified, take the maximum, so
1315 that option order does not matter. */
1322 sort_size
= MAX (sort_size
, MIN_SORT_SIZE
);
1326 e
= LONGINT_OVERFLOW
;
1329 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1332 /* Return the default sort size. */
1334 default_sort_size (void)
1336 /* Let MEM be available memory or 1/8 of total memory, whichever
1338 double avail
= physmem_available ();
1339 double total
= physmem_total ();
1340 double mem
= MAX (avail
, total
/ 8);
1341 struct rlimit rlimit
;
1343 /* Let SIZE be MEM, but no more than the maximum object size or
1344 system resource limits. Avoid the MIN macro here, as it is not
1345 quite right when only one argument is floating point. Don't
1346 bother to check for values like RLIM_INFINITY since in practice
1347 they are not much less than SIZE_MAX. */
1348 size_t size
= SIZE_MAX
;
1351 if (getrlimit (RLIMIT_DATA
, &rlimit
) == 0 && rlimit
.rlim_cur
< size
)
1352 size
= rlimit
.rlim_cur
;
1354 if (getrlimit (RLIMIT_AS
, &rlimit
) == 0 && rlimit
.rlim_cur
< size
)
1355 size
= rlimit
.rlim_cur
;
1358 /* Leave a large safety margin for the above limits, as failure can
1359 occur when they are exceeded. */
1363 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1364 Exceeding RSS is not fatal, but can be quite slow. */
1365 if (getrlimit (RLIMIT_RSS
, &rlimit
) == 0 && rlimit
.rlim_cur
/ 16 * 15 < size
)
1366 size
= rlimit
.rlim_cur
/ 16 * 15;
1369 /* Use no less than the minimum. */
1370 return MAX (size
, MIN_SORT_SIZE
);
1373 /* Return the sort buffer size to use with the input files identified
1374 by FPS and FILES, which are alternate names of the same files.
1375 NFILES gives the number of input files; NFPS may be less. Assume
1376 that each input line requires LINE_BYTES extra bytes' worth of line
1377 information. Do not exceed the size bound specified by the user
1378 (or a default size bound, if the user does not specify one). */
1381 sort_buffer_size (FILE *const *fps
, size_t nfps
,
1382 char *const *files
, size_t nfiles
,
1385 /* A bound on the input size. If zero, the bound hasn't been
1387 static size_t size_bound
;
1389 /* In the worst case, each input byte is a newline. */
1390 size_t worst_case_per_input_byte
= line_bytes
+ 1;
1392 /* Keep enough room for one extra input line and an extra byte.
1393 This extra room might be needed when preparing to read EOF. */
1394 size_t size
= worst_case_per_input_byte
+ 1;
1398 for (i
= 0; i
< nfiles
; i
++)
1404 if ((i
< nfps
? fstat (fileno (fps
[i
]), &st
)
1405 : STREQ (files
[i
], "-") ? fstat (STDIN_FILENO
, &st
)
1406 : stat (files
[i
], &st
))
1408 die (_("stat failed"), files
[i
]);
1410 if (S_ISREG (st
.st_mode
))
1411 file_size
= st
.st_size
;
1414 /* The file has unknown size. If the user specified a sort
1415 buffer size, use that; otherwise, guess the size. */
1418 file_size
= INPUT_FILE_SIZE_GUESS
;
1423 size_bound
= sort_size
;
1425 size_bound
= default_sort_size ();
1428 /* Add the amount of memory needed to represent the worst case
1429 where the input consists entirely of newlines followed by a
1430 single non-newline. Check for overflow. */
1431 worst_case
= file_size
* worst_case_per_input_byte
+ 1;
1432 if (file_size
!= worst_case
/ worst_case_per_input_byte
1433 || size_bound
- size
<= worst_case
)
1441 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1442 must be at least sizeof (struct line). Allocate ALLOC bytes
1446 initbuf (struct buffer
*buf
, size_t line_bytes
, size_t alloc
)
1448 /* Ensure that the line array is properly aligned. If the desired
1449 size cannot be allocated, repeatedly halve it until allocation
1450 succeeds. The smaller allocation may hurt overall performance,
1451 but that's better than failing. */
1454 alloc
+= sizeof (struct line
) - alloc
% sizeof (struct line
);
1455 buf
->buf
= malloc (alloc
);
1459 if (alloc
<= line_bytes
+ 1)
1463 buf
->line_bytes
= line_bytes
;
1465 buf
->used
= buf
->left
= buf
->nlines
= 0;
1469 /* Return one past the limit of the line array. */
1471 static inline struct line
*
1472 buffer_linelim (struct buffer
const *buf
)
1474 return (struct line
*) (buf
->buf
+ buf
->alloc
);
1477 /* Return a pointer to the first character of the field specified
1481 begfield (const struct line
*line
, const struct keyfield
*key
)
1483 char *ptr
= line
->text
, *lim
= ptr
+ line
->length
- 1;
1484 size_t sword
= key
->sword
;
1485 size_t schar
= key
->schar
;
1487 /* The leading field separator itself is included in a field when -t
1490 if (tab
!= TAB_DEFAULT
)
1491 while (ptr
< lim
&& sword
--)
1493 while (ptr
< lim
&& *ptr
!= tab
)
1499 while (ptr
< lim
&& sword
--)
1501 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1503 while (ptr
< lim
&& !blanks
[to_uchar (*ptr
)])
1507 /* If we're ignoring leading blanks when computing the Start
1508 of the field, skip past them here. */
1509 if (key
->skipsblanks
)
1510 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1513 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1514 ptr
= MIN (lim
, ptr
+ schar
);
1519 /* Return the limit of (a pointer to the first character after) the field
1520 in LINE specified by KEY. */
1523 limfield (const struct line
*line
, const struct keyfield
*key
)
1525 char *ptr
= line
->text
, *lim
= ptr
+ line
->length
- 1;
1526 size_t eword
= key
->eword
, echar
= key
->echar
;
1529 eword
++; /* Skip all of end field. */
1531 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1532 whichever comes first. If there are more than EWORD fields, leave
1533 PTR pointing at the beginning of the field having zero-based index,
1534 EWORD. If a delimiter character was specified (via -t), then that
1535 `beginning' is the first character following the delimiting TAB.
1536 Otherwise, leave PTR pointing at the first `blank' character after
1537 the preceding field. */
1538 if (tab
!= TAB_DEFAULT
)
1539 while (ptr
< lim
&& eword
--)
1541 while (ptr
< lim
&& *ptr
!= tab
)
1543 if (ptr
< lim
&& (eword
|| echar
))
1547 while (ptr
< lim
&& eword
--)
1549 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1551 while (ptr
< lim
&& !blanks
[to_uchar (*ptr
)])
1555 #ifdef POSIX_UNSPECIFIED
1556 /* The following block of code makes GNU sort incompatible with
1557 standard Unix sort, so it's ifdef'd out for now.
1558 The POSIX spec isn't clear on how to interpret this.
1559 FIXME: request clarification.
1561 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1562 Date: Thu, 30 May 96 12:20:41 -0400
1563 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1565 [...]I believe I've found another bug in `sort'.
1570 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1573 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1577 Unix sort produced the answer I expected: sort on the single character
1578 in column 7. GNU sort produced different results, because it disagrees
1579 on the interpretation of the key-end spec "M.N". Unix sort reads this
1580 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1581 "skip M-1 fields, then either N-1 characters or the rest of the current
1582 field, whichever comes first". This extra clause applies only to
1583 key-ends, not key-starts.
1586 /* Make LIM point to the end of (one byte past) the current field. */
1587 if (tab
!= TAB_DEFAULT
)
1590 newlim
= memchr (ptr
, tab
, lim
- ptr
);
1598 while (newlim
< lim
&& blanks
[to_uchar (*newlim
)])
1600 while (newlim
< lim
&& !blanks
[to_uchar (*newlim
)])
1606 if (echar
!= 0) /* We need to skip over a portion of the end field. */
1608 /* If we're ignoring leading blanks when computing the End
1609 of the field, skip past them here. */
1610 if (key
->skipeblanks
)
1611 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1614 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1615 ptr
= MIN (lim
, ptr
+ echar
);
1621 /* Fill BUF reading from FP, moving buf->left bytes from the end
1622 of buf->buf to the beginning first. If EOF is reached and the
1623 file wasn't terminated by a newline, supply one. Set up BUF's line
1624 table too. FILE is the name of the file corresponding to FP.
1625 Return true if some input was read. */
1628 fillbuf (struct buffer
*buf
, FILE *fp
, char const *file
)
1630 struct keyfield
const *key
= keylist
;
1632 size_t line_bytes
= buf
->line_bytes
;
1633 size_t mergesize
= merge_buffer_size
- MIN_MERGE_BUFFER_SIZE
;
1638 if (buf
->used
!= buf
->left
)
1640 memmove (buf
->buf
, buf
->buf
+ buf
->used
- buf
->left
, buf
->left
);
1641 buf
->used
= buf
->left
;
1647 char *ptr
= buf
->buf
+ buf
->used
;
1648 struct line
*linelim
= buffer_linelim (buf
);
1649 struct line
*line
= linelim
- buf
->nlines
;
1650 size_t avail
= (char *) linelim
- buf
->nlines
* line_bytes
- ptr
;
1651 char *line_start
= buf
->nlines
? line
->text
+ line
->length
: buf
->buf
;
1653 while (line_bytes
+ 1 < avail
)
1655 /* Read as many bytes as possible, but do not read so many
1656 bytes that there might not be enough room for the
1657 corresponding line array. The worst case is when the
1658 rest of the input file consists entirely of newlines,
1659 except that the last byte is not a newline. */
1660 size_t readsize
= (avail
- 1) / (line_bytes
+ 1);
1661 size_t bytes_read
= fread (ptr
, 1, readsize
, fp
);
1662 char *ptrlim
= ptr
+ bytes_read
;
1664 avail
-= bytes_read
;
1666 if (bytes_read
!= readsize
)
1669 die (_("read failed"), file
);
1673 if (buf
->buf
== ptrlim
)
1675 if (ptrlim
[-1] != eol
)
1680 /* Find and record each line in the just-read input. */
1681 while ((p
= memchr (ptr
, eol
, ptrlim
- ptr
)))
1685 line
->text
= line_start
;
1686 line
->length
= ptr
- line_start
;
1687 mergesize
= MAX (mergesize
, line
->length
);
1688 avail
-= line_bytes
;
1692 /* Precompute the position of the first key for
1694 line
->keylim
= (key
->eword
== SIZE_MAX
1696 : limfield (line
, key
));
1698 if (key
->sword
!= SIZE_MAX
)
1699 line
->keybeg
= begfield (line
, key
);
1702 if (key
->skipsblanks
)
1703 while (blanks
[to_uchar (*line_start
)])
1705 line
->keybeg
= line_start
;
1717 buf
->used
= ptr
- buf
->buf
;
1718 buf
->nlines
= buffer_linelim (buf
) - line
;
1719 if (buf
->nlines
!= 0)
1721 buf
->left
= ptr
- line_start
;
1722 merge_buffer_size
= mergesize
+ MIN_MERGE_BUFFER_SIZE
;
1727 /* The current input line is too long to fit in the buffer.
1728 Double the buffer size and try again, keeping it properly
1730 size_t line_alloc
= buf
->alloc
/ sizeof (struct line
);
1731 buf
->buf
= x2nrealloc (buf
->buf
, &line_alloc
, sizeof (struct line
));
1732 buf
->alloc
= line_alloc
* sizeof (struct line
);
1737 /* Compare strings A and B as numbers without explicitly converting them to
1738 machine numbers. Comparatively slow for short strings, but asymptotically
1742 numcompare (const char *a
, const char *b
)
1744 while (blanks
[to_uchar (*a
)])
1746 while (blanks
[to_uchar (*b
)])
1749 return strnumcmp (a
, b
, decimal_point
, thousands_sep
);
1752 /* Exit with an error if a mixture of SI and IEC units detected. */
1755 check_mixed_SI_IEC (char prefix
, struct keyfield
*key
)
1757 int iec_present
= prefix
== 'i';
1758 if (key
->iec_present
!= -1 && iec_present
!= key
->iec_present
)
1759 error (SORT_FAILURE
, 0, _("both SI and IEC prefixes present on units"));
1760 key
->iec_present
= iec_present
;
1763 /* Return an integer which represents the order of magnitude of
1764 the unit following the number. NUMBER can contain thousands separators
1765 or a decimal point, but not have preceeding blanks.
1766 Negative numbers return a negative unit order. */
1769 find_unit_order (const char *number
, struct keyfield
*key
)
1771 static const char orders
[UCHAR_LIM
] =
1773 #if SOME_DAY_WE_WILL_REQUIRE_C99
1774 ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1777 /* Generate the following table with this command:
1778 perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
1779 foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
1781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
1784 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
1785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1788 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1789 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1790 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1791 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1795 const unsigned char *p
= number
;
1805 /* Scan to end of number.
1806 Decimals or separators not followed by digits stop the scan.
1807 Numbers ending in decimals or separators are thus considered
1808 to be lacking in units.
1809 FIXME: add support for multibyte thousands_sep and decimal_point. */
1811 while (ISDIGIT (*p
))
1815 if (*p
== decimal_point
&& ISDIGIT (*(p
+ 1)))
1817 else if (*p
== thousands_sep
&& ISDIGIT (*(p
+ 1)))
1821 int order
= orders
[*p
];
1823 /* For valid units check for MiB vs MB etc. */
1825 check_mixed_SI_IEC (*(p
+ 1), key
);
1827 return sign
* order
;
1830 /* Compare numbers ending in units with SI xor IEC prefixes
1831 <none/unknown> < K/k < M < G < T < P < E < Z < Y
1832 Assume that numbers are properly abbreviated.
1833 i.e. input will never have both 6000K and 5M. */
1836 human_numcompare (const char *a
, const char *b
, struct keyfield
*key
)
1838 while (blanks
[to_uchar (*a
)])
1840 while (blanks
[to_uchar (*b
)])
1843 int order_a
= find_unit_order (a
, key
);
1844 int order_b
= find_unit_order (b
, key
);
1846 return (order_a
> order_b
? 1
1847 : order_a
< order_b
? -1
1848 : strnumcmp (a
, b
, decimal_point
, thousands_sep
));
1852 general_numcompare (const char *sa
, const char *sb
)
1854 /* FIXME: add option to warn about failed conversions. */
1855 /* FIXME: maybe add option to try expensive FP conversion
1856 only if A and B can't be compared more cheaply/accurately. */
1860 double a
= strtod (sa
, &ea
);
1861 double b
= strtod (sb
, &eb
);
1863 /* Put conversion errors at the start of the collating sequence. */
1865 return sb
== eb
? 0 : -1;
1869 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1870 conversion errors but before numbers; sort them by internal
1871 bit-pattern, for lack of a more portable alternative. */
1877 : memcmp ((char *) &a
, (char *) &b
, sizeof a
));
1880 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1881 Return 0 if the name in S is not recognized. */
1884 getmonth (char const *month
, size_t len
)
1887 size_t hi
= MONTHS_PER_YEAR
;
1888 char const *monthlim
= month
+ len
;
1892 if (month
== monthlim
)
1894 if (!blanks
[to_uchar (*month
)])
1901 size_t ix
= (lo
+ hi
) / 2;
1902 char const *m
= month
;
1903 char const *n
= monthtab
[ix
].name
;
1908 return monthtab
[ix
].val
;
1909 if (m
== monthlim
|| fold_toupper
[to_uchar (*m
)] < to_uchar (*n
))
1914 else if (fold_toupper
[to_uchar (*m
)] > to_uchar (*n
))
1926 /* A source of random data. */
1927 static struct randread_source
*randread_source
;
1929 /* Return the Ith randomly-generated state. The caller must invoke
1930 random_state (H) for all H less than I before invoking random_state
1933 static struct md5_ctx
1934 random_state (size_t i
)
1936 /* An array of states resulting from the random data, and counts of
1937 its used and allocated members. */
1938 static struct md5_ctx
*state
;
1940 static size_t allocated
;
1942 struct md5_ctx
*s
= &state
[i
];
1946 unsigned char buf
[MD5_DIGEST_SIZE
];
1952 state
= X2NREALLOC (state
, &allocated
);
1956 randread (randread_source
, buf
, sizeof buf
);
1958 md5_process_bytes (buf
, sizeof buf
, s
);
1964 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1965 with length LENGTHB. Return negative if less, zero if equal,
1966 positive if greater. */
1969 cmp_hashes (char const *texta
, size_t lena
,
1970 char const *textb
, size_t lenb
)
1972 /* Try random hashes until a pair of hashes disagree. But if the
1973 first pair of random hashes agree, check whether the keys are
1974 identical and if so report no difference. */
1979 uint32_t dig
[2][MD5_DIGEST_SIZE
/ sizeof (uint32_t)];
1980 struct md5_ctx s
[2];
1981 s
[0] = s
[1] = random_state (i
);
1982 md5_process_bytes (texta
, lena
, &s
[0]); md5_finish_ctx (&s
[0], dig
[0]);
1983 md5_process_bytes (textb
, lenb
, &s
[1]); md5_finish_ctx (&s
[1], dig
[1]);
1984 diff
= memcmp (dig
[0], dig
[1], sizeof dig
[0]);
1987 if (i
== 0 && lena
== lenb
&& memcmp (texta
, textb
, lena
) == 0)
1994 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1995 using one or more random hash functions. */
1998 compare_random (char *restrict texta
, size_t lena
,
1999 char *restrict textb
, size_t lenb
)
2003 if (! hard_LC_COLLATE
)
2004 diff
= cmp_hashes (texta
, lena
, textb
, lenb
);
2007 /* Transform the text into the basis of comparison, so that byte
2008 strings that would otherwise considered to be equal are
2009 considered equal here even if their bytes differ. */
2012 char stackbuf
[4000];
2013 size_t tlena
= xmemxfrm (stackbuf
, sizeof stackbuf
, texta
, lena
);
2014 bool a_fits
= tlena
<= sizeof stackbuf
;
2015 size_t tlenb
= xmemxfrm ((a_fits
? stackbuf
+ tlena
: NULL
),
2016 (a_fits
? sizeof stackbuf
- tlena
: 0),
2019 if (a_fits
&& tlena
+ tlenb
<= sizeof stackbuf
)
2023 /* Adding 1 to the buffer size lets xmemxfrm run a bit
2024 faster by avoiding the need for an extra buffer copy. */
2025 buf
= xmalloc (tlena
+ tlenb
+ 1);
2026 xmemxfrm (buf
, tlena
+ 1, texta
, lena
);
2027 xmemxfrm (buf
+ tlena
, tlenb
+ 1, textb
, lenb
);
2030 diff
= cmp_hashes (buf
, tlena
, buf
+ tlena
, tlenb
);
2032 if (buf
!= stackbuf
)
2039 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2040 using filevercmp. See lib/filevercmp.h for function description. */
2043 compare_version (char *restrict texta
, size_t lena
,
2044 char *restrict textb
, size_t lenb
)
2048 /* It is necessary to save the character after the end of the field.
2049 "filevercmp" works with NUL terminated strings. Our blocks of
2050 text are not necessarily terminated with a NUL byte. */
2051 char sv_a
= texta
[lena
];
2052 char sv_b
= textb
[lenb
];
2057 diff
= filevercmp (texta
, textb
);
2065 /* Compare two lines A and B trying every key in sequence until there
2066 are no more keys or a difference is found. */
2069 keycompare (const struct line
*a
, const struct line
*b
)
2071 struct keyfield
*key
= keylist
;
2073 /* For the first iteration only, the key positions have been
2074 precomputed for us. */
2075 char *texta
= a
->keybeg
;
2076 char *textb
= b
->keybeg
;
2077 char *lima
= a
->keylim
;
2078 char *limb
= b
->keylim
;
2084 char const *translate
= key
->translate
;
2085 bool const *ignore
= key
->ignore
;
2087 /* Treat field ends before field starts as empty fields. */
2088 lima
= MAX (texta
, lima
);
2089 limb
= MAX (textb
, limb
);
2091 /* Find the lengths. */
2092 size_t lena
= lima
- texta
;
2093 size_t lenb
= limb
- textb
;
2095 /* Actually compare the fields. */
2098 diff
= compare_random (texta
, lena
, textb
, lenb
);
2099 else if (key
->numeric
|| key
->general_numeric
|| key
->human_numeric
)
2101 char savea
= *lima
, saveb
= *limb
;
2103 *lima
= *limb
= '\0';
2104 diff
= (key
->numeric
? numcompare (texta
, textb
)
2105 : key
->general_numeric
? general_numcompare (texta
, textb
)
2106 : human_numcompare (texta
, textb
, key
));
2107 *lima
= savea
, *limb
= saveb
;
2109 else if (key
->version
)
2110 diff
= compare_version (texta
, lena
, textb
, lenb
);
2111 else if (key
->month
)
2112 diff
= getmonth (texta
, lena
) - getmonth (textb
, lenb
);
2113 /* Sorting like this may become slow, so in a simple locale the user
2114 can select a faster sort that is similar to ascii sort. */
2115 else if (hard_LC_COLLATE
)
2117 if (ignore
|| translate
)
2120 size_t size
= lena
+ 1 + lenb
+ 1;
2121 char *copy_a
= (size
<= sizeof buf
? buf
: xmalloc (size
));
2122 char *copy_b
= copy_a
+ lena
+ 1;
2123 size_t new_len_a
, new_len_b
, i
;
2125 /* Ignore and/or translate chars before comparing. */
2126 for (new_len_a
= new_len_b
= i
= 0; i
< MAX (lena
, lenb
); i
++)
2130 copy_a
[new_len_a
] = (translate
2131 ? translate
[to_uchar (texta
[i
])]
2133 if (!ignore
|| !ignore
[to_uchar (texta
[i
])])
2138 copy_b
[new_len_b
] = (translate
2139 ? translate
[to_uchar (textb
[i
])]
2141 if (!ignore
|| !ignore
[to_uchar (textb
[i
])])
2146 diff
= xmemcoll (copy_a
, new_len_a
, copy_b
, new_len_b
);
2148 if (sizeof buf
< size
)
2152 diff
= - NONZERO (lenb
);
2156 diff
= xmemcoll (texta
, lena
, textb
, lenb
);
2160 #define CMP_WITH_IGNORE(A, B) \
2165 while (texta < lima && ignore[to_uchar (*texta)]) \
2167 while (textb < limb && ignore[to_uchar (*textb)]) \
2169 if (! (texta < lima && textb < limb)) \
2171 diff = to_uchar (A) - to_uchar (B); \
2178 diff = (texta < lima) - (textb < limb); \
2183 CMP_WITH_IGNORE (translate
[to_uchar (*texta
)],
2184 translate
[to_uchar (*textb
)]);
2186 CMP_WITH_IGNORE (*texta
, *textb
);
2189 diff
= - NONZERO (lenb
);
2196 while (texta
< lima
&& textb
< limb
)
2198 diff
= (to_uchar (translate
[to_uchar (*texta
++)])
2199 - to_uchar (translate
[to_uchar (*textb
++)]));
2206 diff
= memcmp (texta
, textb
, MIN (lena
, lenb
));
2210 diff
= lena
< lenb
? -1 : lena
!= lenb
;
2220 /* Find the beginning and limit of the next field. */
2221 if (key
->eword
!= SIZE_MAX
)
2222 lima
= limfield (a
, key
), limb
= limfield (b
, key
);
2224 lima
= a
->text
+ a
->length
- 1, limb
= b
->text
+ b
->length
- 1;
2226 if (key
->sword
!= SIZE_MAX
)
2227 texta
= begfield (a
, key
), textb
= begfield (b
, key
);
2230 texta
= a
->text
, textb
= b
->text
;
2231 if (key
->skipsblanks
)
2233 while (texta
< lima
&& blanks
[to_uchar (*texta
)])
2235 while (textb
< limb
&& blanks
[to_uchar (*textb
)])
2246 return key
->reverse
? -diff
: diff
;
2249 /* Compare two lines A and B, returning negative, zero, or positive
2250 depending on whether A compares less than, equal to, or greater than B. */
2253 compare (const struct line
*a
, const struct line
*b
)
2258 /* First try to compare on the specified keys (if any).
2259 The only two cases with no key at all are unadorned sort,
2260 and unadorned sort -r. */
2263 diff
= keycompare (a
, b
);
2264 if (diff
|| unique
|| stable
)
2268 /* If the keys all compare equal (or no keys were specified)
2269 fall through to the default comparison. */
2270 alen
= a
->length
- 1, blen
= b
->length
- 1;
2273 diff
= - NONZERO (blen
);
2276 else if (hard_LC_COLLATE
)
2277 diff
= xmemcoll (a
->text
, alen
, b
->text
, blen
);
2278 else if (! (diff
= memcmp (a
->text
, b
->text
, MIN (alen
, blen
))))
2279 diff
= alen
< blen
? -1 : alen
!= blen
;
2281 return reverse
? -diff
: diff
;
2284 /* Check that the lines read from FILE_NAME come in order. Return
2285 true if they are in order. If CHECKONLY == 'c', also print a
2286 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2287 they are not in order. */
2290 check (char const *file_name
, char checkonly
)
2292 FILE *fp
= xfopen (file_name
, "r");
2293 struct buffer buf
; /* Input buffer. */
2294 struct line temp
; /* Copy of previous line. */
2296 uintmax_t line_number
= 0;
2297 struct keyfield
const *key
= keylist
;
2298 bool nonunique
= ! unique
;
2299 bool ordered
= true;
2301 initbuf (&buf
, sizeof (struct line
),
2302 MAX (merge_buffer_size
, sort_size
));
2305 while (fillbuf (&buf
, fp
, file_name
))
2307 struct line
const *line
= buffer_linelim (&buf
);
2308 struct line
const *linebase
= line
- buf
.nlines
;
2310 /* Make sure the line saved from the old buffer contents is
2311 less than or equal to the first line of the new buffer. */
2312 if (alloc
&& nonunique
<= compare (&temp
, line
- 1))
2316 if (checkonly
== 'c')
2318 struct line
const *disorder_line
= line
- 1;
2319 uintmax_t disorder_line_number
=
2320 buffer_linelim (&buf
) - disorder_line
+ line_number
;
2321 char hr_buf
[INT_BUFSIZE_BOUND (uintmax_t)];
2322 fprintf (stderr
, _("%s: %s:%s: disorder: "),
2323 program_name
, file_name
,
2324 umaxtostr (disorder_line_number
, hr_buf
));
2325 write_bytes (disorder_line
->text
, disorder_line
->length
,
2326 stderr
, _("standard error"));
2334 /* Compare each line in the buffer with its successor. */
2335 while (linebase
< --line
)
2336 if (nonunique
<= compare (line
, line
- 1))
2337 goto found_disorder
;
2339 line_number
+= buf
.nlines
;
2341 /* Save the last line of the buffer. */
2342 if (alloc
< line
->length
)
2349 alloc
= line
->length
;
2353 while (alloc
< line
->length
);
2355 temp
.text
= xrealloc (temp
.text
, alloc
);
2357 memcpy (temp
.text
, line
->text
, line
->length
);
2358 temp
.length
= line
->length
;
2361 temp
.keybeg
= temp
.text
+ (line
->keybeg
- line
->text
);
2362 temp
.keylim
= temp
.text
+ (line
->keylim
- line
->text
);
2366 xfclose (fp
, file_name
);
2372 /* Open FILES (there are NFILES of them) and store the resulting array
2373 of stream pointers into (*PFPS). Allocate the array. Return the
2374 number of successfully opened files, setting errno if this value is
2375 less than NFILES. */
2378 open_input_files (struct sortfile
*files
, size_t nfiles
, FILE ***pfps
)
2380 FILE **fps
= *pfps
= xnmalloc (nfiles
, sizeof *fps
);
2383 /* Open as many input files as we can. */
2384 for (i
= 0; i
< nfiles
; i
++)
2386 fps
[i
] = (files
[i
].pid
2387 ? open_temp (files
[i
].name
, files
[i
].pid
)
2388 : stream_open (files
[i
].name
, "r"));
2396 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2397 files (all of which are at the start of the FILES array), and
2398 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2399 FPS is the vector of open stream corresponding to the files.
2400 Close input and output streams before returning.
2401 OUTPUT_FILE gives the name of the output file. If it is NULL,
2402 the output file is standard output. */
2405 mergefps (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
2406 FILE *ofp
, char const *output_file
, FILE **fps
)
2408 struct buffer
*buffer
= xnmalloc (nfiles
, sizeof *buffer
);
2409 /* Input buffers for each file. */
2410 struct line saved
; /* Saved line storage for unique check. */
2411 struct line
const *savedline
= NULL
;
2412 /* &saved if there is a saved line. */
2413 size_t savealloc
= 0; /* Size allocated for the saved line. */
2414 struct line
const **cur
= xnmalloc (nfiles
, sizeof *cur
);
2415 /* Current line in each line table. */
2416 struct line
const **base
= xnmalloc (nfiles
, sizeof *base
);
2417 /* Base of each line table. */
2418 size_t *ord
= xnmalloc (nfiles
, sizeof *ord
);
2419 /* Table representing a permutation of fps,
2420 such that cur[ord[0]] is the smallest line
2421 and will be next output. */
2425 struct keyfield
const *key
= keylist
;
2428 /* Read initial lines from each input file. */
2429 for (i
= 0; i
< nfiles
; )
2431 initbuf (&buffer
[i
], sizeof (struct line
),
2432 MAX (merge_buffer_size
, sort_size
/ nfiles
));
2433 if (fillbuf (&buffer
[i
], fps
[i
], files
[i
].name
))
2435 struct line
const *linelim
= buffer_linelim (&buffer
[i
]);
2436 cur
[i
] = linelim
- 1;
2437 base
[i
] = linelim
- buffer
[i
].nlines
;
2442 /* fps[i] is empty; eliminate it from future consideration. */
2443 xfclose (fps
[i
], files
[i
].name
);
2447 zaptemp (files
[i
].name
);
2449 free (buffer
[i
].buf
);
2451 for (j
= i
; j
< nfiles
; ++j
)
2453 files
[j
] = files
[j
+ 1];
2454 fps
[j
] = fps
[j
+ 1];
2459 /* Set up the ord table according to comparisons among input lines.
2460 Since this only reorders two items if one is strictly greater than
2461 the other, it is stable. */
2462 for (i
= 0; i
< nfiles
; ++i
)
2464 for (i
= 1; i
< nfiles
; ++i
)
2465 if (0 < compare (cur
[ord
[i
- 1]], cur
[ord
[i
]]))
2466 t
= ord
[i
- 1], ord
[i
- 1] = ord
[i
], ord
[i
] = t
, i
= 0;
2468 /* Repeatedly output the smallest line until no input remains. */
2471 struct line
const *smallest
= cur
[ord
[0]];
2473 /* If uniquified output is turned on, output only the first of
2474 an identical series of lines. */
2477 if (savedline
&& compare (savedline
, smallest
))
2480 write_bytes (saved
.text
, saved
.length
, ofp
, output_file
);
2485 if (savealloc
< smallest
->length
)
2490 savealloc
= smallest
->length
;
2493 while ((savealloc
*= 2) < smallest
->length
);
2495 saved
.text
= xrealloc (saved
.text
, savealloc
);
2497 saved
.length
= smallest
->length
;
2498 memcpy (saved
.text
, smallest
->text
, saved
.length
);
2502 saved
.text
+ (smallest
->keybeg
- smallest
->text
);
2504 saved
.text
+ (smallest
->keylim
- smallest
->text
);
2509 write_bytes (smallest
->text
, smallest
->length
, ofp
, output_file
);
2511 /* Check if we need to read more lines into core. */
2512 if (base
[ord
[0]] < smallest
)
2513 cur
[ord
[0]] = smallest
- 1;
2516 if (fillbuf (&buffer
[ord
[0]], fps
[ord
[0]], files
[ord
[0]].name
))
2518 struct line
const *linelim
= buffer_linelim (&buffer
[ord
[0]]);
2519 cur
[ord
[0]] = linelim
- 1;
2520 base
[ord
[0]] = linelim
- buffer
[ord
[0]].nlines
;
2524 /* We reached EOF on fps[ord[0]]. */
2525 for (i
= 1; i
< nfiles
; ++i
)
2526 if (ord
[i
] > ord
[0])
2529 xfclose (fps
[ord
[0]], files
[ord
[0]].name
);
2530 if (ord
[0] < ntemps
)
2533 zaptemp (files
[ord
[0]].name
);
2535 free (buffer
[ord
[0]].buf
);
2536 for (i
= ord
[0]; i
< nfiles
; ++i
)
2538 fps
[i
] = fps
[i
+ 1];
2539 files
[i
] = files
[i
+ 1];
2540 buffer
[i
] = buffer
[i
+ 1];
2541 cur
[i
] = cur
[i
+ 1];
2542 base
[i
] = base
[i
+ 1];
2544 for (i
= 0; i
< nfiles
; ++i
)
2545 ord
[i
] = ord
[i
+ 1];
2550 /* The new line just read in may be larger than other lines
2551 already in main memory; push it back in the queue until we
2552 encounter a line larger than it. Optimize for the common
2553 case where the new line is smallest. */
2558 size_t ord0
= ord
[0];
2559 size_t count_of_smaller_lines
;
2563 int cmp
= compare (cur
[ord0
], cur
[ord
[probe
]]);
2564 if (cmp
< 0 || (cmp
== 0 && ord0
< ord
[probe
]))
2568 probe
= (lo
+ hi
) / 2;
2571 count_of_smaller_lines
= lo
- 1;
2572 for (j
= 0; j
< count_of_smaller_lines
; j
++)
2573 ord
[j
] = ord
[j
+ 1];
2574 ord
[count_of_smaller_lines
] = ord0
;
2577 /* Free up some resources every once in a while. */
2578 if (MAX_PROCS_BEFORE_REAP
< nprocs
)
2582 if (unique
&& savedline
)
2584 write_bytes (saved
.text
, saved
.length
, ofp
, output_file
);
2588 xfclose (ofp
, output_file
);
2596 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2597 files (all of which are at the start of the FILES array), and
2598 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2599 Close input and output files before returning.
2600 OUTPUT_FILE gives the name of the output file.
2602 Return the number of files successfully merged. This number can be
2603 less than NFILES if we ran low on file descriptors, but in this
2604 case it is never less than 2. */
2607 mergefiles (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
2608 FILE *ofp
, char const *output_file
)
2611 size_t nopened
= open_input_files (files
, nfiles
, &fps
);
2612 if (nopened
< nfiles
&& nopened
< 2)
2613 die (_("open failed"), files
[nopened
].name
);
2614 mergefps (files
, ntemps
, nopened
, ofp
, output_file
, fps
);
2618 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2619 and HI (with NHI members). T, LO, and HI point just past their
2620 respective arrays, and the arrays are in reverse order. NLO and
2621 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
2624 mergelines (struct line
*t
,
2625 struct line
const *lo
, size_t nlo
,
2626 struct line
const *hi
, size_t nhi
)
2629 if (compare (lo
- 1, hi
- 1) <= 0)
2634 /* HI - NHI equalled T - (NLO + NHI) when this function
2635 began. Therefore HI must equal T now, and there is no
2636 need to copy from HI to T. */
2654 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2655 NLINES must be at least 2.
2656 The input and output arrays are in reverse order, and LINES and
2657 TEMP point just past the end of their respective arrays.
2659 Use a recursive divide-and-conquer algorithm, in the style
2660 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2661 the optimization suggested by exercise 5.2.4-10; this requires room
2662 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2663 writes that this memory optimization was originally published by
2664 D. A. Bell, Comp J. 1 (1958), 75. */
2667 sortlines (struct line
*lines
, size_t nlines
, struct line
*temp
)
2671 if (0 < compare (&lines
[-1], &lines
[-2]))
2673 struct line tmp
= lines
[-1];
2674 lines
[-1] = lines
[-2];
2680 size_t nlo
= nlines
/ 2;
2681 size_t nhi
= nlines
- nlo
;
2682 struct line
*lo
= lines
;
2683 struct line
*hi
= lines
- nlo
;
2684 struct line
*sorted_lo
= temp
;
2686 sortlines (hi
, nhi
, temp
);
2688 sortlines_temp (lo
, nlo
, sorted_lo
);
2690 sorted_lo
[-1] = lo
[-1];
2692 mergelines (lines
, sorted_lo
, nlo
, hi
, nhi
);
2696 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2697 rather than sorting in place. */
2700 sortlines_temp (struct line
*lines
, size_t nlines
, struct line
*temp
)
2704 /* Declare `swap' as int, not bool, to work around a bug
2705 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2706 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
2707 int swap
= (0 < compare (&lines
[-1], &lines
[-2]));
2708 temp
[-1] = lines
[-1 - swap
];
2709 temp
[-2] = lines
[-2 + swap
];
2713 size_t nlo
= nlines
/ 2;
2714 size_t nhi
= nlines
- nlo
;
2715 struct line
*lo
= lines
;
2716 struct line
*hi
= lines
- nlo
;
2717 struct line
*sorted_hi
= temp
- nlo
;
2719 sortlines_temp (hi
, nhi
, sorted_hi
);
2721 sortlines (lo
, nlo
, temp
);
2723 mergelines (temp
, lo
, nlo
, sorted_hi
, nhi
);
2727 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2728 the same as OUTFILE. If found, merge the found instances (and perhaps
2729 some other files) into a temporary file so that it can in turn be
2730 merged into OUTFILE without destroying OUTFILE before it is completely
2731 read. Return the new value of NFILES, which differs from the old if
2732 some merging occurred.
2734 This test ensures that an otherwise-erroneous use like
2735 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2736 It's not clear that POSIX requires this nicety.
2737 Detect common error cases, but don't try to catch obscure cases like
2738 "cat ... FILE ... | sort -m -o FILE"
2739 where traditional "sort" doesn't copy the input and where
2740 people should know that they're getting into trouble anyway.
2741 Catching these obscure cases would slow down performance in
2745 avoid_trashing_input (struct sortfile
*files
, size_t ntemps
,
2746 size_t nfiles
, char const *outfile
)
2749 bool got_outstat
= false;
2750 struct stat outstat
;
2752 for (i
= ntemps
; i
< nfiles
; i
++)
2754 bool is_stdin
= STREQ (files
[i
].name
, "-");
2758 if (outfile
&& STREQ (outfile
, files
[i
].name
) && !is_stdin
)
2765 ? stat (outfile
, &outstat
)
2766 : fstat (STDOUT_FILENO
, &outstat
))
2773 ? fstat (STDIN_FILENO
, &instat
)
2774 : stat (files
[i
].name
, &instat
))
2776 && SAME_INODE (instat
, outstat
));
2783 char *temp
= create_temp (&tftp
, &pid
);
2784 size_t num_merged
= 0;
2787 num_merged
+= mergefiles (&files
[i
], 0, nfiles
- i
, tftp
, temp
);
2788 files
[i
].name
= temp
;
2791 if (i
+ num_merged
< nfiles
)
2792 memmove(&files
[i
+ 1], &files
[i
+ num_merged
],
2793 num_merged
* sizeof *files
);
2795 nfiles
-= num_merged
- 1;;
2805 /* Merge the input FILES. NTEMPS is the number of files at the
2806 start of FILES that are temporary; it is zero at the top level.
2807 NFILES is the total number of files. Put the output in
2808 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
2811 merge (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
2812 char const *output_file
)
2814 while (nmerge
< nfiles
)
2816 /* Number of input files processed so far. */
2819 /* Number of output files generated so far. */
2822 /* nfiles % NMERGE; this counts input files that are left over
2823 after all full-sized merges have been done. */
2826 /* Number of easily-available slots at the next loop iteration. */
2829 /* Do as many NMERGE-size merges as possible. In the case that
2830 nmerge is bogus, increment by the maximum number of file
2831 descriptors allowed. */
2832 for (out
= in
= 0; nmerge
<= nfiles
- in
; out
++)
2836 char *temp
= create_temp (&tfp
, &pid
);
2837 size_t num_merged
= mergefiles (&files
[in
], MIN (ntemps
, nmerge
),
2839 ntemps
-= MIN (ntemps
, num_merged
);
2840 files
[out
].name
= temp
;
2841 files
[out
].pid
= pid
;
2845 remainder
= nfiles
- in
;
2846 cheap_slots
= nmerge
- out
% nmerge
;
2848 if (cheap_slots
< remainder
)
2850 /* So many files remain that they can't all be put into the last
2851 NMERGE-sized output window. Do one more merge. Merge as few
2852 files as possible, to avoid needless I/O. */
2853 size_t nshortmerge
= remainder
- cheap_slots
+ 1;
2856 char *temp
= create_temp (&tfp
, &pid
);
2857 size_t num_merged
= mergefiles (&files
[in
], MIN (ntemps
, nshortmerge
),
2858 nshortmerge
, tfp
, temp
);
2859 ntemps
-= MIN (ntemps
, num_merged
);
2860 files
[out
].name
= temp
;
2861 files
[out
++].pid
= pid
;
2865 /* Put the remaining input files into the last NMERGE-sized output
2866 window, so they will be merged in the next pass. */
2867 memmove(&files
[out
], &files
[in
], (nfiles
- in
) * sizeof *files
);
2872 nfiles
= avoid_trashing_input (files
, ntemps
, nfiles
, output_file
);
2874 /* We aren't guaranteed that this final mergefiles will work, therefore we
2875 try to merge into the output, and then merge as much as we can into a
2876 temp file if we can't. Repeat. */
2880 /* Merge directly into the output file if possible. */
2882 size_t nopened
= open_input_files (files
, nfiles
, &fps
);
2884 if (nopened
== nfiles
)
2886 FILE *ofp
= stream_open (output_file
, "w");
2889 mergefps (files
, ntemps
, nfiles
, ofp
, output_file
, fps
);
2892 if (errno
!= EMFILE
|| nopened
<= 2)
2893 die (_("open failed"), output_file
);
2895 else if (nopened
<= 2)
2896 die (_("open failed"), files
[nopened
].name
);
2898 /* We ran out of file descriptors. Close one of the input
2899 files, to gain a file descriptor. Then create a temporary
2900 file with our spare file descriptor. Retry if that failed
2901 (e.g., some other process could open a file between the time
2902 we closed and tried to create). */
2909 xfclose (fps
[nopened
], files
[nopened
].name
);
2910 temp
= maybe_create_temp (&tfp
, &pid
, ! (nopened
<= 2));
2914 /* Merge into the newly allocated temporary. */
2915 mergefps (&files
[0], MIN (ntemps
, nopened
), nopened
, tfp
, temp
, fps
);
2916 ntemps
-= MIN (ntemps
, nopened
);
2917 files
[0].name
= temp
;
2920 memmove (&files
[1], &files
[nopened
], (nfiles
- nopened
) * sizeof *files
);
2922 nfiles
-= nopened
- 1;
2926 /* Sort NFILES FILES onto OUTPUT_FILE. */
2929 sort (char * const *files
, size_t nfiles
, char const *output_file
)
2933 bool output_file_created
= false;
2939 char const *temp_output
;
2940 char const *file
= *files
;
2941 FILE *fp
= xfopen (file
, "r");
2943 size_t bytes_per_line
= (2 * sizeof (struct line
)
2944 - sizeof (struct line
) / 2);
2947 initbuf (&buf
, bytes_per_line
,
2948 sort_buffer_size (&fp
, 1, files
, nfiles
, bytes_per_line
));
2953 while (fillbuf (&buf
, fp
, file
))
2956 struct line
*linebase
;
2958 if (buf
.eof
&& nfiles
2959 && (bytes_per_line
+ 1
2960 < (buf
.alloc
- buf
.used
- bytes_per_line
* buf
.nlines
)))
2962 /* End of file, but there is more input and buffer room.
2963 Concatenate the next input file; this is faster in
2965 buf
.left
= buf
.used
;
2969 line
= buffer_linelim (&buf
);
2970 linebase
= line
- buf
.nlines
;
2972 sortlines (line
, buf
.nlines
, linebase
);
2973 if (buf
.eof
&& !nfiles
&& !ntemps
&& !buf
.left
)
2976 tfp
= xfopen (output_file
, "w");
2977 temp_output
= output_file
;
2978 output_file_created
= true;
2983 temp_output
= create_temp (&tfp
, NULL
);
2989 write_bytes (line
->text
, line
->length
, tfp
, temp_output
);
2991 while (linebase
< line
&& compare (line
, line
- 1) == 0)
2994 while (linebase
< line
);
2996 xfclose (tfp
, temp_output
);
2998 /* Free up some resources every once in a while. */
2999 if (MAX_PROCS_BEFORE_REAP
< nprocs
)
3002 if (output_file_created
)
3011 if (! output_file_created
)
3014 struct tempnode
*node
= temphead
;
3015 struct sortfile
*tempfiles
= xnmalloc (ntemps
, sizeof *tempfiles
);
3016 for (i
= 0; node
; i
++)
3018 tempfiles
[i
].name
= node
->name
;
3019 tempfiles
[i
].pid
= node
->pid
;
3022 merge (tempfiles
, ntemps
, ntemps
, output_file
);
3027 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
3030 insertkey (struct keyfield
*key_arg
)
3032 struct keyfield
**p
;
3033 struct keyfield
*key
= xmemdup (key_arg
, sizeof *key
);
3035 for (p
= &keylist
; *p
; p
= &(*p
)->next
)
3041 /* Report a bad field specification SPEC, with extra info MSGID. */
3043 static void badfieldspec (char const *, char const *)
3046 badfieldspec (char const *spec
, char const *msgid
)
3048 error (SORT_FAILURE
, 0, _("%s: invalid field specification %s"),
3049 _(msgid
), quote (spec
));
3053 /* Report incompatible options. */
3055 static void incompatible_options (char const *) ATTRIBUTE_NORETURN
;
3057 incompatible_options (char const *opts
)
3059 error (SORT_FAILURE
, 0, _("options `-%s' are incompatible"), opts
);
3063 /* Check compatibility of ordering options. */
3066 check_ordering_compatibility (void)
3068 struct keyfield
const *key
;
3070 for (key
= keylist
; key
; key
= key
->next
)
3071 if ((1 < (key
->random
+ key
->numeric
+ key
->general_numeric
+ key
->month
3072 + key
->version
+ !!key
->ignore
+ key
->human_numeric
))
3073 || (key
->random
&& key
->translate
))
3075 /* The following is too big, but guaranteed to be "big enough". */
3076 char opts
[sizeof short_options
];
3078 if (key
->ignore
== nondictionary
)
3082 if (key
->general_numeric
)
3084 if (key
->human_numeric
)
3086 if (key
->ignore
== nonprinting
)
3097 incompatible_options (opts
);
3101 /* Parse the leading integer in STRING and store the resulting value
3102 (which must fit into size_t) into *VAL. Return the address of the
3103 suffix after the integer. If the value is too large, silently
3104 substitute SIZE_MAX. If MSGID is NULL, return NULL after
3105 failure; otherwise, report MSGID and exit on failure. */
3108 parse_field_count (char const *string
, size_t *val
, char const *msgid
)
3113 switch (xstrtoumax (string
, &suffix
, 10, &n
, ""))
3116 case LONGINT_INVALID_SUFFIX_CHAR
:
3121 case LONGINT_OVERFLOW
:
3122 case LONGINT_OVERFLOW
| LONGINT_INVALID_SUFFIX_CHAR
:
3126 case LONGINT_INVALID
:
3128 error (SORT_FAILURE
, 0, _("%s: invalid count at start of %s"),
3129 _(msgid
), quote (string
));
3136 /* Handle interrupts and hangups. */
3139 sighandler (int sig
)
3142 signal (sig
, SIG_IGN
);
3146 signal (sig
, SIG_DFL
);
3150 /* Set the ordering options for KEY specified in S.
3151 Return the address of the first character in S that
3152 is not a valid ordering option.
3153 BLANKTYPE is the kind of blanks that 'b' should skip. */
3156 set_ordering (const char *s
, struct keyfield
*key
, enum blanktype blanktype
)
3163 if (blanktype
== bl_start
|| blanktype
== bl_both
)
3164 key
->skipsblanks
= true;
3165 if (blanktype
== bl_end
|| blanktype
== bl_both
)
3166 key
->skipeblanks
= true;
3169 key
->ignore
= nondictionary
;
3172 key
->translate
= fold_toupper
;
3175 key
->general_numeric
= true;
3178 key
->human_numeric
= true;
3181 /* Option order should not matter, so don't let -i override
3182 -d. -d implies -i, but -i does not imply -d. */
3184 key
->ignore
= nonprinting
;
3190 key
->numeric
= true;
3196 key
->reverse
= true;
3199 key
->version
= true;
3209 static struct keyfield
*
3210 key_init (struct keyfield
*key
)
3212 memset (key
, 0, sizeof *key
);
3213 key
->eword
= SIZE_MAX
;
3214 key
->iec_present
= -1;
3219 main (int argc
, char **argv
)
3221 struct keyfield
*key
;
3222 struct keyfield key_buf
;
3223 struct keyfield gkey
;
3227 bool mergeonly
= false;
3228 char *random_source
= NULL
;
3229 bool need_random
= false;
3231 bool posixly_correct
= (getenv ("POSIXLY_CORRECT") != NULL
);
3232 bool obsolete_usage
= (posix2_version () < 200112);
3234 char *files_from
= NULL
;
3236 char const *outfile
= NULL
;
3238 initialize_main (&argc
, &argv
);
3239 set_program_name (argv
[0]);
3240 setlocale (LC_ALL
, "");
3241 bindtextdomain (PACKAGE
, LOCALEDIR
);
3242 textdomain (PACKAGE
);
3244 initialize_exit_failure (SORT_FAILURE
);
3246 hard_LC_COLLATE
= hard_locale (LC_COLLATE
);
3247 #if HAVE_NL_LANGINFO
3248 hard_LC_TIME
= hard_locale (LC_TIME
);
3251 /* Get locale's representation of the decimal point. */
3253 struct lconv
const *locale
= localeconv ();
3255 /* If the locale doesn't define a decimal point, or if the decimal
3256 point is multibyte, use the C locale's decimal point. FIXME:
3257 add support for multibyte decimal points. */
3258 decimal_point
= to_uchar (locale
->decimal_point
[0]);
3259 if (! decimal_point
|| locale
->decimal_point
[1])
3260 decimal_point
= '.';
3262 /* FIXME: add support for multibyte thousands separators. */
3263 thousands_sep
= to_uchar (*locale
->thousands_sep
);
3264 if (! thousands_sep
|| locale
->thousands_sep
[1])
3268 have_read_stdin
= false;
3273 static int const sig
[] =
3275 /* The usual suspects. */
3276 SIGALRM
, SIGHUP
, SIGINT
, SIGPIPE
, SIGQUIT
, SIGTERM
,
3293 enum { nsigs
= ARRAY_CARDINALITY (sig
) };
3296 struct sigaction act
;
3298 sigemptyset (&caught_signals
);
3299 for (i
= 0; i
< nsigs
; i
++)
3301 sigaction (sig
[i
], NULL
, &act
);
3302 if (act
.sa_handler
!= SIG_IGN
)
3303 sigaddset (&caught_signals
, sig
[i
]);
3306 act
.sa_handler
= sighandler
;
3307 act
.sa_mask
= caught_signals
;
3310 for (i
= 0; i
< nsigs
; i
++)
3311 if (sigismember (&caught_signals
, sig
[i
]))
3312 sigaction (sig
[i
], &act
, NULL
);
3314 for (i
= 0; i
< nsigs
; i
++)
3315 if (signal (sig
[i
], SIG_IGN
) != SIG_IGN
)
3317 signal (sig
[i
], sighandler
);
3318 siginterrupt (sig
[i
], 1);
3322 signal (SIGCHLD
, SIG_DFL
); /* Don't inherit CHLD handling from parent. */
3324 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
3325 atexit (exit_cleanup
);
3327 gkey
.sword
= gkey
.eword
= SIZE_MAX
;
3329 gkey
.translate
= NULL
;
3330 gkey
.numeric
= gkey
.general_numeric
= gkey
.human_numeric
= false;
3331 gkey
.iec_present
= -1;
3332 gkey
.random
= gkey
.version
= false;
3333 gkey
.month
= gkey
.reverse
= false;
3334 gkey
.skipsblanks
= gkey
.skipeblanks
= false;
3336 files
= xnmalloc (argc
, sizeof *files
);
3340 /* Parse an operand as a file after "--" was seen; or if
3341 pedantic and a file was seen, unless the POSIX version
3342 predates 1003.1-2001 and -c was not seen and the operand is
3343 "-o FILE" or "-oFILE". */
3347 || (posixly_correct
&& nfiles
!= 0
3348 && ! (obsolete_usage
3351 && argv
[optind
][0] == '-' && argv
[optind
][1] == 'o'
3352 && (argv
[optind
][2] || optind
+ 1 != argc
)))
3353 || ((c
= getopt_long (argc
, argv
, short_options
,
3359 files
[nfiles
++] = argv
[optind
++];
3365 if (optarg
[0] == '+')
3367 bool minus_pos_usage
= (optind
!= argc
&& argv
[optind
][0] == '-'
3368 && ISDIGIT (argv
[optind
][1]));
3369 obsolete_usage
|= minus_pos_usage
&& !posixly_correct
;
3372 /* Treat +POS1 [-POS2] as a key if possible; but silently
3373 treat an operand as a file if it is not a valid +POS1. */
3374 key
= key_init (&key_buf
);
3375 s
= parse_field_count (optarg
+ 1, &key
->sword
, NULL
);
3377 s
= parse_field_count (s
+ 1, &key
->schar
, NULL
);
3378 if (! (key
->sword
|| key
->schar
))
3379 key
->sword
= SIZE_MAX
;
3380 if (! s
|| *set_ordering (s
, key
, bl_start
))
3384 if (minus_pos_usage
)
3386 char const *optarg1
= argv
[optind
++];
3387 s
= parse_field_count (optarg1
+ 1, &key
->eword
,
3388 N_("invalid number after `-'"));
3390 s
= parse_field_count (s
+ 1, &key
->echar
,
3391 N_("invalid number after `.'"));
3392 if (*set_ordering (s
, key
, bl_end
))
3393 badfieldspec (optarg1
,
3394 N_("stray character in field spec"));
3401 files
[nfiles
++] = optarg
;
3405 c
= XARGMATCH ("--sort", optarg
, sort_args
, sort_types
);
3422 set_ordering (str
, &gkey
, bl_both
);
3428 ? XARGMATCH ("--check", optarg
, check_args
, check_types
)
3433 if (checkonly
&& checkonly
!= c
)
3434 incompatible_options ("cC");
3438 case COMPRESS_PROGRAM_OPTION
:
3439 if (compress_program
&& !STREQ (compress_program
, optarg
))
3440 error (SORT_FAILURE
, 0, _("multiple compress programs specified"));
3441 compress_program
= optarg
;
3444 case FILES0_FROM_OPTION
:
3445 files_from
= optarg
;
3449 key
= key_init (&key_buf
);
3452 s
= parse_field_count (optarg
, &key
->sword
,
3453 N_("invalid number at field start"));
3456 /* Provoke with `sort -k0' */
3457 badfieldspec (optarg
, N_("field number is zero"));
3461 s
= parse_field_count (s
+ 1, &key
->schar
,
3462 N_("invalid number after `.'"));
3465 /* Provoke with `sort -k1.0' */
3466 badfieldspec (optarg
, N_("character offset is zero"));
3469 if (! (key
->sword
|| key
->schar
))
3470 key
->sword
= SIZE_MAX
;
3471 s
= set_ordering (s
, key
, bl_start
);
3474 key
->eword
= SIZE_MAX
;
3480 s
= parse_field_count (s
+ 1, &key
->eword
,
3481 N_("invalid number after `,'"));
3484 /* Provoke with `sort -k1,0' */
3485 badfieldspec (optarg
, N_("field number is zero"));
3489 s
= parse_field_count (s
+ 1, &key
->echar
,
3490 N_("invalid number after `.'"));
3492 s
= set_ordering (s
, key
, bl_end
);
3495 badfieldspec (optarg
, N_("stray character in field spec"));
3504 specify_nmerge (oi
, c
, optarg
);
3508 if (outfile
&& !STREQ (outfile
, optarg
))
3509 error (SORT_FAILURE
, 0, _("multiple output files specified"));
3513 case RANDOM_SOURCE_OPTION
:
3514 if (random_source
&& !STREQ (random_source
, optarg
))
3515 error (SORT_FAILURE
, 0, _("multiple random sources specified"));
3516 random_source
= optarg
;
3524 specify_sort_size (oi
, c
, optarg
);
3529 char newtab
= optarg
[0];
3531 error (SORT_FAILURE
, 0, _("empty tab"));
3534 if (STREQ (optarg
, "\\0"))
3538 /* Provoke with `sort -txx'. Complain about
3539 "multi-character tab" instead of "multibyte tab", so
3540 that the diagnostic's wording does not need to be
3541 changed once multibyte characters are supported. */
3542 error (SORT_FAILURE
, 0, _("multi-character tab %s"),
3546 if (tab
!= TAB_DEFAULT
&& tab
!= newtab
)
3547 error (SORT_FAILURE
, 0, _("incompatible tabs"));
3553 add_temp_dir (optarg
);
3561 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3562 through Solaris 7. It is also accepted by many non-Solaris
3563 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3564 -y is marked as obsolete starting with Solaris 8 (1999), but is
3565 still accepted as of Solaris 10 prerelease (2004).
3567 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3568 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3569 and which in general ignores the argument after "-y" if it
3570 consists entirely of digits (it can even be empty). */
3571 if (optarg
== argv
[optind
- 1])
3574 for (p
= optarg
; ISDIGIT (*p
); p
++)
3576 optind
-= (*p
!= '\0');
3584 case_GETOPT_HELP_CHAR
;
3586 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
3589 usage (SORT_FAILURE
);
3597 /* When using --files0-from=F, you may not specify any files
3598 on the command-line. */
3601 error (0, 0, _("extra operand %s"), quote (files
[0]));
3602 fprintf (stderr
, "%s\n",
3603 _("file operands cannot be combined with --files0-from"));
3604 usage (SORT_FAILURE
);
3607 if (STREQ (files_from
, "-"))
3611 stream
= fopen (files_from
, "r");
3613 error (SORT_FAILURE
, errno
, _("cannot open %s for reading"),
3614 quote (files_from
));
3617 readtokens0_init (&tok
);
3619 if (! readtokens0 (stream
, &tok
) || fclose (stream
) != 0)
3620 error (SORT_FAILURE
, 0, _("cannot read file names from %s"),
3621 quote (files_from
));
3629 for (i
= 0; i
< nfiles
; i
++)
3631 if (STREQ (files
[i
], "-"))
3632 error (SORT_FAILURE
, 0, _("when reading file names from stdin, "
3633 "no file name of %s allowed"),
3635 else if (files
[i
][0] == '\0')
3637 /* Using the standard `filename:line-number:' prefix here is
3638 not totally appropriate, since NUL is the separator, not NL,
3639 but it might be better than nothing. */
3640 unsigned long int file_number
= i
+ 1;
3641 error (SORT_FAILURE
, 0,
3642 _("%s:%lu: invalid zero-length file name"),
3643 quotearg_colon (files_from
), file_number
);
3648 error (SORT_FAILURE
, 0, _("no input from %s"),
3649 quote (files_from
));
3652 /* Inheritance of global options to individual keys. */
3653 for (key
= keylist
; key
; key
= key
->next
)
3657 || (key
->skipsblanks
3663 || key
->general_numeric
3664 || key
->human_numeric
3667 key
->ignore
= gkey
.ignore
;
3668 key
->translate
= gkey
.translate
;
3669 key
->skipsblanks
= gkey
.skipsblanks
;
3670 key
->skipeblanks
= gkey
.skipeblanks
;
3671 key
->month
= gkey
.month
;
3672 key
->numeric
= gkey
.numeric
;
3673 key
->general_numeric
= gkey
.general_numeric
;
3674 key
->human_numeric
= gkey
.human_numeric
;
3675 key
->random
= gkey
.random
;
3676 key
->reverse
= gkey
.reverse
;
3677 key
->version
= gkey
.version
;
3680 need_random
|= key
->random
;
3683 if (!keylist
&& (gkey
.ignore
3685 || (gkey
.skipsblanks
3689 || gkey
.general_numeric
3690 || gkey
.human_numeric
3695 need_random
|= gkey
.random
;
3698 check_ordering_compatibility ();
3700 reverse
= gkey
.reverse
;
3704 randread_source
= randread_new (random_source
, MD5_DIGEST_SIZE
);
3705 if (! randread_source
)
3706 die (_("open failed"), random_source
);
3709 if (temp_dir_count
== 0)
3711 char const *tmp_dir
= getenv ("TMPDIR");
3712 add_temp_dir (tmp_dir
? tmp_dir
: DEFAULT_TMPDIR
);
3717 static char *minus
= (char *) "-";
3723 /* Need to re-check that we meet the minimum requirement for memory
3724 usage with the final value for NMERGE. */
3726 sort_size
= MAX (sort_size
, MIN_SORT_SIZE
);
3731 error (SORT_FAILURE
, 0, _("extra operand %s not allowed with -%c"),
3732 quote (files
[1]), checkonly
);
3736 static char opts
[] = {0, 'o', 0};
3737 opts
[0] = checkonly
;
3738 incompatible_options (opts
);
3741 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3742 input is not properly sorted. */
3743 exit (check (files
[0], checkonly
) ? EXIT_SUCCESS
: SORT_OUT_OF_ORDER
);
3748 struct sortfile
*sortfiles
= xcalloc (nfiles
, sizeof *sortfiles
);
3751 for (i
= 0; i
< nfiles
; ++i
)
3752 sortfiles
[i
].name
= files
[i
];
3754 merge (sortfiles
, 0, nfiles
, outfile
);
3755 IF_LINT (free (sortfiles
));
3758 sort (files
, nfiles
, outfile
);
3760 if (have_read_stdin
&& fclose (stdin
) == EOF
)
3761 die (_("close failed"), "-");
3763 exit (EXIT_SUCCESS
);