1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written December 1988 by Mike Haertel.
18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19 or (US mail) as Mike Haertel c/o Free Software Foundation.
21 Ørn E. Hansen added NLS support in 1997. */
27 #include <sys/types.h>
34 #include "filevercmp.h"
35 #include "hard-locale.h"
46 #include "readtokens0.h"
49 #include "strnumcmp.h"
51 #include "xnanosleep.h"
54 #if HAVE_SYS_RESOURCE_H
55 # include <sys/resource.h>
58 struct rlimit
{ size_t rlim_cur
; };
59 # define getrlimit(Resource, Rlp) (-1)
62 /* The official name of this program (e.g., no `g' prefix). */
63 #define PROGRAM_NAME "sort"
66 proper_name ("Mike Haertel"), \
67 proper_name ("Paul Eggert")
69 #if HAVE_LANGINFO_CODESET
70 # include <langinfo.h>
73 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
76 # define SA_NOCLDSTOP 0
77 /* No sigprocmask. Always 'return' zero. */
78 # define sigprocmask(How, Set, Oset) (0)
80 # if ! HAVE_SIGINTERRUPT
81 # define siginterrupt(sig, flag) /* empty */
85 #if !defined OPEN_MAX && defined NR_OPEN
86 # define OPEN_MAX NR_OPEN
92 #define UCHAR_LIM (UCHAR_MAX + 1)
94 #ifndef DEFAULT_TMPDIR
95 # define DEFAULT_TMPDIR "/tmp"
98 /* Maximum number of lines to merge every time a NODE is taken from
99 the MERGE_QUEUE. Node is at LEVEL in the binary merge tree,
100 and is responsible for merging TOTAL lines. */
101 #define MAX_MERGE(total, level) ((total) / ((2 << level) * (2 << level)) + 1)
103 /* Heuristic value for the number of lines for which it is worth
104 creating a subthread, during an internal merge sort, on a machine
105 that has processors galore. Currently this number is just a guess.
106 This value must be at least 4. We don't know of any machine where
107 this number has any practical effect. */
108 enum { SUBTHREAD_LINES_HEURISTIC
= 4 };
113 /* POSIX says to exit with status 1 if invoked with -c and the
114 input is not properly sorted. */
115 SORT_OUT_OF_ORDER
= 1,
117 /* POSIX says any other irregular exit must exit with a status
118 code greater than 1. */
124 /* The number of times we should try to fork a compression process
125 (we retry if the fork call fails). We don't _need_ to compress
126 temp files, this is just to reduce disk access, so this number
127 can be small. Each retry doubles in duration. */
128 MAX_FORK_TRIES_COMPRESS
= 4,
130 /* The number of times we should try to fork a decompression process.
131 If we can't fork a decompression process, we can't sort, so this
132 number should be big. Each retry doubles in duration. */
133 MAX_FORK_TRIES_DECOMPRESS
= 9
138 /* Level of the end-of-merge node, one level above the root. */
141 /* Level of the root node in merge tree. */
145 /* The representation of the decimal point in the current locale. */
146 static int decimal_point
;
148 /* Thousands separator; if -1, then there isn't one. */
149 static int thousands_sep
;
151 /* Nonzero if the corresponding locales are hard. */
152 static bool hard_LC_COLLATE
;
154 static bool hard_LC_TIME
;
157 #define NONZERO(x) ((x) != 0)
159 /* The kind of blanks for '-b' to skip in various options. */
160 enum blanktype
{ bl_start
, bl_end
, bl_both
};
162 /* The character marking end of line. Default to \n. */
163 static char eolchar
= '\n';
165 /* Lines are held in core as counted strings. */
168 char *text
; /* Text of the line. */
169 size_t length
; /* Length including final newline. */
170 char *keybeg
; /* Start of first key. */
171 char *keylim
; /* Limit of first key. */
177 char *buf
; /* Dynamically allocated buffer,
178 partitioned into 3 regions:
181 - an array of lines, in reverse order. */
182 size_t used
; /* Number of bytes used for input data. */
183 size_t nlines
; /* Number of lines in the line array. */
184 size_t alloc
; /* Number of bytes allocated. */
185 size_t left
; /* Number of bytes left from previous reads. */
186 size_t line_bytes
; /* Number of bytes to reserve for each line. */
187 bool eof
; /* An EOF has been read. */
192 size_t sword
; /* Zero-origin 'word' to start at. */
193 size_t schar
; /* Additional characters to skip. */
194 size_t eword
; /* Zero-origin last 'word' of key. */
195 size_t echar
; /* Additional characters in field. */
196 bool const *ignore
; /* Boolean array of characters to ignore. */
197 char const *translate
; /* Translation applied to characters. */
198 bool skipsblanks
; /* Skip leading blanks when finding start. */
199 bool skipeblanks
; /* Skip leading blanks when finding end. */
200 bool numeric
; /* Flag for numeric comparison. Handle
201 strings of digits with optional decimal
202 point, but no exponential notation. */
203 bool random
; /* Sort by random hash of key. */
204 bool general_numeric
; /* Flag for general, numeric comparison.
205 Handle numbers in exponential notation. */
206 bool human_numeric
; /* Flag for sorting by human readable
207 units with either SI xor IEC prefixes. */
208 bool month
; /* Flag for comparison by month name. */
209 bool reverse
; /* Reverse the sense of comparison. */
210 bool version
; /* sort by version number */
211 bool obsolete_used
; /* obsolescent key option format is used. */
212 struct keyfield
*next
; /* Next keyfield to try. */
221 /* Binary merge tree node. */
224 struct line
*lo
; /* Lines to merge from LO child node. */
225 struct line
*hi
; /* Lines to merge from HI child ndoe. */
226 struct line
*end_lo
; /* End of available lines from LO. */
227 struct line
*end_hi
; /* End of available lines from HI. */
228 struct line
**dest
; /* Pointer to destination of merge. */
229 size_t nlo
; /* Total Lines remaining from LO. */
230 size_t nhi
; /* Total lines remaining from HI. */
231 size_t level
; /* Level in merge tree. */
232 struct merge_node
*parent
; /* Parent node. */
233 bool queued
; /* Node is already in heap. */
234 pthread_spinlock_t
*lock
; /* Lock for node operations. */
237 /* Priority queue of merge nodes. */
238 struct merge_node_queue
240 struct heap
*priority_queue
; /* Priority queue of merge tree nodes. */
241 pthread_mutex_t mutex
; /* Lock for queue operations. */
242 pthread_cond_t cond
; /* Conditional wait for empty queue to populate
246 /* FIXME: None of these tables work with multibyte character sets.
247 Also, there are many other bugs when handling multibyte characters.
248 One way to fix this is to rewrite `sort' to use wide characters
249 internally, but doing this with good performance is a bit
252 /* Table of blanks. */
253 static bool blanks
[UCHAR_LIM
];
255 /* Table of non-printing characters. */
256 static bool nonprinting
[UCHAR_LIM
];
258 /* Table of non-dictionary characters (not letters, digits, or blanks). */
259 static bool nondictionary
[UCHAR_LIM
];
261 /* Translation table folding lower case to upper. */
262 static unsigned char fold_toupper
[UCHAR_LIM
];
264 #define MONTHS_PER_YEAR 12
266 /* Table mapping month names to integers.
267 Alphabetic order allows binary search. */
268 static struct month monthtab
[] =
284 /* During the merge phase, the number of files to merge at once. */
285 #define NMERGE_DEFAULT 16
287 /* Minimum size for a merge or check buffer. */
288 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
290 /* Minimum sort size; the code might not work with smaller sizes. */
291 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
293 /* The number of bytes needed for a merge or check buffer, which can
294 function relatively efficiently even if it holds only one line. If
295 a longer line is seen, this value is increased. */
296 static size_t merge_buffer_size
= MAX (MIN_MERGE_BUFFER_SIZE
, 256 * 1024);
298 /* The approximate maximum number of bytes of main memory to use, as
299 specified by the user. Zero if the user has not specified a size. */
300 static size_t sort_size
;
302 /* The guessed size for non-regular files. */
303 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
305 /* Array of directory names in which any temporary files are to be created. */
306 static char const **temp_dirs
;
308 /* Number of temporary directory names used. */
309 static size_t temp_dir_count
;
311 /* Number of allocated slots in temp_dirs. */
312 static size_t temp_dir_alloc
;
314 /* Flag to reverse the order of all comparisons. */
317 /* Flag for stable sort. This turns off the last ditch bytewise
318 comparison of lines, and instead leaves lines in the same order
319 they were read if all keys compare equal. */
322 /* If TAB has this value, blanks separate fields. */
323 enum { TAB_DEFAULT
= CHAR_MAX
+ 1 };
325 /* Tab character separating fields. If TAB_DEFAULT, then fields are
326 separated by the empty string between a non-blank character and a blank
328 static int tab
= TAB_DEFAULT
;
330 /* Flag to remove consecutive duplicate lines from the output.
331 Only the last of a sequence of equal lines will be output. */
334 /* Nonzero if any of the input files are the standard input. */
335 static bool have_read_stdin
;
337 /* List of key field comparisons to be tried. */
338 static struct keyfield
*keylist
;
340 /* Program used to (de)compress temp files. Must accept -d. */
341 static char const *compress_program
;
343 /* Annotate the output with extra info to aid the user. */
346 /* Maximum number of files to merge in one go. If more than this
347 number are present, temp files will be used. */
348 static unsigned int nmerge
= NMERGE_DEFAULT
;
350 /* Report MESSAGE for FILE, then clean up and exit.
351 If FILE is null, it represents standard output. */
353 static void die (char const *, char const *) ATTRIBUTE_NORETURN
;
355 die (char const *message
, char const *file
)
357 error (0, errno
, "%s: %s", message
, file
? file
: _("standard output"));
364 if (status
!= EXIT_SUCCESS
)
365 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
370 Usage: %s [OPTION]... [FILE]...\n\
371 or: %s [OPTION]... --files0-from=F\n\
373 program_name
, program_name
);
375 Write sorted concatenation of all FILE(s) to standard output.\n\
379 Mandatory arguments to long options are mandatory for short options too.\n\
386 -b, --ignore-leading-blanks ignore leading blanks\n\
387 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
388 -f, --ignore-case fold lower case to upper case characters\n\
391 -g, --general-numeric-sort compare according to general numerical value\n\
392 -i, --ignore-nonprinting consider only printable characters\n\
393 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
396 -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\
399 -n, --numeric-sort compare according to string numerical value\n\
400 -R, --random-sort sort by random hash of keys\n\
401 --random-source=FILE get random bytes from FILE\n\
402 -r, --reverse reverse the result of comparisons\n\
405 --sort=WORD sort according to WORD:\n\
406 general-numeric -g, human-numeric -h, month -M,\n\
407 numeric -n, random -R, version -V\n\
408 -V, --version-sort natural sort of (version) numbers within text\n\
416 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
417 for more use temp files\n\
420 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
421 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
422 --compress-program=PROG compress temporaries with PROG;\n\
423 decompress them with PROG -d\n\
426 --debug annotate the part of the line used to sort,\n\
427 and warn about questionable usage to stderr\n\
428 --files0-from=F read input from the files specified by\n\
429 NUL-terminated names in file F;\n\
430 If F is - then read names from standard input\n\
433 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
434 (default end of line). See POS syntax below\n\
435 -m, --merge merge already sorted files; do not sort\n\
438 -o, --output=FILE write result to FILE instead of standard output\n\
439 -s, --stable stabilize sort by disabling last-resort comparison\n\
440 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
443 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
444 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
445 multiple options specify multiple directories\n\
446 --parallel=N limit the number of sorts run concurrently to N\n\
447 -u, --unique with -c, check for strict ordering;\n\
448 without -c, output only the first of an equal run\n\
451 -z, --zero-terminated end lines with 0 byte, not newline\n\
453 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
454 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
457 POS is F[.C][OPTS], where F is the field number and C the character position\n\
458 in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
459 in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
460 one or more single-letter ordering options, which override global ordering\n\
461 options for that key. If no key is given, use the entire line as the key.\n\
463 SIZE may be followed by the following multiplicative suffixes:\n\
466 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
468 With no FILE, or when FILE is -, read standard input.\n\
471 The locale specified by the environment affects sort order.\n\
472 Set LC_ALL=C to get the traditional sort order that uses\n\
473 native byte values.\n\
475 emit_ancillary_info ();
481 /* For long options that have no equivalent short option, use a
482 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
485 CHECK_OPTION
= CHAR_MAX
+ 1,
486 COMPRESS_PROGRAM_OPTION
,
487 DEBUG_PROGRAM_OPTION
,
490 RANDOM_SOURCE_OPTION
,
495 static char const short_options
[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
497 static struct option
const long_options
[] =
499 {"ignore-leading-blanks", no_argument
, NULL
, 'b'},
500 {"check", optional_argument
, NULL
, CHECK_OPTION
},
501 {"compress-program", required_argument
, NULL
, COMPRESS_PROGRAM_OPTION
},
502 {"debug", no_argument
, NULL
, DEBUG_PROGRAM_OPTION
},
503 {"dictionary-order", no_argument
, NULL
, 'd'},
504 {"ignore-case", no_argument
, NULL
, 'f'},
505 {"files0-from", required_argument
, NULL
, FILES0_FROM_OPTION
},
506 {"general-numeric-sort", no_argument
, NULL
, 'g'},
507 {"ignore-nonprinting", no_argument
, NULL
, 'i'},
508 {"key", required_argument
, NULL
, 'k'},
509 {"merge", no_argument
, NULL
, 'm'},
510 {"month-sort", no_argument
, NULL
, 'M'},
511 {"numeric-sort", no_argument
, NULL
, 'n'},
512 {"human-numeric-sort", no_argument
, NULL
, 'h'},
513 {"version-sort", no_argument
, NULL
, 'V'},
514 {"random-sort", no_argument
, NULL
, 'R'},
515 {"random-source", required_argument
, NULL
, RANDOM_SOURCE_OPTION
},
516 {"sort", required_argument
, NULL
, SORT_OPTION
},
517 {"output", required_argument
, NULL
, 'o'},
518 {"reverse", no_argument
, NULL
, 'r'},
519 {"stable", no_argument
, NULL
, 's'},
520 {"batch-size", required_argument
, NULL
, NMERGE_OPTION
},
521 {"buffer-size", required_argument
, NULL
, 'S'},
522 {"field-separator", required_argument
, NULL
, 't'},
523 {"temporary-directory", required_argument
, NULL
, 'T'},
524 {"unique", no_argument
, NULL
, 'u'},
525 {"zero-terminated", no_argument
, NULL
, 'z'},
526 {"parallel", required_argument
, NULL
, PARALLEL_OPTION
},
527 {GETOPT_HELP_OPTION_DECL
},
528 {GETOPT_VERSION_OPTION_DECL
},
532 #define CHECK_TABLE \
534 _ct_("silent", 'C') \
535 _ct_("diagnose-first", 'c')
537 static char const *const check_args
[] =
539 #define _ct_(_s, _c) _s,
543 static char const check_types
[] =
545 #define _ct_(_s, _c) _c,
551 _st_("general-numeric", 'g') \
552 _st_("human-numeric", 'h') \
554 _st_("numeric", 'n') \
555 _st_("random", 'R') \
558 static char const *const sort_args
[] =
560 #define _st_(_s, _c) _s,
564 static char const sort_types
[] =
566 #define _st_(_s, _c) _c,
571 /* The set of signals that are caught. */
572 static sigset_t caught_signals
;
574 /* Critical section status. */
581 /* Enter a critical section. */
582 static struct cs_status
585 struct cs_status status
;
586 status
.valid
= (sigprocmask (SIG_BLOCK
, &caught_signals
, &status
.sigs
) == 0);
590 /* Leave a critical section. */
592 cs_leave (struct cs_status status
)
596 /* Ignore failure when restoring the signal mask. */
597 sigprocmask (SIG_SETMASK
, &status
.sigs
, NULL
);
601 /* The list of temporary files. */
604 struct tempnode
*volatile next
;
605 pid_t pid
; /* If compressed, the pid of compressor, else zero */
606 char name
[1]; /* Actual size is 1 + file name length. */
608 static struct tempnode
*volatile temphead
;
609 static struct tempnode
*volatile *temptail
= &temphead
;
614 pid_t pid
; /* If compressed, the pid of compressor, else zero */
617 /* A table where we store compression process states. We clean up all
618 processes in a timely manner so as not to exhaust system resources,
619 so we store the info on whether the process is still running, or has
621 static Hash_table
*proctab
;
623 enum { INIT_PROCTAB_SIZE
= 47 };
625 enum procstate
{ ALIVE
, ZOMBIE
};
627 /* A proctab entry. The COUNT field is there in case we fork a new
628 compression process that has the same PID as an old zombie process
629 that is still in the table (because the process to decompress the
630 temp file it was associated with hasn't started yet). */
634 enum procstate state
;
639 proctab_hasher (void const *entry
, size_t tabsize
)
641 struct procnode
const *node
= entry
;
642 return node
->pid
% tabsize
;
646 proctab_comparator (void const *e1
, void const *e2
)
648 struct procnode
const *n1
= e1
, *n2
= e2
;
649 return n1
->pid
== n2
->pid
;
652 /* The total number of forked processes (compressors and decompressors)
653 that have not been reaped yet. */
654 static size_t nprocs
;
656 /* The number of child processes we'll allow before we try to reap some. */
657 enum { MAX_PROCS_BEFORE_REAP
= 2 };
659 /* If 0 < PID, wait for the child process with that PID to exit.
660 If PID is -1, clean up a random child process which has finished and
661 return the process ID of that child. If PID is -1 and no processes
662 have quit yet, return 0 without waiting. */
668 pid_t cpid
= waitpid (pid
, &status
, pid
< 0 ? WNOHANG
: 0);
671 error (SORT_FAILURE
, errno
, _("waiting for %s [-d]"),
675 if (! WIFEXITED (status
) || WEXITSTATUS (status
))
676 error (SORT_FAILURE
, 0, _("%s [-d] terminated abnormally"),
684 /* Add the PID of a running compression process to proctab, or update
685 the entry COUNT and STATE fields if it's already there. This also
686 creates the table for us the first time it's called. */
689 register_proc (pid_t pid
)
691 struct procnode test
, *node
;
695 proctab
= hash_initialize (INIT_PROCTAB_SIZE
, NULL
,
704 node
= hash_lookup (proctab
, &test
);
712 node
= xmalloc (sizeof *node
);
716 if (hash_insert (proctab
, node
) == NULL
)
721 /* This is called when we reap a random process. We don't know
722 whether we have reaped a compression process or a decompression
723 process until we look in the table. If there's an ALIVE entry for
724 it, then we have reaped a compression process, so change the state
725 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */
728 update_proc (pid_t pid
)
730 struct procnode test
, *node
;
733 node
= hash_lookup (proctab
, &test
);
735 node
->state
= ZOMBIE
;
738 /* This is for when we need to wait for a compression process to exit.
739 If it has a ZOMBIE entry in the table then it's already dead and has
740 been reaped. Note that if there's an ALIVE entry for it, it still may
741 already have died and been reaped if a second process was created with
742 the same PID. This is probably exceedingly rare, but to be on the safe
743 side we will have to wait for any compression process with this PID. */
746 wait_proc (pid_t pid
)
748 struct procnode test
, *node
;
751 node
= hash_lookup (proctab
, &test
);
752 if (node
->state
== ALIVE
)
755 node
->state
= ZOMBIE
;
758 hash_delete (proctab
, node
);
763 /* Keep reaping finished children as long as there are more to reap.
764 This doesn't block waiting for any of them, it only reaps those
765 that are already dead. */
772 while (0 < nprocs
&& (pid
= reap (-1)))
776 /* Clean up any remaining temporary files. */
781 struct tempnode
const *node
;
783 for (node
= temphead
; node
; node
= node
->next
)
788 /* Cleanup actions to take when exiting. */
795 /* Clean up any remaining temporary files in a critical section so
796 that a signal handler does not try to clean them too. */
797 struct cs_status cs
= cs_enter ();
805 /* Create a new temporary file, returning its newly allocated tempnode.
806 Store into *PFD the file descriptor open for writing.
807 If the creation fails, return NULL and store -1 into *PFD if the
808 failure is due to file descriptor exhaustion and
809 SURVIVE_FD_EXHAUSTION; otherwise, die. */
811 static struct tempnode
*
812 create_temp_file (int *pfd
, bool survive_fd_exhaustion
)
814 static char const slashbase
[] = "/sortXXXXXX";
815 static size_t temp_dir_index
;
818 char const *temp_dir
= temp_dirs
[temp_dir_index
];
819 size_t len
= strlen (temp_dir
);
820 struct tempnode
*node
=
821 xmalloc (offsetof (struct tempnode
, name
) + len
+ sizeof slashbase
);
822 char *file
= node
->name
;
825 memcpy (file
, temp_dir
, len
);
826 memcpy (file
+ len
, slashbase
, sizeof slashbase
);
829 if (++temp_dir_index
== temp_dir_count
)
832 /* Create the temporary file in a critical section, to avoid races. */
838 temptail
= &node
->next
;
846 if (! (survive_fd_exhaustion
&& errno
== EMFILE
))
847 error (SORT_FAILURE
, errno
, _("cannot create temporary file in %s"),
857 /* Return a stream for FILE, opened with mode HOW. A null FILE means
858 standard output; HOW should be "w". When opening for input, "-"
859 means standard input. To avoid confusion, do not return file
860 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
861 opening an ordinary FILE. Return NULL if unsuccessful.
863 fadvise() is used to specify an access pattern for input files.
864 There are a few hints we could possibly provide,
865 and after careful testing it was decided that
866 specifying POSIX_FADV_SEQUENTIAL was not detrimental
867 to any cases. On Linux 2.6.31, this option doubles
868 the size of read ahead performed and thus was seen to
871 Sorting with a smaller internal buffer
872 Reading from faster flash devices
874 In _addition_ one could also specify other hints...
876 POSIX_FADV_WILLNEED was tested, but Linux 2.6.31
877 at least uses that to _synchronously_ prepopulate the cache
878 with the specified range. While sort does need to
879 read all of its input before outputting, a synchronous
880 read of the whole file up front precludes any processing
881 that sort could do in parallel with the system doing
882 read ahead of the data. This was seen to have negative effects
883 in a couple of cases:
885 Sorting with a smaller internal buffer
886 Note this option was seen to shorten the runtime for sort
887 on a multicore system with lots of RAM and other processes
888 competing for CPU. It could be argued that more explicit
889 scheduling hints with `nice` et. al. are more appropriate
892 POSIX_FADV_NOREUSE is a possibility as it could lower
893 the priority of input data in the cache as sort will
894 only need to process it once. However its functionality
895 has changed over Linux kernel versions and as of 2.6.31
896 it does nothing and thus we can't depend on what it might
899 POSIX_FADV_DONTNEED is not appropriate for user specified
900 input files, but for temp files we do want to drop the
901 cache immediately after processing. This is done implicitly
902 however when the files are unlinked. */
905 stream_open (char const *file
, char const *how
)
912 if (STREQ (file
, "-"))
914 have_read_stdin
= true;
918 fp
= fopen (file
, how
);
919 fadvise (fp
, FADVISE_SEQUENTIAL
);
922 return fopen (file
, how
);
925 /* Same as stream_open, except always return a non-null value; die on
929 xfopen (char const *file
, char const *how
)
931 FILE *fp
= stream_open (file
, how
);
933 die (_("open failed"), file
);
937 /* Close FP, whose name is FILE, and report any errors. */
940 xfclose (FILE *fp
, char const *file
)
945 /* Allow reading stdin from tty more than once. */
951 /* Don't close stdout just yet. close_stdout does that. */
952 if (fflush (fp
) != 0)
953 die (_("fflush failed"), file
);
957 if (fclose (fp
) != 0)
958 die (_("close failed"), file
);
964 dup2_or_die (int oldfd
, int newfd
)
966 if (dup2 (oldfd
, newfd
) < 0)
967 error (SORT_FAILURE
, errno
, _("dup2 failed"));
970 /* Fork a child process for piping to and do common cleanup. The
971 TRIES parameter tells us how many times to try to fork before
972 giving up. Return the PID of the child, or -1 (setting errno)
976 pipe_fork (int pipefds
[2], size_t tries
)
978 #if HAVE_WORKING_FORK
979 struct tempnode
*saved_temphead
;
981 double wait_retry
= 0.25;
982 pid_t pid
IF_LINT ( = -1);
985 if (pipe (pipefds
) < 0)
990 /* This is so the child process won't delete our temp files
991 if it receives a signal before exec-ing. */
993 saved_temphead
= temphead
;
999 temphead
= saved_temphead
;
1002 errno
= saved_errno
;
1004 if (0 <= pid
|| errno
!= EAGAIN
)
1008 xnanosleep (wait_retry
);
1016 saved_errno
= errno
;
1019 errno
= saved_errno
;
1023 close (STDIN_FILENO
);
1024 close (STDOUT_FILENO
);
1031 #else /* ! HAVE_WORKING_FORK */
1036 /* Create a temporary file and start a compression program to filter output
1037 to that file. Set *PFP to the file handle and if PPID is non-NULL,
1038 set *PPID to the PID of the newly-created process. If the creation
1039 fails, return NULL if the failure is due to file descriptor
1040 exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */
1043 maybe_create_temp (FILE **pfp
, pid_t
*ppid
, bool survive_fd_exhaustion
)
1046 struct tempnode
*node
= create_temp_file (&tempfd
, survive_fd_exhaustion
);
1054 if (compress_program
)
1058 node
->pid
= pipe_fork (pipefds
, MAX_FORK_TRIES_COMPRESS
);
1063 tempfd
= pipefds
[1];
1065 register_proc (node
->pid
);
1067 else if (node
->pid
== 0)
1070 dup2_or_die (tempfd
, STDOUT_FILENO
);
1072 dup2_or_die (pipefds
[0], STDIN_FILENO
);
1075 if (execlp (compress_program
, compress_program
, (char *) NULL
) < 0)
1076 error (SORT_FAILURE
, errno
, _("couldn't execute %s"),
1083 *pfp
= fdopen (tempfd
, "w");
1085 die (_("couldn't create temporary file"), name
);
1093 /* Create a temporary file and start a compression program to filter output
1094 to that file. Set *PFP to the file handle and if *PPID is non-NULL,
1095 set it to the PID of the newly-created process. Die on failure. */
1098 create_temp (FILE **pfp
, pid_t
*ppid
)
1100 return maybe_create_temp (pfp
, ppid
, false);
1103 /* Open a compressed temp file and start a decompression process through
1104 which to filter the input. PID must be the valid processes ID of the
1105 process used to compress the file. Return NULL (setting errno to
1106 EMFILE) if we ran out of file descriptors, and die on any other
1110 open_temp (char const *name
, pid_t pid
)
1112 int tempfd
, pipefds
[2];
1117 tempfd
= open (name
, O_RDONLY
);
1121 switch (pipe_fork (pipefds
, MAX_FORK_TRIES_DECOMPRESS
))
1124 if (errno
!= EMFILE
)
1125 error (SORT_FAILURE
, errno
, _("couldn't create process for %s -d"),
1133 dup2_or_die (tempfd
, STDIN_FILENO
);
1135 dup2_or_die (pipefds
[1], STDOUT_FILENO
);
1138 execlp (compress_program
, compress_program
, "-d", (char *) NULL
);
1139 error (SORT_FAILURE
, errno
, _("couldn't execute %s -d"),
1146 fp
= fdopen (pipefds
[0], "r");
1149 int saved_errno
= errno
;
1151 errno
= saved_errno
;
1159 /* Append DIR to the array of temporary directory names. */
1161 add_temp_dir (char const *dir
)
1163 if (temp_dir_count
== temp_dir_alloc
)
1164 temp_dirs
= X2NREALLOC (temp_dirs
, &temp_dir_alloc
);
1166 temp_dirs
[temp_dir_count
++] = dir
;
1169 /* Remove NAME from the list of temporary files. */
1172 zaptemp (char const *name
)
1174 struct tempnode
*volatile *pnode
;
1175 struct tempnode
*node
;
1176 struct tempnode
*next
;
1178 int unlink_errno
= 0;
1179 struct cs_status cs
;
1181 for (pnode
= &temphead
; (node
= *pnode
)->name
!= name
; pnode
= &node
->next
)
1184 /* Unlink the temporary file in a critical section to avoid races. */
1187 unlink_status
= unlink (name
);
1188 unlink_errno
= errno
;
1192 if (unlink_status
!= 0)
1193 error (0, unlink_errno
, _("warning: cannot remove: %s"), name
);
1199 #if HAVE_NL_LANGINFO
1202 struct_month_cmp (void const *m1
, void const *m2
)
1204 struct month
const *month1
= m1
;
1205 struct month
const *month2
= m2
;
1206 return strcmp (month1
->name
, month2
->name
);
1211 /* Initialize the character class tables. */
1218 for (i
= 0; i
< UCHAR_LIM
; ++i
)
1220 blanks
[i
] = !! isblank (i
);
1221 nonprinting
[i
] = ! isprint (i
);
1222 nondictionary
[i
] = ! isalnum (i
) && ! isblank (i
);
1223 fold_toupper
[i
] = toupper (i
);
1226 #if HAVE_NL_LANGINFO
1227 /* If we're not in the "C" locale, read different names for months. */
1230 for (i
= 0; i
< MONTHS_PER_YEAR
; i
++)
1237 s
= nl_langinfo (ABMON_1
+ i
);
1239 monthtab
[i
].name
= name
= xmalloc (s_len
+ 1);
1240 monthtab
[i
].val
= i
+ 1;
1242 for (j
= k
= 0; j
< s_len
; j
++)
1243 if (! isblank (to_uchar (s
[j
])))
1244 name
[k
++] = fold_toupper
[to_uchar (s
[j
])];
1247 qsort (monthtab
, MONTHS_PER_YEAR
, sizeof *monthtab
, struct_month_cmp
);
1252 /* Specify how many inputs may be merged at once.
1253 This may be set on the command-line with the
1254 --batch-size option. */
1256 specify_nmerge (int oi
, char c
, char const *s
)
1259 struct rlimit rlimit
;
1260 enum strtol_error e
= xstrtoumax (s
, NULL
, 10, &n
, NULL
);
1262 /* Try to find out how many file descriptors we'll be able
1263 to open. We need at least nmerge + 3 (STDIN_FILENO,
1264 STDOUT_FILENO and STDERR_FILENO). */
1265 unsigned int max_nmerge
= ((getrlimit (RLIMIT_NOFILE
, &rlimit
) == 0
1270 if (e
== LONGINT_OK
)
1274 e
= LONGINT_OVERFLOW
;
1279 error (0, 0, _("invalid --%s argument %s"),
1280 long_options
[oi
].name
, quote (s
));
1281 error (SORT_FAILURE
, 0,
1282 _("minimum --%s argument is %s"),
1283 long_options
[oi
].name
, quote ("2"));
1285 else if (max_nmerge
< nmerge
)
1287 e
= LONGINT_OVERFLOW
;
1294 if (e
== LONGINT_OVERFLOW
)
1296 char max_nmerge_buf
[INT_BUFSIZE_BOUND (max_nmerge
)];
1297 error (0, 0, _("--%s argument %s too large"),
1298 long_options
[oi
].name
, quote (s
));
1299 error (SORT_FAILURE
, 0,
1300 _("maximum --%s argument with current rlimit is %s"),
1301 long_options
[oi
].name
,
1302 uinttostr (max_nmerge
, max_nmerge_buf
));
1305 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1308 /* Specify the amount of main memory to use when sorting. */
1310 specify_sort_size (int oi
, char c
, char const *s
)
1314 enum strtol_error e
= xstrtoumax (s
, &suffix
, 10, &n
, "EgGkKmMPtTYZ");
1316 /* The default unit is KiB. */
1317 if (e
== LONGINT_OK
&& ISDIGIT (suffix
[-1]))
1319 if (n
<= UINTMAX_MAX
/ 1024)
1322 e
= LONGINT_OVERFLOW
;
1325 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
1326 if (e
== LONGINT_INVALID_SUFFIX_CHAR
&& ISDIGIT (suffix
[-1]) && ! suffix
[1])
1335 double mem
= physmem_total () * n
/ 100;
1337 /* Use "<", not "<=", to avoid problems with rounding. */
1338 if (mem
< UINTMAX_MAX
)
1344 e
= LONGINT_OVERFLOW
;
1349 if (e
== LONGINT_OK
)
1351 /* If multiple sort sizes are specified, take the maximum, so
1352 that option order does not matter. */
1359 sort_size
= MAX (sort_size
, MIN_SORT_SIZE
);
1363 e
= LONGINT_OVERFLOW
;
1366 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1369 /* Specify the number of threads to spawn during internal sort. */
1370 static unsigned long int
1371 specify_nthreads (int oi
, char c
, char const *s
)
1373 unsigned long int nthreads
;
1374 enum strtol_error e
= xstrtoul (s
, NULL
, 10, &nthreads
, "");
1375 if (e
== LONGINT_OVERFLOW
)
1377 if (e
!= LONGINT_OK
)
1378 xstrtol_fatal (e
, oi
, c
, long_options
, s
);
1380 error (SORT_FAILURE
, 0, _("number in parallel must be nonzero"));
1385 /* Return the default sort size. */
1387 default_sort_size (void)
1389 /* Let MEM be available memory or 1/8 of total memory, whichever
1391 double avail
= physmem_available ();
1392 double total
= physmem_total ();
1393 double mem
= MAX (avail
, total
/ 8);
1394 struct rlimit rlimit
;
1396 /* Let SIZE be MEM, but no more than the maximum object size or
1397 system resource limits. Avoid the MIN macro here, as it is not
1398 quite right when only one argument is floating point. Don't
1399 bother to check for values like RLIM_INFINITY since in practice
1400 they are not much less than SIZE_MAX. */
1401 size_t size
= SIZE_MAX
;
1404 if (getrlimit (RLIMIT_DATA
, &rlimit
) == 0 && rlimit
.rlim_cur
< size
)
1405 size
= rlimit
.rlim_cur
;
1407 if (getrlimit (RLIMIT_AS
, &rlimit
) == 0 && rlimit
.rlim_cur
< size
)
1408 size
= rlimit
.rlim_cur
;
1411 /* Leave a large safety margin for the above limits, as failure can
1412 occur when they are exceeded. */
1416 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1417 Exceeding RSS is not fatal, but can be quite slow. */
1418 if (getrlimit (RLIMIT_RSS
, &rlimit
) == 0 && rlimit
.rlim_cur
/ 16 * 15 < size
)
1419 size
= rlimit
.rlim_cur
/ 16 * 15;
1422 /* Use no less than the minimum. */
1423 return MAX (size
, MIN_SORT_SIZE
);
1426 /* Return the sort buffer size to use with the input files identified
1427 by FPS and FILES, which are alternate names of the same files.
1428 NFILES gives the number of input files; NFPS may be less. Assume
1429 that each input line requires LINE_BYTES extra bytes' worth of line
1430 information. Do not exceed the size bound specified by the user
1431 (or a default size bound, if the user does not specify one). */
1434 sort_buffer_size (FILE *const *fps
, size_t nfps
,
1435 char *const *files
, size_t nfiles
,
1438 /* A bound on the input size. If zero, the bound hasn't been
1440 static size_t size_bound
;
1442 /* In the worst case, each input byte is a newline. */
1443 size_t worst_case_per_input_byte
= line_bytes
+ 1;
1445 /* Keep enough room for one extra input line and an extra byte.
1446 This extra room might be needed when preparing to read EOF. */
1447 size_t size
= worst_case_per_input_byte
+ 1;
1451 for (i
= 0; i
< nfiles
; i
++)
1457 if ((i
< nfps
? fstat (fileno (fps
[i
]), &st
)
1458 : STREQ (files
[i
], "-") ? fstat (STDIN_FILENO
, &st
)
1459 : stat (files
[i
], &st
))
1461 die (_("stat failed"), files
[i
]);
1463 if (S_ISREG (st
.st_mode
))
1464 file_size
= st
.st_size
;
1467 /* The file has unknown size. If the user specified a sort
1468 buffer size, use that; otherwise, guess the size. */
1471 file_size
= INPUT_FILE_SIZE_GUESS
;
1476 size_bound
= sort_size
;
1478 size_bound
= default_sort_size ();
1481 /* Add the amount of memory needed to represent the worst case
1482 where the input consists entirely of newlines followed by a
1483 single non-newline. Check for overflow. */
1484 worst_case
= file_size
* worst_case_per_input_byte
+ 1;
1485 if (file_size
!= worst_case
/ worst_case_per_input_byte
1486 || size_bound
- size
<= worst_case
)
1494 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
1495 must be at least sizeof (struct line). Allocate ALLOC bytes
1499 initbuf (struct buffer
*buf
, size_t line_bytes
, size_t alloc
)
1501 /* Ensure that the line array is properly aligned. If the desired
1502 size cannot be allocated, repeatedly halve it until allocation
1503 succeeds. The smaller allocation may hurt overall performance,
1504 but that's better than failing. */
1507 alloc
+= sizeof (struct line
) - alloc
% sizeof (struct line
);
1508 buf
->buf
= malloc (alloc
);
1512 if (alloc
<= line_bytes
+ 1)
1516 buf
->line_bytes
= line_bytes
;
1518 buf
->used
= buf
->left
= buf
->nlines
= 0;
1522 /* Return one past the limit of the line array. */
1524 static inline struct line
*
1525 buffer_linelim (struct buffer
const *buf
)
1527 return (struct line
*) (buf
->buf
+ buf
->alloc
);
1530 /* Return a pointer to the first character of the field specified
1534 begfield (struct line
const *line
, struct keyfield
const *key
)
1536 char *ptr
= line
->text
, *lim
= ptr
+ line
->length
- 1;
1537 size_t sword
= key
->sword
;
1538 size_t schar
= key
->schar
;
1540 /* The leading field separator itself is included in a field when -t
1543 if (tab
!= TAB_DEFAULT
)
1544 while (ptr
< lim
&& sword
--)
1546 while (ptr
< lim
&& *ptr
!= tab
)
1552 while (ptr
< lim
&& sword
--)
1554 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1556 while (ptr
< lim
&& !blanks
[to_uchar (*ptr
)])
1560 /* If we're ignoring leading blanks when computing the Start
1561 of the field, skip past them here. */
1562 if (key
->skipsblanks
)
1563 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1566 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
1567 ptr
= MIN (lim
, ptr
+ schar
);
1572 /* Return the limit of (a pointer to the first character after) the field
1573 in LINE specified by KEY. */
1576 limfield (struct line
const *line
, struct keyfield
const *key
)
1578 char *ptr
= line
->text
, *lim
= ptr
+ line
->length
- 1;
1579 size_t eword
= key
->eword
, echar
= key
->echar
;
1582 eword
++; /* Skip all of end field. */
1584 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1585 whichever comes first. If there are more than EWORD fields, leave
1586 PTR pointing at the beginning of the field having zero-based index,
1587 EWORD. If a delimiter character was specified (via -t), then that
1588 `beginning' is the first character following the delimiting TAB.
1589 Otherwise, leave PTR pointing at the first `blank' character after
1590 the preceding field. */
1591 if (tab
!= TAB_DEFAULT
)
1592 while (ptr
< lim
&& eword
--)
1594 while (ptr
< lim
&& *ptr
!= tab
)
1596 if (ptr
< lim
&& (eword
|| echar
))
1600 while (ptr
< lim
&& eword
--)
1602 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1604 while (ptr
< lim
&& !blanks
[to_uchar (*ptr
)])
1608 #ifdef POSIX_UNSPECIFIED
1609 /* The following block of code makes GNU sort incompatible with
1610 standard Unix sort, so it's ifdef'd out for now.
1611 The POSIX spec isn't clear on how to interpret this.
1612 FIXME: request clarification.
1614 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1615 Date: Thu, 30 May 96 12:20:41 -0400
1616 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1618 [...]I believe I've found another bug in `sort'.
1623 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1626 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1630 Unix sort produced the answer I expected: sort on the single character
1631 in column 7. GNU sort produced different results, because it disagrees
1632 on the interpretation of the key-end spec "M.N". Unix sort reads this
1633 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1634 "skip M-1 fields, then either N-1 characters or the rest of the current
1635 field, whichever comes first". This extra clause applies only to
1636 key-ends, not key-starts.
1639 /* Make LIM point to the end of (one byte past) the current field. */
1640 if (tab
!= TAB_DEFAULT
)
1643 newlim
= memchr (ptr
, tab
, lim
- ptr
);
1651 while (newlim
< lim
&& blanks
[to_uchar (*newlim
)])
1653 while (newlim
< lim
&& !blanks
[to_uchar (*newlim
)])
1659 if (echar
!= 0) /* We need to skip over a portion of the end field. */
1661 /* If we're ignoring leading blanks when computing the End
1662 of the field, skip past them here. */
1663 if (key
->skipeblanks
)
1664 while (ptr
< lim
&& blanks
[to_uchar (*ptr
)])
1667 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1668 ptr
= MIN (lim
, ptr
+ echar
);
1674 /* Fill BUF reading from FP, moving buf->left bytes from the end
1675 of buf->buf to the beginning first. If EOF is reached and the
1676 file wasn't terminated by a newline, supply one. Set up BUF's line
1677 table too. FILE is the name of the file corresponding to FP.
1678 Return true if some input was read. */
1681 fillbuf (struct buffer
*buf
, FILE *fp
, char const *file
)
1683 struct keyfield
const *key
= keylist
;
1685 size_t line_bytes
= buf
->line_bytes
;
1686 size_t mergesize
= merge_buffer_size
- MIN_MERGE_BUFFER_SIZE
;
1691 if (buf
->used
!= buf
->left
)
1693 memmove (buf
->buf
, buf
->buf
+ buf
->used
- buf
->left
, buf
->left
);
1694 buf
->used
= buf
->left
;
1700 char *ptr
= buf
->buf
+ buf
->used
;
1701 struct line
*linelim
= buffer_linelim (buf
);
1702 struct line
*line
= linelim
- buf
->nlines
;
1703 size_t avail
= (char *) linelim
- buf
->nlines
* line_bytes
- ptr
;
1704 char *line_start
= buf
->nlines
? line
->text
+ line
->length
: buf
->buf
;
1706 while (line_bytes
+ 1 < avail
)
1708 /* Read as many bytes as possible, but do not read so many
1709 bytes that there might not be enough room for the
1710 corresponding line array. The worst case is when the
1711 rest of the input file consists entirely of newlines,
1712 except that the last byte is not a newline. */
1713 size_t readsize
= (avail
- 1) / (line_bytes
+ 1);
1714 size_t bytes_read
= fread (ptr
, 1, readsize
, fp
);
1715 char *ptrlim
= ptr
+ bytes_read
;
1717 avail
-= bytes_read
;
1719 if (bytes_read
!= readsize
)
1722 die (_("read failed"), file
);
1726 if (buf
->buf
== ptrlim
)
1728 if (line_start
!= ptrlim
&& ptrlim
[-1] != eol
)
1733 /* Find and record each line in the just-read input. */
1734 while ((p
= memchr (ptr
, eol
, ptrlim
- ptr
)))
1736 /* Delimit the line with NUL. This eliminates the need to
1737 temporarily replace the last byte with NUL when calling
1738 xmemcoll(), which increases performance. */
1742 line
->text
= line_start
;
1743 line
->length
= ptr
- line_start
;
1744 mergesize
= MAX (mergesize
, line
->length
);
1745 avail
-= line_bytes
;
1749 /* Precompute the position of the first key for
1751 line
->keylim
= (key
->eword
== SIZE_MAX
1753 : limfield (line
, key
));
1755 if (key
->sword
!= SIZE_MAX
)
1756 line
->keybeg
= begfield (line
, key
);
1759 if (key
->skipsblanks
)
1760 while (blanks
[to_uchar (*line_start
)])
1762 line
->keybeg
= line_start
;
1774 buf
->used
= ptr
- buf
->buf
;
1775 buf
->nlines
= buffer_linelim (buf
) - line
;
1776 if (buf
->nlines
!= 0)
1778 buf
->left
= ptr
- line_start
;
1779 merge_buffer_size
= mergesize
+ MIN_MERGE_BUFFER_SIZE
;
1784 /* The current input line is too long to fit in the buffer.
1785 Double the buffer size and try again, keeping it properly
1787 size_t line_alloc
= buf
->alloc
/ sizeof (struct line
);
1788 buf
->buf
= x2nrealloc (buf
->buf
, &line_alloc
, sizeof (struct line
));
1789 buf
->alloc
= line_alloc
* sizeof (struct line
);
1794 /* Return an integer that represents the order of magnitude of the
1795 unit following the number. The number may contain thousands
1796 separators and a decimal point, but it may not contain leading blanks.
1797 Negative numbers get negative orders; zero numbers have a zero order.
1798 Store the address of the end of the number into *ENDPTR. */
1801 find_unit_order (char const *number
, char **endptr
)
1803 static char const orders
[UCHAR_LIM
] =
1805 #if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \
1806 && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107)
1807 /* This initializer syntax works on all C99 hosts. For now, use
1808 it only on non-ASCII hosts, to ease the pain of porting to
1809 pre-C99 ASCII hosts. */
1810 ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1813 /* Generate the following table with this command:
1814 perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
1815 foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
1817 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1818 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1819 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
1820 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
1821 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1822 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1823 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1824 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1825 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1826 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1827 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1831 bool minus_sign
= (*number
== '-');
1832 char const *p
= number
+ minus_sign
;
1836 /* Scan to end of number.
1837 Decimals or separators not followed by digits stop the scan.
1838 Numbers ending in decimals or separators are thus considered
1839 to be lacking in units.
1840 FIXME: add support for multibyte thousands_sep and decimal_point. */
1844 while (ISDIGIT (ch
= *p
++))
1845 nonzero
|= ch
- '0';
1847 while (ch
== thousands_sep
);
1849 if (ch
== decimal_point
)
1850 while (ISDIGIT (ch
= *p
++))
1851 nonzero
|= ch
- '0';
1853 int order
= (nonzero
? orders
[ch
] : 0);
1854 *endptr
= (char *) p
- !order
;
1855 return (minus_sign
? -order
: order
);
1858 /* Compare numbers A and B ending in units with SI or IEC prefixes
1859 <none/unknown> < K/k < M < G < T < P < E < Z < Y
1860 This may temporarily modify the strings. Store into *EA the end
1864 human_numcompare (char *a
, char *b
, char **ea
)
1868 while (blanks
[to_uchar (*a
)])
1870 while (blanks
[to_uchar (*b
)])
1873 int diff
= find_unit_order (a
, ea
) - find_unit_order (b
, &endb
);
1874 return (diff
? diff
: strnumcmp (a
, b
, decimal_point
, thousands_sep
));
1877 /* Compare strings A and B as numbers without explicitly converting them to
1878 machine numbers. Comparatively slow for short strings, but asymptotically
1882 numcompare (char const *a
, char const *b
, char **ea
)
1884 while (blanks
[to_uchar (*a
)])
1886 while (blanks
[to_uchar (*b
)])
1891 /* Approximate strnumcmp extents with find_unit_order. */
1892 int order
= find_unit_order (a
, ea
);
1896 return strnumcmp (a
, b
, decimal_point
, thousands_sep
);
1900 general_numcompare (char const *sa
, char const *sb
, char **ea
)
1902 /* FIXME: maybe add option to try expensive FP conversion
1903 only if A and B can't be compared more cheaply/accurately. */
1905 #if HAVE_C99_STRTOLD
1906 # define long_double long double
1908 # define long_double double
1910 # define strtold strtod
1914 long_double a
= strtold (sa
, ea
);
1915 long_double b
= strtold (sb
, &eb
);
1917 /* Put conversion errors at the start of the collating sequence. */
1919 return sb
== eb
? 0 : -1;
1923 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1924 conversion errors but before numbers; sort them by internal
1925 bit-pattern, for lack of a more portable alternative. */
1931 : memcmp (&a
, &b
, sizeof a
));
1934 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1935 Return 0 if the name in S is not recognized. */
1938 getmonth (char const *month
, size_t len
, char const **ea
)
1941 size_t hi
= MONTHS_PER_YEAR
;
1942 char const *monthlim
= month
+ len
;
1946 if (month
== monthlim
)
1948 if (!blanks
[to_uchar (*month
)])
1955 size_t ix
= (lo
+ hi
) / 2;
1956 char const *m
= month
;
1957 char const *n
= monthtab
[ix
].name
;
1965 return monthtab
[ix
].val
;
1967 if (m
== monthlim
|| fold_toupper
[to_uchar (*m
)] < to_uchar (*n
))
1972 else if (fold_toupper
[to_uchar (*m
)] > to_uchar (*n
))
1984 /* A randomly chosen MD5 state, used for random comparison. */
1985 static struct md5_ctx random_md5_state
;
1987 /* Initialize the randomly chosen MD5 state. */
1990 random_md5_state_init (char const *random_source
)
1992 unsigned char buf
[MD5_DIGEST_SIZE
];
1993 struct randread_source
*r
= randread_new (random_source
, sizeof buf
);
1995 die (_("open failed"), random_source
);
1996 randread (r
, buf
, sizeof buf
);
1997 if (randread_free (r
) != 0)
1998 die (_("close failed"), random_source
);
1999 md5_init_ctx (&random_md5_state
);
2000 md5_process_bytes (buf
, sizeof buf
, &random_md5_state
);
2003 /* This is like strxfrm, except it reports any error and exits. */
2006 xstrxfrm (char *restrict dest
, char const *restrict src
, size_t destsize
)
2009 size_t translated_size
= strxfrm (dest
, src
, destsize
);
2013 error (0, errno
, _("string transformation failed"));
2014 error (0, 0, _("set LC_ALL='C' to work around the problem"));
2015 error (SORT_FAILURE
, 0,
2016 _("the untransformed string was %s"),
2017 quotearg_n_style (0, locale_quoting_style
, src
));
2020 return translated_size
;
2023 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2024 using one or more random hash functions. */
2027 compare_random (char *restrict texta
, size_t lena
,
2028 char *restrict textb
, size_t lenb
)
2030 /* XFRM_DIFF records the equivalent of memcmp on the transformed
2031 data. This is used to break ties if there is an checksum
2032 collision, and this is good enough given the astronomically low
2033 probability of a collision. */
2036 char stackbuf
[4000];
2037 char *buf
= stackbuf
;
2038 size_t bufsize
= sizeof stackbuf
;
2039 uint32_t dig
[2][MD5_DIGEST_SIZE
/ sizeof (uint32_t)];
2040 struct md5_ctx s
[2];
2041 s
[0] = s
[1] = random_md5_state
;
2043 if (hard_LC_COLLATE
)
2045 /* Null-terminate the keys so that strxfrm knows where to stop. */
2046 char *lima
= texta
+ lena
; char enda
= *lima
; *lima
= '\0';
2047 char *limb
= textb
+ lenb
; char endb
= *limb
; *limb
= '\0';
2051 /* Transform the text into the basis of comparison, so that byte
2052 strings that would otherwise considered to be equal are
2053 considered equal here even if their bytes differ.
2055 Each time through this loop, transform one
2056 null-terminated string's worth from TEXTA or from TEXTB
2057 or both. That way, there's no need to store the
2058 transformation of the whole line, if it contains many
2059 null-terminated strings. */
2061 /* Store the transformed data into a big-enough buffer. */
2064 (texta
< lima
? xstrxfrm (buf
, texta
, bufsize
) + 1 : 0);
2065 bool a_fits
= sizea
<= bufsize
;
2068 ? (xstrxfrm ((a_fits
? buf
+ sizea
: NULL
), textb
,
2069 (a_fits
? bufsize
- sizea
: 0))
2073 if (! (a_fits
&& sizea
+ sizeb
<= bufsize
))
2075 bufsize
= sizea
+ sizeb
;
2076 if (bufsize
< SIZE_MAX
/ 3)
2077 bufsize
= bufsize
* 3 / 2;
2078 buf
= (buf
== stackbuf
2080 : xrealloc (buf
, bufsize
));
2082 strxfrm (buf
, texta
, sizea
);
2084 strxfrm (buf
+ sizea
, textb
, sizeb
);
2087 /* Advance past NULs to the next part of each input string,
2088 exiting the loop if both strings are exhausted. When
2089 exiting the loop, prepare to finish off the tiebreaker
2090 comparison properly. */
2092 texta
+= strlen (texta
) + 1;
2094 textb
+= strlen (textb
) + 1;
2095 if (! (texta
< lima
|| textb
< limb
))
2097 lena
= sizea
; texta
= buf
;
2098 lenb
= sizeb
; textb
= buf
+ sizea
;
2102 /* Accumulate the transformed data in the corresponding
2104 md5_process_bytes (buf
, sizea
, &s
[0]);
2105 md5_process_bytes (buf
+ sizea
, sizeb
, &s
[1]);
2107 /* Update the tiebreaker comparison of the transformed data. */
2110 xfrm_diff
= memcmp (buf
, buf
+ sizea
, MIN (sizea
, sizeb
));
2112 xfrm_diff
= (sizea
> sizeb
) - (sizea
< sizeb
);
2120 /* Compute and compare the checksums. */
2121 md5_process_bytes (texta
, lena
, &s
[0]); md5_finish_ctx (&s
[0], dig
[0]);
2122 md5_process_bytes (textb
, lenb
, &s
[1]); md5_finish_ctx (&s
[1], dig
[1]);
2123 int diff
= memcmp (dig
[0], dig
[1], sizeof dig
[0]);
2125 /* Fall back on the tiebreaker if the checksums collide. */
2130 xfrm_diff
= memcmp (texta
, textb
, MIN (lena
, lenb
));
2132 xfrm_diff
= (lena
> lenb
) - (lena
< lenb
);
2138 if (buf
!= stackbuf
)
2144 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
2145 using filevercmp. See lib/filevercmp.h for function description. */
2148 compare_version (char *restrict texta
, size_t lena
,
2149 char *restrict textb
, size_t lenb
)
2153 /* It is necessary to save the character after the end of the field.
2154 "filevercmp" works with NUL terminated strings. Our blocks of
2155 text are not necessarily terminated with a NUL byte. */
2156 char sv_a
= texta
[lena
];
2157 char sv_b
= textb
[lenb
];
2162 diff
= filevercmp (texta
, textb
);
2170 /* Return the printable width of the block of memory starting at
2171 TEXT and ending just before LIM, counting each tab as one byte.
2172 FIXME: Should we generally be counting non printable chars? */
2175 debug_width (char const *text
, char const *lim
)
2177 size_t width
= mbsnwidth (text
, lim
- text
, 0);
2179 width
+= (*text
++ == '\t');
2183 /* For debug mode, "underline" a key at the
2184 specified offset and screen width. */
2187 mark_key (size_t offset
, size_t width
)
2193 printf (_("^ no match for key\n"));
2204 /* For debug mode, determine the screen offset and width
2205 to highlight for a key, and then output the highlight. */
2208 debug_key (char const *sline
, char const *sfield
, char const *efield
,
2209 size_t flen
, bool skipb
)
2211 char const *sa
= sfield
;
2213 if (skipb
) /* This key type implicitly skips leading blanks. */
2215 while (sa
< efield
&& blanks
[to_uchar (*sa
)])
2219 flen
--; /* This assumes TABs same width as SPACEs. */
2223 size_t offset
= debug_width (sline
, sfield
) + (sa
- sfield
);
2224 size_t width
= debug_width (sa
, sa
+ flen
);
2225 mark_key (offset
, width
);
2228 /* Testing if a key is numeric is done in various places. */
2231 key_numeric (struct keyfield
const *key
)
2233 return key
->numeric
|| key
->general_numeric
|| key
->human_numeric
;
2236 /* Return whether sorting options specified for key. */
2239 default_key_compare (struct keyfield
const *key
)
2241 return ! (key
->ignore
2245 || key_numeric (key
)
2249 /* || key->reverse */
2253 /* Convert a key to the short options used to specify it. */
2256 key_to_opts (struct keyfield
const *key
, char *opts
)
2258 if (key
->skipsblanks
|| key
->skipeblanks
)
2259 *opts
++ = 'b';/* either disables global -b */
2260 if (key
->ignore
== nondictionary
)
2264 if (key
->general_numeric
)
2266 if (key
->human_numeric
)
2268 if (key
->ignore
== nonprinting
)
2283 /* Output data independent key warnings to stderr. */
2286 key_warnings (struct keyfield
const *gkey
, bool gkey_only
)
2288 struct keyfield
const *key
;
2289 struct keyfield ugkey
= *gkey
;
2290 unsigned long keynum
= 1;
2292 for (key
= keylist
; key
; key
= key
->next
, keynum
++)
2294 if (key
->obsolete_used
)
2296 size_t sword
= key
->sword
;
2297 size_t eword
= key
->eword
;
2298 char tmp
[INT_BUFSIZE_BOUND (sword
)];
2299 /* obsolescent syntax +A.x -B.y is equivalent to:
2300 -k A+1.x+1,B.y (when y = 0)
2301 -k A+1.x+1,B+1.y (when y > 0) */
2302 char obuf
[INT_BUFSIZE_BOUND (sword
) * 2 + 4]; /* +# -# */
2303 char nbuf
[INT_BUFSIZE_BOUND (sword
) * 2 + 5]; /* -k #,# */
2307 if (sword
== SIZE_MAX
)
2310 po
= stpcpy (stpcpy (po
, "+"), umaxtostr (sword
, tmp
));
2311 pn
= stpcpy (stpcpy (pn
, "-k "), umaxtostr (sword
+ 1, tmp
));
2312 if (key
->eword
!= SIZE_MAX
)
2314 po
= stpcpy (stpcpy (po
, " -"), umaxtostr (eword
+ 1, tmp
));
2315 pn
= stpcpy (stpcpy (pn
, ","),
2316 umaxtostr (eword
+ 1
2317 + (key
->echar
== SIZE_MAX
), tmp
));
2319 error (0, 0, _("obsolescent key `%s' used; consider `%s' instead"),
2323 /* Warn about field specs that will never match. */
2324 if (key
->sword
!= SIZE_MAX
&& key
->eword
< key
->sword
)
2325 error (0, 0, _("key %lu has zero width and will be ignored"), keynum
);
2327 /* Warn about significant leading blanks. */
2328 bool implicit_skip
= key_numeric (key
) || key
->month
;
2329 bool maybe_space_aligned
= !hard_LC_COLLATE
&& default_key_compare (key
)
2330 && !(key
->schar
|| key
->echar
);
2331 bool line_offset
= key
->eword
== 0 && key
->echar
!= 0; /* -k1.x,1.y */
2332 if (!gkey_only
&& tab
== TAB_DEFAULT
&& !line_offset
2333 && ((!key
->skipsblanks
&& !(implicit_skip
|| maybe_space_aligned
))
2334 || (!key
->skipsblanks
&& key
->schar
)
2335 || (!key
->skipeblanks
&& key
->echar
)))
2336 error (0, 0, _("leading blanks are significant in key %lu; "
2337 "consider also specifying `b'"), keynum
);
2339 /* Warn about numeric comparisons spanning fields,
2340 as field delimiters could be interpreted as part
2341 of the number (maybe only in other locales). */
2342 if (!gkey_only
&& key_numeric (key
))
2344 size_t sword
= key
->sword
+ 1;
2345 size_t eword
= key
->eword
+ 1;
2349 error (0, 0, _("key %lu is numeric and spans multiple fields"),
2353 /* Flag global options not copied or specified in any key. */
2354 if (ugkey
.ignore
&& (ugkey
.ignore
== key
->ignore
))
2355 ugkey
.ignore
= NULL
;
2356 if (ugkey
.translate
&& (ugkey
.translate
== key
->translate
))
2357 ugkey
.translate
= NULL
;
2358 ugkey
.skipsblanks
&= !key
->skipsblanks
;
2359 ugkey
.skipeblanks
&= !key
->skipeblanks
;
2360 ugkey
.month
&= !key
->month
;
2361 ugkey
.numeric
&= !key
->numeric
;
2362 ugkey
.general_numeric
&= !key
->general_numeric
;
2363 ugkey
.human_numeric
&= !key
->human_numeric
;
2364 ugkey
.random
&= !key
->random
;
2365 ugkey
.version
&= !key
->version
;
2366 ugkey
.reverse
&= !key
->reverse
;
2369 /* Warn about ignored global options flagged above.
2370 Note if gkey is the only one in the list, all flags are cleared. */
2371 if (!default_key_compare (&ugkey
)
2372 || (ugkey
.reverse
&& (stable
|| unique
) && keylist
))
2374 bool ugkey_reverse
= ugkey
.reverse
;
2375 if (!(stable
|| unique
))
2376 ugkey
.reverse
= false;
2377 /* The following is too big, but guaranteed to be "big enough". */
2378 char opts
[sizeof short_options
];
2379 key_to_opts (&ugkey
, opts
);
2381 ngettext ("option `-%s' is ignored",
2382 "options `-%s' are ignored",
2383 select_plural (strlen (opts
))), opts
);
2384 ugkey
.reverse
= ugkey_reverse
;
2386 if (ugkey
.reverse
&& !(stable
|| unique
) && keylist
)
2387 error (0, 0, _("option `-r' only applies to last-resort comparison"));
2390 /* Compare two lines A and B trying every key in sequence until there
2391 are no more keys or a difference is found. */
2394 keycompare (struct line
const *a
, struct line
const *b
, bool show_debug
)
2396 struct keyfield
*key
= keylist
;
2398 /* For the first iteration only, the key positions have been
2399 precomputed for us. */
2400 char *texta
= a
->keybeg
;
2401 char *textb
= b
->keybeg
;
2402 char *lima
= a
->keylim
;
2403 char *limb
= b
->keylim
;
2409 char const *translate
= key
->translate
;
2410 bool const *ignore
= key
->ignore
;
2411 bool skipb
= false; /* Whether key type auto skips leading blanks. */
2413 /* Treat field ends before field starts as empty fields. */
2414 lima
= MAX (texta
, lima
);
2415 limb
= MAX (textb
, limb
);
2417 /* Find the lengths. */
2418 size_t lena
= lima
- texta
;
2419 size_t lenb
= limb
- textb
;
2421 /* Actually compare the fields. */
2424 diff
= compare_random (texta
, lena
, textb
, lenb
);
2425 else if (key_numeric (key
))
2427 char savea
= *lima
, saveb
= *limb
;
2430 *lima
= *limb
= '\0';
2431 diff
= (key
->numeric
? numcompare (texta
, textb
, &ea
)
2432 : key
->general_numeric
? general_numcompare (texta
, textb
,
2434 : human_numcompare (texta
, textb
, &ea
));
2440 *lima
= savea
, *limb
= saveb
;
2442 else if (key
->version
)
2443 diff
= compare_version (texta
, lena
, textb
, lenb
);
2444 else if (key
->month
)
2446 char const *ea
= lima
;
2448 int amon
= getmonth (texta
, lena
, &ea
);
2449 diff
= amon
- getmonth (textb
, lenb
, NULL
);
2453 lena
= amon
? ea
- texta
: 0;
2457 /* Sorting like this may become slow, so in a simple locale the user
2458 can select a faster sort that is similar to ascii sort. */
2459 else if (hard_LC_COLLATE
)
2461 /* FIXME: for debug, account for skipped chars, while handling mb chars.
2462 Generally perhaps xmemfrm could be used to determine chars that are
2463 excluded from the collating order? */
2464 if (ignore
|| translate
)
2467 size_t size
= lena
+ 1 + lenb
+ 1;
2468 char *copy_a
= (size
<= sizeof buf
? buf
: xmalloc (size
));
2469 char *copy_b
= copy_a
+ lena
+ 1;
2470 size_t new_len_a
, new_len_b
, i
;
2472 /* Ignore and/or translate chars before comparing. */
2473 for (new_len_a
= new_len_b
= i
= 0; i
< MAX (lena
, lenb
); i
++)
2477 copy_a
[new_len_a
] = (translate
2478 ? translate
[to_uchar (texta
[i
])]
2480 if (!ignore
|| !ignore
[to_uchar (texta
[i
])])
2485 copy_b
[new_len_b
] = (translate
2486 ? translate
[to_uchar (textb
[i
])]
2488 if (!ignore
|| !ignore
[to_uchar (textb
[i
])])
2493 copy_a
[new_len_a
++] = '\0';
2494 copy_b
[new_len_b
++] = '\0';
2495 diff
= xmemcoll0 (copy_a
, new_len_a
, copy_b
, new_len_b
);
2497 if (sizeof buf
< size
)
2501 diff
= - NONZERO (lenb
);
2505 diff
= xmemcoll (texta
, lena
, textb
, lenb
);
2509 char *savea
= texta
;
2511 #define CMP_WITH_IGNORE(A, B) \
2516 while (texta < lima && ignore[to_uchar (*texta)]) \
2518 while (textb < limb && ignore[to_uchar (*textb)]) \
2520 if (! (texta < lima && textb < limb)) \
2522 diff = to_uchar (A) - to_uchar (B); \
2529 diff = (texta < lima) - (textb < limb); \
2534 CMP_WITH_IGNORE (translate
[to_uchar (*texta
)],
2535 translate
[to_uchar (*textb
)]);
2537 CMP_WITH_IGNORE (*texta
, *textb
);
2539 /* We only need to restore this for debug_key
2540 in which case the keys being compared are equal. */
2544 diff
= - NONZERO (lenb
);
2551 char *savea
= texta
;
2553 while (texta
< lima
&& textb
< limb
)
2555 diff
= (to_uchar (translate
[to_uchar (*texta
++)])
2556 - to_uchar (translate
[to_uchar (*textb
++)]));
2561 /* We only need to restore this for debug_key
2562 in which case the keys being compared are equal. */
2567 diff
= memcmp (texta
, textb
, MIN (lena
, lenb
));
2571 diff
= lena
< lenb
? -1 : lena
!= lenb
;
2578 debug_key (a
->text
, texta
, lima
, lena
, skipb
);
2584 /* Find the beginning and limit of the next field. */
2585 if (key
->eword
!= SIZE_MAX
)
2586 lima
= limfield (a
, key
), limb
= limfield (b
, key
);
2588 lima
= a
->text
+ a
->length
- 1, limb
= b
->text
+ b
->length
- 1;
2590 if (key
->sword
!= SIZE_MAX
)
2591 texta
= begfield (a
, key
), textb
= begfield (b
, key
);
2594 texta
= a
->text
, textb
= b
->text
;
2595 if (key
->skipsblanks
)
2597 while (texta
< lima
&& blanks
[to_uchar (*texta
)])
2599 while (textb
< limb
&& blanks
[to_uchar (*textb
)])
2610 return key
->reverse
? -diff
: diff
;
2613 /* Compare two lines A and B, returning negative, zero, or positive
2614 depending on whether A compares less than, equal to, or greater than B. */
2617 compare (struct line
const *a
, struct line
const *b
, bool show_debug
)
2622 /* First try to compare on the specified keys (if any).
2623 The only two cases with no key at all are unadorned sort,
2624 and unadorned sort -r. */
2627 diff
= keycompare (a
, b
, show_debug
);
2628 if (diff
|| unique
|| stable
)
2632 /* If the keys all compare equal (or no keys were specified)
2633 fall through to the default comparison. */
2634 alen
= a
->length
- 1, blen
= b
->length
- 1;
2637 debug_key (a
->text
, a
->text
, a
->text
+ alen
, alen
, false);
2640 diff
= - NONZERO (blen
);
2643 else if (hard_LC_COLLATE
)
2645 /* Note xmemcoll0 is a performance enhancement as
2646 it will not unconditionally write '\0' after the
2647 passed in buffers, which was seen to give around
2648 a 3% increase in performance for short lines. */
2649 diff
= xmemcoll0 (a
->text
, alen
+ 1, b
->text
, blen
+ 1);
2651 else if (! (diff
= memcmp (a
->text
, b
->text
, MIN (alen
, blen
))))
2652 diff
= alen
< blen
? -1 : alen
!= blen
;
2654 return reverse
? -diff
: diff
;
2658 write_bytes (struct line
const *line
, FILE *fp
, char const *output_file
)
2660 char *buf
= line
->text
;
2661 size_t n_bytes
= line
->length
;
2662 char *ebuf
= buf
+ n_bytes
;
2664 /* Convert TABs to '>' and \0 to \n when -z specified. */
2665 if (debug
&& fp
== stdout
)
2667 char const *c
= buf
;
2676 if (fputc (wc
, fp
) == EOF
)
2677 die (_("write failed"), output_file
);
2680 compare (line
, line
, true);
2685 if (fwrite (buf
, 1, n_bytes
, fp
) != n_bytes
)
2686 die (_("write failed"), output_file
);
2691 /* Check that the lines read from FILE_NAME come in order. Return
2692 true if they are in order. If CHECKONLY == 'c', also print a
2693 diagnostic (FILE_NAME, line number, contents of line) to stderr if
2694 they are not in order. */
2697 check (char const *file_name
, char checkonly
)
2699 FILE *fp
= xfopen (file_name
, "r");
2700 struct buffer buf
; /* Input buffer. */
2701 struct line temp
; /* Copy of previous line. */
2703 uintmax_t line_number
= 0;
2704 struct keyfield
const *key
= keylist
;
2705 bool nonunique
= ! unique
;
2706 bool ordered
= true;
2708 initbuf (&buf
, sizeof (struct line
),
2709 MAX (merge_buffer_size
, sort_size
));
2712 while (fillbuf (&buf
, fp
, file_name
))
2714 struct line
const *line
= buffer_linelim (&buf
);
2715 struct line
const *linebase
= line
- buf
.nlines
;
2717 /* Make sure the line saved from the old buffer contents is
2718 less than or equal to the first line of the new buffer. */
2719 if (alloc
&& nonunique
<= compare (&temp
, line
- 1, false))
2723 if (checkonly
== 'c')
2725 struct line
const *disorder_line
= line
- 1;
2726 uintmax_t disorder_line_number
=
2727 buffer_linelim (&buf
) - disorder_line
+ line_number
;
2728 char hr_buf
[INT_BUFSIZE_BOUND (disorder_line_number
)];
2729 fprintf (stderr
, _("%s: %s:%s: disorder: "),
2730 program_name
, file_name
,
2731 umaxtostr (disorder_line_number
, hr_buf
));
2733 fputc ('\n', stderr
);
2734 write_bytes (disorder_line
, debug
? stdout
: stderr
,
2735 debug
? _("standard out") : _("standard error"));
2743 /* Compare each line in the buffer with its successor. */
2744 while (linebase
< --line
)
2745 if (nonunique
<= compare (line
, line
- 1, false))
2746 goto found_disorder
;
2748 line_number
+= buf
.nlines
;
2750 /* Save the last line of the buffer. */
2751 if (alloc
< line
->length
)
2758 alloc
= line
->length
;
2762 while (alloc
< line
->length
);
2764 temp
.text
= xrealloc (temp
.text
, alloc
);
2766 memcpy (temp
.text
, line
->text
, line
->length
);
2767 temp
.length
= line
->length
;
2770 temp
.keybeg
= temp
.text
+ (line
->keybeg
- line
->text
);
2771 temp
.keylim
= temp
.text
+ (line
->keylim
- line
->text
);
2775 xfclose (fp
, file_name
);
2781 /* Open FILES (there are NFILES of them) and store the resulting array
2782 of stream pointers into (*PFPS). Allocate the array. Return the
2783 number of successfully opened files, setting errno if this value is
2784 less than NFILES. */
2787 open_input_files (struct sortfile
*files
, size_t nfiles
, FILE ***pfps
)
2789 FILE **fps
= *pfps
= xnmalloc (nfiles
, sizeof *fps
);
2792 /* Open as many input files as we can. */
2793 for (i
= 0; i
< nfiles
; i
++)
2795 fps
[i
] = (files
[i
].pid
2796 ? open_temp (files
[i
].name
, files
[i
].pid
)
2797 : stream_open (files
[i
].name
, "r"));
2805 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
2806 files (all of which are at the start of the FILES array), and
2807 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2808 FPS is the vector of open stream corresponding to the files.
2809 Close input and output streams before returning.
2810 OUTPUT_FILE gives the name of the output file. If it is NULL,
2811 the output file is standard output. */
2814 mergefps (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
2815 FILE *ofp
, char const *output_file
, FILE **fps
)
2817 struct buffer
*buffer
= xnmalloc (nfiles
, sizeof *buffer
);
2818 /* Input buffers for each file. */
2819 struct line saved
; /* Saved line storage for unique check. */
2820 struct line
const *savedline
= NULL
;
2821 /* &saved if there is a saved line. */
2822 size_t savealloc
= 0; /* Size allocated for the saved line. */
2823 struct line
const **cur
= xnmalloc (nfiles
, sizeof *cur
);
2824 /* Current line in each line table. */
2825 struct line
const **base
= xnmalloc (nfiles
, sizeof *base
);
2826 /* Base of each line table. */
2827 size_t *ord
= xnmalloc (nfiles
, sizeof *ord
);
2828 /* Table representing a permutation of fps,
2829 such that cur[ord[0]] is the smallest line
2830 and will be next output. */
2834 struct keyfield
const *key
= keylist
;
2837 /* Read initial lines from each input file. */
2838 for (i
= 0; i
< nfiles
; )
2840 initbuf (&buffer
[i
], sizeof (struct line
),
2841 MAX (merge_buffer_size
, sort_size
/ nfiles
));
2842 if (fillbuf (&buffer
[i
], fps
[i
], files
[i
].name
))
2844 struct line
const *linelim
= buffer_linelim (&buffer
[i
]);
2845 cur
[i
] = linelim
- 1;
2846 base
[i
] = linelim
- buffer
[i
].nlines
;
2851 /* fps[i] is empty; eliminate it from future consideration. */
2852 xfclose (fps
[i
], files
[i
].name
);
2856 zaptemp (files
[i
].name
);
2858 free (buffer
[i
].buf
);
2860 for (j
= i
; j
< nfiles
; ++j
)
2862 files
[j
] = files
[j
+ 1];
2863 fps
[j
] = fps
[j
+ 1];
2868 /* Set up the ord table according to comparisons among input lines.
2869 Since this only reorders two items if one is strictly greater than
2870 the other, it is stable. */
2871 for (i
= 0; i
< nfiles
; ++i
)
2873 for (i
= 1; i
< nfiles
; ++i
)
2874 if (0 < compare (cur
[ord
[i
- 1]], cur
[ord
[i
]], false))
2875 t
= ord
[i
- 1], ord
[i
- 1] = ord
[i
], ord
[i
] = t
, i
= 0;
2877 /* Repeatedly output the smallest line until no input remains. */
2880 struct line
const *smallest
= cur
[ord
[0]];
2882 /* If uniquified output is turned on, output only the first of
2883 an identical series of lines. */
2886 if (savedline
&& compare (savedline
, smallest
, false))
2889 write_bytes (&saved
, ofp
, output_file
);
2894 if (savealloc
< smallest
->length
)
2899 savealloc
= smallest
->length
;
2902 while ((savealloc
*= 2) < smallest
->length
);
2904 saved
.text
= xrealloc (saved
.text
, savealloc
);
2906 saved
.length
= smallest
->length
;
2907 memcpy (saved
.text
, smallest
->text
, saved
.length
);
2911 saved
.text
+ (smallest
->keybeg
- smallest
->text
);
2913 saved
.text
+ (smallest
->keylim
- smallest
->text
);
2918 write_bytes (smallest
, ofp
, output_file
);
2920 /* Check if we need to read more lines into core. */
2921 if (base
[ord
[0]] < smallest
)
2922 cur
[ord
[0]] = smallest
- 1;
2925 if (fillbuf (&buffer
[ord
[0]], fps
[ord
[0]], files
[ord
[0]].name
))
2927 struct line
const *linelim
= buffer_linelim (&buffer
[ord
[0]]);
2928 cur
[ord
[0]] = linelim
- 1;
2929 base
[ord
[0]] = linelim
- buffer
[ord
[0]].nlines
;
2933 /* We reached EOF on fps[ord[0]]. */
2934 for (i
= 1; i
< nfiles
; ++i
)
2935 if (ord
[i
] > ord
[0])
2938 xfclose (fps
[ord
[0]], files
[ord
[0]].name
);
2939 if (ord
[0] < ntemps
)
2942 zaptemp (files
[ord
[0]].name
);
2944 free (buffer
[ord
[0]].buf
);
2945 for (i
= ord
[0]; i
< nfiles
; ++i
)
2947 fps
[i
] = fps
[i
+ 1];
2948 files
[i
] = files
[i
+ 1];
2949 buffer
[i
] = buffer
[i
+ 1];
2950 cur
[i
] = cur
[i
+ 1];
2951 base
[i
] = base
[i
+ 1];
2953 for (i
= 0; i
< nfiles
; ++i
)
2954 ord
[i
] = ord
[i
+ 1];
2959 /* The new line just read in may be larger than other lines
2960 already in main memory; push it back in the queue until we
2961 encounter a line larger than it. Optimize for the common
2962 case where the new line is smallest. */
2967 size_t ord0
= ord
[0];
2968 size_t count_of_smaller_lines
;
2972 int cmp
= compare (cur
[ord0
], cur
[ord
[probe
]], false);
2973 if (cmp
< 0 || (cmp
== 0 && ord0
< ord
[probe
]))
2977 probe
= (lo
+ hi
) / 2;
2980 count_of_smaller_lines
= lo
- 1;
2981 for (j
= 0; j
< count_of_smaller_lines
; j
++)
2982 ord
[j
] = ord
[j
+ 1];
2983 ord
[count_of_smaller_lines
] = ord0
;
2986 /* Free up some resources every once in a while. */
2987 if (MAX_PROCS_BEFORE_REAP
< nprocs
)
2991 if (unique
&& savedline
)
2993 write_bytes (&saved
, ofp
, output_file
);
2997 xfclose (ofp
, output_file
);
3005 /* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
3006 files (all of which are at the start of the FILES array), and
3007 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
3008 Close input and output files before returning.
3009 OUTPUT_FILE gives the name of the output file.
3011 Return the number of files successfully merged. This number can be
3012 less than NFILES if we ran low on file descriptors, but in this
3013 case it is never less than 2. */
3016 mergefiles (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
3017 FILE *ofp
, char const *output_file
)
3020 size_t nopened
= open_input_files (files
, nfiles
, &fps
);
3021 if (nopened
< nfiles
&& nopened
< 2)
3022 die (_("open failed"), files
[nopened
].name
);
3023 mergefps (files
, ntemps
, nopened
, ofp
, output_file
, fps
);
3027 /* Merge into T (of size NLINES) the two sorted arrays of lines
3028 LO (with NLINES / 2 members), and
3029 T - (NLINES / 2) (with NLINES - NLINES / 2 members).
3030 T and LO point just past their respective arrays, and the arrays
3031 are in reverse order. NLINES must be at least 2. */
3034 mergelines (struct line
*restrict t
, size_t nlines
,
3035 struct line
const *restrict lo
)
3037 size_t nlo
= nlines
/ 2;
3038 size_t nhi
= nlines
- nlo
;
3039 struct line
*hi
= t
- nlo
;
3042 if (compare (lo
- 1, hi
- 1, false) <= 0)
3047 /* HI must equal T now, and there is no need to copy from
3066 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
3067 NLINES must be at least 2.
3068 The input and output arrays are in reverse order, and LINES and
3069 TEMP point just past the end of their respective arrays.
3071 Use a recursive divide-and-conquer algorithm, in the style
3072 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
3073 the optimization suggested by exercise 5.2.4-10; this requires room
3074 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
3075 writes that this memory optimization was originally published by
3076 D. A. Bell, Comp J. 1 (1958), 75. */
3079 sequential_sort (struct line
*restrict lines
, size_t nlines
,
3080 struct line
*restrict temp
, bool to_temp
)
3084 /* Declare `swap' as int, not bool, to work around a bug
3085 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
3086 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
3087 int swap
= (0 < compare (&lines
[-1], &lines
[-2], false));
3090 temp
[-1] = lines
[-1 - swap
];
3091 temp
[-2] = lines
[-2 + swap
];
3095 temp
[-1] = lines
[-1];
3096 lines
[-1] = lines
[-2];
3097 lines
[-2] = temp
[-1];
3102 size_t nlo
= nlines
/ 2;
3103 size_t nhi
= nlines
- nlo
;
3104 struct line
*lo
= lines
;
3105 struct line
*hi
= lines
- nlo
;
3107 sequential_sort (hi
, nhi
, temp
- (to_temp
? nlo
: 0), to_temp
);
3109 sequential_sort (lo
, nlo
, temp
, !to_temp
);
3114 struct line
const *sorted_lo
;
3125 mergelines (dest
, nlines
, sorted_lo
);
3129 /* Compare two NODEs for priority. The NODE with the higher (numerically
3130 lower) level has priority. If tie, the NODE with the most remaining
3131 lines has priority. */
3134 compare_nodes (void const *a
, void const *b
)
3136 struct merge_node
const *nodea
= a
;
3137 struct merge_node
const *nodeb
= b
;
3138 if (nodea
->level
== nodeb
->level
)
3139 return (nodea
->nlo
+ nodea
->nhi
) < (nodeb
->nlo
+ nodeb
->nhi
);
3140 return nodea
->level
< nodeb
->level
;
3143 /* Lock a merge tree NODE.
3144 Note spin locks were seen to perform better than mutexes
3145 as long as the number of threads is limited to the
3146 number of processors. */
3149 lock_node (struct merge_node
*node
)
3151 pthread_spin_lock (node
->lock
);
3154 /* Unlock a merge tree NODE. */
3157 unlock_node (struct merge_node
*node
)
3159 pthread_spin_unlock (node
->lock
);
3162 /* Destroy merge QUEUE. */
3165 queue_destroy (struct merge_node_queue
*queue
)
3167 heap_free (queue
->priority_queue
);
3168 pthread_cond_destroy (&queue
->cond
);
3169 pthread_mutex_destroy (&queue
->mutex
);
3172 /* Initialize merge QUEUE, allocating space for a maximum of RESERVE nodes.
3173 Though it's highly unlikely all nodes are in the heap at the same time,
3174 RESERVE should accommodate all of them. Counting a NULL dummy head for the
3175 heap, RESERVE should be 2 * NTHREADS. */
3178 queue_init (struct merge_node_queue
*queue
, size_t reserve
)
3180 queue
->priority_queue
= heap_alloc (compare_nodes
, reserve
);
3181 pthread_mutex_init (&queue
->mutex
, NULL
);
3182 pthread_cond_init (&queue
->cond
, NULL
);
3185 /* Insert NODE into priority QUEUE. Assume caller either holds lock on NODE
3186 or does not need to lock NODE. */
3189 queue_insert (struct merge_node_queue
*queue
, struct merge_node
*node
)
3191 pthread_mutex_lock (&queue
->mutex
);
3192 heap_insert (queue
->priority_queue
, node
);
3193 node
->queued
= true;
3194 pthread_mutex_unlock (&queue
->mutex
);
3195 pthread_cond_signal (&queue
->cond
);
3198 /* Pop NODE off priority QUEUE. Guarantee a non-null, spinlocked NODE. */
3200 static struct merge_node
*
3201 queue_pop (struct merge_node_queue
*queue
)
3203 struct merge_node
*node
;
3204 pthread_mutex_lock (&queue
->mutex
);
3205 while (! (node
= heap_remove_top (queue
->priority_queue
)))
3206 pthread_cond_wait (&queue
->cond
, &queue
->mutex
);
3207 pthread_mutex_unlock (&queue
->mutex
);
3209 node
->queued
= false;
3213 /* If UNQIUE is set, checks to make sure line isn't a duplicate before
3214 outputting. If UNIQUE is not set, output the passed in line. Note that
3215 this function does not actually save the line, nor any key information,
3216 thus is only appropriate for internal sort. */
3219 write_unique (struct line
const *line
, FILE *tfp
, char const *temp_output
)
3221 static struct line
const *saved
= NULL
;
3224 write_bytes (line
, tfp
, temp_output
);
3225 else if (!saved
|| compare (line
, saved
, false))
3228 write_bytes (line
, tfp
, temp_output
);
3232 /* Merge the lines currently available to a NODE in the binary
3233 merge tree, up to a maximum specified by MAX_MERGE. */
3236 mergelines_node (struct merge_node
*restrict node
, size_t total_lines
,
3237 FILE *tfp
, char const *temp_output
)
3239 struct line
*lo_orig
= node
->lo
;
3240 struct line
*hi_orig
= node
->hi
;
3241 size_t to_merge
= MAX_MERGE (total_lines
, node
->level
);
3245 if (node
->level
> MERGE_ROOT
)
3247 /* Merge to destination buffer. */
3248 struct line
*dest
= *node
->dest
;
3249 while (node
->lo
!= node
->end_lo
&& node
->hi
!= node
->end_hi
&& to_merge
--)
3250 if (compare (node
->lo
- 1, node
->hi
- 1, false) <= 0)
3251 *--dest
= *--node
->lo
;
3253 *--dest
= *--node
->hi
;
3255 merged_lo
= lo_orig
- node
->lo
;
3256 merged_hi
= hi_orig
- node
->hi
;
3258 if (node
->nhi
== merged_hi
)
3259 while (node
->lo
!= node
->end_lo
&& to_merge
--)
3260 *--dest
= *--node
->lo
;
3261 else if (node
->nlo
== merged_lo
)
3262 while (node
->hi
!= node
->end_hi
&& to_merge
--)
3263 *--dest
= *--node
->hi
;
3267 /* Merge directly to output. */
3268 while (node
->lo
!= node
->end_lo
&& node
->hi
!= node
->end_hi
&& to_merge
--)
3270 if (compare (node
->lo
- 1, node
->hi
- 1, false) <= 0)
3271 write_unique (--node
->lo
, tfp
, temp_output
);
3273 write_unique (--node
->hi
, tfp
, temp_output
);
3276 merged_lo
= lo_orig
- node
->lo
;
3277 merged_hi
= hi_orig
- node
->hi
;
3279 if (node
->nhi
== merged_hi
)
3281 while (node
->lo
!= node
->end_lo
&& to_merge
--)
3282 write_unique (--node
->lo
, tfp
, temp_output
);
3284 else if (node
->nlo
== merged_lo
)
3286 while (node
->hi
!= node
->end_hi
&& to_merge
--)
3287 write_unique (--node
->hi
, tfp
, temp_output
);
3289 node
->dest
-= lo_orig
- node
->lo
+ hi_orig
- node
->hi
;
3293 merged_lo
= lo_orig
- node
->lo
;
3294 merged_hi
= hi_orig
- node
->hi
;
3295 node
->nlo
-= merged_lo
;
3296 node
->nhi
-= merged_hi
;
3297 return merged_lo
+ merged_hi
;
3300 /* Insert NODE into QUEUE if it passes insertion checks. */
3303 check_insert (struct merge_node
*node
, struct merge_node_queue
*queue
)
3305 size_t lo_avail
= node
->lo
- node
->end_lo
;
3306 size_t hi_avail
= node
->hi
- node
->end_hi
;
3308 /* Conditions for insertion:
3309 1. NODE is not already in heap.
3310 2. NODE has available lines from both it's children, OR one child has
3311 available lines, but the other has exhausted all its lines. */
3313 && ((lo_avail
&& (hi_avail
|| !(node
->nhi
)))
3314 || (hi_avail
&& !(node
->nlo
))))
3316 queue_insert (queue
, node
);
3320 /* Update parent merge tree NODE. */
3323 update_parent (struct merge_node
*node
, size_t merged
,
3324 struct merge_node_queue
*queue
)
3326 if (node
->level
> MERGE_ROOT
)
3328 lock_node (node
->parent
);
3329 *node
->dest
-= merged
;
3330 check_insert (node
->parent
, queue
);
3331 unlock_node (node
->parent
);
3333 else if (node
->nlo
+ node
->nhi
== 0)
3335 /* If the MERGE_ROOT NODE has finished merging, insert the
3337 queue_insert (queue
, node
->parent
);
3341 /* Repeatedly pop QUEUE for a NODE with lines to merge, and merge at least
3342 some of those lines, until the MERGE_END node is popped. */
3345 merge_loop (struct merge_node_queue
*queue
,
3346 size_t total_lines
, FILE *tfp
, char const *temp_output
)
3350 struct merge_node
*node
= queue_pop (queue
);
3352 if (node
->level
== MERGE_END
)
3355 /* Reinsert so other threads can pop it. */
3356 queue_insert (queue
, node
);
3359 size_t merged_lines
= mergelines_node (node
, total_lines
, tfp
,
3361 check_insert (node
, queue
);
3362 update_parent (node
, merged_lines
, queue
);
3369 static void sortlines (struct line
*restrict
, struct line
*restrict
,
3370 unsigned long int, size_t,
3371 struct merge_node
*, bool,
3372 struct merge_node_queue
*,
3373 FILE *, char const *);
3375 /* Thread arguments for sortlines_thread. */
3381 unsigned long int nthreads
;
3382 size_t const total_lines
;
3383 struct merge_node
*const parent
;
3385 struct merge_node_queue
*const merge_queue
;
3387 char const *output_temp
;
3390 /* Like sortlines, except with a signature acceptable to pthread_create. */
3393 sortlines_thread (void *data
)
3395 struct thread_args
const *args
= data
;
3396 sortlines (args
->lines
, args
->dest
, args
->nthreads
, args
->total_lines
,
3397 args
->parent
, args
->lo_child
, args
->merge_queue
,
3398 args
->tfp
, args
->output_temp
);
3402 /* There are three phases to the algorithm: node creation, sequential sort,
3405 During node creation, sortlines recursively visits each node in the
3406 binary merge tree and creates a NODE structure corresponding to all the
3407 future line merging NODE is responsible for. For each call to
3408 sortlines, half the available threads are assigned to each recursive
3409 call, until a leaf node having only 1 available thread is reached.
3411 Each leaf node then performs two sequential sorts, one on each half of
3412 the lines it is responsible for. It records in its NODE structure that
3413 there are two sorted sublists available to merge from, and inserts its
3414 NODE into the priority queue.
3416 The binary merge phase then begins. Each thread drops into a loop
3417 where the thread retrieves a NODE from the priority queue, merges lines
3418 available to that NODE, and potentially insert NODE or its parent back
3419 into the queue if there are sufficient available lines for them to
3420 merge. This continues until all lines at all nodes of the merge tree
3421 have been merged. */
3424 sortlines (struct line
*restrict lines
, struct line
*restrict dest
,
3425 unsigned long int nthreads
, size_t total_lines
,
3426 struct merge_node
*parent
, bool lo_child
,
3427 struct merge_node_queue
*merge_queue
,
3428 FILE *tfp
, char const *temp_output
)
3430 /* Create merge tree NODE. */
3431 size_t nlines
= (lo_child
)? parent
->nlo
: parent
->nhi
;
3432 size_t nlo
= nlines
/ 2;
3433 size_t nhi
= nlines
- nlo
;
3434 struct line
*lo
= dest
- total_lines
;
3435 struct line
*hi
= lo
- nlo
;
3436 struct line
**parent_end
= (lo_child
)? &parent
->end_lo
: &parent
->end_hi
;
3437 pthread_spinlock_t lock
;
3438 pthread_spin_init (&lock
, PTHREAD_PROCESS_PRIVATE
);
3439 struct merge_node node
= {lo
, hi
, lo
, hi
, parent_end
, nlo
, nhi
,
3440 parent
->level
+ 1, parent
, false, &lock
};
3442 /* Calculate thread arguments. */
3443 unsigned long int lo_threads
= nthreads
/ 2;
3444 unsigned long int hi_threads
= nthreads
- lo_threads
;
3446 struct thread_args args
= {lines
, lo
, lo_threads
, total_lines
, &node
,
3447 true, merge_queue
, tfp
, temp_output
};
3449 if (nthreads
> 1 && SUBTHREAD_LINES_HEURISTIC
<= nlines
3450 && pthread_create (&thread
, NULL
, sortlines_thread
, &args
) == 0)
3452 sortlines (lines
- nlo
, hi
, hi_threads
, total_lines
, &node
, false,
3453 merge_queue
, tfp
, temp_output
);
3454 pthread_join (thread
, NULL
);
3458 /* Nthreads = 1, this is a leaf NODE, or pthread_create failed.
3459 Sort with 1 thread. */
3460 struct line
*temp
= lines
- total_lines
;
3462 sequential_sort (lines
- nlo
, nhi
, temp
- nlo
/ 2, false);
3464 sequential_sort (lines
, nlo
, temp
, false);
3466 /* Update merge NODE. No need to lock yet. */
3468 node
.hi
= lines
- nlo
;
3469 node
.end_lo
= lines
- nlo
;
3470 node
.end_hi
= lines
- nlo
- nhi
;
3472 queue_insert (merge_queue
, &node
);
3473 merge_loop (merge_queue
, total_lines
, tfp
, temp_output
);
3477 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
3478 the same as OUTFILE. If found, merge the found instances (and perhaps
3479 some other files) into a temporary file so that it can in turn be
3480 merged into OUTFILE without destroying OUTFILE before it is completely
3481 read. Return the new value of NFILES, which differs from the old if
3482 some merging occurred.
3484 This test ensures that an otherwise-erroneous use like
3485 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
3486 It's not clear that POSIX requires this nicety.
3487 Detect common error cases, but don't try to catch obscure cases like
3488 "cat ... FILE ... | sort -m -o FILE"
3489 where traditional "sort" doesn't copy the input and where
3490 people should know that they're getting into trouble anyway.
3491 Catching these obscure cases would slow down performance in
3495 avoid_trashing_input (struct sortfile
*files
, size_t ntemps
,
3496 size_t nfiles
, char const *outfile
)
3499 bool got_outstat
= false;
3500 struct stat outstat
;
3502 for (i
= ntemps
; i
< nfiles
; i
++)
3504 bool is_stdin
= STREQ (files
[i
].name
, "-");
3508 if (outfile
&& STREQ (outfile
, files
[i
].name
) && !is_stdin
)
3515 ? stat (outfile
, &outstat
)
3516 : fstat (STDOUT_FILENO
, &outstat
))
3523 ? fstat (STDIN_FILENO
, &instat
)
3524 : stat (files
[i
].name
, &instat
))
3526 && SAME_INODE (instat
, outstat
));
3533 char *temp
= create_temp (&tftp
, &pid
);
3534 size_t num_merged
= 0;
3537 num_merged
+= mergefiles (&files
[i
], 0, nfiles
- i
, tftp
, temp
);
3538 files
[i
].name
= temp
;
3541 if (i
+ num_merged
< nfiles
)
3542 memmove (&files
[i
+ 1], &files
[i
+ num_merged
],
3543 num_merged
* sizeof *files
);
3545 nfiles
-= num_merged
- 1;;
3555 /* Merge the input FILES. NTEMPS is the number of files at the
3556 start of FILES that are temporary; it is zero at the top level.
3557 NFILES is the total number of files. Put the output in
3558 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
3561 merge (struct sortfile
*files
, size_t ntemps
, size_t nfiles
,
3562 char const *output_file
)
3564 while (nmerge
< nfiles
)
3566 /* Number of input files processed so far. */
3569 /* Number of output files generated so far. */
3572 /* nfiles % NMERGE; this counts input files that are left over
3573 after all full-sized merges have been done. */
3576 /* Number of easily-available slots at the next loop iteration. */
3579 /* Do as many NMERGE-size merges as possible. In the case that
3580 nmerge is bogus, increment by the maximum number of file
3581 descriptors allowed. */
3582 for (out
= in
= 0; nmerge
<= nfiles
- in
; out
++)
3586 char *temp
= create_temp (&tfp
, &pid
);
3587 size_t num_merged
= mergefiles (&files
[in
], MIN (ntemps
, nmerge
),
3589 ntemps
-= MIN (ntemps
, num_merged
);
3590 files
[out
].name
= temp
;
3591 files
[out
].pid
= pid
;
3595 remainder
= nfiles
- in
;
3596 cheap_slots
= nmerge
- out
% nmerge
;
3598 if (cheap_slots
< remainder
)
3600 /* So many files remain that they can't all be put into the last
3601 NMERGE-sized output window. Do one more merge. Merge as few
3602 files as possible, to avoid needless I/O. */
3603 size_t nshortmerge
= remainder
- cheap_slots
+ 1;
3606 char *temp
= create_temp (&tfp
, &pid
);
3607 size_t num_merged
= mergefiles (&files
[in
], MIN (ntemps
, nshortmerge
),
3608 nshortmerge
, tfp
, temp
);
3609 ntemps
-= MIN (ntemps
, num_merged
);
3610 files
[out
].name
= temp
;
3611 files
[out
++].pid
= pid
;
3615 /* Put the remaining input files into the last NMERGE-sized output
3616 window, so they will be merged in the next pass. */
3617 memmove (&files
[out
], &files
[in
], (nfiles
- in
) * sizeof *files
);
3622 nfiles
= avoid_trashing_input (files
, ntemps
, nfiles
, output_file
);
3624 /* We aren't guaranteed that this final mergefiles will work, therefore we
3625 try to merge into the output, and then merge as much as we can into a
3626 temp file if we can't. Repeat. */
3630 /* Merge directly into the output file if possible. */
3632 size_t nopened
= open_input_files (files
, nfiles
, &fps
);
3634 if (nopened
== nfiles
)
3636 FILE *ofp
= stream_open (output_file
, "w");
3639 mergefps (files
, ntemps
, nfiles
, ofp
, output_file
, fps
);
3642 if (errno
!= EMFILE
|| nopened
<= 2)
3643 die (_("open failed"), output_file
);
3645 else if (nopened
<= 2)
3646 die (_("open failed"), files
[nopened
].name
);
3648 /* We ran out of file descriptors. Close one of the input
3649 files, to gain a file descriptor. Then create a temporary
3650 file with our spare file descriptor. Retry if that failed
3651 (e.g., some other process could open a file between the time
3652 we closed and tried to create). */
3659 xfclose (fps
[nopened
], files
[nopened
].name
);
3660 temp
= maybe_create_temp (&tfp
, &pid
, ! (nopened
<= 2));
3664 /* Merge into the newly allocated temporary. */
3665 mergefps (&files
[0], MIN (ntemps
, nopened
), nopened
, tfp
, temp
, fps
);
3666 ntemps
-= MIN (ntemps
, nopened
);
3667 files
[0].name
= temp
;
3670 memmove (&files
[1], &files
[nopened
], (nfiles
- nopened
) * sizeof *files
);
3672 nfiles
-= nopened
- 1;
3676 /* Sort NFILES FILES onto OUTPUT_FILE. */
3679 sort (char * const *files
, size_t nfiles
, char const *output_file
,
3680 unsigned long int nthreads
)
3684 bool output_file_created
= false;
3690 char const *temp_output
;
3691 char const *file
= *files
;
3692 FILE *fp
= xfopen (file
, "r");
3695 size_t bytes_per_line
;
3699 unsigned long int tmp
= 1;
3701 while (tmp
< nthreads
)
3706 bytes_per_line
= (mult
* sizeof (struct line
));
3709 bytes_per_line
= sizeof (struct line
) * 3 / 2;
3712 initbuf (&buf
, bytes_per_line
,
3713 sort_buffer_size (&fp
, 1, files
, nfiles
, bytes_per_line
));
3718 while (fillbuf (&buf
, fp
, file
))
3722 if (buf
.eof
&& nfiles
3723 && (bytes_per_line
+ 1
3724 < (buf
.alloc
- buf
.used
- bytes_per_line
* buf
.nlines
)))
3726 /* End of file, but there is more input and buffer room.
3727 Concatenate the next input file; this is faster in
3729 buf
.left
= buf
.used
;
3733 line
= buffer_linelim (&buf
);
3734 if (buf
.eof
&& !nfiles
&& !ntemps
&& !buf
.left
)
3737 tfp
= xfopen (output_file
, "w");
3738 temp_output
= output_file
;
3739 output_file_created
= true;
3744 temp_output
= create_temp (&tfp
, NULL
);
3748 struct merge_node_queue merge_queue
;
3749 queue_init (&merge_queue
, 2 * nthreads
);
3751 pthread_spinlock_t lock
;
3752 pthread_spin_init (&lock
, PTHREAD_PROCESS_PRIVATE
);
3753 struct merge_node node
=
3754 {NULL
, NULL
, NULL
, NULL
, NULL
, buf
.nlines
,
3755 buf
.nlines
, MERGE_END
, NULL
, false, &lock
};
3757 sortlines (line
, line
, nthreads
, buf
.nlines
, &node
, true,
3758 &merge_queue
, tfp
, temp_output
);
3759 queue_destroy (&merge_queue
);
3762 write_unique (line
- 1, tfp
, temp_output
);
3764 xfclose (tfp
, temp_output
);
3766 /* Free up some resources every once in a while. */
3767 if (MAX_PROCS_BEFORE_REAP
< nprocs
)
3770 if (output_file_created
)
3779 if (! output_file_created
)
3782 struct tempnode
*node
= temphead
;
3783 struct sortfile
*tempfiles
= xnmalloc (ntemps
, sizeof *tempfiles
);
3784 for (i
= 0; node
; i
++)
3786 tempfiles
[i
].name
= node
->name
;
3787 tempfiles
[i
].pid
= node
->pid
;
3790 merge (tempfiles
, ntemps
, ntemps
, output_file
);
3795 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
3798 insertkey (struct keyfield
*key_arg
)
3800 struct keyfield
**p
;
3801 struct keyfield
*key
= xmemdup (key_arg
, sizeof *key
);
3803 for (p
= &keylist
; *p
; p
= &(*p
)->next
)
3809 /* Report a bad field specification SPEC, with extra info MSGID. */
3811 static void badfieldspec (char const *, char const *)
3814 badfieldspec (char const *spec
, char const *msgid
)
3816 error (SORT_FAILURE
, 0, _("%s: invalid field specification %s"),
3817 _(msgid
), quote (spec
));
3821 /* Report incompatible options. */
3823 static void incompatible_options (char const *) ATTRIBUTE_NORETURN
;
3825 incompatible_options (char const *opts
)
3827 error (SORT_FAILURE
, 0, _("options `-%s' are incompatible"), opts
);
3831 /* Check compatibility of ordering options. */
3834 check_ordering_compatibility (void)
3836 struct keyfield
*key
;
3838 for (key
= keylist
; key
; key
= key
->next
)
3839 if ((1 < (key
->random
+ key
->numeric
+ key
->general_numeric
+ key
->month
3840 + key
->version
+ !!key
->ignore
+ key
->human_numeric
))
3841 || (key
->random
&& key
->translate
))
3843 /* The following is too big, but guaranteed to be "big enough". */
3844 char opts
[sizeof short_options
];
3845 /* Clear flags we're not interested in. */
3846 key
->skipsblanks
= key
->skipeblanks
= key
->reverse
= false;
3847 key_to_opts (key
, opts
);
3848 incompatible_options (opts
);
3852 /* Parse the leading integer in STRING and store the resulting value
3853 (which must fit into size_t) into *VAL. Return the address of the
3854 suffix after the integer. If the value is too large, silently
3855 substitute SIZE_MAX. If MSGID is NULL, return NULL after
3856 failure; otherwise, report MSGID and exit on failure. */
3859 parse_field_count (char const *string
, size_t *val
, char const *msgid
)
3864 switch (xstrtoumax (string
, &suffix
, 10, &n
, ""))
3867 case LONGINT_INVALID_SUFFIX_CHAR
:
3872 case LONGINT_OVERFLOW
:
3873 case LONGINT_OVERFLOW
| LONGINT_INVALID_SUFFIX_CHAR
:
3877 case LONGINT_INVALID
:
3879 error (SORT_FAILURE
, 0, _("%s: invalid count at start of %s"),
3880 _(msgid
), quote (string
));
3887 /* Handle interrupts and hangups. */
3890 sighandler (int sig
)
3893 signal (sig
, SIG_IGN
);
3897 signal (sig
, SIG_DFL
);
3901 /* Set the ordering options for KEY specified in S.
3902 Return the address of the first character in S that
3903 is not a valid ordering option.
3904 BLANKTYPE is the kind of blanks that 'b' should skip. */
3907 set_ordering (char const *s
, struct keyfield
*key
, enum blanktype blanktype
)
3914 if (blanktype
== bl_start
|| blanktype
== bl_both
)
3915 key
->skipsblanks
= true;
3916 if (blanktype
== bl_end
|| blanktype
== bl_both
)
3917 key
->skipeblanks
= true;
3920 key
->ignore
= nondictionary
;
3923 key
->translate
= fold_toupper
;
3926 key
->general_numeric
= true;
3929 key
->human_numeric
= true;
3932 /* Option order should not matter, so don't let -i override
3933 -d. -d implies -i, but -i does not imply -d. */
3935 key
->ignore
= nonprinting
;
3941 key
->numeric
= true;
3947 key
->reverse
= true;
3950 key
->version
= true;
3960 static struct keyfield
*
3961 key_init (struct keyfield
*key
)
3963 memset (key
, 0, sizeof *key
);
3964 key
->eword
= SIZE_MAX
;
3969 main (int argc
, char **argv
)
3971 struct keyfield
*key
;
3972 struct keyfield key_buf
;
3973 struct keyfield gkey
;
3974 bool gkey_only
= false;
3978 bool mergeonly
= false;
3979 char *random_source
= NULL
;
3980 bool need_random
= false;
3981 unsigned long int nthreads
= 0;
3983 bool posixly_correct
= (getenv ("POSIXLY_CORRECT") != NULL
);
3984 bool obsolete_usage
= (posix2_version () < 200112);
3986 char *files_from
= NULL
;
3988 char const *outfile
= NULL
;
3990 initialize_main (&argc
, &argv
);
3991 set_program_name (argv
[0]);
3992 setlocale (LC_ALL
, "");
3993 bindtextdomain (PACKAGE
, LOCALEDIR
);
3994 textdomain (PACKAGE
);
3996 initialize_exit_failure (SORT_FAILURE
);
3998 hard_LC_COLLATE
= hard_locale (LC_COLLATE
);
3999 #if HAVE_NL_LANGINFO
4000 hard_LC_TIME
= hard_locale (LC_TIME
);
4003 /* Get locale's representation of the decimal point. */
4005 struct lconv
const *locale
= localeconv ();
4007 /* If the locale doesn't define a decimal point, or if the decimal
4008 point is multibyte, use the C locale's decimal point. FIXME:
4009 add support for multibyte decimal points. */
4010 decimal_point
= to_uchar (locale
->decimal_point
[0]);
4011 if (! decimal_point
|| locale
->decimal_point
[1])
4012 decimal_point
= '.';
4014 /* FIXME: add support for multibyte thousands separators. */
4015 thousands_sep
= to_uchar (*locale
->thousands_sep
);
4016 if (! thousands_sep
|| locale
->thousands_sep
[1])
4020 have_read_stdin
= false;
4025 static int const sig
[] =
4027 /* The usual suspects. */
4028 SIGALRM
, SIGHUP
, SIGINT
, SIGPIPE
, SIGQUIT
, SIGTERM
,
4045 enum { nsigs
= ARRAY_CARDINALITY (sig
) };
4048 struct sigaction act
;
4050 sigemptyset (&caught_signals
);
4051 for (i
= 0; i
< nsigs
; i
++)
4053 sigaction (sig
[i
], NULL
, &act
);
4054 if (act
.sa_handler
!= SIG_IGN
)
4055 sigaddset (&caught_signals
, sig
[i
]);
4058 act
.sa_handler
= sighandler
;
4059 act
.sa_mask
= caught_signals
;
4062 for (i
= 0; i
< nsigs
; i
++)
4063 if (sigismember (&caught_signals
, sig
[i
]))
4064 sigaction (sig
[i
], &act
, NULL
);
4066 for (i
= 0; i
< nsigs
; i
++)
4067 if (signal (sig
[i
], SIG_IGN
) != SIG_IGN
)
4069 signal (sig
[i
], sighandler
);
4070 siginterrupt (sig
[i
], 1);
4074 signal (SIGCHLD
, SIG_DFL
); /* Don't inherit CHLD handling from parent. */
4076 /* The signal mask is known, so it is safe to invoke exit_cleanup. */
4077 atexit (exit_cleanup
);
4080 gkey
.sword
= SIZE_MAX
;
4082 files
= xnmalloc (argc
, sizeof *files
);
4086 /* Parse an operand as a file after "--" was seen; or if
4087 pedantic and a file was seen, unless the POSIX version
4088 predates 1003.1-2001 and -c was not seen and the operand is
4089 "-o FILE" or "-oFILE". */
4093 || (posixly_correct
&& nfiles
!= 0
4094 && ! (obsolete_usage
4097 && argv
[optind
][0] == '-' && argv
[optind
][1] == 'o'
4098 && (argv
[optind
][2] || optind
+ 1 != argc
)))
4099 || ((c
= getopt_long (argc
, argv
, short_options
,
4105 files
[nfiles
++] = argv
[optind
++];
4111 if (optarg
[0] == '+')
4113 bool minus_pos_usage
= (optind
!= argc
&& argv
[optind
][0] == '-'
4114 && ISDIGIT (argv
[optind
][1]));
4115 obsolete_usage
|= minus_pos_usage
&& !posixly_correct
;
4118 /* Treat +POS1 [-POS2] as a key if possible; but silently
4119 treat an operand as a file if it is not a valid +POS1. */
4120 key
= key_init (&key_buf
);
4121 s
= parse_field_count (optarg
+ 1, &key
->sword
, NULL
);
4123 s
= parse_field_count (s
+ 1, &key
->schar
, NULL
);
4124 if (! (key
->sword
|| key
->schar
))
4125 key
->sword
= SIZE_MAX
;
4126 if (! s
|| *set_ordering (s
, key
, bl_start
))
4130 if (minus_pos_usage
)
4132 char const *optarg1
= argv
[optind
++];
4133 s
= parse_field_count (optarg1
+ 1, &key
->eword
,
4134 N_("invalid number after `-'"));
4136 s
= parse_field_count (s
+ 1, &key
->echar
,
4137 N_("invalid number after `.'"));
4138 if (!key
->echar
&& key
->eword
)
4140 /* obsolescent syntax +A.x -B.y is equivalent to:
4141 -k A+1.x+1,B.y (when y = 0)
4142 -k A+1.x+1,B+1.y (when y > 0)
4143 So eword is decremented as in the -k case
4144 only when the end field (B) is specified and
4148 if (*set_ordering (s
, key
, bl_end
))
4149 badfieldspec (optarg1
,
4150 N_("stray character in field spec"));
4152 key
->obsolete_used
= true;
4158 files
[nfiles
++] = optarg
;
4162 c
= XARGMATCH ("--sort", optarg
, sort_args
, sort_types
);
4179 set_ordering (str
, &gkey
, bl_both
);
4185 ? XARGMATCH ("--check", optarg
, check_args
, check_types
)
4190 if (checkonly
&& checkonly
!= c
)
4191 incompatible_options ("cC");
4195 case COMPRESS_PROGRAM_OPTION
:
4196 if (compress_program
&& !STREQ (compress_program
, optarg
))
4197 error (SORT_FAILURE
, 0, _("multiple compress programs specified"));
4198 compress_program
= optarg
;
4201 case DEBUG_PROGRAM_OPTION
:
4205 case FILES0_FROM_OPTION
:
4206 files_from
= optarg
;
4210 key
= key_init (&key_buf
);
4213 s
= parse_field_count (optarg
, &key
->sword
,
4214 N_("invalid number at field start"));
4217 /* Provoke with `sort -k0' */
4218 badfieldspec (optarg
, N_("field number is zero"));
4222 s
= parse_field_count (s
+ 1, &key
->schar
,
4223 N_("invalid number after `.'"));
4226 /* Provoke with `sort -k1.0' */
4227 badfieldspec (optarg
, N_("character offset is zero"));
4230 if (! (key
->sword
|| key
->schar
))
4231 key
->sword
= SIZE_MAX
;
4232 s
= set_ordering (s
, key
, bl_start
);
4235 key
->eword
= SIZE_MAX
;
4241 s
= parse_field_count (s
+ 1, &key
->eword
,
4242 N_("invalid number after `,'"));
4245 /* Provoke with `sort -k1,0' */
4246 badfieldspec (optarg
, N_("field number is zero"));
4250 s
= parse_field_count (s
+ 1, &key
->echar
,
4251 N_("invalid number after `.'"));
4253 s
= set_ordering (s
, key
, bl_end
);
4256 badfieldspec (optarg
, N_("stray character in field spec"));
4265 specify_nmerge (oi
, c
, optarg
);
4269 if (outfile
&& !STREQ (outfile
, optarg
))
4270 error (SORT_FAILURE
, 0, _("multiple output files specified"));
4274 case RANDOM_SOURCE_OPTION
:
4275 if (random_source
&& !STREQ (random_source
, optarg
))
4276 error (SORT_FAILURE
, 0, _("multiple random sources specified"));
4277 random_source
= optarg
;
4285 specify_sort_size (oi
, c
, optarg
);
4290 char newtab
= optarg
[0];
4292 error (SORT_FAILURE
, 0, _("empty tab"));
4295 if (STREQ (optarg
, "\\0"))
4299 /* Provoke with `sort -txx'. Complain about
4300 "multi-character tab" instead of "multibyte tab", so
4301 that the diagnostic's wording does not need to be
4302 changed once multibyte characters are supported. */
4303 error (SORT_FAILURE
, 0, _("multi-character tab %s"),
4307 if (tab
!= TAB_DEFAULT
&& tab
!= newtab
)
4308 error (SORT_FAILURE
, 0, _("incompatible tabs"));
4314 add_temp_dir (optarg
);
4317 case PARALLEL_OPTION
:
4318 nthreads
= specify_nthreads (oi
, c
, optarg
);
4326 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
4327 through Solaris 7. It is also accepted by many non-Solaris
4328 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
4329 -y is marked as obsolete starting with Solaris 8 (1999), but is
4330 still accepted as of Solaris 10 prerelease (2004).
4332 Solaris 2.5.1 "sort -y 100" reads the input file "100", but
4333 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
4334 and which in general ignores the argument after "-y" if it
4335 consists entirely of digits (it can even be empty). */
4336 if (optarg
== argv
[optind
- 1])
4339 for (p
= optarg
; ISDIGIT (*p
); p
++)
4341 optind
-= (*p
!= '\0');
4349 case_GETOPT_HELP_CHAR
;
4351 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
4354 usage (SORT_FAILURE
);
4362 /* When using --files0-from=F, you may not specify any files
4363 on the command-line. */
4366 error (0, 0, _("extra operand %s"), quote (files
[0]));
4367 fprintf (stderr
, "%s\n",
4368 _("file operands cannot be combined with --files0-from"));
4369 usage (SORT_FAILURE
);
4372 if (STREQ (files_from
, "-"))
4376 stream
= fopen (files_from
, "r");
4378 error (SORT_FAILURE
, errno
, _("cannot open %s for reading"),
4379 quote (files_from
));
4382 readtokens0_init (&tok
);
4384 if (! readtokens0 (stream
, &tok
) || fclose (stream
) != 0)
4385 error (SORT_FAILURE
, 0, _("cannot read file names from %s"),
4386 quote (files_from
));
4394 for (i
= 0; i
< nfiles
; i
++)
4396 if (STREQ (files
[i
], "-"))
4397 error (SORT_FAILURE
, 0, _("when reading file names from stdin, "
4398 "no file name of %s allowed"),
4400 else if (files
[i
][0] == '\0')
4402 /* Using the standard `filename:line-number:' prefix here is
4403 not totally appropriate, since NUL is the separator, not NL,
4404 but it might be better than nothing. */
4405 unsigned long int file_number
= i
+ 1;
4406 error (SORT_FAILURE
, 0,
4407 _("%s:%lu: invalid zero-length file name"),
4408 quotearg_colon (files_from
), file_number
);
4413 error (SORT_FAILURE
, 0, _("no input from %s"),
4414 quote (files_from
));
4417 /* Inheritance of global options to individual keys. */
4418 for (key
= keylist
; key
; key
= key
->next
)
4420 if (default_key_compare (key
) && !key
->reverse
)
4422 key
->ignore
= gkey
.ignore
;
4423 key
->translate
= gkey
.translate
;
4424 key
->skipsblanks
= gkey
.skipsblanks
;
4425 key
->skipeblanks
= gkey
.skipeblanks
;
4426 key
->month
= gkey
.month
;
4427 key
->numeric
= gkey
.numeric
;
4428 key
->general_numeric
= gkey
.general_numeric
;
4429 key
->human_numeric
= gkey
.human_numeric
;
4430 key
->version
= gkey
.version
;
4431 key
->random
= gkey
.random
;
4432 key
->reverse
= gkey
.reverse
;
4435 need_random
|= key
->random
;
4438 if (!keylist
&& !default_key_compare (&gkey
))
4442 need_random
|= gkey
.random
;
4445 check_ordering_compatibility ();
4447 /* Disable this combination so that users are less likely
4448 to inadvertantly update a file with debugging enabled.
4449 Also it simplifies the code for handling temp files. */
4450 if (debug
&& outfile
)
4451 error (SORT_FAILURE
, 0, _("options -o and --debug are incompatible"));
4455 /* Always output the locale in debug mode, since this
4456 is such a common source of confusion. */
4457 if (hard_LC_COLLATE
)
4458 error (0, 0, _("using %s sorting rules"),
4459 quote (setlocale (LC_COLLATE
, NULL
)));
4461 error (0, 0, _("using simple byte comparison"));
4462 key_warnings (&gkey
, gkey_only
);
4465 reverse
= gkey
.reverse
;
4468 random_md5_state_init (random_source
);
4470 if (temp_dir_count
== 0)
4472 char const *tmp_dir
= getenv ("TMPDIR");
4473 add_temp_dir (tmp_dir
? tmp_dir
: DEFAULT_TMPDIR
);
4478 static char *minus
= (char *) "-";
4484 /* Need to re-check that we meet the minimum requirement for memory
4485 usage with the final value for NMERGE. */
4487 sort_size
= MAX (sort_size
, MIN_SORT_SIZE
);
4492 error (SORT_FAILURE
, 0, _("extra operand %s not allowed with -%c"),
4493 quote (files
[1]), checkonly
);
4497 static char opts
[] = {0, 'o', 0};
4498 opts
[0] = checkonly
;
4499 incompatible_options (opts
);
4502 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
4503 input is not properly sorted. */
4504 exit (check (files
[0], checkonly
) ? EXIT_SUCCESS
: SORT_OUT_OF_ORDER
);
4509 struct sortfile
*sortfiles
= xcalloc (nfiles
, sizeof *sortfiles
);
4512 for (i
= 0; i
< nfiles
; ++i
)
4513 sortfiles
[i
].name
= files
[i
];
4515 merge (sortfiles
, 0, nfiles
, outfile
);
4516 IF_LINT (free (sortfiles
));
4520 /* If NTHREADS > number of cores on the machine, spinlocking
4521 could be wasteful. */
4522 unsigned long int np2
= num_processors (NPROC_CURRENT_OVERRIDABLE
);
4523 if (!nthreads
|| nthreads
> np2
)
4526 sort (files
, nfiles
, outfile
, nthreads
);
4529 if (have_read_stdin
&& fclose (stdin
) == EOF
)
4530 die (_("close failed"), "-");
4532 exit (EXIT_SUCCESS
);