src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 1988, 1991-2009 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17    Written December 1988 by Mike Haertel.
  18    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  19    or (US mail) as Mike Haertel c/o Free Software Foundation.
  20
  21    Ørn E. Hansen added NLS support in 1997.  */
  22
  23 #include <config.h>
  24
  25 #include <getopt.h>
  26 #include <sys/types.h>
  27 #include <sys/wait.h>
  28 #include <signal.h>
  29 #include "system.h"
  30 #include "argmatch.h"
  31 #include "error.h"
  32 #include "filevercmp.h"
  33 #include "hash.h"
  34 #include "md5.h"
  35 #include "physmem.h"
  36 #include "posixver.h"
  37 #include "quote.h"
  38 #include "quotearg.h"
  39 #include "randread.h"
  40 #include "readtokens0.h"
  41 #include "stdio--.h"
  42 #include "stdlib--.h"
  43 #include "strnumcmp.h"
  44 #include "xmemcoll.h"
  45 #include "xmemxfrm.h"
  46 #include "xstrtol.h"
  47
/* Pull in the resource-limit declarations when the platform has them.  */
#if HAVE_SYS_RESOURCE_H
# include <sys/resource.h>
#endif
/* If RLIMIT_DATA is still undefined, supply a minimal struct rlimit
   (only the rlim_cur member) and a getrlimit stand-in that always
   evaluates to -1, i.e. reports failure.  */
#ifndef RLIMIT_DATA
struct rlimit { size_t rlim_cur; };
# define getrlimit(Resource, Rlp) (-1)
#endif
  55
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "sort"

/* Comma-separated list of the authors' names, each passed through
   proper_name ().  */
#define AUTHORS \
  proper_name ("Mike Haertel"), \
  proper_name ("Paul Eggert")
  62
  63 #if HAVE_LANGINFO_CODESET
  64 # include <langinfo.h>
  65 #endif
  66
/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
   present.  When it is missing, define it as 0 and replace the
   signal-mask interfaces used in this file with inert stand-ins.  */
#ifndef SA_NOCLDSTOP
# define SA_NOCLDSTOP 0
/* No sigprocmask.  Always 'return' zero. */
# define sigprocmask(How, Set, Oset) (0)
/* No sigset_t either; a plain int takes its place.  */
# define sigset_t int
# if ! HAVE_SIGINTERRUPT
/* No siginterrupt: a call expands to nothing.  */
#  define siginterrupt(sig, flag) /* empty */
# endif
#endif
  78
/* Make sure OPEN_MAX is defined: prefer an existing definition, then
   NR_OPEN, and finally fall back on 20.  */
#if !defined OPEN_MAX && defined NR_OPEN
# define OPEN_MAX NR_OPEN
#endif
#if !defined OPEN_MAX
# define OPEN_MAX 20
#endif

/* One more than the largest unsigned char value; used as the size of
   the per-character tables below.  */
#define UCHAR_LIM (UCHAR_MAX + 1)

/* Default temporary directory, unless the build overrides it.  The
   --help text names it as the alternative to $TMPDIR.  */
#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif
  91
/* Exit statuses other than success.  SORT_FAILURE is the status that
   die () and the fatal error () calls in this file exit with.  */
enum
  {
    /* POSIX says to exit with status 1 if invoked with -c and the
       input is not properly sorted.  */
    SORT_OUT_OF_ORDER = 1,

    /* POSIX says any other irregular exit must exit with a status
       code greater than 1.  */
    SORT_FAILURE = 2
  };
 103
/* Upper bounds on fork attempts for helper processes.  The compression
   bound is what maybe_create_temp passes as the TRIES argument of
   pipe_fork.  */
enum
  {
    /* The number of times we should try to fork a compression process
       (we retry if the fork call fails).  We don't _need_ to compress
       temp files, this is just to reduce disk access, so this number
       can be small.  */
    MAX_FORK_TRIES_COMPRESS = 2,

    /* The number of times we should try to fork a decompression process.
       If we can't fork a decompression process, we can't sort, so this
       number should be big.  */
    MAX_FORK_TRIES_DECOMPRESS = 8
  };
 117
/* The representation of the decimal point in the current locale.  */
static int decimal_point;

/* Thousands separator; if -1, then there isn't one.  */
static int thousands_sep;

/* Nonzero if the corresponding locales are hard.  hard_LC_TIME exists
   only in builds where HAVE_NL_LANGINFO is set.  */
static bool hard_LC_COLLATE;
#if HAVE_NL_LANGINFO
static bool hard_LC_TIME;
#endif

/* Normalize X to a truth value: 1 if X is nonzero, 0 otherwise.  */
#define NONZERO(x) ((x) != 0)

/* The kind of blanks for '-b' to skip in various options.
   NOTE(review): presumably bl_start/bl_end select the start or end of
   a key and bl_both selects both -- confirm against the users of this
   enum, which are outside this part of the file.  */
enum blanktype { bl_start, bl_end, bl_both };

/* The character marking end of line. Default to \n. */
static char eolchar = '\n';
 137
/* Lines are held in core as counted strings: TEXT points at the bytes
   and LENGTH says how many there are.  KEYBEG and KEYLIM record the
   bounds of the line's first key.  */
struct line
{
  char *text;                   /* Text of the line. */
  size_t length;                /* Length including final newline. */
  char *keybeg;                 /* Start of first key. */
  char *keylim;                 /* Limit of first key. */
};
 146
/* Input buffers.  A single allocation holds both the raw input bytes
   and the array of struct line entries describing them; the two grow
   toward each other from opposite ends (see BUF below).  */
struct buffer
{
  char *buf;                    /* Dynamically allocated buffer,
                                   partitioned into 3 regions:
                                   - input data;
                                   - unused area;
                                   - an array of lines, in reverse order.  */
  size_t used;                  /* Number of bytes used for input data.  */
  size_t nlines;                /* Number of lines in the line array.  */
  size_t alloc;                 /* Number of bytes allocated. */
  size_t left;                  /* Number of bytes left from previous reads. */
  size_t line_bytes;            /* Number of bytes to reserve for each line. */
  bool eof;                     /* An EOF has been read.  */
};
 162
/* Description of one sort key: where the key starts and ends within a
   line (SWORD/SCHAR and EWORD/ECHAR), which characters are ignored or
   translated, and which comparison method applies.  Keys form a
   singly linked list through NEXT.  */
struct keyfield
{
  size_t sword;                 /* Zero-origin 'word' to start at. */
  size_t schar;                 /* Additional characters to skip. */
  size_t eword;                 /* Zero-origin first word after field. */
  size_t echar;                 /* Additional characters in field. */
  bool const *ignore;           /* Boolean array of characters to ignore. */
  char const *translate;        /* Translation applied to characters. */
  bool skipsblanks;             /* Skip leading blanks when finding start.  */
  bool skipeblanks;             /* Skip leading blanks when finding end.  */
  bool numeric;                 /* Flag for numeric comparison.  Handle
                                   strings of digits with optional decimal
                                   point, but no exponential notation. */
  bool random;                  /* Sort by random hash of key.  */
  bool general_numeric;         /* Flag for general, numeric comparison.
                                   Handle numbers in exponential notation. */
  bool human_numeric;           /* Flag for sorting by human readable
                                   units with either SI xor IEC prefixes. */
  int si_present;               /* Flag for checking for mixed SI and IEC. */
  bool month;                   /* Flag for comparison by month name. */
  bool reverse;                 /* Reverse the sense of comparison. */
  bool version;                 /* sort by version number */
  struct keyfield *next;        /* Next keyfield to try. */
};
 187
/* One entry of the month-name table: a name and the integer it maps
   to.  */
struct month
{
  char const *name;             /* Month name, e.g. "JAN" in monthtab. */
  int val;                      /* Value for NAME; 1 through 12 in monthtab. */
};
 193
/* FIXME: None of these tables work with multibyte character sets.
   Also, there are many other bugs when handling multibyte characters.
   One way to fix this is to rewrite `sort' to use wide characters
   internally, but doing this with good performance is a bit
   tricky.  */

/* Each table below has UCHAR_LIM entries, i.e. one slot for every
   possible unsigned char value.  */

/* Table of blanks.  */
static bool blanks[UCHAR_LIM];

/* Table of non-printing characters. */
static bool nonprinting[UCHAR_LIM];

/* Table of non-dictionary characters (not letters, digits, or blanks). */
static bool nondictionary[UCHAR_LIM];

/* Translation table folding lower case to upper.  */
static char fold_toupper[UCHAR_LIM];
 211
/* Number of months in a year; monthtab below has this many entries.  */
#define MONTHS_PER_YEAR 12

/* Table mapping month names to integers.
   Alphabetic order allows binary search.  The initial contents are
   upper-case three-letter English abbreviations paired with the month
   numbers 1 (JAN) through 12 (DEC).  */
static struct month monthtab[] =
{
  {"APR", 4},
  {"AUG", 8},
  {"DEC", 12},
  {"FEB", 2},
  {"JAN", 1},
  {"JUL", 7},
  {"JUN", 6},
  {"MAR", 3},
  {"MAY", 5},
  {"NOV", 11},
  {"OCT", 10},
  {"SEP", 9}
};
 231
/* During the merge phase, the number of files to merge at once.
   This is the initial value of nmerge.  */
#define NMERGE_DEFAULT 16

/* Minimum size for a merge or check buffer.  */
#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))

/* Minimum sort size; the code might not work with smaller sizes.
   Note that this expands to an expression involving the variable
   nmerge, so its value follows the current batch size.  */
#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)

/* The number of bytes needed for a merge or check buffer, which can
   function relatively efficiently even if it holds only one line.  If
   a longer line is seen, this value is increased.  It starts at the
   larger of MIN_MERGE_BUFFER_SIZE and 256 KiB.  */
static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);

/* The approximate maximum number of bytes of main memory to use, as
   specified by the user.  Zero if the user has not specified a size.  */
static size_t sort_size;

/* The guessed size for non-regular files (1 MiB).  */
#define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 252
/* Array of directory names in which any temporary files are to be created.
   create_temp_file cycles through the first temp_dir_count entries in
   turn.  */
static char const **temp_dirs;

/* Number of temporary directory names used.  */
static size_t temp_dir_count;

/* Number of allocated slots in temp_dirs.  */
static size_t temp_dir_alloc;
 261
/* Flag to reverse the order of all comparisons. */
static bool reverse;

/* Flag for stable sort.  This turns off the last ditch bytewise
   comparison of lines, and instead leaves lines in the same order
   they were read if all keys compare equal.  */
static bool stable;

/* If TAB has this value, blanks separate fields.  The value is one
   past CHAR_MAX, so it cannot collide with any char value.  */
enum { TAB_DEFAULT = CHAR_MAX + 1 };

/* Tab character separating fields.  If TAB_DEFAULT, then fields are
   separated by the empty string between a non-blank character and a blank
   character. */
static int tab = TAB_DEFAULT;

/* Flag to remove consecutive duplicate lines from the output.
   Only the last of a sequence of equal lines will be output. */
static bool unique;

/* Nonzero if any of the input files are the standard input.
   stream_open sets this when it hands out stdin for "-".  */
static bool have_read_stdin;

/* List of key field comparisons to be tried.  */
static struct keyfield *keylist;

/* Program used to (de)compress temp files.  Must accept -d.
   Null when temp files are not compressed.  */
static char const *compress_program;

/* Maximum number of files to merge in one go.  If more than this
   number are present, temp files will be used. */
static unsigned int nmerge = NMERGE_DEFAULT;

/* Forward declaration; the definition is not in this part of the
   file.  */
static void sortlines_temp (struct line *, size_t, struct line *);
 296
 297 /* Report MESSAGE for FILE, then clean up and exit.
 298    If FILE is null, it represents standard output.  */
 299
 300 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 301 static void
 302 die (char const *message, char const *file)
 303 {
 304   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 305   exit (SORT_FAILURE);
 306 }
 307
 308 void
 309 usage (int status)
 310 {
 311   if (status != EXIT_SUCCESS)
 312     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 313              program_name);
 314   else
 315     {
 316       printf (_("\
 317 Usage: %s [OPTION]... [FILE]...\n\
 318   or:  %s [OPTION]... --files0-from=F\n\
 319 "),
 320               program_name, program_name);
 321       fputs (_("\
 322 Write sorted concatenation of all FILE(s) to standard output.\n\
 323 \n\
 324 "), stdout);
 325       fputs (_("\
 326 Mandatory arguments to long options are mandatory for short options too.\n\
 327 "), stdout);
 328       fputs (_("\
 329 Ordering options:\n\
 330 \n\
 331 "), stdout);
 332       fputs (_("\
 333   -b, --ignore-leading-blanks  ignore leading blanks\n\
 334   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 335   -f, --ignore-case           fold lower case to upper case characters\n\
 336 "), stdout);
 337       fputs (_("\
 338   -g, --general-numeric-sort  compare according to general numerical value\n\
 339   -i, --ignore-nonprinting    consider only printable characters\n\
 340   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 341 "), stdout);
 342       fputs (_("\
 343   -h, --human-numeric-sort    compare human readable numbers (e.g., 2K 1G)\n\
 344 "), stdout);
 345       fputs (_("\
 346   -n, --numeric-sort          compare according to string numerical value\n\
 347   -R, --random-sort           sort by random hash of keys\n\
 348       --random-source=FILE    get random bytes from FILE\n\
 349   -r, --reverse               reverse the result of comparisons\n\
 350 "), stdout);
 351       fputs (_("\
 352       --sort=WORD             sort according to WORD:\n\
 353                                 general-numeric -g, human-numeric -h, month -M,\n\
 354                                 numeric -n, random -R, version -V\n\
 355   -V, --version-sort          natural sort of (version) numbers within text\n\
 356 \n\
 357 "), stdout);
 358       fputs (_("\
 359 Other options:\n\
 360 \n\
 361 "), stdout);
 362       fputs (_("\
 363       --batch-size=NMERGE   merge at most NMERGE inputs at once;\n\
 364                             for more use temp files\n\
 365 "), stdout);
 366       fputs (_("\
 367   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
 368   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
 369       --compress-program=PROG  compress temporaries with PROG;\n\
 370                               decompress them with PROG -d\n\
 371       --files0-from=F       read input from the files specified by\n\
 372                             NUL-terminated names in file F;\n\
 373                             If F is - then read names from standard input\n\
 374 "), stdout);
 375       fputs (_("\
 376   -k, --key=POS1[,POS2]     start a key at POS1 (origin 1), end it at POS2\n\
 377                             (default end of line)\n\
 378   -m, --merge               merge already sorted files; do not sort\n\
 379 "), stdout);
 380       fputs (_("\
 381   -o, --output=FILE         write result to FILE instead of standard output\n\
 382   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 383   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 384 "), stdout);
 385       printf (_("\
 386   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
 387   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 388                               multiple options specify multiple directories\n\
 389   -u, --unique              with -c, check for strict ordering;\n\
 390                               without -c, output only the first of an equal run\n\
 391 "), DEFAULT_TMPDIR);
 392       fputs (_("\
 393   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 394 "), stdout);
 395       fputs (HELP_OPTION_DESCRIPTION, stdout);
 396       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 397       fputs (_("\
 398 \n\
 399 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 400 in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
 401 in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
 402 one or more single-letter ordering options, which override global ordering\n\
 403 options for that key.  If no key is given, use the entire line as the key.\n\
 404 \n\
 405 SIZE may be followed by the following multiplicative suffixes:\n\
 406 "), stdout);
 407       fputs (_("\
 408 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 409 \n\
 410 With no FILE, or when FILE is -, read standard input.\n\
 411 \n\
 412 *** WARNING ***\n\
 413 The locale specified by the environment affects sort order.\n\
 414 Set LC_ALL=C to get the traditional sort order that uses\n\
 415 native byte values.\n\
 416 "), stdout );
 417       emit_bug_reporting_address ();
 418     }
 419
 420   exit (status);
 421 }
 422
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These are the values stored in the last member of the matching
   long_options entries.  */
enum
{
  CHECK_OPTION = CHAR_MAX + 1,
  COMPRESS_PROGRAM_OPTION,
  FILES0_FROM_OPTION,
  NMERGE_OPTION,
  RANDOM_SOURCE_OPTION,
  SORT_OPTION
};
 434
/* Short option letters; a letter followed by ':' takes an argument.
   NOTE(review): per the getopt documentation a leading '-' makes
   non-option arguments come back as the argument of option code 1 --
   confirm against the option loop, which is outside this part of the
   file.  */
static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";

/* Long options.  Each entry maps a name either to the equivalent
   short option letter or, where there is none, to one of the pseudo
   option codes counting up from CHAR_MAX + 1.  The all-zero entry
   terminates the table.  */
static struct option const long_options[] =
{
  {"ignore-leading-blanks", no_argument, NULL, 'b'},
  {"check", optional_argument, NULL, CHECK_OPTION},
  {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
  {"dictionary-order", no_argument, NULL, 'd'},
  {"ignore-case", no_argument, NULL, 'f'},
  {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
  {"general-numeric-sort", no_argument, NULL, 'g'},
  {"ignore-nonprinting", no_argument, NULL, 'i'},
  {"key", required_argument, NULL, 'k'},
  {"merge", no_argument, NULL, 'm'},
  {"month-sort", no_argument, NULL, 'M'},
  {"numeric-sort", no_argument, NULL, 'n'},
  {"human-numeric-sort", no_argument, NULL, 'h'},
  {"version-sort", no_argument, NULL, 'V'},
  {"random-sort", no_argument, NULL, 'R'},
  {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
  {"sort", required_argument, NULL, SORT_OPTION},
  {"output", required_argument, NULL, 'o'},
  {"reverse", no_argument, NULL, 'r'},
  {"stable", no_argument, NULL, 's'},
  {"batch-size", required_argument, NULL, NMERGE_OPTION},
  {"buffer-size", required_argument, NULL, 'S'},
  {"field-separator", required_argument, NULL, 't'},
  {"temporary-directory", required_argument, NULL, 'T'},
  {"unique", no_argument, NULL, 'u'},
  {"zero-terminated", no_argument, NULL, 'z'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0},
};
 469
/* The accepted arguments of --check, each paired with the short
   option letter it stands for.  The table is expanded twice below
   with different definitions of _ct_, so that check_args and
   check_types are parallel arrays that cannot drift apart.  */
#define CHECK_TABLE \
  _ct_("quiet",          'C') \
  _ct_("silent",         'C') \
  _ct_("diagnose-first", 'c')

/* The argument strings of CHECK_TABLE, followed by a NULL
   terminator.  */
static char const *const check_args[] =
{
#define _ct_(_s, _c) _s,
  CHECK_TABLE NULL
#undef  _ct_
};
/* The option letters of CHECK_TABLE, in the same order as check_args
   (without a terminator).  */
static char const check_types[] =
{
#define _ct_(_s, _c) _c,
  CHECK_TABLE
#undef  _ct_
};
 487
/* The accepted arguments of --sort, each paired with the short option
   letter it stands for.  As with CHECK_TABLE, the table is expanded
   twice with different definitions of _st_ to produce two parallel
   arrays.  */
#define SORT_TABLE \
  _st_("general-numeric", 'g') \
  _st_("human-numeric",   'h') \
  _st_("month",           'M') \
  _st_("numeric",         'n') \
  _st_("random",          'R') \
  _st_("version",         'V')

/* The argument strings of SORT_TABLE, followed by a NULL
   terminator.  */
static char const *const sort_args[] =
{
#define _st_(_s, _c) _s,
  SORT_TABLE NULL
#undef  _st_
};
/* The option letters of SORT_TABLE, in the same order as sort_args
   (without a terminator).  */
static char const sort_types[] =
{
#define _st_(_s, _c) _c,
  SORT_TABLE
#undef  _st_
};
 508
/* The set of signals that are caught.  cs_enter blocks exactly this
   set for the duration of a critical section.  */
static sigset_t caught_signals;

/* Critical section status, as returned by cs_enter and consumed by
   cs_leave.  */
struct cs_status
{
  bool valid;                   /* True if the signal mask was changed
                                   successfully, so SIGS is meaningful.  */
  sigset_t sigs;                /* The signal mask to restore on leaving.  */
};
 518
 519 /* Enter a critical section.  */
 520 static struct cs_status
 521 cs_enter (void)
 522 {
 523   struct cs_status status;
 524   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
 525   return status;
 526 }
 527
 528 /* Leave a critical section.  */
 529 static void
 530 cs_leave (struct cs_status status)
 531 {
 532   if (status.valid)
 533     {
 534       /* Ignore failure when restoring the signal mask. */
 535       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
 536     }
 537 }
 538
/* The list of temporary files.  Each node is allocated with enough
   room after the fixed members to hold the whole file name.  */
struct tempnode
{
  struct tempnode *volatile next;
  pid_t pid;     /* If compressed, the pid of compressor, else zero */
  char name[1];  /* Actual size is 1 + file name length.  */
};
/* Head of the list; cleanup () walks it from here and unlinks every
   file.  */
static struct tempnode *volatile temphead;
/* Address of the pointer that a newly created node gets stored into:
   initially &temphead, afterwards the NEXT member of the last node.  */
static struct tempnode *volatile *temptail = &temphead;
 548
/* A file name together with the PID of the process that compressed
   the file, if any.  */
struct sortfile
{
  char const *name;
  pid_t pid;     /* If compressed, the pid of compressor, else zero */
};
 554
/* A table where we store compression process states.  We clean up all
   processes in a timely manner so as not to exhaust system resources,
   so we store the info on whether the process is still running, or has
   been reaped here.  The table is created lazily by register_proc.  */
static Hash_table *proctab;

/* Initial size hint given to hash_initialize for proctab.  */
enum { INIT_PROCTAB_SIZE = 47 };

/* State of a compression process: ALIVE when registered, ZOMBIE once
   it has been reaped.  */
enum procstate { ALIVE, ZOMBIE };

/* A proctab entry.  The COUNT field is there in case we fork a new
   compression process that has the same PID as an old zombie process
   that is still in the table (because the process to decompress the
   temp file it was associated with hasn't started yet).  */
struct procnode
{
  pid_t pid;                    /* Key: the process ID.  */
  enum procstate state;         /* Whether the process was reaped yet.  */
  size_t count;                 /* Number of registrations of PID that
                                   have not been waited for yet.  */
};
 575
 576 static size_t
 577 proctab_hasher (const void *entry, size_t tabsize)
 578 {
 579   const struct procnode *node = entry;
 580   return node->pid % tabsize;
 581 }
 582
 583 static bool
 584 proctab_comparator (const void *e1, const void *e2)
 585 {
 586   const struct procnode *n1 = e1, *n2 = e2;
 587   return n1->pid == n2->pid;
 588 }
 589
/* The total number of forked processes (compressors and decompressors)
   that have not been reaped yet.  pipe_fork increments it in the
   parent after a successful fork; reap decrements it.  */
static size_t nprocs;

/* The number of child processes we'll allow before we try to reap some. */
enum { MAX_PROCS_BEFORE_REAP = 2 };
 596
 597 /* If 0 < PID, wait for the child process with that PID to exit.
 598    If PID is -1, clean up a random child process which has finished and
 599    return the process ID of that child.  If PID is -1 and no processes
 600    have quit yet, return 0 without waiting.  */
 601
 602 static pid_t
 603 reap (pid_t pid)
 604 {
 605   int status;
 606   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
 607
 608   if (cpid < 0)
 609     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
 610            compress_program);
 611   else if (0 < cpid)
 612     {
 613       if (! WIFEXITED (status) || WEXITSTATUS (status))
 614         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
 615                compress_program);
 616       --nprocs;
 617     }
 618
 619   return cpid;
 620 }
 621
 622 /* Add the PID of a running compression process to proctab, or update
 623    the entry COUNT and STATE fields if it's already there.  This also
 624    creates the table for us the first time it's called.  */
 625
 626 static void
 627 register_proc (pid_t pid)
 628 {
 629   struct procnode test, *node;
 630
 631   if (! proctab)
 632     {
 633       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
 634                                  proctab_hasher,
 635                                  proctab_comparator,
 636                                  free);
 637       if (! proctab)
 638         xalloc_die ();
 639     }
 640
 641   test.pid = pid;
 642   node = hash_lookup (proctab, &test);
 643   if (node)
 644     {
 645       node->state = ALIVE;
 646       ++node->count;
 647     }
 648   else
 649     {
 650       node = xmalloc (sizeof *node);
 651       node->pid = pid;
 652       node->state = ALIVE;
 653       node->count = 1;
 654       if (hash_insert (proctab, node) == NULL)
 655         xalloc_die ();
 656     }
 657 }
 658
 659 /* This is called when we reap a random process.  We don't know
 660    whether we have reaped a compression process or a decompression
 661    process until we look in the table.  If there's an ALIVE entry for
 662    it, then we have reaped a compression process, so change the state
 663    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
 664
 665 static void
 666 update_proc (pid_t pid)
 667 {
 668   struct procnode test, *node;
 669
 670   test.pid = pid;
 671   node = hash_lookup (proctab, &test);
 672   if (node)
 673     node->state = ZOMBIE;
 674 }
 675
 676 /* This is for when we need to wait for a compression process to exit.
 677    If it has a ZOMBIE entry in the table then it's already dead and has
 678    been reaped.  Note that if there's an ALIVE entry for it, it still may
 679    already have died and been reaped if a second process was created with
 680    the same PID.  This is probably exceedingly rare, but to be on the safe
 681    side we will have to wait for any compression process with this PID.  */
 682
 683 static void
 684 wait_proc (pid_t pid)
 685 {
 686   struct procnode test, *node;
 687
 688   test.pid = pid;
 689   node = hash_lookup (proctab, &test);
 690   if (node->state == ALIVE)
 691     reap (pid);
 692
 693   node->state = ZOMBIE;
 694   if (! --node->count)
 695     {
 696       hash_delete (proctab, node);
 697       free (node);
 698     }
 699 }
 700
 701 /* Keep reaping finished children as long as there are more to reap.
 702    This doesn't block waiting for any of them, it only reaps those
 703    that are already dead.  */
 704
 705 static void
 706 reap_some (void)
 707 {
 708   pid_t pid;
 709
 710   while (0 < nprocs && (pid = reap (-1)))
 711     update_proc (pid);
 712 }
 713
 714 /* Clean up any remaining temporary files.  */
 715
 716 static void
 717 cleanup (void)
 718 {
 719   struct tempnode const *node;
 720
 721   for (node = temphead; node; node = node->next)
 722     unlink (node->name);
 723   temphead = NULL;
 724 }
 725
 726 /* Cleanup actions to take when exiting.  */
 727
 728 static void
 729 exit_cleanup (void)
 730 {
 731   if (temphead)
 732     {
 733       /* Clean up any remaining temporary files in a critical section so
 734          that a signal handler does not try to clean them too.  */
 735       struct cs_status cs = cs_enter ();
 736       cleanup ();
 737       cs_leave (cs);
 738     }
 739
 740   close_stdout ();
 741 }
 742
 743 /* Create a new temporary file, returning its newly allocated tempnode.
 744    Store into *PFD the file descriptor open for writing.
 745    If the creation fails, return NULL and store -1 into *PFD if the
 746    failure is due to file descriptor exhaustion and
 747    SURVIVE_FD_EXHAUSTION; otherwise, die.  */
 748
 749 static struct tempnode *
 750 create_temp_file (int *pfd, bool survive_fd_exhaustion)
 751 {
 752   static char const slashbase[] = "/sortXXXXXX";
 753   static size_t temp_dir_index;
 754   int fd;
 755   int saved_errno;
 756   char const *temp_dir = temp_dirs[temp_dir_index];
 757   size_t len = strlen (temp_dir);
 758   struct tempnode *node =
 759     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 760   char *file = node->name;
 761   struct cs_status cs;
 762
 763   memcpy (file, temp_dir, len);
 764   memcpy (file + len, slashbase, sizeof slashbase);
 765   node->next = NULL;
 766   node->pid = 0;
 767   if (++temp_dir_index == temp_dir_count)
 768     temp_dir_index = 0;
 769
 770   /* Create the temporary file in a critical section, to avoid races.  */
 771   cs = cs_enter ();
 772   fd = mkstemp (file);
 773   if (0 <= fd)
 774     {
 775       *temptail = node;
 776       temptail = &node->next;
 777     }
 778   saved_errno = errno;
 779   cs_leave (cs);
 780   errno = saved_errno;
 781
 782   if (fd < 0)
 783     {
 784       if (! (survive_fd_exhaustion && errno == EMFILE))
 785         error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
 786                quote (temp_dir));
 787       free (node);
 788       node = NULL;
 789     }
 790
 791   *pfd = fd;
 792   return node;
 793 }
 794
 795 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 796    standard output; HOW should be "w".  When opening for input, "-"
 797    means standard input.  To avoid confusion, do not return file
 798    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
 799    opening an ordinary FILE.  Return NULL if unsuccessful.  */
 800
 801 static FILE *
 802 stream_open (const char *file, const char *how)
 803 {
 804   if (!file)
 805     return stdout;
 806   if (STREQ (file, "-") && *how == 'r')
 807     {
 808       have_read_stdin = true;
 809       return stdin;
 810     }
 811   return fopen (file, how);
 812 }
 813
 814 /* Same as stream_open, except always return a non-null value; die on
 815    failure.  */
 816
 817 static FILE *
 818 xfopen (const char *file, const char *how)
 819  {
 820   FILE *fp = stream_open (file, how);
 821   if (!fp)
 822     die (_("open failed"), file);
 823   return fp;
 824 }
 825
 826 /* Close FP, whose name is FILE, and report any errors.  */
 827
 828 static void
 829 xfclose (FILE *fp, char const *file)
 830 {
 831   switch (fileno (fp))
 832     {
 833     case STDIN_FILENO:
 834       /* Allow reading stdin from tty more than once.  */
 835       if (feof (fp))
 836         clearerr (fp);
 837       break;
 838
 839     case STDOUT_FILENO:
 840       /* Don't close stdout just yet.  close_stdout does that.  */
 841       if (fflush (fp) != 0)
 842         die (_("fflush failed"), file);
 843       break;
 844
 845     default:
 846       if (fclose (fp) != 0)
 847         die (_("close failed"), file);
 848       break;
 849     }
 850 }
 851
 852 static void
 853 dup2_or_die (int oldfd, int newfd)
 854 {
 855   if (dup2 (oldfd, newfd) < 0)
 856     error (SORT_FAILURE, errno, _("dup2 failed"));
 857 }
 858
/* Fork a child process for piping to and do common cleanup.  The
   TRIES parameter tells us how many times to try to fork before
   giving up.  Return the PID of the child, or -1 (setting errno)
   on failure.

   A pipe is created first and its two ends are stored in PIPEFDS.
   On success the function returns twice: in the parent, with the
   child's PID and with nprocs incremented; and in the child, with 0,
   after closing the child's standard input and standard output and
   with the child's temphead left NULL.  If forking fails, both ends of
   the pipe are closed again.  A fork that fails with EAGAIN is retried
   after sleeping, with the delay doubling each time.

   Where HAVE_WORKING_FORK is false this always returns -1.  */

static pid_t
pipe_fork (int pipefds[2], size_t tries)
{
#if HAVE_WORKING_FORK
  struct tempnode *saved_temphead;
  int saved_errno;
  unsigned int wait_retry = 1;  /* Seconds to sleep before the next try.  */
  pid_t pid IF_LINT (= -1);
  struct cs_status cs;

  if (pipe (pipefds) < 0)
    return -1;

  while (tries--)
    {
      /* This is so the child process won't delete our temp files
         if it receives a signal before exec-ing.  */
      cs = cs_enter ();
      saved_temphead = temphead;
      temphead = NULL;

      pid = fork ();
      saved_errno = errno;
      /* Parent (or failed fork): put the temp-file list back.  Only
         the child keeps the empty list.  */
      if (pid)
        temphead = saved_temphead;

      /* Keep fork's errno intact across the signal-mask restore.  */
      cs_leave (cs);
      errno = saved_errno;

      /* Stop on success, and on any failure other than EAGAIN.  */
      if (0 <= pid || errno != EAGAIN)
        break;
      else
        {
          /* Back off, then reap finished children before retrying.  */
          sleep (wait_retry);
          wait_retry *= 2;
          reap_some ();
        }
    }

  if (pid < 0)
    {
      /* Fork failed: release the pipe without losing fork's errno.  */
      saved_errno = errno;
      close (pipefds[0]);
      close (pipefds[1]);
      errno = saved_errno;
    }
  else if (pid == 0)
    {
      /* Child: drop the inherited standard input and output; the
         caller installs the descriptors it wants in their place.  */
      close (STDIN_FILENO);
      close (STDOUT_FILENO);
    }
  else
    ++nprocs;

  return pid;

#else  /* ! HAVE_WORKING_FORK */
  return -1;
#endif
}
 924
 925 /* Create a temporary file and start a compression program to filter output
 926    to that file.  Set *PFP to the file handle and if PPID is non-NULL,
 927    set *PPID to the PID of the newly-created process.  If the creation
 928    fails, return NULL if the failure is due to file descriptor
 929    exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die.  */
 930
 931 static char *
 932 maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion)
 933 {
 934   int tempfd;
 935   struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion);
 936   char *name;
 937
 938   if (! node)
 939     return NULL;
 940
 941   name = node->name;
 942
 943   if (compress_program)
 944     {
 945       int pipefds[2];
 946
 947       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
 948       if (0 < node->pid)
 949         {
 950           close (tempfd);
 951           close (pipefds[0]);
 952           tempfd = pipefds[1];
 953
 954           register_proc (node->pid);
 955         }
 956       else if (node->pid == 0)
 957         {
 958           close (pipefds[1]);
 959           dup2_or_die (tempfd, STDOUT_FILENO);
 960           close (tempfd);
 961           dup2_or_die (pipefds[0], STDIN_FILENO);
 962           close (pipefds[0]);
 963
 964           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
 965             error (SORT_FAILURE, errno, _("couldn't execute %s"),
 966                    compress_program);
 967         }
 968       else
 969         node->pid = 0;
 970     }
 971
 972   *pfp = fdopen (tempfd, "w");
 973   if (! *pfp)
 974     die (_("couldn't create temporary file"), name);
 975
 976   if (ppid)
 977     *ppid = node->pid;
 978
 979   return name;
 980 }
 981
 982 /* Create a temporary file and start a compression program to filter output
 983    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
 984    set it to the PID of the newly-created process.  Die on failure.  */
 985
 986 static char *
 987 create_temp (FILE **pfp, pid_t *ppid)
 988 {
 989   return maybe_create_temp (pfp, ppid, false);
 990 }
 991
 992 /* Open a compressed temp file and start a decompression process through
 993    which to filter the input.  PID must be the valid processes ID of the
 994    process used to compress the file.  Return NULL (setting errno to
 995    EMFILE) if we ran out of file descriptors, and die on any other
 996    kind of failure.  */
 997
 998 static FILE *
 999 open_temp (const char *name, pid_t pid)
1000 {
1001   int tempfd, pipefds[2];
1002   FILE *fp = NULL;
1003
1004   wait_proc (pid);
1005
1006   tempfd = open (name, O_RDONLY);
1007   if (tempfd < 0)
1008     return NULL;
1009
1010   switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS))
1011     {
1012     case -1:
1013       if (errno != EMFILE)
1014         error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
1015                compress_program);
1016       close (tempfd);
1017       errno = EMFILE;
1018       break;
1019
1020     case 0:
1021       close (pipefds[0]);
1022       dup2_or_die (tempfd, STDIN_FILENO);
1023       close (tempfd);
1024       dup2_or_die (pipefds[1], STDOUT_FILENO);
1025       close (pipefds[1]);
1026
1027       execlp (compress_program, compress_program, "-d", (char *) NULL);
1028       error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
1029              compress_program);
1030
1031     default:
1032       close (tempfd);
1033       close (pipefds[1]);
1034
1035       fp = fdopen (pipefds[0], "r");
1036       if (! fp)
1037         {
1038           int saved_errno = errno;
1039           close (pipefds[0]);
1040           errno = saved_errno;
1041         }
1042       break;
1043     }
1044
1045   return fp;
1046 }
1047
/* Write the N_BYTES bytes at BUF to FP.  If fewer bytes than
   requested are written, die with a message naming OUTPUT_FILE.  */

static void
write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
{
  size_t n_written = fwrite (buf, 1, n_bytes, fp);

  if (n_written != n_bytes)
    die (_("write failed"), output_file);
}
1054
1055 /* Append DIR to the array of temporary directory names.  */
1056 static void
1057 add_temp_dir (char const *dir)
1058 {
1059   if (temp_dir_count == temp_dir_alloc)
1060     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1061
1062   temp_dirs[temp_dir_count++] = dir;
1063 }
1064
/* Remove NAME from the list of temporary files and unlink the file.
   NAME is matched by pointer identity against the NAME member of the
   list nodes, not by string contents, and the search does not check
   for the end of the list, so NAME must belong to a node currently on
   the list.  A failure to unlink produces a warning only.  The node
   is freed.  */

static void
zaptemp (const char *name)
{
  struct tempnode *volatile *pnode;
  struct tempnode *node;
  struct tempnode *next;
  int unlink_status;
  int unlink_errno = 0;
  struct cs_status cs;

  /* Find the link (either TEMPHEAD or some node's NEXT member) that
     points at the node owning NAME.  */
  for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
    continue;

  /* Unlink the temporary file in a critical section to avoid races.  */
  next = node->next;
  cs = cs_enter ();
  unlink_status = unlink (name);
  /* Save errno now; it is reported only after the critical section.  */
  unlink_errno = errno;
  /* Splice the node out of the list.  */
  *pnode = next;
  cs_leave (cs);

  if (unlink_status != 0)
    error (0, unlink_errno, _("warning: cannot remove: %s"), name);
  /* If the removed node was the last one, the tail is now the link
     that used to point at it.  */
  if (! next)
    temptail = pnode;
  free (node);
}
1094
#if HAVE_NL_LANGINFO

/* Comparison function for qsort: M1 and M2 point to struct month
   objects, which are ordered by applying strcmp to their NAME
   members.  */

static int
struct_month_cmp (const void *m1, const void *m2)
{
  char const *name1 = ((struct month const *) m1)->name;
  char const *name2 = ((struct month const *) m2)->name;

  return strcmp (name1, name2);
}

#endif
1106
1107 /* Initialize the character class tables. */
1108
1109 static void
1110 inittables (void)
1111 {
1112   size_t i;
1113
1114   for (i = 0; i < UCHAR_LIM; ++i)
1115     {
1116       blanks[i] = !! isblank (i);
1117       nonprinting[i] = ! isprint (i);
1118       nondictionary[i] = ! isalnum (i) && ! isblank (i);
1119       fold_toupper[i] = toupper (i);
1120     }
1121
1122 #if HAVE_NL_LANGINFO
1123   /* If we're not in the "C" locale, read different names for months.  */
1124   if (hard_LC_TIME)
1125     {
1126       for (i = 0; i < MONTHS_PER_YEAR; i++)
1127         {
1128           char const *s;
1129           size_t s_len;
1130           size_t j;
1131           char *name;
1132
1133           s = (char *) nl_langinfo (ABMON_1 + i);
1134           s_len = strlen (s);
1135           monthtab[i].name = name = xmalloc (s_len + 1);
1136           monthtab[i].val = i + 1;
1137
1138           for (j = 0; j < s_len; j++)
1139             name[j] = fold_toupper[to_uchar (s[j])];
1140           name[j] = '\0';
1141         }
1142       qsort ((void *) monthtab, MONTHS_PER_YEAR,
1143              sizeof *monthtab, struct_month_cmp);
1144     }
1145 #endif
1146 }
1147
1148 /* Specify how many inputs may be merged at once.
1149    This may be set on the command-line with the
1150    --batch-size option. */
1151 static void
1152 specify_nmerge (int oi, char c, char const *s)
1153 {
1154   uintmax_t n;
1155   struct rlimit rlimit;
1156   enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1157
1158   /* Try to find out how many file descriptors we'll be able
1159      to open.  We need at least nmerge + 3 (STDIN_FILENO,
1160      STDOUT_FILENO and STDERR_FILENO). */
1161   unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1162                               ? rlimit.rlim_cur
1163                               : OPEN_MAX)
1164                              - 3);
1165
1166   if (e == LONGINT_OK)
1167     {
1168       nmerge = n;
1169       if (nmerge != n)
1170         e = LONGINT_OVERFLOW;
1171       else
1172         {
1173           if (nmerge < 2)
1174             {
1175               error (0, 0, _("invalid --%s argument %s"),
1176                      long_options[oi].name, quote(s));
1177               error (SORT_FAILURE, 0,
1178                      _("minimum --%s argument is %s"),
1179                      long_options[oi].name, quote("2"));
1180             }
1181           else if (max_nmerge < nmerge)
1182             {
1183               e = LONGINT_OVERFLOW;
1184             }
1185           else
1186             return;
1187         }
1188     }
1189
1190   if (e == LONGINT_OVERFLOW)
1191     {
1192       char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1193       error (0, 0, _("--%s argument %s too large"),
1194              long_options[oi].name, quote(s));
1195       error (SORT_FAILURE, 0,
1196              _("maximum --%s argument with current rlimit is %s"),
1197              long_options[oi].name,
1198              uinttostr (max_nmerge, max_nmerge_buf));
1199     }
1200   else
1201     xstrtol_fatal (e, oi, c, long_options, s);
1202 }
1203
1204 /* Specify the amount of main memory to use when sorting.  */
1205 static void
1206 specify_sort_size (int oi, char c, char const *s)
1207 {
1208   uintmax_t n;
1209   char *suffix;
1210   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1211
1212   /* The default unit is KiB.  */
1213   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1214     {
1215       if (n <= UINTMAX_MAX / 1024)
1216         n *= 1024;
1217       else
1218         e = LONGINT_OVERFLOW;
1219     }
1220
1221   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1222   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1223     switch (suffix[0])
1224       {
1225       case 'b':
1226         e = LONGINT_OK;
1227         break;
1228
1229       case '%':
1230         {
1231           double mem = physmem_total () * n / 100;
1232
1233           /* Use "<", not "<=", to avoid problems with rounding.  */
1234           if (mem < UINTMAX_MAX)
1235             {
1236               n = mem;
1237               e = LONGINT_OK;
1238             }
1239           else
1240             e = LONGINT_OVERFLOW;
1241         }
1242         break;
1243       }
1244
1245   if (e == LONGINT_OK)
1246     {
1247       /* If multiple sort sizes are specified, take the maximum, so
1248          that option order does not matter.  */
1249       if (n < sort_size)
1250         return;
1251
1252       sort_size = n;
1253       if (sort_size == n)
1254         {
1255           sort_size = MAX (sort_size, MIN_SORT_SIZE);
1256           return;
1257         }
1258
1259       e = LONGINT_OVERFLOW;
1260     }
1261
1262   xstrtol_fatal (e, oi, c, long_options, s);
1263 }
1264
1265 /* Return the default sort size.  */
1266 static size_t
1267 default_sort_size (void)
1268 {
1269   /* Let MEM be available memory or 1/8 of total memory, whichever
1270      is greater.  */
1271   double avail = physmem_available ();
1272   double total = physmem_total ();
1273   double mem = MAX (avail, total / 8);
1274   struct rlimit rlimit;
1275
1276   /* Let SIZE be MEM, but no more than the maximum object size or
1277      system resource limits.  Avoid the MIN macro here, as it is not
1278      quite right when only one argument is floating point.  Don't
1279      bother to check for values like RLIM_INFINITY since in practice
1280      they are not much less than SIZE_MAX.  */
1281   size_t size = SIZE_MAX;
1282   if (mem < size)
1283     size = mem;
1284   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1285     size = rlimit.rlim_cur;
1286 #ifdef RLIMIT_AS
1287   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1288     size = rlimit.rlim_cur;
1289 #endif
1290
1291   /* Leave a large safety margin for the above limits, as failure can
1292      occur when they are exceeded.  */
1293   size /= 2;
1294
1295 #ifdef RLIMIT_RSS
1296   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1297      Exceeding RSS is not fatal, but can be quite slow.  */
1298   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1299     size = rlimit.rlim_cur / 16 * 15;
1300 #endif
1301
1302   /* Use no less than the minimum.  */
1303   return MAX (size, MIN_SORT_SIZE);
1304 }
1305
1306 /* Return the sort buffer size to use with the input files identified
1307    by FPS and FILES, which are alternate names of the same files.
1308    NFILES gives the number of input files; NFPS may be less.  Assume
1309    that each input line requires LINE_BYTES extra bytes' worth of line
1310    information.  Do not exceed the size bound specified by the user
1311    (or a default size bound, if the user does not specify one).  */
1312
1313 static size_t
1314 sort_buffer_size (FILE *const *fps, size_t nfps,
1315                   char *const *files, size_t nfiles,
1316                   size_t line_bytes)
1317 {
1318   /* A bound on the input size.  If zero, the bound hasn't been
1319      determined yet.  */
1320   static size_t size_bound;
1321
1322   /* In the worst case, each input byte is a newline.  */
1323   size_t worst_case_per_input_byte = line_bytes + 1;
1324
1325   /* Keep enough room for one extra input line and an extra byte.
1326      This extra room might be needed when preparing to read EOF.  */
1327   size_t size = worst_case_per_input_byte + 1;
1328
1329   size_t i;
1330
1331   for (i = 0; i < nfiles; i++)
1332     {
1333       struct stat st;
1334       off_t file_size;
1335       size_t worst_case;
1336
1337       if ((i < nfps ? fstat (fileno (fps[i]), &st)
1338            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1339            : stat (files[i], &st))
1340           != 0)
1341         die (_("stat failed"), files[i]);
1342
1343       if (S_ISREG (st.st_mode))
1344         file_size = st.st_size;
1345       else
1346         {
1347           /* The file has unknown size.  If the user specified a sort
1348              buffer size, use that; otherwise, guess the size.  */
1349           if (sort_size)
1350             return sort_size;
1351           file_size = INPUT_FILE_SIZE_GUESS;
1352         }
1353
1354       if (! size_bound)
1355         {
1356           size_bound = sort_size;
1357           if (! size_bound)
1358             size_bound = default_sort_size ();
1359         }
1360
1361       /* Add the amount of memory needed to represent the worst case
1362          where the input consists entirely of newlines followed by a
1363          single non-newline.  Check for overflow.  */
1364       worst_case = file_size * worst_case_per_input_byte + 1;
1365       if (file_size != worst_case / worst_case_per_input_byte
1366           || size_bound - size <= worst_case)
1367         return size_bound;
1368       size += worst_case;
1369     }
1370
1371   return size;
1372 }
1373
1374 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1375    must be at least sizeof (struct line).  Allocate ALLOC bytes
1376    initially.  */
1377
1378 static void
1379 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1380 {
1381   /* Ensure that the line array is properly aligned.  If the desired
1382      size cannot be allocated, repeatedly halve it until allocation
1383      succeeds.  The smaller allocation may hurt overall performance,
1384      but that's better than failing.  */
1385   for (;;)
1386     {
1387       alloc += sizeof (struct line) - alloc % sizeof (struct line);
1388       buf->buf = malloc (alloc);
1389       if (buf->buf)
1390         break;
1391       alloc /= 2;
1392       if (alloc <= line_bytes + 1)
1393         xalloc_die ();
1394     }
1395
1396   buf->line_bytes = line_bytes;
1397   buf->alloc = alloc;
1398   buf->used = buf->left = buf->nlines = 0;
1399   buf->eof = false;
1400 }
1401
1402 /* Return one past the limit of the line array.  */
1403
1404 static inline struct line *
1405 buffer_linelim (struct buffer const *buf)
1406 {
1407   return (struct line *) (buf->buf + buf->alloc);
1408 }
1409
1410 /* Return a pointer to the first character of the field specified
1411    by KEY in LINE. */
1412
1413 static char *
1414 begfield (const struct line *line, const struct keyfield *key)
1415 {
1416   char *ptr = line->text, *lim = ptr + line->length - 1;
1417   size_t sword = key->sword;
1418   size_t schar = key->schar;
1419
1420   /* The leading field separator itself is included in a field when -t
1421      is absent.  */
1422
1423   if (tab != TAB_DEFAULT)
1424     while (ptr < lim && sword--)
1425       {
1426         while (ptr < lim && *ptr != tab)
1427           ++ptr;
1428         if (ptr < lim)
1429           ++ptr;
1430       }
1431   else
1432     while (ptr < lim && sword--)
1433       {
1434         while (ptr < lim && blanks[to_uchar (*ptr)])
1435           ++ptr;
1436         while (ptr < lim && !blanks[to_uchar (*ptr)])
1437           ++ptr;
1438       }
1439
1440   /* If we're ignoring leading blanks when computing the Start
1441      of the field, skip past them here.  */
1442   if (key->skipsblanks)
1443     while (ptr < lim && blanks[to_uchar (*ptr)])
1444       ++ptr;
1445
1446   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1447   ptr = MIN (lim, ptr + schar);
1448
1449   return ptr;
1450 }
1451
1452 /* Return the limit of (a pointer to the first character after) the field
1453    in LINE specified by KEY. */
1454
1455 static char *
1456 limfield (const struct line *line, const struct keyfield *key)
1457 {
1458   char *ptr = line->text, *lim = ptr + line->length - 1;
1459   size_t eword = key->eword, echar = key->echar;
1460
1461   if (echar == 0)
1462     eword++; /* Skip all of end field.  */
1463
1464   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1465      whichever comes first.  If there are more than EWORD fields, leave
1466      PTR pointing at the beginning of the field having zero-based index,
1467      EWORD.  If a delimiter character was specified (via -t), then that
1468      `beginning' is the first character following the delimiting TAB.
1469      Otherwise, leave PTR pointing at the first `blank' character after
1470      the preceding field.  */
1471   if (tab != TAB_DEFAULT)
1472     while (ptr < lim && eword--)
1473       {
1474         while (ptr < lim && *ptr != tab)
1475           ++ptr;
1476         if (ptr < lim && (eword | echar))
1477           ++ptr;
1478       }
1479   else
1480     while (ptr < lim && eword--)
1481       {
1482         while (ptr < lim && blanks[to_uchar (*ptr)])
1483           ++ptr;
1484         while (ptr < lim && !blanks[to_uchar (*ptr)])
1485           ++ptr;
1486       }
1487
1488 #ifdef POSIX_UNSPECIFIED
1489   /* The following block of code makes GNU sort incompatible with
1490      standard Unix sort, so it's ifdef'd out for now.
1491      The POSIX spec isn't clear on how to interpret this.
1492      FIXME: request clarification.
1493
1494      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1495      Date: Thu, 30 May 96 12:20:41 -0400
1496      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1497
1498      [...]I believe I've found another bug in `sort'.
1499
1500      $ cat /tmp/sort.in
1501      a b c 2 d
1502      pq rs 1 t
1503      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1504      a b c 2 d
1505      pq rs 1 t
1506      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1507      pq rs 1 t
1508      a b c 2 d
1509
1510      Unix sort produced the answer I expected: sort on the single character
1511      in column 7.  GNU sort produced different results, because it disagrees
1512      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1513      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1514      "skip M-1 fields, then either N-1 characters or the rest of the current
1515      field, whichever comes first".  This extra clause applies only to
1516      key-ends, not key-starts.
1517      */
1518
1519   /* Make LIM point to the end of (one byte past) the current field.  */
1520   if (tab != TAB_DEFAULT)
1521     {
1522       char *newlim;
1523       newlim = memchr (ptr, tab, lim - ptr);
1524       if (newlim)
1525         lim = newlim;
1526     }
1527   else
1528     {
1529       char *newlim;
1530       newlim = ptr;
1531       while (newlim < lim && blanks[to_uchar (*newlim)])
1532         ++newlim;
1533       while (newlim < lim && !blanks[to_uchar (*newlim)])
1534         ++newlim;
1535       lim = newlim;
1536     }
1537 #endif
1538
1539   if (echar != 0) /* We need to skip over a portion of the end field.  */
1540     {
1541       /* If we're ignoring leading blanks when computing the End
1542          of the field, skip past them here.  */
1543       if (key->skipeblanks)
1544         while (ptr < lim && blanks[to_uchar (*ptr)])
1545           ++ptr;
1546
1547       /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1548       ptr = MIN (lim, ptr + echar);
1549     }
1550
1551   return ptr;
1552 }
1553
/* Fill BUF reading from FP, moving buf->left bytes from the end
   of buf->buf to the beginning first.  If EOF is reached and the
   file wasn't terminated by a newline, supply one.  Set up BUF's line
   table too.  FILE is the name of the file corresponding to FP.
   Return true if some input was read.

   The text is stored from the start of buf->buf upward, while the
   line table is filled from the end of the buffer downward.  If not
   even one complete line fits, the buffer is enlarged and reading
   continues.  As a side effect MERGE_BUFFER_SIZE is raised so that it
   accounts for the longest line seen.  Die if reading fails.  */

static bool
fillbuf (struct buffer *buf, FILE *fp, char const *file)
{
  struct keyfield const *key = keylist;
  char eol = eolchar;
  size_t line_bytes = buf->line_bytes;
  /* Longest line length accounted for so far; written back to
     MERGE_BUFFER_SIZE (plus MIN_MERGE_BUFFER_SIZE) on success.  */
  size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;

  if (buf->eof)
    return false;

  /* Move the leftover partial line to the start of the buffer and
     discard the old line table.  */
  if (buf->used != buf->left)
    {
      memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
      buf->used = buf->left;
      buf->nlines = 0;
    }

  for (;;)
    {
      /* PTR is where new text goes; LINE is the lowest line table
         entry in use; AVAIL is the room between the text and the
         line table.  */
      char *ptr = buf->buf + buf->used;
      struct line *linelim = buffer_linelim (buf);
      struct line *line = linelim - buf->nlines;
      size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
      char *line_start = buf->nlines ? line->text + line->length : buf->buf;

      while (line_bytes + 1 < avail)
        {
          /* Read as many bytes as possible, but do not read so many
             bytes that there might not be enough room for the
             corresponding line array.  The worst case is when the
             rest of the input file consists entirely of newlines,
             except that the last byte is not a newline.  */
          size_t readsize = (avail - 1) / (line_bytes + 1);
          size_t bytes_read = fread (ptr, 1, readsize, fp);
          char *ptrlim = ptr + bytes_read;
          char *p;
          avail -= bytes_read;

          if (bytes_read != readsize)
            {
              if (ferror (fp))
                die (_("read failed"), file);
              if (feof (fp))
                {
                  buf->eof = true;
                  /* Nothing at all in the buffer: no input was read.  */
                  if (buf->buf == ptrlim)
                    return false;
                  /* Supply the missing terminator of the last line.  */
                  if (ptrlim[-1] != eol)
                    *ptrlim++ = eol;
                }
            }

          /* Find and record each line in the just-read input.  */
          while ((p = memchr (ptr, eol, ptrlim - ptr)))
            {
              ptr = p + 1;
              /* The line table grows downward.  */
              line--;
              line->text = line_start;
              /* The length includes the terminator.  */
              line->length = ptr - line_start;
              mergesize = MAX (mergesize, line->length);
              avail -= line_bytes;

              if (key)
                {
                  /* Precompute the position of the first key for
                     efficiency.  */
                  line->keylim = (key->eword == SIZE_MAX
                                  ? p
                                  : limfield (line, key));

                  if (key->sword != SIZE_MAX)
                    line->keybeg = begfield (line, key);
                  else
                    {
                      /* No start field: the key begins at the start
                         of the line, after any blanks to be skipped.  */
                      if (key->skipsblanks)
                        while (blanks[to_uchar (*line_start)])
                          line_start++;
                      line->keybeg = line_start;
                    }
                }

              line_start = ptr;
            }

          ptr = ptrlim;
          if (buf->eof)
            break;
        }

      buf->used = ptr - buf->buf;
      buf->nlines = buffer_linelim (buf) - line;
      if (buf->nlines != 0)
        {
          /* Bytes after the last complete line stay for next time.  */
          buf->left = ptr - line_start;
          merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
          return true;
        }

      {
        /* The current input line is too long to fit in the buffer.
           Double the buffer size and try again, keeping it properly
           aligned.  */
        size_t line_alloc = buf->alloc / sizeof (struct line);
        buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
        buf->alloc = line_alloc * sizeof (struct line);
      }
    }
}
1669
1670 /* Compare strings A and B as numbers without explicitly converting them to
1671    machine numbers.  Comparatively slow for short strings, but asymptotically
1672    hideously fast. */
1673
1674 static int
1675 numcompare (const char *a, const char *b)
1676 {
1677   while (blanks[to_uchar (*a)])
1678     a++;
1679   while (blanks[to_uchar (*b)])
1680     b++;
1681
1682   return strnumcmp (a, b, decimal_point, thousands_sep);
1683 }
1684
1685 /* Exit with an error if a mixture of SI and IEC units detected.  */
1686
1687 static void
1688 check_mixed_SI_IEC (char prefix, struct keyfield *key)
1689 {
1690   int si_present = prefix == 'i';
1691   if (key->si_present != -1 && si_present != key->si_present)
1692     error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
1693   key->si_present = si_present;
1694 }
1695
1696 /* Return an integer which represents the order of magnitude of
1697    the unit following the number.  NUMBER can contain thousands separators
1698    or a decimal point, but not have preceeding blanks.
1699    Negative numbers return a negative unit order.  */
1700
1701 static int
1702 find_unit_order (const char *number, struct keyfield *key)
1703 {
1704   static const char orders [UCHAR_LIM] = {
1705     ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1706     ['k']=1,
1707   };
1708
1709   const unsigned char *p = number;
1710
1711   int sign = 1;
1712
1713   if (*p == '-')
1714     {
1715       sign = -1;
1716       p++;
1717     }
1718
1719   /* Scan to end of number.
1720      Decimals or separators not followed by digits stop the scan.
1721      Numbers ending in decimals or separators are thus considered
1722      to be lacking in units.
1723      FIXME: add support for multibyte thousands_sep and decimal_point.  */
1724
1725   while (ISDIGIT (*p))
1726     {
1727       p++;
1728
1729       if (*p == decimal_point && ISDIGIT (*(p + 1)))
1730         p += 2;
1731       else if (*p == thousands_sep && ISDIGIT (*(p + 1)))
1732         p += 2;
1733     }
1734
1735   int order = orders[*p];
1736
1737   /* For valid units check for MiB vs MB etc.  */
1738   if (order)
1739     check_mixed_SI_IEC (*(p + 1), key);
1740
1741   return sign * order;
1742 }
1743
1744 /* Compare numbers ending in units with SI xor IEC prefixes
1745        <none/unknown> < K/k < M < G < T < P < E < Z < Y
1746    Assume that numbers are properly abbreviated.
1747    i.e. input will never have both 6000K and 5M.  */
1748
1749 static int
1750 human_numcompare (const char *a, const char *b, struct keyfield *key)
1751 {
1752   while (blanks[to_uchar (*a)])
1753     a++;
1754   while (blanks[to_uchar (*b)])
1755     b++;
1756
1757   int order_a = find_unit_order (a, key);
1758   int order_b = find_unit_order (b, key);
1759
1760   return (order_a > order_b ? 1
1761           : order_a < order_b ? -1
1762           : strnumcmp (a, b, decimal_point, thousands_sep));
1763 }
1764
/* Compare the strings SA and SB as floating-point numbers, as
   converted by strtod.  Return a negative value, zero, or a positive
   value if SA sorts before, equal to, or after SB.  Strings that
   strtod cannot convert sort first, then NaNs, then ordinary
   numbers.  */

static int
general_numcompare (const char *sa, const char *sb)
{
  /* FIXME: add option to warn about failed conversions.  */
  /* FIXME: maybe add option to try expensive FP conversion
     only if A and B can't be compared more cheaply/accurately.  */

  char *end_a;
  char *end_b;
  double a = strtod (sa, &end_a);
  double b = strtod (sb, &end_b);
  int a_unconverted = (sa == end_a);
  int b_unconverted = (sb == end_b);

  /* Put conversion errors at the start of the collating sequence.  */
  if (a_unconverted)
    return b_unconverted ? 0 : -1;
  if (b_unconverted)
    return 1;

  /* Sort numbers in the usual way, where -0 == +0.  */
  if (a < b)
    return -1;
  if (a > b)
    return 1;
  if (a == b)
    return 0;

  /* At least one operand is a NaN.  Put NaNs after conversion errors
     but before numbers; sort them by internal bit-pattern, for lack
     of a more portable alternative.  */
  if (b == b)
    return -1;
  if (a == a)
    return 1;
  return memcmp ((char *) &a, (char *) &b, sizeof a);
}
1793
1794 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1795    Return 0 if the name in S is not recognized.  */
1796
1797 static int
1798 getmonth (char const *month, size_t len)
1799 {
1800   size_t lo = 0;
1801   size_t hi = MONTHS_PER_YEAR;
1802   char const *monthlim = month + len;
1803
1804   for (;;)
1805     {
1806       if (month == monthlim)
1807         return 0;
1808       if (!blanks[to_uchar (*month)])
1809         break;
1810       ++month;
1811     }
1812
1813   do
1814     {
1815       size_t ix = (lo + hi) / 2;
1816       char const *m = month;
1817       char const *n = monthtab[ix].name;
1818
1819       for (;; m++, n++)
1820         {
1821           if (!*n)
1822             return monthtab[ix].val;
1823           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1824             {
1825               hi = ix;
1826               break;
1827             }
1828           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1829             {
1830               lo = ix + 1;
1831               break;
1832             }
1833         }
1834     }
1835   while (lo < hi);
1836
1837   return 0;
1838 }
1839
/* A source of random data.  random_state reads from it (via
   randread) to seed each hash state.  NOTE(review): it is
   initialized outside this part of the file -- confirm that this
   happens before the first random comparison.  */
static struct randread_source *randread_source;
1842
1843 /* Return the Ith randomly-generated state.  The caller must invoke
1844    random_state (H) for all H less than I before invoking random_state
1845    (I).  */
1846
1847 static struct md5_ctx
1848 random_state (size_t i)
1849 {
1850   /* An array of states resulting from the random data, and counts of
1851      its used and allocated members.  */
1852   static struct md5_ctx *state;
1853   static size_t used;
1854   static size_t allocated;
1855
1856   struct md5_ctx *s = &state[i];
1857
1858   if (used <= i)
1859     {
1860       unsigned char buf[MD5_DIGEST_SIZE];
1861
1862       used++;
1863
1864       if (allocated <= i)
1865         {
1866           state = X2NREALLOC (state, &allocated);
1867           s = &state[i];
1868         }
1869
1870       randread (randread_source, buf, sizeof buf);
1871       md5_init_ctx (s);
1872       md5_process_bytes (buf, sizeof buf, s);
1873     }
1874
1875   return *s;
1876 }
1877
1878 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1879    with length LENGTHB.  Return negative if less, zero if equal,
1880    positive if greater.  */
1881
1882 static int
1883 cmp_hashes (char const *texta, size_t lena,
1884             char const *textb, size_t lenb)
1885 {
1886   /* Try random hashes until a pair of hashes disagree.  But if the
1887      first pair of random hashes agree, check whether the keys are
1888      identical and if so report no difference.  */
1889   int diff;
1890   size_t i;
1891   for (i = 0; ; i++)
1892     {
1893       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1894       struct md5_ctx s[2];
1895       s[0] = s[1] = random_state (i);
1896       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1897       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1898       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1899       if (diff != 0)
1900         break;
1901       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1902         break;
1903     }
1904
1905   return diff;
1906 }
1907
1908 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1909    using one or more random hash functions.  */
1910
1911 static int
1912 compare_random (char *restrict texta, size_t lena,
1913                 char *restrict textb, size_t lenb)
1914 {
1915   int diff;
1916
1917   if (! hard_LC_COLLATE)
1918     diff = cmp_hashes (texta, lena, textb, lenb);
1919   else
1920     {
1921       /* Transform the text into the basis of comparison, so that byte
1922          strings that would otherwise considered to be equal are
1923          considered equal here even if their bytes differ.  */
1924
1925       char *buf = NULL;
1926       char stackbuf[4000];
1927       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1928       bool a_fits = tlena <= sizeof stackbuf;
1929       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1930                                (a_fits ? sizeof stackbuf - tlena : 0),
1931                                textb, lenb);
1932
1933       if (a_fits && tlena + tlenb <= sizeof stackbuf)
1934         buf = stackbuf;
1935       else
1936         {
1937           /* Adding 1 to the buffer size lets xmemxfrm run a bit
1938              faster by avoiding the need for an extra buffer copy.  */
1939           buf = xmalloc (tlena + tlenb + 1);
1940           xmemxfrm (buf, tlena + 1, texta, lena);
1941           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1942         }
1943
1944       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1945
1946       if (buf != stackbuf)
1947         free (buf);
1948     }
1949
1950   return diff;
1951 }
1952
/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
   using filevercmp.  See lib/filevercmp.h for function description.

   "filevercmp" works with NUL terminated strings, and our blocks of
   text are not necessarily terminated with a NUL byte.  The byte just
   past each key is therefore saved, overwritten with NUL for the
   duration of the call, and then restored.  */

static int
compare_version (char *restrict texta, size_t lena,
                 char *restrict textb, size_t lenb)
{
  char *enda = texta + lena;
  char *endb = textb + lenb;
  char const saved_a = *enda;
  char const saved_b = *endb;
  int result;

  *enda = '\0';
  *endb = '\0';

  result = filevercmp (texta, textb);

  *enda = saved_a;
  *endb = saved_b;

  return result;
}
1978
/* Compare two lines A and B trying every key in sequence until there
   are no more keys or a difference is found.

   Keys are taken from keylist in order.  Return zero if every key
   compares equal.  Otherwise return the difference found for the
   first key that distinguishes the lines, negated when that key's
   `reverse' flag is set.

   The numeric comparisons temporarily store a NUL at the end of each
   field inside the line text and restore the saved byte afterwards;
   compare_version is likewise handed writable pointers into the line
   text.  */

static int
keycompare (const struct line *a, const struct line *b)
{
  struct keyfield *key = keylist;

  /* For the first iteration only, the key positions have been
     precomputed for us. */
  char *texta = a->keybeg;
  char *textb = b->keybeg;
  char *lima = a->keylim;
  char *limb = b->keylim;

  int diff;

  for (;;)
    {
      /* Per-key byte tables; either may be null, meaning "no
         translation" or "ignore nothing" respectively.  */
      char const *translate = key->translate;
      bool const *ignore = key->ignore;

      /* Treat field ends before field starts as empty fields.  */
      lima = MAX (texta, lima);
      limb = MAX (textb, limb);

      /* Find the lengths. */
      size_t lena = lima - texta;
      size_t lenb = limb - textb;

      /* Actually compare the fields. */

      if (key->random)
        diff = compare_random (texta, lena, textb, lenb);
      else if (key->numeric | key->general_numeric | key->human_numeric)
        {
          /* The numeric comparers take no lengths, so NUL-terminate
             both fields in place for the call and then put the
             original bytes back.  */
          char savea = *lima, saveb = *limb;

          *lima = *limb = '\0';
          diff = (key->numeric ? numcompare (texta, textb)
                  : key->general_numeric ? general_numcompare (texta, textb)
                  : human_numcompare (texta, textb, key));
          *lima = savea, *limb = saveb;
        }
      else if (key->version)
        diff = compare_version (texta, lena, textb, lenb);
      else if (key->month)
        diff = getmonth (texta, lena) - getmonth (textb, lenb);
      /* Sorting like this may become slow, so in a simple locale the user
         can select a faster sort that is similar to ascii sort.  */
      else if (hard_LC_COLLATE)
        {
          if (ignore || translate)
            {
              /* Build filtered and/or translated copies of both
                 fields, each followed by one spare byte, in a stack
                 buffer when they fit and on the heap otherwise.  */
              char buf[4000];
              size_t size = lena + 1 + lenb + 1;
              char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
              char *copy_b = copy_a + lena + 1;
              size_t new_len_a, new_len_b, i;

              /* Ignore and/or translate chars before comparing.  */
              for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
                {
                  if (i < lena)
                    {
                      /* The byte is always stored, but the output
                         length only advances for bytes that are kept,
                         so an ignored byte is overwritten by the next
                         one.  */
                      copy_a[new_len_a] = (translate
                                           ? translate[to_uchar (texta[i])]
                                           : texta[i]);
                      if (!ignore || !ignore[to_uchar (texta[i])])
                        ++new_len_a;
                    }
                  if (i < lenb)
                    {
                      copy_b[new_len_b] = (translate
                                           ? translate[to_uchar (textb[i])]
                                           : textb [i]);
                      if (!ignore || !ignore[to_uchar (textb[i])])
                        ++new_len_b;
                    }
                }

              diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);

              /* Same condition as the allocation above, inverted:
                 free only what xmalloc returned.  */
              if (sizeof buf < size)
                free (copy_a);
            }
          else if (lena == 0)
            diff = - NONZERO (lenb);
          else if (lenb == 0)
            goto greater;
          else
            diff = xmemcoll (texta, lena, textb, lenb);
        }
      else if (ignore)
        {
          /* Byte-wise comparison that skips ignored bytes on both
             sides.  A and B are the expressions giving the current
             byte of each side.  The macro advances TEXTA and TEXTB
             themselves, jumps to not_equal on the first differing
             pair, and otherwise sets DIFF according to which side
             still has bytes left.  */
#define CMP_WITH_IGNORE(A, B)                                           \
  do                                                                    \
    {                                                                   \
          for (;;)                                                      \
            {                                                           \
              while (texta < lima && ignore[to_uchar (*texta)])         \
                ++texta;                                                \
              while (textb < limb && ignore[to_uchar (*textb)])         \
                ++textb;                                                \
              if (! (texta < lima && textb < limb))                     \
                break;                                                  \
              diff = to_uchar (A) - to_uchar (B);                       \
              if (diff)                                                 \
                goto not_equal;                                         \
              ++texta;                                                  \
              ++textb;                                                  \
            }                                                           \
                                                                        \
          diff = (texta < lima) - (textb < limb);                       \
    }                                                                   \
  while (0)

          if (translate)
            CMP_WITH_IGNORE (translate[to_uchar (*texta)],
                             translate[to_uchar (*textb)]);
          else
            CMP_WITH_IGNORE (*texta, *textb);
        }
      else if (lena == 0)
        diff = - NONZERO (lenb);
      else if (lenb == 0)
        goto greater;
      else
        {
          if (translate)
            {
              /* Compare translated bytes over the common prefix.  */
              while (texta < lima && textb < limb)
                {
                  diff = (to_uchar (translate[to_uchar (*texta++)])
                          - to_uchar (translate[to_uchar (*textb++)]));
                  if (diff)
                    goto not_equal;
                }
            }
          else
            {
              diff = memcmp (texta, textb, MIN (lena, lenb));
              if (diff)
                goto not_equal;
            }
          /* Common prefix is equal: the shorter field sorts first.  */
          diff = lena < lenb ? -1 : lena != lenb;
        }

      if (diff)
        goto not_equal;

      /* This key did not distinguish the lines; move to the next.  */
      key = key->next;
      if (! key)
        break;

      /* Find the beginning and limit of the next field.  */
      if (key->eword != SIZE_MAX)
        lima = limfield (a, key), limb = limfield (b, key);
      else
        lima = a->text + a->length - 1, limb = b->text + b->length - 1;

      if (key->sword != SIZE_MAX)
        texta = begfield (a, key), textb = begfield (b, key);
      else
        {
          texta = a->text, textb = b->text;
          if (key->skipsblanks)
            {
              while (texta < lima && blanks[to_uchar (*texta)])
                ++texta;
              while (textb < limb && blanks[to_uchar (*textb)])
                ++textb;
            }
        }
    }

  /* Every key compared equal.  */
  return 0;

  /* Reached when A's field is nonempty and B's is empty.  */
 greater:
  diff = 1;
  /* KEY is still the key that produced DIFF; honor its reverse flag.  */
 not_equal:
  return key->reverse ? -diff : diff;
}
2162
2163 /* Compare two lines A and B, returning negative, zero, or positive
2164    depending on whether A compares less than, equal to, or greater than B. */
2165
2166 static int
2167 compare (const struct line *a, const struct line *b)
2168 {
2169   int diff;
2170   size_t alen, blen;
2171
2172   /* First try to compare on the specified keys (if any).
2173      The only two cases with no key at all are unadorned sort,
2174      and unadorned sort -r. */
2175   if (keylist)
2176     {
2177       diff = keycompare (a, b);
2178       if (diff | unique | stable)
2179         return diff;
2180     }
2181
2182   /* If the keys all compare equal (or no keys were specified)
2183      fall through to the default comparison.  */
2184   alen = a->length - 1, blen = b->length - 1;
2185
2186   if (alen == 0)
2187     diff = - NONZERO (blen);
2188   else if (blen == 0)
2189     diff = 1;
2190   else if (hard_LC_COLLATE)
2191     diff = xmemcoll (a->text, alen, b->text, blen);
2192   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2193     diff = alen < blen ? -1 : alen != blen;
2194
2195   return reverse ? -diff : diff;
2196 }
2197
2198 /* Check that the lines read from FILE_NAME come in order.  Return
2199    true if they are in order.  If CHECKONLY == 'c', also print a
2200    diagnostic (FILE_NAME, line number, contents of line) to stderr if
2201    they are not in order.  */
2202
2203 static bool
2204 check (char const *file_name, char checkonly)
2205 {
2206   FILE *fp = xfopen (file_name, "r");
2207   struct buffer buf;            /* Input buffer. */
2208   struct line temp;             /* Copy of previous line. */
2209   size_t alloc = 0;
2210   uintmax_t line_number = 0;
2211   struct keyfield const *key = keylist;
2212   bool nonunique = ! unique;
2213   bool ordered = true;
2214
2215   initbuf (&buf, sizeof (struct line),
2216            MAX (merge_buffer_size, sort_size));
2217   temp.text = NULL;
2218
2219   while (fillbuf (&buf, fp, file_name))
2220     {
2221       struct line const *line = buffer_linelim (&buf);
2222       struct line const *linebase = line - buf.nlines;
2223
2224       /* Make sure the line saved from the old buffer contents is
2225          less than or equal to the first line of the new buffer. */
2226       if (alloc && nonunique <= compare (&temp, line - 1))
2227         {
2228         found_disorder:
2229           {
2230             if (checkonly == 'c')
2231               {
2232                 struct line const *disorder_line = line - 1;
2233                 uintmax_t disorder_line_number =
2234                   buffer_linelim (&buf) - disorder_line + line_number;
2235                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2236                 fprintf (stderr, _("%s: %s:%s: disorder: "),
2237                          program_name, file_name,
2238                          umaxtostr (disorder_line_number, hr_buf));
2239                 write_bytes (disorder_line->text, disorder_line->length,
2240                              stderr, _("standard error"));
2241               }
2242
2243             ordered = false;
2244             break;
2245           }
2246         }
2247
2248       /* Compare each line in the buffer with its successor.  */
2249       while (linebase < --line)
2250         if (nonunique <= compare (line, line - 1))
2251           goto found_disorder;
2252
2253       line_number += buf.nlines;
2254
2255       /* Save the last line of the buffer.  */
2256       if (alloc < line->length)
2257         {
2258           do
2259             {
2260               alloc *= 2;
2261               if (! alloc)
2262                 {
2263                   alloc = line->length;
2264                   break;
2265                 }
2266             }
2267           while (alloc < line->length);
2268
2269           temp.text = xrealloc (temp.text, alloc);
2270         }
2271       memcpy (temp.text, line->text, line->length);
2272       temp.length = line->length;
2273       if (key)
2274         {
2275           temp.keybeg = temp.text + (line->keybeg - line->text);
2276           temp.keylim = temp.text + (line->keylim - line->text);
2277         }
2278     }
2279
2280   xfclose (fp, file_name);
2281   free (buf.buf);
2282   free (temp.text);
2283   return ordered;
2284 }
2285
2286 /* Open FILES (there are NFILES of them) and store the resulting array
2287    of stream pointers into (*PFPS).  Allocate the array.  Return the
2288    number of successfully opened files, setting errno if this value is
2289    less than NFILES.  */
2290
2291 static size_t
2292 open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps)
2293 {
2294   FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps);
2295   int i;
2296
2297   /* Open as many input files as we can.  */
2298   for (i = 0; i < nfiles; i++)
2299     {
2300       fps[i] = (files[i].pid
2301                 ? open_temp (files[i].name, files[i].pid)
2302                 : stream_open (files[i].name, "r"));
2303       if (!fps[i])
2304         break;
2305     }
2306
2307   return i;
2308 }
2309
/* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
   files (all of which are at the start of the FILES array), and
   NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
   FPS is the vector of open stream corresponding to the files.
   Close input and output streams before returning.
   OUTPUT_FILE gives the name of the output file.  If it is NULL,
   the output file is standard output.

   Each input stream is closed as soon as it is exhausted, and if it
   is one of the leading temporary files it is also removed with
   zaptemp.  The FILES and FPS arrays are compacted in place as inputs
   drop out, so their contents are not preserved for the caller.  FPS
   itself is freed before returning.

   When `unique' is set, a line is held back in SAVED and written only
   once a line that compares different from it turns up (or at the
   end), so each run of equal lines yields a single output line.  */

static void
mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
          FILE *ofp, char const *output_file, FILE **fps)
{
  struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer);
                                /* Input buffers for each file. */
  struct line saved;            /* Saved line storage for unique check. */
  struct line const *savedline = NULL;
                                /* &saved if there is a saved line. */
  size_t savealloc = 0;         /* Size allocated for the saved line. */
  struct line const **cur = xnmalloc (nfiles, sizeof *cur);
                                /* Current line in each line table. */
  struct line const **base = xnmalloc (nfiles, sizeof *base);
                                /* Base of each line table.  */
  size_t *ord = xnmalloc (nfiles, sizeof *ord);
                                /* Table representing a permutation of fps,
                                   such that cur[ord[0]] is the smallest line
                                   and will be next output. */
  size_t i;
  size_t j;
  size_t t;
  struct keyfield const *key = keylist;
  saved.text = NULL;

  /* Read initial lines from each input file.  I advances only when
     file I produced data; an empty file is removed and the next file
     slides into slot I.  */
  for (i = 0; i < nfiles; )
    {
      initbuf (&buffer[i], sizeof (struct line),
               MAX (merge_buffer_size, sort_size / nfiles));
      if (fillbuf (&buffer[i], fps[i], files[i].name))
        {
          /* The current line starts just below the table limit and
             moves down toward BASE as lines are consumed.  */
          struct line const *linelim = buffer_linelim (&buffer[i]);
          cur[i] = linelim - 1;
          base[i] = linelim - buffer[i].nlines;
          i++;
        }
      else
        {
          /* fps[i] is empty; eliminate it from future consideration.  */
          xfclose (fps[i], files[i].name);
          if (i < ntemps)
            {
              ntemps--;
              zaptemp (files[i].name);
            }
          free (buffer[i].buf);
          --nfiles;
          for (j = i; j < nfiles; ++j)
            {
              files[j] = files[j + 1];
              fps[j] = fps[j + 1];
            }
        }
    }

  /* Set up the ord table according to comparisons among input lines.
     Since this only reorders two items if one is strictly greater than
     the other, it is stable.  After each exchange the scan restarts
     from the beginning (I becomes 0, then the loop increments it).  */
  for (i = 0; i < nfiles; ++i)
    ord[i] = i;
  for (i = 1; i < nfiles; ++i)
    if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
      t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;

  /* Repeatedly output the smallest line until no input remains. */
  while (nfiles)
    {
      struct line const *smallest = cur[ord[0]];

      /* If uniquified output is turned on, output only the first of
         an identical series of lines. */
      if (unique)
        {
          /* A line that differs from the saved one ends the current
             run: flush the saved line.  */
          if (savedline && compare (savedline, smallest))
            {
              savedline = NULL;
              write_bytes (saved.text, saved.length, ofp, output_file);
            }
          /* No line is being held: copy SMALLEST into SAVED, since
             its buffer may be refilled before it is written.  */
          if (!savedline)
            {
              savedline = &saved;
              if (savealloc < smallest->length)
                {
                  do
                    if (! savealloc)
                      {
                        savealloc = smallest->length;
                        break;
                      }
                  while ((savealloc *= 2) < smallest->length);

                  saved.text = xrealloc (saved.text, savealloc);
                }
              saved.length = smallest->length;
              memcpy (saved.text, smallest->text, saved.length);
              if (key)
                {
                  /* Rebase the key bounds onto the copied text.  */
                  saved.keybeg =
                    saved.text + (smallest->keybeg - smallest->text);
                  saved.keylim =
                    saved.text + (smallest->keylim - smallest->text);
                }
            }
        }
      else
        write_bytes (smallest->text, smallest->length, ofp, output_file);

      /* Check if we need to read more lines into core. */
      if (base[ord[0]] < smallest)
        cur[ord[0]] = smallest - 1;
      else
        {
          if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
            {
              struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
              cur[ord[0]] = linelim - 1;
              base[ord[0]] = linelim - buffer[ord[0]].nlines;
            }
          else
            {
              /* We reached EOF on fps[ord[0]].  Slot ord[0] is about
                 to be removed from every per-file array, so first
                 renumber the ORD entries that refer to later slots.  */
              for (i = 1; i < nfiles; ++i)
                if (ord[i] > ord[0])
                  --ord[i];
              --nfiles;
              xfclose (fps[ord[0]], files[ord[0]].name);
              if (ord[0] < ntemps)
                {
                  ntemps--;
                  zaptemp (files[ord[0]].name);
                }
              free (buffer[ord[0]].buf);
              /* Close the gap left by the finished file.  */
              for (i = ord[0]; i < nfiles; ++i)
                {
                  fps[i] = fps[i + 1];
                  files[i] = files[i + 1];
                  buffer[i] = buffer[i + 1];
                  cur[i] = cur[i + 1];
                  base[i] = base[i + 1];
                }
              /* Drop the finished file from the head of ORD.  The
                 remaining entries are already in order, so skip the
                 reinsertion step below.  */
              for (i = 0; i < nfiles; ++i)
                ord[i] = ord[i + 1];
              continue;
            }
        }

      /* The new line just read in may be larger than other lines
         already in main memory; push it back in the queue until we
         encounter a line larger than it.  Optimize for the common
         case where the new line is smallest.  */
      {
        /* Binary search of ord[1 .. nfiles-1] for the insertion
           point.  The first probe is position 1, which settles the
           common case in one comparison.  Lines that compare equal
           are ordered by file index.  */
        size_t lo = 1;
        size_t hi = nfiles;
        size_t probe = lo;
        size_t ord0 = ord[0];
        size_t count_of_smaller_lines;

        while (lo < hi)
          {
            int cmp = compare (cur[ord0], cur[ord[probe]]);
            if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
              hi = probe;
            else
              lo = probe + 1;
            probe = (lo + hi) / 2;
          }

        /* Shift the smaller entries down one place and drop ORD0
           into the vacated position.  */
        count_of_smaller_lines = lo - 1;
        for (j = 0; j < count_of_smaller_lines; j++)
          ord[j] = ord[j + 1];
        ord[count_of_smaller_lines] = ord0;
      }

      /* Free up some resources every once in a while.  */
      if (MAX_PROCS_BEFORE_REAP < nprocs)
        reap_some ();
    }

  /* Flush the line still being held for the unique check, if any.  */
  if (unique && savedline)
    {
      write_bytes (saved.text, saved.length, ofp, output_file);
      free (saved.text);
    }

  xfclose (ofp, output_file);
  free(fps);
  free(buffer);
  free(ord);
  free(base);
  free(cur);
}
2509
2510 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2511    files (all of which are at the start of the FILES array), and
2512    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2513    Close input and output files before returning.
2514    OUTPUT_FILE gives the name of the output file.
2515
2516    Return the number of files successfully merged.  This number can be
2517    less than NFILES if we ran low on file descriptors, but in this
2518    case it is never less than 2.  */
2519
2520 static size_t
2521 mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles,
2522             FILE *ofp, char const *output_file)
2523 {
2524   FILE **fps;
2525   size_t nopened = open_input_files (files, nfiles, &fps);
2526   if (nopened < nfiles && nopened < 2)
2527     die (_("open failed"), files[nopened].name);
2528   mergefps (files, ntemps, nopened, ofp, output_file, fps);
2529   return nopened;
2530 }
2531
2532 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2533    and HI (with NHI members).  T, LO, and HI point just past their
2534    respective arrays, and the arrays are in reverse order.  NLO and
2535    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2536
2537 static inline void
2538 mergelines (struct line *t,
2539             struct line const *lo, size_t nlo,
2540             struct line const *hi, size_t nhi)
2541 {
2542   for (;;)
2543     if (compare (lo - 1, hi - 1) <= 0)
2544       {
2545         *--t = *--lo;
2546         if (! --nlo)
2547           {
2548             /* HI - NHI equalled T - (NLO + NHI) when this function
2549                began.  Therefore HI must equal T now, and there is no
2550                need to copy from HI to T.  */
2551             return;
2552           }
2553       }
2554     else
2555       {
2556         *--t = *--hi;
2557         if (! --nhi)
2558           {
2559             do
2560               *--t = *--lo;
2561             while (--nlo);
2562
2563             return;
2564           }
2565       }
2566 }
2567
2568 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2569    NLINES must be at least 2.
2570    The input and output arrays are in reverse order, and LINES and
2571    TEMP point just past the end of their respective arrays.
2572
2573    Use a recursive divide-and-conquer algorithm, in the style
2574    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2575    the optimization suggested by exercise 5.2.4-10; this requires room
2576    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2577    writes that this memory optimization was originally published by
2578    D. A. Bell, Comp J. 1 (1958), 75.  */
2579
2580 static void
2581 sortlines (struct line *lines, size_t nlines, struct line *temp)
2582 {
2583   if (nlines == 2)
2584     {
2585       if (0 < compare (&lines[-1], &lines[-2]))
2586         {
2587           struct line tmp = lines[-1];
2588           lines[-1] = lines[-2];
2589           lines[-2] = tmp;
2590         }
2591     }
2592   else
2593     {
2594       size_t nlo = nlines / 2;
2595       size_t nhi = nlines - nlo;
2596       struct line *lo = lines;
2597       struct line *hi = lines - nlo;
2598       struct line *sorted_lo = temp;
2599
2600       sortlines (hi, nhi, temp);
2601       if (1 < nlo)
2602         sortlines_temp (lo, nlo, sorted_lo);
2603       else
2604         sorted_lo[-1] = lo[-1];
2605
2606       mergelines (lines, sorted_lo, nlo, hi, nhi);
2607     }
2608 }
2609
2610 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2611    rather than sorting in place.  */
2612
2613 static void
2614 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2615 {
2616   if (nlines == 2)
2617     {
2618       /* Declare `swap' as int, not bool, to work around a bug
2619          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2620          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2621       int swap = (0 < compare (&lines[-1], &lines[-2]));
2622       temp[-1] = lines[-1 - swap];
2623       temp[-2] = lines[-2 + swap];
2624     }
2625   else
2626     {
2627       size_t nlo = nlines / 2;
2628       size_t nhi = nlines - nlo;
2629       struct line *lo = lines;
2630       struct line *hi = lines - nlo;
2631       struct line *sorted_hi = temp - nlo;
2632
2633       sortlines_temp (hi, nhi, sorted_hi);
2634       if (1 < nlo)
2635         sortlines (lo, nlo, temp);
2636
2637       mergelines (temp, lo, nlo, sorted_hi, nhi);
2638     }
2639 }
2640
2641 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2642    the same as OUTFILE.  If found, merge the found instances (and perhaps
2643    some other files) into a temporary file so that it can in turn be
2644    merged into OUTFILE without destroying OUTFILE before it is completely
2645    read.  Return the new value of NFILES, which differs from the old if
2646    some merging occurred.
2647
2648    This test ensures that an otherwise-erroneous use like
2649    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2650    It's not clear that POSIX requires this nicety.
2651    Detect common error cases, but don't try to catch obscure cases like
2652    "cat ... FILE ... | sort -m -o FILE"
2653    where traditional "sort" doesn't copy the input and where
2654    people should know that they're getting into trouble anyway.
2655    Catching these obscure cases would slow down performance in
2656    common cases.  */
2657
2658 static size_t
2659 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2660                       size_t nfiles, char const *outfile)
2661 {
2662   size_t i;
2663   bool got_outstat = false;
2664   struct stat outstat;
2665
2666   for (i = ntemps; i < nfiles; i++)
2667     {
2668       bool is_stdin = STREQ (files[i].name, "-");
2669       bool same;
2670       struct stat instat;
2671
2672       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2673         same = true;
2674       else
2675         {
2676           if (! got_outstat)
2677             {
2678               if ((outfile
2679                    ? stat (outfile, &outstat)
2680                    : fstat (STDOUT_FILENO, &outstat))
2681                   != 0)
2682                 break;
2683               got_outstat = true;
2684             }
2685
2686           same = (((is_stdin
2687                     ? fstat (STDIN_FILENO, &instat)
2688                     : stat (files[i].name, &instat))
2689                    == 0)
2690                   && SAME_INODE (instat, outstat));
2691         }
2692
2693       if (same)
2694         {
2695           FILE *tftp;
2696           pid_t pid;
2697           char *temp = create_temp (&tftp, &pid);
2698           size_t num_merged = 0;
2699           do
2700             {
2701               num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
2702               files[i].name = temp;
2703               files[i].pid = pid;
2704
2705               if (i + num_merged < nfiles)
2706                 memmove(&files[i + 1], &files[i + num_merged],
2707                         num_merged * sizeof *files);
2708               ntemps += 1;
2709               nfiles -= num_merged - 1;;
2710               i += num_merged;
2711             }
2712           while (i < nfiles);
2713         }
2714     }
2715
2716   return nfiles;
2717 }
2718
2719 /* Merge the NFILES inputs named by FILES into OUTPUT_FILE; a null
2720    OUTPUT_FILE stands for standard output.  NTEMPS is the number of
2721    files at the start of FILES that are temporary.  FILES is rewritten
2722    in place as groups of inputs are replaced by merged temporaries.  */
2723
2724 static void
2725 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2726        char const *output_file)
2727 {
2728   while (nmerge < nfiles)
2729     {
2730       /* Number of input files processed so far in this pass.  */
2731       size_t in;
2732
2733       /* Number of output files generated so far in this pass.  */
2734       size_t out;
2735
2736       /* Number of input files that are left over after all the
2737          full-sized merges of this pass have been done.  */
2738       size_t remainder;
2739
2740       /* Number of easily-available slots at the next loop iteration.  */
2741       size_t cheap_slots;
2742
2743       /* Do as many NMERGE-size merges as possible.  Advance IN by the
2744          count that mergefiles returns rather than by NMERGE, in case
2745          fewer than NMERGE files could be merged at once.  */
2746       for (out = in = 0; nmerge <= nfiles - in; out++)
2747         {
2748           FILE *tfp;
2749           pid_t pid;
2750           char *temp = create_temp (&tfp, &pid);
2751           size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge),
2752                                           nmerge, tfp, temp);
2753           ntemps -= MIN (ntemps, num_merged);
2754           files[out].name = temp;   /* The merged output replaces its inputs.  */
2755           files[out].pid = pid;
2756           in += num_merged;
2757         }
2758
2759       remainder = nfiles - in;
2760       cheap_slots = nmerge - out % nmerge;
2761
2762       if (cheap_slots < remainder)
2763         {
2764           /* So many files remain that they can't all be put into the last
2765              NMERGE-sized output window.  Do one more merge.  Merge as few
2766              files as possible, to avoid needless I/O.  */
2767           size_t nshortmerge = remainder - cheap_slots + 1;
2768           FILE *tfp;
2769           pid_t pid;
2770           char *temp = create_temp (&tfp, &pid);
2771           size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge),
2772                                           nshortmerge, tfp, temp);
2773           ntemps -= MIN (ntemps, num_merged);
2774           files[out].name = temp;
2775           files[out++].pid = pid;
2776           in += num_merged;
2777         }
2778
2779       /* Put the remaining input files into the last NMERGE-sized output
2780          window, so they will be merged in the next pass.  */
2781       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2782       ntemps += out;   /* Every output of this pass is a temporary.  */
2783       nfiles -= in - out;
2784     }
2785
2786   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2787
2788   /* The final merge may fail for lack of file descriptors.  So try to
2789      merge into the output; failing that, merge as many inputs as could
2790      be opened into a temporary file, and repeat.  */
2791
2792   for (;;)
2793     {
2794       /* Merge directly into the output file if possible.  */
2795       FILE **fps;
2796       size_t nopened = open_input_files (files, nfiles, &fps);
2797
2798       if (nopened == nfiles)
2799         {
2800           FILE *ofp = stream_open (output_file, "w");
2801           if (ofp)
2802             {
2803               mergefps (files, ntemps, nfiles, ofp, output_file, fps);
2804               break;
2805             }
2806           if (errno != EMFILE || nopened <= 2)
2807             die (_("open failed"), output_file);
2808         }
2809       else if (nopened <= 2)
2810         die (_("open failed"), files[nopened].name);
2811
2812       /* We ran out of file descriptors.  Close one of the input
2813          files, to gain a file descriptor.  Then create a temporary
2814          file with our spare file descriptor.  Retry if that failed
2815          (e.g., some other process could open a file between the time
2816          we closed and tried to create).  */
2817       FILE *tfp;
2818       pid_t pid;
2819       char *temp;
2820       do
2821         {
2822           nopened--;
2823           xfclose (fps[nopened], files[nopened].name);
2824           temp = maybe_create_temp (&tfp, &pid, ! (nopened <= 2));
2825         }
2826       while (!temp);
2827
2828       /* Merge the inputs that are still open into the new temporary.  */
2829       mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp, fps);
2830       ntemps -= MIN (ntemps, nopened);
2831       files[0].name = temp;
2832       files[0].pid = pid;
2833       /* The temporary now stands in for the NOPENED inputs it merged.  */
2834       memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files);
2835       ntemps++;
2836       nfiles -= nopened - 1;
2837     }
2838 }
2839
2840 /* Sort NFILES FILES onto OUTPUT_FILE, using temporary files if needed.  */
2841
2842 static void
2843 sort (char * const *files, size_t nfiles, char const *output_file)
2844 {
2845   struct buffer buf;
2846   size_t ntemps = 0;   /* Number of temporary files created so far.  */
2847   bool output_file_created = false;
2848
2849   buf.alloc = 0;   /* Makes the loop below call initbuf on first use.  */
2850
2851   while (nfiles)
2852     {
2853       char const *temp_output;
2854       char const *file = *files;
2855       FILE *fp = xfopen (file, "r");
2856       FILE *tfp;
2857       size_t bytes_per_line = (2 * sizeof (struct line)
2858                                - sizeof (struct line) / 2);
2859
2860       if (! buf.alloc)
2861         initbuf (&buf, bytes_per_line,
2862                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2863       buf.eof = false;
2864       files++;
2865       nfiles--;
2866
2867       while (fillbuf (&buf, fp, file))
2868         {
2869           struct line *line;
2870           struct line *linebase;
2871
2872           if (buf.eof && nfiles
2873               && (bytes_per_line + 1
2874                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2875             {
2876               /* End of file, but there is more input and buffer room.
2877                  Concatenate the next input file; this is faster in
2878                  the usual case.  */
2879               buf.left = buf.used;
2880               break;
2881             }
2882           /* Sort the batch; write OUTPUT_FILE directly if it is all the input.  */
2883           line = buffer_linelim (&buf);
2884           linebase = line - buf.nlines;
2885           if (1 < buf.nlines)
2886             sortlines (line, buf.nlines, linebase);
2887           if (buf.eof && !nfiles && !ntemps && !buf.left)
2888             {
2889               xfclose (fp, file);
2890               tfp = xfopen (output_file, "w");
2891               temp_output = output_file;
2892               output_file_created = true;
2893             }
2894           else
2895             {
2896               ++ntemps;
2897               temp_output = create_temp (&tfp, NULL);
2898             }
2899           /* Write the lines from the top of the array down to LINEBASE.  */
2900           do
2901             {
2902               line--;
2903               write_bytes (line->text, line->length, tfp, temp_output);
2904               if (unique)   /* Skip lines equal to the one just written.  */
2905                 while (linebase < line && compare (line, line - 1) == 0)
2906                   line--;
2907             }
2908           while (linebase < line);
2909
2910           xfclose (tfp, temp_output);
2911
2912           /* Free up some resources every once in a while.  */
2913           if (MAX_PROCS_BEFORE_REAP < nprocs)
2914             reap_some ();
2915
2916           if (output_file_created)
2917             goto finish;
2918         }
2919       xfclose (fp, file);
2920     }
2921
2922  finish:
2923   free (buf.buf);
2924
2925   if (! output_file_created)
2926     {
2927       size_t i;
2928       struct tempnode *node = temphead;
2929       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2930       for (i = 0; node; i++)   /* Copy the temphead list into an array.  */
2931         {
2932           tempfiles[i].name = node->name;
2933           tempfiles[i].pid = node->pid;
2934           node = node->next;
2935         }
2936       merge (tempfiles, ntemps, ntemps, output_file);   /* All temporaries.  */
2937       free (tempfiles);
2938     }
2939 }
2940
2941 /* Append a malloc'd duplicate of the key *KEY_ARG to the key list.  */
2942
2943 static void
2944 insertkey (struct keyfield *key_arg)
2945 {
2946   struct keyfield *copy = xmemdup (key_arg, sizeof *copy);
2947   struct keyfield **tail = &keylist;
2948
2949   while (*tail)
2950     tail = &(*tail)->next;
2951   copy->next = NULL;
2952   *tail = copy;
2953 }
2954
2955 /* Diagnose the invalid field specification SPEC, prefixing the message
2956    with the translation of MSGID.  This function does not return.  */
2957 static void badfieldspec (char const *, char const *) ATTRIBUTE_NORETURN;
2958 static void
2959 badfieldspec (char const *spec, char const *msgid)
2960 {
2961   char const *detail = _(msgid);
2962   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"), detail,
2963          quote (spec));
2964   abort ();
2965 }
2966
2967 /* Complain that the option letters in OPTS cannot be combined.
2968    This function does not return.  */
2969 static void incompatible_options (char const *opts) ATTRIBUTE_NORETURN;
2970 static void
2971 incompatible_options (char const *opts)
2972 {
2973   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2974   abort ();
2975 }
2976
2977 /* Exit with a diagnostic if any key in the key list selects ordering
2978    options that cannot be combined.  */
2979 static void
2980 check_ordering_compatibility (void)
2981 {
2982   struct keyfield const *k;
2983   for (k = keylist; k; k = k->next)
2984     {
2985       /* Candidate letters, in the order the diagnostic lists them.  */
2986       struct { bool set; char letter; } const flags[] =
2987         {
2988           { k->ignore == nondictionary, 'd' },
2989           { k->translate != NULL, 'f' },
2990           { k->general_numeric, 'g' },
2991           { k->human_numeric, 'h' },
2992           { k->ignore == nonprinting, 'i' },
2993           { k->month, 'M' },
2994           { k->numeric, 'n' },
2995           { k->version, 'V' },
2996           { k->random, 'R' },
2997         };
2998       /* At most one of these ordering kinds may be chosen per key.  */
2999       int nkinds = (k->random + k->numeric + k->general_numeric + k->month
3000                     + k->version + !!k->ignore + k->human_numeric);
3001       char letters[sizeof short_options];   /* More than big enough.  */
3002       char *end = letters;
3003       size_t i;
3004
3005       if (nkinds <= 1 && ! (k->random && k->translate))
3006         continue;
3007       for (i = 0; i < sizeof flags / sizeof *flags; i++)
3008         if (flags[i].set)
3009           *end++ = flags[i].letter;
3010       *end = '\0';
3011       incompatible_options (letters);
3012     }
3013 }
3014
3015 /* Convert the decimal integer at the start of STRING and store it in
3016    *VAL, silently substituting SIZE_MAX when it does not fit in size_t.
3017    Return the address of the text that follows the integer.  When
3018    STRING does not start with an integer, return NULL if MSGID is
3019    NULL; otherwise report MSGID and exit.  */
3020
3021 static char const *
3022 parse_field_count (char const *string, size_t *val, char const *msgid)
3023 {
3024   char *rest;
3025   uintmax_t parsed;
3026   strtol_error status = xstrtoumax (string, &rest, 10, &parsed, "");
3027   bool too_big = (status == LONGINT_OVERFLOW
3028                   || status == (LONGINT_OVERFLOW
3029                                 | LONGINT_INVALID_SUFFIX_CHAR));
3030
3031   if (status == LONGINT_INVALID)
3032     {
3033       if (msgid)
3034         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
3035                _(msgid), quote (string));
3036       return NULL;
3037     }
3038
3039   if (status == LONGINT_OK || status == LONGINT_INVALID_SUFFIX_CHAR)
3040     {
3041       /* Detect truncation when size_t is narrower than uintmax_t.  */
3042       *val = parsed;
3043       too_big = (*val != parsed);
3044     }
3045   if (too_big)
3046     *val = SIZE_MAX;
3047   return rest;
3048 }
3049
3050 /* Handle the caught signal SIG: run cleanup (), then restore the
3051    default action for SIG and raise it again.  */
3052 static void
3053 sighandler (int sig)
3054 {
3055   /* With plain signal () handlers (no sigaction), ignore SIG meanwhile.  */
3056   if (SA_NOCLDSTOP == 0)
3057     signal (sig, SIG_IGN);
3058   cleanup ();
3059
3060   signal (sig, SIG_DFL);
3061   raise (sig);
3062 }
3063
3064 /* Apply to KEY each ordering-option letter at the start of S, and
3065    return the address of the first character of S that is not such a
3066    letter (the terminating null byte if all of S was consumed).
3067    BLANKTYPE says which end(s) of the key 'b' skips blanks at.  */
3068
3069 static char *
3070 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
3071 {
3072   bool strip_start = (blanktype == bl_start || blanktype == bl_both);
3073   bool strip_end = (blanktype == bl_end || blanktype == bl_both);
3074
3075   for (;; s++)
3076     switch (*s)
3077       {
3078       case 'b':
3079         if (strip_start)
3080           key->skipsblanks = true;
3081         if (strip_end)
3082           key->skipeblanks = true;
3083         break;
3084       case 'd':
3085         key->ignore = nondictionary;
3086         break;
3087       case 'f':
3088         key->translate = fold_toupper;
3089         break;
3090       case 'g':
3091         key->general_numeric = true;
3092         break;
3093       case 'h':
3094         key->human_numeric = true;
3095         break;
3096       case 'i':
3097         /* Option order should not matter, so -i must not override an
3098            earlier -d: -d implies -i, but -i does not imply -d.  */
3099         if (! key->ignore)
3100           key->ignore = nonprinting;
3101         break;
3102       case 'M':
3103         key->month = true;
3104         break;
3105       case 'n':
3106         key->numeric = true;
3107         break;
3108       case 'R':
3109         key->random = true;
3110         break;
3111       case 'r':
3112         key->reverse = true;
3113         break;
3114       case 'V':
3115         key->version = true;
3116         break;
3117       default:
3118         /* This also covers the terminating null byte.  */
3119         return (char *) s;
3120       }
3121 }
3122 /* Reset *KEY to the default, empty key description and return KEY.  */
3123 static struct keyfield *
3124 key_init (struct keyfield *key)
3125 {
3126   memset (key, 0, sizeof *key);   /* Zero every member first.  */
3127   key->si_present = -1;
3128   key->eword = SIZE_MAX;
3129   return key;
3130 }
3131 /* Parse the options and operands, then check, merge, or sort the input.  */
3132 int
3133 main (int argc, char **argv)
3134 {
3135   struct keyfield *key;
3136   struct keyfield key_buf;   /* Scratch key filled in by -k and +POS.  */
3137   struct keyfield gkey;      /* Ordering options given outside any key.  */
3138   char const *s;
3139   int c = 0;
3140   char checkonly = 0;        /* 'c' or 'C' once a check option is seen.  */
3141   bool mergeonly = false;
3142   char *random_source = NULL;
3143   bool need_random = false;
3144   size_t nfiles = 0;
3145   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
3146   bool obsolete_usage = (posix2_version () < 200112);
3147   char **files;
3148   char *files_from = NULL;
3149   struct Tokens tok;
3150   char const *outfile = NULL;
3151
3152   initialize_main (&argc, &argv);
3153   set_program_name (argv[0]);
3154   setlocale (LC_ALL, "");
3155   bindtextdomain (PACKAGE, LOCALEDIR);
3156   textdomain (PACKAGE);
3157
3158   initialize_exit_failure (SORT_FAILURE);
3159
3160   hard_LC_COLLATE = hard_locale (LC_COLLATE);
3161 #if HAVE_NL_LANGINFO
3162   hard_LC_TIME = hard_locale (LC_TIME);
3163 #endif
3164
3165   /* Get locale's representation of the decimal point.  */
3166   {
3167     struct lconv const *locale = localeconv ();
3168
3169     /* If the locale doesn't define a decimal point, or if the decimal
3170        point is multibyte, use the C locale's decimal point.  FIXME:
3171        add support for multibyte decimal points.  */
3172     decimal_point = to_uchar (locale->decimal_point[0]);
3173     if (! decimal_point || locale->decimal_point[1])
3174       decimal_point = '.';
3175
3176     /* FIXME: add support for multibyte thousands separators.  */
3177     thousands_sep = to_uchar (*locale->thousands_sep);
3178     if (! thousands_sep || locale->thousands_sep[1])
3179       thousands_sep = -1;
3180   }
3181
3182   have_read_stdin = false;
3183   inittables ();
3184
3185   {
3186     size_t i;
3187     static int const sig[] =
3188       {
3189         /* The usual suspects.  */
3190         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
3191 #ifdef SIGPOLL
3192         SIGPOLL,
3193 #endif
3194 #ifdef SIGPROF
3195         SIGPROF,
3196 #endif
3197 #ifdef SIGVTALRM
3198         SIGVTALRM,
3199 #endif
3200 #ifdef SIGXCPU
3201         SIGXCPU,
3202 #endif
3203 #ifdef SIGXFSZ
3204         SIGXFSZ,
3205 #endif
3206       };
3207     enum { nsigs = ARRAY_CARDINALITY (sig) };
3208
3209 #if SA_NOCLDSTOP
3210     struct sigaction act;
3211
3212     sigemptyset (&caught_signals);
3213     for (i = 0; i < nsigs; i++)
3214       {
3215         sigaction (sig[i], NULL, &act);
3216         if (act.sa_handler != SIG_IGN)
3217           sigaddset (&caught_signals, sig[i]);
3218       }
3219
3220     act.sa_handler = sighandler;
3221     act.sa_mask = caught_signals;
3222     act.sa_flags = 0;
3223
3224     for (i = 0; i < nsigs; i++)
3225       if (sigismember (&caught_signals, sig[i]))
3226         sigaction (sig[i], &act, NULL);
3227 #else
3228     for (i = 0; i < nsigs; i++)
3229       if (signal (sig[i], SIG_IGN) != SIG_IGN)
3230         {
3231           signal (sig[i], sighandler);
3232           siginterrupt (sig[i], 1);
3233         }
3234 #endif
3235   }
3236
3237   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
3238   atexit (exit_cleanup);
3239
3240   /* Start the global key from the same defaults as every other key.
3241      key_init clears all members, so insertkey never duplicates an
3242      indeterminate schar, echar or next if the global key is later
3243      inserted as the only key.  Then set sword to SIZE_MAX, the value
3244      the 'k' case below stores for a key that starts at the very
3245      beginning of the line.  */
3246   key_init (&gkey);
3247   gkey.sword = SIZE_MAX;
3248
3249   files = xnmalloc (argc, sizeof *files);   /* Room for every operand.  */
3250
3251   for (;;)
3252     {
3253       /* Parse an operand as a file after "--" was seen; or if
3254          pedantic and a file was seen, unless the POSIX version
3255          predates 1003.1-2001 and -c was not seen and the operand is
3256          "-o FILE" or "-oFILE".  */
3257       int oi = -1;
3258
3259       if (c == -1
3260           || (posixly_correct && nfiles != 0
3261               && ! (obsolete_usage
3262                     && ! checkonly
3263                     && optind != argc
3264                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
3265                     && (argv[optind][2] || optind + 1 != argc)))
3266           || ((c = getopt_long (argc, argv, short_options,
3267                                 long_options, &oi))
3268               == -1))
3269         {
3270           if (argc <= optind)
3271             break;
3272           files[nfiles++] = argv[optind++];
3273         }
3274       else switch (c)
3275         {
3276         case 1:
3277           key = NULL;
3278           if (optarg[0] == '+')
3279             {
3280               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3281                                       && ISDIGIT (argv[optind][1]));
3282               obsolete_usage |= minus_pos_usage && !posixly_correct;
3283               if (obsolete_usage)
3284                 {
3285                   /* Treat +POS1 [-POS2] as a key if possible; but silently
3286                      treat an operand as a file if it is not a valid +POS1.  */
3287                   key = key_init (&key_buf);
3288                   s = parse_field_count (optarg + 1, &key->sword, NULL);
3289                   if (s && *s == '.')
3290                     s = parse_field_count (s + 1, &key->schar, NULL);
3291                   if (! (key->sword | key->schar))
3292                     key->sword = SIZE_MAX;
3293                   if (! s || *set_ordering (s, key, bl_start))
3294                     key = NULL;
3295                   else
3296                     {
3297                       if (minus_pos_usage)
3298                         {
3299                           char const *optarg1 = argv[optind++];
3300                           s = parse_field_count (optarg1 + 1, &key->eword,
3301                                              N_("invalid number after `-'"));
3302                           if (*s == '.')
3303                             s = parse_field_count (s + 1, &key->echar,
3304                                                N_("invalid number after `.'"));
3305                           if (*set_ordering (s, key, bl_end))
3306                             badfieldspec (optarg1,
3307                                       N_("stray character in field spec"));
3308                         }
3309                       insertkey (key);
3310                     }
3311                 }
3312             }
3313           if (! key)
3314             files[nfiles++] = optarg;
3315           break;
3316
3317         case SORT_OPTION:
3318           c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3319           /* Fall through. */
3320         case 'b':
3321         case 'd':
3322         case 'f':
3323         case 'g':
3324         case 'h':
3325         case 'i':
3326         case 'M':
3327         case 'n':
3328         case 'r':
3329         case 'R':
3330         case 'V':
3331           {
3332             char str[2];
3333             str[0] = c;
3334             str[1] = '\0';
3335             set_ordering (str, &gkey, bl_both);
3336           }
3337           break;
3338
3339         case CHECK_OPTION:
3340           c = (optarg
3341                ? XARGMATCH ("--check", optarg, check_args, check_types)
3342                : 'c');
3343           /* Fall through.  */
3344         case 'c':
3345         case 'C':
3346           if (checkonly && checkonly != c)
3347             incompatible_options ("cC");
3348           checkonly = c;
3349           break;
3350
3351         case COMPRESS_PROGRAM_OPTION:
3352           if (compress_program && !STREQ (compress_program, optarg))
3353             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3354           compress_program = optarg;
3355           break;
3356
3357         case FILES0_FROM_OPTION:
3358           files_from = optarg;
3359           break;
3360
3361         case 'k':
3362           key = key_init (&key_buf);
3363
3364           /* Get POS1. */
3365           s = parse_field_count (optarg, &key->sword,
3366                                  N_("invalid number at field start"));
3367           if (! key->sword--)
3368             {
3369               /* Provoke with `sort -k0' */
3370               badfieldspec (optarg, N_("field number is zero"));
3371             }
3372           if (*s == '.')
3373             {
3374               s = parse_field_count (s + 1, &key->schar,
3375                                      N_("invalid number after `.'"));
3376               if (! key->schar--)
3377                 {
3378                   /* Provoke with `sort -k1.0' */
3379                   badfieldspec (optarg, N_("character offset is zero"));
3380                 }
3381             }
3382           if (! (key->sword | key->schar))
3383             key->sword = SIZE_MAX;
3384           s = set_ordering (s, key, bl_start);
3385           if (*s != ',')
3386             {
3387               key->eword = SIZE_MAX;
3388               key->echar = 0;
3389             }
3390           else
3391             {
3392               /* Get POS2. */
3393               s = parse_field_count (s + 1, &key->eword,
3394                                      N_("invalid number after `,'"));
3395               if (! key->eword--)
3396                 {
3397                   /* Provoke with `sort -k1,0' */
3398                   badfieldspec (optarg, N_("field number is zero"));
3399                 }
3400               if (*s == '.')
3401                 {
3402                   s = parse_field_count (s + 1, &key->echar,
3403                                          N_("invalid number after `.'"));
3404                 }
3405               s = set_ordering (s, key, bl_end);
3406             }
3407           if (*s)
3408             badfieldspec (optarg, N_("stray character in field spec"));
3409           insertkey (key);
3410           break;
3411
3412         case 'm':
3413           mergeonly = true;
3414           break;
3415
3416         case NMERGE_OPTION:
3417           specify_nmerge (oi, c, optarg);
3418           break;
3419
3420         case 'o':
3421           if (outfile && !STREQ (outfile, optarg))
3422             error (SORT_FAILURE, 0, _("multiple output files specified"));
3423           outfile = optarg;
3424           break;
3425
3426         case RANDOM_SOURCE_OPTION:
3427           if (random_source && !STREQ (random_source, optarg))
3428             error (SORT_FAILURE, 0, _("multiple random sources specified"));
3429           random_source = optarg;
3430           break;
3431
3432         case 's':
3433           stable = true;
3434           break;
3435
3436         case 'S':
3437           specify_sort_size (oi, c, optarg);
3438           break;
3439
3440         case 't':
3441           {
3442             char newtab = optarg[0];
3443             if (! newtab)
3444               error (SORT_FAILURE, 0, _("empty tab"));
3445             if (optarg[1])
3446               {
3447                 if (STREQ (optarg, "\\0"))
3448                   newtab = '\0';
3449                 else
3450                   {
3451                     /* Provoke with `sort -txx'.  Complain about
3452                        "multi-character tab" instead of "multibyte tab", so
3453                        that the diagnostic's wording does not need to be
3454                        changed once multibyte characters are supported.  */
3455                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
3456                            quote (optarg));
3457                   }
3458               }
3459             if (tab != TAB_DEFAULT && tab != newtab)
3460               error (SORT_FAILURE, 0, _("incompatible tabs"));
3461             tab = newtab;
3462           }
3463           break;
3464
3465         case 'T':
3466           add_temp_dir (optarg);
3467           break;
3468
3469         case 'u':
3470           unique = true;
3471           break;
3472
3473         case 'y':
3474           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3475              through Solaris 7.  It is also accepted by many non-Solaris
3476              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3477              -y is marked as obsolete starting with Solaris 8 (1999), but is
3478              still accepted as of Solaris 10 prerelease (2004).
3479
3480              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3481              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3482              and which in general ignores the argument after "-y" if it
3483              consists entirely of digits (it can even be empty).  */
3484           if (optarg == argv[optind - 1])
3485             {
3486               char const *p;
3487               for (p = optarg; ISDIGIT (*p); p++)
3488                 continue;
3489               optind -= (*p != '\0');
3490             }
3491           break;
3492
3493         case 'z':
3494           eolchar = 0;
3495           break;
3496
3497         case_GETOPT_HELP_CHAR;
3498
3499         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3500
3501         default:
3502           usage (SORT_FAILURE);
3503         }
3504     }
3505
3506   if (files_from)
3507     {
3508       FILE *stream;
3509
3510       /* When using --files0-from=F, you may not specify any files
3511          on the command-line.  */
3512       if (nfiles)
3513         {
3514           error (0, 0, _("extra operand %s"), quote (files[0]));
3515           fprintf (stderr, "%s\n",
3516                    _("file operands cannot be combined with --files0-from"));
3517           usage (SORT_FAILURE);
3518         }
3519
3520       if (STREQ (files_from, "-"))
3521         stream = stdin;
3522       else
3523         {
3524           stream = fopen (files_from, "r");
3525           if (stream == NULL)
3526             error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3527                    quote (files_from));
3528         }
3529
3530       readtokens0_init (&tok);
3531
3532       if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3533         error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3534                quote (files_from));
3535
3536       if (tok.n_tok)
3537         {
3538           size_t i;
3539           free (files);
3540           files = tok.tok;
3541           nfiles = tok.n_tok;
3542           for (i = 0; i < nfiles; i++)
3543             {
3544               if (STREQ (files[i], "-"))
3545                 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3546                                           "no file name of %s allowed"),
3547                        quote (files[i]));
3548               else if (files[i][0] == '\0')
3549                 {
3550                   /* Using the standard `filename:line-number:' prefix here is
3551                      not totally appropriate, since NUL is the separator, not NL,
3552                      but it might be better than nothing.  */
3553                   unsigned long int file_number = i + 1;
3554                   error (SORT_FAILURE, 0,
3555                          _("%s:%lu: invalid zero-length file name"),
3556                          quotearg_colon (files_from), file_number);
3557                 }
3558             }
3559         }
3560       else
3561         error (SORT_FAILURE, 0, _("no input from %s"),
3562                quote (files_from));
3563     }
3564
3565   /* Inheritance of global options to individual keys. */
3566   for (key = keylist; key; key = key->next)
3567     {
3568       if (! (key->ignore
3569              || key->translate
3570              || (key->skipsblanks
3571                  | key->reverse
3572                  | key->skipeblanks
3573                  | key->month
3574                  | key->numeric
3575                  | key->version
3576                  | key->general_numeric
3577                  | key->human_numeric
3578                  | key->random)))
3579         {
3580           key->ignore = gkey.ignore;
3581           key->translate = gkey.translate;
3582           key->skipsblanks = gkey.skipsblanks;
3583           key->skipeblanks = gkey.skipeblanks;
3584           key->month = gkey.month;
3585           key->numeric = gkey.numeric;
3586           key->general_numeric = gkey.general_numeric;
3587           key->human_numeric = gkey.human_numeric;
3588           key->random = gkey.random;
3589           key->reverse = gkey.reverse;
3590           key->version = gkey.version;
3591         }
3592
3593       need_random |= key->random;
3594     }
3595
3596   if (!keylist && (gkey.ignore
3597                    || gkey.translate
3598                    || (gkey.skipsblanks
3599                        | gkey.skipeblanks
3600                        | gkey.month
3601                        | gkey.numeric
3602                        | gkey.general_numeric
3603                        | gkey.human_numeric
3604                        | gkey.random
3605                        | gkey.version)))
3606     {
3607       insertkey (&gkey);
3608       need_random |= gkey.random;
3609     }
3610
3611   check_ordering_compatibility ();
3612
3613   reverse = gkey.reverse;
3614
3615   if (need_random)
3616     {
3617       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3618       if (! randread_source)
3619         die (_("open failed"), random_source);
3620     }
3621
3622   if (temp_dir_count == 0)
3623     {
3624       char const *tmp_dir = getenv ("TMPDIR");
3625       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3626     }
3627
3628   if (nfiles == 0)
3629     {
3630       static char *minus = (char *) "-";
3631       nfiles = 1;
3632       free (files);
3633       files = &minus;
3634     }
3635
3636   /* Need to re-check that we meet the minimum requirement for memory
3637      usage with the final value for NMERGE. */
3638   if (0 < sort_size)
3639     sort_size = MAX (sort_size, MIN_SORT_SIZE);
3640
3641   if (checkonly)
3642     {
3643       if (nfiles > 1)
3644         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3645                quote (files[1]), checkonly);
3646
3647       if (outfile)
3648         {
3649           static char opts[] = {0, 'o', 0};
3650           opts[0] = checkonly;
3651           incompatible_options (opts);
3652         }
3653
3654       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3655          input is not properly sorted.  */
3656       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3657     }
3658
3659   if (mergeonly)
3660     {
3661       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3662       size_t i;
3663
3664       for (i = 0; i < nfiles; ++i)
3665         sortfiles[i].name = files[i];
3666
3667       merge (sortfiles, 0, nfiles, outfile);
3668       IF_LINT (free (sortfiles));
3669     }
3670   else
3671     sort (files, nfiles, outfile);
3672
3673   if (have_read_stdin && fclose (stdin) == EOF)
3674     die (_("close failed"), "-");
3675
3676   exit (EXIT_SUCCESS);
3677 }