src/sort.c

   1 /* sort - sort lines of text (with all kinds of options).
   2    Copyright (C) 1988, 1991-2009 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17    Written December 1988 by Mike Haertel.
  18    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  19    or (US mail) as Mike Haertel c/o Free Software Foundation.
  20
  21    Ørn E. Hansen added NLS support in 1997.  */
  22
  23 #include <config.h>
  24
  25 #include <getopt.h>
  26 #include <sys/types.h>
  27 #include <sys/wait.h>
  28 #include <signal.h>
  29 #include "system.h"
  30 #include "argmatch.h"
  31 #include "error.h"
  32 #include "filevercmp.h"
  33 #include "hash.h"
  34 #include "md5.h"
  35 #include "physmem.h"
  36 #include "posixver.h"
  37 #include "quote.h"
  38 #include "quotearg.h"
  39 #include "randread.h"
  40 #include "readtokens0.h"
  41 #include "stdio--.h"
  42 #include "stdlib--.h"
  43 #include "strnumcmp.h"
  44 #include "xmemcoll.h"
  45 #include "xmemxfrm.h"
  46 #include "xstrtol.h"
  47
  48 #if HAVE_SYS_RESOURCE_H
  49 # include <sys/resource.h>
  50 #endif
  51 #ifndef RLIMIT_DATA
  52 struct rlimit { size_t rlim_cur; };
  53 # define getrlimit(Resource, Rlp) (-1)
  54 #endif
  55
  56 /* The official name of this program (e.g., no `g' prefix).  */
  57 #define PROGRAM_NAME "sort"
  58
  59 #define AUTHORS \
  60   proper_name ("Mike Haertel"), \
  61   proper_name ("Paul Eggert")
  62
  63 #if HAVE_LANGINFO_CODESET
  64 # include <langinfo.h>
  65 #endif
  66
  67 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
  68    present.  */
  69 #ifndef SA_NOCLDSTOP
  70 # define SA_NOCLDSTOP 0
  71 /* No sigprocmask.  Always 'return' zero. */
  72 # define sigprocmask(How, Set, Oset) (0)
  73 # define sigset_t int
  74 # if ! HAVE_SIGINTERRUPT
  75 #  define siginterrupt(sig, flag) /* empty */
  76 # endif
  77 #endif
  78
  79 #if !defined OPEN_MAX && defined NR_OPEN
  80 # define OPEN_MAX NR_OPEN
  81 #endif
  82 #if !defined OPEN_MAX
  83 # define OPEN_MAX 20
  84 #endif
  85
  86 #define UCHAR_LIM (UCHAR_MAX + 1)
  87
  88 #ifndef DEFAULT_TMPDIR
  89 # define DEFAULT_TMPDIR "/tmp"
  90 #endif
  91
  92 /* Exit statuses.  */
  93 enum
  94   {
  95     /* POSIX says to exit with status 1 if invoked with -c and the
  96        input is not properly sorted.  */
  97     SORT_OUT_OF_ORDER = 1,
  98
  99     /* POSIX says any other irregular exit must exit with a status
 100        code greater than 1.  */
 101     SORT_FAILURE = 2
 102   };
 103
 104 enum
 105   {
 106     /* The number of times we should try to fork a compression process
 107        (we retry if the fork call fails).  We don't _need_ to compress
 108        temp files, this is just to reduce disk access, so this number
 109        can be small.  */
 110     MAX_FORK_TRIES_COMPRESS = 2,
 111
 112     /* The number of times we should try to fork a decompression process.
 113        If we can't fork a decompression process, we can't sort, so this
 114        number should be big.  */
 115     MAX_FORK_TRIES_DECOMPRESS = 8
 116   };
 117
 118 /* The representation of the decimal point in the current locale.  */
 119 static int decimal_point;
 120
 121 /* Thousands separator; if -1, then there isn't one.  */
 122 static int thousands_sep;
 123
 124 /* Nonzero if the corresponding locales are hard.  */
 125 static bool hard_LC_COLLATE;
 126 #if HAVE_NL_LANGINFO
 127 static bool hard_LC_TIME;
 128 #endif
 129
 130 #define NONZERO(x) ((x) != 0)
 131
 132 /* The kind of blanks for '-b' to skip in various options. */
 133 enum blanktype { bl_start, bl_end, bl_both };
 134
 135 /* The character marking end of line. Default to \n. */
 136 static char eolchar = '\n';
 137
 138 /* Lines are held in core as counted strings. */
 139 struct line
 140 {
 141   char *text;                   /* Text of the line. */
 142   size_t length;                /* Length including final newline. */
 143   char *keybeg;                 /* Start of first key. */
 144   char *keylim;                 /* Limit of first key. */
 145 };
 146
 147 /* Input buffers. */
 148 struct buffer
 149 {
 150   char *buf;                    /* Dynamically allocated buffer,
 151                                    partitioned into 3 regions:
 152                                    - input data;
 153                                    - unused area;
 154                                    - an array of lines, in reverse order.  */
 155   size_t used;                  /* Number of bytes used for input data.  */
 156   size_t nlines;                /* Number of lines in the line array.  */
 157   size_t alloc;                 /* Number of bytes allocated. */
 158   size_t left;                  /* Number of bytes left from previous reads. */
 159   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 160   bool eof;                     /* An EOF has been read.  */
 161 };
 162
 163 struct keyfield
 164 {
 165   size_t sword;                 /* Zero-origin 'word' to start at. */
 166   size_t schar;                 /* Additional characters to skip. */
 167   size_t eword;                 /* Zero-origin first word after field. */
 168   size_t echar;                 /* Additional characters in field. */
 169   bool const *ignore;           /* Boolean array of characters to ignore. */
 170   char const *translate;        /* Translation applied to characters. */
 171   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 172   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 173   bool numeric;                 /* Flag for numeric comparison.  Handle
 174                                    strings of digits with optional decimal
 175                                    point, but no exponential notation. */
 176   bool random;                  /* Sort by random hash of key.  */
 177   bool general_numeric;         /* Flag for general, numeric comparison.
 178                                    Handle numbers in exponential notation. */
 179   bool human_numeric;           /* Flag for sorting by human readable
 180                                    units with either SI xor IEC prefixes. */
 181   int si_present;               /* Flag for checking for mixed SI and IEC. */
 182   bool month;                   /* Flag for comparison by month name. */
 183   bool reverse;                 /* Reverse the sense of comparison. */
 184   bool version;                 /* sort by version number */
 185   struct keyfield *next;        /* Next keyfield to try. */
 186 };
 187
 188 struct month
 189 {
 190   char const *name;
 191   int val;
 192 };
 193
 194 /* FIXME: None of these tables work with multibyte character sets.
 195    Also, there are many other bugs when handling multibyte characters.
 196    One way to fix this is to rewrite `sort' to use wide characters
 197    internally, but doing this with good performance is a bit
 198    tricky.  */
 199
 200 /* Table of blanks.  */
 201 static bool blanks[UCHAR_LIM];
 202
 203 /* Table of non-printing characters. */
 204 static bool nonprinting[UCHAR_LIM];
 205
 206 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 207 static bool nondictionary[UCHAR_LIM];
 208
 209 /* Translation table folding lower case to upper.  */
 210 static char fold_toupper[UCHAR_LIM];
 211
 212 #define MONTHS_PER_YEAR 12
 213
 214 /* Table mapping month names to integers.
 215    Alphabetic order allows binary search. */
 216 static struct month monthtab[] =
 217 {
 218   {"APR", 4},
 219   {"AUG", 8},
 220   {"DEC", 12},
 221   {"FEB", 2},
 222   {"JAN", 1},
 223   {"JUL", 7},
 224   {"JUN", 6},
 225   {"MAR", 3},
 226   {"MAY", 5},
 227   {"NOV", 11},
 228   {"OCT", 10},
 229   {"SEP", 9}
 230 };
 231
 232 /* During the merge phase, the number of files to merge at once. */
 233 #define NMERGE_DEFAULT 16
 234
 235 /* Minimum size for a merge or check buffer.  */
 236 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 237
 238 /* Minimum sort size; the code might not work with smaller sizes.  */
 239 #define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
 240
 241 /* The number of bytes needed for a merge or check buffer, which can
 242    function relatively efficiently even if it holds only one line.  If
 243    a longer line is seen, this value is increased.  */
 244 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 245
 246 /* The approximate maximum number of bytes of main memory to use, as
 247    specified by the user.  Zero if the user has not specified a size.  */
 248 static size_t sort_size;
 249
 250 /* The guessed size for non-regular files.  */
 251 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 252
 253 /* Array of directory names in which any temporary files are to be created. */
 254 static char const **temp_dirs;
 255
 256 /* Number of temporary directory names used.  */
 257 static size_t temp_dir_count;
 258
 259 /* Number of allocated slots in temp_dirs.  */
 260 static size_t temp_dir_alloc;
 261
 262 /* Flag to reverse the order of all comparisons. */
 263 static bool reverse;
 264
 265 /* Flag for stable sort.  This turns off the last ditch bytewise
 266    comparison of lines, and instead leaves lines in the same order
 267    they were read if all keys compare equal.  */
 268 static bool stable;
 269
 270 /* If TAB has this value, blanks separate fields.  */
 271 enum { TAB_DEFAULT = CHAR_MAX + 1 };
 272
 273 /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 274    separated by the empty string between a non-blank character and a blank
 275    character. */
 276 static int tab = TAB_DEFAULT;
 277
 278 /* Flag to remove consecutive duplicate lines from the output.
 279    Only the last of a sequence of equal lines will be output. */
 280 static bool unique;
 281
 282 /* Nonzero if any of the input files are the standard input. */
 283 static bool have_read_stdin;
 284
 285 /* List of key field comparisons to be tried.  */
 286 static struct keyfield *keylist;
 287
 288 /* Program used to (de)compress temp files.  Must accept -d.  */
 289 static char const *compress_program;
 290
 291 /* Maximum number of files to merge in one go.  If more than this
 292    number are present, temp files will be used. */
 293 static unsigned int nmerge = NMERGE_DEFAULT;
 294
 295 static void sortlines_temp (struct line *, size_t, struct line *);
 296
 297 /* Report MESSAGE for FILE, then clean up and exit.
 298    If FILE is null, it represents standard output.  */
 299
 300 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 301 static void
 302 die (char const *message, char const *file)
 303 {
 304   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 305   exit (SORT_FAILURE);
 306 }
 307
 308 void
 309 usage (int status)
 310 {
 311   if (status != EXIT_SUCCESS)
 312     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 313              program_name);
 314   else
 315     {
 316       printf (_("\
 317 Usage: %s [OPTION]... [FILE]...\n\
 318   or:  %s [OPTION]... --files0-from=F\n\
 319 "),
 320               program_name, program_name);
 321       fputs (_("\
 322 Write sorted concatenation of all FILE(s) to standard output.\n\
 323 \n\
 324 "), stdout);
 325       fputs (_("\
 326 Mandatory arguments to long options are mandatory for short options too.\n\
 327 "), stdout);
 328       fputs (_("\
 329 Ordering options:\n\
 330 \n\
 331 "), stdout);
 332       fputs (_("\
 333   -b, --ignore-leading-blanks  ignore leading blanks\n\
 334   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 335   -f, --ignore-case           fold lower case to upper case characters\n\
 336 "), stdout);
 337       fputs (_("\
 338   -g, --general-numeric-sort  compare according to general numerical value\n\
 339   -i, --ignore-nonprinting    consider only printable characters\n\
 340   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 341 "), stdout);
 342       fputs (_("\
 343   -h, --human-numeric-sort    compare human readable numbers (e.g., 2K 1G)\n\
 344 "), stdout);
 345       fputs (_("\
 346   -n, --numeric-sort          compare according to string numerical value\n\
 347   -R, --random-sort           sort by random hash of keys\n\
 348       --random-source=FILE    get random bytes from FILE\n\
 349   -r, --reverse               reverse the result of comparisons\n\
 350 "), stdout);
 351       fputs (_("\
 352       --sort=WORD             sort according to WORD:\n\
 353                                 general-numeric -g, human-numeric -h, month -M,\n\
 354                                 numeric -n, random -R, version -V\n\
 355   -V, --version-sort          natural sort of (version) numbers within text\n\
 356 \n\
 357 "), stdout);
 358       fputs (_("\
 359 Other options:\n\
 360 \n\
 361 "), stdout);
 362       fputs (_("\
 363       --batch-size=NMERGE   merge at most NMERGE inputs at once;\n\
 364                             for more use temp files\n\
 365 "), stdout);
 366       fputs (_("\
 367   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
 368   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
 369       --compress-program=PROG  compress temporaries with PROG;\n\
 370                               decompress them with PROG -d\n\
 371       --files0-from=F       read input from the files specified by\n\
 372                             NUL-terminated names in file F;\n\
 373                             If F is - then read names from standard input\n\
 374 "), stdout);
 375       fputs (_("\
 376   -k, --key=POS1[,POS2]     start a key at POS1 (origin 1), end it at POS2\n\
 377                             (default end of line)\n\
 378   -m, --merge               merge already sorted files; do not sort\n\
 379 "), stdout);
 380       fputs (_("\
 381   -o, --output=FILE         write result to FILE instead of standard output\n\
 382   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 383   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 384 "), stdout);
 385       printf (_("\
 386   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
 387   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 388                               multiple options specify multiple directories\n\
 389   -u, --unique              with -c, check for strict ordering;\n\
 390                               without -c, output only the first of an equal run\n\
 391 "), DEFAULT_TMPDIR);
 392       fputs (_("\
 393   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 394 "), stdout);
 395       fputs (HELP_OPTION_DESCRIPTION, stdout);
 396       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 397       fputs (_("\
 398 \n\
 399 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 400 in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
 401 in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
 402 one or more single-letter ordering options, which override global ordering\n\
 403 options for that key.  If no key is given, use the entire line as the key.\n\
 404 \n\
 405 SIZE may be followed by the following multiplicative suffixes:\n\
 406 "), stdout);
 407       fputs (_("\
 408 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 409 \n\
 410 With no FILE, or when FILE is -, read standard input.\n\
 411 \n\
 412 *** WARNING ***\n\
 413 The locale specified by the environment affects sort order.\n\
 414 Set LC_ALL=C to get the traditional sort order that uses\n\
 415 native byte values.\n\
 416 "), stdout );
 417       emit_bug_reporting_address ();
 418     }
 419
 420   exit (status);
 421 }
 422
 423 /* For long options that have no equivalent short option, use a
 424    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 425 enum
 426 {
 427   CHECK_OPTION = CHAR_MAX + 1,
 428   COMPRESS_PROGRAM_OPTION,
 429   FILES0_FROM_OPTION,
 430   NMERGE_OPTION,
 431   RANDOM_SOURCE_OPTION,
 432   SORT_OPTION
 433 };
 434
 435 static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
 436
 437 static struct option const long_options[] =
 438 {
 439   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 440   {"check", optional_argument, NULL, CHECK_OPTION},
 441   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
 442   {"dictionary-order", no_argument, NULL, 'd'},
 443   {"ignore-case", no_argument, NULL, 'f'},
 444   {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
 445   {"general-numeric-sort", no_argument, NULL, 'g'},
 446   {"ignore-nonprinting", no_argument, NULL, 'i'},
 447   {"key", required_argument, NULL, 'k'},
 448   {"merge", no_argument, NULL, 'm'},
 449   {"month-sort", no_argument, NULL, 'M'},
 450   {"numeric-sort", no_argument, NULL, 'n'},
 451   {"human-numeric-sort", no_argument, NULL, 'h'},
 452   {"version-sort", no_argument, NULL, 'V'},
 453   {"random-sort", no_argument, NULL, 'R'},
 454   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 455   {"sort", required_argument, NULL, SORT_OPTION},
 456   {"output", required_argument, NULL, 'o'},
 457   {"reverse", no_argument, NULL, 'r'},
 458   {"stable", no_argument, NULL, 's'},
 459   {"batch-size", required_argument, NULL, NMERGE_OPTION},
 460   {"buffer-size", required_argument, NULL, 'S'},
 461   {"field-separator", required_argument, NULL, 't'},
 462   {"temporary-directory", required_argument, NULL, 'T'},
 463   {"unique", no_argument, NULL, 'u'},
 464   {"zero-terminated", no_argument, NULL, 'z'},
 465   {GETOPT_HELP_OPTION_DECL},
 466   {GETOPT_VERSION_OPTION_DECL},
 467   {NULL, 0, NULL, 0},
 468 };
 469
 470 #define CHECK_TABLE \
 471   _ct_("quiet",          'C') \
 472   _ct_("silent",         'C') \
 473   _ct_("diagnose-first", 'c')
 474
 475 static char const *const check_args[] =
 476 {
 477 #define _ct_(_s, _c) _s,
 478   CHECK_TABLE NULL
 479 #undef  _ct_
 480 };
 481 static char const check_types[] =
 482 {
 483 #define _ct_(_s, _c) _c,
 484   CHECK_TABLE
 485 #undef  _ct_
 486 };
 487
 488 #define SORT_TABLE \
 489   _st_("general-numeric", 'g') \
 490   _st_("human-numeric",   'h') \
 491   _st_("month",           'M') \
 492   _st_("numeric",         'n') \
 493   _st_("random",          'R') \
 494   _st_("version",         'V')
 495
 496 static char const *const sort_args[] =
 497 {
 498 #define _st_(_s, _c) _s,
 499   SORT_TABLE NULL
 500 #undef  _st_
 501 };
 502 static char const sort_types[] =
 503 {
 504 #define _st_(_s, _c) _c,
 505   SORT_TABLE
 506 #undef  _st_
 507 };
 508
 509 /* The set of signals that are caught.  */
 510 static sigset_t caught_signals;
 511
 512 /* Critical section status.  */
 513 struct cs_status
 514 {
 515   bool valid;
 516   sigset_t sigs;
 517 };
 518
 519 /* Enter a critical section.  */
 520 static struct cs_status
 521 cs_enter (void)
 522 {
 523   struct cs_status status;
 524   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
 525   return status;
 526 }
 527
 528 /* Leave a critical section.  */
 529 static void
 530 cs_leave (struct cs_status status)
 531 {
 532   if (status.valid)
 533     {
 534       /* Ignore failure when restoring the signal mask. */
 535       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
 536     }
 537 }
 538
 539 /* The list of temporary files. */
 540 struct tempnode
 541 {
 542   struct tempnode *volatile next;
 543   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 544   char name[1];  /* Actual size is 1 + file name length.  */
 545 };
 546 static struct tempnode *volatile temphead;
 547 static struct tempnode *volatile *temptail = &temphead;
 548
 549 struct sortfile
 550 {
 551   char const *name;
 552   pid_t pid;     /* If compressed, the pid of compressor, else zero */
 553 };
 554
 555 /* A table where we store compression process states.  We clean up all
 556    processes in a timely manner so as not to exhaust system resources,
 557    so we store the info on whether the process is still running, or has
 558    been reaped here.  */
 559 static Hash_table *proctab;
 560
 561 enum { INIT_PROCTAB_SIZE = 47 };
 562
 563 enum procstate { ALIVE, ZOMBIE };
 564
 565 /* A proctab entry.  The COUNT field is there in case we fork a new
 566    compression process that has the same PID as an old zombie process
 567    that is still in the table (because the process to decompress the
 568    temp file it was associated with hasn't started yet).  */
 569 struct procnode
 570 {
 571   pid_t pid;
 572   enum procstate state;
 573   size_t count;
 574 };
 575
 576 static size_t
 577 proctab_hasher (const void *entry, size_t tabsize)
 578 {
 579   const struct procnode *node = entry;
 580   return node->pid % tabsize;
 581 }
 582
 583 static bool
 584 proctab_comparator (const void *e1, const void *e2)
 585 {
 586   const struct procnode *n1 = e1, *n2 = e2;
 587   return n1->pid == n2->pid;
 588 }
 589
 590 /* The total number of forked processes (compressors and decompressors)
 591    that have not been reaped yet. */
 592 static size_t nprocs;
 593
 594 /* The number of child processes we'll allow before we try to reap some. */
 595 enum { MAX_PROCS_BEFORE_REAP = 2 };
 596
 597 /* If 0 < PID, wait for the child process with that PID to exit.
 598    If PID is -1, clean up a random child process which has finished and
 599    return the process ID of that child.  If PID is -1 and no processes
 600    have quit yet, return 0 without waiting.  */
 601
 602 static pid_t
 603 reap (pid_t pid)
 604 {
 605   int status;
 606   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
 607
 608   if (cpid < 0)
 609     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
 610            compress_program);
 611   else if (0 < cpid)
 612     {
 613       if (! WIFEXITED (status) || WEXITSTATUS (status))
 614         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
 615                compress_program);
 616       --nprocs;
 617     }
 618
 619   return cpid;
 620 }
 621
 622 /* Add the PID of a running compression process to proctab, or update
 623    the entry COUNT and STATE fields if it's already there.  This also
 624    creates the table for us the first time it's called.  */
 625
 626 static void
 627 register_proc (pid_t pid)
 628 {
 629   struct procnode test, *node;
 630
 631   if (! proctab)
 632     {
 633       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
 634                                  proctab_hasher,
 635                                  proctab_comparator,
 636                                  free);
 637       if (! proctab)
 638         xalloc_die ();
 639     }
 640
 641   test.pid = pid;
 642   node = hash_lookup (proctab, &test);
 643   if (node)
 644     {
 645       node->state = ALIVE;
 646       ++node->count;
 647     }
 648   else
 649     {
 650       node = xmalloc (sizeof *node);
 651       node->pid = pid;
 652       node->state = ALIVE;
 653       node->count = 1;
 654       if (hash_insert (proctab, node) == NULL)
 655         xalloc_die ();
 656     }
 657 }
 658
 659 /* This is called when we reap a random process.  We don't know
 660    whether we have reaped a compression process or a decompression
 661    process until we look in the table.  If there's an ALIVE entry for
 662    it, then we have reaped a compression process, so change the state
 663    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
 664
 665 static void
 666 update_proc (pid_t pid)
 667 {
 668   struct procnode test, *node;
 669
 670   test.pid = pid;
 671   node = hash_lookup (proctab, &test);
 672   if (node)
 673     node->state = ZOMBIE;
 674 }
 675
 676 /* This is for when we need to wait for a compression process to exit.
 677    If it has a ZOMBIE entry in the table then it's already dead and has
 678    been reaped.  Note that if there's an ALIVE entry for it, it still may
 679    already have died and been reaped if a second process was created with
 680    the same PID.  This is probably exceedingly rare, but to be on the safe
 681    side we will have to wait for any compression process with this PID.  */
 682
 683 static void
 684 wait_proc (pid_t pid)
 685 {
 686   struct procnode test, *node;
 687
 688   test.pid = pid;
 689   node = hash_lookup (proctab, &test);
 690   if (node->state == ALIVE)
 691     reap (pid);
 692
 693   node->state = ZOMBIE;
 694   if (! --node->count)
 695     {
 696       hash_delete (proctab, node);
 697       free (node);
 698     }
 699 }
 700
 701 /* Keep reaping finished children as long as there are more to reap.
 702    This doesn't block waiting for any of them, it only reaps those
 703    that are already dead.  */
 704
 705 static void
 706 reap_some (void)
 707 {
 708   pid_t pid;
 709
 710   while (0 < nprocs && (pid = reap (-1)))
 711     update_proc (pid);
 712 }
 713
 714 /* Clean up any remaining temporary files.  */
 715
 716 static void
 717 cleanup (void)
 718 {
 719   struct tempnode const *node;
 720
 721   for (node = temphead; node; node = node->next)
 722     unlink (node->name);
 723   temphead = NULL;
 724 }
 725
 726 /* Cleanup actions to take when exiting.  */
 727
 728 static void
 729 exit_cleanup (void)
 730 {
 731   if (temphead)
 732     {
 733       /* Clean up any remaining temporary files in a critical section so
 734          that a signal handler does not try to clean them too.  */
 735       struct cs_status cs = cs_enter ();
 736       cleanup ();
 737       cs_leave (cs);
 738     }
 739
 740   close_stdout ();
 741 }
 742
 743 /* Create a new temporary file, returning its newly allocated tempnode.
 744    Store into *PFD the file descriptor open for writing.
 745    If the creation fails, return NULL and store -1 into *PFD if the
 746    failure is due to file descriptor exhaustion and
 747    SURVIVE_FD_EXHAUSTION; otherwise, die.  */
 748
 749 static struct tempnode *
 750 create_temp_file (int *pfd, bool survive_fd_exhaustion)
 751 {
 752   static char const slashbase[] = "/sortXXXXXX";
 753   static size_t temp_dir_index;
 754   int fd;
 755   int saved_errno;
 756   char const *temp_dir = temp_dirs[temp_dir_index];
 757   size_t len = strlen (temp_dir);
 758   struct tempnode *node =
 759     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
 760   char *file = node->name;
 761   struct cs_status cs;
 762
 763   memcpy (file, temp_dir, len);
 764   memcpy (file + len, slashbase, sizeof slashbase);
 765   node->next = NULL;
 766   node->pid = 0;
 767   if (++temp_dir_index == temp_dir_count)
 768     temp_dir_index = 0;
 769
 770   /* Create the temporary file in a critical section, to avoid races.  */
 771   cs = cs_enter ();
 772   fd = mkstemp (file);
 773   if (0 <= fd)
 774     {
 775       *temptail = node;
 776       temptail = &node->next;
 777     }
 778   saved_errno = errno;
 779   cs_leave (cs);
 780   errno = saved_errno;
 781
 782   if (fd < 0)
 783     {
 784       if (! (survive_fd_exhaustion && errno == EMFILE))
 785         error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
 786                quote (temp_dir));
 787       free (node);
 788       node = NULL;
 789     }
 790
 791   *pfd = fd;
 792   return node;
 793 }
 794
 795 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 796    standard output; HOW should be "w".  When opening for input, "-"
 797    means standard input.  To avoid confusion, do not return file
 798    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
 799    opening an ordinary FILE.  Return NULL if unsuccessful.  */
 800
 801 static FILE *
 802 stream_open (const char *file, const char *how)
 803 {
 804   if (!file)
 805     return stdout;
 806   if (STREQ (file, "-") && *how == 'r')
 807     {
 808       have_read_stdin = true;
 809       return stdin;
 810     }
 811   return fopen (file, how);
 812 }
 813
 814 /* Same as stream_open, except always return a non-null value; die on
 815    failure.  */
 816
 817 static FILE *
 818 xfopen (const char *file, const char *how)
 819  {
 820   FILE *fp = stream_open (file, how);
 821   if (!fp)
 822     die (_("open failed"), file);
 823   return fp;
 824 }
 825
 826 /* Close FP, whose name is FILE, and report any errors.  */
 827
 828 static void
 829 xfclose (FILE *fp, char const *file)
 830 {
 831   switch (fileno (fp))
 832     {
 833     case STDIN_FILENO:
 834       /* Allow reading stdin from tty more than once.  */
 835       if (feof (fp))
 836         clearerr (fp);
 837       break;
 838
 839     case STDOUT_FILENO:
 840       /* Don't close stdout just yet.  close_stdout does that.  */
 841       if (fflush (fp) != 0)
 842         die (_("fflush failed"), file);
 843       break;
 844
 845     default:
 846       if (fclose (fp) != 0)
 847         die (_("close failed"), file);
 848       break;
 849     }
 850 }
 851
 852 static void
 853 dup2_or_die (int oldfd, int newfd)
 854 {
 855   if (dup2 (oldfd, newfd) < 0)
 856     error (SORT_FAILURE, errno, _("dup2 failed"));
 857 }
 858
 859 /* Fork a child process for piping to and do common cleanup.  The
 860    TRIES parameter tells us how many times to try to fork before
 861    giving up.  Return the PID of the child, or -1 (setting errno)
 862    on failure. */
 863
 864 static pid_t
 865 pipe_fork (int pipefds[2], size_t tries)
 866 {
 867 #if HAVE_WORKING_FORK
 868   struct tempnode *saved_temphead;
 869   int saved_errno;
 870   unsigned int wait_retry = 1;
 871   pid_t pid IF_LINT (= -1);
 872   struct cs_status cs;
 873
 874   if (pipe (pipefds) < 0)
 875     return -1;
 876
 877   while (tries--)
 878     {
 879       /* This is so the child process won't delete our temp files
 880          if it receives a signal before exec-ing.  */
 881       cs = cs_enter ();
 882       saved_temphead = temphead;
 883       temphead = NULL;
 884
 885       pid = fork ();
 886       saved_errno = errno;
 887       if (pid)
 888         temphead = saved_temphead;
 889
 890       cs_leave (cs);
 891       errno = saved_errno;
 892
 893       if (0 <= pid || errno != EAGAIN)
 894         break;
 895       else
 896         {
 897           sleep (wait_retry);
 898           wait_retry *= 2;
 899           reap_some ();
 900         }
 901     }
 902
 903   if (pid < 0)
 904     {
 905       saved_errno = errno;
 906       close (pipefds[0]);
 907       close (pipefds[1]);
 908       errno = saved_errno;
 909     }
 910   else if (pid == 0)
 911     {
 912       close (STDIN_FILENO);
 913       close (STDOUT_FILENO);
 914     }
 915   else
 916     ++nprocs;
 917
 918   return pid;
 919
 920 #else  /* ! HAVE_WORKING_FORK */
 921   return -1;
 922 #endif
 923 }
 924
 925 /* Create a temporary file and start a compression program to filter output
 926    to that file.  Set *PFP to the file handle and if PPID is non-NULL,
 927    set *PPID to the PID of the newly-created process.  If the creation
 928    fails, return NULL if the failure is due to file descriptor
 929    exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die.  */
 930
 931 static char *
 932 maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion)
 933 {
 934   int tempfd;
 935   struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion);
 936   char *name;
 937
 938   if (! node)
 939     return NULL;
 940
 941   name = node->name;
 942
 943   if (compress_program)
 944     {
 945       int pipefds[2];
 946
 947       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
 948       if (0 < node->pid)
 949         {
 950           close (tempfd);
 951           close (pipefds[0]);
 952           tempfd = pipefds[1];
 953
 954           register_proc (node->pid);
 955         }
 956       else if (node->pid == 0)
 957         {
 958           close (pipefds[1]);
 959           dup2_or_die (tempfd, STDOUT_FILENO);
 960           close (tempfd);
 961           dup2_or_die (pipefds[0], STDIN_FILENO);
 962           close (pipefds[0]);
 963
 964           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
 965             error (SORT_FAILURE, errno, _("couldn't execute %s"),
 966                    compress_program);
 967         }
 968       else
 969         node->pid = 0;
 970     }
 971
 972   *pfp = fdopen (tempfd, "w");
 973   if (! *pfp)
 974     die (_("couldn't create temporary file"), name);
 975
 976   if (ppid)
 977     *ppid = node->pid;
 978
 979   return name;
 980 }
 981
 982 /* Create a temporary file and start a compression program to filter output
 983    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
 984    set it to the PID of the newly-created process.  Die on failure.  */
 985
 986 static char *
 987 create_temp (FILE **pfp, pid_t *ppid)
 988 {
 989   return maybe_create_temp (pfp, ppid, false);
 990 }
 991
 992 /* Open a compressed temp file and start a decompression process through
 993    which to filter the input.  PID must be the valid processes ID of the
 994    process used to compress the file.  Return NULL (setting errno to
 995    EMFILE) if we ran out of file descriptors, and die on any other
 996    kind of failure.  */
 997
 998 static FILE *
 999 open_temp (const char *name, pid_t pid)
1000 {
1001   int tempfd, pipefds[2];
1002   FILE *fp = NULL;
1003
1004   wait_proc (pid);
1005
1006   tempfd = open (name, O_RDONLY);
1007   if (tempfd < 0)
1008     return NULL;
1009
1010   switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS))
1011     {
1012     case -1:
1013       if (errno != EMFILE)
1014         error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
1015                compress_program);
1016       close (tempfd);
1017       errno = EMFILE;
1018       break;
1019
1020     case 0:
1021       close (pipefds[0]);
1022       dup2_or_die (tempfd, STDIN_FILENO);
1023       close (tempfd);
1024       dup2_or_die (pipefds[1], STDOUT_FILENO);
1025       close (pipefds[1]);
1026
1027       execlp (compress_program, compress_program, "-d", (char *) NULL);
1028       error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
1029              compress_program);
1030
1031     default:
1032       close (tempfd);
1033       close (pipefds[1]);
1034
1035       fp = fdopen (pipefds[0], "r");
1036       if (! fp)
1037         {
1038           int saved_errno = errno;
1039           close (pipefds[0]);
1040           errno = saved_errno;
1041         }
1042       break;
1043     }
1044
1045   return fp;
1046 }
1047
1048 static void
1049 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
1050 {
1051   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
1052     die (_("write failed"), output_file);
1053 }
1054
1055 /* Append DIR to the array of temporary directory names.  */
1056 static void
1057 add_temp_dir (char const *dir)
1058 {
1059   if (temp_dir_count == temp_dir_alloc)
1060     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1061
1062   temp_dirs[temp_dir_count++] = dir;
1063 }
1064
1065 /* Remove NAME from the list of temporary files.  */
1066
1067 static void
1068 zaptemp (const char *name)
1069 {
1070   struct tempnode *volatile *pnode;
1071   struct tempnode *node;
1072   struct tempnode *next;
1073   int unlink_status;
1074   int unlink_errno = 0;
1075   struct cs_status cs;
1076
1077   for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1078     continue;
1079
1080   /* Unlink the temporary file in a critical section to avoid races.  */
1081   next = node->next;
1082   cs = cs_enter ();
1083   unlink_status = unlink (name);
1084   unlink_errno = errno;
1085   *pnode = next;
1086   cs_leave (cs);
1087
1088   if (unlink_status != 0)
1089     error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1090   if (! next)
1091     temptail = pnode;
1092   free (node);
1093 }
1094
1095 #if HAVE_NL_LANGINFO
1096
1097 static int
1098 struct_month_cmp (const void *m1, const void *m2)
1099 {
1100   struct month const *month1 = m1;
1101   struct month const *month2 = m2;
1102   return strcmp (month1->name, month2->name);
1103 }
1104
1105 #endif
1106
1107 /* Initialize the character class tables. */
1108
1109 static void
1110 inittables (void)
1111 {
1112   size_t i;
1113
1114   for (i = 0; i < UCHAR_LIM; ++i)
1115     {
1116       blanks[i] = !! isblank (i);
1117       nonprinting[i] = ! isprint (i);
1118       nondictionary[i] = ! isalnum (i) && ! isblank (i);
1119       fold_toupper[i] = toupper (i);
1120     }
1121
1122 #if HAVE_NL_LANGINFO
1123   /* If we're not in the "C" locale, read different names for months.  */
1124   if (hard_LC_TIME)
1125     {
1126       for (i = 0; i < MONTHS_PER_YEAR; i++)
1127         {
1128           char const *s;
1129           size_t s_len;
1130           size_t j;
1131           char *name;
1132
1133           s = (char *) nl_langinfo (ABMON_1 + i);
1134           s_len = strlen (s);
1135           monthtab[i].name = name = xmalloc (s_len + 1);
1136           monthtab[i].val = i + 1;
1137
1138           for (j = 0; j < s_len; j++)
1139             name[j] = fold_toupper[to_uchar (s[j])];
1140           name[j] = '\0';
1141         }
1142       qsort ((void *) monthtab, MONTHS_PER_YEAR,
1143              sizeof *monthtab, struct_month_cmp);
1144     }
1145 #endif
1146 }
1147
1148 /* Specify how many inputs may be merged at once.
1149    This may be set on the command-line with the
1150    --batch-size option. */
1151 static void
1152 specify_nmerge (int oi, char c, char const *s)
1153 {
1154   uintmax_t n;
1155   struct rlimit rlimit;
1156   enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1157
1158   /* Try to find out how many file descriptors we'll be able
1159      to open.  We need at least nmerge + 3 (STDIN_FILENO,
1160      STDOUT_FILENO and STDERR_FILENO). */
1161   unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1162                               ? rlimit.rlim_cur
1163                               : OPEN_MAX)
1164                              - 3);
1165
1166   if (e == LONGINT_OK)
1167     {
1168       nmerge = n;
1169       if (nmerge != n)
1170         e = LONGINT_OVERFLOW;
1171       else
1172         {
1173           if (nmerge < 2)
1174             {
1175               error (0, 0, _("invalid --%s argument %s"),
1176                      long_options[oi].name, quote(s));
1177               error (SORT_FAILURE, 0,
1178                      _("minimum --%s argument is %s"),
1179                      long_options[oi].name, quote("2"));
1180             }
1181           else if (max_nmerge < nmerge)
1182             {
1183               e = LONGINT_OVERFLOW;
1184             }
1185           else
1186             return;
1187         }
1188     }
1189
1190   if (e == LONGINT_OVERFLOW)
1191     {
1192       char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1193       error (0, 0, _("--%s argument %s too large"),
1194              long_options[oi].name, quote(s));
1195       error (SORT_FAILURE, 0,
1196              _("maximum --%s argument with current rlimit is %s"),
1197              long_options[oi].name,
1198              uinttostr (max_nmerge, max_nmerge_buf));
1199     }
1200   else
1201     xstrtol_fatal (e, oi, c, long_options, s);
1202 }
1203
1204 /* Specify the amount of main memory to use when sorting.  */
1205 static void
1206 specify_sort_size (int oi, char c, char const *s)
1207 {
1208   uintmax_t n;
1209   char *suffix;
1210   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1211
1212   /* The default unit is KiB.  */
1213   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1214     {
1215       if (n <= UINTMAX_MAX / 1024)
1216         n *= 1024;
1217       else
1218         e = LONGINT_OVERFLOW;
1219     }
1220
1221   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1222   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1223     switch (suffix[0])
1224       {
1225       case 'b':
1226         e = LONGINT_OK;
1227         break;
1228
1229       case '%':
1230         {
1231           double mem = physmem_total () * n / 100;
1232
1233           /* Use "<", not "<=", to avoid problems with rounding.  */
1234           if (mem < UINTMAX_MAX)
1235             {
1236               n = mem;
1237               e = LONGINT_OK;
1238             }
1239           else
1240             e = LONGINT_OVERFLOW;
1241         }
1242         break;
1243       }
1244
1245   if (e == LONGINT_OK)
1246     {
1247       /* If multiple sort sizes are specified, take the maximum, so
1248          that option order does not matter.  */
1249       if (n < sort_size)
1250         return;
1251
1252       sort_size = n;
1253       if (sort_size == n)
1254         {
1255           sort_size = MAX (sort_size, MIN_SORT_SIZE);
1256           return;
1257         }
1258
1259       e = LONGINT_OVERFLOW;
1260     }
1261
1262   xstrtol_fatal (e, oi, c, long_options, s);
1263 }
1264
1265 /* Return the default sort size.  */
1266 static size_t
1267 default_sort_size (void)
1268 {
1269   /* Let MEM be available memory or 1/8 of total memory, whichever
1270      is greater.  */
1271   double avail = physmem_available ();
1272   double total = physmem_total ();
1273   double mem = MAX (avail, total / 8);
1274   struct rlimit rlimit;
1275
1276   /* Let SIZE be MEM, but no more than the maximum object size or
1277      system resource limits.  Avoid the MIN macro here, as it is not
1278      quite right when only one argument is floating point.  Don't
1279      bother to check for values like RLIM_INFINITY since in practice
1280      they are not much less than SIZE_MAX.  */
1281   size_t size = SIZE_MAX;
1282   if (mem < size)
1283     size = mem;
1284   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1285     size = rlimit.rlim_cur;
1286 #ifdef RLIMIT_AS
1287   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1288     size = rlimit.rlim_cur;
1289 #endif
1290
1291   /* Leave a large safety margin for the above limits, as failure can
1292      occur when they are exceeded.  */
1293   size /= 2;
1294
1295 #ifdef RLIMIT_RSS
1296   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1297      Exceeding RSS is not fatal, but can be quite slow.  */
1298   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1299     size = rlimit.rlim_cur / 16 * 15;
1300 #endif
1301
1302   /* Use no less than the minimum.  */
1303   return MAX (size, MIN_SORT_SIZE);
1304 }
1305
1306 /* Return the sort buffer size to use with the input files identified
1307    by FPS and FILES, which are alternate names of the same files.
1308    NFILES gives the number of input files; NFPS may be less.  Assume
1309    that each input line requires LINE_BYTES extra bytes' worth of line
1310    information.  Do not exceed the size bound specified by the user
1311    (or a default size bound, if the user does not specify one).  */
1312
1313 static size_t
1314 sort_buffer_size (FILE *const *fps, size_t nfps,
1315                   char *const *files, size_t nfiles,
1316                   size_t line_bytes)
1317 {
1318   /* A bound on the input size.  If zero, the bound hasn't been
1319      determined yet.  */
1320   static size_t size_bound;
1321
1322   /* In the worst case, each input byte is a newline.  */
1323   size_t worst_case_per_input_byte = line_bytes + 1;
1324
1325   /* Keep enough room for one extra input line and an extra byte.
1326      This extra room might be needed when preparing to read EOF.  */
1327   size_t size = worst_case_per_input_byte + 1;
1328
1329   size_t i;
1330
1331   for (i = 0; i < nfiles; i++)
1332     {
1333       struct stat st;
1334       off_t file_size;
1335       size_t worst_case;
1336
1337       if ((i < nfps ? fstat (fileno (fps[i]), &st)
1338            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1339            : stat (files[i], &st))
1340           != 0)
1341         die (_("stat failed"), files[i]);
1342
1343       if (S_ISREG (st.st_mode))
1344         file_size = st.st_size;
1345       else
1346         {
1347           /* The file has unknown size.  If the user specified a sort
1348              buffer size, use that; otherwise, guess the size.  */
1349           if (sort_size)
1350             return sort_size;
1351           file_size = INPUT_FILE_SIZE_GUESS;
1352         }
1353
1354       if (! size_bound)
1355         {
1356           size_bound = sort_size;
1357           if (! size_bound)
1358             size_bound = default_sort_size ();
1359         }
1360
1361       /* Add the amount of memory needed to represent the worst case
1362          where the input consists entirely of newlines followed by a
1363          single non-newline.  Check for overflow.  */
1364       worst_case = file_size * worst_case_per_input_byte + 1;
1365       if (file_size != worst_case / worst_case_per_input_byte
1366           || size_bound - size <= worst_case)
1367         return size_bound;
1368       size += worst_case;
1369     }
1370
1371   return size;
1372 }
1373
1374 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1375    must be at least sizeof (struct line).  Allocate ALLOC bytes
1376    initially.  */
1377
1378 static void
1379 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1380 {
1381   /* Ensure that the line array is properly aligned.  If the desired
1382      size cannot be allocated, repeatedly halve it until allocation
1383      succeeds.  The smaller allocation may hurt overall performance,
1384      but that's better than failing.  */
1385   for (;;)
1386     {
1387       alloc += sizeof (struct line) - alloc % sizeof (struct line);
1388       buf->buf = malloc (alloc);
1389       if (buf->buf)
1390         break;
1391       alloc /= 2;
1392       if (alloc <= line_bytes + 1)
1393         xalloc_die ();
1394     }
1395
1396   buf->line_bytes = line_bytes;
1397   buf->alloc = alloc;
1398   buf->used = buf->left = buf->nlines = 0;
1399   buf->eof = false;
1400 }
1401
1402 /* Return one past the limit of the line array.  */
1403
1404 static inline struct line *
1405 buffer_linelim (struct buffer const *buf)
1406 {
1407   return (struct line *) (buf->buf + buf->alloc);
1408 }
1409
1410 /* Return a pointer to the first character of the field specified
1411    by KEY in LINE. */
1412
1413 static char *
1414 begfield (const struct line *line, const struct keyfield *key)
1415 {
1416   char *ptr = line->text, *lim = ptr + line->length - 1;
1417   size_t sword = key->sword;
1418   size_t schar = key->schar;
1419
1420   /* The leading field separator itself is included in a field when -t
1421      is absent.  */
1422
1423   if (tab != TAB_DEFAULT)
1424     while (ptr < lim && sword--)
1425       {
1426         while (ptr < lim && *ptr != tab)
1427           ++ptr;
1428         if (ptr < lim)
1429           ++ptr;
1430       }
1431   else
1432     while (ptr < lim && sword--)
1433       {
1434         while (ptr < lim && blanks[to_uchar (*ptr)])
1435           ++ptr;
1436         while (ptr < lim && !blanks[to_uchar (*ptr)])
1437           ++ptr;
1438       }
1439
1440   /* If we're ignoring leading blanks when computing the Start
1441      of the field, skip past them here.  */
1442   if (key->skipsblanks)
1443     while (ptr < lim && blanks[to_uchar (*ptr)])
1444       ++ptr;
1445
1446   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1447   ptr = MIN (lim, ptr + schar);
1448
1449   return ptr;
1450 }
1451
1452 /* Return the limit of (a pointer to the first character after) the field
1453    in LINE specified by KEY. */
1454
1455 static char *
1456 limfield (const struct line *line, const struct keyfield *key)
1457 {
1458   char *ptr = line->text, *lim = ptr + line->length - 1;
1459   size_t eword = key->eword, echar = key->echar;
1460
1461   if (echar == 0)
1462     eword++; /* Skip all of end field.  */
1463
1464   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1465      whichever comes first.  If there are more than EWORD fields, leave
1466      PTR pointing at the beginning of the field having zero-based index,
1467      EWORD.  If a delimiter character was specified (via -t), then that
1468      `beginning' is the first character following the delimiting TAB.
1469      Otherwise, leave PTR pointing at the first `blank' character after
1470      the preceding field.  */
1471   if (tab != TAB_DEFAULT)
1472     while (ptr < lim && eword--)
1473       {
1474         while (ptr < lim && *ptr != tab)
1475           ++ptr;
1476         if (ptr < lim && (eword | echar))
1477           ++ptr;
1478       }
1479   else
1480     while (ptr < lim && eword--)
1481       {
1482         while (ptr < lim && blanks[to_uchar (*ptr)])
1483           ++ptr;
1484         while (ptr < lim && !blanks[to_uchar (*ptr)])
1485           ++ptr;
1486       }
1487
1488 #ifdef POSIX_UNSPECIFIED
1489   /* The following block of code makes GNU sort incompatible with
1490      standard Unix sort, so it's ifdef'd out for now.
1491      The POSIX spec isn't clear on how to interpret this.
1492      FIXME: request clarification.
1493
1494      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1495      Date: Thu, 30 May 96 12:20:41 -0400
1496      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1497
1498      [...]I believe I've found another bug in `sort'.
1499
1500      $ cat /tmp/sort.in
1501      a b c 2 d
1502      pq rs 1 t
1503      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1504      a b c 2 d
1505      pq rs 1 t
1506      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1507      pq rs 1 t
1508      a b c 2 d
1509
1510      Unix sort produced the answer I expected: sort on the single character
1511      in column 7.  GNU sort produced different results, because it disagrees
1512      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1513      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1514      "skip M-1 fields, then either N-1 characters or the rest of the current
1515      field, whichever comes first".  This extra clause applies only to
1516      key-ends, not key-starts.
1517      */
1518
1519   /* Make LIM point to the end of (one byte past) the current field.  */
1520   if (tab != TAB_DEFAULT)
1521     {
1522       char *newlim;
1523       newlim = memchr (ptr, tab, lim - ptr);
1524       if (newlim)
1525         lim = newlim;
1526     }
1527   else
1528     {
1529       char *newlim;
1530       newlim = ptr;
1531       while (newlim < lim && blanks[to_uchar (*newlim)])
1532         ++newlim;
1533       while (newlim < lim && !blanks[to_uchar (*newlim)])
1534         ++newlim;
1535       lim = newlim;
1536     }
1537 #endif
1538
1539   if (echar != 0) /* We need to skip over a portion of the end field.  */
1540     {
1541       /* If we're ignoring leading blanks when computing the End
1542          of the field, skip past them here.  */
1543       if (key->skipeblanks)
1544         while (ptr < lim && blanks[to_uchar (*ptr)])
1545           ++ptr;
1546
1547       /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1548       ptr = MIN (lim, ptr + echar);
1549     }
1550
1551   return ptr;
1552 }
1553
1554 /* Fill BUF reading from FP, moving buf->left bytes from the end
1555    of buf->buf to the beginning first.  If EOF is reached and the
1556    file wasn't terminated by a newline, supply one.  Set up BUF's line
1557    table too.  FILE is the name of the file corresponding to FP.
1558    Return true if some input was read.  */
1559
1560 static bool
1561 fillbuf (struct buffer *buf, FILE *fp, char const *file)
1562 {
1563   struct keyfield const *key = keylist;
1564   char eol = eolchar;
1565   size_t line_bytes = buf->line_bytes;
1566   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1567
1568   if (buf->eof)
1569     return false;
1570
1571   if (buf->used != buf->left)
1572     {
1573       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1574       buf->used = buf->left;
1575       buf->nlines = 0;
1576     }
1577
1578   for (;;)
1579     {
1580       char *ptr = buf->buf + buf->used;
1581       struct line *linelim = buffer_linelim (buf);
1582       struct line *line = linelim - buf->nlines;
1583       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1584       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1585
1586       while (line_bytes + 1 < avail)
1587         {
1588           /* Read as many bytes as possible, but do not read so many
1589              bytes that there might not be enough room for the
1590              corresponding line array.  The worst case is when the
1591              rest of the input file consists entirely of newlines,
1592              except that the last byte is not a newline.  */
1593           size_t readsize = (avail - 1) / (line_bytes + 1);
1594           size_t bytes_read = fread (ptr, 1, readsize, fp);
1595           char *ptrlim = ptr + bytes_read;
1596           char *p;
1597           avail -= bytes_read;
1598
1599           if (bytes_read != readsize)
1600             {
1601               if (ferror (fp))
1602                 die (_("read failed"), file);
1603               if (feof (fp))
1604                 {
1605                   buf->eof = true;
1606                   if (buf->buf == ptrlim)
1607                     return false;
1608                   if (ptrlim[-1] != eol)
1609                     *ptrlim++ = eol;
1610                 }
1611             }
1612
1613           /* Find and record each line in the just-read input.  */
1614           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1615             {
1616               ptr = p + 1;
1617               line--;
1618               line->text = line_start;
1619               line->length = ptr - line_start;
1620               mergesize = MAX (mergesize, line->length);
1621               avail -= line_bytes;
1622
1623               if (key)
1624                 {
1625                   /* Precompute the position of the first key for
1626                      efficiency.  */
1627                   line->keylim = (key->eword == SIZE_MAX
1628                                   ? p
1629                                   : limfield (line, key));
1630
1631                   if (key->sword != SIZE_MAX)
1632                     line->keybeg = begfield (line, key);
1633                   else
1634                     {
1635                       if (key->skipsblanks)
1636                         while (blanks[to_uchar (*line_start)])
1637                           line_start++;
1638                       line->keybeg = line_start;
1639                     }
1640                 }
1641
1642               line_start = ptr;
1643             }
1644
1645           ptr = ptrlim;
1646           if (buf->eof)
1647             break;
1648         }
1649
1650       buf->used = ptr - buf->buf;
1651       buf->nlines = buffer_linelim (buf) - line;
1652       if (buf->nlines != 0)
1653         {
1654           buf->left = ptr - line_start;
1655           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1656           return true;
1657         }
1658
1659       {
1660         /* The current input line is too long to fit in the buffer.
1661            Double the buffer size and try again, keeping it properly
1662            aligned.  */
1663         size_t line_alloc = buf->alloc / sizeof (struct line);
1664         buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1665         buf->alloc = line_alloc * sizeof (struct line);
1666       }
1667     }
1668 }
1669
1670 /* Compare strings A and B as numbers without explicitly converting them to
1671    machine numbers.  Comparatively slow for short strings, but asymptotically
1672    hideously fast. */
1673
1674 static int
1675 numcompare (const char *a, const char *b)
1676 {
1677   while (blanks[to_uchar (*a)])
1678     a++;
1679   while (blanks[to_uchar (*b)])
1680     b++;
1681
1682   return strnumcmp (a, b, decimal_point, thousands_sep);
1683 }
1684
1685 /* Exit with an error if a mixture of SI and IEC units detected.  */
1686
1687 static void
1688 check_mixed_SI_IEC (char prefix, struct keyfield *key)
1689 {
1690   int si_present = prefix == 'i';
1691   if (key->si_present != -1 && si_present != key->si_present)
1692     error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
1693   key->si_present = si_present;
1694 }
1695
1696 /* Return an integer which represents the order of magnitude of
1697    the unit following the number.  NUMBER can contain thousands separators
1698    or a decimal point, but not have preceeding blanks.
1699    Negative numbers return a negative unit order.  */
1700
1701 static int
1702 find_unit_order (const char *number, struct keyfield *key)
1703 {
1704   static const char orders [UCHAR_LIM] =
1705     {
1706 #if SOME_DAY_WE_WILL_REQUIRE_C99
1707       ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1708       ['k']=1,
1709 #else
1710       /* Generate the following table with this command:
1711          perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
1712          foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
1713          |fmt  */
1714       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1715       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1716       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
1717       0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
1718       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1719       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1720       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1721       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1722       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1723       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1724       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1725 #endif
1726     };
1727
1728   const unsigned char *p = number;
1729
1730   int sign = 1;
1731
1732   if (*p == '-')
1733     {
1734       sign = -1;
1735       p++;
1736     }
1737
1738   /* Scan to end of number.
1739      Decimals or separators not followed by digits stop the scan.
1740      Numbers ending in decimals or separators are thus considered
1741      to be lacking in units.
1742      FIXME: add support for multibyte thousands_sep and decimal_point.  */
1743
1744   while (ISDIGIT (*p))
1745     {
1746       p++;
1747
1748       if (*p == decimal_point && ISDIGIT (*(p + 1)))
1749         p += 2;
1750       else if (*p == thousands_sep && ISDIGIT (*(p + 1)))
1751         p += 2;
1752     }
1753
1754   int order = orders[*p];
1755
1756   /* For valid units check for MiB vs MB etc.  */
1757   if (order)
1758     check_mixed_SI_IEC (*(p + 1), key);
1759
1760   return sign * order;
1761 }
1762
1763 /* Compare numbers ending in units with SI xor IEC prefixes
1764        <none/unknown> < K/k < M < G < T < P < E < Z < Y
1765    Assume that numbers are properly abbreviated.
1766    i.e. input will never have both 6000K and 5M.  */
1767
1768 static int
1769 human_numcompare (const char *a, const char *b, struct keyfield *key)
1770 {
1771   while (blanks[to_uchar (*a)])
1772     a++;
1773   while (blanks[to_uchar (*b)])
1774     b++;
1775
1776   int order_a = find_unit_order (a, key);
1777   int order_b = find_unit_order (b, key);
1778
1779   return (order_a > order_b ? 1
1780           : order_a < order_b ? -1
1781           : strnumcmp (a, b, decimal_point, thousands_sep));
1782 }
1783
1784 static int
1785 general_numcompare (const char *sa, const char *sb)
1786 {
1787   /* FIXME: add option to warn about failed conversions.  */
1788   /* FIXME: maybe add option to try expensive FP conversion
1789      only if A and B can't be compared more cheaply/accurately.  */
1790
1791   char *ea;
1792   char *eb;
1793   double a = strtod (sa, &ea);
1794   double b = strtod (sb, &eb);
1795
1796   /* Put conversion errors at the start of the collating sequence.  */
1797   if (sa == ea)
1798     return sb == eb ? 0 : -1;
1799   if (sb == eb)
1800     return 1;
1801
1802   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1803      conversion errors but before numbers; sort them by internal
1804      bit-pattern, for lack of a more portable alternative.  */
1805   return (a < b ? -1
1806           : a > b ? 1
1807           : a == b ? 0
1808           : b == b ? -1
1809           : a == a ? 1
1810           : memcmp ((char *) &a, (char *) &b, sizeof a));
1811 }
1812
1813 /* Return an integer in 1..12 of the month name MONTH with length LEN.
1814    Return 0 if the name in S is not recognized.  */
1815
1816 static int
1817 getmonth (char const *month, size_t len)
1818 {
1819   size_t lo = 0;
1820   size_t hi = MONTHS_PER_YEAR;
1821   char const *monthlim = month + len;
1822
1823   for (;;)
1824     {
1825       if (month == monthlim)
1826         return 0;
1827       if (!blanks[to_uchar (*month)])
1828         break;
1829       ++month;
1830     }
1831
1832   do
1833     {
1834       size_t ix = (lo + hi) / 2;
1835       char const *m = month;
1836       char const *n = monthtab[ix].name;
1837
1838       for (;; m++, n++)
1839         {
1840           if (!*n)
1841             return monthtab[ix].val;
1842           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1843             {
1844               hi = ix;
1845               break;
1846             }
1847           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1848             {
1849               lo = ix + 1;
1850               break;
1851             }
1852         }
1853     }
1854   while (lo < hi);
1855
1856   return 0;
1857 }
1858
1859 /* A source of random data.  */
1860 static struct randread_source *randread_source;
1861
1862 /* Return the Ith randomly-generated state.  The caller must invoke
1863    random_state (H) for all H less than I before invoking random_state
1864    (I).  */
1865
1866 static struct md5_ctx
1867 random_state (size_t i)
1868 {
1869   /* An array of states resulting from the random data, and counts of
1870      its used and allocated members.  */
1871   static struct md5_ctx *state;
1872   static size_t used;
1873   static size_t allocated;
1874
1875   struct md5_ctx *s = &state[i];
1876
1877   if (used <= i)
1878     {
1879       unsigned char buf[MD5_DIGEST_SIZE];
1880
1881       used++;
1882
1883       if (allocated <= i)
1884         {
1885           state = X2NREALLOC (state, &allocated);
1886           s = &state[i];
1887         }
1888
1889       randread (randread_source, buf, sizeof buf);
1890       md5_init_ctx (s);
1891       md5_process_bytes (buf, sizeof buf, s);
1892     }
1893
1894   return *s;
1895 }
1896
1897 /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1898    with length LENGTHB.  Return negative if less, zero if equal,
1899    positive if greater.  */
1900
1901 static int
1902 cmp_hashes (char const *texta, size_t lena,
1903             char const *textb, size_t lenb)
1904 {
1905   /* Try random hashes until a pair of hashes disagree.  But if the
1906      first pair of random hashes agree, check whether the keys are
1907      identical and if so report no difference.  */
1908   int diff;
1909   size_t i;
1910   for (i = 0; ; i++)
1911     {
1912       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1913       struct md5_ctx s[2];
1914       s[0] = s[1] = random_state (i);
1915       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1916       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1917       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1918       if (diff != 0)
1919         break;
1920       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1921         break;
1922     }
1923
1924   return diff;
1925 }
1926
1927 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1928    using one or more random hash functions.  */
1929
1930 static int
1931 compare_random (char *restrict texta, size_t lena,
1932                 char *restrict textb, size_t lenb)
1933 {
1934   int diff;
1935
1936   if (! hard_LC_COLLATE)
1937     diff = cmp_hashes (texta, lena, textb, lenb);
1938   else
1939     {
1940       /* Transform the text into the basis of comparison, so that byte
1941          strings that would otherwise considered to be equal are
1942          considered equal here even if their bytes differ.  */
1943
1944       char *buf = NULL;
1945       char stackbuf[4000];
1946       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1947       bool a_fits = tlena <= sizeof stackbuf;
1948       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1949                                (a_fits ? sizeof stackbuf - tlena : 0),
1950                                textb, lenb);
1951
1952       if (a_fits && tlena + tlenb <= sizeof stackbuf)
1953         buf = stackbuf;
1954       else
1955         {
1956           /* Adding 1 to the buffer size lets xmemxfrm run a bit
1957              faster by avoiding the need for an extra buffer copy.  */
1958           buf = xmalloc (tlena + tlenb + 1);
1959           xmemxfrm (buf, tlena + 1, texta, lena);
1960           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1961         }
1962
1963       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1964
1965       if (buf != stackbuf)
1966         free (buf);
1967     }
1968
1969   return diff;
1970 }
1971
1972 /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1973    using filevercmp. See lib/filevercmp.h for function description. */
1974
1975 static int
1976 compare_version (char *restrict texta, size_t lena,
1977                  char *restrict textb, size_t lenb)
1978 {
1979   int diff;
1980
1981   /* It is necessary to save the character after the end of the field.
1982      "filevercmp" works with NUL terminated strings.  Our blocks of
1983      text are not necessarily terminated with a NUL byte. */
1984   char sv_a = texta[lena];
1985   char sv_b = textb[lenb];
1986
1987   texta[lena] = '\0';
1988   textb[lenb] = '\0';
1989
1990   diff = filevercmp (texta, textb);
1991
1992   texta[lena] = sv_a;
1993   textb[lenb] = sv_b;
1994
1995   return diff;
1996 }
1997
1998 /* Compare two lines A and B trying every key in sequence until there
1999    are no more keys or a difference is found. */
2000
2001 static int
2002 keycompare (const struct line *a, const struct line *b)
2003 {
2004   struct keyfield *key = keylist;
2005
2006   /* For the first iteration only, the key positions have been
2007      precomputed for us. */
2008   char *texta = a->keybeg;
2009   char *textb = b->keybeg;
2010   char *lima = a->keylim;
2011   char *limb = b->keylim;
2012
2013   int diff;
2014
2015   for (;;)
2016     {
2017       char const *translate = key->translate;
2018       bool const *ignore = key->ignore;
2019
2020       /* Treat field ends before field starts as empty fields.  */
2021       lima = MAX (texta, lima);
2022       limb = MAX (textb, limb);
2023
2024       /* Find the lengths. */
2025       size_t lena = lima - texta;
2026       size_t lenb = limb - textb;
2027
2028       /* Actually compare the fields. */
2029
2030       if (key->random)
2031         diff = compare_random (texta, lena, textb, lenb);
2032       else if (key->numeric | key->general_numeric | key->human_numeric)
2033         {
2034           char savea = *lima, saveb = *limb;
2035
2036           *lima = *limb = '\0';
2037           diff = (key->numeric ? numcompare (texta, textb)
2038                   : key->general_numeric ? general_numcompare (texta, textb)
2039                   : human_numcompare (texta, textb, key));
2040           *lima = savea, *limb = saveb;
2041         }
2042       else if (key->version)
2043         diff = compare_version (texta, lena, textb, lenb);
2044       else if (key->month)
2045         diff = getmonth (texta, lena) - getmonth (textb, lenb);
2046       /* Sorting like this may become slow, so in a simple locale the user
2047          can select a faster sort that is similar to ascii sort.  */
2048       else if (hard_LC_COLLATE)
2049         {
2050           if (ignore || translate)
2051             {
2052               char buf[4000];
2053               size_t size = lena + 1 + lenb + 1;
2054               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
2055               char *copy_b = copy_a + lena + 1;
2056               size_t new_len_a, new_len_b, i;
2057
2058               /* Ignore and/or translate chars before comparing.  */
2059               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
2060                 {
2061                   if (i < lena)
2062                     {
2063                       copy_a[new_len_a] = (translate
2064                                            ? translate[to_uchar (texta[i])]
2065                                            : texta[i]);
2066                       if (!ignore || !ignore[to_uchar (texta[i])])
2067                         ++new_len_a;
2068                     }
2069                   if (i < lenb)
2070                     {
2071                       copy_b[new_len_b] = (translate
2072                                            ? translate[to_uchar (textb[i])]
2073                                            : textb [i]);
2074                       if (!ignore || !ignore[to_uchar (textb[i])])
2075                         ++new_len_b;
2076                     }
2077                 }
2078
2079               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2080
2081               if (sizeof buf < size)
2082                 free (copy_a);
2083             }
2084           else if (lena == 0)
2085             diff = - NONZERO (lenb);
2086           else if (lenb == 0)
2087             goto greater;
2088           else
2089             diff = xmemcoll (texta, lena, textb, lenb);
2090         }
2091       else if (ignore)
2092         {
2093 #define CMP_WITH_IGNORE(A, B)                                           \
2094   do                                                                    \
2095     {                                                                   \
2096           for (;;)                                                      \
2097             {                                                           \
2098               while (texta < lima && ignore[to_uchar (*texta)])         \
2099                 ++texta;                                                \
2100               while (textb < limb && ignore[to_uchar (*textb)])         \
2101                 ++textb;                                                \
2102               if (! (texta < lima && textb < limb))                     \
2103                 break;                                                  \
2104               diff = to_uchar (A) - to_uchar (B);                       \
2105               if (diff)                                                 \
2106                 goto not_equal;                                         \
2107               ++texta;                                                  \
2108               ++textb;                                                  \
2109             }                                                           \
2110                                                                         \
2111           diff = (texta < lima) - (textb < limb);                       \
2112     }                                                                   \
2113   while (0)
2114
2115           if (translate)
2116             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
2117                              translate[to_uchar (*textb)]);
2118           else
2119             CMP_WITH_IGNORE (*texta, *textb);
2120         }
2121       else if (lena == 0)
2122         diff = - NONZERO (lenb);
2123       else if (lenb == 0)
2124         goto greater;
2125       else
2126         {
2127           if (translate)
2128             {
2129               while (texta < lima && textb < limb)
2130                 {
2131                   diff = (to_uchar (translate[to_uchar (*texta++)])
2132                           - to_uchar (translate[to_uchar (*textb++)]));
2133                   if (diff)
2134                     goto not_equal;
2135                 }
2136             }
2137           else
2138             {
2139               diff = memcmp (texta, textb, MIN (lena, lenb));
2140               if (diff)
2141                 goto not_equal;
2142             }
2143           diff = lena < lenb ? -1 : lena != lenb;
2144         }
2145
2146       if (diff)
2147         goto not_equal;
2148
2149       key = key->next;
2150       if (! key)
2151         break;
2152
2153       /* Find the beginning and limit of the next field.  */
2154       if (key->eword != SIZE_MAX)
2155         lima = limfield (a, key), limb = limfield (b, key);
2156       else
2157         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2158
2159       if (key->sword != SIZE_MAX)
2160         texta = begfield (a, key), textb = begfield (b, key);
2161       else
2162         {
2163           texta = a->text, textb = b->text;
2164           if (key->skipsblanks)
2165             {
2166               while (texta < lima && blanks[to_uchar (*texta)])
2167                 ++texta;
2168               while (textb < limb && blanks[to_uchar (*textb)])
2169                 ++textb;
2170             }
2171         }
2172     }
2173
2174   return 0;
2175
2176  greater:
2177   diff = 1;
2178  not_equal:
2179   return key->reverse ? -diff : diff;
2180 }
2181
2182 /* Compare two lines A and B, returning negative, zero, or positive
2183    depending on whether A compares less than, equal to, or greater than B. */
2184
2185 static int
2186 compare (const struct line *a, const struct line *b)
2187 {
2188   int diff;
2189   size_t alen, blen;
2190
2191   /* First try to compare on the specified keys (if any).
2192      The only two cases with no key at all are unadorned sort,
2193      and unadorned sort -r. */
2194   if (keylist)
2195     {
2196       diff = keycompare (a, b);
2197       if (diff | unique | stable)
2198         return diff;
2199     }
2200
2201   /* If the keys all compare equal (or no keys were specified)
2202      fall through to the default comparison.  */
2203   alen = a->length - 1, blen = b->length - 1;
2204
2205   if (alen == 0)
2206     diff = - NONZERO (blen);
2207   else if (blen == 0)
2208     diff = 1;
2209   else if (hard_LC_COLLATE)
2210     diff = xmemcoll (a->text, alen, b->text, blen);
2211   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2212     diff = alen < blen ? -1 : alen != blen;
2213
2214   return reverse ? -diff : diff;
2215 }
2216
2217 /* Check that the lines read from FILE_NAME come in order.  Return
2218    true if they are in order.  If CHECKONLY == 'c', also print a
2219    diagnostic (FILE_NAME, line number, contents of line) to stderr if
2220    they are not in order.  */
2221
2222 static bool
2223 check (char const *file_name, char checkonly)
2224 {
2225   FILE *fp = xfopen (file_name, "r");
2226   struct buffer buf;            /* Input buffer. */
2227   struct line temp;             /* Copy of previous line. */
2228   size_t alloc = 0;
2229   uintmax_t line_number = 0;
2230   struct keyfield const *key = keylist;
2231   bool nonunique = ! unique;
2232   bool ordered = true;
2233
2234   initbuf (&buf, sizeof (struct line),
2235            MAX (merge_buffer_size, sort_size));
2236   temp.text = NULL;
2237
2238   while (fillbuf (&buf, fp, file_name))
2239     {
2240       struct line const *line = buffer_linelim (&buf);
2241       struct line const *linebase = line - buf.nlines;
2242
2243       /* Make sure the line saved from the old buffer contents is
2244          less than or equal to the first line of the new buffer. */
2245       if (alloc && nonunique <= compare (&temp, line - 1))
2246         {
2247         found_disorder:
2248           {
2249             if (checkonly == 'c')
2250               {
2251                 struct line const *disorder_line = line - 1;
2252                 uintmax_t disorder_line_number =
2253                   buffer_linelim (&buf) - disorder_line + line_number;
2254                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2255                 fprintf (stderr, _("%s: %s:%s: disorder: "),
2256                          program_name, file_name,
2257                          umaxtostr (disorder_line_number, hr_buf));
2258                 write_bytes (disorder_line->text, disorder_line->length,
2259                              stderr, _("standard error"));
2260               }
2261
2262             ordered = false;
2263             break;
2264           }
2265         }
2266
2267       /* Compare each line in the buffer with its successor.  */
2268       while (linebase < --line)
2269         if (nonunique <= compare (line, line - 1))
2270           goto found_disorder;
2271
2272       line_number += buf.nlines;
2273
2274       /* Save the last line of the buffer.  */
2275       if (alloc < line->length)
2276         {
2277           do
2278             {
2279               alloc *= 2;
2280               if (! alloc)
2281                 {
2282                   alloc = line->length;
2283                   break;
2284                 }
2285             }
2286           while (alloc < line->length);
2287
2288           temp.text = xrealloc (temp.text, alloc);
2289         }
2290       memcpy (temp.text, line->text, line->length);
2291       temp.length = line->length;
2292       if (key)
2293         {
2294           temp.keybeg = temp.text + (line->keybeg - line->text);
2295           temp.keylim = temp.text + (line->keylim - line->text);
2296         }
2297     }
2298
2299   xfclose (fp, file_name);
2300   free (buf.buf);
2301   free (temp.text);
2302   return ordered;
2303 }
2304
2305 /* Open FILES (there are NFILES of them) and store the resulting array
2306    of stream pointers into (*PFPS).  Allocate the array.  Return the
2307    number of successfully opened files, setting errno if this value is
2308    less than NFILES.  */
2309
2310 static size_t
2311 open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps)
2312 {
2313   FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps);
2314   int i;
2315
2316   /* Open as many input files as we can.  */
2317   for (i = 0; i < nfiles; i++)
2318     {
2319       fps[i] = (files[i].pid
2320                 ? open_temp (files[i].name, files[i].pid)
2321                 : stream_open (files[i].name, "r"));
2322       if (!fps[i])
2323         break;
2324     }
2325
2326   return i;
2327 }
2328
2329 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2330    files (all of which are at the start of the FILES array), and
2331    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2332    FPS is the vector of open stream corresponding to the files.
2333    Close input and output streams before returning.
2334    OUTPUT_FILE gives the name of the output file.  If it is NULL,
2335    the output file is standard output.  */
2336
2337 static void
2338 mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2339           FILE *ofp, char const *output_file, FILE **fps)
2340 {
2341   struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer);
2342                                 /* Input buffers for each file. */
2343   struct line saved;            /* Saved line storage for unique check. */
2344   struct line const *savedline = NULL;
2345                                 /* &saved if there is a saved line. */
2346   size_t savealloc = 0;         /* Size allocated for the saved line. */
2347   struct line const **cur = xnmalloc (nfiles, sizeof *cur);
2348                                 /* Current line in each line table. */
2349   struct line const **base = xnmalloc (nfiles, sizeof *base);
2350                                 /* Base of each line table.  */
2351   size_t *ord = xnmalloc (nfiles, sizeof *ord);
2352                                 /* Table representing a permutation of fps,
2353                                    such that cur[ord[0]] is the smallest line
2354                                    and will be next output. */
2355   size_t i;
2356   size_t j;
2357   size_t t;
2358   struct keyfield const *key = keylist;
2359   saved.text = NULL;
2360
2361   /* Read initial lines from each input file. */
2362   for (i = 0; i < nfiles; )
2363     {
2364       initbuf (&buffer[i], sizeof (struct line),
2365                MAX (merge_buffer_size, sort_size / nfiles));
2366       if (fillbuf (&buffer[i], fps[i], files[i].name))
2367         {
2368           struct line const *linelim = buffer_linelim (&buffer[i]);
2369           cur[i] = linelim - 1;
2370           base[i] = linelim - buffer[i].nlines;
2371           i++;
2372         }
2373       else
2374         {
2375           /* fps[i] is empty; eliminate it from future consideration.  */
2376           xfclose (fps[i], files[i].name);
2377           if (i < ntemps)
2378             {
2379               ntemps--;
2380               zaptemp (files[i].name);
2381             }
2382           free (buffer[i].buf);
2383           --nfiles;
2384           for (j = i; j < nfiles; ++j)
2385             {
2386               files[j] = files[j + 1];
2387               fps[j] = fps[j + 1];
2388             }
2389         }
2390     }
2391
2392   /* Set up the ord table according to comparisons among input lines.
2393      Since this only reorders two items if one is strictly greater than
2394      the other, it is stable. */
2395   for (i = 0; i < nfiles; ++i)
2396     ord[i] = i;
2397   for (i = 1; i < nfiles; ++i)
2398     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2399       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2400
2401   /* Repeatedly output the smallest line until no input remains. */
2402   while (nfiles)
2403     {
2404       struct line const *smallest = cur[ord[0]];
2405
2406       /* If uniquified output is turned on, output only the first of
2407          an identical series of lines. */
2408       if (unique)
2409         {
2410           if (savedline && compare (savedline, smallest))
2411             {
2412               savedline = NULL;
2413               write_bytes (saved.text, saved.length, ofp, output_file);
2414             }
2415           if (!savedline)
2416             {
2417               savedline = &saved;
2418               if (savealloc < smallest->length)
2419                 {
2420                   do
2421                     if (! savealloc)
2422                       {
2423                         savealloc = smallest->length;
2424                         break;
2425                       }
2426                   while ((savealloc *= 2) < smallest->length);
2427
2428                   saved.text = xrealloc (saved.text, savealloc);
2429                 }
2430               saved.length = smallest->length;
2431               memcpy (saved.text, smallest->text, saved.length);
2432               if (key)
2433                 {
2434                   saved.keybeg =
2435                     saved.text + (smallest->keybeg - smallest->text);
2436                   saved.keylim =
2437                     saved.text + (smallest->keylim - smallest->text);
2438                 }
2439             }
2440         }
2441       else
2442         write_bytes (smallest->text, smallest->length, ofp, output_file);
2443
2444       /* Check if we need to read more lines into core. */
2445       if (base[ord[0]] < smallest)
2446         cur[ord[0]] = smallest - 1;
2447       else
2448         {
2449           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2450             {
2451               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2452               cur[ord[0]] = linelim - 1;
2453               base[ord[0]] = linelim - buffer[ord[0]].nlines;
2454             }
2455           else
2456             {
2457               /* We reached EOF on fps[ord[0]].  */
2458               for (i = 1; i < nfiles; ++i)
2459                 if (ord[i] > ord[0])
2460                   --ord[i];
2461               --nfiles;
2462               xfclose (fps[ord[0]], files[ord[0]].name);
2463               if (ord[0] < ntemps)
2464                 {
2465                   ntemps--;
2466                   zaptemp (files[ord[0]].name);
2467                 }
2468               free (buffer[ord[0]].buf);
2469               for (i = ord[0]; i < nfiles; ++i)
2470                 {
2471                   fps[i] = fps[i + 1];
2472                   files[i] = files[i + 1];
2473                   buffer[i] = buffer[i + 1];
2474                   cur[i] = cur[i + 1];
2475                   base[i] = base[i + 1];
2476                 }
2477               for (i = 0; i < nfiles; ++i)
2478                 ord[i] = ord[i + 1];
2479               continue;
2480             }
2481         }
2482
2483       /* The new line just read in may be larger than other lines
2484          already in main memory; push it back in the queue until we
2485          encounter a line larger than it.  Optimize for the common
2486          case where the new line is smallest.  */
2487       {
2488         size_t lo = 1;
2489         size_t hi = nfiles;
2490         size_t probe = lo;
2491         size_t ord0 = ord[0];
2492         size_t count_of_smaller_lines;
2493
2494         while (lo < hi)
2495           {
2496             int cmp = compare (cur[ord0], cur[ord[probe]]);
2497             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2498               hi = probe;
2499             else
2500               lo = probe + 1;
2501             probe = (lo + hi) / 2;
2502           }
2503
2504         count_of_smaller_lines = lo - 1;
2505         for (j = 0; j < count_of_smaller_lines; j++)
2506           ord[j] = ord[j + 1];
2507         ord[count_of_smaller_lines] = ord0;
2508       }
2509
2510       /* Free up some resources every once in a while.  */
2511       if (MAX_PROCS_BEFORE_REAP < nprocs)
2512         reap_some ();
2513     }
2514
2515   if (unique && savedline)
2516     {
2517       write_bytes (saved.text, saved.length, ofp, output_file);
2518       free (saved.text);
2519     }
2520
2521   xfclose (ofp, output_file);
2522   free(fps);
2523   free(buffer);
2524   free(ord);
2525   free(base);
2526   free(cur);
2527 }
2528
2529 /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2530    files (all of which are at the start of the FILES array), and
2531    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2532    Close input and output files before returning.
2533    OUTPUT_FILE gives the name of the output file.
2534
2535    Return the number of files successfully merged.  This number can be
2536    less than NFILES if we ran low on file descriptors, but in this
2537    case it is never less than 2.  */
2538
2539 static size_t
2540 mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles,
2541             FILE *ofp, char const *output_file)
2542 {
2543   FILE **fps;
2544   size_t nopened = open_input_files (files, nfiles, &fps);
2545   if (nopened < nfiles && nopened < 2)
2546     die (_("open failed"), files[nopened].name);
2547   mergefps (files, ntemps, nopened, ofp, output_file, fps);
2548   return nopened;
2549 }
2550
2551 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2552    and HI (with NHI members).  T, LO, and HI point just past their
2553    respective arrays, and the arrays are in reverse order.  NLO and
2554    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2555
2556 static inline void
2557 mergelines (struct line *t,
2558             struct line const *lo, size_t nlo,
2559             struct line const *hi, size_t nhi)
2560 {
2561   for (;;)
2562     if (compare (lo - 1, hi - 1) <= 0)
2563       {
2564         *--t = *--lo;
2565         if (! --nlo)
2566           {
2567             /* HI - NHI equalled T - (NLO + NHI) when this function
2568                began.  Therefore HI must equal T now, and there is no
2569                need to copy from HI to T.  */
2570             return;
2571           }
2572       }
2573     else
2574       {
2575         *--t = *--hi;
2576         if (! --nhi)
2577           {
2578             do
2579               *--t = *--lo;
2580             while (--nlo);
2581
2582             return;
2583           }
2584       }
2585 }
2586
2587 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2588    NLINES must be at least 2.
2589    The input and output arrays are in reverse order, and LINES and
2590    TEMP point just past the end of their respective arrays.
2591
2592    Use a recursive divide-and-conquer algorithm, in the style
2593    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2594    the optimization suggested by exercise 5.2.4-10; this requires room
2595    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2596    writes that this memory optimization was originally published by
2597    D. A. Bell, Comp J. 1 (1958), 75.  */
2598
2599 static void
2600 sortlines (struct line *lines, size_t nlines, struct line *temp)
2601 {
2602   if (nlines == 2)
2603     {
2604       if (0 < compare (&lines[-1], &lines[-2]))
2605         {
2606           struct line tmp = lines[-1];
2607           lines[-1] = lines[-2];
2608           lines[-2] = tmp;
2609         }
2610     }
2611   else
2612     {
2613       size_t nlo = nlines / 2;
2614       size_t nhi = nlines - nlo;
2615       struct line *lo = lines;
2616       struct line *hi = lines - nlo;
2617       struct line *sorted_lo = temp;
2618
2619       sortlines (hi, nhi, temp);
2620       if (1 < nlo)
2621         sortlines_temp (lo, nlo, sorted_lo);
2622       else
2623         sorted_lo[-1] = lo[-1];
2624
2625       mergelines (lines, sorted_lo, nlo, hi, nhi);
2626     }
2627 }
2628
2629 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2630    rather than sorting in place.  */
2631
2632 static void
2633 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2634 {
2635   if (nlines == 2)
2636     {
2637       /* Declare `swap' as int, not bool, to work around a bug
2638          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2639          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2640       int swap = (0 < compare (&lines[-1], &lines[-2]));
2641       temp[-1] = lines[-1 - swap];
2642       temp[-2] = lines[-2 + swap];
2643     }
2644   else
2645     {
2646       size_t nlo = nlines / 2;
2647       size_t nhi = nlines - nlo;
2648       struct line *lo = lines;
2649       struct line *hi = lines - nlo;
2650       struct line *sorted_hi = temp - nlo;
2651
2652       sortlines_temp (hi, nhi, sorted_hi);
2653       if (1 < nlo)
2654         sortlines (lo, nlo, temp);
2655
2656       mergelines (temp, lo, nlo, sorted_hi, nhi);
2657     }
2658 }
2659
2660 /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2661    the same as OUTFILE.  If found, merge the found instances (and perhaps
2662    some other files) into a temporary file so that it can in turn be
2663    merged into OUTFILE without destroying OUTFILE before it is completely
2664    read.  Return the new value of NFILES, which differs from the old if
2665    some merging occurred.
2666
2667    This test ensures that an otherwise-erroneous use like
2668    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2669    It's not clear that POSIX requires this nicety.
2670    Detect common error cases, but don't try to catch obscure cases like
2671    "cat ... FILE ... | sort -m -o FILE"
2672    where traditional "sort" doesn't copy the input and where
2673    people should know that they're getting into trouble anyway.
2674    Catching these obscure cases would slow down performance in
2675    common cases.  */
2676
2677 static size_t
2678 avoid_trashing_input (struct sortfile *files, size_t ntemps,
2679                       size_t nfiles, char const *outfile)
2680 {
2681   size_t i;
2682   bool got_outstat = false;
2683   struct stat outstat;
2684
2685   for (i = ntemps; i < nfiles; i++)
2686     {
2687       bool is_stdin = STREQ (files[i].name, "-");
2688       bool same;
2689       struct stat instat;
2690
2691       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2692         same = true;
2693       else
2694         {
2695           if (! got_outstat)
2696             {
2697               if ((outfile
2698                    ? stat (outfile, &outstat)
2699                    : fstat (STDOUT_FILENO, &outstat))
2700                   != 0)
2701                 break;
2702               got_outstat = true;
2703             }
2704
2705           same = (((is_stdin
2706                     ? fstat (STDIN_FILENO, &instat)
2707                     : stat (files[i].name, &instat))
2708                    == 0)
2709                   && SAME_INODE (instat, outstat));
2710         }
2711
2712       if (same)
2713         {
2714           FILE *tftp;
2715           pid_t pid;
2716           char *temp = create_temp (&tftp, &pid);
2717           size_t num_merged = 0;
2718           do
2719             {
2720               num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
2721               files[i].name = temp;
2722               files[i].pid = pid;
2723
2724               if (i + num_merged < nfiles)
2725                 memmove(&files[i + 1], &files[i + num_merged],
2726                         num_merged * sizeof *files);
2727               ntemps += 1;
2728               nfiles -= num_merged - 1;;
2729               i += num_merged;
2730             }
2731           while (i < nfiles);
2732         }
2733     }
2734
2735   return nfiles;
2736 }
2737
2738 /* Merge the input FILES.  NTEMPS is the number of files at the
2739    start of FILES that are temporary; it is zero at the top level.
2740    NFILES is the total number of files.  Put the output in
2741    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
2742
2743 static void
2744 merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2745        char const *output_file)
2746 {
2747   while (nmerge < nfiles)
2748     {
2749       /* Number of input files processed so far.  */
2750       size_t in;
2751
2752       /* Number of output files generated so far.  */
2753       size_t out;
2754
2755       /* nfiles % NMERGE; this counts input files that are left over
2756          after all full-sized merges have been done.  */
2757       size_t remainder;
2758
2759       /* Number of easily-available slots at the next loop iteration.  */
2760       size_t cheap_slots;
2761
2762       /* Do as many NMERGE-size merges as possible. In the case that
2763          nmerge is bogus, increment by the maximum number of file
2764          descriptors allowed.  */
2765       for (out = in = 0; nmerge <= nfiles - in; out++)
2766         {
2767           FILE *tfp;
2768           pid_t pid;
2769           char *temp = create_temp (&tfp, &pid);
2770           size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge),
2771                                           nmerge, tfp, temp);
2772           ntemps -= MIN (ntemps, num_merged);
2773           files[out].name = temp;
2774           files[out].pid = pid;
2775           in += num_merged;
2776         }
2777
2778       remainder = nfiles - in;
2779       cheap_slots = nmerge - out % nmerge;
2780
2781       if (cheap_slots < remainder)
2782         {
2783           /* So many files remain that they can't all be put into the last
2784              NMERGE-sized output window.  Do one more merge.  Merge as few
2785              files as possible, to avoid needless I/O.  */
2786           size_t nshortmerge = remainder - cheap_slots + 1;
2787           FILE *tfp;
2788           pid_t pid;
2789           char *temp = create_temp (&tfp, &pid);
2790           size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge),
2791                                           nshortmerge, tfp, temp);
2792           ntemps -= MIN (ntemps, num_merged);
2793           files[out].name = temp;
2794           files[out++].pid = pid;
2795           in += num_merged;
2796         }
2797
2798       /* Put the remaining input files into the last NMERGE-sized output
2799          window, so they will be merged in the next pass.  */
2800       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2801       ntemps += out;
2802       nfiles -= in - out;
2803     }
2804
2805   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2806
2807   /* We aren't guaranteed that this final mergefiles will work, therefore we
2808      try to merge into the output, and then merge as much as we can into a
2809      temp file if we can't. Repeat.  */
2810
2811   for (;;)
2812     {
2813       /* Merge directly into the output file if possible.  */
2814       FILE **fps;
2815       size_t nopened = open_input_files (files, nfiles, &fps);
2816
2817       if (nopened == nfiles)
2818         {
2819           FILE *ofp = stream_open (output_file, "w");
2820           if (ofp)
2821             {
2822               mergefps (files, ntemps, nfiles, ofp, output_file, fps);
2823               break;
2824             }
2825           if (errno != EMFILE || nopened <= 2)
2826             die (_("open failed"), output_file);
2827         }
2828       else if (nopened <= 2)
2829         die (_("open failed"), files[nopened].name);
2830
2831       /* We ran out of file descriptors.  Close one of the input
2832          files, to gain a file descriptor.  Then create a temporary
2833          file with our spare file descriptor.  Retry if that failed
2834          (e.g., some other process could open a file between the time
2835          we closed and tried to create).  */
2836       FILE *tfp;
2837       pid_t pid;
2838       char *temp;
2839       do
2840         {
2841           nopened--;
2842           xfclose (fps[nopened], files[nopened].name);
2843           temp = maybe_create_temp (&tfp, &pid, ! (nopened <= 2));
2844         }
2845       while (!temp);
2846
2847       /* Merge into the newly allocated temporary.  */
2848       mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp, fps);
2849       ntemps -= MIN (ntemps, nopened);
2850       files[0].name = temp;
2851       files[0].pid = pid;
2852
2853       memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files);
2854       ntemps++;
2855       nfiles -= nopened - 1;
2856     }
2857 }
2858
2859 /* Sort NFILES FILES onto OUTPUT_FILE. */
2860
2861 static void
2862 sort (char * const *files, size_t nfiles, char const *output_file)
2863 {
2864   struct buffer buf;
2865   size_t ntemps = 0;
2866   bool output_file_created = false;
2867
2868   buf.alloc = 0;
2869
2870   while (nfiles)
2871     {
2872       char const *temp_output;
2873       char const *file = *files;
2874       FILE *fp = xfopen (file, "r");
2875       FILE *tfp;
2876       size_t bytes_per_line = (2 * sizeof (struct line)
2877                                - sizeof (struct line) / 2);
2878
2879       if (! buf.alloc)
2880         initbuf (&buf, bytes_per_line,
2881                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2882       buf.eof = false;
2883       files++;
2884       nfiles--;
2885
2886       while (fillbuf (&buf, fp, file))
2887         {
2888           struct line *line;
2889           struct line *linebase;
2890
2891           if (buf.eof && nfiles
2892               && (bytes_per_line + 1
2893                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2894             {
2895               /* End of file, but there is more input and buffer room.
2896                  Concatenate the next input file; this is faster in
2897                  the usual case.  */
2898               buf.left = buf.used;
2899               break;
2900             }
2901
2902           line = buffer_linelim (&buf);
2903           linebase = line - buf.nlines;
2904           if (1 < buf.nlines)
2905             sortlines (line, buf.nlines, linebase);
2906           if (buf.eof && !nfiles && !ntemps && !buf.left)
2907             {
2908               xfclose (fp, file);
2909               tfp = xfopen (output_file, "w");
2910               temp_output = output_file;
2911               output_file_created = true;
2912             }
2913           else
2914             {
2915               ++ntemps;
2916               temp_output = create_temp (&tfp, NULL);
2917             }
2918
2919           do
2920             {
2921               line--;
2922               write_bytes (line->text, line->length, tfp, temp_output);
2923               if (unique)
2924                 while (linebase < line && compare (line, line - 1) == 0)
2925                   line--;
2926             }
2927           while (linebase < line);
2928
2929           xfclose (tfp, temp_output);
2930
2931           /* Free up some resources every once in a while.  */
2932           if (MAX_PROCS_BEFORE_REAP < nprocs)
2933             reap_some ();
2934
2935           if (output_file_created)
2936             goto finish;
2937         }
2938       xfclose (fp, file);
2939     }
2940
2941  finish:
2942   free (buf.buf);
2943
2944   if (! output_file_created)
2945     {
2946       size_t i;
2947       struct tempnode *node = temphead;
2948       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2949       for (i = 0; node; i++)
2950         {
2951           tempfiles[i].name = node->name;
2952           tempfiles[i].pid = node->pid;
2953           node = node->next;
2954         }
2955       merge (tempfiles, ntemps, ntemps, output_file);
2956       free (tempfiles);
2957     }
2958 }
2959
2960 /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
2961
2962 static void
2963 insertkey (struct keyfield *key_arg)
2964 {
2965   struct keyfield **p;
2966   struct keyfield *key = xmemdup (key_arg, sizeof *key);
2967
2968   for (p = &keylist; *p; p = &(*p)->next)
2969     continue;
2970   *p = key;
2971   key->next = NULL;
2972 }
2973
2974 /* Report a bad field specification SPEC, with extra info MSGID.  */
2975
2976 static void badfieldspec (char const *, char const *)
2977      ATTRIBUTE_NORETURN;
2978 static void
2979 badfieldspec (char const *spec, char const *msgid)
2980 {
2981   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2982          _(msgid), quote (spec));
2983   abort ();
2984 }
2985
2986 /* Report incompatible options.  */
2987
2988 static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2989 static void
2990 incompatible_options (char const *opts)
2991 {
2992   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
2993   abort ();
2994 }
2995
2996 /* Check compatibility of ordering options.  */
2997
2998 static void
2999 check_ordering_compatibility (void)
3000 {
3001   struct keyfield const *key;
3002
3003   for (key = keylist; key; key = key->next)
3004     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
3005               + key->version + !!key->ignore + key->human_numeric))
3006         || (key->random && key->translate))
3007       {
3008         /* The following is too big, but guaranteed to be "big enough". */
3009         char opts[sizeof short_options];
3010         char *p = opts;
3011         if (key->ignore == nondictionary)
3012           *p++ = 'd';
3013         if (key->translate)
3014           *p++ = 'f';
3015         if (key->general_numeric)
3016           *p++ = 'g';
3017         if (key->human_numeric)
3018           *p++ = 'h';
3019         if (key->ignore == nonprinting)
3020           *p++ = 'i';
3021         if (key->month)
3022           *p++ = 'M';
3023         if (key->numeric)
3024           *p++ = 'n';
3025         if (key->version)
3026           *p++ = 'V';
3027         if (key->random)
3028           *p++ = 'R';
3029         *p = '\0';
3030         incompatible_options (opts);
3031       }
3032 }
3033
3034 /* Parse the leading integer in STRING and store the resulting value
3035    (which must fit into size_t) into *VAL.  Return the address of the
3036    suffix after the integer.  If the value is too large, silently
3037    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
3038    failure; otherwise, report MSGID and exit on failure.  */
3039
3040 static char const *
3041 parse_field_count (char const *string, size_t *val, char const *msgid)
3042 {
3043   char *suffix;
3044   uintmax_t n;
3045
3046   switch (xstrtoumax (string, &suffix, 10, &n, ""))
3047     {
3048     case LONGINT_OK:
3049     case LONGINT_INVALID_SUFFIX_CHAR:
3050       *val = n;
3051       if (*val == n)
3052         break;
3053       /* Fall through.  */
3054     case LONGINT_OVERFLOW:
3055     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
3056       *val = SIZE_MAX;
3057       break;
3058
3059     case LONGINT_INVALID:
3060       if (msgid)
3061         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
3062                _(msgid), quote (string));
3063       return NULL;
3064     }
3065
3066   return suffix;
3067 }
3068
3069 /* Handle interrupts and hangups. */
3070
3071 static void
3072 sighandler (int sig)
3073 {
3074   if (! SA_NOCLDSTOP)
3075     signal (sig, SIG_IGN);
3076
3077   cleanup ();
3078
3079   signal (sig, SIG_DFL);
3080   raise (sig);
3081 }
3082
3083 /* Set the ordering options for KEY specified in S.
3084    Return the address of the first character in S that
3085    is not a valid ordering option.
3086    BLANKTYPE is the kind of blanks that 'b' should skip. */
3087
3088 static char *
3089 set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
3090 {
3091   while (*s)
3092     {
3093       switch (*s)
3094         {
3095         case 'b':
3096           if (blanktype == bl_start || blanktype == bl_both)
3097             key->skipsblanks = true;
3098           if (blanktype == bl_end || blanktype == bl_both)
3099             key->skipeblanks = true;
3100           break;
3101         case 'd':
3102           key->ignore = nondictionary;
3103           break;
3104         case 'f':
3105           key->translate = fold_toupper;
3106           break;
3107         case 'g':
3108           key->general_numeric = true;
3109           break;
3110         case 'h':
3111           key->human_numeric = true;
3112           break;
3113         case 'i':
3114           /* Option order should not matter, so don't let -i override
3115              -d.  -d implies -i, but -i does not imply -d.  */
3116           if (! key->ignore)
3117             key->ignore = nonprinting;
3118           break;
3119         case 'M':
3120           key->month = true;
3121           break;
3122         case 'n':
3123           key->numeric = true;
3124           break;
3125         case 'R':
3126           key->random = true;
3127           break;
3128         case 'r':
3129           key->reverse = true;
3130           break;
3131         case 'V':
3132           key->version = true;
3133           break;
3134         default:
3135           return (char *) s;
3136         }
3137       ++s;
3138     }
3139   return (char *) s;
3140 }
3141
3142 static struct keyfield *
3143 key_init (struct keyfield *key)
3144 {
3145   memset (key, 0, sizeof *key);
3146   key->eword = SIZE_MAX;
3147   key->si_present = -1;
3148   return key;
3149 }
3150
3151 int
3152 main (int argc, char **argv)
3153 {
3154   struct keyfield *key;
3155   struct keyfield key_buf;
3156   struct keyfield gkey;
3157   char const *s;
3158   int c = 0;
3159   char checkonly = 0;
3160   bool mergeonly = false;
3161   char *random_source = NULL;
3162   bool need_random = false;
3163   size_t nfiles = 0;
3164   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
3165   bool obsolete_usage = (posix2_version () < 200112);
3166   char **files;
3167   char *files_from = NULL;
3168   struct Tokens tok;
3169   char const *outfile = NULL;
3170
3171   initialize_main (&argc, &argv);
3172   set_program_name (argv[0]);
3173   setlocale (LC_ALL, "");
3174   bindtextdomain (PACKAGE, LOCALEDIR);
3175   textdomain (PACKAGE);
3176
3177   initialize_exit_failure (SORT_FAILURE);
3178
3179   hard_LC_COLLATE = hard_locale (LC_COLLATE);
3180 #if HAVE_NL_LANGINFO
3181   hard_LC_TIME = hard_locale (LC_TIME);
3182 #endif
3183
3184   /* Get locale's representation of the decimal point.  */
3185   {
3186     struct lconv const *locale = localeconv ();
3187
3188     /* If the locale doesn't define a decimal point, or if the decimal
3189        point is multibyte, use the C locale's decimal point.  FIXME:
3190        add support for multibyte decimal points.  */
3191     decimal_point = to_uchar (locale->decimal_point[0]);
3192     if (! decimal_point || locale->decimal_point[1])
3193       decimal_point = '.';
3194
3195     /* FIXME: add support for multibyte thousands separators.  */
3196     thousands_sep = to_uchar (*locale->thousands_sep);
3197     if (! thousands_sep || locale->thousands_sep[1])
3198       thousands_sep = -1;
3199   }
3200
3201   have_read_stdin = false;
3202   inittables ();
3203
3204   {
3205     size_t i;
3206     static int const sig[] =
3207       {
3208         /* The usual suspects.  */
3209         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
3210 #ifdef SIGPOLL
3211         SIGPOLL,
3212 #endif
3213 #ifdef SIGPROF
3214         SIGPROF,
3215 #endif
3216 #ifdef SIGVTALRM
3217         SIGVTALRM,
3218 #endif
3219 #ifdef SIGXCPU
3220         SIGXCPU,
3221 #endif
3222 #ifdef SIGXFSZ
3223         SIGXFSZ,
3224 #endif
3225       };
3226     enum { nsigs = ARRAY_CARDINALITY (sig) };
3227
3228 #if SA_NOCLDSTOP
3229     struct sigaction act;
3230
3231     sigemptyset (&caught_signals);
3232     for (i = 0; i < nsigs; i++)
3233       {
3234         sigaction (sig[i], NULL, &act);
3235         if (act.sa_handler != SIG_IGN)
3236           sigaddset (&caught_signals, sig[i]);
3237       }
3238
3239     act.sa_handler = sighandler;
3240     act.sa_mask = caught_signals;
3241     act.sa_flags = 0;
3242
3243     for (i = 0; i < nsigs; i++)
3244       if (sigismember (&caught_signals, sig[i]))
3245         sigaction (sig[i], &act, NULL);
3246 #else
3247     for (i = 0; i < nsigs; i++)
3248       if (signal (sig[i], SIG_IGN) != SIG_IGN)
3249         {
3250           signal (sig[i], sighandler);
3251           siginterrupt (sig[i], 1);
3252         }
3253 #endif
3254   }
3255
3256   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
3257   atexit (exit_cleanup);
3258
3259   gkey.sword = gkey.eword = SIZE_MAX;
3260   gkey.ignore = NULL;
3261   gkey.translate = NULL;
3262   gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
3263   gkey.si_present = -1;
3264   gkey.random = gkey.version = false;
3265   gkey.month = gkey.reverse = false;
3266   gkey.skipsblanks = gkey.skipeblanks = false;
3267
3268   files = xnmalloc (argc, sizeof *files);
3269
3270   for (;;)
3271     {
3272       /* Parse an operand as a file after "--" was seen; or if
3273          pedantic and a file was seen, unless the POSIX version
3274          predates 1003.1-2001 and -c was not seen and the operand is
3275          "-o FILE" or "-oFILE".  */
3276       int oi = -1;
3277
3278       if (c == -1
3279           || (posixly_correct && nfiles != 0
3280               && ! (obsolete_usage
3281                     && ! checkonly
3282                     && optind != argc
3283                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
3284                     && (argv[optind][2] || optind + 1 != argc)))
3285           || ((c = getopt_long (argc, argv, short_options,
3286                                 long_options, &oi))
3287               == -1))
3288         {
3289           if (argc <= optind)
3290             break;
3291           files[nfiles++] = argv[optind++];
3292         }
3293       else switch (c)
3294         {
3295         case 1:
3296           key = NULL;
3297           if (optarg[0] == '+')
3298             {
3299               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3300                                       && ISDIGIT (argv[optind][1]));
3301               obsolete_usage |= minus_pos_usage & ~posixly_correct;
3302               if (obsolete_usage)
3303                 {
3304                   /* Treat +POS1 [-POS2] as a key if possible; but silently
3305                      treat an operand as a file if it is not a valid +POS1.  */
3306                   key = key_init (&key_buf);
3307                   s = parse_field_count (optarg + 1, &key->sword, NULL);
3308                   if (s && *s == '.')
3309                     s = parse_field_count (s + 1, &key->schar, NULL);
3310                   if (! (key->sword | key->schar))
3311                     key->sword = SIZE_MAX;
3312                   if (! s || *set_ordering (s, key, bl_start))
3313                     key = NULL;
3314                   else
3315                     {
3316                       if (minus_pos_usage)
3317                         {
3318                           char const *optarg1 = argv[optind++];
3319                           s = parse_field_count (optarg1 + 1, &key->eword,
3320                                              N_("invalid number after `-'"));
3321                           if (*s == '.')
3322                             s = parse_field_count (s + 1, &key->echar,
3323                                                N_("invalid number after `.'"));
3324                           if (*set_ordering (s, key, bl_end))
3325                             badfieldspec (optarg1,
3326                                       N_("stray character in field spec"));
3327                         }
3328                       insertkey (key);
3329                     }
3330                 }
3331             }
3332           if (! key)
3333             files[nfiles++] = optarg;
3334           break;
3335
3336         case SORT_OPTION:
3337           c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3338           /* Fall through. */
3339         case 'b':
3340         case 'd':
3341         case 'f':
3342         case 'g':
3343         case 'h':
3344         case 'i':
3345         case 'M':
3346         case 'n':
3347         case 'r':
3348         case 'R':
3349         case 'V':
3350           {
3351             char str[2];
3352             str[0] = c;
3353             str[1] = '\0';
3354             set_ordering (str, &gkey, bl_both);
3355           }
3356           break;
3357
3358         case CHECK_OPTION:
3359           c = (optarg
3360                ? XARGMATCH ("--check", optarg, check_args, check_types)
3361                : 'c');
3362           /* Fall through.  */
3363         case 'c':
3364         case 'C':
3365           if (checkonly && checkonly != c)
3366             incompatible_options ("cC");
3367           checkonly = c;
3368           break;
3369
3370         case COMPRESS_PROGRAM_OPTION:
3371           if (compress_program && !STREQ (compress_program, optarg))
3372             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3373           compress_program = optarg;
3374           break;
3375
3376         case FILES0_FROM_OPTION:
3377           files_from = optarg;
3378           break;
3379
3380         case 'k':
3381           key = key_init (&key_buf);
3382
3383           /* Get POS1. */
3384           s = parse_field_count (optarg, &key->sword,
3385                                  N_("invalid number at field start"));
3386           if (! key->sword--)
3387             {
3388               /* Provoke with `sort -k0' */
3389               badfieldspec (optarg, N_("field number is zero"));
3390             }
3391           if (*s == '.')
3392             {
3393               s = parse_field_count (s + 1, &key->schar,
3394                                      N_("invalid number after `.'"));
3395               if (! key->schar--)
3396                 {
3397                   /* Provoke with `sort -k1.0' */
3398                   badfieldspec (optarg, N_("character offset is zero"));
3399                 }
3400             }
3401           if (! (key->sword | key->schar))
3402             key->sword = SIZE_MAX;
3403           s = set_ordering (s, key, bl_start);
3404           if (*s != ',')
3405             {
3406               key->eword = SIZE_MAX;
3407               key->echar = 0;
3408             }
3409           else
3410             {
3411               /* Get POS2. */
3412               s = parse_field_count (s + 1, &key->eword,
3413                                      N_("invalid number after `,'"));
3414               if (! key->eword--)
3415                 {
3416                   /* Provoke with `sort -k1,0' */
3417                   badfieldspec (optarg, N_("field number is zero"));
3418                 }
3419               if (*s == '.')
3420                 {
3421                   s = parse_field_count (s + 1, &key->echar,
3422                                          N_("invalid number after `.'"));
3423                 }
3424               s = set_ordering (s, key, bl_end);
3425             }
3426           if (*s)
3427             badfieldspec (optarg, N_("stray character in field spec"));
3428           insertkey (key);
3429           break;
3430
3431         case 'm':
3432           mergeonly = true;
3433           break;
3434
3435         case NMERGE_OPTION:
3436           specify_nmerge (oi, c, optarg);
3437           break;
3438
3439         case 'o':
3440           if (outfile && !STREQ (outfile, optarg))
3441             error (SORT_FAILURE, 0, _("multiple output files specified"));
3442           outfile = optarg;
3443           break;
3444
3445         case RANDOM_SOURCE_OPTION:
3446           if (random_source && !STREQ (random_source, optarg))
3447             error (SORT_FAILURE, 0, _("multiple random sources specified"));
3448           random_source = optarg;
3449           break;
3450
3451         case 's':
3452           stable = true;
3453           break;
3454
3455         case 'S':
3456           specify_sort_size (oi, c, optarg);
3457           break;
3458
3459         case 't':
3460           {
3461             char newtab = optarg[0];
3462             if (! newtab)
3463               error (SORT_FAILURE, 0, _("empty tab"));
3464             if (optarg[1])
3465               {
3466                 if (STREQ (optarg, "\\0"))
3467                   newtab = '\0';
3468                 else
3469                   {
3470                     /* Provoke with `sort -txx'.  Complain about
3471                        "multi-character tab" instead of "multibyte tab", so
3472                        that the diagnostic's wording does not need to be
3473                        changed once multibyte characters are supported.  */
3474                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
3475                            quote (optarg));
3476                   }
3477               }
3478             if (tab != TAB_DEFAULT && tab != newtab)
3479               error (SORT_FAILURE, 0, _("incompatible tabs"));
3480             tab = newtab;
3481           }
3482           break;
3483
3484         case 'T':
3485           add_temp_dir (optarg);
3486           break;
3487
3488         case 'u':
3489           unique = true;
3490           break;
3491
3492         case 'y':
3493           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3494              through Solaris 7.  It is also accepted by many non-Solaris
3495              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3496              -y is marked as obsolete starting with Solaris 8 (1999), but is
3497              still accepted as of Solaris 10 prerelease (2004).
3498
3499              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3500              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3501              and which in general ignores the argument after "-y" if it
3502              consists entirely of digits (it can even be empty).  */
3503           if (optarg == argv[optind - 1])
3504             {
3505               char const *p;
3506               for (p = optarg; ISDIGIT (*p); p++)
3507                 continue;
3508               optind -= (*p != '\0');
3509             }
3510           break;
3511
3512         case 'z':
3513           eolchar = 0;
3514           break;
3515
3516         case_GETOPT_HELP_CHAR;
3517
3518         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3519
3520         default:
3521           usage (SORT_FAILURE);
3522         }
3523     }
3524
3525   if (files_from)
3526     {
3527       FILE *stream;
3528
3529       /* When using --files0-from=F, you may not specify any files
3530          on the command-line.  */
3531       if (nfiles)
3532         {
3533           error (0, 0, _("extra operand %s"), quote (files[0]));
3534           fprintf (stderr, "%s\n",
3535                    _("file operands cannot be combined with --files0-from"));
3536           usage (SORT_FAILURE);
3537         }
3538
3539       if (STREQ (files_from, "-"))
3540         stream = stdin;
3541       else
3542         {
3543           stream = fopen (files_from, "r");
3544           if (stream == NULL)
3545             error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3546                    quote (files_from));
3547         }
3548
3549       readtokens0_init (&tok);
3550
3551       if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3552         error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3553                quote (files_from));
3554
3555       if (tok.n_tok)
3556         {
3557           size_t i;
3558           free (files);
3559           files = tok.tok;
3560           nfiles = tok.n_tok;
3561           for (i = 0; i < nfiles; i++)
3562           {
3563               if (STREQ (files[i], "-"))
3564                 error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3565                                           "no file name of %s allowed"),
3566                        quote (files[i]));
3567               else if (files[i][0] == '\0')
3568                 {
3569                   /* Using the standard `filename:line-number:' prefix here is
3570                      not totally appropriate, since NUL is the separator, not NL,
3571                      but it might be better than nothing.  */
3572                   unsigned long int file_number = i + 1;
3573                   error (SORT_FAILURE, 0,
3574                          _("%s:%lu: invalid zero-length file name"),
3575                          quotearg_colon (files_from), file_number);
3576                 }
3577           }
3578         }
3579       else
3580         error (SORT_FAILURE, 0, _("no input from %s"),
3581                quote (files_from));
3582     }
3583
3584   /* Inheritance of global options to individual keys. */
3585   for (key = keylist; key; key = key->next)
3586     {
3587       if (! (key->ignore
3588              || key->translate
3589              || (key->skipsblanks
3590                  | key->reverse
3591                  | key->skipeblanks
3592                  | key->month
3593                  | key->numeric
3594                  | key->version
3595                  | key->general_numeric
3596                  | key->human_numeric
3597                  | key->random)))
3598         {
3599           key->ignore = gkey.ignore;
3600           key->translate = gkey.translate;
3601           key->skipsblanks = gkey.skipsblanks;
3602           key->skipeblanks = gkey.skipeblanks;
3603           key->month = gkey.month;
3604           key->numeric = gkey.numeric;
3605           key->general_numeric = gkey.general_numeric;
3606           key->human_numeric = gkey.human_numeric;
3607           key->random = gkey.random;
3608           key->reverse = gkey.reverse;
3609           key->version = gkey.version;
3610         }
3611
3612       need_random |= key->random;
3613     }
3614
3615   if (!keylist && (gkey.ignore
3616                    || gkey.translate
3617                    || (gkey.skipsblanks
3618                        | gkey.skipeblanks
3619                        | gkey.month
3620                        | gkey.numeric
3621                        | gkey.general_numeric
3622                        | gkey.human_numeric
3623                        | gkey.random
3624                        | gkey.version)))
3625     {
3626       insertkey (&gkey);
3627       need_random |= gkey.random;
3628     }
3629
3630   check_ordering_compatibility ();
3631
3632   reverse = gkey.reverse;
3633
3634   if (need_random)
3635     {
3636       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3637       if (! randread_source)
3638         die (_("open failed"), random_source);
3639     }
3640
3641   if (temp_dir_count == 0)
3642     {
3643       char const *tmp_dir = getenv ("TMPDIR");
3644       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3645     }
3646
3647   if (nfiles == 0)
3648     {
3649       static char *minus = (char *) "-";
3650       nfiles = 1;
3651       free (files);
3652       files = &minus;
3653     }
3654
3655   /* Need to re-check that we meet the minimum requirement for memory
3656      usage with the final value for NMERGE. */
3657   if (0 < sort_size)
3658     sort_size = MAX (sort_size, MIN_SORT_SIZE);
3659
3660   if (checkonly)
3661     {
3662       if (nfiles > 1)
3663         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3664                quote (files[1]), checkonly);
3665
3666       if (outfile)
3667         {
3668           static char opts[] = {0, 'o', 0};
3669           opts[0] = checkonly;
3670           incompatible_options (opts);
3671         }
3672
3673       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3674          input is not properly sorted.  */
3675       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3676     }
3677
3678   if (mergeonly)
3679     {
3680       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3681       size_t i;
3682
3683       for (i = 0; i < nfiles; ++i)
3684         sortfiles[i].name = files[i];
3685
3686       merge (sortfiles, 0, nfiles, outfile);
3687       IF_LINT (free (sortfiles));
3688     }
3689   else
3690     sort (files, nfiles, outfile);
3691
3692   if (have_read_stdin && fclose (stdin) == EOF)
3693     die (_("close failed"), "-");
3694
3695   exit (EXIT_SUCCESS);
3696 }