HAMMER VFS - Major retooling of the refcount mechanics, and fix a deadlock
[dragonfly.git] / gnu / usr.bin / sort / sort.c
blobf9d8d5ee93a66c544e2c564430b68531373bbecf
1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
23 * $FreeBSD: src/gnu/usr.bin/sort/sort.c,v 1.15.2.4 2002/04/17 11:41:42 ache Exp $
24 * $DragonFly: src/gnu/usr.bin/sort/sort.c,v 1.3 2004/02/03 19:22:59 dillon Exp $
27 #include <config.h>
29 /* Get isblank from GNU libc. */
30 #define _GNU_SOURCE
32 #include <sys/types.h>
33 #include <signal.h>
34 #include <stdio.h>
35 #ifdef __DragonFly__
36 #include <locale.h>
37 #endif
38 #include "system.h"
39 #include "version.h"
40 #include "long-options.h"
41 #include "error.h"
42 #include "xstrtod.h"
44 #ifdef HAVE_LIMITS_H
45 #include <limits.h>
46 #else
47 #ifndef UCHAR_MAX
48 #define UCHAR_MAX 255
49 #endif
50 #endif
51 #ifndef STDC_HEADERS
52 char *malloc ();
53 char *realloc ();
54 void free ();
55 #endif
57 /* Undefine, to avoid warning about redefinition on some systems. */
58 #undef min
59 #define min(a, b) ((a) < (b) ? (a) : (b))
61 #define UCHAR_LIM (UCHAR_MAX + 1)
62 #define UCHAR(c) ((unsigned char) (c))
64 #ifndef DEFAULT_TMPDIR
65 #define DEFAULT_TMPDIR "/tmp"
66 #endif
68 /* The kind of blanks for '-b' to skip in various options. */
69 enum blanktype { bl_start, bl_end, bl_both };
71 /* Lines are held in core as counted strings. */
72 struct line
74 char *text; /* Text of the line. */
75 int length; /* Length not including final newline. */
76 char *keybeg; /* Start of first key. */
77 char *keylim; /* Limit of first key. */
80 /* Arrays of lines. */
81 struct lines
83 struct line *lines; /* Dynamically allocated array of lines. */
84 int used; /* Number of slots used. */
85 int alloc; /* Number of slots allocated. */
86 int limit; /* Max number of slots to allocate. */
89 /* Input buffers. */
90 struct buffer
92 char *buf; /* Dynamically allocated buffer. */
93 int used; /* Number of bytes used. */
94 int alloc; /* Number of bytes allocated. */
95 int left; /* Number of bytes left after line parsing. */
98 struct keyfield
100 int sword; /* Zero-origin 'word' to start at. */
101 int schar; /* Additional characters to skip. */
102 int skipsblanks; /* Skip leading white space at start. */
103 int eword; /* Zero-origin first word after field. */
104 int echar; /* Additional characters in field. */
105 int skipeblanks; /* Skip trailing white space at finish. */
106 int *ignore; /* Boolean array of characters to ignore. */
107 char *translate; /* Translation applied to characters. */
108 int numeric; /* Flag for numeric comparison. Handle
109 strings of digits with optional decimal
110 point, but no exponential notation. */
111 int general_numeric; /* Flag for general, numeric comparison.
112 Handle numbers in exponential notation. */
113 int month; /* Flag for comparison by month name. */
114 int reverse; /* Reverse the sense of comparison. */
115 struct keyfield *next; /* Next keyfield to try. */
118 struct month
120 char *name;
121 int val;
124 /* The name this program was run with. */
125 char *program_name;
127 /* Table of digits. */
128 static int digits[UCHAR_LIM];
130 /* Table of white space. */
131 static int blanks[UCHAR_LIM];
133 /* Table of non-printing characters. */
134 static int nonprinting[UCHAR_LIM];
136 /* Table of non-dictionary characters (not letters, digits, or blanks). */
137 static int nondictionary[UCHAR_LIM];
139 /* Translation table folding lower case to upper. */
140 static char fold_toupper[UCHAR_LIM];
142 /* Table mapping 3-letter month names to integers.
143 Alphabetic order allows binary search. */
144 static struct month const monthtab[] =
146 {"APR", 4},
147 {"AUG", 8},
148 {"DEC", 12},
149 {"FEB", 2},
150 {"JAN", 1},
151 {"JUL", 7},
152 {"JUN", 6},
153 {"MAR", 3},
154 {"MAY", 5},
155 {"NOV", 11},
156 {"OCT", 10},
157 {"SEP", 9}
160 /* During the merge phase, the number of files to merge at once. */
161 #define NMERGE 16
163 /* Initial buffer size for in core sorting. Will not grow unless a
164 line longer than this is seen. */
165 static int sortalloc = 512 * 1024;
167 /* Initial buffer size for in core merge buffers. Bear in mind that
168 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
169 static int mergealloc = 16 * 1024;
171 /* Guess of average line length. */
172 static int linelength = 30;
174 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
175 #define LINEALLOC (256 * 1024)
177 /* Prefix for temporary file names. */
178 static char *temp_file_prefix;
180 /* Flag to reverse the order of all comparisons. */
181 static int reverse;
183 /* Flag for stable sort. This turns off the last ditch bytewise
184 comparison of lines, and instead leaves lines in the same order
185 they were read if all keys compare equal. */
186 static int stable;
188 /* Tab character separating fields. If NUL, then fields are separated
189 by the empty string between a non-whitespace character and a whitespace
190 character. */
191 static char tab;
193 /* Flag to remove consecutive duplicate lines from the output.
194 Only the last of a sequence of equal lines will be output. */
195 static int unique;
197 /* Nonzero if any of the input files are the standard input. */
198 static int have_read_stdin;
200 /* Lists of key field comparisons to be tried. */
201 static struct keyfield keyhead;
203 #ifdef __DragonFly__
204 static unsigned char decimal_point;
206 static int
207 COLLDIFF (int a, int b)
209 static char s[2][2];
211 if ((unsigned char)a == (unsigned char)b)
212 return 0;
213 s[0][0] = a;
214 s[1][0] = b;
215 return strcoll(s[0], s[1]);
218 static int
219 collcmp(char *a, char *b, int mini)
221 char sa, sb;
222 int r;
224 sa = a[mini];
225 a[mini] = '\0';
226 sb = b[mini];
227 b[mini] = '\0';
228 r = strcoll(a, b);
229 a[mini] = sa;
230 b[mini] = sb;
231 return r;
233 #endif /* __DragonFly__ */
235 static void
236 usage (int status)
238 if (status != 0)
239 fprintf (stderr, _("Try `%s --help' for more information.\n"),
240 program_name);
241 else
243 printf (_("\
244 Usage: %s [OPTION]... [FILE]...\n\
246 program_name);
247 printf (_("\
248 Write sorted concatenation of all FILE(s) to standard output.\n\
250 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
251 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
252 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
253 -b ignore leading blanks in sort fields or keys\n\
254 -c check if given files already sorted, do not sort\n\
255 -d consider only [a-zA-Z0-9 ] characters in keys\n\
256 -f fold lower case to upper case characters in keys\n\
257 -g compare according to general numerical value, imply -b\n\
258 -i consider only [\\040-\\0176] characters in keys\n\
259 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
260 -m merge already sorted files, do not sort\n\
261 -n compare according to string numerical value, imply -b\n\
262 -o FILE write result on FILE instead of standard output\n\
263 -r reverse the result of comparisons\n\
264 -s stabilize sort by disabling last resort comparison\n\
265 -t SEP use SEParator instead of non- to whitespace transition\n\
266 -u with -c, check for strict ordering\n\
267 -u with -m, only output the first of an equal sequence\n\
268 --help display this help and exit\n\
269 --version output version information and exit\n\
271 POS is F[.C][OPTS], where F is the field number and C the character\n\
272 position in the field, both counted from zero. OPTS is made up of one\n\
273 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
274 for that key. If no key given, use the entire line as key. With no\n\
275 FILE, or when FILE is -, read standard input.\n\
277 , DEFAULT_TMPDIR);
279 exit (status);
282 /* The list of temporary files. */
283 static struct tempnode
285 char *name;
286 struct tempnode *next;
287 } temphead;
289 /* Clean up any remaining temporary files. */
291 static void
292 cleanup (void)
294 struct tempnode *node;
296 for (node = temphead.next; node; node = node->next)
297 unlink (node->name);
300 /* Allocate N bytes of memory dynamically, with error checking. */
302 static char *
303 xmalloc (unsigned int n)
305 char *p;
307 p = malloc (n);
308 if (p == 0)
310 error (0, 0, _("virtual memory exhausted"));
311 cleanup ();
312 exit (2);
314 return p;
317 /* Change the size of an allocated block of memory P to N bytes,
318 with error checking.
319 If P is NULL, run xmalloc.
320 If N is 0, run free and return NULL. */
322 static char *
323 xrealloc (char *p, unsigned int n)
325 if (p == 0)
326 return xmalloc (n);
327 if (n == 0)
329 free (p);
330 return 0;
332 p = realloc (p, n);
333 if (p == 0)
335 error (0, 0, _("virtual memory exhausted"));
336 cleanup ();
337 exit (2);
339 return p;
342 static FILE *
343 xtmpfopen (const char *file)
345 FILE *fp;
346 int fd;
348 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
349 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
351 error (0, errno, "%s", file);
352 cleanup ();
353 exit (2);
356 return fp;
359 static FILE *
360 xfopen (const char *file, const char *how)
362 FILE *fp;
364 if (strcmp (file, "-") == 0)
366 fp = stdin;
368 else
370 if ((fp = fopen (file, how)) == NULL)
372 error (0, errno, "%s", file);
373 cleanup ();
374 exit (2);
378 if (fp == stdin)
379 have_read_stdin = 1;
380 return fp;
383 static void
384 xfclose (FILE *fp)
386 if (fp == stdin)
388 /* Allow reading stdin from tty more than once. */
389 if (feof (fp))
390 clearerr (fp);
392 else if (fp == stdout)
394 if (fflush (fp) != 0)
396 error (0, errno, _("flushing file"));
397 cleanup ();
398 exit (2);
401 else
403 if (fclose (fp) != 0)
405 error (0, errno, _("error closing file"));
406 cleanup ();
407 exit (2);
412 static void
413 xfwrite (const char *buf, int size, int nelem, FILE *fp)
415 if (fwrite (buf, size, nelem, fp) != nelem)
417 error (0, errno, _("write error"));
418 cleanup ();
419 exit (2);
423 /* Return a name for a temporary file. */
425 static char *
426 tempname (void)
428 int fd;
429 int len = strlen (temp_file_prefix);
430 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
431 struct tempnode *node;
433 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
434 sprintf (name,
435 "%s%ssortXXXXXX",
436 temp_file_prefix,
437 (len && temp_file_prefix[len - 1] != '/') ? "/" : "");
439 if ((fd = mkstemp(name)) == -1)
441 error (0, errno, _("mkstemp error"));
442 cleanup ();
443 exit (2);
445 close(fd);
447 node->name = name;
448 node->next = temphead.next;
449 temphead.next = node;
450 return name;
453 /* Search through the list of temporary files for NAME;
454 remove it if it is found on the list. */
456 static void
457 zaptemp (char *name)
459 struct tempnode *node, *temp;
461 for (node = &temphead; node->next; node = node->next)
462 if (!strcmp (name, node->next->name))
463 break;
464 if (node->next)
466 temp = node->next;
467 unlink (temp->name);
468 free (temp->name);
469 node->next = temp->next;
470 free ((char *) temp);
474 /* Initialize the character class tables. */
476 static void
477 inittables (void)
479 int i;
481 for (i = 0; i < UCHAR_LIM; ++i)
483 if (ISBLANK (i))
484 blanks[i] = 1;
485 if (ISDIGIT (i))
486 digits[i] = 1;
487 if (!ISPRINT (i))
488 nonprinting[i] = 1;
489 if (!ISALNUM (i) && !ISBLANK (i))
490 nondictionary[i] = 1;
491 if (ISLOWER (i))
492 fold_toupper[i] = toupper (i);
493 else
494 fold_toupper[i] = i;
498 /* Initialize BUF, allocating ALLOC bytes initially. */
500 static void
501 initbuf (struct buffer *buf, int alloc)
503 buf->alloc = alloc;
504 buf->buf = xmalloc (buf->alloc);
505 buf->used = buf->left = 0;
508 /* Fill BUF reading from FP, moving buf->left bytes from the end
509 of buf->buf to the beginning first. If EOF is reached and the
510 file wasn't terminated by a newline, supply one. Return a count
511 of bytes buffered. */
513 static int
514 fillbuf (struct buffer *buf, FILE *fp)
516 int cc;
518 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
519 buf->used = buf->left;
521 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
523 if (buf->used == buf->alloc)
525 buf->alloc *= 2;
526 buf->buf = xrealloc (buf->buf, buf->alloc);
528 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
529 if (ferror (fp))
531 error (0, errno, _("read error"));
532 cleanup ();
533 exit (2);
535 buf->used += cc;
538 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
540 if (buf->used == buf->alloc)
542 buf->alloc *= 2;
543 buf->buf = xrealloc (buf->buf, buf->alloc);
545 buf->buf[buf->used++] = '\n';
548 return buf->used;
551 /* Initialize LINES, allocating space for ALLOC lines initially.
552 LIMIT is the maximum possible number of lines to allocate space
553 for, ever. */
555 static void
556 initlines (struct lines *lines, int alloc, int limit)
558 lines->alloc = alloc;
559 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
560 lines->used = 0;
561 lines->limit = limit;
564 /* Return a pointer to the first character of the field specified
565 by KEY in LINE. */
567 static char *
568 begfield (const struct line *line, const struct keyfield *key)
570 register char *ptr = line->text, *lim = ptr + line->length;
571 register int sword = key->sword, schar = key->schar;
573 if (tab)
574 while (ptr < lim && sword--)
576 while (ptr < lim && *ptr != tab)
577 ++ptr;
578 if (ptr < lim)
579 ++ptr;
581 else
582 while (ptr < lim && sword--)
584 while (ptr < lim && blanks[UCHAR (*ptr)])
585 ++ptr;
586 while (ptr < lim && !blanks[UCHAR (*ptr)])
587 ++ptr;
590 if (key->skipsblanks)
591 while (ptr < lim && blanks[UCHAR (*ptr)])
592 ++ptr;
594 if (ptr + schar <= lim)
595 ptr += schar;
596 else
597 ptr = lim;
599 return ptr;
602 /* Return the limit of (a pointer to the first character after) the field
603 in LINE specified by KEY. */
605 static char *
606 limfield (const struct line *line, const struct keyfield *key)
608 register char *ptr = line->text, *lim = ptr + line->length;
609 register int eword = key->eword, echar = key->echar;
611 /* Note: from the POSIX spec:
612 The leading field separator itself is included in
613 a field when -t is not used. FIXME: move this comment up... */
615 /* Move PTR past EWORD fields or to one past the last byte on LINE,
616 whichever comes first. If there are more than EWORD fields, leave
617 PTR pointing at the beginning of the field having zero-based index,
618 EWORD. If a delimiter character was specified (via -t), then that
619 `beginning' is the first character following the delimiting TAB.
620 Otherwise, leave PTR pointing at the first `blank' character after
621 the preceding field. */
622 if (tab)
623 while (ptr < lim && eword--)
625 while (ptr < lim && *ptr != tab)
626 ++ptr;
627 if (ptr < lim && (eword || echar > 0))
628 ++ptr;
630 else
631 while (ptr < lim && eword--)
633 while (ptr < lim && blanks[UCHAR (*ptr)])
634 ++ptr;
635 while (ptr < lim && !blanks[UCHAR (*ptr)])
636 ++ptr;
639 /* Make LIM point to the end of (one byte past) the current field. */
640 if (tab)
642 char *newlim;
643 newlim = memchr (ptr, tab, lim - ptr);
644 if (newlim)
645 lim = newlim;
647 else
649 char *newlim;
650 newlim = ptr;
651 while (newlim < lim && blanks[UCHAR (*newlim)])
652 ++newlim;
653 while (newlim < lim && !blanks[UCHAR (*newlim)])
654 ++newlim;
655 lim = newlim;
658 /* If we're skipping leading blanks, don't start counting characters
659 until after skipping past any leading blanks. */
660 if (key->skipsblanks)
661 while (ptr < lim && blanks[UCHAR (*ptr)])
662 ++ptr;
664 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
665 if (ptr + echar <= lim)
666 ptr += echar;
667 else
668 ptr = lim;
670 return ptr;
673 /* FIXME */
675 void
676 trim_trailing_blanks (const char *a_start, char **a_end)
678 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
679 --(*a_end);
682 /* Find the lines in BUF, storing pointers and lengths in LINES.
683 Also replace newlines in BUF with NULs. */
685 static void
686 findlines (struct buffer *buf, struct lines *lines)
688 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
689 struct keyfield *key = keyhead.next;
691 lines->used = 0;
693 while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
694 && lines->used < lines->limit)
696 /* There are various places in the code that rely on a NUL
697 being at the end of in-core lines; NULs inside the lines
698 will not cause trouble, though. */
699 *ptr = '\0';
701 if (lines->used == lines->alloc)
703 lines->alloc *= 2;
704 lines->lines = (struct line *)
705 xrealloc ((char *) lines->lines,
706 lines->alloc * sizeof (struct line));
709 lines->lines[lines->used].text = beg;
710 lines->lines[lines->used].length = ptr - beg;
712 /* Precompute the position of the first key for efficiency. */
713 if (key)
715 if (key->eword >= 0)
716 lines->lines[lines->used].keylim =
717 limfield (&lines->lines[lines->used], key);
718 else
719 lines->lines[lines->used].keylim = ptr;
721 if (key->sword >= 0)
722 lines->lines[lines->used].keybeg =
723 begfield (&lines->lines[lines->used], key);
724 else
726 if (key->skipsblanks)
727 while (blanks[UCHAR (*beg)])
728 ++beg;
729 lines->lines[lines->used].keybeg = beg;
731 if (key->skipeblanks)
733 trim_trailing_blanks (lines->lines[lines->used].keybeg,
734 &lines->lines[lines->used].keylim);
737 else
739 lines->lines[lines->used].keybeg = 0;
740 lines->lines[lines->used].keylim = 0;
743 ++lines->used;
744 beg = ptr + 1;
747 buf->left = lim - beg;
750 /* Compare strings A and B containing decimal fractions < 1. Each string
751 should begin with a decimal point followed immediately by the digits
752 of the fraction. Strings not of this form are considered to be zero. */
754 static int
755 fraccompare (register const char *a, register const char *b)
757 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
759 #ifdef __DragonFly__
760 if (tmpa == decimal_point && tmpb == decimal_point)
761 #else
762 if (tmpa == '.' && tmpb == '.')
763 #endif
766 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
767 while (tmpa == tmpb && digits[tmpa]);
768 if (digits[tmpa] && digits[tmpb])
769 return tmpa - tmpb;
770 if (digits[tmpa])
772 while (tmpa == '0')
773 tmpa = UCHAR (*++a);
774 if (digits[tmpa])
775 return 1;
776 return 0;
778 if (digits[tmpb])
780 while (tmpb == '0')
781 tmpb = UCHAR (*++b);
782 if (digits[tmpb])
783 return -1;
784 return 0;
786 return 0;
788 #ifdef __DragonFly__
789 else if (tmpa == decimal_point)
790 #else
791 else if (tmpa == '.')
792 #endif
795 tmpa = UCHAR (*++a);
796 while (tmpa == '0');
797 if (digits[tmpa])
798 return 1;
799 return 0;
801 #ifdef __DragonFly__
802 else if (tmpb == decimal_point)
803 #else
804 else if (tmpb == '.')
805 #endif
808 tmpb = UCHAR (*++b);
809 while (tmpb == '0');
810 if (digits[tmpb])
811 return -1;
812 return 0;
814 return 0;
817 /* Compare strings A and B as numbers without explicitly converting them to
818 machine numbers. Comparatively slow for short strings, but asymptotically
819 hideously fast. */
821 static int
822 numcompare (register const char *a, register const char *b)
824 register int tmpa, tmpb, loga, logb, tmp;
826 tmpa = UCHAR (*a);
827 tmpb = UCHAR (*b);
829 while (blanks[tmpa])
830 tmpa = UCHAR (*++a);
831 while (blanks[tmpb])
832 tmpb = UCHAR (*++b);
834 if (tmpa == '-')
837 tmpa = UCHAR (*++a);
838 while (tmpa == '0');
839 if (tmpb != '-')
841 #ifdef __DragonFly__
842 if (tmpa == decimal_point)
843 #else
844 if (tmpa == '.')
845 #endif
847 tmpa = UCHAR (*++a);
848 while (tmpa == '0');
849 if (digits[tmpa])
850 return -1;
851 while (tmpb == '0')
852 tmpb = UCHAR (*++b);
853 #ifdef __DragonFly__
854 if (tmpb == decimal_point)
855 #else
856 if (tmpb == '.')
857 #endif
859 tmpb = UCHAR (*++b);
860 while (tmpb == '0');
861 if (digits[tmpb])
862 return -1;
863 return 0;
866 tmpb = UCHAR (*++b);
867 while (tmpb == '0');
869 while (tmpa == tmpb && digits[tmpa])
870 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
872 #ifdef __DragonFly__
873 if ((tmpa == decimal_point && !digits[tmpb]) ||
874 (tmpb == decimal_point && !digits[tmpa]))
875 #else
876 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
877 #endif
878 return -fraccompare (a, b);
880 if (digits[tmpa])
881 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
883 else
884 loga = 0;
886 if (digits[tmpb])
887 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
889 else
890 logb = 0;
892 if ((tmp = logb - loga) != 0)
893 return tmp;
895 if (!loga)
896 return 0;
898 #ifdef __DragonFly__
899 return COLLDIFF (tmpb, tmpa);
900 #else
901 return tmpb - tmpa;
902 #endif
904 else if (tmpb == '-')
907 tmpb = UCHAR (*++b);
908 while (tmpb == '0');
909 #ifdef __DragonFly__
910 if (tmpb == decimal_point)
911 #else
912 if (tmpb == '.')
913 #endif
915 tmpb = UCHAR (*++b);
916 while (tmpb == '0');
917 if (digits[tmpb])
918 return 1;
919 while (tmpa == '0')
920 tmpa = UCHAR (*++a);
921 #ifdef __DragonFly__
922 if (tmpa == decimal_point)
923 #else
924 if (tmpa == '.')
925 #endif
927 tmpa = UCHAR (*++a);
928 while (tmpa == '0');
929 if (digits[tmpa])
930 return 1;
931 return 0;
933 else
935 while (tmpa == '0')
936 tmpa = UCHAR (*++a);
937 while (tmpb == '0')
938 tmpb = UCHAR (*++b);
940 while (tmpa == tmpb && digits[tmpa])
941 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
943 #ifdef __DragonFly__
944 if ((tmpa == decimal_point && !digits[tmpb]) ||
945 (tmpb == decimal_point && !digits[tmpa]))
946 #else
947 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
948 #endif
949 return fraccompare (a, b);
951 if (digits[tmpa])
952 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
954 else
955 loga = 0;
957 if (digits[tmpb])
958 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
960 else
961 logb = 0;
963 if ((tmp = loga - logb) != 0)
964 return tmp;
966 if (!loga)
967 return 0;
969 #ifdef __DragonFly__
970 return COLLDIFF (tmpa, tmpb);
971 #else
972 return tmpa - tmpb;
973 #endif
977 static int
978 general_numcompare (const char *sa, const char *sb)
980 double a, b;
981 /* FIXME: add option to warn about failed conversions. */
982 /* FIXME: maybe add option to try expensive FP conversion
983 only if A and B can't be compared more cheaply/accurately. */
984 if (xstrtod (sa, NULL, &a))
986 a = 0;
988 if (xstrtod (sb, NULL, &b))
990 b = 0;
992 return a == b ? 0 : a < b ? -1 : 1;
995 /* Return an integer <= 12 associated with month name S with length LEN,
996 0 if the name in S is not recognized. */
998 static int
999 getmonth (const char *s, int len)
1001 char month[4];
1002 register int i, lo = 0, hi = 12;
1004 while (len > 0 && blanks[UCHAR(*s)])
1005 ++s, --len;
1007 if (len < 3)
1008 return 0;
1010 for (i = 0; i < 3; ++i)
1011 month[i] = fold_toupper[UCHAR (s[i])];
1012 month[3] = '\0';
1014 while (hi - lo > 1)
1015 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
1016 hi = (lo + hi) / 2;
1017 else
1018 lo = (lo + hi) / 2;
1019 if (!strcmp (month, monthtab[lo].name))
1020 return monthtab[lo].val;
1021 return 0;
1024 /* Compare two lines A and B trying every key in sequence until there
1025 are no more keys or a difference is found. */
1027 static int
1028 keycompare (const struct line *a, const struct line *b)
1030 register char *texta, *textb, *lima, *limb, *translate;
1031 register int *ignore;
1032 struct keyfield *key;
1033 int diff = 0, iter = 0, lena, lenb;
1035 for (key = keyhead.next; key; key = key->next, ++iter)
1037 ignore = key->ignore;
1038 translate = key->translate;
1040 /* Find the beginning and limit of each field. */
1041 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1043 if (key->eword >= 0)
1044 lima = limfield (a, key), limb = limfield (b, key);
1045 else
1046 lima = a->text + a->length, limb = b->text + b->length;
1048 if (key->sword >= 0)
1049 texta = begfield (a, key), textb = begfield (b, key);
1050 else
1052 texta = a->text, textb = b->text;
1053 if (key->skipsblanks)
1055 while (texta < lima && blanks[UCHAR (*texta)])
1056 ++texta;
1057 while (textb < limb && blanks[UCHAR (*textb)])
1058 ++textb;
1062 else
1064 /* For the first iteration only, the key positions have
1065 been precomputed for us. */
1066 texta = a->keybeg, lima = a->keylim;
1067 textb = b->keybeg, limb = b->keylim;
1070 /* Find the lengths. */
1071 lena = lima - texta, lenb = limb - textb;
1072 if (lena < 0)
1073 lena = 0;
1074 if (lenb < 0)
1075 lenb = 0;
1077 if (key->skipeblanks)
1079 char *a_end = texta + lena;
1080 char *b_end = textb + lenb;
1081 trim_trailing_blanks (texta, &a_end);
1082 trim_trailing_blanks (textb, &b_end);
1083 lena = a_end - texta;
1084 lenb = b_end - textb;
1087 /* Actually compare the fields. */
1088 if (key->numeric)
1090 if (*lima || *limb)
1092 char savea = *lima, saveb = *limb;
1094 *lima = *limb = '\0';
1095 diff = numcompare (texta, textb);
1096 *lima = savea, *limb = saveb;
1098 else
1099 diff = numcompare (texta, textb);
1101 if (diff)
1102 return key->reverse ? -diff : diff;
1103 continue;
1105 else if (key->general_numeric)
1107 if (*lima || *limb)
1109 char savea = *lima, saveb = *limb;
1111 *lima = *limb = '\0';
1112 diff = general_numcompare (texta, textb);
1113 *lima = savea, *limb = saveb;
1115 else
1116 diff = general_numcompare (texta, textb);
1118 if (diff)
1119 return key->reverse ? -diff : diff;
1120 continue;
1122 else if (key->month)
1124 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1125 if (diff)
1126 return key->reverse ? -diff : diff;
1127 continue;
1129 else if (ignore && translate)
1131 #ifdef __DragonFly__
1132 #define CMP_FUNC(A, B) COLLDIFF ((A), (B))
1133 #else
1134 #define CMP_FUNC(A, B) (A) - (B)
1135 #endif
1136 #define CMP_WITH_IGNORE(A, B) \
1137 do \
1139 while (texta < lima && textb < limb) \
1141 while (texta < lima && ignore[UCHAR (*texta)]) \
1142 ++texta; \
1143 while (textb < limb && ignore[UCHAR (*textb)]) \
1144 ++textb; \
1145 if (texta < lima && textb < limb) \
1147 if ((A) != (B)) \
1149 diff = CMP_FUNC((A), (B)); \
1150 break; \
1152 ++texta; \
1153 ++textb; \
1156 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1157 diff = -1; \
1158 else if (texta < lima && textb == limb \
1159 && !ignore[UCHAR (*texta)]) \
1160 diff = 1; \
1163 if (diff == 0) \
1165 while (texta < lima && ignore[UCHAR (*texta)]) \
1166 ++texta; \
1167 while (textb < limb && ignore[UCHAR (*textb)]) \
1168 ++textb; \
1170 if (texta == lima && textb < limb) \
1171 diff = -1; \
1172 else if (texta < lima && textb == limb) \
1173 diff = 1; \
1175 /* Relative lengths are meaningless if characters were ignored. \
1176 Handling this case here avoids what might be an invalid length \
1177 comparison below. */ \
1178 if (diff == 0 && texta == lima && textb == limb) \
1179 return 0; \
1181 while (0)
1183 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1184 else if (ignore)
1185 CMP_WITH_IGNORE (*texta, *textb);
1186 else if (translate)
1187 while (texta < lima && textb < limb)
1189 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1191 #ifdef __DragonFly__
1192 diff = COLLDIFF (translate[UCHAR (*--texta)],
1193 translate[UCHAR (*--textb)]);
1194 #else
1195 diff = (translate[UCHAR (*--texta)]
1196 - translate[UCHAR (*--textb)]);
1197 #endif
1198 break;
1201 else
1202 #ifdef __DragonFly__
1203 diff = collcmp (texta, textb, min (lena, lenb));
1204 #else
1205 diff = memcmp (texta, textb, min (lena, lenb));
1206 #endif
1208 if (diff)
1209 return key->reverse ? -diff : diff;
1210 if ((diff = lena - lenb) != 0)
1211 return key->reverse ? -diff : diff;
1214 return 0;
1217 /* Compare two lines A and B, returning negative, zero, or positive
1218 depending on whether A compares less than, equal to, or greater than B. */
1220 static int
1221 compare (register const struct line *a, register const struct line *b)
1223 int diff, tmpa, tmpb, mini;
1225 /* First try to compare on the specified keys (if any).
1226 The only two cases with no key at all are unadorned sort,
1227 and unadorned sort -r. */
1228 if (keyhead.next)
1230 diff = keycompare (a, b);
1231 if (diff != 0)
1232 return diff;
1233 if (unique || stable)
1234 return 0;
1237 /* If the keys all compare equal (or no keys were specified)
1238 fall through to the default byte-by-byte comparison. */
1239 tmpa = a->length, tmpb = b->length;
1240 mini = min (tmpa, tmpb);
1241 if (mini == 0)
1242 diff = tmpa - tmpb;
1243 else
1245 char *ap = a->text, *bp = b->text;
1247 #ifndef __DragonFly__
1248 diff = UCHAR (*ap) - UCHAR (*bp);
1249 if (diff == 0)
1251 #endif
1252 #ifdef __DragonFly__
1253 diff = collcmp (ap, bp, mini);
1254 #else
1255 diff = memcmp (ap, bp, mini);
1256 #endif
1257 if (diff == 0)
1258 diff = tmpa - tmpb;
1259 #ifndef __DragonFly__
1261 #endif
1264 return reverse ? -diff : diff;
1267 /* Check that the lines read from the given FP come in order. Return
1268 1 if they do and 0 if there is a disorder.
1269 FIXME: return number of first out-of-order line if not sorted. */
1271 static int
1272 checkfp (FILE *fp)
1274 struct buffer buf; /* Input buffer. */
1275 struct lines lines; /* Lines scanned from the buffer. */
1276 struct line temp; /* Copy of previous line. */
1277 int cc; /* Character count. */
1278 int alloc, sorted = 1;
1280 initbuf (&buf, mergealloc);
1281 initlines (&lines, mergealloc / linelength + 1,
1282 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1283 alloc = linelength;
1284 temp.text = xmalloc (alloc);
1286 cc = fillbuf (&buf, fp);
1287 if (cc == 0)
1288 goto finish;
1290 findlines (&buf, &lines);
1292 while (1)
1294 struct line *prev_line; /* Pointer to previous line. */
1295 int cmp; /* Result of calling compare. */
1296 int i;
1298 /* Compare each line in the buffer with its successor. */
1299 for (i = 0; i < lines.used - 1; ++i)
1301 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1302 if ((unique && cmp >= 0) || (cmp > 0))
1304 sorted = 0;
1305 goto finish;
1309 /* Save the last line of the buffer and refill the buffer. */
1310 prev_line = lines.lines + (lines.used - 1);
1311 if (prev_line->length > alloc)
1313 while (prev_line->length + 1 > alloc)
1314 alloc *= 2;
1315 temp.text = xrealloc (temp.text, alloc);
1317 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1318 temp.length = prev_line->length;
1319 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1320 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1322 cc = fillbuf (&buf, fp);
1323 if (cc == 0)
1324 break;
1326 findlines (&buf, &lines);
1327 /* Make sure the line saved from the old buffer contents is
1328 less than or equal to the first line of the new buffer. */
1329 cmp = compare (&temp, &lines.lines[0]);
1330 if ((unique && cmp >= 0) || (cmp > 0))
1332 sorted = 0;
1333 break;
1337 finish:
1338 xfclose (fp);
1339 free (buf.buf);
1340 free ((char *) lines.lines);
1341 free (temp.text);
1342 return sorted;
1345 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1346 Close FPS before returning. */
1348 static void
1349 mergefps (FILE **fps, register int nfps, FILE *ofp)
1351 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1352 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1353 struct line saved; /* Saved line for unique check. */
1354 int savedflag = 0; /* True if there is a saved line. */
1355 int savealloc; /* Size allocated for the saved line. */
1356 int cur[NMERGE]; /* Current line in each line table. */
1357 int ord[NMERGE]; /* Table representing a permutation of fps,
1358 such that lines[ord[0]].lines[cur[ord[0]]]
1359 is the smallest line and will be next
1360 output. */
1361 register int i, j, t;
1363 #ifdef lint /* Suppress `used before initialized' warning. */
1364 savealloc = 0;
1365 #endif
1367 /* Allocate space for a saved line if necessary. */
1368 if (unique)
1370 savealloc = linelength;
1371 saved.text = xmalloc (savealloc);
1374 /* Read initial lines from each input file. */
1375 for (i = 0; i < nfps; ++i)
1377 initbuf (&buffer[i], mergealloc);
1378 /* If a file is empty, eliminate it from future consideration. */
1379 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1381 xfclose (fps[i]);
1382 --nfps;
1383 for (j = i; j < nfps; ++j)
1384 fps[j] = fps[j + 1];
1386 if (i == nfps)
1387 free (buffer[i].buf);
1388 else
1390 initlines (&lines[i], mergealloc / linelength + 1,
1391 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1392 findlines (&buffer[i], &lines[i]);
1393 cur[i] = 0;
1397 /* Set up the ord table according to comparisons among input lines.
1398 Since this only reorders two items if one is strictly greater than
1399 the other, it is stable. */
1400 for (i = 0; i < nfps; ++i)
1401 ord[i] = i;
1402 for (i = 1; i < nfps; ++i)
1403 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1404 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1405 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1407 /* Repeatedly output the smallest line until no input remains. */
1408 while (nfps)
1410 /* If uniqified output is turned on, output only the first of
1411 an identical series of lines. */
1412 if (unique)
1414 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1416 xfwrite (saved.text, 1, saved.length, ofp);
1417 putc ('\n', ofp);
1418 savedflag = 0;
1420 if (!savedflag)
1422 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1424 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1425 savealloc *= 2;
1426 saved.text = xrealloc (saved.text, savealloc);
1428 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1429 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1430 saved.length + 1);
1431 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1433 saved.keybeg = saved.text +
1434 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1435 - lines[ord[0]].lines[cur[ord[0]]].text);
1437 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1439 saved.keylim = saved.text +
1440 (lines[ord[0]].lines[cur[ord[0]]].keylim
1441 - lines[ord[0]].lines[cur[ord[0]]].text);
1443 savedflag = 1;
1446 else
1448 xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
1449 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1450 putc ('\n', ofp);
1453 /* Check if we need to read more lines into core. */
1454 if (++cur[ord[0]] == lines[ord[0]].used)
1455 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1457 findlines (&buffer[ord[0]], &lines[ord[0]]);
1458 cur[ord[0]] = 0;
1460 else
1462 /* We reached EOF on fps[ord[0]]. */
1463 for (i = 1; i < nfps; ++i)
1464 if (ord[i] > ord[0])
1465 --ord[i];
1466 --nfps;
1467 xfclose (fps[ord[0]]);
1468 free (buffer[ord[0]].buf);
1469 free ((char *) lines[ord[0]].lines);
1470 for (i = ord[0]; i < nfps; ++i)
1472 fps[i] = fps[i + 1];
1473 buffer[i] = buffer[i + 1];
1474 lines[i] = lines[i + 1];
1475 cur[i] = cur[i + 1];
1477 for (i = 0; i < nfps; ++i)
1478 ord[i] = ord[i + 1];
1479 continue;
1482 /* The new line just read in may be larger than other lines
1483 already in core; push it back in the queue until we encounter
1484 a line larger than it. */
1485 for (i = 1; i < nfps; ++i)
1487 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1488 &lines[ord[i]].lines[cur[ord[i]]]);
1489 if (!t)
1490 t = ord[0] - ord[i];
1491 if (t < 0)
1492 break;
1494 t = ord[0];
1495 for (j = 1; j < i; ++j)
1496 ord[j - 1] = ord[j];
1497 ord[i - 1] = t;
1500 if (unique && savedflag)
1502 xfwrite (saved.text, 1, saved.length, ofp);
1503 putc ('\n', ofp);
1504 free (saved.text);
1508 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1510 static void
1511 sortlines (struct line *lines, int nlines, struct line *temp)
1513 register struct line *lo, *hi, *t;
1514 register int nlo, nhi;
1516 if (nlines == 2)
1518 if (compare (&lines[0], &lines[1]) > 0)
1519 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1520 return;
1523 nlo = nlines / 2;
1524 lo = lines;
1525 nhi = nlines - nlo;
1526 hi = lines + nlo;
1528 if (nlo > 1)
1529 sortlines (lo, nlo, temp);
1531 if (nhi > 1)
1532 sortlines (hi, nhi, temp);
1534 t = temp;
1536 while (nlo && nhi)
1537 if (compare (lo, hi) <= 0)
1538 *t++ = *lo++, --nlo;
1539 else
1540 *t++ = *hi++, --nhi;
1541 while (nlo--)
1542 *t++ = *lo++;
1544 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1545 *lo++ = *t++;
1548 /* Check that each of the NFILES FILES is ordered.
1549 Return a count of disordered files. */
1551 static int
1552 check (char **files, int nfiles)
1554 int i, disorders = 0;
1555 FILE *fp;
1557 for (i = 0; i < nfiles; ++i)
1559 fp = xfopen (files[i], "r");
1560 if (!checkfp (fp))
1562 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1563 ++disorders;
1566 return disorders;
1569 /* Merge NFILES FILES onto OFP. */
1571 static void
1572 merge (char **files, int nfiles, FILE *ofp)
1574 int i, j, t;
1575 char *temp;
1576 FILE *fps[NMERGE], *tfp;
1578 while (nfiles > NMERGE)
1580 t = 0;
1581 for (i = 0; i < nfiles / NMERGE; ++i)
1583 for (j = 0; j < NMERGE; ++j)
1584 fps[j] = xfopen (files[i * NMERGE + j], "r");
1585 tfp = xtmpfopen (temp = tempname ());
1586 mergefps (fps, NMERGE, tfp);
1587 xfclose (tfp);
1588 for (j = 0; j < NMERGE; ++j)
1589 zaptemp (files[i * NMERGE + j]);
1590 files[t++] = temp;
1592 for (j = 0; j < nfiles % NMERGE; ++j)
1593 fps[j] = xfopen (files[i * NMERGE + j], "r");
1594 tfp = xtmpfopen (temp = tempname ());
1595 mergefps (fps, nfiles % NMERGE, tfp);
1596 xfclose (tfp);
1597 for (j = 0; j < nfiles % NMERGE; ++j)
1598 zaptemp (files[i * NMERGE + j]);
1599 files[t++] = temp;
1600 nfiles = t;
1603 for (i = 0; i < nfiles; ++i)
1604 fps[i] = xfopen (files[i], "r");
1605 mergefps (fps, i, ofp);
1606 for (i = 0; i < nfiles; ++i)
1607 zaptemp (files[i]);
1610 /* Sort NFILES FILES onto OFP. */
1612 static void
1613 sort (char **files, int nfiles, FILE *ofp)
1615 struct buffer buf;
1616 struct lines lines;
1617 struct line *tmp;
1618 int i, ntmp;
1619 FILE *fp, *tfp;
1620 struct tempnode *node;
1621 int n_temp_files = 0;
1622 char **tempfiles;
1624 initbuf (&buf, sortalloc);
1625 initlines (&lines, sortalloc / linelength + 1,
1626 LINEALLOC / sizeof (struct line));
1627 ntmp = lines.alloc;
1628 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1630 while (nfiles--)
1632 fp = xfopen (*files++, "r");
1633 while (fillbuf (&buf, fp))
1635 findlines (&buf, &lines);
1636 if (lines.used > ntmp)
1638 while (lines.used > ntmp)
1639 ntmp *= 2;
1640 tmp = (struct line *)
1641 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1643 sortlines (lines.lines, lines.used, tmp);
1644 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1645 tfp = ofp;
1646 else
1648 ++n_temp_files;
1649 tfp = xtmpfopen (tempname ());
1651 for (i = 0; i < lines.used; ++i)
1652 if (!unique || i == 0
1653 || compare (&lines.lines[i], &lines.lines[i - 1]))
1655 xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
1656 putc ('\n', tfp);
1658 if (tfp != ofp)
1659 xfclose (tfp);
1661 xfclose (fp);
1664 free (buf.buf);
1665 free ((char *) lines.lines);
1666 free ((char *) tmp);
1668 if (n_temp_files)
1670 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1671 i = n_temp_files;
1672 for (node = temphead.next; i > 0; node = node->next)
1673 tempfiles[--i] = node->name;
1674 merge (tempfiles, n_temp_files, ofp);
1675 free ((char *) tempfiles);
1679 /* Insert key KEY at the end of the list (`keyhead'). */
1681 static void
1682 insertkey (struct keyfield *key)
1684 struct keyfield *k = &keyhead;
1686 while (k->next)
1687 k = k->next;
1688 k->next = key;
1689 key->next = NULL;
1692 static void
1693 badfieldspec (const char *s)
1695 error (2, 0, _("invalid field specification `%s'"), s);
1698 /* Handle interrupts and hangups. */
1700 static void
1701 sighandler (int sig)
1703 #ifdef SA_INTERRUPT
1704 struct sigaction sigact;
1706 sigact.sa_handler = SIG_DFL;
1707 sigemptyset (&sigact.sa_mask);
1708 sigact.sa_flags = 0;
1709 sigaction (sig, &sigact, NULL);
1710 #else /* !SA_INTERRUPT */
1711 signal (sig, SIG_DFL);
1712 #endif /* SA_INTERRUPT */
1713 cleanup ();
1714 kill (getpid (), sig);
1717 /* Set the ordering options for KEY specified in S.
1718 Return the address of the first character in S that
1719 is not a valid ordering option.
1720 BLANKTYPE is the kind of blanks that 'b' should skip. */
1722 static char *
1723 set_ordering (register const char *s, struct keyfield *key,
1724 enum blanktype blanktype)
1726 while (*s)
1728 switch (*s)
1730 case 'b':
1731 if (blanktype == bl_start || blanktype == bl_both)
1732 key->skipsblanks = 1;
1733 if (blanktype == bl_end || blanktype == bl_both)
1734 key->skipeblanks = 1;
1735 break;
1736 case 'd':
1737 key->ignore = nondictionary;
1738 break;
1739 case 'f':
1740 key->translate = fold_toupper;
1741 break;
1742 case 'g':
1743 key->general_numeric = 1;
1744 break;
1745 case 'i':
1746 key->ignore = nonprinting;
1747 break;
1748 case 'M':
1749 key->month = 1;
1750 break;
1751 case 'n':
1752 key->numeric = 1;
1753 if (blanktype == bl_start || blanktype == bl_both)
1754 key->skipsblanks = 1;
1755 if (blanktype == bl_end || blanktype == bl_both)
1756 key->skipeblanks = 1;
1757 break;
1758 case 'r':
1759 key->reverse = 1;
1760 break;
1761 default:
1762 return (char *) s;
1764 ++s;
1766 return (char *) s;
1770 main (int argc, char **argv)
1772 struct keyfield *key = NULL, gkey;
1773 char *s;
1774 int i, t, t2;
1775 int checkonly = 0, mergeonly = 0, nfiles = 0;
1776 char *minus = "-", *outfile = minus, **files, *tmp;
1777 FILE *ofp;
1778 #ifdef SA_INTERRUPT
1779 struct sigaction oldact, newact;
1780 #endif /* SA_INTERRUPT */
1782 #ifdef __DragonFly__
1783 (void) setlocale(LC_ALL, "");
1784 decimal_point = localeconv()->decimal_point[0];
1785 #endif
1786 program_name = argv[0];
1788 parse_long_options (argc, argv, "sort", version_string, usage);
1790 have_read_stdin = 0;
1791 inittables ();
1793 temp_file_prefix = getenv ("TMPDIR");
1794 if (temp_file_prefix == NULL)
1795 temp_file_prefix = DEFAULT_TMPDIR;
1797 #ifdef SA_INTERRUPT
1798 newact.sa_handler = sighandler;
1799 sigemptyset (&newact.sa_mask);
1800 newact.sa_flags = 0;
1802 sigaction (SIGINT, NULL, &oldact);
1803 if (oldact.sa_handler != SIG_IGN)
1804 sigaction (SIGINT, &newact, NULL);
1805 sigaction (SIGHUP, NULL, &oldact);
1806 if (oldact.sa_handler != SIG_IGN)
1807 sigaction (SIGHUP, &newact, NULL);
1808 sigaction (SIGPIPE, NULL, &oldact);
1809 if (oldact.sa_handler != SIG_IGN)
1810 sigaction (SIGPIPE, &newact, NULL);
1811 sigaction (SIGTERM, NULL, &oldact);
1812 if (oldact.sa_handler != SIG_IGN)
1813 sigaction (SIGTERM, &newact, NULL);
1814 #else /* !SA_INTERRUPT */
1815 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1816 signal (SIGINT, sighandler);
1817 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1818 signal (SIGHUP, sighandler);
1819 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1820 signal (SIGPIPE, sighandler);
1821 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1822 signal (SIGTERM, sighandler);
1823 #endif /* !SA_INTERRUPT */
1825 gkey.sword = gkey.eword = -1;
1826 gkey.ignore = NULL;
1827 gkey.translate = NULL;
1828 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1829 gkey.skipsblanks = gkey.skipeblanks = 0;
1831 files = (char **) xmalloc (sizeof (char *) * argc);
1833 for (i = 1; i < argc; ++i)
1835 if (argv[i][0] == '+')
1837 if (key)
1838 insertkey (key);
1839 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1840 key->eword = -1;
1841 key->ignore = NULL;
1842 key->translate = NULL;
1843 key->skipsblanks = key->skipeblanks = 0;
1844 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1845 s = argv[i] + 1;
1846 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1847 badfieldspec (argv[i]);
1848 for (t = 0; digits[UCHAR (*s)]; ++s)
1849 t = 10 * t + *s - '0';
1850 t2 = 0;
1851 if (*s == '.')
1852 for (++s; digits[UCHAR (*s)]; ++s)
1853 t2 = 10 * t2 + *s - '0';
1854 if (t2 || t)
1856 key->sword = t;
1857 key->schar = t2;
1859 else
1860 key->sword = -1;
1861 s = set_ordering (s, key, bl_start);
1862 if (*s)
1863 badfieldspec (argv[i]);
1865 else if (argv[i][0] == '-' && argv[i][1])
1867 s = argv[i] + 1;
1868 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1870 if (!key)
1871 usage (2);
1872 for (t = 0; digits[UCHAR (*s)]; ++s)
1873 t = t * 10 + *s - '0';
1874 t2 = 0;
1875 if (*s == '.')
1876 for (++s; digits[UCHAR (*s)]; ++s)
1877 t2 = t2 * 10 + *s - '0';
1878 key->eword = t;
1879 key->echar = t2;
1880 s = set_ordering (s, key, bl_end);
1881 if (*s)
1882 badfieldspec (argv[i]);
1883 insertkey (key);
1884 key = NULL;
1886 else
1887 while (*s)
1889 s = set_ordering (s, &gkey, bl_both);
1890 switch (*s)
1892 case '\0':
1893 break;
1894 case 'c':
1895 checkonly = 1;
1896 break;
1897 case 'k':
1898 if (s[1])
1899 ++s;
1900 else
1902 if (i == argc - 1)
1903 error (2, 0, _("option `-k' requires an argument"));
1904 else
1905 s = argv[++i];
1907 if (key)
1908 insertkey (key);
1909 key = (struct keyfield *)
1910 xmalloc (sizeof (struct keyfield));
1911 memset (key, 0, sizeof (struct keyfield));
1912 key->eword = -1;
1913 key->ignore = NULL;
1914 key->translate = NULL;
1915 key->skipsblanks = key->skipeblanks = 0;
1916 key->numeric = key->month = key->reverse = 0;
1917 /* Get POS1. */
1918 if (!digits[UCHAR (*s)])
1919 badfieldspec (argv[i]);
1920 for (t = 0; digits[UCHAR (*s)]; ++s)
1921 t = 10 * t + *s - '0';
1922 if (t == 0)
1924 /* Provoke with `sort -k0' */
1925 error (0, 0, _("the starting field number argument \
1926 to the `-k' option must be positive"));
1927 badfieldspec (argv[i]);
1929 --t;
1930 t2 = 0;
1931 if (*s == '.')
1933 if (!digits[UCHAR (s[1])])
1935 /* Provoke with `sort -k1.' */
1936 error (0, 0, _("starting field spec has `.' but \
1937 lacks following character offset"));
1938 badfieldspec (argv[i]);
1940 for (++s; digits[UCHAR (*s)]; ++s)
1941 t2 = 10 * t2 + *s - '0';
1942 if (t2 == 0)
1944 /* Provoke with `sort -k1.0' */
1945 error (0, 0, _("starting field character offset \
1946 argument to the `-k' option\nmust be positive"));
1947 badfieldspec (argv[i]);
1949 --t2;
1951 if (t2 || t)
1953 key->sword = t;
1954 key->schar = t2;
1956 else
1957 key->sword = -1;
1958 s = set_ordering (s, key, bl_start);
1959 if (*s == 0)
1961 key->eword = -1;
1962 key->echar = 0;
1964 else if (*s != ',')
1965 badfieldspec (argv[i]);
1966 else if (*s == ',')
1968 /* Skip over comma. */
1969 ++s;
1970 if (*s == 0)
1972 /* Provoke with `sort -k1,' */
1973 error (0, 0, _("field specification has `,' but \
1974 lacks following field spec"));
1975 badfieldspec (argv[i]);
1977 /* Get POS2. */
1978 for (t = 0; digits[UCHAR (*s)]; ++s)
1979 t = t * 10 + *s - '0';
1980 if (t == 0)
1982 /* Provoke with `sort -k1,0' */
1983 error (0, 0, _("ending field number argument \
1984 to the `-k' option must be positive"));
1985 badfieldspec (argv[i]);
1987 --t;
1988 t2 = 0;
1989 if (*s == '.')
1991 if (!digits[UCHAR (s[1])])
1993 /* Provoke with `sort -k1,1.' */
1994 error (0, 0, _("ending field spec has `.' \
1995 but lacks following character offset"));
1996 badfieldspec (argv[i]);
1998 for (++s; digits[UCHAR (*s)]; ++s)
1999 t2 = t2 * 10 + *s - '0';
2001 else
2003 /* `-k 2,3' is equivalent to `+1 -3'. */
2004 ++t;
2006 key->eword = t;
2007 key->echar = t2;
2008 s = set_ordering (s, key, bl_end);
2009 if (*s)
2010 badfieldspec (argv[i]);
2012 insertkey (key);
2013 key = NULL;
2014 goto outer;
2015 case 'm':
2016 mergeonly = 1;
2017 break;
2018 case 'o':
2019 if (s[1])
2020 outfile = s + 1;
2021 else
2023 if (i == argc - 1)
2024 error (2, 0, _("option `-o' requires an argument"));
2025 else
2026 outfile = argv[++i];
2028 goto outer;
2029 case 's':
2030 stable = 1;
2031 break;
2032 case 't':
2033 if (s[1])
2034 tab = *++s;
2035 else if (i < argc - 1)
2037 tab = *argv[++i];
2038 goto outer;
2040 else
2041 error (2, 0, _("option `-t' requires an argument"));
2042 break;
2043 case 'T':
2044 if (s[1])
2045 temp_file_prefix = ++s;
2046 else
2048 if (i < argc - 1)
2049 temp_file_prefix = argv[++i];
2050 else
2051 error (2, 0, _("option `-T' requires an argument"));
2053 goto outer;
2054 /* break; */
2055 case 'u':
2056 unique = 1;
2057 break;
2058 case 'y':
2059 /* Accept and ignore e.g. -y0 for compatibility with
2060 Solaris 2. */
2061 goto outer;
2062 default:
2063 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2064 argv[0], *s);
2065 usage (2);
2067 if (*s)
2068 ++s;
2071 else /* Not an option. */
2073 files[nfiles++] = argv[i];
2075 outer:;
2078 if (key)
2079 insertkey (key);
2081 /* Inheritance of global options to individual keys. */
2082 for (key = keyhead.next; key; key = key->next)
2083 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2084 && !key->skipeblanks && !key->month && !key->numeric
2085 && !key->general_numeric)
2087 key->ignore = gkey.ignore;
2088 key->translate = gkey.translate;
2089 key->skipsblanks = gkey.skipsblanks;
2090 key->skipeblanks = gkey.skipeblanks;
2091 key->month = gkey.month;
2092 key->numeric = gkey.numeric;
2093 key->general_numeric = gkey.general_numeric;
2094 key->reverse = gkey.reverse;
2097 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2098 || gkey.skipeblanks || gkey.month || gkey.numeric
2099 || gkey.general_numeric))
2100 insertkey (&gkey);
2101 reverse = gkey.reverse;
2103 if (nfiles == 0)
2105 nfiles = 1;
2106 files = &minus;
2109 if (checkonly)
2110 exit (check (files, nfiles) != 0);
2112 if (strcmp (outfile, "-"))
2114 struct stat outstat;
2115 if (stat (outfile, &outstat) == 0)
2117 /* The following code prevents a race condition when
2118 people use the brain dead shell programming idiom:
2119 cat file | sort -o file
2120 This feature is provided for historical compatibility,
2121 but we strongly discourage ever relying on this in
2122 new shell programs. */
2124 /* Temporarily copy each input file that might be another name
2125 for the output file. When in doubt (e.g. a pipe), copy. */
2126 for (i = 0; i < nfiles; ++i)
2128 char buf[8192];
2129 FILE *fp;
2130 int cc;
2132 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2134 struct stat instat;
2135 if ((strcmp (files[i], "-")
2136 ? stat (files[i], &instat)
2137 : fstat (fileno (stdin), &instat)) != 0)
2139 error (0, errno, "%s", files[i]);
2140 cleanup ();
2141 exit (2);
2143 if (S_ISREG (instat.st_mode)
2144 && (instat.st_ino != outstat.st_ino
2145 || instat.st_dev != outstat.st_dev))
2147 /* We know the files are distinct. */
2148 continue;
2152 fp = xfopen (files[i], "r");
2153 tmp = tempname ();
2154 ofp = xtmpfopen (tmp);
2155 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2156 xfwrite (buf, 1, cc, ofp);
2157 if (ferror (fp))
2159 error (0, errno, "%s", files[i]);
2160 cleanup ();
2161 exit (2);
2163 xfclose (ofp);
2164 xfclose (fp);
2165 files[i] = tmp;
2168 ofp = xfopen (outfile, "w");
2170 else
2171 ofp = stdout;
2173 if (mergeonly)
2174 merge (files, nfiles, ofp);
2175 else
2176 sort (files, nfiles, ofp);
2177 cleanup ();
2179 /* If we wait for the implicit flush on exit, and the parent process
2180 has closed stdout (e.g., exec >&- in a shell), then the output file
2181 winds up empty. I don't understand why. This is under SunOS,
2182 Solaris, Ultrix, and Irix. This premature fflush makes the output
2183 reappear. --karl@cs.umb.edu */
2184 if (fflush (ofp) < 0)
2185 error (1, errno, _("%s: write error"), outfile);
2187 if (have_read_stdin && fclose (stdin) == EOF)
2188 error (1, errno, outfile);
2189 if (ferror (stdout) || fclose (stdout) == EOF)
2190 error (1, errno, _("%s: write error"), outfile);
2192 exit (0);