sort(1): Use asprintf(3) when fixing legacy options.
[dragonfly.git] / usr.bin / sort / sort.c
blob9583cd61dd42b2653de2559e5d18fb6c99d05497
1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
27 * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #if defined(SORT_RANDOM)
41 #include <md5.h>
42 #endif
43 #include <regex.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <wchar.h>
51 #include <wctype.h>
53 #include "coll.h"
54 #include "file.h"
55 #include "sort.h"
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
/*
 * Short-option string handed to getopt_long(3), plus the state that only
 * exists when random sorting (-R) is compiled in.
 */
#ifdef SORT_RANDOM
#define	OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"

/* File the random seed is read from unless --random-source overrides it. */
#define	DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
/* Upper bound on the number of bytes read from the default seed file. */
#define	MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set once a random-sort modifier has been requested. */
static bool need_random;
/* Path of the seed source currently in effect. */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* Seed material and its length in bytes. */
static const void *random_seed;
static size_t random_seed_size;

/* MD5 context initialised from the seed. */
MD5_CTX md5_ctx;
#else
#define	OPTIONS "bcCdfghik:Mmno:rsS:t:T:uVz"
#endif
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  The array index is the message number; index 0 is unused.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	/* Usage text; %s receives the program name. */
	[12] = "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
#if defined(SORT_RANDOM)
	    "[--version-sort] [--random-sort [--random-source file]] "
#else
	    "[--version-sort] "
#endif
	    "[--compress-program program] [file ...]\n"
};
112 struct sort_opts sort_opts_vals;
114 bool debug_sort;
115 bool need_hint;
117 #if defined(SORT_THREADS)
118 unsigned int ncpu = 1;
119 size_t nthreads = 1;
120 #endif
122 static bool gnusort_numeric_compatibility;
124 static struct sort_mods default_sort_mods_object;
125 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
127 static bool print_symbols_on_debug;
130 * Arguments from file (when file0-from option is used:
132 static size_t argc_from_file0 = (size_t)-1;
133 static char **argv_from_file0;
/*
 * Placeholder symbols for options which have no single-character
 * equivalent.  Values start above CHAR_MAX so that they can never collide
 * with a short option character.
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
#if defined(SORT_RANDOM)
	RANDOMSOURCE_OPT,		/* --random-source */
#endif
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
/* Sort-type modifiers of which at most one may be given at a time. */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M',
	'n',
	'g',
	'R',
	'h',
	'V'
};
163 static struct option long_options[] = {
164 { "batch-size", required_argument, NULL, BS_OPT },
165 { "buffer-size", required_argument, NULL, 'S' },
166 { "check", optional_argument, NULL, 'c' },
167 { "check=silent|quiet", optional_argument, NULL, 'C' },
168 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
169 { "debug", no_argument, NULL, DEBUG_OPT },
170 { "dictionary-order", no_argument, NULL, 'd' },
171 { "field-separator", required_argument, NULL, 't' },
172 { "files0-from", required_argument, NULL, FF_OPT },
173 { "general-numeric-sort", no_argument, NULL, 'g' },
174 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
175 { "help",no_argument, NULL, HELP_OPT },
176 { "human-numeric-sort", no_argument, NULL, 'h' },
177 { "ignore-leading-blanks", no_argument, NULL, 'b' },
178 { "ignore-case", no_argument, NULL, 'f' },
179 { "ignore-nonprinting", no_argument, NULL, 'i' },
180 { "key", required_argument, NULL, 'k' },
181 { "merge", no_argument, NULL, 'm' },
182 { "mergesort", no_argument, NULL, MERGESORT_OPT },
183 { "mmap", no_argument, NULL, MMAP_OPT },
184 { "month-sort", no_argument, NULL, 'M' },
185 { "numeric-sort", no_argument, NULL, 'n' },
186 { "output", required_argument, NULL, 'o' },
187 #if defined(SORT_THREADS)
188 { "parallel", required_argument, NULL, PARALLEL_OPT },
189 #endif
190 { "qsort", no_argument, NULL, QSORT_OPT },
191 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
192 #if defined(SORT_RANDOM)
193 { "random-sort", no_argument, NULL, 'R' },
194 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
195 #endif
196 { "reverse", no_argument, NULL, 'r' },
197 { "sort", required_argument, NULL, SORT_OPT },
198 { "stable", no_argument, NULL, 's' },
199 { "temporary-directory",required_argument, NULL, 'T' },
200 { "unique", no_argument, NULL, 'u' },
201 { "version", no_argument, NULL, VERSION_OPT },
202 { "version-sort",no_argument, NULL, 'V' },
203 { "zero-terminated", no_argument, NULL, 'z' },
204 { NULL, no_argument, NULL, 0 }
207 static void fix_obsolete_keys(int *argc, char **argv);
210 * Check where sort modifier is present
212 static bool
213 sort_modifier_empty(struct sort_mods *sm)
216 if (sm == NULL)
217 return (true);
218 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
219 #ifdef SORT_RANDOM
220 sm->Rflag ||
221 #endif
222 sm->rflag || sm->hflag || sm->dflag || sm->fflag));
/*
 * Print the usage text and terminate.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
242 * Read input file names from a file (file0-from option).
244 static void
245 read_fns_from_file0(const char *fn)
247 FILE *f;
248 char *line = NULL;
249 size_t linesize = 0;
250 ssize_t linelen;
252 if (fn == NULL)
253 return;
255 f = fopen(fn, "r");
256 if (f == NULL)
257 err(2, "%s", fn);
259 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
260 if (*line != '\0') {
261 if (argc_from_file0 == (size_t) - 1)
262 argc_from_file0 = 0;
263 ++argc_from_file0;
264 argv_from_file0 = sort_realloc(argv_from_file0,
265 argc_from_file0 * sizeof(char *));
266 if (argv_from_file0 == NULL)
267 err(2, NULL);
268 argv_from_file0[argc_from_file0 - 1] = line;
269 } else {
270 free(line);
272 line = NULL;
273 linesize = 0;
275 if (ferror(f))
276 err(2, "%s: getdelim", fn);
278 closefile(f, fn);
282 * Check how much RAM is available for the sort.
284 static void
285 set_hw_params(void)
287 long pages, psize;
289 pages = psize = 0;
291 #if defined(SORT_THREADS)
292 ncpu = 1;
293 #endif
295 pages = sysconf(_SC_PHYS_PAGES);
296 if (pages < 1) {
297 perror("sysconf pages");
298 pages = 1;
300 psize = sysconf(_SC_PAGESIZE);
301 if (psize < 1) {
302 perror("sysconf psize");
303 psize = 4096;
305 #if defined(SORT_THREADS)
306 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
307 if (ncpu < 1)
308 ncpu = 1;
309 else if(ncpu > 32)
310 ncpu = 32;
312 nthreads = ncpu;
313 #endif
315 free_memory = (unsigned long long) pages * (unsigned long long) psize;
316 available_free_memory = free_memory / 2;
318 if (available_free_memory < 1024)
319 available_free_memory = 1024;
/*
 * Convert the first multibyte character of c to a wide character stored
 * in *wc.  If the conversion fails or yields the null character, *wc is
 * set to def instead.  Nothing is done when wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
339 * Set current locale symbols.
341 static void
342 set_locale(void)
344 struct lconv *lc;
345 const char *locale;
347 setlocale(LC_ALL, "");
349 lc = localeconv();
351 if (lc) {
352 /* obtain LC_NUMERIC info */
353 /* Convert to wide char form */
354 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
355 symbol_decimal_point);
356 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
357 symbol_thousands_sep);
358 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
359 symbol_positive_sign);
360 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
361 symbol_negative_sign);
364 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
365 gnusort_numeric_compatibility = true;
367 locale = setlocale(LC_COLLATE, NULL);
369 if (locale) {
370 char *tmpl;
371 const char *cclocale;
373 tmpl = sort_strdup(locale);
374 cclocale = setlocale(LC_COLLATE, "C");
375 if (cclocale && !strcmp(cclocale, tmpl))
376 byte_sort = true;
377 else {
378 const char *pclocale;
380 pclocale = setlocale(LC_COLLATE, "POSIX");
381 if (pclocale && !strcmp(pclocale, tmpl))
382 byte_sort = true;
384 setlocale(LC_COLLATE, tmpl);
385 sort_free(tmpl);
390 * Set directory temporary files.
392 static void
393 set_tmpdir(void)
395 char *td;
397 td = getenv("TMPDIR");
398 if (td != NULL)
399 tmpdir = sort_strdup(td);
403 * Parse -S option.
405 static unsigned long long
406 parse_memory_buffer_value(const char *value)
409 if (value == NULL)
410 return (available_free_memory);
411 else {
412 char *endptr;
413 unsigned long long membuf;
415 endptr = NULL;
416 errno = 0;
417 membuf = strtoll(value, &endptr, 10);
419 if (errno != 0) {
420 warn("%s",getstr(4));
421 membuf = available_free_memory;
422 } else {
423 switch (*endptr){
424 case 'Y':
425 membuf *= 1024;
426 /* FALLTHROUGH */
427 case 'Z':
428 membuf *= 1024;
429 /* FALLTHROUGH */
430 case 'E':
431 membuf *= 1024;
432 /* FALLTHROUGH */
433 case 'P':
434 membuf *= 1024;
435 /* FALLTHROUGH */
436 case 'T':
437 membuf *= 1024;
438 /* FALLTHROUGH */
439 case 'G':
440 membuf *= 1024;
441 /* FALLTHROUGH */
442 case 'M':
443 membuf *= 1024;
444 /* FALLTHROUGH */
445 case '\0':
446 case 'K':
447 membuf *= 1024;
448 /* FALLTHROUGH */
449 case 'b':
450 break;
451 case '%':
452 membuf = (available_free_memory * membuf) /
453 100;
454 break;
455 default:
456 warnc(EINVAL, "%s", optarg);
457 membuf = available_free_memory;
460 return (membuf);
465 * Signal handler that clears the temporary files.
467 static void
468 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
469 void *context __unused)
472 clear_tmp_files();
473 exit(-1);
477 * Set signal handler on panic signals.
479 static void
480 set_signal_handler(void)
482 struct sigaction sa;
484 memset(&sa, 0, sizeof(sa));
485 sa.sa_sigaction = &sig_handler;
486 sa.sa_flags = SA_SIGINFO;
488 if (sigaction(SIGTERM, &sa, NULL) < 0) {
489 perror("sigaction");
490 return;
492 if (sigaction(SIGHUP, &sa, NULL) < 0) {
493 perror("sigaction");
494 return;
496 if (sigaction(SIGINT, &sa, NULL) < 0) {
497 perror("sigaction");
498 return;
500 if (sigaction(SIGQUIT, &sa, NULL) < 0) {
501 perror("sigaction");
502 return;
504 if (sigaction(SIGABRT, &sa, NULL) < 0) {
505 perror("sigaction");
506 return;
508 if (sigaction(SIGBUS, &sa, NULL) < 0) {
509 perror("sigaction");
510 return;
512 if (sigaction(SIGSEGV, &sa, NULL) < 0) {
513 perror("sigaction");
514 return;
516 if (sigaction(SIGUSR1, &sa, NULL) < 0) {
517 perror("sigaction");
518 return;
520 if (sigaction(SIGUSR2, &sa, NULL) < 0) {
521 perror("sigaction");
522 return;
/*
 * Print message 3 ("Unknown feature" by default) followed by the
 * offending text, then exit with status 2.
 */
static void
unknown(const char *what)
{
	const char *prefix;

	prefix = getstr(3);
	errx(2, "%s: %s", prefix, what);
}
537 * Check whether contradictory input options are used.
539 static void
540 check_mutually_exclusive_flags(char c, bool *mef_flags)
542 int fo_index, mec;
543 bool found_others, found_this;
545 found_others = found_this = false;
546 fo_index = 0;
548 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
549 mec = mutually_exclusive_flags[i];
551 if (mec != c) {
552 if (mef_flags[i]) {
553 if (found_this)
554 errx(1, "%c:%c: %s", c, mec, getstr(1));
555 found_others = true;
556 fo_index = i;
558 } else {
559 if (found_others)
560 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
561 mef_flags[i] = true;
562 found_this = true;
568 * Initialise sort opts data.
570 static void
571 set_sort_opts(void)
574 memset(&default_sort_mods_object, 0,
575 sizeof(default_sort_mods_object));
576 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
577 default_sort_mods_object.func =
578 get_sort_func(&default_sort_mods_object);
582 * Set a sort modifier on a sort modifiers object.
584 static bool
585 set_sort_modifier(struct sort_mods *sm, int c)
588 if (sm) {
589 switch (c){
590 case 'b':
591 sm->bflag = true;
592 break;
593 case 'd':
594 sm->dflag = true;
595 break;
596 case 'f':
597 sm->fflag = true;
598 break;
599 case 'g':
600 sm->gflag = true;
601 need_hint = true;
602 break;
603 case 'i':
604 sm->iflag = true;
605 break;
606 #ifdef SORT_RANDOM
607 case 'R':
608 sm->Rflag = true;
609 need_random = true;
610 break;
611 #endif
612 case 'M':
613 initialise_months();
614 sm->Mflag = true;
615 need_hint = true;
616 break;
617 case 'n':
618 sm->nflag = true;
619 need_hint = true;
620 print_symbols_on_debug = true;
621 break;
622 case 'r':
623 sm->rflag = true;
624 break;
625 case 'V':
626 sm->Vflag = true;
627 break;
628 case 'h':
629 sm->hflag = true;
630 need_hint = true;
631 print_symbols_on_debug = true;
632 break;
633 default:
634 return false;
636 sort_opts_vals.complex_sort = true;
637 sm->func = get_sort_func(sm);
639 return (true);
643 * Parse POS in -k option.
645 static int
646 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
648 regmatch_t pmatch[4];
649 regex_t re;
650 char *c, *f;
651 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
652 size_t len, nmatch;
653 int ret;
655 ret = -1;
656 nmatch = 4;
657 c = f = NULL;
659 if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
660 return (-1);
662 if (regexec(&re, s, nmatch, pmatch, 0) != 0)
663 goto end;
665 if (pmatch[0].rm_eo <= pmatch[0].rm_so)
666 goto end;
668 if (pmatch[1].rm_eo <= pmatch[1].rm_so)
669 goto end;
671 len = pmatch[1].rm_eo - pmatch[1].rm_so;
672 f = sort_malloc((len + 1) * sizeof(char));
674 strncpy(f, s + pmatch[1].rm_so, len);
675 f[len] = '\0';
677 if (second) {
678 errno = 0;
679 ks->f2 = (size_t) strtoul(f, NULL, 10);
680 if (errno != 0)
681 err(2, "-k");
682 if (ks->f2 == 0) {
683 warn("%s",getstr(5));
684 goto end;
686 } else {
687 errno = 0;
688 ks->f1 = (size_t) strtoul(f, NULL, 10);
689 if (errno != 0)
690 err(2, "-k");
691 if (ks->f1 == 0) {
692 warn("%s",getstr(5));
693 goto end;
697 if (pmatch[2].rm_eo > pmatch[2].rm_so) {
698 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
699 c = sort_malloc((len + 1) * sizeof(char));
701 strncpy(c, s + pmatch[2].rm_so + 1, len);
702 c[len] = '\0';
704 if (second) {
705 errno = 0;
706 ks->c2 = (size_t) strtoul(c, NULL, 10);
707 if (errno != 0)
708 err(2, "-k");
709 } else {
710 errno = 0;
711 ks->c1 = (size_t) strtoul(c, NULL, 10);
712 if (errno != 0)
713 err(2, "-k");
714 if (ks->c1 == 0) {
715 warn("%s",getstr(6));
716 goto end;
719 } else {
720 if (second)
721 ks->c2 = 0;
722 else
723 ks->c1 = 1;
726 if (pmatch[3].rm_eo > pmatch[3].rm_so) {
727 regoff_t i = 0;
729 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
730 check_mutually_exclusive_flags(s[i], mef_flags);
731 if (s[i] == 'b') {
732 if (second)
733 ks->pos2b = true;
734 else
735 ks->pos1b = true;
736 } else if (!set_sort_modifier(&(ks->sm), s[i]))
737 goto end;
741 ret = 0;
743 end:
745 if (c)
746 sort_free(c);
747 if (f)
748 sort_free(f);
749 regfree(&re);
751 return (ret);
755 * Parse -k option value.
757 static int
758 parse_k(const char *s, struct key_specs *ks)
760 int ret = -1;
761 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
762 { false, false, false, false, false, false };
764 if (s && *s) {
765 char *sptr;
767 sptr = strchr(s, ',');
768 if (sptr) {
769 size_t size1;
770 char *pos1, *pos2;
772 size1 = sptr - s;
774 if (size1 < 1)
775 return (-1);
776 pos1 = sort_malloc((size1 + 1) * sizeof(char));
778 strncpy(pos1, s, size1);
779 pos1[size1] = '\0';
781 ret = parse_pos(pos1, ks, mef_flags, false);
783 sort_free(pos1);
784 if (ret < 0)
785 return (ret);
787 pos2 = sort_strdup(sptr + 1);
788 ret = parse_pos(pos2, ks, mef_flags, true);
789 sort_free(pos2);
790 } else
791 ret = parse_pos(s, ks, mef_flags, false);
794 return (ret);
/*
 * Size of the buffers that receive the modifier letters of an obsolete
 * key position, including the terminating NUL.
 */
#define	OBS_SOPTS_SIZE 128

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  The field and column numbers
 * are stored in *nf and *nc (0 when absent) and the letters are copied to
 * sopts, which must provide room for OBS_SOPTS_SIZE bytes and is left
 * untouched when s has no letters.
 *
 * Returns 0 on success and -1 when s does not have this form.  A number
 * that does not fit, or a letter string that is too long for sopts,
 * terminates the program with status 2 and message 11.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long num;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/*
	 * The caller adds one to the result, so INT_MAX itself is refused
	 * as well to keep that increment from overflowing.
	 */
	errno = 0;
	num = strtoul(f, NULL, 10);
	if (errno != 0 || num >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading dot. */
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		num = strtoul(c, NULL, 10);
		if (errno != 0 || num >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the end of the caller's buffer. */
		if (len >= OBS_SOPTS_SIZE)
			errx(2, "%s", getstr(11));

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * argv is rewritten in place.  A "+POS1" argument becomes a newly
 * allocated "-k" string; if it is directly followed by a parsable "-POS2",
 * both are merged into one argument and *argc is decreased by one.
 * Arguments that do not parse as a position are left alone.  Scanning
 * stops at "--" because everything after it is an operand.  The program
 * exits with status 2 when no memory is available for a new string.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{

	for (int i = 1; i < *argc; i++) {
		char sopts1[OBS_SOPTS_SIZE];
		char *arg1, *snew;
		int c1, f1;

		arg1 = argv[i];
		snew = NULL;

		if (strcmp(arg1, "--") == 0)
			break;

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Obsolete positions count from 0, -k counts from 1. */
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBS_SOPTS_SIZE];
				int c2, f2;

				sopts2[0] = '\0';
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					if (asprintf(&snew,
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1,
					    f2, c2, sopts2) == -1)
						err(2, NULL);
					argv[i] = snew;
					/* Close the gap left by -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		/* +POS1 on its own. */
		if (asprintf(&snew, "-k%d.%d%s", f1, c1, sopts1) == -1)
			err(2, NULL);
		argv[i] = snew;
	}
}
/*
 * Set random seed
 *
 * Does nothing unless random sorting was requested.  With the default
 * seed file, up to MAX_DEFAULT_RANDOM_SEED_DATA_SIZE bytes are read from
 * it and their MD5 digest string becomes the seed; with any other source
 * the MD5 digest string of the whole file is used.  md5_ctx is then
 * initialised and fed with the seed.  The program exits with status 2
 * when a digest cannot be obtained.
 */
#if defined(SORT_RANDOM)
static void
set_random_seed(void)
{
	char *digest;

	if (!need_random)
		return;

	if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
		FILE* fseed;
		MD5_CTX ctx;
		char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
		size_t sz;

		fseed = openfile(random_source, "r");
		/* A short read is fine: whatever arrived is hashed. */
		sz = fread(rsd, 1, sizeof(rsd), fseed);
		closefile(fseed, random_source);

		MD5Init(&ctx);
		MD5Update(&ctx, rsd, sz);

		digest = MD5End(&ctx, NULL);
	} else {
		digest = MD5File(random_source, NULL);
	}

	/* Check for failure before the result reaches strlen(). */
	if (digest == NULL)
		err(2, NULL);

	random_seed = digest;
	random_seed_size = strlen(digest);

	MD5Init(&md5_ctx);
	if (random_seed_size > 0)
		MD5Update(&md5_ctx, random_seed, random_seed_size);
}
#endif
990 * Main function.
993 main(int argc, char **argv)
995 char *outfile, *real_outfile;
996 int c, result;
997 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
998 { false, false, false, false, false, false };
1000 result = 0;
1001 outfile = sort_strdup("-");
1002 real_outfile = NULL;
1004 struct sort_mods *sm = &default_sort_mods_object;
1006 init_tmp_files();
1008 set_signal_handler();
1010 set_hw_params();
1011 set_locale();
1012 set_tmpdir();
1013 set_sort_opts();
1015 fix_obsolete_keys(&argc, argv);
1017 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1018 != -1)) {
1020 check_mutually_exclusive_flags(c, mef_flags);
1022 if (!set_sort_modifier(sm, c)) {
1024 switch (c) {
1025 case 'c':
1026 sort_opts_vals.cflag = true;
1027 if (optarg) {
1028 if (!strcmp(optarg, "diagnose-first"))
1030 else if (!strcmp(optarg, "silent") ||
1031 !strcmp(optarg, "quiet"))
1032 sort_opts_vals.csilentflag = true;
1033 else if (*optarg)
1034 unknown(optarg);
1036 break;
1037 case 'C':
1038 sort_opts_vals.cflag = true;
1039 sort_opts_vals.csilentflag = true;
1040 break;
1041 case 'k':
1043 sort_opts_vals.complex_sort = true;
1044 sort_opts_vals.kflag = true;
1046 keys_num++;
1047 keys = sort_realloc(keys, keys_num *
1048 sizeof(struct key_specs));
1049 memset(&(keys[keys_num - 1]), 0,
1050 sizeof(struct key_specs));
1052 if (parse_k(optarg, &(keys[keys_num - 1]))
1053 < 0) {
1054 errc(2, EINVAL, "-k %s", optarg);
1057 break;
1059 case 'm':
1060 sort_opts_vals.mflag = true;
1061 break;
1062 case 'o':
1063 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1064 strcpy(outfile, optarg);
1065 break;
1066 case 's':
1067 sort_opts_vals.sflag = true;
1068 break;
1069 case 'S':
1070 available_free_memory =
1071 parse_memory_buffer_value(optarg);
1072 break;
1073 case 'T':
1074 tmpdir = sort_strdup(optarg);
1075 break;
1076 case 't':
1077 while (strlen(optarg) > 1) {
1078 if (optarg[0] != '\\') {
1079 errc(2, EINVAL, "%s", optarg);
1081 optarg += 1;
1082 if (*optarg == '0') {
1083 *optarg = 0;
1084 break;
1087 sort_opts_vals.tflag = true;
1088 sort_opts_vals.field_sep = btowc(optarg[0]);
1089 if (sort_opts_vals.field_sep == WEOF) {
1090 errno = EINVAL;
1091 err(2, NULL);
1093 if (!gnusort_numeric_compatibility) {
1094 if (symbol_decimal_point == sort_opts_vals.field_sep)
1095 symbol_decimal_point = WEOF;
1096 if (symbol_thousands_sep == sort_opts_vals.field_sep)
1097 symbol_thousands_sep = WEOF;
1098 if (symbol_negative_sign == sort_opts_vals.field_sep)
1099 symbol_negative_sign = WEOF;
1100 if (symbol_positive_sign == sort_opts_vals.field_sep)
1101 symbol_positive_sign = WEOF;
1103 break;
1104 case 'u':
1105 sort_opts_vals.uflag = true;
1106 /* stable sort for the correct unique val */
1107 sort_opts_vals.sflag = true;
1108 break;
1109 case 'z':
1110 sort_opts_vals.zflag = true;
1111 break;
1112 case SORT_OPT:
1113 if (optarg) {
1114 if (!strcmp(optarg, "general-numeric"))
1115 set_sort_modifier(sm, 'g');
1116 else if (!strcmp(optarg, "human-numeric"))
1117 set_sort_modifier(sm, 'h');
1118 else if (!strcmp(optarg, "numeric"))
1119 set_sort_modifier(sm, 'n');
1120 else if (!strcmp(optarg, "month"))
1121 set_sort_modifier(sm, 'M');
1122 #if defined(SORT_RANDOM)
1123 else if (!strcmp(optarg, "random"))
1124 set_sort_modifier(sm, 'R');
1125 #endif
1126 else
1127 unknown(optarg);
1129 break;
1130 #if defined(SORT_THREADS)
1131 case PARALLEL_OPT:
1132 nthreads = (size_t)(atoi(optarg));
1133 if (nthreads < 1)
1134 nthreads = 1;
1135 if (nthreads > 1024)
1136 nthreads = 1024;
1137 break;
1138 #endif
1139 case QSORT_OPT:
1140 sort_opts_vals.sort_method = SORT_QSORT;
1141 break;
1142 case MERGESORT_OPT:
1143 sort_opts_vals.sort_method = SORT_MERGESORT;
1144 break;
1145 case MMAP_OPT:
1146 use_mmap = true;
1147 break;
1148 case HEAPSORT_OPT:
1149 sort_opts_vals.sort_method = SORT_HEAPSORT;
1150 break;
1151 case RADIXSORT_OPT:
1152 sort_opts_vals.sort_method = SORT_RADIXSORT;
1153 break;
1154 #if defined(SORT_RANDOM)
1155 case RANDOMSOURCE_OPT:
1156 random_source = strdup(optarg);
1157 break;
1158 #endif
1159 case COMPRESSPROGRAM_OPT:
1160 compress_program = strdup(optarg);
1161 break;
1162 case FF_OPT:
1163 read_fns_from_file0(optarg);
1164 break;
1165 case BS_OPT:
1167 errno = 0;
1168 long mof = strtol(optarg, NULL, 10);
1169 if (errno != 0)
1170 err(2, "--batch-size");
1171 if (mof >= 2)
1172 max_open_files = (size_t) mof + 1;
1174 break;
1175 case VERSION_OPT:
1176 printf("%s\n", VERSION);
1177 exit(EXIT_SUCCESS);
1178 /* NOTREACHED */
1179 break;
1180 case DEBUG_OPT:
1181 debug_sort = true;
1182 break;
1183 case HELP_OPT:
1184 usage(false);
1185 /* NOTREACHED */
1186 break;
1187 default:
1188 usage(true);
1189 /* NOTREACHED */
1194 argc -= optind;
1195 argv += optind;
1197 #ifndef WITHOUT_NLS
1198 catalog = catopen("sort", NL_CAT_LOCALE);
1199 #endif
1201 if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1202 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1204 #ifndef WITHOUT_NLS
1205 catclose(catalog);
1206 #endif
1208 if (keys_num == 0) {
1209 keys_num = 1;
1210 keys = sort_realloc(keys, sizeof(struct key_specs));
1211 memset(&(keys[0]), 0, sizeof(struct key_specs));
1212 keys[0].c1 = 1;
1213 keys[0].pos1b = default_sort_mods->bflag;
1214 keys[0].pos2b = default_sort_mods->bflag;
1215 memcpy(&(keys[0].sm), default_sort_mods,
1216 sizeof(struct sort_mods));
1219 for (size_t i = 0; i < keys_num; i++) {
1220 struct key_specs *ks;
1222 ks = &(keys[i]);
1224 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1225 !(ks->pos2b)) {
1226 ks->pos1b = sm->bflag;
1227 ks->pos2b = sm->bflag;
1228 memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1231 ks->sm.func = get_sort_func(&(ks->sm));
1234 if (argv_from_file0) {
1235 argc = argc_from_file0;
1236 argv = argv_from_file0;
1239 if (debug_sort) {
1240 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1241 #if defined(SORT_THREADS)
1242 printf("Number of CPUs: %d\n",(int)ncpu);
1243 nthreads = 1;
1244 #endif
1245 printf("Using collate rules of %s locale\n",
1246 setlocale(LC_COLLATE, NULL));
1247 if (byte_sort)
1248 printf("Byte sort is used\n");
1249 if (print_symbols_on_debug) {
1250 printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1251 if (symbol_thousands_sep)
1252 printf("Thousands separator: <%lc>\n",
1253 symbol_thousands_sep);
1254 printf("Positive sign: <%lc>\n", symbol_positive_sign);
1255 printf("Negative sign: <%lc>\n", symbol_negative_sign);
1259 #if defined(SORT_RANDOM)
1260 set_random_seed();
1261 #endif
1263 /* Case when the outfile equals one of the input files: */
1264 if (strcmp(outfile, "-")) {
1266 for(int i = 0; i < argc; ++i) {
1267 if (strcmp(argv[i], outfile) == 0) {
1268 real_outfile = sort_strdup(outfile);
1269 for(;;) {
1270 char* tmp = sort_malloc(strlen(outfile) +
1271 strlen(".tmp") + 1);
1273 strcpy(tmp, outfile);
1274 strcpy(tmp + strlen(tmp), ".tmp");
1275 sort_free(outfile);
1276 outfile = tmp;
1277 if (access(outfile, F_OK) < 0)
1278 break;
1280 tmp_file_atexit(outfile);
1285 #if defined(SORT_THREADS)
1286 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1287 nthreads = 1;
1288 #endif
1290 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1291 struct file_list fl;
1292 struct sort_list list;
1294 sort_list_init(&list);
1295 file_list_init(&fl, true);
1297 if (argc < 1)
1298 procfile("-", &list, &fl);
1299 else {
1300 while (argc > 0) {
1301 procfile(*argv, &list, &fl);
1302 --argc;
1303 ++argv;
1307 if (fl.count < 1)
1308 sort_list_to_file(&list, outfile);
1309 else {
1310 if (list.count > 0) {
1311 char *flast = new_tmp_file_name();
1313 sort_list_to_file(&list, flast);
1314 file_list_add(&fl, flast, false);
1316 merge_files(&fl, outfile);
1319 file_list_clean(&fl);
1322 * We are about to exit the program, so we can ignore
1323 * the clean-up for speed
1325 * sort_list_clean(&list);
1328 } else if (sort_opts_vals.cflag) {
1329 result = (argc == 0) ? (check("-")) : (check(*argv));
1330 } else if (sort_opts_vals.mflag) {
1331 struct file_list fl;
1333 file_list_init(&fl, false);
1334 file_list_populate(&fl, argc, argv, true);
1335 merge_files(&fl, outfile);
1336 file_list_clean(&fl);
1339 if (real_outfile) {
1340 unlink(real_outfile);
1341 if (rename(outfile, real_outfile) < 0)
1342 err(2, NULL);
1343 sort_free(real_outfile);
1346 sort_free(outfile);
1348 return (result);