kernel - Validate that previous pte was cleaned.
[dragonfly.git] / usr.bin / sort / sort.c
blobd5340862653e240ec7670f837b8bbd7859a18312
/*-
 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $
 */
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
55 #ifndef WITHOUT_NLS
56 #include <nl_types.h>
57 nl_catd catalog;
58 #endif
60 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"
62 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
63 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
65 static bool need_random;
66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
67 static const void *random_seed;
68 static size_t random_seed_size;
70 MD5_CTX md5_ctx;
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  The number in front of each entry is its index in the
 * array, which is the value passed to getstr().
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
100 struct sort_opts sort_opts_vals;
102 bool debug_sort;
103 bool need_hint;
105 #if defined(SORT_THREADS)
106 unsigned int ncpu = 1;
107 size_t nthreads = 1;
108 #endif
110 static bool gnusort_numeric_compatibility;
112 static struct sort_mods default_sort_mods_object;
113 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
115 static bool print_symbols_on_debug;
118 * Arguments from file (when file0-from option is used:
120 static size_t argc_from_file0 = (size_t)-1;
121 static char **argv_from_file0;
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Values start above CHAR_MAX so they cannot collide with a short option.
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
/*
 * Ordering modifiers of which at most one may be given; enforced by
 * check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS	6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
149 static struct option long_options[] = {
150 { "batch-size", required_argument, NULL, BS_OPT },
151 { "buffer-size", required_argument, NULL, 'S' },
152 { "check", optional_argument, NULL, 'c' },
153 { "check=silent|quiet", optional_argument, NULL, 'C' },
154 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
155 { "debug", no_argument, NULL, DEBUG_OPT },
156 { "dictionary-order", no_argument, NULL, 'd' },
157 { "field-separator", required_argument, NULL, 't' },
158 { "files0-from", required_argument, NULL, FF_OPT },
159 { "general-numeric-sort", no_argument, NULL, 'g' },
160 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
161 { "help",no_argument, NULL, HELP_OPT },
162 { "human-numeric-sort", no_argument, NULL, 'h' },
163 { "ignore-leading-blanks", no_argument, NULL, 'b' },
164 { "ignore-case", no_argument, NULL, 'f' },
165 { "ignore-nonprinting", no_argument, NULL, 'i' },
166 { "key", required_argument, NULL, 'k' },
167 { "merge", no_argument, NULL, 'm' },
168 { "mergesort", no_argument, NULL, MERGESORT_OPT },
169 { "mmap", no_argument, NULL, MMAP_OPT },
170 { "month-sort", no_argument, NULL, 'M' },
171 { "numeric-sort", no_argument, NULL, 'n' },
172 { "output", required_argument, NULL, 'o' },
173 #if defined(SORT_THREADS)
174 { "parallel", required_argument, NULL, PARALLEL_OPT },
175 #endif
176 { "qsort", no_argument, NULL, QSORT_OPT },
177 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
178 { "random-sort", no_argument, NULL, 'R' },
179 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
180 { "reverse", no_argument, NULL, 'r' },
181 { "sort", required_argument, NULL, SORT_OPT },
182 { "stable", no_argument, NULL, 's' },
183 { "temporary-directory",required_argument, NULL, 'T' },
184 { "unique", no_argument, NULL, 'u' },
185 { "version", no_argument, NULL, VERSION_OPT },
186 { "version-sort",no_argument, NULL, 'V' },
187 { "zero-terminated", no_argument, NULL, 'z' },
188 { NULL, no_argument, NULL, 0 }
/* Rewrites obsolete "+POS1 [-POS2]" arguments in place; defined below. */
void fix_obsolete_keys(int *argc, char **argv);
194 * Check where sort modifier is present
196 static bool
197 sort_modifier_empty(struct sort_mods *sm)
200 if (sm == NULL)
201 return (true);
202 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
/*
 * Print out usage text and terminate the program.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;

	fprintf(out, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
227 * Read input file names from a file (file0-from option).
229 static void
230 read_fns_from_file0(const char *fn)
232 FILE *f;
233 char *line = NULL;
234 size_t linesize = 0;
235 ssize_t linelen;
237 if (fn == NULL)
238 return;
240 f = fopen(fn, "r");
241 if (f == NULL)
242 err(2, "%s", fn);
244 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
245 if (*line != '\0') {
246 if (argc_from_file0 == (size_t) - 1)
247 argc_from_file0 = 0;
248 ++argc_from_file0;
249 argv_from_file0 = sort_realloc(argv_from_file0,
250 argc_from_file0 * sizeof(char *));
251 if (argv_from_file0 == NULL)
252 err(2, NULL);
253 argv_from_file0[argc_from_file0 - 1] = line;
254 } else {
255 free(line);
257 line = NULL;
258 linesize = 0;
260 if (ferror(f))
261 err(2, "%s: getdelim", fn);
263 closefile(f, fn);
267 * Check how much RAM is available for the sort.
269 static void
270 set_hw_params(void)
272 long pages, psize;
274 pages = psize = 0;
276 #if defined(SORT_THREADS)
277 ncpu = 1;
278 #endif
280 pages = sysconf(_SC_PHYS_PAGES);
281 if (pages < 1) {
282 perror("sysconf pages");
283 psize = 1;
285 psize = sysconf(_SC_PAGESIZE);
286 if (psize < 1) {
287 perror("sysconf psize");
288 psize = 4096;
290 #if defined(SORT_THREADS)
291 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
292 if (ncpu < 1)
293 ncpu = 1;
294 else if(ncpu > 32)
295 ncpu = 32;
297 nthreads = ncpu;
298 #endif
300 free_memory = (unsigned long long) pages * (unsigned long long) psize;
301 available_free_memory = free_memory / 2;
303 if (available_free_memory < 1024)
304 available_free_memory = 1024;
/*
 * Convert "plain" symbol to wide symbol, with default value.
 *
 * Converts the first multibyte character of c into *wc.  When the
 * conversion fails or c is an empty string, *wc is set to def instead.
 * Nothing is done when either pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
324 * Set current locale symbols.
326 static void
327 set_locale(void)
329 struct lconv *lc;
330 const char *locale;
332 setlocale(LC_ALL, "");
334 lc = localeconv();
336 if (lc) {
337 /* obtain LC_NUMERIC info */
338 /* Convert to wide char form */
339 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
340 symbol_decimal_point);
341 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
342 symbol_thousands_sep);
343 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
344 symbol_positive_sign);
345 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
346 symbol_negative_sign);
349 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
350 gnusort_numeric_compatibility = true;
352 locale = setlocale(LC_COLLATE, NULL);
354 if (locale) {
355 char *tmpl;
356 const char *cclocale;
358 tmpl = sort_strdup(locale);
359 cclocale = setlocale(LC_COLLATE, "C");
360 if (cclocale && !strcmp(cclocale, tmpl))
361 byte_sort = true;
362 else {
363 const char *pclocale;
365 pclocale = setlocale(LC_COLLATE, "POSIX");
366 if (pclocale && !strcmp(pclocale, tmpl))
367 byte_sort = true;
369 setlocale(LC_COLLATE, tmpl);
370 sort_free(tmpl);
375 * Set directory temporary files.
377 static void
378 set_tmpdir(void)
380 char *td;
382 td = getenv("TMPDIR");
383 if (td != NULL)
384 tmpdir = sort_strdup(td);
388 * Parse -S option.
390 static unsigned long long
391 parse_memory_buffer_value(const char *value)
394 if (value == NULL)
395 return (available_free_memory);
396 else {
397 char *endptr;
398 unsigned long long membuf;
400 endptr = NULL;
401 errno = 0;
402 membuf = strtoll(value, &endptr, 10);
404 if (errno != 0) {
405 warn("%s",getstr(4));
406 membuf = available_free_memory;
407 } else {
408 switch (*endptr){
409 case 'Y':
410 membuf *= 1024;
411 /* FALLTHROUGH */
412 case 'Z':
413 membuf *= 1024;
414 /* FALLTHROUGH */
415 case 'E':
416 membuf *= 1024;
417 /* FALLTHROUGH */
418 case 'P':
419 membuf *= 1024;
420 /* FALLTHROUGH */
421 case 'T':
422 membuf *= 1024;
423 /* FALLTHROUGH */
424 case 'G':
425 membuf *= 1024;
426 /* FALLTHROUGH */
427 case 'M':
428 membuf *= 1024;
429 /* FALLTHROUGH */
430 case '\0':
431 case 'K':
432 membuf *= 1024;
433 /* FALLTHROUGH */
434 case 'b':
435 break;
436 case '%':
437 membuf = (available_free_memory * membuf) /
438 100;
439 break;
440 default:
441 warnc(EINVAL, "%s", optarg);
442 membuf = available_free_memory;
445 return (membuf);
450 * Signal handler that clears the temporary files.
452 static void
453 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
454 void *context __unused)
457 clear_tmp_files();
458 exit(-1);
462 * Set signal handler on panic signals.
464 static void
465 set_signal_handler(void)
467 struct sigaction sa;
469 memset(&sa, 0, sizeof(sa));
470 sa.sa_sigaction = &sig_handler;
471 sa.sa_flags = SA_SIGINFO;
473 if (sigaction(SIGTERM, &sa, NULL) < 0) {
474 perror("sigaction");
475 return;
477 if (sigaction(SIGHUP, &sa, NULL) < 0) {
478 perror("sigaction");
479 return;
481 if (sigaction(SIGINT, &sa, NULL) < 0) {
482 perror("sigaction");
483 return;
485 if (sigaction(SIGQUIT, &sa, NULL) < 0) {
486 perror("sigaction");
487 return;
489 if (sigaction(SIGABRT, &sa, NULL) < 0) {
490 perror("sigaction");
491 return;
493 if (sigaction(SIGBUS, &sa, NULL) < 0) {
494 perror("sigaction");
495 return;
497 if (sigaction(SIGSEGV, &sa, NULL) < 0) {
498 perror("sigaction");
499 return;
501 if (sigaction(SIGUSR1, &sa, NULL) < 0) {
502 perror("sigaction");
503 return;
505 if (sigaction(SIGUSR2, &sa, NULL) < 0) {
506 perror("sigaction");
507 return;
/*
 * Print "unknown" message and exit with status 2.
 *
 * what is the offending option argument; it is appended to message 3.
 */
static void
unknown(const char *what)
{

	errx(2, "%s: %s", getstr(3), what);
}
522 * Check whether contradictory input options are used.
524 static void
525 check_mutually_exclusive_flags(char c, bool *mef_flags)
527 int fo_index, mec;
528 bool found_others, found_this;
530 found_others = found_this = false;
531 fo_index = 0;
533 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
534 mec = mutually_exclusive_flags[i];
536 if (mec != c) {
537 if (mef_flags[i]) {
538 if (found_this)
539 errx(1, "%c:%c: %s", c, mec, getstr(1));
540 found_others = true;
541 fo_index = i;
543 } else {
544 if (found_others)
545 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
546 mef_flags[i] = true;
547 found_this = true;
553 * Initialise sort opts data.
555 static void
556 set_sort_opts(void)
559 memset(&default_sort_mods_object, 0,
560 sizeof(default_sort_mods_object));
561 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
562 default_sort_mods_object.func =
563 get_sort_func(&default_sort_mods_object);
567 * Set a sort modifier on a sort modifiers object.
569 static bool
570 set_sort_modifier(struct sort_mods *sm, int c)
573 if (sm) {
574 switch (c){
575 case 'b':
576 sm->bflag = true;
577 break;
578 case 'd':
579 sm->dflag = true;
580 break;
581 case 'f':
582 sm->fflag = true;
583 break;
584 case 'g':
585 sm->gflag = true;
586 need_hint = true;
587 break;
588 case 'i':
589 sm->iflag = true;
590 break;
591 case 'R':
592 sm->Rflag = true;
593 need_random = true;
594 break;
595 case 'M':
596 initialise_months();
597 sm->Mflag = true;
598 need_hint = true;
599 break;
600 case 'n':
601 sm->nflag = true;
602 need_hint = true;
603 print_symbols_on_debug = true;
604 break;
605 case 'r':
606 sm->rflag = true;
607 break;
608 case 'V':
609 sm->Vflag = true;
610 break;
611 case 'h':
612 sm->hflag = true;
613 need_hint = true;
614 print_symbols_on_debug = true;
615 break;
616 default:
617 return false;
619 sort_opts_vals.complex_sort = true;
620 sm->func = get_sort_func(sm);
622 return (true);
626 * Parse POS in -k option.
628 static int
629 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
631 regmatch_t pmatch[4];
632 regex_t re;
633 char *c, *f;
634 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
635 size_t len, nmatch;
636 int ret;
638 ret = -1;
639 nmatch = 4;
640 c = f = NULL;
642 if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
643 return (-1);
645 if (regexec(&re, s, nmatch, pmatch, 0) != 0)
646 goto end;
648 if (pmatch[0].rm_eo <= pmatch[0].rm_so)
649 goto end;
651 if (pmatch[1].rm_eo <= pmatch[1].rm_so)
652 goto end;
654 len = pmatch[1].rm_eo - pmatch[1].rm_so;
655 f = sort_malloc((len + 1) * sizeof(char));
657 strncpy(f, s + pmatch[1].rm_so, len);
658 f[len] = '\0';
660 if (second) {
661 errno = 0;
662 ks->f2 = (size_t) strtoul(f, NULL, 10);
663 if (errno != 0)
664 err(2, "-k");
665 if (ks->f2 == 0) {
666 warn("%s",getstr(5));
667 goto end;
669 } else {
670 errno = 0;
671 ks->f1 = (size_t) strtoul(f, NULL, 10);
672 if (errno != 0)
673 err(2, "-k");
674 if (ks->f1 == 0) {
675 warn("%s",getstr(5));
676 goto end;
680 if (pmatch[2].rm_eo > pmatch[2].rm_so) {
681 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
682 c = sort_malloc((len + 1) * sizeof(char));
684 strncpy(c, s + pmatch[2].rm_so + 1, len);
685 c[len] = '\0';
687 if (second) {
688 errno = 0;
689 ks->c2 = (size_t) strtoul(c, NULL, 10);
690 if (errno != 0)
691 err(2, "-k");
692 } else {
693 errno = 0;
694 ks->c1 = (size_t) strtoul(c, NULL, 10);
695 if (errno != 0)
696 err(2, "-k");
697 if (ks->c1 == 0) {
698 warn("%s",getstr(6));
699 goto end;
702 } else {
703 if (second)
704 ks->c2 = 0;
705 else
706 ks->c1 = 1;
709 if (pmatch[3].rm_eo > pmatch[3].rm_so) {
710 regoff_t i = 0;
712 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
713 check_mutually_exclusive_flags(s[i], mef_flags);
714 if (s[i] == 'b') {
715 if (second)
716 ks->pos2b = true;
717 else
718 ks->pos1b = true;
719 } else if (!set_sort_modifier(&(ks->sm), s[i]))
720 goto end;
724 ret = 0;
726 end:
728 if (c)
729 sort_free(c);
730 if (f)
731 sort_free(f);
732 regfree(&re);
734 return (ret);
738 * Parse -k option value.
740 static int
741 parse_k(const char *s, struct key_specs *ks)
743 int ret = -1;
744 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
745 { false, false, false, false, false, false };
747 if (s && *s) {
748 char *sptr;
750 sptr = strchr(s, ',');
751 if (sptr) {
752 size_t size1;
753 char *pos1, *pos2;
755 size1 = sptr - s;
757 if (size1 < 1)
758 return (-1);
759 pos1 = sort_malloc((size1 + 1) * sizeof(char));
761 strncpy(pos1, s, size1);
762 pos1[size1] = '\0';
764 ret = parse_pos(pos1, ks, mef_flags, false);
766 sort_free(pos1);
767 if (ret < 0)
768 return (ret);
770 pos2 = sort_strdup(sptr + 1);
771 ret = parse_pos(pos2, ks, mef_flags, true);
772 sort_free(pos2);
773 } else
774 ret = parse_pos(s, ks, mef_flags, false);
777 return (ret);
/*
 * Size of the buffers that receive the modifier letters of an obsolete
 * key position; parse_pos_obs() never writes more than this many bytes.
 */
#define	OBS_SOPTS_SIZE	128

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  On success *nf and *nc
 * receive the field and column numbers (0 when the column is absent)
 * and the letters are copied into sopts, which must provide
 * OBS_SOPTS_SIZE bytes; sopts is left untouched when there are none.
 *
 * Returns 0 on success and -1 when s does not have that form or carries
 * more letters than sopts can hold.  A number too large to be stored and
 * then incremented by the caller terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	int ret;
	size_t len, nmatch;
	unsigned long val;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/* The caller adds 1 to the result, so INT_MAX itself is too big. */
	errno = 0;
	val = strtoul(f, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Refuse input that would overrun the caller's buffer. */
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-kF.C[letters]" string, where F and C are the obsolete (zero-based)
 * numbers plus one.  If the next argument has the form -POS2 it is
 * folded into the same key as ",F.C[letters]" and removed from argv;
 * its field number is incremented only when its column is non-zero.
 * *argc is updated and argv[*argc] is reset to NULL after a removal.
 * Arguments that do not parse are left alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for "-k", four numbers, punctuation and both letter sets. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		int c1, f1;
		char sopts1[OBS_SOPTS_SIZE];

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;
		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 &&
			    arg2[0] == '-') {
				int c2, f2;
				char sopts2[OBS_SOPTS_SIZE];

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Drop argv[i + 1] from the vector. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					argv[*argc] = NULL;
					continue;
				}
			}
		}
		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
912 * Set random seed
914 static void
915 set_random_seed(void)
917 if (need_random) {
919 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
920 FILE* fseed;
921 MD5_CTX ctx;
922 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
923 size_t sz = 0;
925 fseed = openfile(random_source, "r");
926 while (!feof(fseed)) {
927 int cr;
929 cr = fgetc(fseed);
930 if (cr == EOF)
931 break;
933 rsd[sz++] = (char) cr;
935 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
936 break;
939 closefile(fseed, random_source);
941 MD5Init(&ctx);
942 MD5Update(&ctx, rsd, sz);
944 random_seed = MD5End(&ctx, NULL);
945 random_seed_size = strlen(random_seed);
947 } else {
948 MD5_CTX ctx;
949 char *b;
951 MD5Init(&ctx);
952 b = MD5File(random_source, NULL);
953 if (b == NULL)
954 err(2, NULL);
956 random_seed = b;
957 random_seed_size = strlen(b);
960 MD5Init(&md5_ctx);
961 if(random_seed_size>0) {
962 MD5Update(&md5_ctx, random_seed, random_seed_size);
968 * Main function.
971 main(int argc, char **argv)
973 char *outfile, *real_outfile;
974 int c, result;
975 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
976 { false, false, false, false, false, false };
978 result = 0;
979 outfile = sort_strdup("-");
980 real_outfile = NULL;
982 struct sort_mods *sm = &default_sort_mods_object;
984 init_tmp_files();
986 set_signal_handler();
988 set_hw_params();
989 set_locale();
990 set_tmpdir();
991 set_sort_opts();
993 fix_obsolete_keys(&argc, argv);
995 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
996 != -1)) {
998 check_mutually_exclusive_flags(c, mef_flags);
1000 if (!set_sort_modifier(sm, c)) {
1002 switch (c) {
1003 case 'c':
1004 sort_opts_vals.cflag = true;
1005 if (optarg) {
1006 if (!strcmp(optarg, "diagnose-first"))
1008 else if (!strcmp(optarg, "silent") ||
1009 !strcmp(optarg, "quiet"))
1010 sort_opts_vals.csilentflag = true;
1011 else if (*optarg)
1012 unknown(optarg);
1014 break;
1015 case 'C':
1016 sort_opts_vals.cflag = true;
1017 sort_opts_vals.csilentflag = true;
1018 break;
1019 case 'k':
1021 sort_opts_vals.complex_sort = true;
1022 sort_opts_vals.kflag = true;
1024 keys_num++;
1025 keys = sort_realloc(keys, keys_num *
1026 sizeof(struct key_specs));
1027 memset(&(keys[keys_num - 1]), 0,
1028 sizeof(struct key_specs));
1030 if (parse_k(optarg, &(keys[keys_num - 1]))
1031 < 0) {
1032 errc(2, EINVAL, "-k %s", optarg);
1035 break;
1037 case 'm':
1038 sort_opts_vals.mflag = true;
1039 break;
1040 case 'o':
1041 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1042 strcpy(outfile, optarg);
1043 break;
1044 case 's':
1045 sort_opts_vals.sflag = true;
1046 break;
1047 case 'S':
1048 available_free_memory =
1049 parse_memory_buffer_value(optarg);
1050 break;
1051 case 'T':
1052 tmpdir = sort_strdup(optarg);
1053 break;
1054 case 't':
1055 while (strlen(optarg) > 1) {
1056 if (optarg[0] != '\\') {
1057 errc(2, EINVAL, "%s", optarg);
1059 optarg += 1;
1060 if (*optarg == '0') {
1061 *optarg = 0;
1062 break;
1065 sort_opts_vals.tflag = true;
1066 sort_opts_vals.field_sep = btowc(optarg[0]);
1067 if (sort_opts_vals.field_sep == WEOF) {
1068 errno = EINVAL;
1069 err(2, NULL);
1071 if (!gnusort_numeric_compatibility) {
1072 if (symbol_decimal_point == sort_opts_vals.field_sep)
1073 symbol_decimal_point = WEOF;
1074 if (symbol_thousands_sep == sort_opts_vals.field_sep)
1075 symbol_thousands_sep = WEOF;
1076 if (symbol_negative_sign == sort_opts_vals.field_sep)
1077 symbol_negative_sign = WEOF;
1078 if (symbol_positive_sign == sort_opts_vals.field_sep)
1079 symbol_positive_sign = WEOF;
1081 break;
1082 case 'u':
1083 sort_opts_vals.uflag = true;
1084 /* stable sort for the correct unique val */
1085 sort_opts_vals.sflag = true;
1086 break;
1087 case 'z':
1088 sort_opts_vals.zflag = true;
1089 break;
1090 case SORT_OPT:
1091 if (optarg) {
1092 if (!strcmp(optarg, "general-numeric"))
1093 set_sort_modifier(sm, 'g');
1094 else if (!strcmp(optarg, "human-numeric"))
1095 set_sort_modifier(sm, 'h');
1096 else if (!strcmp(optarg, "numeric"))
1097 set_sort_modifier(sm, 'n');
1098 else if (!strcmp(optarg, "month"))
1099 set_sort_modifier(sm, 'M');
1100 else if (!strcmp(optarg, "random"))
1101 set_sort_modifier(sm, 'R');
1102 else
1103 unknown(optarg);
1105 break;
1106 #if defined(SORT_THREADS)
1107 case PARALLEL_OPT:
1108 nthreads = (size_t)(atoi(optarg));
1109 if (nthreads < 1)
1110 nthreads = 1;
1111 if (nthreads > 1024)
1112 nthreads = 1024;
1113 break;
1114 #endif
1115 case QSORT_OPT:
1116 sort_opts_vals.sort_method = SORT_QSORT;
1117 break;
1118 case MERGESORT_OPT:
1119 sort_opts_vals.sort_method = SORT_MERGESORT;
1120 break;
1121 case MMAP_OPT:
1122 use_mmap = true;
1123 break;
1124 case HEAPSORT_OPT:
1125 sort_opts_vals.sort_method = SORT_HEAPSORT;
1126 break;
1127 case RADIXSORT_OPT:
1128 sort_opts_vals.sort_method = SORT_RADIXSORT;
1129 break;
1130 case RANDOMSOURCE_OPT:
1131 random_source = strdup(optarg);
1132 break;
1133 case COMPRESSPROGRAM_OPT:
1134 compress_program = strdup(optarg);
1135 break;
1136 case FF_OPT:
1137 read_fns_from_file0(optarg);
1138 break;
1139 case BS_OPT:
1141 errno = 0;
1142 long mof = strtol(optarg, NULL, 10);
1143 if (errno != 0)
1144 err(2, "--batch-size");
1145 if (mof >= 2)
1146 max_open_files = (size_t) mof + 1;
1148 break;
1149 case VERSION_OPT:
1150 printf("%s\n", VERSION);
1151 exit(EXIT_SUCCESS);
1152 /* NOTREACHED */
1153 break;
1154 case DEBUG_OPT:
1155 debug_sort = true;
1156 break;
1157 case HELP_OPT:
1158 usage(false);
1159 /* NOTREACHED */
1160 break;
1161 default:
1162 usage(true);
1163 /* NOTREACHED */
1168 argc -= optind;
1169 argv += optind;
1171 #ifndef WITHOUT_NLS
1172 catalog = catopen("sort", NL_CAT_LOCALE);
1173 #endif
1175 if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1176 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1178 #ifndef WITHOUT_NLS
1179 catclose(catalog);
1180 #endif
1182 if (keys_num == 0) {
1183 keys_num = 1;
1184 keys = sort_realloc(keys, sizeof(struct key_specs));
1185 memset(&(keys[0]), 0, sizeof(struct key_specs));
1186 keys[0].c1 = 1;
1187 keys[0].pos1b = default_sort_mods->bflag;
1188 keys[0].pos2b = default_sort_mods->bflag;
1189 memcpy(&(keys[0].sm), default_sort_mods,
1190 sizeof(struct sort_mods));
1193 for (size_t i = 0; i < keys_num; i++) {
1194 struct key_specs *ks;
1196 ks = &(keys[i]);
1198 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1199 !(ks->pos2b)) {
1200 ks->pos1b = sm->bflag;
1201 ks->pos2b = sm->bflag;
1202 memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1205 ks->sm.func = get_sort_func(&(ks->sm));
1208 if (argv_from_file0) {
1209 argc = argc_from_file0;
1210 argv = argv_from_file0;
1213 if (debug_sort) {
1214 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1215 #if defined(SORT_THREADS)
1216 printf("Number of CPUs: %d\n",(int)ncpu);
1217 nthreads = 1;
1218 #endif
1219 printf("Using collate rules of %s locale\n",
1220 setlocale(LC_COLLATE, NULL));
1221 if (byte_sort)
1222 printf("Byte sort is used\n");
1223 if (print_symbols_on_debug) {
1224 printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1225 if (symbol_thousands_sep)
1226 printf("Thousands separator: <%lc>\n",
1227 symbol_thousands_sep);
1228 printf("Positive sign: <%lc>\n", symbol_positive_sign);
1229 printf("Negative sign: <%lc>\n", symbol_negative_sign);
1233 set_random_seed();
1235 /* Case when the outfile equals one of the input files: */
1236 if (strcmp(outfile, "-")) {
1238 for(int i = 0; i < argc; ++i) {
1239 if (strcmp(argv[i], outfile) == 0) {
1240 real_outfile = sort_strdup(outfile);
1241 for(;;) {
1242 char* tmp = sort_malloc(strlen(outfile) +
1243 strlen(".tmp") + 1);
1245 strcpy(tmp, outfile);
1246 strcpy(tmp + strlen(tmp), ".tmp");
1247 sort_free(outfile);
1248 outfile = tmp;
1249 if (access(outfile, F_OK) < 0)
1250 break;
1252 tmp_file_atexit(outfile);
1257 #if defined(SORT_THREADS)
1258 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1259 nthreads = 1;
1260 #endif
1262 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1263 struct file_list fl;
1264 struct sort_list list;
1266 sort_list_init(&list);
1267 file_list_init(&fl, true);
1269 if (argc < 1)
1270 procfile("-", &list, &fl);
1271 else {
1272 while (argc > 0) {
1273 procfile(*argv, &list, &fl);
1274 --argc;
1275 ++argv;
1279 if (fl.count < 1)
1280 sort_list_to_file(&list, outfile);
1281 else {
1282 if (list.count > 0) {
1283 char *flast = new_tmp_file_name();
1285 sort_list_to_file(&list, flast);
1286 file_list_add(&fl, flast, false);
1288 merge_files(&fl, outfile);
1291 file_list_clean(&fl);
1294 * We are about to exit the program, so we can ignore
1295 * the clean-up for speed
1297 * sort_list_clean(&list);
1300 } else if (sort_opts_vals.cflag) {
1301 result = (argc == 0) ? (check("-")) : (check(*argv));
1302 } else if (sort_opts_vals.mflag) {
1303 struct file_list fl;
1305 file_list_init(&fl, false);
1306 file_list_populate(&fl, argc, argv, true);
1307 merge_files(&fl, outfile);
1308 file_list_clean(&fl);
1311 if (real_outfile) {
1312 unlink(real_outfile);
1313 if (rename(outfile, real_outfile) < 0)
1314 err(2, NULL);
1315 sort_free(real_outfile);
1318 sort_free(outfile);
1320 return (result);