Fix UTIME_OMIT handling
[dragonfly.git] / usr.bin / sort / coll.c
blob1ba7e601eff1181130112d4ea3bd28a9fb72f67b
1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
27 * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $
31 #include <sys/types.h>
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 #if defined(SORT_RANDOM)
43 #include <openssl/md5.h>
44 #endif
46 #include "coll.h"
47 #include "vsort.h"
49 struct key_specs *keys;
50 size_t keys_num = 0;
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 #if defined(SORT_RANDOM)
64 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
65 #endif
66 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
69 * Allocate keys array
71 struct keys_array *
72 keys_array_alloc(void)
74 struct keys_array *ka;
75 size_t sz;
77 sz = keys_array_size();
78 ka = sort_malloc(sz);
79 memset(ka, 0, sz);
81 return (ka);
85 * Calculate whether we need key hint space
87 static size_t
88 key_hint_size(void)
91 return (need_hint ? sizeof(struct key_hint) : 0);
95 * Calculate keys array size
97 size_t
98 keys_array_size(void)
101 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
105 * Clean data of keys array
107 void
108 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
111 if (ka) {
112 for (size_t i = 0; i < keys_num; ++i)
113 if (ka->key[i].k && ka->key[i].k != s)
114 bwsfree(ka->key[i].k);
115 memset(ka, 0, keys_array_size());
120 * Set value of a key in the keys set
122 void
123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
126 if (ka && keys_num > ind) {
127 struct key_value *kv;
129 kv = &(ka->key[ind]);
131 if (kv->k && kv->k != s)
132 bwsfree(kv->k);
133 kv->k = s;
138 * Initialize a sort list item
140 struct sort_list_item *
141 sort_list_item_alloc(void)
143 struct sort_list_item *si;
144 size_t sz;
146 sz = sizeof(struct sort_list_item) + keys_array_size();
147 si = sort_malloc(sz);
148 memset(si, 0, sz);
150 return (si);
153 size_t
154 sort_list_item_size(struct sort_list_item *si)
156 size_t ret = 0;
158 if (si) {
159 ret = sizeof(struct sort_list_item) + keys_array_size();
160 if (si->str)
161 ret += bws_memsize(si->str);
162 for (size_t i = 0; i < keys_num; ++i) {
163 struct key_value *kv;
165 kv = &(si->ka.key[i]);
167 if (kv->k != si->str)
168 ret += bws_memsize(kv->k);
171 return (ret);
175 * Calculate key for a sort list item
177 static void
178 sort_list_item_make_key(struct sort_list_item *si)
181 preproc(si->str, &(si->ka));
185 * Set value of a sort list item.
186 * Return combined string and keys memory size.
188 void
189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
192 if (si) {
193 clean_keys_array(si->str, &(si->ka));
194 if (si->str) {
195 if (si->str == str) {
196 /* we are trying to reset the same string */
197 return;
198 } else {
199 bwsfree(si->str);
200 si->str = NULL;
203 si->str = str;
204 sort_list_item_make_key(si);
209 * De-allocate a sort list item object memory
211 void
212 sort_list_item_clean(struct sort_list_item *si)
215 if (si) {
216 clean_keys_array(si->str, &(si->ka));
217 if (si->str) {
218 bwsfree(si->str);
219 si->str = NULL;
225 * Skip columns according to specs
227 static size_t
228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
229 bool skip_blanks, bool *empty_key)
231 if (cols < 1)
232 return (BWSLEN(s) + 1);
234 if (skip_blanks)
235 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
236 ++start;
238 while (start < BWSLEN(s) && cols > 1) {
239 --cols;
240 ++start;
243 if (start >= BWSLEN(s))
244 *empty_key = true;
246 return (start);
250 * Skip fields according to specs
252 static size_t
253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
256 if (fields < 2) {
257 if (BWSLEN(s) == 0)
258 *empty_field = true;
259 return (0);
260 } else if (!(sort_opts_vals.tflag)) {
261 size_t cpos = 0;
262 bool pb = true;
264 while (cpos < BWSLEN(s)) {
265 bool isblank;
267 isblank = iswblank(BWS_GET(s, cpos));
269 if (isblank && !pb) {
270 --fields;
271 if (fields <= 1)
272 return (cpos);
274 pb = isblank;
275 ++cpos;
277 if (fields > 1)
278 *empty_field = true;
279 return (cpos);
280 } else {
281 size_t cpos = 0;
283 while (cpos < BWSLEN(s)) {
284 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
285 --fields;
286 if (fields <= 1)
287 return (cpos + 1);
289 ++cpos;
291 if (fields > 1)
292 *empty_field = true;
293 return (cpos);
298 * Find fields start
300 static void
301 find_field_start(const struct bwstring *s, struct key_specs *ks,
302 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
305 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
306 if (!*empty_field)
307 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
308 ks->pos1b, empty_key);
309 else
310 *empty_key = true;
314 * Find end key position
316 static size_t
317 find_field_end(const struct bwstring *s, struct key_specs *ks)
319 size_t f2, next_field_start, pos_end;
320 bool empty_field, empty_key;
322 pos_end = 0;
323 next_field_start = 0;
324 empty_field = false;
325 empty_key = false;
326 f2 = ks->f2;
328 if (f2 == 0)
329 return (BWSLEN(s) + 1);
330 else {
331 if (ks->c2 == 0) {
332 next_field_start = skip_fields_to_start(s, f2 + 1,
333 &empty_field);
334 if ((next_field_start > 0) && sort_opts_vals.tflag &&
335 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
336 next_field_start - 1)))
337 --next_field_start;
338 } else
339 next_field_start = skip_fields_to_start(s, f2,
340 &empty_field);
343 if (empty_field || (next_field_start >= BWSLEN(s)))
344 return (BWSLEN(s) + 1);
346 if (ks->c2) {
347 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
348 ks->pos2b, &empty_key);
349 if (pos_end < BWSLEN(s))
350 ++pos_end;
351 } else
352 pos_end = next_field_start;
354 return (pos_end);
358 * Cut a field according to the key specs
360 static struct bwstring *
361 cut_field(const struct bwstring *s, struct key_specs *ks)
363 struct bwstring *ret = NULL;
365 if (s && ks) {
366 size_t field_start, key_end, key_start, sz;
367 bool empty_field, empty_key;
369 field_start = 0;
370 key_start = 0;
371 empty_field = false;
372 empty_key = false;
374 find_field_start(s, ks, &field_start, &key_start,
375 &empty_field, &empty_key);
377 if (empty_key)
378 sz = 0;
379 else {
380 key_end = find_field_end(s, ks);
381 sz = (key_end < key_start) ? 0 : (key_end - key_start);
384 ret = bwsalloc(sz);
385 if (sz)
386 bwsnocpy(ret, s, key_start, sz);
387 } else
388 ret = bwsalloc(0);
390 return (ret);
394 * Preprocesses a line applying the necessary transformations
395 * specified by command line options and returns the preprocessed
396 * string, which can be used to compare.
399 preproc(struct bwstring *s, struct keys_array *ka)
402 if (sort_opts_vals.kflag)
403 for (size_t i = 0; i < keys_num; i++) {
404 struct bwstring *key;
405 struct key_specs *kspecs;
406 struct sort_mods *sm;
408 kspecs = &(keys[i]);
409 key = cut_field(s, kspecs);
411 sm = &(kspecs->sm);
412 if (sm->dflag)
413 key = dictionary_order(key);
414 else if (sm->iflag)
415 key = ignore_nonprinting(key);
416 if (sm->fflag || sm->Mflag)
417 key = ignore_case(key);
419 set_key_on_keys_array(ka, key, i);
421 else {
422 struct bwstring *ret = NULL;
423 struct sort_mods *sm = default_sort_mods;
425 if (sm->bflag) {
426 if (ret == NULL)
427 ret = bwsdup(s);
428 ret = ignore_leading_blanks(ret);
430 if (sm->dflag) {
431 if (ret == NULL)
432 ret = bwsdup(s);
433 ret = dictionary_order(ret);
434 } else if (sm->iflag) {
435 if (ret == NULL)
436 ret = bwsdup(s);
437 ret = ignore_nonprinting(ret);
439 if (sm->fflag || sm->Mflag) {
440 if (ret == NULL)
441 ret = bwsdup(s);
442 ret = ignore_case(ret);
444 if (ret == NULL)
445 set_key_on_keys_array(ka, s, 0);
446 else
447 set_key_on_keys_array(ka, ret, 0);
450 return 0;
453 cmpcoll_t
454 get_sort_func(struct sort_mods *sm)
457 if (sm->nflag)
458 return (numcoll);
459 else if (sm->hflag)
460 return (hnumcoll);
461 else if (sm->gflag)
462 return (gnumcoll);
463 else if (sm->Mflag)
464 return (monthcoll);
465 #if defined(SORT_RANDOM)
466 else if (sm->Rflag)
467 return (randomcoll);
468 #endif
469 else if (sm->Vflag)
470 return (versioncoll);
471 else
472 return (wstrcoll);
476 * Compares the given strings. Returns a positive number if
477 * the first precedes the second, a negative number if the second is
478 * the preceding one, and zero if they are equal. This function calls
479 * the underlying collate functions, which done the actual comparison.
482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
484 struct sort_mods *sm;
485 int res = 0;
487 for (size_t i = 0; i < keys_num; ++i) {
488 sm = &(keys[i].sm);
490 if (sm->rflag)
491 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
492 else
493 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
495 if (res)
496 break;
498 /* offset applies to only the first key */
499 offset = 0;
501 return (res);
505 * Compare two strings.
506 * Plain symbol-by-symbol comparison.
509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
512 if (default_sort_mods->rflag) {
513 const struct bwstring *tmp;
515 tmp = s1;
516 s1 = s2;
517 s2 = tmp;
520 return (bwscoll(s1, s2, 0));
524 * Compare a string and a sort list item, according to the sort specs.
527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
529 struct keys_array *ka1;
530 int ret = 0;
532 ka1 = keys_array_alloc();
534 preproc(str1, ka1);
536 sort_list_item_make_key(*ss2);
538 if (debug_sort) {
539 bwsprintf(stdout, str1, "; s1=<", ">");
540 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
543 ret = key_coll(ka1, &((*ss2)->ka), 0);
545 if (debug_sort)
546 printf("; cmp1=%d", ret);
548 clean_keys_array(str1, ka1);
549 sort_free(ka1);
551 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
552 ret = top_level_str_coll(str1, ((*ss2)->str));
553 if (debug_sort)
554 printf("; cmp2=%d", ret);
557 if (debug_sort)
558 printf("\n");
560 return (ret);
564 * Compare two sort list items, according to the sort specs.
567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
568 size_t offset)
570 int ret;
572 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
574 if (debug_sort) {
575 if (offset)
576 printf("; offset=%d", (int) offset);
577 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
578 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
579 printf("; cmp1=%d\n", ret);
582 if (ret)
583 return (ret);
585 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
586 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
587 if (debug_sort)
588 printf("; cmp2=%d\n", ret);
591 return (ret);
595 * Compare two sort list items, according to the sort specs.
598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
601 return (list_coll_offset(ss1, ss2, 0));
604 #define LSCDEF(N) \
605 static int \
606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
609 return (list_coll_offset(ss1, ss2, N)); \
612 LSCDEF(1)
613 LSCDEF(2)
614 LSCDEF(3)
615 LSCDEF(4)
616 LSCDEF(5)
617 LSCDEF(6)
618 LSCDEF(7)
619 LSCDEF(8)
620 LSCDEF(9)
621 LSCDEF(10)
622 LSCDEF(11)
623 LSCDEF(12)
624 LSCDEF(13)
625 LSCDEF(14)
626 LSCDEF(15)
627 LSCDEF(16)
628 LSCDEF(17)
629 LSCDEF(18)
630 LSCDEF(19)
631 LSCDEF(20)
633 listcoll_t
634 get_list_call_func(size_t offset)
636 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
637 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
638 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
639 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
640 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
641 list_coll_18, list_coll_19, list_coll_20 };
643 if (offset <= 20)
644 return (lsarray[offset]);
646 return (list_coll);
650 * Compare two sort list items, only by their original string.
653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
656 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
660 * Maximum size of a number in the string (before or after decimal point)
662 #define MAX_NUM_SIZE (128)
665 * Set suffix value
667 static void setsuffix(wchar_t c, unsigned char *si)
669 switch (c){
670 case L'k':
671 case L'K':
672 *si = 1;
673 break;
674 case L'M':
675 *si = 2;
676 break;
677 case L'G':
678 *si = 3;
679 break;
680 case L'T':
681 *si = 4;
682 break;
683 case L'P':
684 *si = 5;
685 break;
686 case L'E':
687 *si = 6;
688 break;
689 case L'Z':
690 *si = 7;
691 break;
692 case L'Y':
693 *si = 8;
694 break;
695 default:
696 *si = 0;
701 * Read string s and parse the string into a fixed-decimal-point number.
702 * sign equals -1 if the number is negative (explicit plus is not allowed,
703 * according to GNU sort's "info sort".
704 * The number part before decimal point is in the smain, after the decimal
705 * point is in sfrac, tail is the pointer to the remainder of the string.
707 static int
708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
710 bwstring_iterator s;
712 s = bws_begin(s0);
714 /* always end the fraction with zero, even if we have no fraction */
715 sfrac[0] = 0;
717 while (iswblank(bws_get_iter_value(s)))
718 s = bws_iterator_inc(s, 1);
720 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
721 *sign = -1;
722 s = bws_iterator_inc(s, 1);
725 // This is '0', not '\0', do not change this
726 while (iswdigit(bws_get_iter_value(s)) &&
727 (bws_get_iter_value(s) == L'0'))
728 s = bws_iterator_inc(s, 1);
730 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
731 if (iswdigit(bws_get_iter_value(s))) {
732 smain[*main_len] = bws_get_iter_value(s);
733 s = bws_iterator_inc(s, 1);
734 *main_len += 1;
735 } else if (symbol_thousands_sep &&
736 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
737 s = bws_iterator_inc(s, 1);
738 else
739 break;
742 smain[*main_len] = 0;
744 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
745 s = bws_iterator_inc(s, 1);
746 while (iswdigit(bws_get_iter_value(s)) &&
747 *frac_len < MAX_NUM_SIZE) {
748 sfrac[*frac_len] = bws_get_iter_value(s);
749 s = bws_iterator_inc(s, 1);
750 *frac_len += 1;
752 sfrac[*frac_len] = 0;
754 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
755 --(*frac_len);
756 sfrac[*frac_len] = L'\0';
760 setsuffix(bws_get_iter_value(s),si);
762 if ((*main_len + *frac_len) == 0)
763 *sign = 0;
765 return (0);
769 * Implements string sort.
771 static int
772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
775 if (debug_sort) {
776 if (offset)
777 printf("; offset=%d\n", (int) offset);
778 bwsprintf(stdout, kv1->k, "; k1=<", ">");
779 printf("(%zu)", BWSLEN(kv1->k));
780 bwsprintf(stdout, kv2->k, ", k2=<", ">");
781 printf("(%zu)", BWSLEN(kv2->k));
784 return (bwscoll(kv1->k, kv2->k, offset));
788 * Compare two suffixes
790 static inline int
791 cmpsuffix(unsigned char si1, unsigned char si2)
794 return ((char)si1 - (char)si2);
798 * Implements numeric sort for -n and -h.
800 static int
801 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
802 size_t offset __unused, bool use_suffix)
804 struct bwstring *s1, *s2;
805 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
806 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
807 int cmp_res, sign1, sign2;
808 size_t frac1, frac2, main1, main2;
809 unsigned char SI1, SI2;
810 bool e1, e2, key1_read, key2_read;
812 s1 = kv1->k;
813 s2 = kv2->k;
814 sign1 = sign2 = 0;
815 main1 = main2 = 0;
816 frac1 = frac2 = 0;
818 cmp_res = 0;
819 key1_read = key2_read = false;
821 if (debug_sort) {
822 bwsprintf(stdout, s1, "; k1=<", ">");
823 bwsprintf(stdout, s2, ", k2=<", ">");
826 if (s1 == s2)
827 return (0);
829 if (kv1->hint->status == HS_UNINITIALIZED) {
830 /* read the number from the string */
831 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
832 key1_read = true;
833 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
834 if(main1 < 1 && frac1 < 1)
835 kv1->hint->v.nh.empty=true;
836 kv1->hint->v.nh.si = SI1;
837 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
838 HS_INITIALIZED : HS_ERROR;
839 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
842 if (kv2->hint->status == HS_UNINITIALIZED) {
843 /* read the number from the string */
844 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
845 key2_read = true;
846 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
847 if(main2 < 1 && frac2 < 1)
848 kv2->hint->v.nh.empty=true;
849 kv2->hint->v.nh.si = SI2;
850 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
851 HS_INITIALIZED : HS_ERROR;
852 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
855 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
856 HS_INITIALIZED) {
857 unsigned long long n1, n2;
858 bool neg1, neg2;
860 e1 = kv1->hint->v.nh.empty;
861 e2 = kv2->hint->v.nh.empty;
863 if (e1 && e2)
864 return (0);
866 neg1 = kv1->hint->v.nh.neg;
867 neg2 = kv2->hint->v.nh.neg;
869 if (neg1 && !neg2)
870 return (-1);
871 if (neg2 && !neg1)
872 return (+1);
874 if (e1)
875 return (neg2 ? +1 : -1);
876 else if (e2)
877 return (neg1 ? -1 : +1);
880 if (use_suffix) {
881 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
882 if (cmp_res)
883 return (neg1 ? -cmp_res : cmp_res);
886 n1 = kv1->hint->v.nh.n1;
887 n2 = kv2->hint->v.nh.n1;
888 if (n1 < n2)
889 return (neg1 ? +1 : -1);
890 else if (n1 > n2)
891 return (neg1 ? -1 : +1);
894 /* read the numbers from the strings */
895 if (!key1_read)
896 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
897 if (!key2_read)
898 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
900 e1 = ((main1 + frac1) == 0);
901 e2 = ((main2 + frac2) == 0);
903 if (e1 && e2)
904 return (0);
906 /* we know the result if the signs are different */
907 if (sign1 < 0 && sign2 >= 0)
908 return (-1);
909 if (sign1 >= 0 && sign2 < 0)
910 return (+1);
912 if (e1)
913 return ((sign2 < 0) ? +1 : -1);
914 else if (e2)
915 return ((sign1 < 0) ? -1 : +1);
917 if (use_suffix) {
918 cmp_res = cmpsuffix(SI1, SI2);
919 if (cmp_res)
920 return ((sign1 < 0) ? -cmp_res : cmp_res);
923 /* if both numbers are empty assume that the strings are equal */
924 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
925 return (0);
928 * if the main part is of different size, we know the result
929 * (because the leading zeros are removed)
931 if (main1 < main2)
932 cmp_res = -1;
933 else if (main1 > main2)
934 cmp_res = +1;
935 /* if the sizes are equal then simple non-collate string compare gives the correct result */
936 else
937 cmp_res = wcscmp(smain1, smain2);
939 /* check fraction */
940 if (!cmp_res)
941 cmp_res = wcscmp(sfrac1, sfrac2);
943 if (!cmp_res)
944 return (0);
946 /* reverse result if the signs are negative */
947 if (sign1 < 0 && sign2 < 0)
948 cmp_res = -cmp_res;
950 return (cmp_res);
954 * Implements numeric sort (-n).
956 static int
957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
960 return (numcoll_impl(kv1, kv2, offset, false));
964 * Implements 'human' numeric sort (-h).
966 static int
967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
970 return (numcoll_impl(kv1, kv2, offset, true));
974 * Implements random sort (-R).
976 #if defined(SORT_RANDOM)
977 static char *
978 randomcollend(MD5_CTX *ctx)
980 unsigned char digest[MD5_DIGEST_LENGTH];
981 static const char hex[]="0123456789abcdef";
982 char *buf;
983 int i;
985 buf = malloc(MD5_DIGEST_LENGTH * 2 + 1);
986 if (!buf)
987 return NULL;
988 MD5_Final(digest, ctx);
989 for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
990 buf[2*i] = hex[digest[i] >> 4];
991 buf[2*i+1] = hex[digest[i] & 0x0f];
993 buf[MD5_DIGEST_LENGTH * 2] = '\0';
994 return buf;
997 static int
998 randomcoll(struct key_value *kv1, struct key_value *kv2,
999 size_t offset __unused)
1001 struct bwstring *s1, *s2;
1002 MD5_CTX ctx1, ctx2;
1003 char *b1, *b2;
1005 s1 = kv1->k;
1006 s2 = kv2->k;
1008 if (debug_sort) {
1009 bwsprintf(stdout, s1, "; k1=<", ">");
1010 bwsprintf(stdout, s2, ", k2=<", ">");
1013 if (s1 == s2)
1014 return (0);
1016 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1017 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1019 MD5_Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1020 MD5_Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1021 b1 = randomcollend(&ctx1);
1022 b2 = randomcollend(&ctx2);
1023 if (b1 == NULL) {
1024 if (b2 == NULL)
1025 return (0);
1026 else {
1027 sort_free(b2);
1028 return (-1);
1030 } else if (b2 == NULL) {
1031 sort_free(b1);
1032 return (+1);
1033 } else {
1034 int cmp_res;
1036 cmp_res = strcmp(b1,b2);
1037 sort_free(b1);
1038 sort_free(b2);
1040 if (!cmp_res)
1041 cmp_res = bwscoll(s1, s2, 0);
1043 return (cmp_res);
1046 #endif
1049 * Implements version sort (-V).
1051 static int
1052 versioncoll(struct key_value *kv1, struct key_value *kv2,
1053 size_t offset __unused)
1055 struct bwstring *s1, *s2;
1057 s1 = kv1->k;
1058 s2 = kv2->k;
1060 if (debug_sort) {
1061 bwsprintf(stdout, s1, "; k1=<", ">");
1062 bwsprintf(stdout, s2, ", k2=<", ">");
1065 if (s1 == s2)
1066 return (0);
1068 return (vcmp(s1, s2));
1072 * Check for minus infinity
1074 static inline bool
1075 huge_minus(double d, int err1)
1078 if (err1 == ERANGE)
1079 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1080 return (+1);
1082 return (0);
1086 * Check for plus infinity
1088 static inline bool
1089 huge_plus(double d, int err1)
1092 if (err1 == ERANGE)
1093 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1094 return (+1);
1096 return (0);
1100 * Check whether a function is a NAN
1102 static bool
1103 is_nan(double d)
1105 return ((d == NAN) || (isnan(d)));
1109 * Compare two NANs
1111 static int
1112 cmp_nans(double d1, double d2)
1115 if (d1 < d2)
1116 return (-1);
1117 if (d1 > d2)
1118 return (+1);
1119 return (0);
1123 * Implements general numeric sort (-g).
1125 static int
1126 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1127 size_t offset __unused)
1129 double d1, d2;
1130 int err1, err2;
1131 bool empty1, empty2, key1_read, key2_read;
1133 d1 = d2 = 0;
1134 err1 = err2 = 0;
1135 key1_read = key2_read = false;
1137 if (debug_sort) {
1138 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1139 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1142 if (kv1->hint->status == HS_UNINITIALIZED) {
1143 errno = 0;
1144 d1 = bwstod(kv1->k, &empty1);
1145 err1 = errno;
1147 if (empty1)
1148 kv1->hint->v.gh.notnum = true;
1149 else if (err1 == 0) {
1150 kv1->hint->v.gh.d = d1;
1151 kv1->hint->v.gh.nan = is_nan(d1);
1152 kv1->hint->status = HS_INITIALIZED;
1153 } else
1154 kv1->hint->status = HS_ERROR;
1156 key1_read = true;
1159 if (kv2->hint->status == HS_UNINITIALIZED) {
1160 errno = 0;
1161 d2 = bwstod(kv2->k, &empty2);
1162 err2 = errno;
1164 if (empty2)
1165 kv2->hint->v.gh.notnum = true;
1166 else if (err2 == 0) {
1167 kv2->hint->v.gh.d = d2;
1168 kv2->hint->v.gh.nan = is_nan(d2);
1169 kv2->hint->status = HS_INITIALIZED;
1170 } else
1171 kv2->hint->status = HS_ERROR;
1173 key2_read = true;
1176 if (kv1->hint->status == HS_INITIALIZED &&
1177 kv2->hint->status == HS_INITIALIZED) {
1178 if (kv1->hint->v.gh.notnum)
1179 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1180 else if (kv2->hint->v.gh.notnum)
1181 return (+1);
1183 if (kv1->hint->v.gh.nan)
1184 return ((kv2->hint->v.gh.nan) ?
1185 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1186 -1);
1187 else if (kv2->hint->v.gh.nan)
1188 return (+1);
1190 d1 = kv1->hint->v.gh.d;
1191 d2 = kv2->hint->v.gh.d;
1193 if (d1 < d2)
1194 return (-1);
1195 else if (d1 > d2)
1196 return (+1);
1197 else
1198 return (0);
1201 if (!key1_read) {
1202 errno = 0;
1203 d1 = bwstod(kv1->k, &empty1);
1204 err1 = errno;
1207 if (!key2_read) {
1208 errno = 0;
1209 d2 = bwstod(kv2->k, &empty2);
1210 err2 = errno;
1213 /* Non-value case: */
1214 if (empty1)
1215 return (empty2 ? 0 : -1);
1216 else if (empty2)
1217 return (+1);
1219 /* NAN case */
1220 if (is_nan(d1))
1221 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1222 else if (is_nan(d2))
1223 return (+1);
1225 /* Infinities */
1226 if (err1 == ERANGE || err2 == ERANGE) {
1227 /* Minus infinity case */
1228 if (huge_minus(d1, err1)) {
1229 if (huge_minus(d2, err2)) {
1230 if (d1 < d2)
1231 return (-1);
1232 if (d1 > d2)
1233 return (+1);
1234 return (0);
1235 } else
1236 return (-1);
1238 } else if (huge_minus(d2, err2)) {
1239 if (huge_minus(d1, err1)) {
1240 if (d1 < d2)
1241 return (-1);
1242 if (d1 > d2)
1243 return (+1);
1244 return (0);
1245 } else
1246 return (+1);
1249 /* Plus infinity case */
1250 if (huge_plus(d1, err1)) {
1251 if (huge_plus(d2, err2)) {
1252 if (d1 < d2)
1253 return (-1);
1254 if (d1 > d2)
1255 return (+1);
1256 return (0);
1257 } else
1258 return (+1);
1259 } else if (huge_plus(d2, err2)) {
1260 if (huge_plus(d1, err1)) {
1261 if (d1 < d2)
1262 return (-1);
1263 if (d1 > d2)
1264 return (+1);
1265 return (0);
1266 } else
1267 return (-1);
1271 if (d1 < d2)
1272 return (-1);
1273 if (d1 > d2)
1274 return (+1);
1276 return (0);
1280 * Implements month sort (-M).
1282 static int
1283 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1285 int val1, val2;
1286 bool key1_read, key2_read;
1288 val1 = val2 = 0;
1289 key1_read = key2_read = false;
1291 if (debug_sort) {
1292 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1293 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1296 if (kv1->hint->status == HS_UNINITIALIZED) {
1297 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1298 key1_read = true;
1299 kv1->hint->status = HS_INITIALIZED;
1302 if (kv2->hint->status == HS_UNINITIALIZED) {
1303 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1304 key2_read = true;
1305 kv2->hint->status = HS_INITIALIZED;
1308 if (kv1->hint->status == HS_INITIALIZED) {
1309 val1 = kv1->hint->v.Mh.m;
1310 key1_read = true;
1313 if (kv2->hint->status == HS_INITIALIZED) {
1314 val2 = kv2->hint->v.Mh.m;
1315 key2_read = true;
1318 if (!key1_read)
1319 val1 = bws_month_score(kv1->k);
1320 if (!key2_read)
1321 val2 = bws_month_score(kv2->k);
1323 if (val1 == val2) {
1324 return (0);
1326 if (val1 < val2)
1327 return (-1);
1328 return (+1);