usr.bin/sort/coll.c

   1 /*-
   2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
   3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  *
  27  * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $
  28  */
  29
  30
  31 #include <sys/types.h>
  32
  33 #include <errno.h>
  34 #include <err.h>
  35 #include <langinfo.h>
  36 #include <limits.h>
  37 #include <math.h>
  38 #include <stdlib.h>
  39 #include <string.h>
  40 #include <wchar.h>
  41 #include <wctype.h>
  42 #if defined(SORT_RANDOM)
  43 #include <openssl/md5.h>
  44 #endif
  45
  46 #include "coll.h"
  47 #include "vsort.h"
  48
  49 struct key_specs *keys;
  50 size_t keys_num = 0;
  51
  52 wint_t symbol_decimal_point = L'.';
  53 /* there is no default thousands separator in collate rules: */
  54 wint_t symbol_thousands_sep = 0;
  55 wint_t symbol_negative_sign = L'-';
  56 wint_t symbol_positive_sign = L'+';
  57
  58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
  59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
  60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
  61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
  62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
  63 #if defined(SORT_RANDOM)
  64 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
  65 #endif
  66 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
  67
  68 /*
  69  * Allocate keys array
  70  */
  71 struct keys_array *
  72 keys_array_alloc(void)
  73 {
  74         struct keys_array *ka;
  75         size_t sz;
  76
  77         sz = keys_array_size();
  78         ka = sort_malloc(sz);
  79         memset(ka, 0, sz);
  80
  81         return (ka);
  82 }
  83
  84 /*
  85  * Calculate whether we need key hint space
  86  */
  87 static size_t
  88 key_hint_size(void)
  89 {
  90
  91         return (need_hint ? sizeof(struct key_hint) : 0);
  92 }
  93
  94 /*
  95  * Calculate keys array size
  96  */
  97 size_t
  98 keys_array_size(void)
  99 {
 100
 101         return (keys_num * (sizeof(struct key_value) + key_hint_size()));
 102 }
 103
 104 /*
 105  * Clean data of keys array
 106  */
 107 void
 108 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
 109 {
 110
 111         if (ka) {
 112                 for (size_t i = 0; i < keys_num; ++i)
 113                         if (ka->key[i].k && ka->key[i].k != s)
 114                                 bwsfree(ka->key[i].k);
 115                 memset(ka, 0, keys_array_size());
 116         }
 117 }
 118
 119 /*
 120  * Set value of a key in the keys set
 121  */
 122 void
 123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
 124 {
 125
 126         if (ka && keys_num > ind) {
 127                 struct key_value *kv;
 128
 129                 kv = &(ka->key[ind]);
 130
 131                 if (kv->k && kv->k != s)
 132                         bwsfree(kv->k);
 133                 kv->k = s;
 134         }
 135 }
 136
 137 /*
 138  * Initialize a sort list item
 139  */
 140 struct sort_list_item *
 141 sort_list_item_alloc(void)
 142 {
 143         struct sort_list_item *si;
 144         size_t sz;
 145
 146         sz = sizeof(struct sort_list_item) + keys_array_size();
 147         si = sort_malloc(sz);
 148         memset(si, 0, sz);
 149
 150         return (si);
 151 }
 152
 153 size_t
 154 sort_list_item_size(struct sort_list_item *si)
 155 {
 156         size_t ret = 0;
 157
 158         if (si) {
 159                 ret = sizeof(struct sort_list_item) + keys_array_size();
 160                 if (si->str)
 161                         ret += bws_memsize(si->str);
 162                 for (size_t i = 0; i < keys_num; ++i) {
 163                         struct key_value *kv;
 164
 165                         kv = &(si->ka.key[i]);
 166
 167                         if (kv->k != si->str)
 168                                 ret += bws_memsize(kv->k);
 169                 }
 170         }
 171         return (ret);
 172 }
 173
 174 /*
 175  * Calculate key for a sort list item
 176  */
 177 static void
 178 sort_list_item_make_key(struct sort_list_item *si)
 179 {
 180
 181         preproc(si->str, &(si->ka));
 182 }
 183
 184 /*
 185  * Set value of a sort list item.
 186  * Return combined string and keys memory size.
 187  */
 188 void
 189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
 190 {
 191
 192         if (si) {
 193                 clean_keys_array(si->str, &(si->ka));
 194                 if (si->str) {
 195                         if (si->str == str) {
 196                                 /* we are trying to reset the same string */
 197                                 return;
 198                         } else {
 199                                 bwsfree(si->str);
 200                                 si->str = NULL;
 201                         }
 202                 }
 203                 si->str = str;
 204                 sort_list_item_make_key(si);
 205         }
 206 }
 207
 208 /*
 209  * De-allocate a sort list item object memory
 210  */
 211 void
 212 sort_list_item_clean(struct sort_list_item *si)
 213 {
 214
 215         if (si) {
 216                 clean_keys_array(si->str, &(si->ka));
 217                 if (si->str) {
 218                         bwsfree(si->str);
 219                         si->str = NULL;
 220                 }
 221         }
 222 }
 223
 224 /*
 225  * Skip columns according to specs
 226  */
 227 static size_t
 228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
 229     bool skip_blanks, bool *empty_key)
 230 {
 231         if (cols < 1)
 232                 return (BWSLEN(s) + 1);
 233
 234         if (skip_blanks)
 235                 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
 236                         ++start;
 237
 238         while (start < BWSLEN(s) && cols > 1) {
 239                 --cols;
 240                 ++start;
 241         }
 242
 243         if (start >= BWSLEN(s))
 244                 *empty_key = true;
 245
 246         return (start);
 247 }
 248
 249 /*
 250  * Skip fields according to specs
 251  */
 252 static size_t
 253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
 254 {
 255
 256         if (fields < 2) {
 257                 if (BWSLEN(s) == 0)
 258                         *empty_field = true;
 259                 return (0);
 260         } else if (!(sort_opts_vals.tflag)) {
 261                 size_t cpos = 0;
 262                 bool pb = true;
 263
 264                 while (cpos < BWSLEN(s)) {
 265                         bool isblank;
 266
 267                         isblank = iswblank(BWS_GET(s, cpos));
 268
 269                         if (isblank && !pb) {
 270                                 --fields;
 271                                 if (fields <= 1)
 272                                         return (cpos);
 273                         }
 274                         pb = isblank;
 275                         ++cpos;
 276                 }
 277                 if (fields > 1)
 278                         *empty_field = true;
 279                 return (cpos);
 280         } else {
 281                 size_t cpos = 0;
 282
 283                 while (cpos < BWSLEN(s)) {
 284                         if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
 285                                 --fields;
 286                                 if (fields <= 1)
 287                                         return (cpos + 1);
 288                         }
 289                         ++cpos;
 290                 }
 291                 if (fields > 1)
 292                         *empty_field = true;
 293                 return (cpos);
 294         }
 295 }
 296
 297 /*
 298  * Find fields start
 299  */
 300 static void
 301 find_field_start(const struct bwstring *s, struct key_specs *ks,
 302     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
 303 {
 304
 305         *field_start = skip_fields_to_start(s, ks->f1, empty_field);
 306         if (!*empty_field)
 307                 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
 308                     ks->pos1b, empty_key);
 309         else
 310                 *empty_key = true;
 311 }
 312
 313 /*
 314  * Find end key position
 315  */
 316 static size_t
 317 find_field_end(const struct bwstring *s, struct key_specs *ks)
 318 {
 319         size_t f2, next_field_start, pos_end;
 320         bool empty_field, empty_key;
 321
 322         pos_end = 0;
 323         next_field_start = 0;
 324         empty_field = false;
 325         empty_key = false;
 326         f2 = ks->f2;
 327
 328         if (f2 == 0)
 329                 return (BWSLEN(s) + 1);
 330         else {
 331                 if (ks->c2 == 0) {
 332                         next_field_start = skip_fields_to_start(s, f2 + 1,
 333                             &empty_field);
 334                         if ((next_field_start > 0) && sort_opts_vals.tflag &&
 335                             ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
 336                             next_field_start - 1)))
 337                                 --next_field_start;
 338                 } else
 339                         next_field_start = skip_fields_to_start(s, f2,
 340                             &empty_field);
 341         }
 342
 343         if (empty_field || (next_field_start >= BWSLEN(s)))
 344                 return (BWSLEN(s) + 1);
 345
 346         if (ks->c2) {
 347                 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
 348                     ks->pos2b, &empty_key);
 349                 if (pos_end < BWSLEN(s))
 350                         ++pos_end;
 351         } else
 352                 pos_end = next_field_start;
 353
 354         return (pos_end);
 355 }
 356
 357 /*
 358  * Cut a field according to the key specs
 359  */
 360 static struct bwstring *
 361 cut_field(const struct bwstring *s, struct key_specs *ks)
 362 {
 363         struct bwstring *ret = NULL;
 364
 365         if (s && ks) {
 366                 size_t field_start, key_end, key_start, sz;
 367                 bool empty_field, empty_key;
 368
 369                 field_start = 0;
 370                 key_start = 0;
 371                 empty_field = false;
 372                 empty_key = false;
 373
 374                 find_field_start(s, ks, &field_start, &key_start,
 375                     &empty_field, &empty_key);
 376
 377                 if (empty_key)
 378                         sz = 0;
 379                 else {
 380                         key_end = find_field_end(s, ks);
 381                         sz = (key_end < key_start) ? 0 : (key_end - key_start);
 382                 }
 383
 384                 ret = bwsalloc(sz);
 385                 if (sz)
 386                         bwsnocpy(ret, s, key_start, sz);
 387         } else
 388                 ret = bwsalloc(0);
 389
 390         return (ret);
 391 }
 392
 393 /*
 394  * Preprocesses a line applying the necessary transformations
 395  * specified by command line options and returns the preprocessed
 396  * string, which can be used to compare.
 397  */
 398 int
 399 preproc(struct bwstring *s, struct keys_array *ka)
 400 {
 401
 402         if (sort_opts_vals.kflag)
 403                 for (size_t i = 0; i < keys_num; i++) {
 404                         struct bwstring *key;
 405                         struct key_specs *kspecs;
 406                         struct sort_mods *sm;
 407
 408                         kspecs = &(keys[i]);
 409                         key = cut_field(s, kspecs);
 410
 411                         sm = &(kspecs->sm);
 412                         if (sm->dflag)
 413                                 key = dictionary_order(key);
 414                         else if (sm->iflag)
 415                                 key = ignore_nonprinting(key);
 416                         if (sm->fflag || sm->Mflag)
 417                                 key = ignore_case(key);
 418
 419                         set_key_on_keys_array(ka, key, i);
 420                 }
 421         else {
 422                 struct bwstring *ret = NULL;
 423                 struct sort_mods *sm = default_sort_mods;
 424
 425                 if (sm->bflag) {
 426                         if (ret == NULL)
 427                                 ret = bwsdup(s);
 428                         ret = ignore_leading_blanks(ret);
 429                 }
 430                 if (sm->dflag) {
 431                         if (ret == NULL)
 432                                 ret = bwsdup(s);
 433                         ret = dictionary_order(ret);
 434                 } else if (sm->iflag) {
 435                         if (ret == NULL)
 436                                 ret = bwsdup(s);
 437                         ret = ignore_nonprinting(ret);
 438                 }
 439                 if (sm->fflag || sm->Mflag) {
 440                         if (ret == NULL)
 441                                 ret = bwsdup(s);
 442                         ret = ignore_case(ret);
 443                 }
 444                 if (ret == NULL)
 445                         set_key_on_keys_array(ka, s, 0);
 446                 else
 447                         set_key_on_keys_array(ka, ret, 0);
 448         }
 449
 450         return 0;
 451 }
 452
 453 cmpcoll_t
 454 get_sort_func(struct sort_mods *sm)
 455 {
 456
 457         if (sm->nflag)
 458                 return (numcoll);
 459         else if (sm->hflag)
 460                 return (hnumcoll);
 461         else if (sm->gflag)
 462                 return (gnumcoll);
 463         else if (sm->Mflag)
 464                 return (monthcoll);
 465 #if defined(SORT_RANDOM)
 466         else if (sm->Rflag)
 467                 return (randomcoll);
 468 #endif
 469         else if (sm->Vflag)
 470                 return (versioncoll);
 471         else
 472                 return (wstrcoll);
 473 }
 474
 475 /*
 476  * Compares the given strings.  Returns a positive number if
 477  * the first precedes the second, a negative number if the second is
 478  * the preceding one, and zero if they are equal.  This function calls
 479  * the underlying collate functions, which done the actual comparison.
 480  */
 481 int
 482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
 483 {
 484         struct sort_mods *sm;
 485         int res = 0;
 486
 487         for (size_t i = 0; i < keys_num; ++i) {
 488                 sm = &(keys[i].sm);
 489
 490                 if (sm->rflag)
 491                         res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
 492                 else
 493                         res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
 494
 495                 if (res)
 496                         break;
 497
 498                 /* offset applies to only the first key */
 499                 offset = 0;
 500         }
 501         return (res);
 502 }
 503
 504 /*
 505  * Compare two strings.
 506  * Plain symbol-by-symbol comparison.
 507  */
 508 int
 509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
 510 {
 511
 512         if (default_sort_mods->rflag) {
 513                 const struct bwstring *tmp;
 514
 515                 tmp = s1;
 516                 s1 = s2;
 517                 s2 = tmp;
 518         }
 519
 520         return (bwscoll(s1, s2, 0));
 521 }
 522
 523 /*
 524  * Compare a string and a sort list item, according to the sort specs.
 525  */
 526 int
 527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
 528 {
 529         struct keys_array *ka1;
 530         int ret = 0;
 531
 532         ka1 = keys_array_alloc();
 533
 534         preproc(str1, ka1);
 535
 536         sort_list_item_make_key(*ss2);
 537
 538         if (debug_sort) {
 539                 bwsprintf(stdout, str1, "; s1=<", ">");
 540                 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
 541         }
 542
 543         ret = key_coll(ka1, &((*ss2)->ka), 0);
 544
 545         if (debug_sort)
 546                 printf("; cmp1=%d", ret);
 547
 548         clean_keys_array(str1, ka1);
 549         sort_free(ka1);
 550
 551         if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
 552                 ret = top_level_str_coll(str1, ((*ss2)->str));
 553                 if (debug_sort)
 554                         printf("; cmp2=%d", ret);
 555         }
 556
 557         if (debug_sort)
 558                 printf("\n");
 559
 560         return (ret);
 561 }
 562
 563 /*
 564  * Compare two sort list items, according to the sort specs.
 565  */
 566 int
 567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
 568     size_t offset)
 569 {
 570         int ret;
 571
 572         ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
 573
 574         if (debug_sort) {
 575                 if (offset)
 576                         printf("; offset=%d", (int) offset);
 577                 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
 578                 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
 579                 printf("; cmp1=%d\n", ret);
 580         }
 581
 582         if (ret)
 583                 return (ret);
 584
 585         if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
 586                 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
 587                 if (debug_sort)
 588                         printf("; cmp2=%d\n", ret);
 589         }
 590
 591         return (ret);
 592 }
 593
 594 /*
 595  * Compare two sort list items, according to the sort specs.
 596  */
 597 int
 598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
 599 {
 600
 601         return (list_coll_offset(ss1, ss2, 0));
 602 }
 603
 604 #define LSCDEF(N)                                                       \
 605 static int                                                              \
 606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
 607 {                                                                       \
 608                                                                         \
 609         return (list_coll_offset(ss1, ss2, N));                         \
 610 }
 611
 612 LSCDEF(1)
 613 LSCDEF(2)
 614 LSCDEF(3)
 615 LSCDEF(4)
 616 LSCDEF(5)
 617 LSCDEF(6)
 618 LSCDEF(7)
 619 LSCDEF(8)
 620 LSCDEF(9)
 621 LSCDEF(10)
 622 LSCDEF(11)
 623 LSCDEF(12)
 624 LSCDEF(13)
 625 LSCDEF(14)
 626 LSCDEF(15)
 627 LSCDEF(16)
 628 LSCDEF(17)
 629 LSCDEF(18)
 630 LSCDEF(19)
 631 LSCDEF(20)
 632
 633 listcoll_t
 634 get_list_call_func(size_t offset)
 635 {
 636         static const listcoll_t lsarray[] = { list_coll, list_coll_1,
 637             list_coll_2, list_coll_3, list_coll_4, list_coll_5,
 638             list_coll_6, list_coll_7, list_coll_8, list_coll_9,
 639             list_coll_10, list_coll_11, list_coll_12, list_coll_13,
 640             list_coll_14, list_coll_15, list_coll_16, list_coll_17,
 641             list_coll_18, list_coll_19, list_coll_20 };
 642
 643         if (offset <= 20)
 644                 return (lsarray[offset]);
 645
 646         return (list_coll);
 647 }
 648
 649 /*
 650  * Compare two sort list items, only by their original string.
 651  */
 652 int
 653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
 654 {
 655
 656         return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
 657 }
 658
 659 /*
 660  * Maximum size of a number in the string (before or after decimal point)
 661  */
 662 #define MAX_NUM_SIZE (128)
 663
 664 /*
 665  * Set suffix value
 666  */
 667 static void setsuffix(wchar_t c, unsigned char *si)
 668 {
 669         switch (c){
 670         case L'k':
 671         case L'K':
 672                 *si = 1;
 673                 break;
 674         case L'M':
 675                 *si = 2;
 676                 break;
 677         case L'G':
 678                 *si = 3;
 679                 break;
 680         case L'T':
 681                 *si = 4;
 682                 break;
 683         case L'P':
 684                 *si = 5;
 685                 break;
 686         case L'E':
 687                 *si = 6;
 688                 break;
 689         case L'Z':
 690                 *si = 7;
 691                 break;
 692         case L'Y':
 693                 *si = 8;
 694                 break;
 695         default:
 696                 *si = 0;
 697         };
 698 }
 699
 700 /*
 701  * Read string s and parse the string into a fixed-decimal-point number.
 702  * sign equals -1 if the number is negative (explicit plus is not allowed,
 703  * according to GNU sort's "info sort".
 704  * The number part before decimal point is in the smain, after the decimal
 705  * point is in sfrac, tail is the pointer to the remainder of the string.
 706  */
 707 static int
 708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
 709 {
 710         bwstring_iterator s;
 711
 712         s = bws_begin(s0);
 713
 714         /* always end the fraction with zero, even if we have no fraction */
 715         sfrac[0] = 0;
 716
 717         while (iswblank(bws_get_iter_value(s)))
 718                 s = bws_iterator_inc(s, 1);
 719
 720         if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
 721                 *sign = -1;
 722                 s = bws_iterator_inc(s, 1);
 723         }
 724
 725         // This is '0', not '\0', do not change this
 726         while (iswdigit(bws_get_iter_value(s)) &&
 727             (bws_get_iter_value(s) == L'0'))
 728                 s = bws_iterator_inc(s, 1);
 729
 730         while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
 731                 if (iswdigit(bws_get_iter_value(s))) {
 732                         smain[*main_len] = bws_get_iter_value(s);
 733                         s = bws_iterator_inc(s, 1);
 734                         *main_len += 1;
 735                 } else if (symbol_thousands_sep &&
 736                     (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
 737                         s = bws_iterator_inc(s, 1);
 738                 else
 739                         break;
 740         }
 741
 742         smain[*main_len] = 0;
 743
 744         if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
 745                 s = bws_iterator_inc(s, 1);
 746                 while (iswdigit(bws_get_iter_value(s)) &&
 747                     *frac_len < MAX_NUM_SIZE) {
 748                         sfrac[*frac_len] = bws_get_iter_value(s);
 749                         s = bws_iterator_inc(s, 1);
 750                         *frac_len += 1;
 751                 }
 752                 sfrac[*frac_len] = 0;
 753
 754                 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
 755                         --(*frac_len);
 756                         sfrac[*frac_len] = L'\0';
 757                 }
 758         }
 759
 760         setsuffix(bws_get_iter_value(s),si);
 761
 762         if ((*main_len + *frac_len) == 0)
 763                 *sign = 0;
 764
 765         return (0);
 766 }
 767
 768 /*
 769  * Implements string sort.
 770  */
 771 static int
 772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
 773 {
 774
 775         if (debug_sort) {
 776                 if (offset)
 777                         printf("; offset=%d\n", (int) offset);
 778                 bwsprintf(stdout, kv1->k, "; k1=<", ">");
 779                 printf("(%zu)", BWSLEN(kv1->k));
 780                 bwsprintf(stdout, kv2->k, ", k2=<", ">");
 781                 printf("(%zu)", BWSLEN(kv2->k));
 782         }
 783
 784         return (bwscoll(kv1->k, kv2->k, offset));
 785 }
 786
 787 /*
 788  * Compare two suffixes
 789  */
 790 static inline int
 791 cmpsuffix(unsigned char si1, unsigned char si2)
 792 {
 793
 794         return ((char)si1 - (char)si2);
 795 }
 796
 797 /*
 798  * Implements numeric sort for -n and -h.
 799  */
 800 static int
 801 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
 802     size_t offset __unused, bool use_suffix)
 803 {
 804         struct bwstring *s1, *s2;
 805         wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
 806         wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
 807         int cmp_res, sign1, sign2;
 808         size_t frac1, frac2, main1, main2;
 809         unsigned char SI1, SI2;
 810         bool e1, e2, key1_read, key2_read;
 811
 812         s1 = kv1->k;
 813         s2 = kv2->k;
 814         sign1 = sign2 = 0;
 815         main1 = main2 = 0;
 816         frac1 = frac2 = 0;
 817
 818         cmp_res = 0;
 819         key1_read = key2_read = false;
 820
 821         if (debug_sort) {
 822                 bwsprintf(stdout, s1, "; k1=<", ">");
 823                 bwsprintf(stdout, s2, ", k2=<", ">");
 824         }
 825
 826         if (s1 == s2)
 827                 return (0);
 828
 829         if (kv1->hint->status == HS_UNINITIALIZED) {
 830                 /* read the number from the string */
 831                 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
 832                 key1_read = true;
 833                 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
 834                 if(main1 < 1 && frac1 < 1)
 835                         kv1->hint->v.nh.empty=true;
 836                 kv1->hint->v.nh.si = SI1;
 837                 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
 838                     HS_INITIALIZED : HS_ERROR;
 839                 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
 840         }
 841
 842         if (kv2->hint->status == HS_UNINITIALIZED) {
 843                 /* read the number from the string */
 844                 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
 845                 key2_read = true;
 846                 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
 847                 if(main2 < 1 && frac2 < 1)
 848                         kv2->hint->v.nh.empty=true;
 849                 kv2->hint->v.nh.si = SI2;
 850                 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
 851                     HS_INITIALIZED : HS_ERROR;
 852                 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
 853         }
 854
 855         if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
 856             HS_INITIALIZED) {
 857                 unsigned long long n1, n2;
 858                 bool neg1, neg2;
 859
 860                 e1 = kv1->hint->v.nh.empty;
 861                 e2 = kv2->hint->v.nh.empty;
 862
 863                 if (e1 && e2)
 864                         return (0);
 865
 866                 neg1 = kv1->hint->v.nh.neg;
 867                 neg2 = kv2->hint->v.nh.neg;
 868
 869                 if (neg1 && !neg2)
 870                         return (-1);
 871                 if (neg2 && !neg1)
 872                         return (+1);
 873
 874                 if (e1)
 875                         return (neg2 ? +1 : -1);
 876                 else if (e2)
 877                         return (neg1 ? -1 : +1);
 878
 879
 880                 if (use_suffix) {
 881                         cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
 882                         if (cmp_res)
 883                                 return (neg1 ? -cmp_res : cmp_res);
 884                 }
 885
 886                 n1 = kv1->hint->v.nh.n1;
 887                 n2 = kv2->hint->v.nh.n1;
 888                 if (n1 < n2)
 889                         return (neg1 ? +1 : -1);
 890                 else if (n1 > n2)
 891                         return (neg1 ? -1 : +1);
 892         }
 893
 894         /* read the numbers from the strings */
 895         if (!key1_read)
 896                 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
 897         if (!key2_read)
 898                 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
 899
 900         e1 = ((main1 + frac1) == 0);
 901         e2 = ((main2 + frac2) == 0);
 902
 903         if (e1 && e2)
 904                 return (0);
 905
 906         /* we know the result if the signs are different */
 907         if (sign1 < 0 && sign2 >= 0)
 908                 return (-1);
 909         if (sign1 >= 0 && sign2 < 0)
 910                 return (+1);
 911
 912         if (e1)
 913                 return ((sign2 < 0) ? +1 : -1);
 914         else if (e2)
 915                 return ((sign1 < 0) ? -1 : +1);
 916
 917         if (use_suffix) {
 918                 cmp_res = cmpsuffix(SI1, SI2);
 919                 if (cmp_res)
 920                         return ((sign1 < 0) ? -cmp_res : cmp_res);
 921         }
 922
 923         /* if both numbers are empty assume that the strings are equal */
 924         if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
 925                 return (0);
 926
 927         /*
 928          * if the main part is of different size, we know the result
 929          * (because the leading zeros are removed)
 930          */
 931         if (main1 < main2)
 932                 cmp_res = -1;
 933         else if (main1 > main2)
 934                 cmp_res = +1;
 935         /* if the sizes are equal then simple non-collate string compare gives the correct result */
 936         else
 937                 cmp_res = wcscmp(smain1, smain2);
 938
 939         /* check fraction */
 940         if (!cmp_res)
 941                 cmp_res = wcscmp(sfrac1, sfrac2);
 942
 943         if (!cmp_res)
 944                 return (0);
 945
 946         /* reverse result if the signs are negative */
 947         if (sign1 < 0 && sign2 < 0)
 948                 cmp_res = -cmp_res;
 949
 950         return (cmp_res);
 951 }
 952
 953 /*
 954  * Implements numeric sort (-n).
 955  */
 956 static int
 957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
 958 {
 959
 960         return (numcoll_impl(kv1, kv2, offset, false));
 961 }
 962
 963 /*
 964  * Implements 'human' numeric sort (-h).
 965  */
 966 static int
 967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
 968 {
 969
 970         return (numcoll_impl(kv1, kv2, offset, true));
 971 }
 972
 973 /*
 974  * Implements random sort (-R).
 975  */
 976 #if defined(SORT_RANDOM)
 977 static char *
 978 randomcollend(MD5_CTX *ctx)
 979 {
 980         unsigned char digest[MD5_DIGEST_LENGTH];
 981         static const char hex[]="0123456789abcdef";
 982         char *buf;
 983         int i;
 984
 985         buf = malloc(MD5_DIGEST_LENGTH * 2 + 1);
 986         if (!buf)
 987                 return NULL;
 988         MD5_Final(digest, ctx);
 989         for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
 990                 buf[2*i] = hex[digest[i] >> 4];
 991                 buf[2*i+1] = hex[digest[i] & 0x0f];
 992         }
 993         buf[MD5_DIGEST_LENGTH * 2] = '\0';
 994         return buf;
 995 }
 996
 997 static int
 998 randomcoll(struct key_value *kv1, struct key_value *kv2,
 999     size_t offset __unused)
1000 {
1001         struct bwstring *s1, *s2;
1002         MD5_CTX ctx1, ctx2;
1003         char *b1, *b2;
1004
1005         s1 = kv1->k;
1006         s2 = kv2->k;
1007
1008         if (debug_sort) {
1009                 bwsprintf(stdout, s1, "; k1=<", ">");
1010                 bwsprintf(stdout, s2, ", k2=<", ">");
1011         }
1012
1013         if (s1 == s2)
1014                 return (0);
1015
1016         memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1017         memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1018
1019         MD5_Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1020         MD5_Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1021         b1 = randomcollend(&ctx1);
1022         b2 = randomcollend(&ctx2);
1023         if (b1 == NULL) {
1024                 if (b2 == NULL)
1025                         return (0);
1026                 else {
1027                         sort_free(b2);
1028                         return (-1);
1029                 }
1030         } else if (b2 == NULL) {
1031                 sort_free(b1);
1032                 return (+1);
1033         } else {
1034                 int cmp_res;
1035
1036                 cmp_res = strcmp(b1,b2);
1037                 sort_free(b1);
1038                 sort_free(b2);
1039
1040                 if (!cmp_res)
1041                         cmp_res = bwscoll(s1, s2, 0);
1042
1043                 return (cmp_res);
1044         }
1045 }
1046 #endif
1047
1048 /*
1049  * Implements version sort (-V).
1050  */
1051 static int
1052 versioncoll(struct key_value *kv1, struct key_value *kv2,
1053     size_t offset __unused)
1054 {
1055         struct bwstring *s1, *s2;
1056
1057         s1 = kv1->k;
1058         s2 = kv2->k;
1059
1060         if (debug_sort) {
1061                 bwsprintf(stdout, s1, "; k1=<", ">");
1062                 bwsprintf(stdout, s2, ", k2=<", ">");
1063         }
1064
1065         if (s1 == s2)
1066                 return (0);
1067
1068         return (vcmp(s1, s2));
1069 }
1070
1071 /*
1072  * Check for minus infinity
1073  */
1074 static inline bool
1075 huge_minus(double d, int err1)
1076 {
1077
1078         if (err1 == ERANGE)
1079                 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1080                         return (+1);
1081
1082         return (0);
1083 }
1084
1085 /*
1086  * Check for plus infinity
1087  */
1088 static inline bool
1089 huge_plus(double d, int err1)
1090 {
1091
1092         if (err1 == ERANGE)
1093                 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1094                         return (+1);
1095
1096         return (0);
1097 }
1098
1099 /*
1100  * Check whether a function is a NAN
1101  */
1102 static bool
1103 is_nan(double d)
1104 {
1105         return ((d == NAN) || (isnan(d)));
1106 }
1107
1108 /*
1109  * Compare two NANs
1110  */
1111 static int
1112 cmp_nans(double d1, double d2)
1113 {
1114
1115         if (d1 < d2)
1116                 return (-1);
1117         if (d1 > d2)
1118                 return (+1);
1119         return (0);
1120 }
1121
1122 /*
1123  * Implements general numeric sort (-g).
1124  */
1125 static int
1126 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1127     size_t offset __unused)
1128 {
1129         double d1, d2;
1130         int err1, err2;
1131         bool empty1, empty2, key1_read, key2_read;
1132
1133         d1 = d2 = 0;
1134         err1 = err2 = 0;
1135         key1_read = key2_read = false;
1136
1137         if (debug_sort) {
1138                 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1139                 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1140         }
1141
1142         if (kv1->hint->status == HS_UNINITIALIZED) {
1143                 errno = 0;
1144                 d1 = bwstod(kv1->k, &empty1);
1145                 err1 = errno;
1146
1147                 if (empty1)
1148                         kv1->hint->v.gh.notnum = true;
1149                 else if (err1 == 0) {
1150                         kv1->hint->v.gh.d = d1;
1151                         kv1->hint->v.gh.nan = is_nan(d1);
1152                         kv1->hint->status = HS_INITIALIZED;
1153                 } else
1154                         kv1->hint->status = HS_ERROR;
1155
1156                 key1_read = true;
1157         }
1158
1159         if (kv2->hint->status == HS_UNINITIALIZED) {
1160                 errno = 0;
1161                 d2 = bwstod(kv2->k, &empty2);
1162                 err2 = errno;
1163
1164                 if (empty2)
1165                         kv2->hint->v.gh.notnum = true;
1166                 else if (err2 == 0) {
1167                         kv2->hint->v.gh.d = d2;
1168                         kv2->hint->v.gh.nan = is_nan(d2);
1169                         kv2->hint->status = HS_INITIALIZED;
1170                 } else
1171                         kv2->hint->status = HS_ERROR;
1172
1173                 key2_read = true;
1174         }
1175
1176         if (kv1->hint->status == HS_INITIALIZED &&
1177             kv2->hint->status == HS_INITIALIZED) {
1178                 if (kv1->hint->v.gh.notnum)
1179                         return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1180                 else if (kv2->hint->v.gh.notnum)
1181                         return (+1);
1182
1183                 if (kv1->hint->v.gh.nan)
1184                         return ((kv2->hint->v.gh.nan) ?
1185                             cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1186                             -1);
1187                 else if (kv2->hint->v.gh.nan)
1188                         return (+1);
1189
1190                 d1 = kv1->hint->v.gh.d;
1191                 d2 = kv2->hint->v.gh.d;
1192
1193                 if (d1 < d2)
1194                         return (-1);
1195                 else if (d1 > d2)
1196                         return (+1);
1197                 else
1198                         return (0);
1199         }
1200
1201         if (!key1_read) {
1202                 errno = 0;
1203                 d1 = bwstod(kv1->k, &empty1);
1204                 err1 = errno;
1205         }
1206
1207         if (!key2_read) {
1208                 errno = 0;
1209                 d2 = bwstod(kv2->k, &empty2);
1210                 err2 = errno;
1211         }
1212
1213         /* Non-value case: */
1214         if (empty1)
1215                 return (empty2 ? 0 : -1);
1216         else if (empty2)
1217                 return (+1);
1218
1219         /* NAN case */
1220         if (is_nan(d1))
1221                 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1222         else if (is_nan(d2))
1223                 return (+1);
1224
1225         /* Infinities */
1226         if (err1 == ERANGE || err2 == ERANGE) {
1227                 /* Minus infinity case */
1228                 if (huge_minus(d1, err1)) {
1229                         if (huge_minus(d2, err2)) {
1230                                 if (d1 < d2)
1231                                         return (-1);
1232                                 if (d1 > d2)
1233                                         return (+1);
1234                                 return (0);
1235                         } else
1236                                 return (-1);
1237
1238                 } else if (huge_minus(d2, err2)) {
1239                         if (huge_minus(d1, err1)) {
1240                                 if (d1 < d2)
1241                                         return (-1);
1242                                 if (d1 > d2)
1243                                         return (+1);
1244                                 return (0);
1245                         } else
1246                                 return (+1);
1247                 }
1248
1249                 /* Plus infinity case */
1250                 if (huge_plus(d1, err1)) {
1251                         if (huge_plus(d2, err2)) {
1252                                 if (d1 < d2)
1253                                         return (-1);
1254                                 if (d1 > d2)
1255                                         return (+1);
1256                                 return (0);
1257                         } else
1258                                 return (+1);
1259                 } else if (huge_plus(d2, err2)) {
1260                         if (huge_plus(d1, err1)) {
1261                                 if (d1 < d2)
1262                                         return (-1);
1263                                 if (d1 > d2)
1264                                         return (+1);
1265                                 return (0);
1266                         } else
1267                                 return (-1);
1268                 }
1269         }
1270
1271         if (d1 < d2)
1272                 return (-1);
1273         if (d1 > d2)
1274                 return (+1);
1275
1276         return (0);
1277 }
1278
1279 /*
1280  * Implements month sort (-M).
1281  */
1282 static int
1283 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1284 {
1285         int val1, val2;
1286         bool key1_read, key2_read;
1287
1288         val1 = val2 = 0;
1289         key1_read = key2_read = false;
1290
1291         if (debug_sort) {
1292                 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1293                 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1294         }
1295
1296         if (kv1->hint->status == HS_UNINITIALIZED) {
1297                 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1298                 key1_read = true;
1299                 kv1->hint->status = HS_INITIALIZED;
1300         }
1301
1302         if (kv2->hint->status == HS_UNINITIALIZED) {
1303                 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1304                 key2_read = true;
1305                 kv2->hint->status = HS_INITIALIZED;
1306         }
1307
1308         if (kv1->hint->status == HS_INITIALIZED) {
1309                 val1 = kv1->hint->v.Mh.m;
1310                 key1_read = true;
1311         }
1312
1313         if (kv2->hint->status == HS_INITIALIZED) {
1314                 val2 = kv2->hint->v.Mh.m;
1315                 key2_read = true;
1316         }
1317
1318         if (!key1_read)
1319                 val1 = bws_month_score(kv1->k);
1320         if (!key2_read)
1321                 val2 = bws_month_score(kv2->k);
1322
1323         if (val1 == val2) {
1324                 return (0);
1325         }
1326         if (val1 < val2)
1327                 return (-1);
1328         return (+1);
1329 }