string/strcoll.c

   1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #include <endian.h>
  21 #include <stddef.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24
  25 #ifndef STRING_TYPE
  26 # define STRING_TYPE char
  27 # define USTRING_TYPE unsigned char
  28 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
  29 #  define STRCOLL __strcoll_l
  30 # else
  31 #  define STRCOLL strcoll
  32 # endif
  33 # define STRCMP strcmp
  34 #endif
  35
  36 /* Include the shared helper functions.  `strxfrm'/`wcsxfrm' also use
  37    these functions.  */
  38 #include "../locale/weight.h"
  39
  40
  41 /* Compare S1 and S2, returning less than, equal to or
  42    greater than zero if the collated form of S1 is lexicographically
  43    less than, equal to or greater than the collated form of S2.  */
  44 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
  45 int
  46 STRCOLL (s1, s2)
  47      const STRING_TYPE *s1;
  48      const STRING_TYPE *s2;
  49 #else
  50 int
  51 STRCOLL (s1, s2, l)
  52      const STRING_TYPE *s1;
  53      const STRING_TYPE *s2;
  54      __locale_t l;
  55 #endif
  56 {
  57 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
  58   struct locale_data *current = l->__locales[LC_COLLATE];
  59 # if BYTE_ORDER == BIG_ENDIAN
  60   const u_int32_t *collate_table = (const u_int32_t *)
  61     current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].string;
  62   const u_int32_t *collate_extra = (const u_int32_t *)
  63     current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].string;
  64 # elif BYTE_ORDER == LITTLE_ENDIAN
  65   const u_int32_t *collate_table = (const u_int32_t *)
  66     current->values[_NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].string;
  67   const u_int32_t *collate_extra = (const u_int32_t *)
  68     current->values[_NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].string;
  69 # else
  70 #  error bizarre byte order
  71 # endif
  72 #endif
  73   weight_t *s1forw = NULL;
  74   weight_t *s1backw = NULL;
  75   weight_t *s2forw = NULL;
  76   weight_t *s2backw = NULL;
  77   size_t pass;
  78
  79   /* If the current locale does not specify locale data we use normal
  80      8-bit string comparison.  */
  81   if (collate_nrules == 0)
  82     return STRCMP (s1, s2);
  83
  84   /* Get full information about the strings.  This means we get
  85      information for all passes in a special data structure.  */
  86   get_string (s1, s1forw, s1backw);
  87   get_string (s2, s2forw, s2backw);
  88
  89   /* Now we have all the information.  In at most the given number of
  90      passes we can finally decide about the order.  */
  91   for (pass = 0; pass < collate_nrules; ++pass)
  92     {
  93       int forward = (collate_rules[pass] & sort_forward) != 0;
  94       const weight_t *s1run = forward ? s1forw : s1backw;
  95       const weight_t *s2run = forward ? s2forw : s2backw;
  96       int s1idx = forward ? 0 : s1run->data[pass].number - 1;
  97       int s2idx = forward ? 0 : s2run->data[pass].number - 1;
  98
  99       while (1)
 100         {
 101           int s1ignore = 0;
 102           int s2ignore = 0;
 103           u_int32_t w1 = 0;
 104           u_int32_t w2 = 0;
 105
 106           /* Here we have to check for IGNORE entries.  If these are
 107              found we count them and go on with the next value.  */
 108           while (s1run != NULL
 109                  && ((w1 = s1run->data[pass].value[s1idx])
 110                      == (u_int32_t) IGNORE_CHAR))
 111             {
 112               ++s1ignore;
 113               if ((forward && ++s1idx >= s1run->data[pass].number)
 114                   || (!forward && --s1idx < 0))
 115                 {
 116                   weight_t *nextp = forward ? s1run->next : s1run->prev;
 117                   if (nextp == NULL)
 118                     {
 119                       w1 = 0;
 120                       /* No more non-INGOREd elements means lowest
 121                          possible value.  */
 122                       s1ignore = -1;
 123                     }
 124                   else
 125                     s1idx = forward ? 0 : nextp->data[pass].number - 1;
 126                   s1run = nextp;
 127                 }
 128             }
 129
 130           while (s2run != NULL
 131                  && ((w2 = s2run->data[pass].value[s2idx])
 132                      == (u_int32_t) IGNORE_CHAR))
 133             {
 134               ++s2ignore;
 135               if ((forward && ++s2idx >= s2run->data[pass].number)
 136                   || (!forward && --s2idx < 0))
 137                 {
 138                   weight_t *nextp = forward ? s2run->next : s2run->prev;
 139                   if (nextp == NULL)
 140                     {
 141                       w2 = 0;
 142                       /* No more non-INGOREd elements means lowest
 143                          possible value.  */
 144                       s2ignore = -1;
 145                     }
 146                   else
 147                     s2idx = forward ? 0 : nextp->data[pass].number - 1;
 148                   s2run = nextp;
 149                 }
 150             }
 151
 152           /* If one string is completely processed stop.  */
 153           if (s1run == NULL || s2run == NULL)
 154             break;
 155
 156           /* Now we have information of the number of ignored
 157              weights and the value of the next weight.  */
 158           if ((collate_rules[pass] & sort_position) != 0
 159               && s1ignore != s2ignore && (w1 != 0 || w2 != 0))
 160             return s1ignore < s2ignore ? -1 : 1;
 161
 162           if (w1 != w2)
 163             return w1 < w2 ? -1 : 1;
 164
 165           /* We have to increment the index counters.  */
 166           if ((forward && ++s1idx >= s1run->data[pass].number)
 167               || (!forward && --s1idx < 0))
 168             if (forward)
 169               {
 170                 s1run = s1run->next;
 171                 s1idx = 0;
 172               }
 173             else
 174               {
 175                 s1run = s1run->prev;
 176                 if (s1run != NULL)
 177                   s1idx = s1run->data[pass].number - 1;
 178               }
 179
 180           if ((forward && ++s2idx >= s2run->data[pass].number)
 181               || (!forward && --s2idx < 0))
 182             if (forward)
 183               {
 184                 s2run = s2run->next;
 185                 s2idx = 0;
 186               }
 187             else
 188               {
 189                 s2run = s2run->prev;
 190                 if (s2run != NULL)
 191                   s2idx = s2run->data[pass].number - 1;
 192               }
 193
 194         }
 195
 196       if (s1run != s2run)
 197         return s1run != NULL ? 1 : -1;
 198     }
 199
 200   return 0;
 201 }