locale/programs/ld-collate.c

   1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <error.h>
  25 #include <stdlib.h>
  26
  27 #include "charmap.h"
  28 #include "localeinfo.h"
  29 #include "linereader.h"
  30 #include "locfile.h"
  31 #include "localedef.h"
  32
  33 /* Uncomment the following line in the production version.  */
  34 /* #define NDEBUG 1 */
  35 #include <assert.h>
  36
  37 #define obstack_chunk_alloc malloc
  38 #define obstack_chunk_free free
  39
  40 /* Forward declaration.  */
  41 struct element_t;
  42
  43 /* Data type for list of strings.  */
  44 struct section_list
  45 {
  46   struct section_list *next;
  47   /* Name of the section.  */
  48   const char *name;
  49   /* First element of this section.  */
  50   struct element_t *first;
  51   /* Last element of this section.  */
  52   struct element_t *last;
  53   /* These are the rules for this section.  */
  54   enum coll_sort_rule *rules;
  55 };
  56
  57 /* Data type for collating element.  */
  58 struct element_t
  59 {
  60   const char *mbs;
  61   const uint32_t *wcs;
  62   int order;
  63
  64   struct element_t **weights;
  65
  66   /* Where does the definition come from.  */
  67   const char *file;
  68   size_t line;
  69
  70   /* Which section does this belong to.  */
  71   struct section_list *section;
  72
  73   /* Predecessor and successor in the order list.  */
  74   struct element_t *last;
  75   struct element_t *next;
  76 };
  77
  78 /* Data type for collating symbol.  */
  79 struct symbol_t
  80 {
  81   /* Point to place in the order list.  */
  82   struct element_t *order;
  83
  84   /* Where does the definition come from.  */
  85   const char *file;
  86   size_t line;
  87 };
  88
  89
  90 /* The real definition of the struct for the LC_COLLATE locale.  */
  91 struct locale_collate_t
  92 {
  93   int col_weight_max;
  94   int cur_weight_max;
  95
  96   /* List of known scripts.  */
  97   struct section_list *sections;
  98   /* Current section using definition.  */
  99   struct section_list *current_section;
 100   /* There always can be an unnamed section.  */
 101   struct section_list unnamed_section;
 102   /* To make handling of errors easier we have another section.  */
 103   struct section_list error_section;
 104
 105   /* Number of sorting rules given in order_start line.  */
 106   uint32_t nrules;
 107
 108   /* Start of the order list.  */
 109   struct element_t *start;
 110
 111   /* The undefined element.  */
 112   struct element_t undefined;
 113
 114   /* This is the cursor for `reorder_after' insertions.  */
 115   struct element_t *cursor;
 116
 117   /* Remember whether last weight was an ellipsis.  */
 118   int was_ellipsis;
 119
 120   /* Known collating elements.  */
 121   hash_table elem_table;
 122
 123   /* Known collating symbols.  */
 124   hash_table sym_table;
 125
 126   /* Known collation sequences.  */
 127   hash_table seq_table;
 128
 129   struct obstack mempool;
 130
 131   /* The LC_COLLATE category is a bit special as it is sometimes possible
 132      that the definitions from more than one input file contains information.
 133      Therefore we keep all relevant input in a list.  */
 134   struct locale_collate_t *next;
 135 };
 136
 137
 138 /* We have a few global variables which are used for reading all
 139    LC_COLLATE category descriptions in all files.  */
 140 static int nrules;
 141
 142
 143 static struct section_list *
 144 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 145                    struct section_list *next)
 146 {
 147   struct section_list *newp;
 148
 149   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 150                                                 sizeof (*newp));
 151   newp->next = next;
 152   newp->name = string;
 153   newp->first = NULL;
 154
 155   return newp;
 156 }
 157
 158
 159 static struct element_t *
 160 new_element (struct locale_collate_t *collate, const char *mbs,
 161              const uint32_t *wcs)
 162 {
 163   struct element_t *newp;
 164
 165   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 166                                              sizeof (*newp));
 167   newp->mbs = mbs;
 168   newp->wcs = wcs;
 169   newp->order = 0;
 170
 171   newp->file = NULL;
 172   newp->line = 0;
 173
 174   newp->section = NULL;
 175
 176   newp->last = NULL;
 177   newp->next = NULL;
 178
 179   return newp;
 180 }
 181
 182
 183 static struct symbol_t *
 184 new_symbol (struct locale_collate_t *collate)
 185 {
 186   struct symbol_t *newp;
 187
 188   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 189
 190   newp->order = NULL;
 191
 192   newp->file = NULL;
 193   newp->line = 0;
 194
 195   return newp;
 196 }
 197
 198
 199 /* Test whether this name is already defined somewhere.  */
 200 static int
 201 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 202                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 203                  const char *symbol, size_t symbol_len)
 204 {
 205   void *ignore = NULL;
 206
 207   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 208     {
 209       lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
 210       return 1;
 211     }
 212
 213   if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0)
 214     {
 215       lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
 216       return 1;
 217     }
 218
 219   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 220     {
 221       lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
 222       return 1;
 223     }
 224
 225   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 226     {
 227       lr_error (ldfile, _("`%s' already defined as collating element"),
 228                 symbol);
 229       return 1;
 230     }
 231
 232   return 0;
 233 }
 234
 235
 236 /* Read the direction specification.  */
 237 static void
 238 read_directions (struct linereader *ldfile, struct token *arg,
 239                  struct charmap_t *charmap, struct repertoire_t *repertoire,
 240                  struct locale_collate_t *collate)
 241 {
 242   int cnt = 0;
 243   int max = nrules ?: 10;
 244   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 245   int warned = 0;
 246
 247   while (1)
 248     {
 249       int valid = 0;
 250
 251       if (arg->tok == tok_forward)
 252         {
 253           if (rules[cnt] & sort_backward)
 254             {
 255               if (! warned)
 256                 {
 257                   lr_error (ldfile, _("\
 258 %s: `forward' and `backward' are mutually excluding each other"),
 259                             "LC_COLLATE");
 260                   warned = 1;
 261                 }
 262             }
 263           else if (rules[cnt] & sort_forward)
 264             {
 265               if (! warned)
 266                 {
 267                   lr_error (ldfile, _("\
 268 %s: `%s' mentioned twice in definition of weight %d"),
 269                             "LC_COLLATE", "forward", cnt + 1);
 270                 }
 271             }
 272           else
 273             rules[cnt] |= sort_forward;
 274
 275           valid = 1;
 276         }
 277       else if (arg->tok == tok_backward)
 278         {
 279           if (rules[cnt] & sort_forward)
 280             {
 281               if (! warned)
 282                 {
 283                   lr_error (ldfile, _("\
 284 %s: `forward' and `backward' are mutually excluding each other"),
 285                             "LC_COLLATE");
 286                   warned = 1;
 287                 }
 288             }
 289           else if (rules[cnt] & sort_backward)
 290             {
 291               if (! warned)
 292                 {
 293                   lr_error (ldfile, _("\
 294 %s: `%s' mentioned twice in definition of weight %d"),
 295                             "LC_COLLATE", "backward", cnt + 1);
 296                 }
 297             }
 298           else
 299             rules[cnt] |= sort_backward;
 300
 301           valid = 1;
 302         }
 303       else if (arg->tok == tok_position)
 304         {
 305           if (rules[cnt] & sort_position)
 306             {
 307               if (! warned)
 308                 {
 309                   lr_error (ldfile, _("\
 310 %s: `%s' mentioned twice in definition of weight %d in category `%s'"),
 311                             "LC_COLLATE", "position", cnt + 1);
 312                 }
 313             }
 314           else
 315             rules[cnt] |= sort_position;
 316
 317           valid = 1;
 318         }
 319
 320       if (valid)
 321         arg = lr_token (ldfile, charmap, repertoire);
 322
 323       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 324           || arg->tok == tok_semicolon)
 325         {
 326           if (! valid && ! warned)
 327             {
 328               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 329               warned = 1;
 330             }
 331
 332           /* See whether we have to increment the counter.  */
 333           if (arg->tok != tok_comma && rules[cnt] != 0)
 334             ++cnt;
 335
 336           if (arg->tok == tok_eof || arg->tok == tok_eol)
 337             /* End of line or file, so we exit the loop.  */
 338             break;
 339
 340           if (nrules == 0)
 341             {
 342               /* See whether we have enough room in the array.  */
 343               if (cnt == max)
 344                 {
 345                   max += 10;
 346                   rules = (enum coll_sort_rule *) xrealloc (rules,
 347                                                             max
 348                                                             * sizeof (*rules));
 349                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 350                 }
 351             }
 352           else
 353             {
 354               if (cnt == nrules)
 355                 {
 356                   /* There must not be any more rule.  */
 357                   if (! warned)
 358                     {
 359                       lr_error (ldfile, _("\
 360 %s: too many rules; first entry only had %d"),
 361                                 "LC_COLLATE", nrules);
 362                       warned = 1;
 363                     }
 364
 365                   lr_ignore_rest (ldfile, 0);
 366                   break;
 367                 }
 368             }
 369         }
 370       else
 371         {
 372           if (! warned)
 373             {
 374               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 375               warned = 1;
 376             }
 377         }
 378
 379       arg = lr_token (ldfile, charmap, repertoire);
 380     }
 381
 382   if (nrules == 0)
 383     {
 384       /* Now we know how many rules we have.  */
 385       nrules = cnt;
 386       rules = (enum coll_sort_rule *) xrealloc (rules,
 387                                                 nrules * sizeof (*rules));
 388     }
 389   else
 390     {
 391       if (cnt < nrules)
 392         {
 393           /* Not enough rules in this specification.  */
 394           if (! warned)
 395             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 396
 397           do
 398             rules[cnt] = sort_forward;
 399           while (++cnt < nrules);
 400         }
 401     }
 402
 403   collate->current_section->rules = rules;
 404 }
 405
 406
 407 static void
 408 insert_value (struct linereader *ldfile, struct token *arg,
 409               struct charmap_t *charmap, struct repertoire_t *repertoire,
 410               struct locale_collate_t *collate)
 411 {
 412   /* First find out what kind of symbol this is.  */
 413   struct charseq *seq;
 414   uint32_t wc;
 415   struct element_t *elem = NULL;
 416   int weight_cnt;
 417
 418   /* First determine the wide character.  There must be such a value,
 419      otherwise we ignore it (if it is no collatio symbol or element).  */
 420   wc = repertoire_find_value (repertoire, arg->val.str.startmb,
 421                               arg->val.str.lenmb);
 422
 423   /* Try to find the character in the charmap.  */
 424   seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
 425
 426   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 427     {
 428       /* It's no character, so look through the collation elements and
 429          symbol list.  */
 430       void *result;
 431
 432       if (find_entry (&collate->sym_table, arg->val.str.startmb,
 433                       arg->val.str.lenmb, &result) == 0)
 434         {
 435           /* It's a collation symbol.  */
 436           struct symbol_t *sym = (struct symbol_t *) result;
 437           elem = sym->order;
 438
 439           if (elem == NULL)
 440             elem = sym->order = new_element (collate, arg->val.str.startmb,
 441                                              arg->val.str.startwc);
 442         }
 443       else if (find_entry (&collate->elem_table, arg->val.str.startmb,
 444                            arg->val.str.lenmb, (void **) &elem) != 0)
 445         /* It's also no collation element.  Therefore ignore it.  */
 446         return;
 447     }
 448   else
 449     {
 450       /* Otherwise the symbols stands for an character.  Make sure it is
 451          not already in the table.  */
 452
 453     }
 454
 455   if (elem == NULL)
 456     /* XXX HACK HACK HACK */
 457     return;
 458
 459   /* Test whether this element is not already in the list.  */
 460   if (elem->next != NULL || elem->next == collate->cursor)
 461     {
 462       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
 463                 arg->val.str.lenmb, arg->val.str.startmb,
 464                 elem->file, elem->line);
 465       return;
 466     }
 467
 468   /* Initialize all the fields.  */
 469   elem->file = ldfile->fname;
 470   elem->line = ldfile->lineno;
 471   elem->last = collate->cursor;
 472   elem->next = collate->cursor ? collate->cursor->next : NULL;
 473   elem->weights = (struct element_t **)
 474     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *));
 475   memset (elem->weights, '\0', nrules * sizeof (struct element_t *));
 476
 477   if (collate->current_section->first == NULL)
 478     collate->current_section->first = elem;
 479   if (collate->current_section->last == collate->cursor)
 480     collate->current_section->last = elem;
 481
 482   collate->cursor = elem;
 483
 484   /* Now read the rest of the line.  */
 485   ldfile->return_widestr = 1;
 486
 487   weight_cnt = 0;
 488   do
 489     {
 490       arg = lr_token (ldfile, charmap, repertoire);
 491
 492       if (arg->tok == tok_eof || arg->tok == tok_eol)
 493         {
 494           /* This means the rest of the line uses the current element
 495              as the weight.  */
 496           do
 497             elem->weights[weight_cnt] = elem;
 498           while (++weight_cnt < nrules);
 499
 500           return;
 501         }
 502
 503       if (arg->tok == tok_ignore)
 504         {
 505           /* The weight for this level has to be ignored.  We use the
 506              null pointer to indicate this.  */
 507         }
 508       else if (arg->tok == tok_bsymbol)
 509         {
 510
 511         }
 512     }
 513   while (++weight_cnt < nrules);
 514
 515   lr_ignore_rest (ldfile, weight_cnt == nrules);
 516 }
 517
 518
 519 static void
 520 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
 521                  int ignore_content)
 522 {
 523   if (!ignore_content)
 524     {
 525       struct locale_collate_t *collate;
 526
 527       collate = locale->categories[LC_COLLATE].collate =
 528         (struct locale_collate_t *) xcalloc (1,
 529                                              sizeof (struct locale_collate_t));
 530
 531       /* Init the various data structures.  */
 532       init_hash (&collate->elem_table, 100);
 533       init_hash (&collate->sym_table, 100);
 534       init_hash (&collate->seq_table, 500);
 535       obstack_init (&collate->mempool);
 536
 537       collate->col_weight_max = -1;
 538     }
 539
 540   ldfile->translate_strings = 1;
 541   ldfile->return_widestr = 0;
 542 }
 543
 544
 545 void
 546 collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 547 {
 548 }
 549
 550
 551 void
 552 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 553                 const char *output_path)
 554 {
 555 }
 556
 557
 558 void
 559 collate_read (struct linereader *ldfile, struct localedef_t *result,
 560               struct charmap_t *charmap, const char *repertoire_name,
 561               int ignore_content)
 562 {
 563   struct repertoire_t *repertoire = NULL;
 564   struct locale_collate_t *collate;
 565   struct token *now;
 566   struct token *arg;
 567   enum token_t nowtok;
 568   int state = 0;
 569   int was_ellipsis = 0;
 570
 571   /* Get the repertoire we have to use.  */
 572   if (repertoire_name != NULL)
 573     repertoire = repertoire_read (repertoire_name);
 574
 575   /* The rest of the line containing `LC_COLLATE' must be free.  */
 576   lr_ignore_rest (ldfile, 1);
 577
 578   do
 579     {
 580       now = lr_token (ldfile, charmap, NULL);
 581       nowtok = now->tok;
 582     }
 583   while (nowtok == tok_eol);
 584
 585   if (nowtok == tok_copy)
 586     {
 587       state = 2;
 588       now = lr_token (ldfile, charmap, NULL);
 589       if (now->tok != tok_string)
 590         goto err_label;
 591       /* XXX Use the name */
 592       lr_ignore_rest (ldfile, 1);
 593
 594       now = lr_token (ldfile, charmap, NULL);
 595       nowtok = now->tok;
 596     }
 597
 598   /* Prepare the data structures.  */
 599   collate_startup (ldfile, result, ignore_content);
 600   collate = result->categories[LC_COLLATE].collate;
 601
 602   while (1)
 603     {
 604       /* Of course we don't proceed beyond the end of file.  */
 605       if (nowtok == tok_eof)
 606         break;
 607
  608       /* Ignore empty lines.  */
 609       if (nowtok == tok_eol)
 610         {
 611           now = lr_token (ldfile, charmap, NULL);
 612           nowtok = now->tok;
 613           continue;
 614         }
 615
 616       switch (nowtok)
 617         {
 618         case tok_coll_weight_max:
 619           /* Ignore the rest of the line if we don't need the input of
 620              this line.  */
 621           if (ignore_content)
 622             {
 623               lr_ignore_rest (ldfile, 0);
 624               break;
 625             }
 626
 627           if (state != 0)
 628             goto err_label;
 629
 630           arg = lr_token (ldfile, charmap, NULL);
 631           if (arg->tok != tok_number)
 632             goto err_label;
 633           if (collate->col_weight_max != -1)
 634             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
 635                       "LC_COLLATE", "col_weight_max");
 636           else
 637             collate->col_weight_max = arg->val.num;
 638           lr_ignore_rest (ldfile, 1);
 639           break;
 640
 641         case tok_section_symbol:
 642           /* Ignore the rest of the line if we don't need the input of
 643              this line.  */
 644           if (ignore_content)
 645             {
 646               lr_ignore_rest (ldfile, 0);
 647               break;
 648             }
 649
 650           if (state != 0)
 651             goto err_label;
 652
 653           arg = lr_token (ldfile, charmap, repertoire);
 654           if (arg->tok != tok_bsymbol)
 655             goto err_label;
 656           else if (!ignore_content)
 657             {
 658               /* Check whether this section is already known.  */
 659               struct section_list *known = collate->sections;
 660               while (known != NULL)
 661                 if (strcmp (known->name, arg->val.str.startmb) == 0)
 662                   break;
 663
 664               if (known != NULL)
 665                 {
 666                   lr_error (ldfile,
 667                             _("%s: duplicate declaration of section `%s'"),
 668                             "LC_COLLATE", arg->val.str.startmb);
 669                   free (arg->val.str.startmb);
 670                 }
 671               else
 672                 collate->sections = make_seclist_elem (collate,
 673                                                        arg->val.str.startmb,
 674                                                        collate->sections);
 675
 676               lr_ignore_rest (ldfile, known == NULL);
 677             }
 678           else
 679             {
 680               free (arg->val.str.startmb);
 681               lr_ignore_rest (ldfile, 0);
 682             }
 683           break;
 684
 685         case tok_collating_element:
 686           /* Ignore the rest of the line if we don't need the input of
 687              this line.  */
 688           if (ignore_content)
 689             {
 690               lr_ignore_rest (ldfile, 0);
 691               break;
 692             }
 693
 694           if (state != 0)
 695             goto err_label;
 696
 697           arg = lr_token (ldfile, charmap, repertoire);
 698           if (arg->tok != tok_bsymbol)
 699             goto err_label;
 700           else
 701             {
 702               const char *symbol = arg->val.str.startmb;
 703               size_t symbol_len = arg->val.str.lenmb;
 704
 705               /* Next the `from' keyword.  */
 706               arg = lr_token (ldfile, charmap, repertoire);
 707               if (arg->tok != tok_from)
 708                 {
 709                   free ((char *) symbol);
 710                   goto err_label;
 711                 }
 712
 713               ldfile->return_widestr = 1;
 714
 715               /* Finally the string with the replacement.  */
 716               arg = lr_token (ldfile, charmap, repertoire);
 717               ldfile->return_widestr = 0;
 718               if (arg->tok != tok_string)
 719                 goto err_label;
 720
 721               if (!ignore_content)
 722                 {
 723                   if (symbol == NULL)
 724                     lr_error (ldfile, _("\
 725 %s: unknown character in collating element name"),
 726                               "LC_COLLATE");
 727                   if (arg->val.str.startmb == NULL)
 728                     lr_error (ldfile, _("\
 729 %s: unknown character in collating element definition"),
 730                               "LC_COLLATE");
 731                   if (arg->val.str.startwc == NULL)
 732                     lr_error (ldfile, _("\
 733 %s: unknown wide character in collating element definition"),
 734                               "LC_COLLATE");
 735                   else if (arg->val.str.lenwc < 2)
 736                     lr_error (ldfile, _("\
 737 %s: substitution string in collating element definition must have at least two characters"),
 738                               "LC_COLLATE");
 739
 740                   if (symbol != NULL)
 741                     {
 742                       /* The name is already defined.  */
 743                       if (check_duplicate (ldfile, collate, charmap,
 744                                            repertoire, symbol, symbol_len))
 745                         goto col_elem_free;
 746
 747                       if (insert_entry (&collate->elem_table,
 748                                         symbol, symbol_len,
 749                                         new_element (collate,
 750                                                      arg->val.str.startmb,
 751                                                      arg->val.str.startwc))
 752                           < 0)
 753                         lr_error (ldfile, _("\
 754 error while adding collating element"));
 755                     }
 756                   else
 757                     goto col_elem_free;
 758                 }
 759               else
 760                 {
 761                 col_elem_free:
 762                   if (symbol != NULL)
 763                     free ((char *) symbol);
 764                   if (arg->val.str.startmb != NULL)
 765                     free (arg->val.str.startmb);
 766                   if (arg->val.str.startwc != NULL)
 767                     free (arg->val.str.startwc);
 768                 }
 769               lr_ignore_rest (ldfile, 1);
 770             }
 771           break;
 772
 773         case tok_collating_symbol:
 774           /* Ignore the rest of the line if we don't need the input of
 775              this line.  */
 776           if (ignore_content)
 777             {
 778               lr_ignore_rest (ldfile, 0);
 779               break;
 780             }
 781
 782           if (state != 0)
 783             goto err_label;
 784
 785           arg = lr_token (ldfile, charmap, repertoire);
 786           if (arg->tok != tok_bsymbol)
 787             goto err_label;
 788           else
 789             {
 790               const char *symbol = arg->val.str.startmb;
 791               size_t symbol_len = arg->val.str.lenmb;
 792
 793               if (!ignore_content)
 794                 {
 795                   if (symbol == NULL)
 796                     lr_error (ldfile, _("\
 797 %s: unknown character in collating symbol name"),
 798                               "LC_COLLATE");
 799                   else
 800                     {
 801                       /* The name is already defined.  */
 802                       if (check_duplicate (ldfile, collate, charmap,
 803                                            repertoire, symbol, symbol_len))
 804                         goto col_sym_free;
 805
 806                       if (insert_entry (&collate->sym_table,
 807                                         symbol, symbol_len,
 808                                         new_symbol (collate)) < 0)
 809                         lr_error (ldfile, _("\
 810 error while adding collating symbol"));
 811                     }
 812                 }
 813               else
 814                 {
 815                 col_sym_free:
 816                   if (symbol != NULL)
 817                     free ((char *) symbol);
 818                 }
 819               lr_ignore_rest (ldfile, 1);
 820             }
 821           break;
 822
 823         case tok_symbol_equivalence:
 824           /* Ignore the rest of the line if we don't need the input of
 825              this line.  */
 826           if (ignore_content)
 827             {
 828               lr_ignore_rest (ldfile, 0);
 829               break;
 830             }
 831
 832           if (state != 0)
 833             goto err_label;
 834
 835           arg = lr_token (ldfile, charmap, repertoire);
 836           if (arg->tok != tok_bsymbol)
 837             goto err_label;
 838           else
 839             {
 840               const char *newname = arg->val.str.startmb;
 841               size_t newname_len = arg->val.str.lenmb;
 842               const char *symname;
 843               size_t symname_len;
 844               struct symbol_t *symval;
 845
 846               arg = lr_token (ldfile, charmap, repertoire);
 847               if (arg->tok != tok_bsymbol)
 848                 {
 849                   if (newname != NULL)
 850                     free ((char *) newname);
 851                   goto err_label;
 852                 }
 853
 854               symname = arg->val.str.startmb;
 855               symname_len = arg->val.str.lenmb;
 856
 857               if (!ignore_content)
 858                 {
 859                   if (newname == NULL)
 860                     {
 861                       lr_error (ldfile, _("\
 862 %s: unknown character in equivalent definition name"),
 863                                 "LC_COLLATE");
 864                       goto sym_equiv_free;
 865                     }
 866                   if (symname == NULL)
 867                     {
 868                       lr_error (ldfile, _("\
 869 %s: unknown character in equivalent definition value"),
 870                                 "LC_COLLATE");
 871                       goto sym_equiv_free;
 872                     }
 873                   /* The name is already defined.  */
 874                   if (check_duplicate (ldfile, collate, charmap,
 875                                        repertoire, symname, symname_len))
 876                     goto col_sym_free;
 877
 878                   /* See whether the symbol name is already defined.  */
 879                   if (find_entry (&collate->sym_table, symname, symname_len,
 880                                   (void **) &symval) != 0)
 881                     {
 882                       lr_error (ldfile, _("\
 883 %s: unknown symbol `%s' in equivalent definition"),
 884                                 "LC_COLLATE", symname);
 885                       goto col_sym_free;
 886                     }
 887
 888                   if (insert_entry (&collate->sym_table,
 889                                     newname, newname_len, symval) < 0)
 890                     {
 891                       lr_error (ldfile, _("\
 892 error while adding equivalent collating symbol"));
 893                       goto sym_equiv_free;
 894                     }
 895
 896                   free ((char *) symname);
 897                 }
 898               else
 899                 {
 900                 sym_equiv_free:
 901                   if (newname != NULL)
 902                     free ((char *) newname);
 903                   if (symname != NULL)
 904                     free ((char *) symname);
 905                 }
 906               lr_ignore_rest (ldfile, 1);
 907             }
 908           break;
 909
 910         case tok_order_start:
 911           /* Ignore the rest of the line if we don't need the input of
 912              this line.  */
 913           if (ignore_content)
 914             {
 915               lr_ignore_rest (ldfile, 0);
 916               break;
 917             }
 918
 919           if (state != 0 && state != 1)
 920             goto err_label;
 921           state = 1;
 922
 923           /* The 14652 draft does not specify whether all `order_start' lines
 924              must contain the same number of sort-rules, but 14651 does.  So
 925              we require this here as well.  */
 926           arg = lr_token (ldfile, charmap, repertoire);
 927           if (arg->tok == tok_bsymbol)
 928             {
 929               /* This better should be a section name.  */
 930               struct section_list *sp = collate->sections;
 931               while (sp != NULL
 932                      && strcmp (sp->name, arg->val.str.startmb) != 0)
 933                 sp = sp->next;
 934
 935               if (sp == NULL)
 936                 {
 937                   lr_error (ldfile, _("\
 938 %s: unknown section name `%s'"),
 939                             "LC_COLLATE", arg->val.str.startmb);
 940                   /* We use the error section.  */
 941                   collate->current_section = &collate->error_section;
 942                 }
 943               else
 944                 {
 945                   /* Remember this section.  */
 946                   collate->current_section = sp;
 947
 948                   /* One should not be allowed to open the same
 949                      section twice.  */
 950                   if (sp->first != NULL)
 951                     lr_error (ldfile, _("\
 952 %s: multiple order definitions for section `%s'"),
 953                               "LC_COLLATE", sp->name);
 954
 955                   /* Next should come the end of the line or a semicolon.  */
 956                   arg = lr_token (ldfile, charmap, repertoire);
 957                   if (arg->tok == tok_eol)
 958                     {
 959                       uint32_t cnt;
 960
 961                       /* This means we have exactly one rule: `forward'.  */
 962                       if (collate->nrules > 1)
 963                         lr_error (ldfile, _("\
 964 %s: invalid number of sorting rules"),
 965                                   "LC_COLLATE");
 966                       else
 967                         collate->nrules = 1;
 968                       sp->rules = obstack_alloc (&collate->mempool,
 969                                                  (sizeof (enum coll_sort_rule)
 970                                                   * collate->nrules));
 971                       for (cnt = 0; cnt < collate->nrules; ++cnt)
 972                         sp->rules[cnt] = sort_forward;
 973
 974                       /* Next line.  */
 975                       break;
 976                     }
 977
 978                   /* Get the next token.  */
 979                   arg = lr_token (ldfile, charmap, repertoire);
 980                 }
 981             }
 982           else
 983             {
 984               /* There is no section symbol.  Therefore we use the unnamed
 985                  section.  */
 986               collate->current_section = &collate->unnamed_section;
 987
 988               if (collate->unnamed_section.first != NULL)
 989                 lr_error (ldfile, _("\
 990 %s: multiple order definitions for unnamed section"),
 991                           "LC_COLLATE");
 992             }
 993
 994           /* Now read the direction names.  */
 995           read_directions (ldfile, arg, charmap, repertoire, collate);
 996           break;
 997
 998         case tok_order_end:
 999           /* Ignore the rest of the line if we don't need the input of
1000              this line.  */
1001           if (ignore_content)
1002             {
1003               lr_ignore_rest (ldfile, 0);
1004               break;
1005             }
1006
1007           if (state != 1)
1008             goto err_label;
1009           state = 2;
1010           lr_ignore_rest (ldfile, 1);
1011           break;
1012
1013         case tok_reorder_after:
1014           /* Ignore the rest of the line if we don't need the input of
1015              this line.  */
1016           if (ignore_content)
1017             {
1018               lr_ignore_rest (ldfile, 0);
1019               break;
1020             }
1021
1022           if (state != 2 && state != 3)
1023             goto err_label;
1024           state = 3;
1025           /* XXX get symbol */
1026           break;
1027
1028         case tok_reorder_end:
1029           /* Ignore the rest of the line if we don't need the input of
1030              this line.  */
1031           if (ignore_content)
1032             break;
1033
1034           if (state != 3)
1035             goto err_label;
1036           state = 4;
1037           lr_ignore_rest (ldfile, 1);
1038           break;
1039
1040         case tok_bsymbol:
1041           /* Ignore the rest of the line if we don't need the input of
1042              this line.  */
1043           if (ignore_content)
1044             {
1045               lr_ignore_rest (ldfile, 0);
1046               break;
1047             }
1048
1049           if (state != 1 && state != 3)
1050             goto err_label;
1051
1052           if (state == 3)
1053             {
1054               /* It is possible that we already have this collation sequence.
1055                  In this case we move the entry.  */
1056               struct element_t *seqp;
1057
1058               if (find_entry (&collate->seq_table, arg->val.str.startmb,
1059                               arg->val.str.lenmb, (void **) &seqp) == 0)
1060                 {
1061                   /* Remove the entry from the old position.  */
1062                   if (seqp->last == NULL)
1063                     collate->start = seqp->next;
1064                   else
1065                     seqp->last->next = seqp->next;
1066                   if (seqp->next != NULL)
1067                     seqp->next->last = seqp->last;
1068
1069                   /* We also have to check whether this entry is the
1070                      first or last of a section.  */
1071                   if (seqp->section->first == seqp)
1072                     {
1073                       if (seqp->section->first == seqp->section->last)
1074                         /* This section has no content anymore.  */
1075                         seqp->section->first = seqp->section->last = NULL;
1076                       else
1077                         seqp->section->first = seqp->next;
1078                     }
1079                   else if (seqp->section->last == seqp)
1080                     seqp->section->last = seqp->last;
1081
1082                   seqp->last = seqp->next = NULL;
1083                 }
1084             }
1085
1086           /* Now insert in the new place.  */
1087           insert_value (ldfile, arg, charmap, repertoire, collate);
1088           break;
1089
1090         case tok_undefined:
1091           /* Ignore the rest of the line if we don't need the input of
1092              this line.  */
1093           if (ignore_content)
1094             {
1095               lr_ignore_rest (ldfile, 0);
1096               break;
1097             }
1098
1099           if (state != 1)
1100             goto err_label;
1101           /* XXX handle UNDEFINED weight */
1102           break;
1103
1104         case tok_ellipsis3:
1105           /* Ignore the rest of the line if we don't need the input of
1106              this line.  */
1107           if (ignore_content)
1108             {
1109               lr_ignore_rest (ldfile, 0);
1110               break;
1111             }
1112
1113           if (state != 1 && state != 3)
1114             goto err_label;
1115
1116           was_ellipsis = 1;
1117           /* XXX Read the remainder of the line and remember what are
1118              the weights.  */
1119           break;
1120
1121         case tok_end:
1122           /* Next we assume `LC_COLLATE'.  */
1123           if (!ignore_content)
1124             {
1125               if (state == 0)
1126                 /* We must either see a copy statement or have
1127                    ordering values.  */
1128                 lr_error (ldfile,
1129                           _("%s: empty category description not allowed"),
1130                           "LC_COLLATE");
1131               else if (state == 1)
1132                 lr_error (ldfile, _("%s: missing `order_end' keyword"),
1133                           "LC_COLLATE");
1134               else if (state == 3)
1135                 error (0, 0, _("%s: missing `reorder-end' keyword"),
1136                        "LC_COLLATE");
1137             }
1138           arg = lr_token (ldfile, charmap, NULL);
1139           if (arg->tok == tok_eof)
1140             break;
1141           if (arg->tok == tok_eol)
1142             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
1143           else if (arg->tok != tok_lc_collate)
1144             lr_error (ldfile, _("\
1145 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
1146           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
1147           return;
1148
1149         default:
1150         err_label:
1151           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
1152         }
1153
1154       /* Prepare for the next round.  */
1155       now = lr_token (ldfile, charmap, NULL);
1156       nowtok = now->tok;
1157     }
1158
1159   /* When we come here we reached the end of the file.  */
1160   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
1161 }
1162
1163
1164 #if 0
1165
1166 /* What kind of symbols get defined?  The `kind' member of
        struct locale_collate_t holds one of these values to record what
        kind of symbol the current element is.  No enumerator has an
        explicit value, so they are numbered 0 through 4 in declaration
        order.  */
1167 enum coll_symbol
1168 {
1169   undefined,
1170   ellipsis,
1171   character,
1172   element,
1173   symbol
1174 };
1175
1176
     /* One pending forward reference to a weight.  collate_finish walks
        the list of these records (linked through `next'), looks up the
        weight belonging to `token' and, when one is found, stores it
        through `where.pos'.  `fname' and `lineno' are passed to
        error_at_line for the message printed when no weight is defined
        for the token.  How `where.idx' is used is not visible in this
        part of the file.  */
1177 typedef struct patch_t
1178 {
1179   const char *fname;
1180   size_t lineno;
1181   const char *token;
1182   union
1183   {
1184     unsigned int *pos;
1185     size_t idx;
1186   } where;
1187   struct patch_t *next;
1188 } patch_t;
1189
1190
     /* Description of one entry with collation weights.

        `this_weight' is the value collate_finish copies into a patch
        location when a forward reference to this entry is resolved.
        `next' chains entries together; collate_finish follows it when it
        searches the chain returned by a lookup in the result table.

        `ordering' starts with one count per sorting rule and is followed
        by the weights themselves (this is how collate_finish builds the
        UNDEFINED entry and how the counts are summed up there).
        `ordering_len' appears to be the total number of weights stored
        after the counts -- TODO confirm against the code which fills it.

        NOTE(review): collate_finish and collate_output access a member
        called `name', which this definition does not declare (it only
        has `namemb' and `namewc').  The region is disabled by `#if 0', so
        the compiler does not diagnose the mismatch.  */
1191 typedef struct element_t
1192 {
1193   const char *namemb;
1194   const uint32_t *namewc;
1195   unsigned int this_weight;
1196
1197   struct element_t *next;
1198
1199   unsigned int *ordering;
1200   size_t ordering_len;
1201 } element_t;
1202
1203
1204 /* The real definition of the struct for the LC_COLLATE locale.
        collate_startup allocates one object of this type and stores it
        in locale->categories[LC_COLLATE].collate.

        NOTE(review): collate_startup and collate_finish also use members
        named `result' and `nrules_max' which are not declared here.  The
        region is disabled by `#if 0', so the mismatch goes unnoticed.  */
1205 struct locale_collate_t
1206 {
1207   /* Collate symbol table.  Simple mapping to number: collate_finish
          converts the stored pointer value itself into the weight.  */
1208   hash_table symbols;
1209
1210   /* The collation elements.  The values found in this table are
          treated as element_t pointers by collate_finish.  */
1211   hash_table elements;
     /* Obstack on which collate_finish builds the `ordering' array of a
        synthesized UNDEFINED entry.  */
1212   struct obstack element_mem;
1213
1214   /* The result tables.  */
1215   hash_table resultmb;
1216   hash_table resultwc;
1217
1218   /* Sorting rules given in order_start line.  */
1219   uint32_t nrules;
1220   enum coll_sort_rule *rules;
1221
1222   /* Used while recognizing symbol composed of multiple tokens
1223      (collating-element).  */
1224   const char *combine_token;
1225   size_t combine_token_len;
1226
1227   /* How many sorting order specifications so far.  collate_startup
          starts this at 1 and collate_finish pre-increments it to obtain
          a fresh weight, so the smallest weight handed out is 2.  */
1228   unsigned int order_cnt;
1229
1230   /* Was the last line an ellipsis?  */
1231   int was_ellipsis;
1232   /* Value of the last entry if it was a character.  */
1233   uint32_t last_char;
1234   /* Current element.  */
1235   element_t *current_element;
1236   /* What kind of symbol the current element is.  */
1237   enum coll_symbol kind;
1238
1239   /* Patch lists.  `all_patches' is the list collate_finish resolves.  */
1240   patch_t *current_patch;
1241   patch_t *all_patches;
1242
1243   /* Room for the UNDEFINED information.  An `ordering' member of NULL
          means no UNDEFINED entry was found.  `undefined_len' is computed
          by collate_finish: 2 for the name plus, for every sorting rule,
          1 plus the number of weights for that rule.  */
1244   element_t undefined;
1245   unsigned int undefined_len;
1246
1247   /* Script information.  */
1248   const char **scripts;
1249   unsigned int nscripts;
1250 };
1251
1252
1253 /* Be verbose?  Defined in localedef.c.  */
1254 extern int verbose;
1255
1256
1257
     /* Allocation hooks for the obstack code.  The same two definitions
        already appear near the top of this file; repeating them with an
        identical replacement text is harmless.  */
1258 #define obstack_chunk_alloc malloc
1259 #define obstack_chunk_free free
1260
1261
1262 /* Prototypes for local functions.  Note that the definition of
        collate_startup below names its third parameter `charset' instead
        of `charmap'; the type is the same.  */
1263 static void collate_startup (struct linereader *ldfile,
1264                              struct localedef_t *locale,
1265                              struct charmap_t *charmap, int ignore_content);
1266
1267
     /* Allocate the LC_COLLATE data for LOCALE and bring it into its
        initial state.

        The new struct locale_collate_t is stored in
        locale->categories[LC_COLLATE].collate.  The hash tables for the
        collating elements, the collating symbols and the result are
        created, the obstack for element data is initialized, and a rule
        array with room for `nrules_max' (10) entries is allocated.  A
        failing init_hash is reported through error with status 4 and the
        message "memory exhausted".

        In LDFILE the `translate_strings' and `return_widestr' flags are
        cleared.  CHARSET and IGNORE_CONTENT are not used.

        NOTE(review): the object comes from xmalloc and only the members
        assigned below are set here; e.g. `current_patch', `scripts' and
        `nscripts' are left as xmalloc returned them (assuming xmalloc
        does not clear the memory -- TODO confirm).  The members `result'
        and `nrules_max' used below are not declared in the definition
        of struct locale_collate_t visible in this file.  */
1268 static void
1269 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1270                  struct charmap_t *charset, int ignore_content)
1271 {
1272   struct locale_collate_t *collate;
1273
1274   /* Allocate the needed room.  */
1275   locale->categories[LC_COLLATE].collate = collate =
1276     (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
1277
1278   /* Allocate hash table for collating elements.  */
1279   if (init_hash (&collate->elements, 512))
1280     error (4, 0, _("memory exhausted"));
1281   collate->combine_token = NULL;
1282   obstack_init (&collate->element_mem);
1283
1284   /* Allocate hash table for collating symbols.  */
1285   if (init_hash (&collate->symbols, 64))
1286     error (4, 0, _("memory exhausted"));
1287
1288   /* Allocate hash table for result.  */
1289   if (init_hash (&collate->result, 512))
1290     error (4, 0, _("memory exhausted"));
1291
     /* No sorting rules are known yet; reserve room for the first ten.  */
1292   collate->nrules = 0;
1293   collate->nrules_max = 10;
1294   collate->rules
1295     = (enum coll_sort_rule *) xmalloc (collate->nrules_max
1296                                        * sizeof (enum coll_sort_rule));
1297
1298   collate->order_cnt = 1;       /* The smallest weight is 2.  */
1299
1300   collate->was_ellipsis = 0;
1301   collate->last_char = L'\0';   /* 0 because leading ellipsis is allowed.  */
1302
     /* There are no forward references to patch so far.  */
1303   collate->all_patches = NULL;
1304
1305   /* This tells us no UNDEFINED entry was found until now.  collate_finish
          tests the `ordering' member, which is cleared here, against NULL.  */
1306   memset (&collate->undefined, '\0', sizeof (collate->undefined));
1307
1308   ldfile->translate_strings = 0;
1309   ldfile->return_widestr = 0;
1310 }
1311
1312
     /* Finish the LC_COLLATE data of LOCALE after the definition has been
        read.

        First every record on the `all_patches' list is resolved: the
        token is looked up in the character table of CHARSET, then in the
        table of collating elements, then in the table of collating
        symbols, and the weight found is stored through the patch's
        `where.pos'.  A weight of zero means that nothing was found; in
        that case a message is printed unless `be_quiet' is set, and the
        patch location is left untouched.

        Second, if no UNDEFINED entry was seen (its `ordering' is still
        NULL), a message is printed unless `be_quiet' is set and an entry
        is synthesized which has exactly one weight per sorting rule, all
        equal to a newly allocated weight number.

        Finally `undefined_len' is computed from the UNDEFINED entry.

        REPERTOIRE is not used.

        NOTE(review): `collate->result' and `runp->name' refer to members
        which the definitions of struct locale_collate_t and element_t
        visible in this file do not declare.  */
1313 void
1314 collate_finish (struct localedef_t *locale, struct charset_t *charset,
1315                 struct repertoire_t *repertoire)
1316 {
1317   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1318   patch_t *patch;
1319   size_t cnt;
1320
1321   /* Patch the constructed table so that forward references are
1322      correctly filled.  */
1323   for (patch = collate->all_patches; patch != NULL; patch = patch->next)
1324     {
1325       uint32_t wch;
1326       size_t toklen = strlen (patch->token);
1327       void *ptmp;
1328       unsigned int value = 0;
1329
           /* Is the token known in the character table of the charset?  */
1330       wch = charset_find_value (&charset->char_table, patch->token, toklen);
1331       if (wch != ILLEGAL_CHAR_VALUE)
1332         {
1333           element_t *runp;
1334
               /* Search the chain stored for this character for the entry
                  whose name consists of exactly this one character.  */
1335           if (find_entry (&collate->result, &wch, sizeof (uint32_t),
1336                           (void *) &runp) < 0)
1337             runp = NULL;
1338           for (; runp != NULL; runp = runp->next)
1339             if (runp->name[0] == wch && runp->name[1] == L'\0')
1340               break;
1341
1342           value = runp == NULL ? 0 : runp->this_weight;
1343         }
1344       else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
1345                >= 0)
1346         {
               /* It is a collating element.  */
1347           value = ((element_t *) ptmp)->this_weight;
1348         }
1349       else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
1350                >= 0)
1351         {
               /* It is a collating symbol; the pointer value stored in the
                  table is the weight itself.  */
1352           value = (unsigned long int) ptmp;
1353         }
1354       else
1355         value = 0;
1356
           /* Zero means no weight is known for the token.  */
1357       if (value == 0)
1358         {
1359           if (!be_quiet)
1360             error_at_line (0, 0, patch->fname, patch->lineno,
1361                            _("no weight defined for symbol `%s'"),
1362                            patch->token);
1363         }
1364       else
1365         *patch->where.pos = value;
1366     }
1367
1368   /* If no definition for UNDEFINED is given, all characters in the
1369      given charset must be specified.  */
1370   if (collate->undefined.ordering == NULL)
1371     {
1372       /**************************************************************\
1373       |* XXX We should test whether really an unspecified character *|
1374       |* exists before giving the message.                          *|
1375       \**************************************************************/
1376       uint32_t weight;
1377
1378       if (!be_quiet)
1379         error (0, 0, _("no definition of `UNDEFINED'"));
1380
           /* Use one new weight number for all sorting rules.  */
1381       collate->undefined.ordering_len = collate->nrules;
1382       weight = ++collate->order_cnt;
1383
           /* First the number of weights for each rule: always one.  */
1384       for (cnt = 0; cnt < collate->nrules; ++cnt)
1385         {
1386           uint32_t one = 1;
1387           obstack_grow (&collate->element_mem, &one, sizeof (one));
1388         }
1389
           /* Then the weights themselves.  */
1390       for (cnt = 0; cnt < collate->nrules; ++cnt)
1391         obstack_grow (&collate->element_mem, &weight, sizeof (weight));
1392
1393       collate->undefined.ordering = obstack_finish (&collate->element_mem);
1394     }
1395
1396   collate->undefined_len = 2;   /* For the name: 1 x uint32_t + L'\0'.  */
       /* Add for every rule one word for the count plus the weights.  */
1397   for (cnt = 0; cnt < collate->nrules; ++cnt)
1398     collate->undefined_len += 1 + collate->undefined.ordering[cnt];
1399 }
1400
1401
1402
1403 void
1404 collate_output (struct localedef_t *locale, struct charset_t *charset,
1405                 struct repertoire_t *repertoire, const char *output_path)
1406 {
1407   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1408   uint32_t table_size, table_best, level_best, sum_best;
1409   void *last;
1410   element_t *pelem;
1411   uint32_t *name;
1412   size_t len;
1413   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1414   struct iovec iov[2 + nelems];
1415   struct locale_file data;
1416   uint32_t idx[nelems];
1417   struct obstack non_simple;
1418   struct obstack string_pool;
1419   size_t cnt, entry_size;
1420   uint32_t undefined_offset = UINT_MAX;
1421   uint32_t *table, *extra, *table2, *extra2;
1422   size_t extra_len;
1423   uint32_t element_hash_tab_size;
1424   uint32_t *element_hash_tab;
1425   uint32_t *element_hash_tab_ob;
1426   uint32_t element_string_pool_size;
1427   char *element_string_pool;
1428   uint32_t element_value_size;
1429   uint32_t *element_value;
1430   uint32_t *element_value_ob;
1431   uint32_t symbols_hash_tab_size;
1432   uint32_t *symbols_hash_tab;
1433   uint32_t *symbols_hash_tab_ob;
1434   uint32_t symbols_string_pool_size;
1435   char *symbols_string_pool;
1436   uint32_t symbols_class_size;
1437   uint32_t *symbols_class;
1438   uint32_t *symbols_class_ob;
1439   hash_table *hash_tab;
1440   unsigned int dummy_weights[collate->nrules + 1];
1441
1442   sum_best = UINT_MAX;
1443   table_best = 0xffff;
1444   level_best = 0xffff;
1445
1446   /* Compute table size.  */
1447   if (!be_quiet)
1448     fputs (_("\
1449 Computing table size for collation information might take a while..."),
1450            stderr);
1451   for (table_size = 256; table_size < sum_best; ++table_size)
1452     {
1453       size_t hits[table_size];
1454       unsigned int worst = 1;
1455       size_t cnt;
1456
1457       last = NULL;
1458
1459       for (cnt = 0; cnt < 256; ++cnt)
1460         hits[cnt] = 1;
1461       memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
1462
1463       while (iterate_table (&collate->result, &last, (const void **) &name,
1464                             &len, (void **) &pelem) >= 0)
1465         if (pelem->ordering != NULL && pelem->name[0] > 0xff)
1466           if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
1467             {
1468               worst = hits[(unsigned int) pelem->name[0] % table_size];
1469               if (table_size * worst > sum_best)
1470                 break;
1471             }
1472
1473       if (table_size * worst < sum_best)
1474         {
1475           sum_best = table_size * worst;
1476           table_best = table_size;
1477           level_best = worst;
1478         }
1479     }
1480   assert (table_best != 0xffff || level_best != 0xffff);
1481   if (!be_quiet)
1482     fputs (_(" done\n"), stderr);
1483
1484   obstack_init (&non_simple);
1485   obstack_init (&string_pool);
1486
1487   data.magic = LIMAGIC (LC_COLLATE);
1488   data.n = nelems;
1489   iov[0].iov_base = (void *) &data;
1490   iov[0].iov_len = sizeof (data);
1491
1492   iov[1].iov_base = (void *) idx;
1493   iov[1].iov_len = sizeof (idx);
1494
1495   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
1496   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t);
1497
1498   table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t));
1499   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
1500   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
1501     = collate->nrules * sizeof (uint32_t);
1502   /* Another trick here.  Describing the collation method needs only a
1503      few bits (3, to be exact).  But the binary file should be
1504      accessible by machines with both endianesses and so we store both
1505      forms in the same word.  */
1506   for (cnt = 0; cnt < collate->nrules; ++cnt)
1507     table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]);
1508
1509   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
1510   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t);
1511
1512   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
1513   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
1514     = sizeof (uint32_t);
1515
1516   entry_size = 1 + MAX (collate->nrules, 2);
1517
1518   table = (uint32_t *) alloca (table_best * level_best * entry_size
1519                                 * sizeof (table[0]));
1520   memset (table, '\0', table_best * level_best * entry_size
1521           * sizeof (table[0]));
1522
1523
1524   /* Macros for inserting in output table.  */
1525 #define ADD_VALUE(expr)                                                       \
1526   do {                                                                        \
1527     uint32_t to_write = (uint32_t) expr;                                      \
1528     obstack_grow (&non_simple, &to_write, sizeof (to_write));                 \
1529   } while (0)
1530
1531 #define ADD_ELEMENT(pelem, len)                                               \
1532   do {                                                                        \
1533     size_t cnt, idx;                                                          \
1534                                                                               \
1535     ADD_VALUE (len);                                                          \
1536                                                                               \
1537     wlen = wcslen (pelem->name);                                              \
1538     obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \
1539                                                                               \
1540     idx = collate->nrules;                                                    \
1541     for (cnt = 0; cnt < collate->nrules; ++cnt)                               \
1542       {                                                                       \
1543         size_t disp;                                                          \
1544                                                                               \
1545         ADD_VALUE (pelem->ordering[cnt]);                                     \
1546         for (disp = 0; disp < pelem->ordering[cnt]; ++disp)                   \
1547           ADD_VALUE (pelem->ordering[idx++]);                                 \
1548       }                                                                       \
1549   } while (0)
1550
1551 #define ADD_FORWARD(pelem)                                                    \
1552   do {                                                                        \
1553     /* We leave a reference in the main table and put all                     \
1554        information in the table for the extended entries.  */                 \
1555     element_t *runp;                                                          \
1556     element_t *has_simple = NULL;                                             \
1557     size_t wlen;                                                              \
1558                                                                               \
1559     table[(level * table_best + slot) * entry_size + 1]                       \
1560       = FORWARD_CHAR;                                                         \
1561     table[(level * table_best + slot) * entry_size + 2]                       \
1562       = obstack_object_size (&non_simple) / sizeof (uint32_t);                \
1563                                                                               \
1564     /* Here we have to construct the non-simple table entry.  First           \
1565        compute the total length of this entry.  */                            \
1566     for (runp = (pelem); runp != NULL; runp = runp->next)                     \
1567       if (runp->ordering != NULL)                                             \
1568         {                                                                     \
1569           uint32_t value;                                                     \
1570           size_t cnt;                                                         \
1571                                                                               \
1572           value = 1 + wcslen (runp->name) + 1;                                \
1573                                                                               \
1574           for (cnt = 0; cnt < collate->nrules; ++cnt)                         \
1575             /* We have to take care for entries without ordering              \
1576                information.  While reading them they get inserted in the      \
1577                table and later not removed when something goes wrong with     \
1578                reading its weights.  */                                       \
1579             value += 1 + runp->ordering[cnt];                                 \
1580                                                                               \
1581           if (runp->name[1] == L'\0')                                         \
1582             has_simple = runp;                                                \
1583                                                                               \
1584           ADD_ELEMENT (runp, value);                                          \
1585         }                                                                     \
1586                                                                               \
1587     if (has_simple == NULL)                                                   \
1588       {                                                                       \
1589         size_t idx, cnt;                                                      \
1590                                                                               \
1591         ADD_VALUE (collate->undefined_len + 1);                               \
1592                                                                               \
1593         /* Add the name.  */                                                  \
1594         ADD_VALUE ((pelem)->name[0]);                                         \
1595         ADD_VALUE (0);                                                        \
1596                                                                               \
1597         idx = collate->nrules;                                                \
1598         for (cnt = 0; cnt < collate->nrules; ++cnt)                           \
1599           {                                                                   \
1600             size_t disp;                                                      \
1601                                                                               \
1602             ADD_VALUE (collate->undefined.ordering[cnt]);                     \
1603             for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)   \
1604               {                                                               \
1605                 if ((uint32_t) collate->undefined.ordering[idx]               \
1606                     == ELLIPSIS_CHAR)                                         \
1607                   ADD_VALUE ((pelem)->name[0]);                               \
1608                 else                                                          \
1609                   ADD_VALUE (collate->undefined.ordering[idx++]);             \
1610                 ++idx;                                                        \
1611               }                                                               \
1612           }                                                                   \
1613       }                                                                       \
1614   } while (0)
1615
1616
1617
1618   /* Fill the table now.  First we look for all the characters which
1619      fit into one single byte.  This speeds up the 8-bit string
1620      functions.  */
1621   last = NULL;
1622   while (iterate_table (&collate->result, &last, (const void **) &name,
1623                         &len, (void **) &pelem) >= 0)
1624     if (pelem->name[0] <= 0xff)
1625       {
1626         /* We have a single byte name.  Now we must distinguish
1627            between entries in simple form (i.e., only one value per
1628            weight and no collation element starting with the same
1629            character) and those which are not.  */
1630         size_t slot = ((size_t) pelem->name[0]);
1631         const size_t level = 0;
1632
1633         table[slot * entry_size] = pelem->name[0];
1634
1635         if (pelem->name[1] == L'\0' && pelem->next == NULL
1636             && pelem->ordering_len == collate->nrules)
1637           {
1638             /* Yes, we have a simple one.  Lucky us.  */
1639             size_t cnt;
1640
1641             for (cnt = 0; cnt < collate->nrules; ++cnt)
1642               table[slot * entry_size + 1 + cnt]
1643                 = pelem->ordering[collate->nrules + cnt];
1644           }
1645         else
1646           ADD_FORWARD (pelem);
1647       }
1648
1649   /* Now check for missing single byte entries.  If one exist we fill
1650      with the UNDEFINED entry.  */
1651   for (cnt = 0; cnt < 256; ++cnt)
1652     /* The first weight is never 0 for existing entries.  */
1653     if (table[cnt * entry_size + 1] == 0)
1654       {
1655         /* We have to fill in the information from the UNDEFINED
1656            entry.  */
1657         table[cnt * entry_size] = (uint32_t) cnt;
1658
1659         if (collate->undefined.ordering_len == collate->nrules)
1660           {
1661             size_t inner;
1662
1663             for (inner = 0; inner < collate->nrules; ++inner)
1664               if ((uint32_t)collate->undefined.ordering[collate->nrules
1665                                                        + inner]
1666                   == ELLIPSIS_CHAR)
1667                 table[cnt * entry_size + 1 + inner] = cnt;
1668               else
1669                 table[cnt * entry_size + 1 + inner]
1670                   = collate->undefined.ordering[collate->nrules + inner];
1671           }
1672         else
1673           {
1674             if (undefined_offset != UINT_MAX)
1675               {
1676                 table[cnt * entry_size + 1] = FORWARD_CHAR;
1677                 table[cnt * entry_size + 2] = undefined_offset;
1678               }
1679             else
1680               {
1681                 const size_t slot = cnt;
1682                 const size_t level = 0;
1683
1684                 ADD_FORWARD (&collate->undefined);
1685                 undefined_offset = table[cnt * entry_size + 2];
1686               }
1687           }
1688       }
1689
1690   /* Now we are ready for inserting the whole rest.   */
1691   last = NULL;
1692   while (iterate_table (&collate->result, &last, (const void **) &name,
1693                         &len, (void **) &pelem) >= 0)
1694     if (pelem->name[0] > 0xff)
1695       {
1696         /* Find the position.  */
1697         size_t slot = ((size_t) pelem->name[0]) % table_best;
1698         size_t level = 0;
1699
1700         while (table[(level * table_best + slot) * entry_size + 1] != 0)
1701           ++level;
1702         assert (level < level_best);
1703
1704         if (pelem->name[1] == L'\0' && pelem->next == NULL
1705             && pelem->ordering_len == collate->nrules)
1706           {
1707             /* Again a simple entry.  */
1708             size_t inner;
1709
1710             for (inner = 0; inner < collate->nrules; ++inner)
1711               table[(level * table_best + slot) * entry_size + 1 + inner]
1712                 = pelem->ordering[collate->nrules + inner];
1713           }
1714         else
1715           ADD_FORWARD (pelem);
1716       }
1717
1718   /* Add the UNDEFINED entry.  */
1719   {
1720     /* Here we have to construct the non-simple table entry.  */
1721     size_t idx, cnt;
1722
1723     undefined_offset = obstack_object_size (&non_simple);
1724
1725     idx = collate->nrules;
1726     for (cnt = 0; cnt < collate->nrules; ++cnt)
1727       {
1728         size_t disp;
1729
1730         ADD_VALUE (collate->undefined.ordering[cnt]);
1731         for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
1732           ADD_VALUE (collate->undefined.ordering[idx++]);
1733       }
1734   }
1735
1736   /* Finish the extra block.  */
1737   extra_len = obstack_object_size (&non_simple);
1738   extra = (uint32_t *) obstack_finish (&non_simple);
1739   assert ((extra_len % sizeof (uint32_t)) == 0);
1740
1741   /* Now we have to build the two array for the other byte ordering.  */
1742   table2 = (uint32_t *) alloca (table_best * level_best * entry_size
1743                                  * sizeof (table[0]));
1744   extra2 = (uint32_t *) alloca (extra_len);
1745
1746   for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
1747     table2[cnt] = bswap_32 (table[cnt]);
1748
1749   for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt)
1750     extra2[cnt] = bswap_32 (extra2[cnt]);
1751
1752   /* We need a simple hashing table to get a collation-element->chars
1753      mapping.  We again use internal hashing using a secondary hashing
1754      function.
1755
1756      Each string has an associate hashing value V, computed by a
1757      fixed function.  To locate the string we use open addressing with
1758      double hashing.  The first index will be V % M, where M is the
1759      size of the hashing table.  If no entry is found, iterating with
1760      a second, independent hashing function takes place.  This second
1761      value will be 1 + V % (M - 2).  The approximate number of probes
1762      will be
1763
1764           for unsuccessful search: (1 - N / M) ^ -1
1765           for successful search:   - (N / M) ^ -1 * ln (1 - N / M)
1766
1767      where N is the number of keys.
1768
1769      If we now choose M to be the next prime bigger than 4 / 3 * N,
1770      we get the values 4 and 1.85 resp.  Because unsuccessful searches
1771      are unlikely this is a good value.  Formulas: [Knuth, The Art of
1772      Computer Programming, Volume 3, Sorting and Searching, 1973,
1773      Addison Wesley]  */
1774   if (collate->elements.filled == 0)
1775     {
1776       /* We don't need any element table since there are no collating
1777          elements.  */
1778       element_hash_tab_size = 0;
1779       element_hash_tab = NULL;
1780       element_hash_tab_ob = NULL;
1781       element_string_pool_size = 0;
1782       element_string_pool = NULL;
1783       element_value_size = 0;
1784       element_value = NULL;
1785       element_value_ob = NULL;
1786     }
1787   else
1788     {
1789       void *ptr;                /* Running pointer.  */
1790       const char *key;          /* Key for current bucket.  */
1791       size_t keylen;            /* Length of key data.  */
1792       const element_t *data;    /* Data, i.e., the character sequence.  */
1793
1794       element_hash_tab_size = next_prime ((collate->elements.filled * 4) / 3);
1795       if (element_hash_tab_size < 7)
1796         /* We need a minimum to make the following code work.  */
1797         element_hash_tab_size = 7;
1798
1799       element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size
1800                                                       * sizeof (uint32_t)));
1801       memset (element_hash_tab, '\377', (2 * element_hash_tab_size
1802                                          * sizeof (uint32_t)));
1803
1804       ptr = NULL;
1805       while (iterate_table (&collate->elements, &ptr, (const void **) &key,
1806                             &keylen, (void **) &data) == 0)
1807         {
1808           size_t hash_val = hash_string (key, keylen);
1809           size_t idx = hash_val % element_hash_tab_size;
1810
1811           if (element_hash_tab[2 * idx] != (~((uint32_t) 0)))
1812             {
1813               /* We need the second hashing function.  */
1814               size_t c = 1 + (hash_val % (element_hash_tab_size - 2));
1815
1816               do
1817                 if (idx >= element_hash_tab_size - c)
1818                   idx -= element_hash_tab_size - c;
1819                 else
1820                   idx += c;
1821               while (element_hash_tab[2 * idx] != (~((uint32_t) 0)));
1822             }
1823
1824           element_hash_tab[2 * idx] = obstack_object_size (&non_simple);
1825           element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool)
1826                                            / sizeof (uint32_t));
1827
1828           obstack_grow0 (&non_simple, key, keylen);
1829           obstack_grow (&string_pool, data->name,
1830                         (wcslen (data->name) + 1) * sizeof (uint32_t));
1831         }
1832
1833       if (obstack_object_size (&non_simple) % 4 != 0)
1834         obstack_blank (&non_simple,
1835                        4 - (obstack_object_size (&non_simple) % 4));
1836       element_string_pool_size = obstack_object_size (&non_simple);
1837       element_string_pool = obstack_finish (&non_simple);
1838
1839       element_value_size = obstack_object_size (&string_pool);
1840       element_value = obstack_finish (&string_pool);
1841
1842       /* Create the tables for the other byte order.  */
1843       element_hash_tab_ob = obstack_alloc (&non_simple,
1844                                            (2 * element_hash_tab_size
1845                                             * sizeof (uint32_t)));
1846       for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt)
1847         element_hash_tab_ob[cnt] = bswap_U32 (element_hash_tab[cnt]);
1848
1849       element_value_ob = obstack_alloc (&string_pool, element_value_size);
1850       for (cnt = 0; cnt < element_value_size / 4; ++cnt)
1851         element_value_ob[cnt] = bswap_32 (element_value[cnt]);
1852     }
1853
1854   /* Store collation elements as map to collation class.  There are
1855      three kinds of symbols:
1856        - simple characters
1857        - collation elements
1858        - collation symbols
1859      We need to make a table which lets the user to access the primary
1860      weight based on the symbol string.  */
1861   symbols_hash_tab_size = next_prime ((4 * (charset->char_table.filled
1862                                             + collate->elements.filled
1863                                             + collate->symbols.filled)) / 3);
1864   symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
1865                                                   * sizeof (uint32_t)));
1866   memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size
1867                                      * sizeof (uint32_t)));
1868
1869   /* Now fill the array.  First the symbols from the character set,
1870      then the collation elements and last the collation symbols.  */
1871   hash_tab = &charset->char_table;
1872   while (1)
1873     {
1874       void *ptr;        /* Running pointer.  */
1875       const char *key;  /* Key for current bucket.  */
1876       size_t keylen;    /* Length of key data.  */
1877       void *data;       /* Data.  */
1878
1879       ptr = NULL;
1880       while (iterate_table (hash_tab, &ptr, (const void **) &key,
1881                             &keylen, (void **) &data) == 0)
1882         {
1883           size_t hash_val;
1884           size_t idx;
1885           uint32_t word;
1886           unsigned int *weights;
1887
1888           if (hash_tab == &charset->char_table
1889               || hash_tab == &collate->elements)
1890             {
1891               element_t *lastp, *firstp;
1892               uint32_t dummy_name[2];
1893               const uint32_t *name;
1894               size_t name_len;
1895
1896               if (hash_tab == &charset->char_table)
1897                 {
1898                   dummy_name[0] = (uint32_t) ((unsigned long int) data);
1899                   dummy_name[1] = L'\0';
1900                   name = dummy_name;
1901                   name_len = sizeof (uint32_t);
1902                 }
1903               else
1904                 {
1905                   element_t *elemp = (element_t *) data;
1906                   name = elemp->name;
1907                   name_len = wcslen (name) * sizeof (uint32_t);
1908                 }
1909
1910               /* First check whether this character is used at all.  */
1911               if (find_entry (&collate->result, name, name_len,
1912                               (void *) &firstp) < 0)
1913                 /* The symbol is not directly mentioned in the collation.
1914                    I.e., we use the value for UNDEFINED.  */
1915                 lastp = &collate->undefined;
1916               else
1917                 {
1918                   /* The entry for the simple character is always found at
1919                      the end.  */
1920                   lastp = firstp;
1921                   while (lastp->next != NULL && wcscmp (name, lastp->name))
1922                     lastp = lastp->next;
1923                 }
1924
1925               weights = lastp->ordering;
1926             }
1927           else
1928             {
1929               dummy_weights[0] = 1;
1930               dummy_weights[collate->nrules]
1931                 = (unsigned int) ((unsigned long int) data);
1932
1933               weights = dummy_weights;
1934             }
1935
1936           /* In LASTP->ordering we now have the collation class.
1937              Determine the place in the hashing table next.  */
1938           hash_val = hash_string (key, keylen);
1939           idx = hash_val % symbols_hash_tab_size;
1940
1941           if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)))
1942             {
1943               /* We need the second hashing function.  */
1944               size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2));
1945
1946               do
1947                 if (idx >= symbols_hash_tab_size - c)
1948                   idx -= symbols_hash_tab_size - c;
1949                 else
1950                   idx += c;
1951               while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)));
1952             }
1953
1954           symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool);
1955           symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple)
1956                                            / sizeof (uint32_t));
1957
1958           obstack_grow0 (&string_pool, key, keylen);
1959           /* Adding the first weight looks complicated.  We have to deal
1960              with the kind it is stored and with the fact that original
1961              form uses `unsigned int's while we need `uint32_t' here.  */
1962           word = weights[0];
1963           obstack_grow (&non_simple, &word, sizeof (uint32_t));
1964           for (cnt = 0; cnt < weights[0]; ++cnt)
1965             {
1966               word = weights[collate->nrules + cnt];
1967               obstack_grow (&non_simple, &word, sizeof (uint32_t));
1968             }
1969         }
1970
1971       if (hash_tab == &charset->char_table)
1972         hash_tab = &collate->elements;
1973       else if (hash_tab == &collate->elements)
1974         hash_tab = &collate->symbols;
1975       else
1976         break;
1977     }
1978
1979   /* Now we have the complete tables.  */
1980   if (obstack_object_size (&string_pool) % 4 != 0)
1981     obstack_blank (&non_simple, 4 - (obstack_object_size (&string_pool) % 4));
1982   symbols_string_pool_size = obstack_object_size (&string_pool);
1983   symbols_string_pool = obstack_finish (&string_pool);
1984
1985   symbols_class_size = obstack_object_size (&non_simple);
1986   symbols_class = obstack_finish (&non_simple);
1987
1988   /* Generate tables with other byte order.  */
1989   symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
1990                                                      * sizeof (uint32_t)));
1991   for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt)
1992     symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]);
1993
1994   symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size);
1995   for (cnt = 0; cnt < symbols_class_size / 4; ++cnt)
1996     symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]);
1997
1998
1999   /* Store table addresses and lengths.   */
2000 #if __BYTE_ORDER == __BIG_ENDIAN
2001   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
2002   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
2003     = table_best * level_best * entry_size * sizeof (table[0]);
2004
2005   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
2006   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
2007     = table_best * level_best * entry_size * sizeof (table[0]);
2008
2009   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
2010   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
2011
2012   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
2013   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
2014 #else
2015   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
2016   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
2017     = table_best * level_best * entry_size * sizeof (table[0]);
2018
2019   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
2020   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
2021     = table_best * level_best * entry_size * sizeof (table[0]);
2022
2023   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
2024   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
2025
2026   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
2027   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
2028 #endif
2029
2030   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
2031   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t);
2032
2033
2034   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base
2035     = &element_hash_tab_size;
2036   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len
2037     = sizeof (uint32_t);
2038
2039 #if __BYTE_ORDER == __BIG_ENDIAN
2040   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
2041     = element_hash_tab;
2042   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
2043     = 2 * element_hash_tab_size * sizeof (uint32_t);
2044
2045   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
2046     = element_hash_tab_ob;
2047   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
2048     = 2 * element_hash_tab_size * sizeof (uint32_t);
2049 #else
2050   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
2051     = element_hash_tab;
2052   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
2053     = 2 * element_hash_tab_size * sizeof (uint32_t);
2054
2055   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
2056     = element_hash_tab_ob;
2057   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
2058     = 2 * element_hash_tab_size * sizeof (uint32_t);
2059 #endif
2060
2061   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base
2062     = element_string_pool;
2063   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_len
2064     = element_string_pool_size;
2065
2066 #if __BYTE_ORDER == __BIG_ENDIAN
2067   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
2068     = element_value;
2069   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
2070     = element_value_size;
2071
2072   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
2073     = element_value_ob;
2074   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
2075     = element_value_size;
2076 #else
2077   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
2078     = element_value;
2079   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
2080     = element_value_size;
2081
2082   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
2083     = element_value_ob;
2084   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
2085     = element_value_size;
2086 #endif
2087
2088   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base
2089     = &symbols_hash_tab_size;
2090   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len
2091     = sizeof (uint32_t);
2092
2093 #if __BYTE_ORDER == __BIG_ENDIAN
2094   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
2095     = symbols_hash_tab;
2096   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
2097     = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2098
2099   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
2100     = symbols_hash_tab_ob;
2101   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
2102     = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2103 #else
2104   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
2105     = symbols_hash_tab;
2106   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
2107     = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2108
2109   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
2110     = symbols_hash_tab_ob;
2111   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
2112     = 2 * symbols_hash_tab_size * sizeof (uint32_t);
2113 #endif
2114
2115   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base
2116     = symbols_string_pool;
2117   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_len
2118     = symbols_string_pool_size;
2119
2120 #if __BYTE_ORDER == __BIG_ENDIAN
2121   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
2122     = symbols_class;
2123   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
2124     = symbols_class_size;
2125
2126   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
2127     = symbols_class_ob;
2128   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
2129     = symbols_class_size;
2130 #else
2131   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
2132     = symbols_class;
2133   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
2134     = symbols_class_size;
2135
2136   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
2137     = symbols_class_ob;
2138   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
2139     = symbols_class_size;
2140 #endif
2141
2142   /* Update idx array.  */
2143   idx[0] = iov[0].iov_len + iov[1].iov_len;
2144   for (cnt = 1; cnt < nelems; ++cnt)
2145     idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
2146
2147   write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
2148
2149   obstack_free (&non_simple, NULL);
2150   obstack_free (&string_pool, NULL);
2151 }
2152
2153
2154 static int
2155 collate_element_to (struct linereader *ldfile,
2156                     struct locale_collate_t *collate,
2157                     struct token *code, struct charmap_t *charmap,
2158                     struct repertoire_t *repertoire)
2159 {
2160   struct charseq *seq;
2161   uint32_t value;
2162   void *not_used;
2163
2164   seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len);
2165   if (seq != NULL)
2166     {
2167       lr_error (ldfile, _("symbol for multicharacter collating element "
2168                       "`%.*s' duplicates symbolic name in charmap"),
2169                 (int) code->val.str.len, code->val.str.start);
2170       return 1;
2171     }
2172
2173   value = repertoire_find_value (repertoire, code->val.str.start,
2174                                  code->val.str.len);
2175   if (value != ILLEGAL_CHAR_VALUE)
2176     {
2177       lr_error (ldfile, _("symbol for multicharacter collating element "
2178                       "`%.*s' duplicates symbolic name in repertoire"),
2179                 (int) code->val.str.len, code->val.str.start);
2180       return 1;
2181     }
2182
2183   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
2184                   &not_used) >= 0)
2185     {
2186       lr_error (ldfile, _("symbol for multicharacter collating element "
2187                       "`%.*s' duplicates other element definition"),
2188                 (int) code->val.str.len, code->val.str.start);
2189       return 1;
2190     }
2191
2192   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
2193                   &not_used) >= 0)
2194     {
2195       lr_error (ldfile, _("symbol for multicharacter collating element "
2196                       "`%.*s' duplicates symbol definition"),
2197                 (int) code->val.str.len, code->val.str.start);
2198       return 1;
2199     }
2200
2201   return 0;
2202 }
2203
2204
2205 static void
2206 collate_element_from (struct linereader *ldfile,
2207                       struct locale_collate_t *collate,
2208                       const char *to_str, struct token *code,
2209                       struct charmap_t *charmap,
2210                       struct repertoire_t *repertoire)
2211 {
2212   element_t *elemp, *runp;
2213
2214   /* CODE is a string.  */
2215   elemp = (element_t *) obstack_alloc (&collate->element_mem,
2216                                        sizeof (element_t));
2217
2218   /* We have to translate the string.  It may contain <...> character
2219      names.  */
2220   elemp->namemb = code->val.str.startmb;
2221   elemp->namewc = code->val.str.startwc;
2222   elemp->this_weight = 0;
2223   elemp->ordering = NULL;
2224   elemp->ordering_len = 0;
2225
2226   if (elemp->namemb == NULL && elemp->namewc == NULL)
2227     {
2228       /* The string contains characters which are not in the charmap nor
2229          in the repertoire.  Ignore the string.  */
2230       if (verbose)
2231         lr_error (ldfile, _("\
2232 `from' string in collation element declaration contains unknown character"));
2233       return;
2234     }
2235
2236   /* The entries in the linked lists of RESULT are sorting in
2237      descending order.  The order is important for the `strcoll' and
2238      `wcscoll' functions.  */
2239   if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
2240                   (void *) &runp) >= 0)
2241     {
2242       /* We already have an entry with this key.  Check whether it is
2243          identical.  */
2244       element_t *prevp = NULL;
2245       int cmpres;
2246
2247       do
2248         {
2249           cmpres = wcscmp (elemp->namewc, runp->namewc);
2250           if (cmpres <= 0)
2251             break;
2252           prevp = runp;
2253         }
2254       while ((runp = runp->next) != NULL);
2255
2256       if (cmpres == 0)
2257         lr_error (ldfile, _("\
2258 duplicate collating element definition (repertoire)"));
2259       else
2260         {
2261           elemp->next = runp;
2262           if (prevp == NULL)
2263             {
2264               if (set_entry (&collate->resultwc, elemp->namewc,
2265                              sizeof (uint32_t), elemp) < 0)
2266                 error (EXIT_FAILURE, 0, _("\
2267 error while inserting collation element into hash table"));
2268             }
2269           else
2270             prevp->next = elemp;
2271         }
2272     }
2273   else
2274     {
2275       elemp->next = NULL;
2276       if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
2277                         elemp) < 0)
2278         error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
2279     }
2280
2281   /* Now also insert the element definition in the multibyte table.  */
2282   if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0)
2283     {
2284       /* We already have an entry with this key.  Check whether it is
2285          identical.  */
2286       element_t *prevp = NULL;
2287       int cmpres;
2288
2289       do
2290         {
2291           cmpres = strcmp (elemp->namemb, runp->namemb);
2292           if (cmpres <= 0)
2293             break;
2294           prevp = runp;
2295         }
2296       while ((runp = runp->next) != NULL);
2297
2298       if (cmpres == 0)
2299         lr_error (ldfile, _("\
2300 duplicate collating element definition (charmap)"));
2301       else
2302         {
2303           elemp->next = runp;
2304           if (prevp == NULL)
2305             {
2306               if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
2307                 error (EXIT_FAILURE, 0, _("\
2308 error while inserting collation element into hash table"));
2309             }
2310           else
2311             prevp->next = elemp;
2312         }
2313     }
2314   else
2315     {
2316       elemp->next = NULL;
2317       if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
2318         error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
2319     }
2320
2321   /* Finally install the mapping from the `to'-name to the `from'-name.  */
2322   if (insert_entry (&collate->elements, to_str, strlen (to_str),
2323                     (void *) elemp) < 0)
2324     lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
2325               strerror (errno));
2326 }
2327
2328
2329 static void
2330 collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate,
2331                 struct token *code, struct charmap_t *charmap,
2332                 struct repertoire_t *repertoire)
2333 {
2334   uint32_t value;
2335   struct charseq *seq;
2336   void *not_used;
2337
2338   seq = charset_find_value (charmap, code->val.str.start, code->val.str.len);
2339   if (seq != NULL)
2340     {
2341       lr_error (ldfile, _("symbol for multicharacter collating element "
2342                       "`%.*s' duplicates symbolic name in charmap"),
2343                 (int) code->val.str.len, code->val.str.start);
2344       return;
2345     }
2346
2347   value = repertoire (repertoire, code->val.str.start, code->val.str.len);
2348   if (value != ILLEGAL_CHAR_VALUE)
2349     {
2350       lr_error (ldfile, _("symbol for multicharacter collating element "
2351                       "`%.*s' duplicates symbolic name in repertoire"),
2352                 (int) code->val.str.len, code->val.str.start);
2353       return;
2354     }
2355
2356   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
2357                   &not_used) >= 0)
2358     {
2359       lr_error (ldfile, _("symbol for multicharacter collating element "
2360                       "`%.*s' duplicates element definition"),
2361                 (int) code->val.str.len, code->val.str.start);
2362       return;
2363     }
2364
2365   if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
2366                   &not_used) >= 0)
2367     {
2368       lr_error (ldfile, _("symbol for multicharacter collating element "
2369                       "`%.*s' duplicates other symbol definition"),
2370                 (int) code->val.str.len, code->val.str.start);
2371       return;
2372     }
2373
2374   if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
2375                     (void *) 0) < 0)
2376     lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
2377               strerror (errno));
2378 }
2379
2380
2381 void
2382 collate_new_order (struct linereader *ldfile, struct localedef_t *locale,
2383                    enum coll_sort_rule sort_rule)
2384 {
2385   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2386
2387   if (collate->nrules >= collate->nrules_max)
2388     {
2389       collate->nrules_max *= 2;
2390       collate->rules
2391         = (enum coll_sort_rule *) xrealloc (collate->rules,
2392                                             collate->nrules_max
2393                                             * sizeof (enum coll_sort_rule));
2394     }
2395
2396   collate->rules[collate->nrules++] = sort_rule;
2397 }
2398
2399
2400 void
2401 collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale)
2402 {
2403   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2404
2405   collate->rules
2406     = (enum coll_sort_rule *) xrealloc (collate->rules,
2407                                         collate->nrules
2408                                         * sizeof (enum coll_sort_rule));
2409
2410   /* Allocate arrays for temporary weights.  */
2411   collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
2412
2413   /* Choose arbitrary start value for table size.  */
2414   collate->nweight_max = 5 * collate->nrules;
2415   collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int));
2416 }
2417
2418
2419 int
2420 collate_order_elem (struct linereader *ldfile, struct localedef_t *locale,
2421                     struct token *code, struct charset_t *charset)
2422 {
2423   const uint32_t zero = L'\0';
2424   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2425   int result = 0;
2426   uint32_t value;
2427   void *tmp;
2428   unsigned int i;
2429
2430   switch (code->tok)
2431     {
2432     case tok_bsymbol:
2433       /* We have a string to find in one of the three hashing tables.  */
2434       value = charset_find_value (&charset->char_table, code->val.str.start,
2435                                   code->val.str.len);
2436       if (value != ILLEGAL_CHAR_VALUE)
2437         {
2438           element_t *lastp, *firstp;
2439
2440           collate->kind = character;
2441
2442           if (find_entry (&collate->result, &value, sizeof (uint32_t),
2443                           (void *) &firstp) < 0)
2444             firstp = lastp = NULL;
2445           else
2446             {
2447               /* The entry for the simple character is always found at
2448                  the end.  */
2449               lastp = firstp;
2450               while (lastp->next != NULL)
2451                 lastp = lastp->next;
2452
2453               if (lastp->name[0] == value && lastp->name[1] == L'\0')
2454                 {
2455                   lr_error (ldfile,
2456                             _("duplicate definition for character `%.*s'"),
2457                             (int) code->val.str.len, code->val.str.start);
2458                   lr_ignore_rest (ldfile, 0);
2459                   result = -1;
2460                   break;
2461                 }
2462             }
2463
2464           collate->current_element
2465             = (element_t *) obstack_alloc (&collate->element_mem,
2466                                            sizeof (element_t));
2467
2468           obstack_grow (&collate->element_mem, &value, sizeof (value));
2469           obstack_grow (&collate->element_mem, &zero, sizeof (zero));
2470
2471           collate->current_element->name =
2472             (const uint32_t *) obstack_finish (&collate->element_mem);
2473
2474           collate->current_element->this_weight = ++collate->order_cnt;
2475
2476           collate->current_element->next = NULL;
2477
2478           if (firstp == NULL)
2479             {
2480               if (insert_entry (&collate->result, &value, sizeof (uint32_t),
2481                                 (void *) collate->current_element) < 0)
2482                 {
2483                   lr_error (ldfile, _("cannot insert collation element `%.*s'"),
2484                             (int) code->val.str.len, code->val.str.start);
2485                   exit (4);
2486                 }
2487             }
2488           else
2489             lastp->next = collate->current_element;
2490         }
2491       else if (find_entry (&collate->elements, code->val.str.start,
2492                            code->val.str.len, &tmp) >= 0)
2493         {
2494           collate->current_element = (element_t *) tmp;
2495
2496           if (collate->current_element->this_weight != 0)
2497             {
2498               lr_error (ldfile, _("\
2499 collation element `%.*s' appears more than once: ignore line"),
2500                         (int) code->val.str.len, code->val.str.start);
2501               lr_ignore_rest (ldfile, 0);
2502               result = -1;
2503               break;
2504             }
2505
2506           collate->kind = element;
2507           collate->current_element->this_weight = ++collate->order_cnt;
2508         }
2509       else if (find_entry (&collate->symbols, code->val.str.start,
2510                            code->val.str.len, &tmp) >= 0)
2511         {
2512           unsigned int order = ++collate->order_cnt;
2513
2514           if ((unsigned long int) tmp != 0ul)
2515             {
2516               lr_error (ldfile, _("\
2517 collation symbol `%.*s' appears more than once: ignore line"),
2518                         (int) code->val.str.len, code->val.str.start);
2519               lr_ignore_rest (ldfile, 0);
2520               result = -1;
2521               break;
2522             }
2523
2524           collate->kind = symbol;
2525
2526           if (set_entry (&collate->symbols, code->val.str.start,
2527                          code->val.str.len, (void *) order) < 0)
2528             {
2529               lr_error (ldfile, _("cannot process order specification"));
2530               exit (4);
2531             }
2532         }
2533       else
2534         {
2535           if (verbose)
2536             lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
2537                       (int) code->val.str.len, code->val.str.start);
2538           lr_ignore_rest (ldfile, 0);
2539
2540           result = -1;
2541         }
2542       break;
2543
2544     case tok_undefined:
2545       collate->kind = undefined;
2546       collate->current_element = &collate->undefined;
2547       break;
2548
2549     case tok_ellipsis:
2550       if (collate->was_ellipsis)
2551         {
2552           lr_error (ldfile, _("\
2553 two lines in a row containing `...' are not allowed"));
2554           result = -1;
2555         }
2556       else if (collate->kind != character)
2557         {
2558           /* An ellipsis requires the previous line to be an
2559              character definition.  */
2560           lr_error (ldfile, _("\
2561 line before ellipsis does not contain definition for character constant"));
2562           lr_ignore_rest (ldfile, 0);
2563           result = -1;
2564         }
2565       else
2566         collate->kind = ellipsis;
2567       break;
2568
2569     default:
2570       assert (! "illegal token in `collate_order_elem'");
2571     }
2572
2573   /* Now it's time to handle the ellipsis in the previous line.  We do
2574      this only when the last line contained an definition for a
2575      character, the current line also defines an character, the
2576      character code for the later is bigger than the former.  */
2577   if (collate->was_ellipsis)
2578     {
2579       if (collate->kind != character)
2580         {
2581           lr_error (ldfile, _("\
2582 line after ellipsis must contain character definition"));
2583           lr_ignore_rest (ldfile, 0);
2584           result = -1;
2585         }
2586       else if (collate->last_char > value)
2587         {
2588           lr_error (ldfile, _("end point of ellipsis range is bigger then start"));
2589           lr_ignore_rest (ldfile, 0);
2590           result = -1;
2591         }
2592       else
2593         {
2594           /* We can fill the arrays with the information we need.  */
2595           uint32_t name[2];
2596           unsigned int *data;
2597           size_t *ptr;
2598           size_t cnt;
2599
2600           name[0] = collate->last_char + 1;
2601           name[1] = L'\0';
2602
2603           data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
2604                                           * sizeof (unsigned int));
2605           ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
2606
2607           /* Prepare data.  Because the characters covered by an
2608              ellipsis all have equal values we prepare the data once
2609              and only change the variable number (if there are any).
2610              PTR[...] will point to the entries which will have to be
2611              fixed during the output loop.  */
2612           for (cnt = 0; cnt < collate->nrules; ++cnt)
2613             {
2614               data[cnt] = collate->weight_cnt[cnt];
2615               ptr[cnt] = (cnt == 0
2616                           ? collate->nweight
2617                           : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
2618             }
2619
2620           for (cnt = 0; cnt < collate->nweight; ++cnt)
2621             data[collate->nrules + cnt] = collate->weight[cnt];
2622
2623           for (cnt = 0; cnt < collate->nrules; ++cnt)
2624             if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR)
2625               ptr[cnt] = 0;
2626
2627           while (name[0] <= value)
2628             {
2629               element_t *pelem;
2630
2631               pelem = (element_t *) obstack_alloc (&collate->element_mem,
2632                                                    sizeof (element_t));
2633               pelem->name
2634                 = (const uint32_t *) obstack_copy (&collate->element_mem,
2635                                                   name, 2 * sizeof (uint32_t));
2636               pelem->this_weight = ++collate->order_cnt;
2637
2638               pelem->ordering_len = collate->nweight;
2639               pelem->ordering
2640                 = (unsigned int *) obstack_copy (&collate->element_mem, data,
2641                                                  (collate->nrules
2642                                                   + pelem->ordering_len)
2643                                                  * sizeof (unsigned int));
2644
2645               /* `...' weights need to be adjusted.  */
2646               for (cnt = 0; cnt < collate->nrules; ++cnt)
2647                 if (ptr[cnt] != 0)
2648                   pelem->ordering[ptr[cnt]] = pelem->this_weight;
2649
2650               /* Insert new entry into result table.  */
2651               if (find_entry (&collate->result, name, sizeof (uint32_t),
2652                               (void *) &pelem->next) >= 0)
2653                 {
2654                   if (set_entry (&collate->result, name, sizeof (uint32_t),
2655                                  (void *) pelem) < 0)
2656                     error (4, 0, _("cannot insert into result table"));
2657                 }
2658               else
2659                 {
2660                   pelem->next = NULL;
2661                   if (insert_entry (&collate->result, name, sizeof (uint32_t),
2662                                     (void *) pelem) < 0)
2663                     error (4, 0, _("cannot insert into result table"));
2664                 }
2665
2666               /* Increment counter.  */
2667               ++name[0];
2668             }
2669         }
2670     }
2671
2672   /* Reset counters for weights.  */
2673   collate->weight_idx = 0;
2674   collate->nweight = 0;
2675   for (i = 0; i < collate->nrules; ++i)
2676     collate->weight_cnt[i] = 0;
2677   collate->current_patch = NULL;
2678
2679   return result;
2680 }
2681
2682
2683 int
2684 collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale,
2685                         struct token *code, struct charset_t *charset)
2686 {
2687   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2688   unsigned int here_weight;
2689   uint32_t value;
2690   void *tmp;
2691
2692   assert (code->tok == tok_bsymbol);
2693
2694   value = charset_find_value (&charset->char_table, code->val.str.start,
2695                               code->val.str.len);
2696   if (value != ILLEGAL_CHAR_VALUE)
2697     {
2698       element_t *runp;
2699
2700       if (find_entry (&collate->result, &value, sizeof (uint32_t),
2701                       (void *)&runp) < 0)
2702         runp = NULL;
2703
2704       while (runp != NULL
2705              && (runp->name[0] != value || runp->name[1] != L'\0'))
2706         runp = runp->next;
2707
2708       here_weight = runp == NULL ? 0 : runp->this_weight;
2709     }
2710   else if (find_entry (&collate->elements, code->val.str.start,
2711                        code->val.str.len, &tmp) >= 0)
2712     {
2713       element_t *runp = (element_t *) tmp;
2714
2715       here_weight = runp->this_weight;
2716     }
2717   else if (find_entry (&collate->symbols, code->val.str.start,
2718                        code->val.str.len, &tmp) >= 0)
2719     {
2720       here_weight = (unsigned int) tmp;
2721     }
2722   else
2723     {
2724       if (verbose)
2725         lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
2726                   (int) code->val.str.len, code->val.str.start);
2727       lr_ignore_rest (ldfile, 0);
2728       return -1;
2729     }
2730
2731   /* When we currently work on a collation symbol we do not expect any
2732      weight.  */
2733   if (collate->kind == symbol)
2734     {
2735       lr_error (ldfile, _("\
2736 specification of sorting weight for collation symbol does not make sense"));
2737       lr_ignore_rest (ldfile, 0);
2738       return -1;
2739     }
2740
2741   /* Add to the current collection of weights.  */
2742   if (collate->nweight >= collate->nweight_max)
2743     {
2744       collate->nweight_max *= 2;
2745       collate->weight = (unsigned int *) xrealloc (collate->weight,
2746                                                    collate->nweight_max);
2747     }
2748
2749   /* If the weight is currently not known, we remember to patch the
2750      resulting tables.  */
2751   if (here_weight == 0)
2752     {
2753       patch_t *newp;
2754
2755       newp = (patch_t *) obstack_alloc (&collate->element_mem,
2756                                         sizeof (patch_t));
2757       newp->fname = ldfile->fname;
2758       newp->lineno = ldfile->lineno;
2759       newp->token = (const char *) obstack_copy0 (&collate->element_mem,
2760                                                   code->val.str.start,
2761                                                   code->val.str.len);
2762       newp->where.idx = collate->nweight++;
2763       newp->next = collate->current_patch;
2764       collate->current_patch = newp;
2765     }
2766   else
2767     collate->weight[collate->nweight++] = here_weight;
2768   ++collate->weight_cnt[collate->weight_idx];
2769
2770   return 0;
2771 }
2772
2773
2774 int
2775 collate_next_weight (struct linereader *ldfile, struct localedef_t *locale)
2776 {
2777   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2778
2779   if (collate->kind == symbol)
2780     {
2781       lr_error (ldfile, _("\
2782 specification of sorting weight for collation symbol does not make sense"));
2783       lr_ignore_rest (ldfile, 0);
2784       return -1;
2785     }
2786
2787   ++collate->weight_idx;
2788   if (collate->weight_idx >= collate->nrules)
2789     {
2790       lr_error (ldfile, _("too many weights"));
2791       lr_ignore_rest (ldfile, 0);
2792       return -1;
2793     }
2794
2795   return 0;
2796 }
2797
2798
2799 int
2800 collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale,
2801                        struct token *code, struct charset_t *charset)
2802 {
2803   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2804   unsigned int value = 0;
2805
2806   /* There current tokens can be `IGNORE', `...', or a string.  */
2807   switch (code->tok)
2808     {
2809     case tok_ignore:
2810       /* This token is allowed in all situations.  */
2811       value = IGNORE_CHAR;
2812       break;
2813
2814     case tok_ellipsis:
2815       /* The ellipsis is only allowed for the `...' or `UNDEFINED'
2816          entry.  */
2817       if (collate->kind != ellipsis && collate->kind != undefined)
2818         {
2819           lr_error (ldfile, _("\
2820 `...' must only be used in `...' and `UNDEFINED' entries"));
2821           lr_ignore_rest (ldfile, 0);
2822           return -1;
2823         }
2824       value = ELLIPSIS_CHAR;
2825       break;
2826
2827     case tok_string:
2828       /* This can become difficult.  We have to get the weights which
2829          correspond to the single wide chars in the string.  But some
2830          of the `chars' might not be real characters, but collation
2831          elements or symbols.  And so the string decoder might have
2832          signaled errors.  The string at this point is not translated.
2833          I.e., all <...> sequences are still there.  */
2834       {
2835         char *runp = code->val.str.start;
2836         void *tmp;
2837
2838         while (*runp != '\0')
2839           {
2840             char *startp = (char *) runp;
2841             char *putp = (char *) runp;
2842             uint32_t wch;
2843
2844             /* Lookup weight for char and store it.  */
2845             if (*runp == '<')
2846               {
2847                 while (*++runp != '\0' && *runp != '>')
2848                   {
2849                     if (*runp == ldfile->escape_char)
2850                       if (*++runp == '\0')
2851                         {
2852                           lr_error (ldfile, _("unterminated weight name"));
2853                           lr_ignore_rest (ldfile, 0);
2854                           return -1;
2855                         }
2856                     *putp++ = *runp;
2857                   }
2858                 if (*runp == '>')
2859                   ++runp;
2860
2861                 if (putp == startp)
2862                   {
2863                     lr_error (ldfile, _("empty weight name: line ignored"));
2864                     lr_ignore_rest (ldfile, 0);
2865                     return -1;
2866                   }
2867
2868                 wch = charset_find_value (&charset->char_table, startp,
2869                                           putp - startp);
2870                 if (wch != ILLEGAL_CHAR_VALUE)
2871                   {
2872                     element_t *pelem;
2873
2874                     if (find_entry (&collate->result, &wch, sizeof (uint32_t),
2875                                     (void *)&pelem) < 0)
2876                       pelem = NULL;
2877
2878                     while (pelem != NULL
2879                            && (pelem->name[0] != wch
2880                                || pelem->name[1] != L'\0'))
2881                       pelem = pelem->next;
2882
2883                     value = pelem == NULL ? 0 : pelem->this_weight;
2884                   }
2885                 else if (find_entry (&collate->elements, startp, putp - startp,
2886                                      &tmp) >= 0)
2887                   {
2888                     element_t *pelem = (element_t *) tmp;
2889
2890                     value = pelem->this_weight;
2891                   }
2892                 else if (find_entry (&collate->symbols, startp, putp - startp,
2893                                      &tmp) >= 0)
2894                   {
2895                     value = (unsigned int) tmp;
2896                   }
2897                 else
2898                   {
2899                     if (verbose)
2900                       lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
2901                                 (int) (putp - startp), startp);
2902                     lr_ignore_rest (ldfile, 0);
2903                     return -1;
2904                   }
2905               }
2906             else
2907               {
2908                 element_t *wp;
2909                 uint32_t wch;
2910
2911                 if (*runp == ldfile->escape_char)
2912                   {
2913                     static const char digits[] = "0123456789abcdef";
2914                     const char *dp;
2915                     int base;
2916
2917                     ++runp;
2918                     if (tolower (*runp) == 'x')
2919                       {
2920                         ++runp;
2921                         base = 16;
2922                       }
2923                     else if (tolower (*runp) == 'd')
2924                       {
2925                         ++runp;
2926                         base = 10;
2927                       }
2928                     else
2929                       base = 8;
2930
2931                     dp = strchr (digits, tolower (*runp));
2932                     if (dp == NULL || (dp - digits) >= base)
2933                       {
2934                       illegal_char:
2935                         lr_error (ldfile, _("\
2936 illegal character constant in string"));
2937                         lr_ignore_rest (ldfile, 0);
2938                         return -1;
2939                       }
2940                     wch = dp - digits;
2941                     ++runp;
2942
2943                     dp = strchr (digits, tolower (*runp));
2944                     if (dp == NULL || (dp - digits) >= base)
2945                       goto illegal_char;
2946                     wch *= base;
2947                     wch += dp - digits;
2948                     ++runp;
2949
2950                     if (base != 16)
2951                       {
2952                         dp = strchr (digits, tolower (*runp));
2953                         if (dp != NULL && (dp - digits < base))
2954                           {
2955                             wch *= base;
2956                             wch += dp - digits;
2957                             ++runp;
2958                           }
2959                       }
2960                   }
2961                 else
2962                   wch = (uint32_t) *runp++;
2963
2964                 /* Lookup the weight for WCH.  */
2965                 if (find_entry (&collate->result, &wch, sizeof (wch),
2966                                 (void *)&wp) < 0)
2967                   wp = NULL;
2968
2969                 while (wp != NULL
2970                        && (wp->name[0] != wch || wp->name[1] != L'\0'))
2971                   wp = wp->next;
2972
2973                 value = wp == NULL ? 0 : wp->this_weight;
2974
2975                 /* To get the correct name for the error message.  */
2976                 putp = runp;
2977
2978                 /**************************************************\
2979                 |* I know here is something wrong.  Characters in *|
2980                 |* the string which are not in the <...> form     *|
2981                 |* cannot be declared forward for now!!!          *|
2982                 \**************************************************/
2983               }
2984
2985             /* Store in weight array.  */
2986             if (collate->nweight >= collate->nweight_max)
2987               {
2988                 collate->nweight_max *= 2;
2989                 collate->weight
2990                   = (unsigned int *) xrealloc (collate->weight,
2991                                                collate->nweight_max);
2992               }
2993
2994             if (value == 0)
2995               {
2996                 patch_t *newp;
2997
2998                 newp = (patch_t *) obstack_alloc (&collate->element_mem,
2999                                                   sizeof (patch_t));
3000                 newp->fname = ldfile->fname;
3001                 newp->lineno = ldfile->lineno;
3002                 newp->token
3003                   = (const char *) obstack_copy0 (&collate->element_mem,
3004                                                   startp, putp - startp);
3005                 newp->where.idx = collate->nweight++;
3006                 newp->next = collate->current_patch;
3007                 collate->current_patch = newp;
3008               }
3009             else
3010               collate->weight[collate->nweight++] = value;
3011             ++collate->weight_cnt[collate->weight_idx];
3012           }
3013       }
3014       return 0;
3015
3016     default:
3017       assert (! "should not happen");
3018     }
3019
3020
3021   if (collate->nweight >= collate->nweight_max)
3022     {
3023       collate->nweight_max *= 2;
3024       collate->weight = (unsigned int *) xrealloc (collate->weight,
3025                                                    collate->nweight_max);
3026     }
3027
3028   collate->weight[collate->nweight++] = value;
3029   ++collate->weight_cnt[collate->weight_idx];
3030
3031   return 0;
3032 }
3033
3034
3035 void
3036 collate_end_weight (struct linereader *ldfile, struct localedef_t *locale)
3037 {
3038   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
3039   element_t *pelem = collate->current_element;
3040
3041   if (collate->kind == symbol)
3042     {
3043       /* We don't have to do anything.  */
3044       collate->was_ellipsis = 0;
3045       return;
3046     }
3047
3048   if (collate->kind == ellipsis)
3049     {
3050       /* Before the next line is processed the ellipsis is handled.  */
3051       collate->was_ellipsis = 1;
3052       return;
3053     }
3054
3055   assert (collate->kind == character || collate->kind == element
3056           || collate->kind == undefined);
3057
3058   /* Fill in the missing weights.  */
3059   while (++collate->weight_idx < collate->nrules)
3060     {
3061       collate->weight[collate->nweight++] = pelem->this_weight;
3062       ++collate->weight_cnt[collate->weight_idx];
3063     }
3064
3065   /* Now we know how many ordering weights the current
3066      character/element has.  Allocate room in the element structure
3067      and copy information.  */
3068   pelem->ordering_len = collate->nweight;
3069
3070   /* First we write an array with the number of values for each
3071      weight.  */
3072   obstack_grow (&collate->element_mem, collate->weight_cnt,
3073                 collate->nrules * sizeof (unsigned int));
3074
3075   /* Now the weights itselves.  */
3076   obstack_grow (&collate->element_mem, collate->weight,
3077                 collate->nweight * sizeof (unsigned int));
3078
3079   /* Get result.  */
3080   pelem->ordering = obstack_finish (&collate->element_mem);
3081
3082   /* Now we handle the "patches".  */
3083   while (collate->current_patch != NULL)
3084     {
3085       patch_t *this_patch;
3086
3087       this_patch = collate->current_patch;
3088
3089       this_patch->where.pos = &pelem->ordering[collate->nrules
3090                                               + this_patch->where.idx];
3091
3092       collate->current_patch = this_patch->next;
3093       this_patch->next = collate->all_patches;
3094       collate->all_patches = this_patch;
3095     }
3096
3097   /* Set information for next round.  */
3098   collate->was_ellipsis = 0;
3099   if (collate->kind != undefined)
3100     collate->last_char = pelem->name[0];
3101 }
3102
3103
3104 /* The parser for the LC_CTYPE section of the locale definition.  */
3105 void
3106 read_lc_collate (struct linereader *ldfile, struct localedef_t *result,
3107                  struct charmap_t *charmap, struct repertoire_t *repertoire,
3108                  int ignore_content)
3109 {
3110   struct locale_collate_t *collate;
3111   int did_copy = 0;
3112   const char *save_str;
3113
3114   /* The rest of the line containing `LC_COLLATE' must be free.  */
3115   lr_ignore_rest (ldfile, 1);
3116
3117   now = lr_token (ldfile, charmap, NULL);
3118   nowtok = now->tok;
3119
3120   /* If we see `copy' now we are almost done.  */
3121   if (nowtok == tok_copy)
3122     {
3123       handle_copy (ldfile, charmap, repertoire, result, tok_lc_collate,
3124                    LC_COLLATE, "LC_COLLATE", ignore_content);
3125       did_copy = 1;
3126     }
3127
3128   /* Prepare the data structures.  */
3129   collate_startup (ldfile, result, charmap, ignore_content);
3130   collate = result->categories[LC_COLLATE].collate;
3131
3132   while (1)
3133     {
3134       /* Of course we don't proceed beyond the end of file.  */
3135       if (nowtok == tok_eof)
3136         break;
3137
3138       /* Ignore empty lines.  */
3139       if (nowtok == tok_eol)
3140         {
3141           now = lr_token (ldfile, charmap, NULL);
3142           nowtok = now->tok;
3143           continue;
3144         }
3145
3146       switch (nowtok)
3147         {
3148         case tok_coll_weight_max:
3149           if (did_copy)
3150             goto err_label;
3151           /* The rest of the line must be a single integer value.  */
3152           now = lr_token (ldfile, charmap, NULL);
3153           if (now->tok != tok_number)
3154             goto err_label;
3155           /* We simply forget about the value we just read, the implementation
3156              has no fixed limits.  */
3157           lr_ignore_rest (ldfile, 1);
3158           break;
3159
3160         case tok_script:
3161           if (did_copy)
3162             goto err_label;
3163           /* We expect the name of the script in brackets.  */
3164           now = lr_token (ldfile, charmap, NULL);
3165           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
3166             goto err_label;
3167           if (now->tok != tok_bsymbol)
3168             {
3169               lr_error (ldfile, _("\
3170 script name `%s' must not duplicate any known name"),
3171                         tok->val.str.startmb);
3172               lr_ignore_rest (ldfile, 0);
3173               break;
3174             }
3175           collate->scripts = xmalloc (collate->scripts,
3176                                       (collate->nscripts
3177                                        * sizeof (const char *)));
3178           collate->scripts[collate->nscripts++] = tok->val.str.startmb;
3179           lr_ignore_rest (ldfile, 1);
3180           break;
3181
3182         case tok_collating_element:
3183           if (did_copy)
3184             goto err_label;
3185           /* Get the first argument, a symbol in brackets.  */
3186           now = lr_token (ldfile, charmap, NULL);
3187           if (now->tok != tok_bsymbol)
3188             goto err_label;
3189           /* Test it.  */
3190           if (collate_element_to (ldfile, collate, now, charmap, repertoire))
3191             {
3192               /* An error occurred.  */
3193               lr_ignore_rest (ldfile, 0);
3194               break;
3195             }
3196           save_str = tok->val.str.startmb;
3197           /* Next comes `from'.  */
3198           now = lr_token (ldfile, charmap, NULL);
3199           if (now->tok != tok_from)
3200             goto err_label;
3201           /* Now comes a string.  */
3202           now = lr_token (ldfile, charmap, repertoire);
3203           if (now->tok != tok_string)
3204             goto err_label;
3205           collate_element_from (ldfile, collate, save_str, now, charmap,
3206                                 repertoire);
3207           /* The rest of the line should be empty.  */
3208           lr_ignore_rest (ldfile, 1);
3209           break;
3210
3211         case tok_collating_symbol:
3212           if (did_copy)
3213             goto err_label;
3214           /* Get the argument, a single symbol in brackets.  */
3215           now = lr_token (ldfile, charmap, NULL);
3216           if (now->tok != tok_bsymbol)
3217             goto err_label;
3218           collate_symbol (ldfile, collate, now, charmap, repertoire);
3219           break;
3220
3221         case tok_order_start:
3222           if (did_copy)
3223             goto err_label;
3224
3225           /* We expect now a scripting symbol or start right away
3226              with the order keywords.  Or we have no argument at all
3227              in which means `forward'.  */
3228           now = lr_token (ldfile, charmap, NULL);
3229           if (now->tok == tok_eol)
3230             {
3231               static enum coll_sort_rule default_rule = sort_forward;
3232               /* Use a single `forward' rule.  */
3233               collate->nrules = 1;
3234               collate->rules = &default_rule;
3235             }
3236           else
3237             {
3238               /* XXX We don't recognize the ISO 14651 extensions yet.  */
3239               uint32_t nrules = 0;
3240               uint32_t nrules_max = 32;
3241               enum coll_sort_rule *rules = alloca (nrules_max
3242                                                    * sizeof (*rules));
3243               int saw_semicolon = 0;
3244
3245               memset (rules, '\0', nrules_max * sizeof (*rules));
3246               do
3247                 {
3248                   if (now->tok != tok_forward && now->tok != tok_backward
3249                       && now->tok != tok_position)
3250                     goto err_label;
3251
3252                   if (saw_semicolon)
3253                     {
3254                       if (nrules == nrules_max)
3255                         {
3256                           newp = alloca (nrules_max * 2 * sizeof (*rules));
3257                           rules = memcpy (newp, rules,
3258                                           nrules_max * sizeof (*rules));
3259                           memset (&rules[nrules_max], '\0',
3260                                   nrules_max * sizeof (*rules));
3261                           nrules_max *= 2;
3262                         }
3263                       ++nrules;
3264                     }
3265
3266                   switch (now->tok)
3267                     {
3268                     case tok_forward:
3269                       if ((rules[nrules] & sort_backward) != 0)
3270                         {
3271                           lr_error (ldfile, _("\
3272 `forward' and `backward' order exclude each other"));
3273                           lr_ignore_rest (ldfile, 0);
3274                           goto error_sort;
3275                         }
3276                       rules[nrules] |= sort_forward;
3277                       break;
3278                     case tok_backward:
3279                       if ((rules[nrules] & sort_forward) != 0)
3280                         {
3281                           lr_error (ldfile, _("\
3282 `forward' and `backward' order exclude each other"));
3283                           lr_ignore_rest (ldfile, 0);
3284                           goto error_sort;
3285                         }
3286                       rules[nrules] |= sort_backward;
3287                       break;
3288                     case tok_position:
3289                       rules[nrules] |= tok_position;
3290                       break;
3291                     }
3292
3293                   /* Get the next token.  This is either the end of the line,
3294                      a comma or a semicolon.  */
3295                   now = lr_token (ldfile, charmap, NULL);
3296                   if (now->tok == tok_comma || now->tok == tok_semicolon)
3297                     {
3298                       saw_semicolon = now->tok == tok_semicolon;
3299                       now = lr_token (ldfile, charmap, NULL);
3300                     }
3301                 }
3302               while (now->tok != tok_eol || now->tok != tok_eof);
3303
3304             error_sort:
3305               collate->nrules = nrules;
3306               collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)),
3307                                        rules, nrules * sizeof (*rules));
3308             }
3309
3310           /* Now read the rules.  */
3311           read_rules (ldfile, collate, charmap, repertoire);
3312           break;
3313
3314         case tok_reorder_after:
3315           break;
3316
3317         case tok_reorder_script_after:
3318           break;
3319
3320         default:
3321         err_label:
3322           if (now->tok != tok_eof)
3323             SYNTAX_ERROR (_("syntax error in %s locale definition"),
3324                           "LC_COLLATE");
3325         }
3326
3327       /* Prepare for the next round.  */
3328       now = lr_token (ldfile, charmap, NULL);
3329       nowtok = now->tok;
3330     }
3331
3332   /* When we come here we reached the end of the file.  */
3333   lr_error (ldfile, _("premature end of file while reading category `%s'"),
3334             "LC_COLLATE");
3335 }
3336
3337 #endif